diff options
Diffstat (limited to 'xbmc/addons/Scraper.cpp')
| -rw-r--r-- | xbmc/addons/Scraper.cpp | 1033 |
1 files changed, 1033 insertions, 0 deletions
diff --git a/xbmc/addons/Scraper.cpp b/xbmc/addons/Scraper.cpp new file mode 100644 index 0000000..06f34f2 --- /dev/null +++ b/xbmc/addons/Scraper.cpp | |||
| @@ -0,0 +1,1033 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2005-2013 Team XBMC | ||
| 3 | * http://xbmc.org | ||
| 4 | * | ||
| 5 | * This Program is free software; you can redistribute it and/or modify | ||
| 6 | * it under the terms of the GNU General Public License as published by | ||
| 7 | * the Free Software Foundation; either version 2, or (at your option) | ||
| 8 | * any later version. | ||
| 9 | * | ||
| 10 | * This Program is distributed in the hope that it will be useful, | ||
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 13 | * GNU General Public License for more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with XBMC; see the file COPYING. If not, see | ||
| 17 | * <http://www.gnu.org/licenses/>. | ||
| 18 | * | ||
| 19 | */ | ||
| 20 | #include "Scraper.h" | ||
| 21 | #include "filesystem/File.h" | ||
| 22 | #include "filesystem/Directory.h" | ||
| 23 | #include "filesystem/CurlFile.h" | ||
| 24 | #include "AddonManager.h" | ||
| 25 | #include "utils/ScraperParser.h" | ||
| 26 | #include "utils/ScraperUrl.h" | ||
| 27 | #include "utils/CharsetConverter.h" | ||
| 28 | #include "utils/log.h" | ||
| 29 | #include "music/infoscanner/MusicAlbumInfo.h" | ||
| 30 | #include "music/infoscanner/MusicArtistInfo.h" | ||
| 31 | #include "utils/fstrcmp.h" | ||
| 32 | #include "settings/AdvancedSettings.h" | ||
| 33 | #include "FileItem.h" | ||
| 34 | #include "utils/URIUtils.h" | ||
| 35 | #include "utils/XMLUtils.h" | ||
| 36 | #include "utils/StringUtils.h" | ||
| 37 | #include "music/MusicDatabase.h" | ||
| 38 | #include "video/VideoDatabase.h" | ||
| 39 | #include "music/Album.h" | ||
| 40 | #include "music/Artist.h" | ||
| 41 | #include "Util.h" | ||
| 42 | #include "URL.h" | ||
| 43 | |||
| 44 | #include <sstream> | ||
| 45 | #include <algorithm> | ||
| 46 | |||
| 47 | using namespace std; | ||
| 48 | using namespace XFILE; | ||
| 49 | using namespace MUSIC_GRABBER; | ||
| 50 | using namespace VIDEO; | ||
| 51 | |||
| 52 | namespace ADDON | ||
| 53 | { | ||
| 54 | |||
// Maps a scraper content name (as written in addon.xml and settings XML)
// to its CONTENT_TYPE enum value plus the localized-string id used to
// display a pretty name for it.
typedef struct
{
  const char* name;  // canonical machine name of the content type
  CONTENT_TYPE type; // matching content enum value
  int pretty;        // localized string id for display (0 = none)
} ContentMapping;

// Lookup table shared by both TranslateContent overloads.
// Note: "albums" and "music" deliberately both map to CONTENT_ALBUMS, so
// string->type translation accepts either spelling while type->string
// translation returns the first match ("albums").
static const ContentMapping content[] =
  {{"unknown",     CONTENT_NONE,        231 },
   {"albums",      CONTENT_ALBUMS,      132 },
   {"music",       CONTENT_ALBUMS,      132 },
   {"artists",     CONTENT_ARTISTS,     133 },
   {"movies",      CONTENT_MOVIES,      20342 },
   {"tvshows",     CONTENT_TVSHOWS,     20343 },
   {"musicvideos", CONTENT_MUSICVIDEOS, 20389 }};
| 70 | |||
| 71 | std::string TranslateContent(const CONTENT_TYPE &type, bool pretty/*=false*/) | ||
| 72 | { | ||
| 73 | for (unsigned int index=0; index < ARRAY_SIZE(content); ++index) | ||
| 74 | { | ||
| 75 | const ContentMapping &map = content[index]; | ||
| 76 | if (type == map.type) | ||
| 77 | { | ||
| 78 | if (pretty && map.pretty) | ||
| 79 | return g_localizeStrings.Get(map.pretty); | ||
| 80 | else | ||
| 81 | return map.name; | ||
| 82 | } | ||
| 83 | } | ||
| 84 | return ""; | ||
| 85 | } | ||
| 86 | |||
| 87 | CONTENT_TYPE TranslateContent(const std::string &string) | ||
| 88 | { | ||
| 89 | for (unsigned int index=0; index < ARRAY_SIZE(content); ++index) | ||
| 90 | { | ||
| 91 | const ContentMapping &map = content[index]; | ||
| 92 | if (string == map.name) | ||
| 93 | return map.type; | ||
| 94 | } | ||
| 95 | return CONTENT_NONE; | ||
| 96 | } | ||
| 97 | |||
| 98 | TYPE ScraperTypeFromContent(const CONTENT_TYPE &content) | ||
| 99 | { | ||
| 100 | switch (content) | ||
| 101 | { | ||
| 102 | case CONTENT_ALBUMS: | ||
| 103 | return ADDON_SCRAPER_ALBUMS; | ||
| 104 | case CONTENT_ARTISTS: | ||
| 105 | return ADDON_SCRAPER_ARTISTS; | ||
| 106 | case CONTENT_MOVIES: | ||
| 107 | return ADDON_SCRAPER_MOVIES; | ||
| 108 | case CONTENT_MUSICVIDEOS: | ||
| 109 | return ADDON_SCRAPER_MUSICVIDEOS; | ||
| 110 | case CONTENT_TVSHOWS: | ||
| 111 | return ADDON_SCRAPER_TVSHOWS; | ||
| 112 | default: | ||
| 113 | return ADDON_UNKNOWN; | ||
| 114 | } | ||
| 115 | } | ||
| 116 | |||
| 117 | // if the XML root is <error>, throw CScraperError with enclosed <title>/<message> values | ||
| 118 | static void CheckScraperError(const TiXmlElement *pxeRoot) | ||
| 119 | { | ||
| 120 | if (!pxeRoot || stricmp(pxeRoot->Value(), "error")) | ||
| 121 | return; | ||
| 122 | std::string sTitle; | ||
| 123 | std::string sMessage; | ||
| 124 | XMLUtils::GetString(pxeRoot, "title", sTitle); | ||
| 125 | XMLUtils::GetString(pxeRoot, "message", sMessage); | ||
| 126 | throw CScraperError(sTitle, sMessage); | ||
| 127 | } | ||
| 128 | |||
// Construct a scraper from its add-on extension point.
// Reads the optional @language, @requiressettings and @cachepersistence
// attributes from the extension configuration, then derives the default
// path content from the add-on's scraper TYPE. The parser is not loaded
// here; loading is deferred until Load() is first called (m_fLoaded=false).
CScraper::CScraper(const cp_extension_t *ext) : CAddon(ext), m_fLoaded(false)
{
  if (ext)
  {
    m_language = CAddonMgr::Get().GetExtValue(ext->configuration, "@language");
    m_requiressettings = CAddonMgr::Get().GetExtValue(ext->configuration,"@requiressettings") == "true";
    // cache persistence is given as a time string, e.g. parsed by
    // CDateTimeSpan::SetFromTimeString; empty means "keep default"
    std::string persistence = CAddonMgr::Get().GetExtValue(ext->configuration, "@cachepersistence");
    if (!persistence.empty())
      m_persistence.SetFromTimeString(persistence);
  }
  // map the scraper add-on type onto the content it scrapes by default
  switch (Type())
  {
  case ADDON_SCRAPER_ALBUMS:
    m_pathContent = CONTENT_ALBUMS;
    break;
  case ADDON_SCRAPER_ARTISTS:
    m_pathContent = CONTENT_ARTISTS;
    break;
  case ADDON_SCRAPER_MOVIES:
    m_pathContent = CONTENT_MOVIES;
    break;
  case ADDON_SCRAPER_MUSICVIDEOS:
    m_pathContent = CONTENT_MUSICVIDEOS;
    break;
  case ADDON_SCRAPER_TVSHOWS:
    m_pathContent = CONTENT_TVSHOWS;
    break;
  default:
    m_pathContent = CONTENT_NONE;
    break;
  }
}
| 161 | |||
| 162 | AddonPtr CScraper::Clone() const | ||
| 163 | { | ||
| 164 | return AddonPtr(new CScraper(*this)); | ||
| 165 | } | ||
| 166 | |||
// Copy constructor: duplicates the scraper's configuration but resets
// m_fLoaded, so the copy lazily re-loads its parser XML on first use
// (the parser state itself is not copied here).
CScraper::CScraper(const CScraper &rhs)
  : CAddon(rhs), m_fLoaded(false),
  m_language(rhs.m_language),
  m_requiressettings(rhs.m_requiressettings),
  m_persistence(rhs.m_persistence),
  m_pathContent(rhs.m_pathContent)
{
}
| 175 | |||
| 176 | bool CScraper::Supports(const CONTENT_TYPE &content) const | ||
| 177 | { | ||
| 178 | return Type() == ScraperTypeFromContent(content); | ||
| 179 | } | ||
| 180 | |||
| 181 | bool CScraper::SetPathSettings(CONTENT_TYPE content, const std::string& xml) | ||
| 182 | { | ||
| 183 | m_pathContent = content; | ||
| 184 | if (!LoadSettings()) | ||
| 185 | return false; | ||
| 186 | |||
| 187 | if (xml.empty()) | ||
| 188 | return true; | ||
| 189 | |||
| 190 | CXBMCTinyXML doc; | ||
| 191 | doc.Parse(xml); | ||
| 192 | m_userSettingsLoaded = SettingsFromXML(doc); | ||
| 193 | |||
| 194 | return m_userSettingsLoaded; | ||
| 195 | } | ||
| 196 | |||
| 197 | std::string CScraper::GetPathSettings() | ||
| 198 | { | ||
| 199 | if (!LoadSettings()) | ||
| 200 | return ""; | ||
| 201 | |||
| 202 | stringstream stream; | ||
| 203 | CXBMCTinyXML doc; | ||
| 204 | SettingsToXML(doc); | ||
| 205 | if (doc.RootElement()) | ||
| 206 | stream << *doc.RootElement(); | ||
| 207 | |||
| 208 | return stream.str(); | ||
| 209 | } | ||
| 210 | |||
// Purge expired entries from this scraper's on-disk cache.
// Cache layout: <advanced cachePath>/scrapers/<addon-id>/ — files whose
// timestamp plus m_persistence lies in the past are deleted. Missing
// directories are created so subsequent cache writes can succeed.
void CScraper::ClearCache()
{
  std::string strCachePath = URIUtils::AddFileToFolder(g_advancedSettings.m_cachePath, "scrapers");

  // create scraper cache dir if needed
  if (!CDirectory::Exists(strCachePath))
    CDirectory::Create(strCachePath);

  // per-scraper subdirectory named after the add-on id
  strCachePath = URIUtils::AddFileToFolder(strCachePath, ID());
  URIUtils::AddSlashAtEnd(strCachePath);

  if (CDirectory::Exists(strCachePath))
  {
    CFileItemList items;
    CDirectory::GetDirectory(strCachePath,items);
    for (int i=0;i<items.Size();++i)
    {
      // wipe cache entries older than the configured persistence window
      if (items[i]->m_dateTime + m_persistence <= CDateTime::GetCurrentDateTime())
        CFile::Delete(items[i]->GetPath());
    }
  }
  else
    CDirectory::Create(strCachePath);
}
| 236 | |||
// returns a vector of strings: the first is the XML output by the function; the rest
// is XML output by chained functions, possibly recursively
// the CCurlFile object is passed in so that URL fetches can be canceled from other threads
// throws CScraperError abort on internal failures (e.g., parse errors)
vector<string> CScraper::Run(const std::string& function,
                             const CScraperUrl& scrURL,
                             CCurlFile& http,
                             const vector<string>* extras)
{
  // lazily load the scraper XML (and its library dependencies)
  if (!Load())
    throw CScraperError();

  std::string strXML = InternalRun(function,scrURL,http,extras);
  if (strXML.empty())
  {
    // NfoUrl/ResolveIDToUrl legitimately return nothing for a miss,
    // so don't spam the log for those two entry points
    if (function != "NfoUrl" && function != "ResolveIDToUrl")
      CLog::Log(LOGERROR, "%s: Unable to parse web site",__FUNCTION__);
    throw CScraperError();
  }

  CLog::Log(LOGDEBUG,"scraper: %s returned %s",function.c_str(),strXML.c_str());

  CXBMCTinyXML doc;
  /* all data was converted to UTF-8 before being processed by scraper */
  doc.Parse(strXML, TIXML_ENCODING_UTF8);
  if (!doc.RootElement())
  {
    CLog::Log(LOGERROR, "%s: Unable to parse XML",__FUNCTION__);
    throw CScraperError();
  }

  vector<string> result;
  result.push_back(strXML);
  TiXmlElement* xchain = doc.RootElement()->FirstChildElement();
  // skip children of the root element until <url> or <chain>
  while (xchain && strcmp(xchain->Value(),"url") && strcmp(xchain->Value(),"chain"))
    xchain = xchain->NextSiblingElement();
  while (xchain)
  {
    // <chain|url function="...">param</> — only elements that name a
    // follow-up function are executed; plain <url>/<chain> are skipped
    const char* szFunction = xchain->Attribute("function");
    if (szFunction)
    {
      CScraperUrl scrURL2;
      vector<string> extras;
      // for <chain>, pass the contained text as a parameter; for <url>, as URL content
      if (strcmp(xchain->Value(),"chain")==0)
      {
        if (xchain->FirstChild())
          extras.push_back(xchain->FirstChild()->Value());
      }
      else
        scrURL2.ParseElement(xchain);
      // Fix for empty chains. $$1 would still contain the
      // previous value as there is no child of the xml node.
      // since $$1 will always either contain the data from an
      // url or the parameters to a chain, we can safely clear it here
      // to fix this issue
      m_parser.m_param[0].clear();
      // recurse (errors in chained calls are swallowed, not fatal)
      vector<string> result2 = RunNoThrow(szFunction,scrURL2,http,&extras);
      result.insert(result.end(),result2.begin(),result2.end());
    }
    xchain = xchain->NextSiblingElement();
    // continue to skip past non-<url> or <chain> elements
    while (xchain && strcmp(xchain->Value(),"url") && strcmp(xchain->Value(),"chain"))
      xchain = xchain->NextSiblingElement();
  }

  return result;
}
| 307 | |||
| 308 | // just like Run, but returns an empty list instead of throwing in case of error | ||
| 309 | // don't use in new code; errors should be handled appropriately | ||
| 310 | vector<string> CScraper::RunNoThrow(const std::string& function, | ||
| 311 | const CScraperUrl& url, | ||
| 312 | XFILE::CCurlFile& http, | ||
| 313 | const vector<string>* extras) | ||
| 314 | { | ||
| 315 | vector<string> vcs; | ||
| 316 | try | ||
| 317 | { | ||
| 318 | vcs = Run(function, url, http, extras); | ||
| 319 | } | ||
| 320 | catch (const CScraperError &sce) | ||
| 321 | { | ||
| 322 | assert(sce.FAborted()); // the only kind we should get | ||
| 323 | } | ||
| 324 | return vcs; | ||
| 325 | } | ||
| 326 | |||
// Fetch every URL of scrURL into successive parser parameters ($$1, $$2,
// ...), append any 'extras' after them, then invoke the named scraper
// function. Returns the function's raw XML output, or "" when any URL
// fetch fails or yields no data.
std::string CScraper::InternalRun(const std::string& function,
                                  const CScraperUrl& scrURL,
                                  CCurlFile& http,
                                  const vector<string>* extras)
{
  // walk the list of input URLs and fetch each into parser parameters
  // NOTE(review): assumes m_parser.m_param has capacity for all URLs plus
  // extras — confirm the bound against CScraperParser's parameter array
  unsigned int i;
  for (i=0;i<scrURL.m_url.size();++i)
  {
    if (!CScraperUrl::Get(scrURL.m_url[i],m_parser.m_param[i],http,ID()) || m_parser.m_param[i].size() == 0)
      return "";
  }
  // put the 'extra' parameters into the parser parameter list too
  if (extras)
  {
    for (unsigned int j=0;j<extras->size();++j)
      m_parser.m_param[j+i] = (*extras)[j];
  }

  return m_parser.Parse(function,this);
}
| 348 | |||
// Load the scraper XML and the XML of every scraper-library dependency
// into the parser. Idempotent: returns cached success once loaded.
// Fails (returns false) when the main XML can't be loaded or a required
// (non-optional) dependency is not installed; failures are not latched,
// so a later call retries the load.
bool CScraper::Load()
{
  if (m_fLoaded)
    return true;

  bool result=m_parser.Load(LibPath());
  if (result)
  {
    // TODO: this routine assumes that deps are a single level, and assumes the dep is installed.
    // 1. Does it make sense to have recursive dependencies?
    // 2. Should we be checking the dep versions or do we assume it is ok?
    ADDONDEPS deps = GetDeps();
    ADDONDEPS::iterator itr = deps.begin();
    while (itr != deps.end())
    {
      // the metadata extension point itself is not a loadable library
      if (itr->first == "xbmc.metadata")
      {
        ++itr;
        continue;
      }
      AddonPtr dep;

      // second member of the dependency pair flags an optional dep
      bool bOptional = itr->second.second;

      if (CAddonMgr::Get().GetAddon((*itr).first, dep))
      {
        // only scraper libraries contribute XML; other dep types are ignored
        CXBMCTinyXML doc;
        if (dep->Type() == ADDON_SCRAPER_LIBRARY && doc.LoadFile(dep->LibPath()))
          m_parser.AddDocument(&doc);
      }
      else
      {
        // a missing required dependency aborts the load
        if (!bOptional)
        {
          result = false;
          break;
        }
      }
      ++itr;
    }
  }

  if (!result)
    CLog::Log(LOGWARNING, "failed to load scraper XML from %s", LibPath().c_str());
  // cache and return the outcome in one step (false keeps retry possible)
  return m_fLoaded = result;
}
| 395 | |||
| 396 | bool CScraper::IsInUse() const | ||
| 397 | { | ||
| 398 | if (Supports(CONTENT_ALBUMS) || Supports(CONTENT_ARTISTS)) | ||
| 399 | { // music scraper | ||
| 400 | CMusicDatabase db; | ||
| 401 | if (db.Open() && db.ScraperInUse(ID())) | ||
| 402 | return true; | ||
| 403 | } | ||
| 404 | else | ||
| 405 | { // video scraper | ||
| 406 | CVideoDatabase db; | ||
| 407 | if (db.Open() && db.ScraperInUse(ID())) | ||
| 408 | return true; | ||
| 409 | } | ||
| 410 | return false; | ||
| 411 | } | ||
| 412 | |||
| 413 | bool CScraper::IsNoop() | ||
| 414 | { | ||
| 415 | if (!Load()) | ||
| 416 | throw CScraperError(); | ||
| 417 | |||
| 418 | return m_parser.IsNoop(); | ||
| 419 | } | ||
| 420 | |||
// pass in contents of .nfo file; returns URL (possibly empty if none found)
// and may populate strId, or throws CScraperError on error
// TODO: the result-parsing body is duplicated in ResolveIDToUrl below —
// consider factoring into a shared helper
CScraperUrl CScraper::NfoUrl(const std::string &sNfoContent)
{
  CScraperUrl scurlRet;

  if (IsNoop())
    return scurlRet;

  // scraper function takes contents of .nfo file, returns XML (see below)
  vector<string> vcsIn;
  vcsIn.push_back(sNfoContent);
  CScraperUrl scurl;
  CCurlFile fcurl;
  vector<string> vcsOut = Run("NfoUrl", scurl, fcurl, &vcsIn);
  if (vcsOut.empty() || vcsOut[0].empty())
    return scurlRet;
  if (vcsOut.size() > 1)
    CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);

  // parse returned XML: either <error> element on error, blank on failure,
  // or <url>...</url> or <url>...</url><id>...</id> on success
  for (unsigned int i=0; i < vcsOut.size(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(vcsOut[i], TIXML_ENCODING_UTF8);
    CheckScraperError(doc.RootElement());

    if (doc.RootElement())
    {
      /*
       NOTE: Scrapers might return invalid xml with some loose
       elements (eg. '<url>http://some.url</url><id>123</id>').
       Since XMLUtils::GetString() is assuming well formed xml
       with start and end-tags we're not able to use it.
       Check for the desired Elements instead.
      */
      TiXmlElement* pxeUrl=NULL;
      TiXmlElement* pId=NULL;
      if (!strcmp(doc.RootElement()->Value(),"details"))
      {
        // well-formed <details> wrapper: children hang off the root
        pxeUrl = doc.RootElement()->FirstChildElement("url");
        pId = doc.RootElement()->FirstChildElement("id");
      }
      else
      {
        // loose elements: search siblings directly under the document
        pId = doc.FirstChildElement("id");
        pxeUrl = doc.FirstChildElement("url");
      }
      if (pId && pId->FirstChild())
        scurlRet.strId = pId->FirstChild()->Value();

      // a url with a function attribute is a chain step, not a result
      if (pxeUrl && pxeUrl->Attribute("function"))
        continue;

      if (pxeUrl)
        scurlRet.ParseElement(pxeUrl);
      else if (!strcmp(doc.RootElement()->Value(), "url"))
        scurlRet.ParseElement(doc.RootElement());
      else
        continue;
      break; // first usable url wins
    }
  }
  return scurlRet;
}
| 487 | |||
// Resolve an external (provider-specific) id to a scraper URL, possibly
// also populating strId. Returns an empty CScraperUrl when nothing is
// found; throws CScraperError on scraper-reported errors.
// TODO: the result-parsing body duplicates NfoUrl above — consider
// factoring into a shared helper
CScraperUrl CScraper::ResolveIDToUrl(const std::string& externalID)
{
  CScraperUrl scurlRet;

  // scraper function takes an external ID, returns XML (see below)
  vector<string> vcsIn;
  vcsIn.push_back(externalID);
  CScraperUrl scurl;
  CCurlFile fcurl;
  vector<string> vcsOut = Run("ResolveIDToUrl", scurl, fcurl, &vcsIn);
  if (vcsOut.empty() || vcsOut[0].empty())
    return scurlRet;
  if (vcsOut.size() > 1)
    CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);

  // parse returned XML: either <error> element on error, blank on failure,
  // or <url>...</url> or <url>...</url><id>...</id> on success
  for (unsigned int i=0; i < vcsOut.size(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(vcsOut[i], TIXML_ENCODING_UTF8);
    CheckScraperError(doc.RootElement());

    if (doc.RootElement())
    {
      /*
       NOTE: Scrapers might return invalid xml with some loose
       elements (eg. '<url>http://some.url</url><id>123</id>').
       Since XMLUtils::GetString() is assuming well formed xml
       with start and end-tags we're not able to use it.
       Check for the desired Elements instead.
      */
      TiXmlElement* pxeUrl=NULL;
      TiXmlElement* pId=NULL;
      if (!strcmp(doc.RootElement()->Value(),"details"))
      {
        // well-formed <details> wrapper: children hang off the root
        pxeUrl = doc.RootElement()->FirstChildElement("url");
        pId = doc.RootElement()->FirstChildElement("id");
      }
      else
      {
        // loose elements: search siblings directly under the document
        pId = doc.FirstChildElement("id");
        pxeUrl = doc.FirstChildElement("url");
      }
      if (pId && pId->FirstChild())
        scurlRet.strId = pId->FirstChild()->Value();

      // a url with a function attribute is a chain step, not a result
      if (pxeUrl && pxeUrl->Attribute("function"))
        continue;

      if (pxeUrl)
        scurlRet.ParseElement(pxeUrl);
      else if (!strcmp(doc.RootElement()->Value(), "url"))
        scurlRet.ParseElement(doc.RootElement());
      else
        continue;
      break; // first usable url wins
    }
  }
  return scurlRet;
}
| 549 | |||
| 550 | static bool RelevanceSortFunction(const CScraperUrl &left, const CScraperUrl &right) | ||
| 551 | { | ||
| 552 | return left.relevance > right.relevance; | ||
| 553 | } | ||
| 554 | |||
// fetch list of matching movies sorted by relevance (may be empty);
// throws CScraperError on error; first called with fFirst set, then unset if first try fails
std::vector<CScraperUrl> CScraper::FindMovie(XFILE::CCurlFile &fcurl, const std::string &sMovie,
                                             bool fFirst)
{
  // prepare parameters for URL creation: split filename into title/year
  std::string sTitle, sTitleYear, sYear;
  CUtil::CleanString(sMovie, sTitle, sTitleYear, sYear, true/*fRemoveExt*/, fFirst);

  CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper "
    "(path: '%s', content: '%s', version: '%s')", __FUNCTION__, sTitle.c_str(),
    Name().c_str(), Path().c_str(),
    ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());

  std::vector<CScraperUrl> vcscurl;
  if (IsNoop())
    return vcscurl;

  // on the retry pass, treat dashes as spaces to widen the match
  if (!fFirst)
    StringUtils::Replace(sTitle, '-',' ');

  // encode the title in the scraper's expected charset, then URL-encode it
  vector<string> vcsIn(1);
  g_charsetConverter.utf8To(SearchStringEncoding(), sTitle, vcsIn[0]);
  vcsIn[0] = CURL::Encode(vcsIn[0]);
  if (fFirst && !sYear.empty())
    vcsIn.push_back(sYear);

  // request a search URL from the title/filename/etc.
  CScraperUrl scurl;
  vector<string> vcsOut = Run("CreateSearchUrl", scurl, fcurl, &vcsIn);
  if (vcsOut.empty())
  {
    CLog::Log(LOGDEBUG, "%s: CreateSearchUrl failed", __FUNCTION__);
    throw CScraperError();
  }
  scurl.ParseString(vcsOut[0]);

  // do the search, and parse the result into a list
  // NOTE(review): scurl.m_url[0] is accessed unconditionally — confirm
  // ParseString always yields at least one url on success
  vcsIn.clear();
  vcsIn.push_back(scurl.m_url[0].m_url);
  vcsOut = Run("GetSearchResults", scurl, fcurl, &vcsIn);

  bool fSort(true);
  std::set<std::string> stsDupeCheck;
  bool fResults(false);
  for (vector<string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(*i, TIXML_ENCODING_UTF8);
    if (!doc.RootElement())
    {
      CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
      continue;  // might have more valid results later
    }

    CheckScraperError(doc.RootElement());

    TiXmlHandle xhDoc(&doc);
    TiXmlHandle xhResults = xhDoc.FirstChild("results");
    if (!xhResults.Element())
      continue;
    fResults = true;  // even if empty

    // we need to sort if returned results don't specify 'sorted="yes"'
    if (fSort)
    {
      const char *sorted = xhResults.Element()->Attribute("sorted");
      if (sorted != NULL)
        fSort = !StringUtils::EqualsNoCase(sorted, "yes");
    }

    for (TiXmlElement *pxeMovie = xhResults.FirstChild("entity").Element();
      pxeMovie; pxeMovie = pxeMovie->NextSiblingElement())
    {
      CScraperUrl scurlMovie;
      TiXmlNode *pxnTitle = pxeMovie->FirstChild("title");
      TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
      if (pxnTitle && pxnTitle->FirstChild() && pxeLink && pxeLink->FirstChild())
      {
        scurlMovie.strTitle = pxnTitle->FirstChild()->Value();
        XMLUtils::GetString(pxeMovie, "id", scurlMovie.strId);

        // an entity may carry several <url> elements; collect them all
        for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
          scurlMovie.ParseElement(pxeLink);

        // calculate the relevance of this hit
        std::string sCompareTitle = scurlMovie.strTitle;
        StringUtils::ToLower(sCompareTitle);
        std::string sMatchTitle = sTitle;
        StringUtils::ToLower(sMatchTitle);

        /*
         * Identify the best match by performing a fuzzy string compare on the search term and
         * the result. Additionally, use the year (if available) to further refine the best match.
         * An exact match scores 1, a match off by a year scores 0.5 (release dates can vary between
         * countries), otherwise it scores 0.
         */
        std::string sCompareYear;
        XMLUtils::GetString(pxeMovie, "year", sCompareYear);

        double yearScore = 0;
        if (!sYear.empty() && !sCompareYear.empty())
          yearScore = std::max(0.0, 1-0.5*abs(atoi(sYear.c_str())-atoi(sCompareYear.c_str())));

        scurlMovie.relevance = fstrcmp(sMatchTitle.c_str(), sCompareTitle.c_str(), 0.0) + yearScore;

        // reconstruct a title for the user: append year and language if known
        if (!sCompareYear.empty())
          scurlMovie.strTitle += StringUtils::Format(" (%s)", sCompareYear.c_str());

        std::string sLanguage;
        if (XMLUtils::GetString(pxeMovie, "language", sLanguage) && !sLanguage.empty())
          scurlMovie.strTitle += StringUtils::Format(" (%s)", sLanguage.c_str());

        // filter for dupes from naughty scrapers (keyed on first url + title)
        if (stsDupeCheck.insert(scurlMovie.m_url[0].m_url + " " + scurlMovie.strTitle).second)
          vcscurl.push_back(scurlMovie);
      }
    }
  }

  // no <results> element in any output means the scraper gave us nothing usable
  if (!fResults)
    throw CScraperError(); // scraper aborted

  // stable sort preserves scraper order among equal relevance scores
  if (fSort)
    std::stable_sort(vcscurl.begin(), vcscurl.end(), RelevanceSortFunction);

  return vcscurl;
}
| 684 | |||
// find album by artist, using fcurl for web fetches
// returns a list of albums (empty if no match or failure)
std::vector<CMusicAlbumInfo> CScraper::FindAlbum(CCurlFile &fcurl, const std::string &sAlbum,
                                                 const std::string &sArtist)
{
  CLog::Log(LOGDEBUG, "%s: Searching for '%s - %s' using %s scraper "
    "(path: '%s', content: '%s', version: '%s')", __FUNCTION__, sArtist.c_str(),
    sAlbum.c_str(), Name().c_str(), Path().c_str(),
    ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());

  std::vector<CMusicAlbumInfo> vcali;
  if (IsNoop())
    return vcali;

  // scraper function is given the album and artist as parameters and
  // returns an XML <url> element parseable by CScraperUrl
  std::vector<string> extras(2);
  g_charsetConverter.utf8To(SearchStringEncoding(), sAlbum, extras[0]);
  g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[1]);
  extras[0] = CURL::Encode(extras[0]);
  extras[1] = CURL::Encode(extras[1]);
  CScraperUrl scurl;
  vector<string> vcsOut = RunNoThrow("CreateAlbumSearchUrl", scurl, fcurl, &extras);
  if (vcsOut.size() > 1)
    CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);

  if (vcsOut.empty() || vcsOut[0].empty())
    return vcali;
  scurl.ParseString(vcsOut[0]);

  // the next function is passed the contents of the returned URL, and returns
  // an empty string on failure; on success, returns XML matches in the form:
  // <results>
  //  <entity>
  //   <title>...</title>
  //   <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
  //   <artist>...</artist>
  //   <year>...</year>
  //   <relevance [scale="..."]>...</relevance> (scale defaults to 1; score is divided by it)
  //  </entity>
  //  ...
  // </results>
  vcsOut = RunNoThrow("GetAlbumSearchResults", scurl, fcurl);

  // parse the returned XML into a vector of album objects
  for (vector<string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(*i, TIXML_ENCODING_UTF8);
    TiXmlHandle xhDoc(&doc);

    for (TiXmlElement* pxeAlbum = xhDoc.FirstChild("results").FirstChild("entity").Element();
      pxeAlbum; pxeAlbum = pxeAlbum->NextSiblingElement())
    {
      std::string sTitle;
      if (XMLUtils::GetString(pxeAlbum, "title", sTitle) && !sTitle.empty())
      {
        // compose a display name: "artist - title (year)" where available
        std::string sArtist;
        std::string sAlbumName;
        if (XMLUtils::GetString(pxeAlbum, "artist", sArtist) && !sArtist.empty())
          sAlbumName = StringUtils::Format("%s - %s", sArtist.c_str(), sTitle.c_str());
        else
          sAlbumName = sTitle;

        std::string sYear;
        if (XMLUtils::GetString(pxeAlbum, "year", sYear) && !sYear.empty())
          sAlbumName = StringUtils::Format("%s (%s)", sAlbumName.c_str(), sYear.c_str());

        // if no URL is provided, use the URL we got back from CreateAlbumSearchUrl
        // (e.g., in case we only got one result back and were sent to the detail page)
        TiXmlElement* pxeLink = pxeAlbum->FirstChildElement("url");
        CScraperUrl scurlAlbum;
        if (!pxeLink)
          scurlAlbum.ParseString(scurl.m_xml);
        for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
          scurlAlbum.ParseElement(pxeLink);

        // entries without any usable url cannot be fetched later; drop them
        if (!scurlAlbum.m_url.size())
          continue;

        CMusicAlbumInfo ali(sTitle, sArtist, sAlbumName, scurlAlbum);

        // optional scraper-provided relevance, normalized by its scale attribute
        TiXmlElement* pxeRel = pxeAlbum->FirstChildElement("relevance");
        if (pxeRel && pxeRel->FirstChild())
        {
          const char* szScale = pxeRel->Attribute("scale");
          float flScale = szScale ? float(atof(szScale)) : 1;
          ali.SetRelevance(float(atof(pxeRel->FirstChild()->Value())) / flScale);
        }

        vcali.push_back(ali);
      }
    }
  }
  return vcali;
}
| 781 | |||
| 782 | // find artist, using fcurl for web fetches | ||
| 783 | // returns a list of artists (empty if no match or failure) | ||
| 784 | std::vector<CMusicArtistInfo> CScraper::FindArtist(CCurlFile &fcurl, | ||
| 785 | const std::string &sArtist) | ||
| 786 | { | ||
| 787 | CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper " | ||
| 788 | "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, sArtist.c_str(), | ||
| 789 | Name().c_str(), Path().c_str(), | ||
| 790 | ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str()); | ||
| 791 | |||
| 792 | std::vector<CMusicArtistInfo> vcari; | ||
| 793 | if (IsNoop()) | ||
| 794 | return vcari; | ||
| 795 | |||
| 796 | // scraper function is given the artist as parameter and | ||
| 797 | // returns an XML <url> element parseable by CScraperUrl | ||
| 798 | std::vector<string> extras(1); | ||
| 799 | g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[0]); | ||
| 800 | extras[0] = CURL::Encode(extras[0]); | ||
| 801 | CScraperUrl scurl; | ||
| 802 | vector<string> vcsOut = RunNoThrow("CreateArtistSearchUrl", scurl, fcurl, &extras); | ||
| 803 | |||
| 804 | if (vcsOut.empty() || vcsOut[0].empty()) | ||
| 805 | return vcari; | ||
| 806 | scurl.ParseString(vcsOut[0]); | ||
| 807 | |||
| 808 | // the next function is passed the contents of the returned URL, and returns | ||
| 809 | // an empty string on failure; on success, returns XML matches in the form: | ||
| 810 | // <results> | ||
| 811 | // <entity> | ||
| 812 | // <title>...</title> | ||
| 813 | // <year>...</year> | ||
| 814 | // <genre>...</genre> | ||
| 815 | // <url>...</url> (with the usual CScraperUrl decorations like post or spoof) | ||
| 816 | // </entity> | ||
| 817 | // ... | ||
| 818 | // </results> | ||
| 819 | vcsOut = RunNoThrow("GetArtistSearchResults", scurl, fcurl); | ||
| 820 | |||
| 821 | // parse the returned XML into a vector of artist objects | ||
| 822 | for (vector<string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i) | ||
| 823 | { | ||
| 824 | CXBMCTinyXML doc; | ||
| 825 | doc.Parse(*i, TIXML_ENCODING_UTF8); | ||
| 826 | if (!doc.RootElement()) | ||
| 827 | { | ||
| 828 | CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__); | ||
| 829 | return vcari; | ||
| 830 | } | ||
| 831 | TiXmlHandle xhDoc(&doc); | ||
| 832 | for (TiXmlElement* pxeArtist = xhDoc.FirstChild("results").FirstChild("entity").Element(); | ||
| 833 | pxeArtist; pxeArtist = pxeArtist->NextSiblingElement()) | ||
| 834 | { | ||
| 835 | TiXmlNode* pxnTitle = pxeArtist->FirstChild("title"); | ||
| 836 | if (pxnTitle && pxnTitle->FirstChild()) | ||
| 837 | { | ||
| 838 | CScraperUrl scurlArtist; | ||
| 839 | |||
| 840 | TiXmlElement* pxeLink = pxeArtist->FirstChildElement("url"); | ||
| 841 | if (!pxeLink) | ||
| 842 | scurlArtist.ParseString(scurl.m_xml); | ||
| 843 | for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url")) | ||
| 844 | scurlArtist.ParseElement(pxeLink); | ||
| 845 | |||
| 846 | if (!scurlArtist.m_url.size()) | ||
| 847 | continue; | ||
| 848 | |||
| 849 | CMusicArtistInfo ari(pxnTitle->FirstChild()->Value(), scurlArtist); | ||
| 850 | std::string genre; | ||
| 851 | XMLUtils::GetString(pxeArtist, "genre", genre); | ||
| 852 | if (!genre.empty()) | ||
| 853 | ari.GetArtist().genre = StringUtils::Split(genre, g_advancedSettings.m_musicItemSeparator); | ||
| 854 | XMLUtils::GetString(pxeArtist, "year", ari.GetArtist().strBorn); | ||
| 855 | |||
| 856 | vcari.push_back(ari); | ||
| 857 | } | ||
| 858 | } | ||
| 859 | } | ||
| 860 | return vcari; | ||
| 861 | } | ||
| 862 | |||
| 863 | // fetch list of episodes from URL (from video database) | ||
| 864 | EPISODELIST CScraper::GetEpisodeList(XFILE::CCurlFile &fcurl, const CScraperUrl &scurl) | ||
| 865 | { | ||
| 866 | EPISODELIST vcep; | ||
| 867 | if (scurl.m_url.empty()) | ||
| 868 | return vcep; | ||
| 869 | |||
| 870 | CLog::Log(LOGDEBUG, "%s: Searching '%s' using %s scraper " | ||
| 871 | "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, | ||
| 872 | scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(), | ||
| 873 | ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str()); | ||
| 874 | |||
| 875 | vector<string> vcsIn; | ||
| 876 | vcsIn.push_back(scurl.m_url[0].m_url); | ||
| 877 | vector<string> vcsOut = RunNoThrow("GetEpisodeList", scurl, fcurl, &vcsIn); | ||
| 878 | |||
| 879 | // parse the XML response | ||
| 880 | for (vector<string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i) | ||
| 881 | { | ||
| 882 | CXBMCTinyXML doc; | ||
| 883 | doc.Parse(*i); | ||
| 884 | if (!doc.RootElement()) | ||
| 885 | { | ||
| 886 | CLog::Log(LOGERROR, "%s: Unable to parse XML",__FUNCTION__); | ||
| 887 | continue; | ||
| 888 | } | ||
| 889 | |||
| 890 | TiXmlHandle xhDoc(&doc); | ||
| 891 | for (TiXmlElement *pxeMovie = xhDoc.FirstChild("episodeguide").FirstChild("episode"). | ||
| 892 | Element(); pxeMovie; pxeMovie = pxeMovie->NextSiblingElement()) | ||
| 893 | { | ||
| 894 | EPISODE ep; | ||
| 895 | TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url"); | ||
| 896 | std::string strEpNum; | ||
| 897 | if (pxeLink && XMLUtils::GetInt(pxeMovie, "season", ep.iSeason) && | ||
| 898 | XMLUtils::GetString(pxeMovie, "epnum", strEpNum) && !strEpNum.empty()) | ||
| 899 | { | ||
| 900 | CScraperUrl &scurlEp(ep.cScraperUrl); | ||
| 901 | size_t dot = strEpNum.find("."); | ||
| 902 | ep.iEpisode = atoi(strEpNum.c_str()); | ||
| 903 | ep.iSubepisode = (dot != std::string::npos) ? atoi(strEpNum.substr(dot + 1).c_str()) : 0; | ||
| 904 | if (!XMLUtils::GetString(pxeMovie, "title", scurlEp.strTitle) || scurlEp.strTitle.empty() ) | ||
| 905 | scurlEp.strTitle = g_localizeStrings.Get(416); | ||
| 906 | XMLUtils::GetString(pxeMovie, "id", scurlEp.strId); | ||
| 907 | |||
| 908 | for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url")) | ||
| 909 | scurlEp.ParseElement(pxeLink); | ||
| 910 | |||
| 911 | // date must be the format of yyyy-mm-dd | ||
| 912 | ep.cDate.SetValid(FALSE); | ||
| 913 | std::string sDate; | ||
| 914 | if (XMLUtils::GetString(pxeMovie, "aired", sDate) && sDate.length() == 10) | ||
| 915 | { | ||
| 916 | tm tm; | ||
| 917 | if (strptime(sDate.c_str(), "%Y-%m-%d", &tm)) | ||
| 918 | ep.cDate.SetDate(1900+tm.tm_year, tm.tm_mon + 1, tm.tm_mday); | ||
| 919 | } | ||
| 920 | vcep.push_back(ep); | ||
| 921 | } | ||
| 922 | } | ||
| 923 | } | ||
| 924 | |||
| 925 | return vcep; | ||
| 926 | } | ||
| 927 | |||
| 928 | // takes URL; returns true and populates video details on success, false otherwise | ||
| 929 | bool CScraper::GetVideoDetails(XFILE::CCurlFile &fcurl, const CScraperUrl &scurl, | ||
| 930 | bool fMovie/*else episode*/, CVideoInfoTag &video) | ||
| 931 | { | ||
| 932 | CLog::Log(LOGDEBUG, "%s: Reading %s '%s' using %s scraper " | ||
| 933 | "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, | ||
| 934 | fMovie ? MediaTypeMovie : MediaTypeEpisode, scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(), | ||
| 935 | ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str()); | ||
| 936 | |||
| 937 | video.Reset(); | ||
| 938 | std::string sFunc = fMovie ? "GetDetails" : "GetEpisodeDetails"; | ||
| 939 | vector<string> vcsIn; | ||
| 940 | vcsIn.push_back(scurl.strId); | ||
| 941 | vcsIn.push_back(scurl.m_url[0].m_url); | ||
| 942 | vector<string> vcsOut = RunNoThrow(sFunc, scurl, fcurl, &vcsIn); | ||
| 943 | |||
| 944 | // parse XML output | ||
| 945 | bool fRet(false); | ||
| 946 | for (vector<string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i) | ||
| 947 | { | ||
| 948 | CXBMCTinyXML doc; | ||
| 949 | doc.Parse(*i, TIXML_ENCODING_UTF8); | ||
| 950 | if (!doc.RootElement()) | ||
| 951 | { | ||
| 952 | CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__); | ||
| 953 | continue; | ||
| 954 | } | ||
| 955 | |||
| 956 | TiXmlHandle xhDoc(&doc); | ||
| 957 | TiXmlElement *pxeDetails = xhDoc.FirstChild("details").Element(); | ||
| 958 | if (!pxeDetails) | ||
| 959 | { | ||
| 960 | CLog::Log(LOGERROR, "%s: Invalid XML file (want <details>)", __FUNCTION__); | ||
| 961 | continue; | ||
| 962 | } | ||
| 963 | video.Load(pxeDetails, true/*fChain*/); | ||
| 964 | fRet = true; // but don't exit in case of chaining | ||
| 965 | } | ||
| 966 | return fRet; | ||
| 967 | } | ||
| 968 | |||
| 969 | // takes a URL; returns true and populates album on success, false otherwise | ||
| 970 | bool CScraper::GetAlbumDetails(CCurlFile &fcurl, const CScraperUrl &scurl, CAlbum &album) | ||
| 971 | { | ||
| 972 | CLog::Log(LOGDEBUG, "%s: Reading '%s' using %s scraper " | ||
| 973 | "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, | ||
| 974 | scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(), | ||
| 975 | ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str()); | ||
| 976 | |||
| 977 | vector<string> vcsOut = RunNoThrow("GetAlbumDetails", scurl, fcurl); | ||
| 978 | |||
| 979 | // parse the returned XML into an album object (see CAlbum::Load for details) | ||
| 980 | bool fRet(false); | ||
| 981 | for (vector<string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i) | ||
| 982 | { | ||
| 983 | CXBMCTinyXML doc; | ||
| 984 | doc.Parse(*i, TIXML_ENCODING_UTF8); | ||
| 985 | if (!doc.RootElement()) | ||
| 986 | { | ||
| 987 | CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__); | ||
| 988 | return false; | ||
| 989 | } | ||
| 990 | fRet = album.Load(doc.RootElement(), i != vcsOut.begin()); | ||
| 991 | } | ||
| 992 | return fRet; | ||
| 993 | } | ||
| 994 | |||
| 995 | // takes a URL (one returned from FindArtist), the original search string, and | ||
| 996 | // returns true and populates artist on success, false on failure | ||
| 997 | bool CScraper::GetArtistDetails(CCurlFile &fcurl, const CScraperUrl &scurl, | ||
| 998 | const std::string &sSearch, CArtist &artist) | ||
| 999 | { | ||
| 1000 | if (!scurl.m_url.size()) | ||
| 1001 | return false; | ||
| 1002 | |||
| 1003 | CLog::Log(LOGDEBUG, "%s: Reading '%s' ('%s') using %s scraper " | ||
| 1004 | "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, | ||
| 1005 | scurl.m_url[0].m_url.c_str(), sSearch.c_str(), Name().c_str(), Path().c_str(), | ||
| 1006 | ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str()); | ||
| 1007 | |||
| 1008 | // pass in the original search string for chaining to search other sites | ||
| 1009 | vector<string> vcIn; | ||
| 1010 | vcIn.push_back(sSearch); | ||
| 1011 | vcIn[0] = CURL::Encode(vcIn[0]); | ||
| 1012 | |||
| 1013 | vector<string> vcsOut = RunNoThrow("GetArtistDetails", scurl, fcurl, &vcIn); | ||
| 1014 | |||
| 1015 | // ok, now parse the xml file | ||
| 1016 | bool fRet(false); | ||
| 1017 | for (vector<string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i) | ||
| 1018 | { | ||
| 1019 | CXBMCTinyXML doc; | ||
| 1020 | doc.Parse(*i, TIXML_ENCODING_UTF8); | ||
| 1021 | if (!doc.RootElement()) | ||
| 1022 | { | ||
| 1023 | CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__); | ||
| 1024 | return false; | ||
| 1025 | } | ||
| 1026 | |||
| 1027 | fRet = artist.Load(doc.RootElement(), i != vcsOut.begin()); | ||
| 1028 | } | ||
| 1029 | return fRet; | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | } | ||
| 1033 | |||
