From 9d11b08ad61b1f0d6d7023ce403285d8662efaed Mon Sep 17 00:00:00 2001 From: manuel Date: Wed, 4 Mar 2015 00:23:39 +0100 Subject: sync with upstream --- xbmc/addons/Scraper.cpp | 1033 ----------------------------------------------- 1 file changed, 1033 deletions(-) delete mode 100644 xbmc/addons/Scraper.cpp (limited to 'xbmc/addons/Scraper.cpp') diff --git a/xbmc/addons/Scraper.cpp b/xbmc/addons/Scraper.cpp deleted file mode 100644 index 06f34f2..0000000 --- a/xbmc/addons/Scraper.cpp +++ /dev/null @@ -1,1033 +0,0 @@ -/* -* Copyright (C) 2005-2013 Team XBMC -* http://xbmc.org -* -* This Program is free software; you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation; either version 2, or (at your option) -* any later version. -* -* This Program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with XBMC; see the file COPYING. If not, see -* . -* -*/ -#include "Scraper.h" -#include "filesystem/File.h" -#include "filesystem/Directory.h" -#include "filesystem/CurlFile.h" -#include "AddonManager.h" -#include "utils/ScraperParser.h" -#include "utils/ScraperUrl.h" -#include "utils/CharsetConverter.h" -#include "utils/log.h" -#include "music/infoscanner/MusicAlbumInfo.h" -#include "music/infoscanner/MusicArtistInfo.h" -#include "utils/fstrcmp.h" -#include "settings/AdvancedSettings.h" -#include "FileItem.h" -#include "utils/URIUtils.h" -#include "utils/XMLUtils.h" -#include "utils/StringUtils.h" -#include "music/MusicDatabase.h" -#include "video/VideoDatabase.h" -#include "music/Album.h" -#include "music/Artist.h" -#include "Util.h" -#include "URL.h" - -#include -#include - -using namespace std; -using namespace XFILE; -using namespace MUSIC_GRABBER; -using namespace VIDEO; - -namespace ADDON -{ - -typedef struct -{ - const char* name; - CONTENT_TYPE type; - int pretty; -} ContentMapping; - -static const ContentMapping content[] = - {{"unknown", CONTENT_NONE, 231 }, - {"albums", CONTENT_ALBUMS, 132 }, - {"music", CONTENT_ALBUMS, 132 }, - {"artists", CONTENT_ARTISTS, 133 }, - {"movies", CONTENT_MOVIES, 20342 }, - {"tvshows", CONTENT_TVSHOWS, 20343 }, - {"musicvideos", CONTENT_MUSICVIDEOS, 20389 }}; - -std::string TranslateContent(const CONTENT_TYPE &type, bool pretty/*=false*/) -{ - for (unsigned int index=0; index < ARRAY_SIZE(content); ++index) - { - const ContentMapping &map = content[index]; - if (type == map.type) - { - if (pretty && map.pretty) - return g_localizeStrings.Get(map.pretty); - else - return map.name; - } - } - return ""; -} - -CONTENT_TYPE TranslateContent(const std::string &string) -{ - for (unsigned int index=0; index < ARRAY_SIZE(content); ++index) - { - const ContentMapping &map = content[index]; - if (string == map.name) - return map.type; - } - return CONTENT_NONE; -} - -TYPE ScraperTypeFromContent(const CONTENT_TYPE &content) -{ - switch (content) - { - case CONTENT_ALBUMS: - return ADDON_SCRAPER_ALBUMS; - case CONTENT_ARTISTS: - return ADDON_SCRAPER_ARTISTS; - case CONTENT_MOVIES: - return ADDON_SCRAPER_MOVIES; - case CONTENT_MUSICVIDEOS: - return ADDON_SCRAPER_MUSICVIDEOS; - case CONTENT_TVSHOWS: - return ADDON_SCRAPER_TVSHOWS; - default: - return ADDON_UNKNOWN; - } -} - -// if the XML root is , throw CScraperError with enclosed /<message> values -static void CheckScraperError(const TiXmlElement *pxeRoot) -{ - if (!pxeRoot || stricmp(pxeRoot->Value(), "error")) - return; - std::string sTitle; - std::string sMessage; - XMLUtils::GetString(pxeRoot, "title", sTitle); - XMLUtils::GetString(pxeRoot, "message", sMessage); - throw CScraperError(sTitle, sMessage); -} - -CScraper::CScraper(const cp_extension_t *ext) : CAddon(ext), m_fLoaded(false) -{ - if (ext) - { - m_language = CAddonMgr::Get().GetExtValue(ext->configuration, "@language"); - m_requiressettings = CAddonMgr::Get().GetExtValue(ext->configuration,"@requiressettings") == "true"; - std::string persistence = CAddonMgr::Get().GetExtValue(ext->configuration, "@cachepersistence"); - if (!persistence.empty()) - m_persistence.SetFromTimeString(persistence); - } - switch (Type()) - { - case ADDON_SCRAPER_ALBUMS: - m_pathContent = CONTENT_ALBUMS; - break; - case ADDON_SCRAPER_ARTISTS: - m_pathContent = CONTENT_ARTISTS; - break; - case ADDON_SCRAPER_MOVIES: - m_pathContent = CONTENT_MOVIES; - break; - case ADDON_SCRAPER_MUSICVIDEOS: - m_pathContent = CONTENT_MUSICVIDEOS; - break; - case ADDON_SCRAPER_TVSHOWS: - m_pathContent = CONTENT_TVSHOWS; - break; - default: - m_pathContent = CONTENT_NONE; - break; - } -} - -AddonPtr CScraper::Clone() const -{ - return AddonPtr(new CScraper(*this)); -} - -CScraper::CScraper(const CScraper &rhs) - : CAddon(rhs), m_fLoaded(false), - m_language(rhs.m_language), - m_requiressettings(rhs.m_requiressettings), - m_persistence(rhs.m_persistence), - m_pathContent(rhs.m_pathContent) -{ -} - -bool CScraper::Supports(const CONTENT_TYPE &content) const -{ - return Type() == ScraperTypeFromContent(content); -} - -bool CScraper::SetPathSettings(CONTENT_TYPE content, const std::string& xml) -{ - m_pathContent = content; - if (!LoadSettings()) - return false; - - if (xml.empty()) - return true; - - CXBMCTinyXML doc; - doc.Parse(xml); - m_userSettingsLoaded = SettingsFromXML(doc); - - return m_userSettingsLoaded; -} - -std::string CScraper::GetPathSettings() -{ - if (!LoadSettings()) - return ""; - - stringstream stream; - CXBMCTinyXML doc; - SettingsToXML(doc); - if (doc.RootElement()) - stream << *doc.RootElement(); - - return stream.str(); -} - -void CScraper::ClearCache() -{ - std::string strCachePath = URIUtils::AddFileToFolder(g_advancedSettings.m_cachePath, "scrapers"); - - // create scraper cache dir if needed - if (!CDirectory::Exists(strCachePath)) - CDirectory::Create(strCachePath); - - strCachePath = URIUtils::AddFileToFolder(strCachePath, ID()); - URIUtils::AddSlashAtEnd(strCachePath); - - if (CDirectory::Exists(strCachePath)) - { - CFileItemList items; - CDirectory::GetDirectory(strCachePath,items); - for (int i=0;i<items.Size();++i) - { - // wipe cache - if (items[i]->m_dateTime + m_persistence <= CDateTime::GetCurrentDateTime()) - CFile::Delete(items[i]->GetPath()); - } - } - else - CDirectory::Create(strCachePath); -} - -// returns a vector of strings: the first is the XML output by the function; the rest -// is XML output by chained functions, possibly recursively -// the CCurlFile object is passed in so that URL fetches can be canceled from other threads -// throws CScraperError abort on internal failures (e.g., parse errors) -vector<string> CScraper::Run(const std::string& function, - const CScraperUrl& scrURL, - CCurlFile& http, - const vector<string>* extras) -{ - if (!Load()) - throw CScraperError(); - - std::string strXML = InternalRun(function,scrURL,http,extras); - if (strXML.empty()) - { - if (function != "NfoUrl" && function != "ResolveIDToUrl") - CLog::Log(LOGERROR, "%s: Unable to parse web site",__FUNCTION__); - throw CScraperError(); - } - - CLog::Log(LOGDEBUG,"scraper: %s returned %s",function.c_str(),strXML.c_str()); - - CXBMCTinyXML doc; - /* all data was converted to UTF-8 before being processed by scraper */ - doc.Parse(strXML, TIXML_ENCODING_UTF8); - if (!doc.RootElement()) - { - CLog::Log(LOGERROR, "%s: Unable to parse XML",__FUNCTION__); - throw CScraperError(); - } - - vector<string> result; - result.push_back(strXML); - TiXmlElement* xchain = doc.RootElement()->FirstChildElement(); - // skip children of the root element until <url> or <chain> - while (xchain && strcmp(xchain->Value(),"url") && strcmp(xchain->Value(),"chain")) - xchain = xchain->NextSiblingElement(); - while (xchain) - { - // <chain|url function="...">param</> - const char* szFunction = xchain->Attribute("function"); - if (szFunction) - { - CScraperUrl scrURL2; - vector<string> extras; - // for <chain>, pass the contained text as a parameter; for <url>, as URL content - if (strcmp(xchain->Value(),"chain")==0) - { - if (xchain->FirstChild()) - extras.push_back(xchain->FirstChild()->Value()); - } - else - scrURL2.ParseElement(xchain); - // Fix for empty chains. $$1 would still contain the - // previous value as there is no child of the xml node. - // since $$1 will always either contain the data from an - // url or the parameters to a chain, we can safely clear it here - // to fix this issue - m_parser.m_param[0].clear(); - vector<string> result2 = RunNoThrow(szFunction,scrURL2,http,&extras); - result.insert(result.end(),result2.begin(),result2.end()); - } - xchain = xchain->NextSiblingElement(); - // continue to skip past non-<url> or <chain> elements - while (xchain && strcmp(xchain->Value(),"url") && strcmp(xchain->Value(),"chain")) - xchain = xchain->NextSiblingElement(); - } - - return result; -} - -// just like Run, but returns an empty list instead of throwing in case of error -// don't use in new code; errors should be handled appropriately -vector<string> CScraper::RunNoThrow(const std::string& function, - const CScraperUrl& url, - XFILE::CCurlFile& http, - const vector<string>* extras) -{ - vector<string> vcs; - try - { - vcs = Run(function, url, http, extras); - } - catch (const CScraperError &sce) - { - assert(sce.FAborted()); // the only kind we should get - } - return vcs; -} - -std::string CScraper::InternalRun(const std::string& function, - const CScraperUrl& scrURL, - CCurlFile& http, - const vector<string>* extras) -{ - // walk the list of input URLs and fetch each into parser parameters - unsigned int i; - for (i=0;i<scrURL.m_url.size();++i) - { - if (!CScraperUrl::Get(scrURL.m_url[i],m_parser.m_param[i],http,ID()) || m_parser.m_param[i].size() == 0) - return ""; - } - // put the 'extra' parameterts into the parser parameter list too - if (extras) - { - for (unsigned int j=0;j<extras->size();++j) - m_parser.m_param[j+i] = (*extras)[j]; - } - - return m_parser.Parse(function,this); -} - -bool CScraper::Load() -{ - if (m_fLoaded) - return true; - - bool result=m_parser.Load(LibPath()); - if (result) - { - // TODO: this routine assumes that deps are a single level, and assumes the dep is installed. - // 1. Does it make sense to have recursive dependencies? - // 2. Should we be checking the dep versions or do we assume it is ok? - ADDONDEPS deps = GetDeps(); - ADDONDEPS::iterator itr = deps.begin(); - while (itr != deps.end()) - { - if (itr->first == "xbmc.metadata") - { - ++itr; - continue; - } - AddonPtr dep; - - bool bOptional = itr->second.second; - - if (CAddonMgr::Get().GetAddon((*itr).first, dep)) - { - CXBMCTinyXML doc; - if (dep->Type() == ADDON_SCRAPER_LIBRARY && doc.LoadFile(dep->LibPath())) - m_parser.AddDocument(&doc); - } - else - { - if (!bOptional) - { - result = false; - break; - } - } - ++itr; - } - } - - if (!result) - CLog::Log(LOGWARNING, "failed to load scraper XML from %s", LibPath().c_str()); - return m_fLoaded = result; -} - -bool CScraper::IsInUse() const -{ - if (Supports(CONTENT_ALBUMS) || Supports(CONTENT_ARTISTS)) - { // music scraper - CMusicDatabase db; - if (db.Open() && db.ScraperInUse(ID())) - return true; - } - else - { // video scraper - CVideoDatabase db; - if (db.Open() && db.ScraperInUse(ID())) - return true; - } - return false; -} - -bool CScraper::IsNoop() -{ - if (!Load()) - throw CScraperError(); - - return m_parser.IsNoop(); -} - -// pass in contents of .nfo file; returns URL (possibly empty if none found) -// and may populate strId, or throws CScraperError on error -CScraperUrl CScraper::NfoUrl(const std::string &sNfoContent) -{ - CScraperUrl scurlRet; - - if (IsNoop()) - return scurlRet; - - // scraper function takes contents of .nfo file, returns XML (see below) - vector<string> vcsIn; - vcsIn.push_back(sNfoContent); - CScraperUrl scurl; - CCurlFile fcurl; - vector<string> vcsOut = Run("NfoUrl", scurl, fcurl, &vcsIn); - if (vcsOut.empty() || vcsOut[0].empty()) - return scurlRet; - if (vcsOut.size() > 1) - CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__); - - // parse returned XML: either <error> element on error, blank on failure, - // or <url>...</url> or <url>...</url><id>...</id> on success - for (unsigned int i=0; i < vcsOut.size(); ++i) - { - CXBMCTinyXML doc; - doc.Parse(vcsOut[i], TIXML_ENCODING_UTF8); - CheckScraperError(doc.RootElement()); - - if (doc.RootElement()) - { - /* - NOTE: Scrapers might return invalid xml with some loose - elements (eg. '<url>http://some.url</url><id>123</id>'). - Since XMLUtils::GetString() is assuming well formed xml - with start and end-tags we're not able to use it. - Check for the desired Elements instead. - */ - TiXmlElement* pxeUrl=NULL; - TiXmlElement* pId=NULL; - if (!strcmp(doc.RootElement()->Value(),"details")) - { - pxeUrl = doc.RootElement()->FirstChildElement("url"); - pId = doc.RootElement()->FirstChildElement("id"); - } - else - { - pId = doc.FirstChildElement("id"); - pxeUrl = doc.FirstChildElement("url"); - } - if (pId && pId->FirstChild()) - scurlRet.strId = pId->FirstChild()->Value(); - - if (pxeUrl && pxeUrl->Attribute("function")) - continue; - - if (pxeUrl) - scurlRet.ParseElement(pxeUrl); - else if (!strcmp(doc.RootElement()->Value(), "url")) - scurlRet.ParseElement(doc.RootElement()); - else - continue; - break; - } - } - return scurlRet; -} - -CScraperUrl CScraper::ResolveIDToUrl(const std::string& externalID) -{ - CScraperUrl scurlRet; - - // scraper function takes an external ID, returns XML (see below) - vector<string> vcsIn; - vcsIn.push_back(externalID); - CScraperUrl scurl; - CCurlFile fcurl; - vector<string> vcsOut = Run("ResolveIDToUrl", scurl, fcurl, &vcsIn); - if (vcsOut.empty() || vcsOut[0].empty()) - return scurlRet; - if (vcsOut.size() > 1) - CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__); - - // parse returned XML: either <error> element on error, blank on failure, - // or <url>...</url> or <url>...</url><id>...</id> on success - for (unsigned int i=0; i < vcsOut.size(); ++i) - { - CXBMCTinyXML doc; - doc.Parse(vcsOut[i], TIXML_ENCODING_UTF8); - CheckScraperError(doc.RootElement()); - - if (doc.RootElement()) - { - /* - NOTE: Scrapers might return invalid xml with some loose - elements (eg. '<url>http://some.url</url><id>123</id>'). - Since XMLUtils::GetString() is assuming well formed xml - with start and end-tags we're not able to use it. - Check for the desired Elements instead. - */ - TiXmlElement* pxeUrl=NULL; - TiXmlElement* pId=NULL; - if (!strcmp(doc.RootElement()->Value(),"details")) - { - pxeUrl = doc.RootElement()->FirstChildElement("url"); - pId = doc.RootElement()->FirstChildElement("id"); - } - else - { - pId = doc.FirstChildElement("id"); - pxeUrl = doc.FirstChildElement("url"); - } - if (pId && pId->FirstChild()) - scurlRet.strId = pId->FirstChild()->Value(); - - if (pxeUrl && pxeUrl->Attribute("function")) - continue; - - if (pxeUrl) - scurlRet.ParseElement(pxeUrl); - else if (!strcmp(doc.RootElement()->Value(), "url")) - scurlRet.ParseElement(doc.RootElement()); - else - continue; - break; - } - } - return scurlRet; -} - -static bool RelevanceSortFunction(const CScraperUrl &left, const CScraperUrl &right) -{ - return left.relevance > right.relevance; -} - -// fetch list of matching movies sorted by relevance (may be empty); -// throws CScraperError on error; first called with fFirst set, then unset if first try fails -std::vector<CScraperUrl> CScraper::FindMovie(XFILE::CCurlFile &fcurl, const std::string &sMovie, - bool fFirst) -{ - // prepare parameters for URL creation - std::string sTitle, sTitleYear, sYear; - CUtil::CleanString(sMovie, sTitle, sTitleYear, sYear, true/*fRemoveExt*/, fFirst); - - CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper " - "(path: '%s', content: '%s', version: '%s')", __FUNCTION__, sTitle.c_str(), - Name().c_str(), Path().c_str(), - ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str()); - - std::vector<CScraperUrl> vcscurl; - if (IsNoop()) - return vcscurl; - - if (!fFirst) - StringUtils::Replace(sTitle, '-',' '); - - vector<string> vcsIn(1); - g_charsetConverter.utf8To(SearchStringEncoding(), sTitle, vcsIn[0]); - vcsIn[0] = CURL::Encode(vcsIn[0]); - if (fFirst && !sYear.empty()) - vcsIn.push_back(sYear); - - // request a search URL from the title/filename/etc. - CScraperUrl scurl; - vector<string> vcsOut = Run("CreateSearchUrl", scurl, fcurl, &vcsIn); - if (vcsOut.empty()) - { - CLog::Log(LOGDEBUG, "%s: CreateSearchUrl failed", __FUNCTION__); - throw CScraperError(); - } - scurl.ParseString(vcsOut[0]); - - // do the search, and parse the result into a list - vcsIn.clear(); - vcsIn.push_back(scurl.m_url[0].m_url); - vcsOut = Run("GetSearchResults", scurl, fcurl, &vcsIn); - - bool fSort(true); - std::set<std::string> stsDupeCheck; - bool fResults(false); - for (vector<string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i) - { - CXBMCTinyXML doc; - doc.Parse(*i, TIXML_ENCODING_UTF8); - if (!doc.RootElement()) - { - CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__); - continue; // might have more valid results later - } - - CheckScraperError(doc.RootElement()); - - TiXmlHandle xhDoc(&doc); - TiXmlHandle xhResults = xhDoc.FirstChild("results"); - if (!xhResults.Element()) - continue; - fResults = true; // even if empty - - // we need to sort if returned results don't specify 'sorted="yes"' - if (fSort) - { - const char *sorted = xhResults.Element()->Attribute("sorted"); - if (sorted != NULL) - fSort = !StringUtils::EqualsNoCase(sorted, "yes"); - } - - for (TiXmlElement *pxeMovie = xhResults.FirstChild("entity").Element(); - pxeMovie; pxeMovie = pxeMovie->NextSiblingElement()) - { - CScraperUrl scurlMovie; - TiXmlNode *pxnTitle = pxeMovie->FirstChild("title"); - TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url"); - if (pxnTitle && pxnTitle->FirstChild() && pxeLink && pxeLink->FirstChild()) - { - scurlMovie.strTitle = pxnTitle->FirstChild()->Value(); - XMLUtils::GetString(pxeMovie, "id", scurlMovie.strId); - - for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url")) - scurlMovie.ParseElement(pxeLink); - - // calculate the relavance of this hit - std::string sCompareTitle = scurlMovie.strTitle; - StringUtils::ToLower(sCompareTitle); - std::string sMatchTitle = sTitle; - StringUtils::ToLower(sMatchTitle); - - /* - * Identify the best match by performing a fuzzy string compare on the search term and - * the result. Additionally, use the year (if available) to further refine the best match. - * An exact match scores 1, a match off by a year scores 0.5 (release dates can vary between - * countries), otherwise it scores 0. - */ - std::string sCompareYear; - XMLUtils::GetString(pxeMovie, "year", sCompareYear); - - double yearScore = 0; - if (!sYear.empty() && !sCompareYear.empty()) - yearScore = std::max(0.0, 1-0.5*abs(atoi(sYear.c_str())-atoi(sCompareYear.c_str()))); - - scurlMovie.relevance = fstrcmp(sMatchTitle.c_str(), sCompareTitle.c_str(), 0.0) + yearScore; - - // reconstruct a title for the user - if (!sCompareYear.empty()) - scurlMovie.strTitle += StringUtils::Format(" (%s)", sCompareYear.c_str()); - - std::string sLanguage; - if (XMLUtils::GetString(pxeMovie, "language", sLanguage) && !sLanguage.empty()) - scurlMovie.strTitle += StringUtils::Format(" (%s)", sLanguage.c_str()); - - // filter for dupes from naughty scrapers - if (stsDupeCheck.insert(scurlMovie.m_url[0].m_url + " " + scurlMovie.strTitle).second) - vcscurl.push_back(scurlMovie); - } - } - } - - if (!fResults) - throw CScraperError(); // scraper aborted - - if (fSort) - std::stable_sort(vcscurl.begin(), vcscurl.end(), RelevanceSortFunction); - - return vcscurl; -} - -// find album by artist, using fcurl for web fetches -// returns a list of albums (empty if no match or failure) -std::vector<CMusicAlbumInfo> CScraper::FindAlbum(CCurlFile &fcurl, const std::string &sAlbum, - const std::string &sArtist) -{ - CLog::Log(LOGDEBUG, "%s: Searching for '%s - %s' using %s scraper " - "(path: '%s', content: '%s', version: '%s')", __FUNCTION__, sArtist.c_str(), - sAlbum.c_str(), Name().c_str(), Path().c_str(), - ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str()); - - std::vector<CMusicAlbumInfo> vcali; - if (IsNoop()) - return vcali; - - // scraper function is given the album and artist as parameters and - // returns an XML <url> element parseable by CScraperUrl - std::vector<string> extras(2); - g_charsetConverter.utf8To(SearchStringEncoding(), sAlbum, extras[0]); - g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[1]); - extras[0] = CURL::Encode(extras[0]); - extras[1] = CURL::Encode(extras[1]); - CScraperUrl scurl; - vector<string> vcsOut = RunNoThrow("CreateAlbumSearchUrl", scurl, fcurl, &extras); - if (vcsOut.size() > 1) - CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__); - - if (vcsOut.empty() || vcsOut[0].empty()) - return vcali; - scurl.ParseString(vcsOut[0]); - - // the next function is passed the contents of the returned URL, and returns - // an empty string on failure; on success, returns XML matches in the form: - // <results> - // <entity> - // <title>... - // ... (with the usual CScraperUrl decorations like post or spoof) - // ... - // ... - // ... (scale defaults to 1; score is divided by it) - // - // ... - // - vcsOut = RunNoThrow("GetAlbumSearchResults", scurl, fcurl); - - // parse the returned XML into a vector of album objects - for (vector::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i) - { - CXBMCTinyXML doc; - doc.Parse(*i, TIXML_ENCODING_UTF8); - TiXmlHandle xhDoc(&doc); - - for (TiXmlElement* pxeAlbum = xhDoc.FirstChild("results").FirstChild("entity").Element(); - pxeAlbum; pxeAlbum = pxeAlbum->NextSiblingElement()) - { - std::string sTitle; - if (XMLUtils::GetString(pxeAlbum, "title", sTitle) && !sTitle.empty()) - { - std::string sArtist; - std::string sAlbumName; - if (XMLUtils::GetString(pxeAlbum, "artist", sArtist) && !sArtist.empty()) - sAlbumName = StringUtils::Format("%s - %s", sArtist.c_str(), sTitle.c_str()); - else - sAlbumName = sTitle; - - std::string sYear; - if (XMLUtils::GetString(pxeAlbum, "year", sYear) && !sYear.empty()) - sAlbumName = StringUtils::Format("%s (%s)", sAlbumName.c_str(), sYear.c_str()); - - // if no URL is provided, use the URL we got back from CreateAlbumSearchUrl - // (e.g., in case we only got one result back and were sent to the detail page) - TiXmlElement* pxeLink = pxeAlbum->FirstChildElement("url"); - CScraperUrl scurlAlbum; - if (!pxeLink) - scurlAlbum.ParseString(scurl.m_xml); - for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url")) - scurlAlbum.ParseElement(pxeLink); - - if (!scurlAlbum.m_url.size()) - continue; - - CMusicAlbumInfo ali(sTitle, sArtist, sAlbumName, scurlAlbum); - - TiXmlElement* pxeRel = pxeAlbum->FirstChildElement("relevance"); - if (pxeRel && pxeRel->FirstChild()) - { - const char* szScale = pxeRel->Attribute("scale"); - float flScale = szScale ? float(atof(szScale)) : 1; - ali.SetRelevance(float(atof(pxeRel->FirstChild()->Value())) / flScale); - } - - vcali.push_back(ali); - } - } - } - return vcali; -} - -// find artist, using fcurl for web fetches -// returns a list of artists (empty if no match or failure) -std::vector CScraper::FindArtist(CCurlFile &fcurl, - const std::string &sArtist) -{ - CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper " - "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, sArtist.c_str(), - Name().c_str(), Path().c_str(), - ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str()); - - std::vector vcari; - if (IsNoop()) - return vcari; - - // scraper function is given the artist as parameter and - // returns an XML element parseable by CScraperUrl - std::vector extras(1); - g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[0]); - extras[0] = CURL::Encode(extras[0]); - CScraperUrl scurl; - vector vcsOut = RunNoThrow("CreateArtistSearchUrl", scurl, fcurl, &extras); - - if (vcsOut.empty() || vcsOut[0].empty()) - return vcari; - scurl.ParseString(vcsOut[0]); - - // the next function is passed the contents of the returned URL, and returns - // an empty string on failure; on success, returns XML matches in the form: - // - // - // ... - // ... - // ... - // ... (with the usual CScraperUrl decorations like post or spoof) - // - // ... - // - vcsOut = RunNoThrow("GetArtistSearchResults", scurl, fcurl); - - // parse the returned XML into a vector of artist objects - for (vector::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i) - { - CXBMCTinyXML doc; - doc.Parse(*i, TIXML_ENCODING_UTF8); - if (!doc.RootElement()) - { - CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__); - return vcari; - } - TiXmlHandle xhDoc(&doc); - for (TiXmlElement* pxeArtist = xhDoc.FirstChild("results").FirstChild("entity").Element(); - pxeArtist; pxeArtist = pxeArtist->NextSiblingElement()) - { - TiXmlNode* pxnTitle = pxeArtist->FirstChild("title"); - if (pxnTitle && pxnTitle->FirstChild()) - { - CScraperUrl scurlArtist; - - TiXmlElement* pxeLink = pxeArtist->FirstChildElement("url"); - if (!pxeLink) - scurlArtist.ParseString(scurl.m_xml); - for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url")) - scurlArtist.ParseElement(pxeLink); - - if (!scurlArtist.m_url.size()) - continue; - - CMusicArtistInfo ari(pxnTitle->FirstChild()->Value(), scurlArtist); - std::string genre; - XMLUtils::GetString(pxeArtist, "genre", genre); - if (!genre.empty()) - ari.GetArtist().genre = StringUtils::Split(genre, g_advancedSettings.m_musicItemSeparator); - XMLUtils::GetString(pxeArtist, "year", ari.GetArtist().strBorn); - - vcari.push_back(ari); - } - } - } - return vcari; -} - -// fetch list of episodes from URL (from video database) -EPISODELIST CScraper::GetEpisodeList(XFILE::CCurlFile &fcurl, const CScraperUrl &scurl) -{ - EPISODELIST vcep; - if (scurl.m_url.empty()) - return vcep; - - CLog::Log(LOGDEBUG, "%s: Searching '%s' using %s scraper " - "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, - scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(), - ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str()); - - vector vcsIn; - vcsIn.push_back(scurl.m_url[0].m_url); - vector vcsOut = RunNoThrow("GetEpisodeList", scurl, fcurl, &vcsIn); - - // parse the XML response - for (vector::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i) - { - CXBMCTinyXML doc; - doc.Parse(*i); - if (!doc.RootElement()) - { - CLog::Log(LOGERROR, "%s: Unable to parse XML",__FUNCTION__); - continue; - } - - TiXmlHandle xhDoc(&doc); - for (TiXmlElement *pxeMovie = xhDoc.FirstChild("episodeguide").FirstChild("episode"). - Element(); pxeMovie; pxeMovie = pxeMovie->NextSiblingElement()) - { - EPISODE ep; - TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url"); - std::string strEpNum; - if (pxeLink && XMLUtils::GetInt(pxeMovie, "season", ep.iSeason) && - XMLUtils::GetString(pxeMovie, "epnum", strEpNum) && !strEpNum.empty()) - { - CScraperUrl &scurlEp(ep.cScraperUrl); - size_t dot = strEpNum.find("."); - ep.iEpisode = atoi(strEpNum.c_str()); - ep.iSubepisode = (dot != std::string::npos) ? atoi(strEpNum.substr(dot + 1).c_str()) : 0; - if (!XMLUtils::GetString(pxeMovie, "title", scurlEp.strTitle) || scurlEp.strTitle.empty() ) - scurlEp.strTitle = g_localizeStrings.Get(416); - XMLUtils::GetString(pxeMovie, "id", scurlEp.strId); - - for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url")) - scurlEp.ParseElement(pxeLink); - - // date must be the format of yyyy-mm-dd - ep.cDate.SetValid(FALSE); - std::string sDate; - if (XMLUtils::GetString(pxeMovie, "aired", sDate) && sDate.length() == 10) - { - tm tm; - if (strptime(sDate.c_str(), "%Y-%m-%d", &tm)) - ep.cDate.SetDate(1900+tm.tm_year, tm.tm_mon + 1, tm.tm_mday); - } - vcep.push_back(ep); - } - } - } - - return vcep; -} - -// takes URL; returns true and populates video details on success, false otherwise -bool CScraper::GetVideoDetails(XFILE::CCurlFile &fcurl, const CScraperUrl &scurl, - bool fMovie/*else episode*/, CVideoInfoTag &video) -{ - CLog::Log(LOGDEBUG, "%s: Reading %s '%s' using %s scraper " - "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, - fMovie ? MediaTypeMovie : MediaTypeEpisode, scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(), - ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str()); - - video.Reset(); - std::string sFunc = fMovie ? "GetDetails" : "GetEpisodeDetails"; - vector vcsIn; - vcsIn.push_back(scurl.strId); - vcsIn.push_back(scurl.m_url[0].m_url); - vector vcsOut = RunNoThrow(sFunc, scurl, fcurl, &vcsIn); - - // parse XML output - bool fRet(false); - for (vector::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i) - { - CXBMCTinyXML doc; - doc.Parse(*i, TIXML_ENCODING_UTF8); - if (!doc.RootElement()) - { - CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__); - continue; - } - - TiXmlHandle xhDoc(&doc); - TiXmlElement *pxeDetails = xhDoc.FirstChild("details").Element(); - if (!pxeDetails) - { - CLog::Log(LOGERROR, "%s: Invalid XML file (want
)", __FUNCTION__); - continue; - } - video.Load(pxeDetails, true/*fChain*/); - fRet = true; // but don't exit in case of chaining - } - return fRet; -} - -// takes a URL; returns true and populates album on success, false otherwise -bool CScraper::GetAlbumDetails(CCurlFile &fcurl, const CScraperUrl &scurl, CAlbum &album) -{ - CLog::Log(LOGDEBUG, "%s: Reading '%s' using %s scraper " - "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, - scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(), - ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str()); - - vector vcsOut = RunNoThrow("GetAlbumDetails", scurl, fcurl); - - // parse the returned XML into an album object (see CAlbum::Load for details) - bool fRet(false); - for (vector::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i) - { - CXBMCTinyXML doc; - doc.Parse(*i, TIXML_ENCODING_UTF8); - if (!doc.RootElement()) - { - CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__); - return false; - } - fRet = album.Load(doc.RootElement(), i != vcsOut.begin()); - } - return fRet; -} - -// takes a URL (one returned from FindArtist), the original search string, and -// returns true and populates artist on success, false on failure -bool CScraper::GetArtistDetails(CCurlFile &fcurl, const CScraperUrl &scurl, - const std::string &sSearch, CArtist &artist) -{ - if (!scurl.m_url.size()) - return false; - - CLog::Log(LOGDEBUG, "%s: Reading '%s' ('%s') using %s scraper " - "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, - scurl.m_url[0].m_url.c_str(), sSearch.c_str(), Name().c_str(), Path().c_str(), - ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str()); - - // pass in the original search string for chaining to search other sites - vector vcIn; - vcIn.push_back(sSearch); - vcIn[0] = CURL::Encode(vcIn[0]); - - vector vcsOut = RunNoThrow("GetArtistDetails", scurl, fcurl, &vcIn); - - // ok, now parse the xml file - bool fRet(false); - for (vector::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i) - { - CXBMCTinyXML doc; - doc.Parse(*i, TIXML_ENCODING_UTF8); - if (!doc.RootElement()) - { - CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__); - return false; - } - - fRet = artist.Load(doc.RootElement(), i != vcsOut.begin()); - } - return fRet; -} - -} - -- cgit v1.2.3