1 files changed, 432 insertions, 0 deletions
diff --git a/xbmc/utils/ScraperUrl.cpp b/xbmc/utils/ScraperUrl.cpp
new file mode 100644
index 0000000..f242a40
--- /dev/null
+++ b/xbmc/utils/ScraperUrl.cpp
@@ -0,0 +1,432 @@
+/*
+ *  Copyright (C) 2005-2018 Team Kodi
+ *  This file is part of Kodi - https://kodi.tv
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ *  See LICENSES/README.md for more information.
+ */
+#include "ScraperUrl.h"
+#include "CharsetConverter.h"
+#include "ServiceBroker.h"
+#include "URIUtils.h"
+#include "URL.h"
+#include "XMLUtils.h"
+#include "filesystem/CurlFile.h"
+#include "filesystem/ZipFile.h"
+#include "settings/AdvancedSettings.h"
+#include "settings/SettingsComponent.h"
+#include "utils/CharsetDetection.h"
+#include "utils/Mime.h"
+#include "utils/StringUtils.h"
+#include "utils/XBMCTinyXML.h"
+#include "utils/log.h"
+#include <algorithm>
+#include <cstring>
+#include <sstream>
+// Creates an empty object: zero relevance, nothing parsed yet.
+CScraperUrl::CScraperUrl() : m_relevance(0.0), m_parsed(false)
+{
+}
+// Creates an object from a data string. The string is handed to
+// ParseFromData(), which accepts either XML or a single plain URL.
+CScraperUrl::CScraperUrl(std::string strUrl) : CScraperUrl()
+{
+  ParseFromData(std::move(strUrl));
+}
+// Creates an object holding the single URL described by the given XML element.
+// ParseAndAppendUrl() ignores a null element or one without a first child, in
+// which case the object stays empty.
+CScraperUrl::CScraperUrl(const TiXmlElement* element) : CScraperUrl()
+{
+  ParseAndAppendUrl(element);
+}
+// Nothing to release manually; the members clean up after themselves.
+CScraperUrl::~CScraperUrl() = default;
+// Puts the object back into its freshly constructed state: no URL entries,
+// no raw data, zero relevance and flagged as not parsed.
+void CScraperUrl::Clear()
+{
+  m_parsed = false;
+  m_relevance = 0.0;
+  m_data.clear();
+  m_urls.clear();
+}
+// Replaces the raw data with the given string (taking ownership of it) and
+// flags the object as not parsed. Entries already in m_urls are not touched.
+void CScraperUrl::SetData(std::string data)
+{
+  m_parsed = false;
+  m_data = std::move(data);
+}
+// Returns a copy of the first entry whose type is UrlType::General and whose
+// aspect equals `type`. An empty `type` matches every aspect. When nothing
+// matches, a default-constructed SUrlEntry is returned.
+const CScraperUrl::SUrlEntry CScraperUrl::GetFirstUrlByType(const std::string& type) const
+{
+  // The predicate never outlives this call, so `type` is captured by reference
+  // instead of being copied into the closure.
+  const auto match =
+      std::find_if(m_urls.begin(), m_urls.end(), [&type](const SUrlEntry& entry) {
+        return entry.m_type == UrlType::General && (type.empty() || entry.m_aspect == type);
+      });
+  if (match == m_urls.end())
+    return SUrlEntry();
+  return *match;
+}
+// Returns a copy of the first UrlType::Season entry for the given season whose
+// aspect equals `type`. An empty `type` or the literal "thumb" matches every
+// aspect. When nothing matches, a default-constructed SUrlEntry is returned.
+const CScraperUrl::SUrlEntry CScraperUrl::GetSeasonUrl(int season, const std::string& type) const
+{
+  // The predicate never outlives this call, so `type` is captured by reference
+  // instead of being copied into the closure.
+  const auto match =
+      std::find_if(m_urls.begin(), m_urls.end(), [season, &type](const SUrlEntry& entry) {
+        return entry.m_type == UrlType::Season && entry.m_season == season &&
+               (type.empty() || type == "thumb" || entry.m_aspect == type);
+      });
+  if (match == m_urls.end())
+    return SUrlEntry();
+  return *match;
+}
+// Returns the highest positive season number found among the UrlType::Season
+// entries, or 0 when there is no such entry.
+unsigned int CScraperUrl::GetMaxSeasonUrl() const
+{
+  unsigned int highest = 0;
+  for (const auto& entry : m_urls)
+  {
+    // Only season entries with a strictly positive season number count.
+    if (entry.m_type != UrlType::Season || entry.m_season <= 0)
+      continue;
+    highest = std::max(highest, static_cast<unsigned int>(entry.m_season));
+  }
+  return highest;
+}
+// Returns the thumb URL (see GetThumbUrl) of the first stored entry, or an
+// empty string when no entries are stored.
+std::string CScraperUrl::GetFirstThumbUrl() const
+{
+  return m_urls.empty() ? std::string() : GetThumbUrl(m_urls.front());
+}
+// Appends the thumb URLs of all matching entries to `thumbs`.
+//
+// An entry matches when
+//  - its aspect equals `type`, or `type` is empty, or the entry has no aspect, and
+//  - it is a General entry and `season` is -1, or it is a Season entry for
+//    exactly `season`.
+// With `unique` set, a URL already present in `thumbs` is not appended again.
+void CScraperUrl::GetThumbUrls(std::vector<std::string>& thumbs,
+                               const std::string& type,
+                               int season,
+                               bool unique) const
+{
+  for (const auto& entry : m_urls)
+  {
+    const bool aspectMatches =
+        entry.m_aspect == type || type.empty() || entry.m_aspect.empty();
+    if (!aspectMatches)
+      continue;
+    const bool wantedGeneral = entry.m_type == CScraperUrl::UrlType::General && season == -1;
+    const bool wantedSeason =
+        entry.m_type == CScraperUrl::UrlType::Season && entry.m_season == season;
+    if (!wantedGeneral && !wantedSeason)
+      continue;
+    std::string thumbUrl = GetThumbUrl(entry);
+    if (unique && std::find(thumbs.begin(), thumbs.end(), thumbUrl) != thumbs.end())
+      continue;
+    thumbs.push_back(thumbUrl);
+  }
+}
+// Parses the raw data held in m_data unless that has already been done.
+// Returns true when the object is (or already was) parsed, false when
+// ParseFromData() rejects the data (e.g. because it is empty).
+bool CScraperUrl::Parse()
+{
+  if (m_parsed)
+    return true;
+  // ParseFromData() rebuilds m_data while parsing, so hand the current content
+  // over by move (no deep copy) and start from an empty buffer.
+  std::string pending = std::move(m_data);
+  m_data.clear();
+  return ParseFromData(std::move(pending));
+}
+// Parses `data` and appends the URLs it describes.
+//
+// If `data` yields an XML root element, that element and every following
+// sibling with the same tag name as its predecessor is passed to
+// ParseAndAppendUrl(). Otherwise the whole string is stored as one plain URL
+// and becomes the raw data.
+// Returns false only for empty input; in every other case the object is
+// flagged as parsed and true is returned.
+bool CScraperUrl::ParseFromData(std::string data)
+{
+  if (data.empty())
+    return false;
+  CXBMCTinyXML doc;
+  /* data is coming from internal sources (usually generated by scraper or from database)
+   * so it is always in UTF-8 */
+  doc.Parse(data, TIXML_ENCODING_UTF8);
+  auto element = doc.RootElement();
+  if (element == nullptr)
+  {
+    // No root element: treat the input as a single plain URL.
+    m_urls.emplace_back(data);
+    m_data = data;
+    m_parsed = true;
+    return true;
+  }
+  for (; element != nullptr; element = element->NextSiblingElement(element->Value()))
+    ParseAndAppendUrl(element);
+  m_parsed = true;
+  return true;
+}
+// Appends the URL described by a single XML element.
+//
+// The element's first child supplies the URL itself; the attributes "spoof",
+// "post", "gzip", "cache", "type", "season" and "aspect" fill the remaining
+// fields of the entry. The serialized element is appended to m_data.
+// Returns false (and changes nothing) when the element is null, has no first
+// child, or that child has no value; returns true otherwise.
+bool CScraperUrl::ParseAndAppendUrl(const TiXmlElement* element)
+{
+  if (element == nullptr)
+    return false;
+  const TiXmlNode* firstChild = element->FirstChild();
+  if (firstChild == nullptr || firstChild->Value() == nullptr)
+    return false;
+  const bool wasEmpty = m_data.empty();
+  // Keep the raw data in sync with the parsed entries.
+  std::stringstream serialized;
+  serialized << *element;
+  m_data += serialized.str();
+  SUrlEntry entry(firstChild->ValueStr());
+  entry.m_spoof = XMLUtils::GetAttribute(element, "spoof");
+  // "post" and "gzip" are enabled only by the value "yes" (case-insensitive).
+  const char* postAttr = element->Attribute("post");
+  entry.m_post = postAttr != nullptr && StringUtils::CompareNoCase(postAttr, "yes") == 0;
+  const char* gzipAttr = element->Attribute("gzip");
+  entry.m_isgz = gzipAttr != nullptr && StringUtils::CompareNoCase(gzipAttr, "yes") == 0;
+  entry.m_cache = XMLUtils::GetAttribute(element, "cache");
+  const char* typeAttr = element->Attribute("type");
+  if (typeAttr != nullptr && StringUtils::CompareNoCase(typeAttr, "season") == 0)
+  {
+    entry.m_type = UrlType::Season;
+    // The season number is only read for entries of type "season".
+    const char* seasonAttr = element->Attribute("season");
+    if (seasonAttr != nullptr)
+      entry.m_season = atoi(seasonAttr);
+  }
+  entry.m_aspect = XMLUtils::GetAttribute(element, "aspect");
+  m_urls.push_back(entry);
+  // Data that started out empty now consists solely of parsed content.
+  if (wasEmpty)
+    m_parsed = true;
+  return true;
+}
+// Appends the URLs contained in an episode guide.
+//
+// Accepted XML formats of episodeGuide:
+//   <episodeguide><url>...</url>...</episodeguide>  (every <url> child is appended)
+//   <episodeguide>...</episodeguide>                (the element itself is appended)
+// Each element is handed to ParseAndAppendUrl().
+// Returns false when the input is empty, has no root element, or has no
+// <episodeguide> element at the top level; returns true otherwise.
+bool CScraperUrl::ParseAndAppendUrlsFromEpisodeGuide(std::string episodeGuide)
+{
+  if (episodeGuide.empty())
+    return false;
+  // ok, now parse the xml file
+  CXBMCTinyXML doc;
+  /* episodeGuide is coming from internal sources so it is always in UTF-8 */
+  doc.Parse(episodeGuide, TIXML_ENCODING_UTF8);
+  if (doc.RootElement() == nullptr)
+    return false;
+  TiXmlHandle docHandle(&doc);
+  auto guide = docHandle.FirstChild("episodeguide").Element();
+  // A document whose root is not <episodeguide> yields a null handle element;
+  // it must not be dereferenced.
+  if (guide == nullptr)
+    return false;
+  const bool wasEmpty = m_data.empty();
+  auto urlElement = guide->FirstChildElement("url");
+  if (urlElement != nullptr)
+  {
+    for (; urlElement != nullptr; urlElement = urlElement->NextSiblingElement("url"))
+      ParseAndAppendUrl(urlElement);
+  }
+  else if (guide->FirstChild() != nullptr && guide->FirstChild()->Value() != nullptr)
+    ParseAndAppendUrl(guide);
+  // Data that started out empty now consists solely of parsed content.
+  if (wasEmpty)
+    m_parsed = true;
+  return true;
+}
+// Adds an already parsed URL.
+//
+// A <thumb> element carrying the given values is serialized and appended to
+// m_data, and a matching entry is appended to m_urls.
+//  url      - the URL; becomes the text of the <thumb> element
+//  aspect   - stored as the "aspect" attribute and in the entry
+//  preview  - stored only as the "preview" attribute; the entry does not keep it
+//  referrer - stored as the "spoof" attribute and in the entry
+//  cache    - stored as the "cache" attribute and in the entry
+//  post     - adds post="yes" when set
+//  isgz     - adds gzip="yes" when set
+//  season   - a value >= 0 turns the URL into a season URL for that season
+void CScraperUrl::AddParsedUrl(std::string url,
+                               std::string aspect,
+                               std::string preview,
+                               std::string referrer,
+                               std::string cache,
+                               bool post,
+                               bool isgz,
+                               int season)
+{
+  const bool wasEmpty = m_data.empty();
+  const bool isSeasonUrl = season >= 0;
+  TiXmlElement thumb("thumb");
+  thumb.SetAttribute("spoof", referrer);
+  thumb.SetAttribute("cache", cache);
+  if (post)
+    thumb.SetAttribute("post", "yes");
+  if (isgz)
+    thumb.SetAttribute("gzip", "yes");
+  if (isSeasonUrl)
+  {
+    thumb.SetAttribute("season", StringUtils::Format("%i", season));
+    thumb.SetAttribute("type", "season");
+  }
+  thumb.SetAttribute("aspect", aspect);
+  thumb.SetAttribute("preview", preview);
+  TiXmlText text(url);
+  thumb.InsertEndChild(text);
+  m_data << thumb;
+  // The XML is complete, so the by-value arguments can now be moved into the
+  // entry instead of being copied a second time.
+  SUrlEntry nUrl(std::move(url));
+  nUrl.m_spoof = std::move(referrer);
+  nUrl.m_post = post;
+  nUrl.m_isgz = isgz;
+  nUrl.m_cache = std::move(cache);
+  if (isSeasonUrl)
+  {
+    nUrl.m_type = UrlType::Season;
+    nUrl.m_season = season;
+  }
+  nUrl.m_aspect = std::move(aspect);
+  m_urls.push_back(std::move(nUrl));
+  // Data that started out empty now consists solely of parsed content.
+  if (wasEmpty)
+    m_parsed = true;
+}
+// Builds the URL string for an entry: the plain URL, followed by
+// "|Referer=<encoded spoof>" when the entry has a spoof value.
+std::string CScraperUrl::GetThumbUrl(const CScraperUrl::SUrlEntry& entry)
+{
+  std::string thumbUrl = entry.m_url;
+  if (!entry.m_spoof.empty())
+    thumbUrl += "|Referer=" + CURL::Encode(entry.m_spoof);
+  return thumbUrl;
+}
+// Fetches the content referenced by scrURL into strHTML.
+//
+// Steps:
+//  1. If the entry names a cache file and that file exists and is non-empty,
+//     its content is returned without issuing a request.
+//  2. Otherwise the URL is requested through `http` (POST when the entry asks
+//     for it, GET otherwise), using the entry's spoof value as referer.
+//  3. Content detected as zip/gzip is unpacked in memory.
+//  4. HTML, XML and text content is converted to UTF-8; other content is
+//     converted only when the server reported a charset other than "UTF-8".
+//  5. If the entry names a cache file, the result is written to it.
+//
+// Returns true on a cache hit or after a successful request. Returns false
+// when the request fails (strHTML is then left untouched) or when the cache
+// file cannot be written (strHTML already holds the fetched content then).
+bool CScraperUrl::Get(const SUrlEntry& scrURL,
+                      std::string& strHTML,
+                      XFILE::CCurlFile& http,
+                      const std::string& cacheContext)
+{
+  CURL url(scrURL.m_url);
+  http.SetReferer(scrURL.m_spoof);
+  // Location of the cached copy; stays empty when the entry requests no caching.
+  std::string strCachePath;
+  if (!scrURL.m_cache.empty())
+  {
+    strCachePath = URIUtils::AddFileToFolder(
+        CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_cachePath, "scrapers",
+        cacheContext, scrURL.m_cache);
+    if (XFILE::CFile::Exists(strCachePath))
+    {
+      XFILE::CFile file;
+      XFILE::auto_buffer buffer;
+      if (file.LoadFile(strCachePath, buffer) > 0)
+      {
+        strHTML.assign(buffer.get(), buffer.length());
+        return true;
+      }
+    }
+  }
+  // Work on a copy so that strHTML is left untouched when the request fails.
+  auto strHTML1 = strHTML;
+  if (scrURL.m_post)
+  {
+    // The option string is sent as POST data without its first character
+    // (presumably the leading '?' separator). An URL without options yields an
+    // empty string, for which substr(1) would throw std::out_of_range.
+    std::string strOptions = url.GetOptions();
+    if (!strOptions.empty())
+      strOptions.erase(0, 1);
+    url.SetOptions("");
+    if (!http.Post(url.Get(), strOptions, strHTML1))
+      return false;
+  }
+  else if (!http.Get(url.Get(), strHTML1))
+    return false;
+  strHTML = std::move(strHTML1);
+  // Prefer the type announced by the server; sniff the content as a fallback.
+  const auto mimeType = http.GetProperty(XFILE::FILE_PROPERTY_MIME_TYPE);
+  CMime::EFileType ftype = CMime::GetFileTypeFromMime(mimeType);
+  if (ftype == CMime::FileTypeUnknown)
+    ftype = CMime::GetFileTypeFromContent(strHTML);
+  if (ftype == CMime::FileTypeZip || ftype == CMime::FileTypeGZip)
+  {
+    XFILE::CZipFile file;
+    std::string strBuffer;
+    auto iSize = file.UnpackFromMemory(
+        strBuffer, strHTML, scrURL.m_isgz); // FIXME: use FileTypeGZip instead of scrURL.m_isgz?
+    if (iSize > 0)
+    {
+      strHTML = std::move(strBuffer);
+      CLog::Log(LOGDEBUG, "{}: Archive \"{}\" was unpacked in memory", __FUNCTION__, scrURL.m_url);
+    }
+    else
+      CLog::Log(LOGWARNING, "{}: \"{}\" looks like archive but cannot be unpacked", __FUNCTION__,
+                scrURL.m_url);
+  }
+  const auto reportedCharset = http.GetProperty(XFILE::FILE_PROPERTY_CONTENT_CHARSET);
+  if (ftype == CMime::FileTypeHtml)
+  {
+    std::string realHtmlCharset, converted;
+    if (!CCharsetDetection::ConvertHtmlToUtf8(strHTML, converted, reportedCharset, realHtmlCharset))
+      CLog::Log(LOGWARNING,
+                "{}: Can't find precise charset for HTML \"{}\", using \"{}\" as fallback",
+                __FUNCTION__, scrURL.m_url, realHtmlCharset);
+    else
+      CLog::Log(LOGDEBUG, "{}: Using \"{}\" charset for HTML \"{}\"", __FUNCTION__, realHtmlCharset,
+                scrURL.m_url);
+    // The converted text is used in both cases (precise charset or fallback).
+    strHTML = std::move(converted);
+  }
+  else if (ftype == CMime::FileTypeXml)
+  {
+    // Parse once only to learn which charset the XML parser ends up using.
+    CXBMCTinyXML xmlDoc;
+    xmlDoc.Parse(strHTML, reportedCharset);
+    const auto realXmlCharset = xmlDoc.GetUsedCharset();
+    if (!realXmlCharset.empty())
+    {
+      CLog::Log(LOGDEBUG, "{}: Using \"{}\" charset for XML \"{}\"", __FUNCTION__, realXmlCharset,
+                scrURL.m_url);
+      std::string converted;
+      g_charsetConverter.ToUtf8(realXmlCharset, strHTML, converted);
+      strHTML = std::move(converted);
+    }
+  }
+  else if (ftype == CMime::FileTypePlainText ||
+           StringUtils::EqualsNoCase(mimeType.substr(0, 5), "text/"))
+  {
+    std::string realTextCharset;
+    std::string converted;
+    CCharsetDetection::ConvertPlainTextToUtf8(strHTML, converted, reportedCharset, realTextCharset);
+    strHTML = std::move(converted);
+    if (reportedCharset != realTextCharset)
+      CLog::Log(LOGWARNING,
+                "{}: Using \"{}\" charset for plain text \"{}\" instead of server reported \"{}\" "
+                "charset",
+                __FUNCTION__, realTextCharset, scrURL.m_url, reportedCharset);
+    else
+      CLog::Log(LOGDEBUG, "{}: Using \"{}\" charset for plain text \"{}\"", __FUNCTION__,
+                realTextCharset, scrURL.m_url);
+  }
+  else if (!reportedCharset.empty())
+  {
+    CLog::Log(LOGDEBUG, "{}: Using \"{}\" charset for \"{}\"", __FUNCTION__, reportedCharset,
+              scrURL.m_url);
+    if (reportedCharset != "UTF-8")
+    {
+      std::string converted;
+      g_charsetConverter.ToUtf8(reportedCharset, strHTML, converted);
+      strHTML = std::move(converted);
+    }
+  }
+  else
+    CLog::Log(LOGDEBUG, "{}: Using content of \"{}\" as binary or text with \"UTF-8\" charset",
+              __FUNCTION__, scrURL.m_url);
+  if (!scrURL.m_cache.empty())
+  {
+    // strCachePath was computed above from the same inputs; reuse it.
+    XFILE::CFile file;
+    if (!file.OpenForWrite(strCachePath, true) ||
+        file.Write(strHTML.data(), strHTML.size()) != static_cast<ssize_t>(strHTML.size()))
+      return false;
+  }
+  return true;
+}

diff --git a/xbmc/utils/ScraperUrl.cpp b/xbmc/utils/ScraperUrl.cpp new file mode 100644 index 0000000..f242a40 --- /dev/null +++ b/xbmc/utils/ScraperUrl.cpp
@@ -0,0 +1,432 @@
	1	/*
	2	* Copyright (C) 2005-2018 Team Kodi
	3	* This file is part of Kodi - https://kodi.tv
	4	*
	5	* SPDX-License-Identifier: GPL-2.0-or-later
	6	* See LICENSES/README.md for more information.
	7	*/
	8
	9	#include "ScraperUrl.h"
	10
	11	#include "CharsetConverter.h"
	12	#include "ServiceBroker.h"
	13	#include "URIUtils.h"
	14	#include "URL.h"
	15	#include "XMLUtils.h"
	16	#include "filesystem/CurlFile.h"
	17	#include "filesystem/ZipFile.h"
	18	#include "settings/AdvancedSettings.h"
	19	#include "settings/SettingsComponent.h"
	20	#include "utils/CharsetDetection.h"
	21	#include "utils/Mime.h"
	22	#include "utils/StringUtils.h"
	23	#include "utils/XBMCTinyXML.h"
	24	#include "utils/log.h"
	25
	26	#include <algorithm>
	27	#include <cstring>
	28	#include <sstream>
	29
	30	CScraperUrl::CScraperUrl() : m_relevance(0.0), m_parsed(false)
	31	{
	32	}
	33
	34	CScraperUrl::CScraperUrl(std::string strUrl) : CScraperUrl()
	35	{
	36	ParseFromData(std::move(strUrl));
	37	}
	38
	39	CScraperUrl::CScraperUrl(const TiXmlElement* element) : CScraperUrl()
	40	{
	41	ParseAndAppendUrl(element);
	42	}
	43
	44	CScraperUrl::~CScraperUrl() = default;
	45
	46	void CScraperUrl::Clear()
	47	{
	48	m_urls.clear();
	49	m_data.clear();
	50	m_relevance = 0.0;
	51	m_parsed = false;
	52	}
	53
	54	void CScraperUrl::SetData(std::string data)
	55	{
	56	m_data = std::move(data);
	57	m_parsed = false;
	58	}
	59
	60	const CScraperUrl::SUrlEntry CScraperUrl::GetFirstUrlByType(const std::string& type) const
	61	{
	62	const auto url = std::find_if(m_urls.begin(), m_urls.end(), [type](const SUrlEntry& url) {
	63	return url.m_type == UrlType::General && (type.empty() \|\| url.m_aspect == type);
	64	});
	65	if (url != m_urls.end())
	66	return *url;
	67
	68	return SUrlEntry();
	69	}
	70
	71	const CScraperUrl::SUrlEntry CScraperUrl::GetSeasonUrl(int season, const std::string& type) const
	72	{
	73	const auto url = std::find_if(m_urls.begin(), m_urls.end(), [season, type](const SUrlEntry& url) {
	74	return url.m_type == UrlType::Season && url.m_season == season &&
	75	(type.empty() \|\| type == "thumb" \|\| url.m_aspect == type);
	76	});
	77	if (url != m_urls.end())
	78	return *url;
	79
	80	return SUrlEntry();
	81	}
	82
	83	unsigned int CScraperUrl::GetMaxSeasonUrl() const
	84	{
	85	unsigned int maxSeason = 0;
	86	for (const auto& url : m_urls)
	87	{
	88	if (url.m_type == UrlType::Season && url.m_season > 0 &&
	89	static_cast<unsigned int>(url.m_season) > maxSeason)
	90	maxSeason = url.m_season;
	91	}
	92	return maxSeason;
	93	}
	94
	95	std::string CScraperUrl::GetFirstThumbUrl() const
	96	{
	97	if (m_urls.empty())
	98	return {};
	99
	100	return GetThumbUrl(m_urls.front());
	101	}
	102
	103	void CScraperUrl::GetThumbUrls(std::vector<std::string>& thumbs,
	104	const std::string& type,
	105	int season,
	106	bool unique) const
	107	{
	108	for (const auto& url : m_urls)
	109	{
	110	if (url.m_aspect == type \|\| type.empty() \|\| url.m_aspect.empty())
	111	{
	112	if ((url.m_type == CScraperUrl::UrlType::General && season == -1) \|\|
	113	(url.m_type == CScraperUrl::UrlType::Season && url.m_season == season))
	114	{
	115	std::string thumbUrl = GetThumbUrl(url);
	116	if (!unique \|\| std::find(thumbs.begin(), thumbs.end(), thumbUrl) == thumbs.end())
	117	thumbs.push_back(thumbUrl);
	118	}
	119	}
	120	}
	121	}
	122
	123	bool CScraperUrl::Parse()
	124	{
	125	if (m_parsed)
	126	return true;
	127
	128	auto dataToParse = m_data;
	129	m_data.clear();
	130	return ParseFromData(std::move(dataToParse));
	131	}
	132
	133	bool CScraperUrl::ParseFromData(std::string data)
	134	{
	135	if (data.empty())
	136	return false;
	137
	138	CXBMCTinyXML doc;
	139	/* strUrl is coming from internal sources (usually generated by scraper or from database)
	140	* so strUrl is always in UTF-8 */
	141	doc.Parse(data, TIXML_ENCODING_UTF8);
	142
	143	auto pElement = doc.RootElement();
	144	if (pElement == nullptr)
	145	{
	146	m_urls.emplace_back(data);
	147	m_data = data;
	148	}
	149	else
	150	{
	151	while (pElement != nullptr)
	152	{
	153	ParseAndAppendUrl(pElement);
	154	pElement = pElement->NextSiblingElement(pElement->Value());
	155	}
	156	}
	157
	158	m_parsed = true;
	159	return true;
	160	}
	161
	162	bool CScraperUrl::ParseAndAppendUrl(const TiXmlElement* element)
	163	{
	164	if (element == nullptr \|\| element->FirstChild() == nullptr \|\|
	165	element->FirstChild()->Value() == nullptr)
	166	return false;
	167
	168	bool wasEmpty = m_data.empty();
	169
	170	std::stringstream stream;
	171	stream << *element;
	172	m_data += stream.str();
	173
	174	SUrlEntry url(element->FirstChild()->ValueStr());
	175	url.m_spoof = XMLUtils::GetAttribute(element, "spoof");
	176
	177	const char* szPost = element->Attribute("post");
	178	if (szPost && StringUtils::CompareNoCase(szPost, "yes") == 0)
	179	url.m_post = true;
	180	else
	181	url.m_post = false;
	182
	183	const char* szIsGz = element->Attribute("gzip");
	184	if (szIsGz && StringUtils::CompareNoCase(szIsGz, "yes") == 0)
	185	url.m_isgz = true;
	186	else
	187	url.m_isgz = false;
	188
	189	url.m_cache = XMLUtils::GetAttribute(element, "cache");
	190
	191	const char* szType = element->Attribute("type");
	192	if (szType && StringUtils::CompareNoCase(szType, "season") == 0)
	193	{
	194	url.m_type = UrlType::Season;
	195	const char* szSeason = element->Attribute("season");
	196	if (szSeason)
	197	url.m_season = atoi(szSeason);
	198	}
	199
	200	url.m_aspect = XMLUtils::GetAttribute(element, "aspect");
	201
	202	m_urls.push_back(url);
	203
	204	if (wasEmpty)
	205	m_parsed = true;
	206
	207	return true;
	208	}
	209
	210	// XML format is of strUrls is:
	211	// <TAG><url>...</url>...</TAG> (parsed by ParseElement) or <url>...</url> (ditto)
	212	bool CScraperUrl::ParseAndAppendUrlsFromEpisodeGuide(std::string episodeGuide)
	213	{
	214	if (episodeGuide.empty())
	215	return false;
	216
	217	// ok, now parse the xml file
	218	CXBMCTinyXML doc;
	219	/* strUrls is coming from internal sources so strUrls is always in UTF-8 */
	220	doc.Parse(episodeGuide, TIXML_ENCODING_UTF8);
	221	if (doc.RootElement() == nullptr)
	222	return false;
	223
	224	bool wasEmpty = m_data.empty();
	225
	226	TiXmlHandle docHandle(&doc);
	227	auto link = docHandle.FirstChild("episodeguide").Element();
	228	if (link->FirstChildElement("url"))
	229	{
	230	for (link = link->FirstChildElement("url"); link; link = link->NextSiblingElement("url"))
	231	ParseAndAppendUrl(link);
	232	}
	233	else if (link->FirstChild() && link->FirstChild()->Value())
	234	ParseAndAppendUrl(link);
	235
	236	if (wasEmpty)
	237	m_parsed = true;
	238
	239	return true;
	240	}
	241
	242	void CScraperUrl::AddParsedUrl(std::string url,
	243	std::string aspect,
	244	std::string preview,
	245	std::string referrer,
	246	std::string cache,
	247	bool post,
	248	bool isgz,
	249	int season)
	250	{
	251	bool wasEmpty = m_data.empty();
	252
	253	TiXmlElement thumb("thumb");
	254	thumb.SetAttribute("spoof", referrer);
	255	thumb.SetAttribute("cache", cache);
	256	if (post)
	257	thumb.SetAttribute("post", "yes");
	258	if (isgz)
	259	thumb.SetAttribute("gzip", "yes");
	260	if (season >= 0)
	261	{
	262	thumb.SetAttribute("season", StringUtils::Format("%i", season));
	263	thumb.SetAttribute("type", "season");
	264	}
	265	thumb.SetAttribute("aspect", aspect);
	266	thumb.SetAttribute("preview", preview);
	267	TiXmlText text(url);
	268	thumb.InsertEndChild(text);
	269
	270	m_data << thumb;
	271
	272	SUrlEntry nUrl(url);
	273	nUrl.m_spoof = referrer;
	274	nUrl.m_post = post;
	275	nUrl.m_isgz = isgz;
	276	nUrl.m_cache = cache;
	277	if (season >= 0)
	278	{
	279	nUrl.m_type = UrlType::Season;
	280	nUrl.m_season = season;
	281	}
	282	nUrl.m_aspect = aspect;
	283
	284	m_urls.push_back(nUrl);
	285
	286	if (wasEmpty)
	287	m_parsed = true;
	288	}
	289
	290	std::string CScraperUrl::GetThumbUrl(const CScraperUrl::SUrlEntry& entry)
	291	{
	292	if (entry.m_spoof.empty())
	293	return entry.m_url;
	294
	295	return entry.m_url + "\|Referer=" + CURL::Encode(entry.m_spoof);
	296	}
	297
	298	bool CScraperUrl::Get(const SUrlEntry& scrURL,
	299	std::string& strHTML,
	300	XFILE::CCurlFile& http,
	301	const std::string& cacheContext)
	302	{
	303	CURL url(scrURL.m_url);
	304	http.SetReferer(scrURL.m_spoof);
	305	std::string strCachePath;
	306
	307	if (!scrURL.m_cache.empty())
	308	{
	309	strCachePath = URIUtils::AddFileToFolder(
	310	CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_cachePath, "scrapers",
	311	cacheContext, scrURL.m_cache);
	312	if (XFILE::CFile::Exists(strCachePath))
	313	{
	314	XFILE::CFile file;
	315	XFILE::auto_buffer buffer;
	316	if (file.LoadFile(strCachePath, buffer) > 0)
	317	{
	318	strHTML.assign(buffer.get(), buffer.length());
	319	return true;
	320	}
	321	}
	322	}
	323
	324	auto strHTML1 = strHTML;
	325
	326	if (scrURL.m_post)
	327	{
	328	std::string strOptions = url.GetOptions();
	329	strOptions = strOptions.substr(1);
	330	url.SetOptions("");
	331
	332	if (!http.Post(url.Get(), strOptions, strHTML1))
	333	return false;
	334	}
	335	else if (!http.Get(url.Get(), strHTML1))
	336	return false;
	337
	338	strHTML = strHTML1;
	339
	340	const auto mimeType = http.GetProperty(XFILE::FILE_PROPERTY_MIME_TYPE);
	341	CMime::EFileType ftype = CMime::GetFileTypeFromMime(mimeType);
	342	if (ftype == CMime::FileTypeUnknown)
	343	ftype = CMime::GetFileTypeFromContent(strHTML);
	344
	345	if (ftype == CMime::FileTypeZip \|\| ftype == CMime::FileTypeGZip)
	346	{
	347	XFILE::CZipFile file;
	348	std::string strBuffer;
	349	auto iSize = file.UnpackFromMemory(
	350	strBuffer, strHTML, scrURL.m_isgz); // FIXME: use FileTypeGZip instead of scrURL.m_isgz?
	351	if (iSize > 0)
	352	{
	353	strHTML = strBuffer;
	354	CLog::Log(LOGDEBUG, "{}: Archive \"{}\" was unpacked in memory", __FUNCTION__, scrURL.m_url);
	355	}
	356	else
	357	CLog::Log(LOGWARNING, "{}: \"{}\" looks like archive but cannot be unpacked", __FUNCTION__,
	358	scrURL.m_url);
	359	}
	360
	361	const auto reportedCharset = http.GetProperty(XFILE::FILE_PROPERTY_CONTENT_CHARSET);
	362	if (ftype == CMime::FileTypeHtml)
	363	{
	364	std::string realHtmlCharset, converted;
	365	if (!CCharsetDetection::ConvertHtmlToUtf8(strHTML, converted, reportedCharset, realHtmlCharset))
	366	CLog::Log(LOGWARNING,
	367	"{}: Can't find precise charset for HTML \"{}\", using \"{}\" as fallback",
	368	__FUNCTION__, scrURL.m_url, realHtmlCharset);
	369	else
	370	CLog::Log(LOGDEBUG, "{}: Using \"{}\" charset for HTML \"{}\"", __FUNCTION__, realHtmlCharset,
	371	scrURL.m_url);
	372
	373	strHTML = converted;
	374	}
	375	else if (ftype == CMime::FileTypeXml)
	376	{
	377	CXBMCTinyXML xmlDoc;
	378	xmlDoc.Parse(strHTML, reportedCharset);
	379
	380	const auto realXmlCharset = xmlDoc.GetUsedCharset();
	381	if (!realXmlCharset.empty())
	382	{
	383	CLog::Log(LOGDEBUG, "{}: Using \"{}\" charset for XML \"{}\"", __FUNCTION__, realXmlCharset,
	384	scrURL.m_url);
	385	std::string converted;
	386	g_charsetConverter.ToUtf8(realXmlCharset, strHTML, converted);
	387	strHTML = converted;
	388	}
	389	}
	390	else if (ftype == CMime::FileTypePlainText \|\|
	391	StringUtils::EqualsNoCase(mimeType.substr(0, 5), "text/"))
	392	{
	393	std::string realTextCharset;
	394	std::string converted;
	395	CCharsetDetection::ConvertPlainTextToUtf8(strHTML, converted, reportedCharset, realTextCharset);
	396	strHTML = converted;
	397	if (reportedCharset != realTextCharset)
	398	CLog::Log(LOGWARNING,
	399	"{}: Using \"{}\" charset for plain text \"{}\" instead of server reported \"{}\" "
	400	"charset",
	401	__FUNCTION__, realTextCharset, scrURL.m_url, reportedCharset);
	402	else
	403	CLog::Log(LOGDEBUG, "{}: Using \"{}\" charset for plain text \"{}\"", __FUNCTION__,
	404	realTextCharset, scrURL.m_url);
	405	}
	406	else if (!reportedCharset.empty())
	407	{
	408	CLog::Log(LOGDEBUG, "{}: Using \"{}\" charset for \"{}\"", __FUNCTION__, reportedCharset,
	409	scrURL.m_url);
	410	if (reportedCharset != "UTF-8")
	411	{
	412	std::string converted;
	413	g_charsetConverter.ToUtf8(reportedCharset, strHTML, converted);
	414	strHTML = converted;
	415	}
	416	}
	417	else
	418	CLog::Log(LOGDEBUG, "{}: Using content of \"{}\" as binary or text with \"UTF-8\" charset",
	419	__FUNCTION__, scrURL.m_url);
	420
	421	if (!scrURL.m_cache.empty())
	422	{
	423	const auto strCachePath = URIUtils::AddFileToFolder(
	424	CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_cachePath, "scrapers",
	425	cacheContext, scrURL.m_cache);
	426	XFILE::CFile file;
	427	if (!file.OpenForWrite(strCachePath, true) \|\|
	428	file.Write(strHTML.data(), strHTML.size()) != static_cast<ssize_t>(strHTML.size()))
	429	return false;
	430	}
	431	return true;
	432	}