summaryrefslogtreecommitdiffstats
path: root/xbmc/utils/XBMCTinyXML.cpp
diff options
context:
space:
mode:
authormanuel <manuel@mausz.at>2020-10-19 00:52:24 +0200
committermanuel <manuel@mausz.at>2020-10-19 00:52:24 +0200
commitbe933ef2241d79558f91796cc5b3a161f72ebf9c (patch)
treefe3ab2f130e20c99001f2d7a81d610c78c96a3f4 /xbmc/utils/XBMCTinyXML.cpp
parent5f8335c1e49ce108ef3481863833c98efa00411b (diff)
downloadkodi-pvr-build-be933ef2241d79558f91796cc5b3a161f72ebf9c.tar.gz
kodi-pvr-build-be933ef2241d79558f91796cc5b3a161f72ebf9c.tar.bz2
kodi-pvr-build-be933ef2241d79558f91796cc5b3a161f72ebf9c.zip
sync with upstream
Diffstat (limited to 'xbmc/utils/XBMCTinyXML.cpp')
-rw-r--r--xbmc/utils/XBMCTinyXML.cpp264
1 files changed, 264 insertions, 0 deletions
diff --git a/xbmc/utils/XBMCTinyXML.cpp b/xbmc/utils/XBMCTinyXML.cpp
new file mode 100644
index 0000000..6180522
--- /dev/null
+++ b/xbmc/utils/XBMCTinyXML.cpp
@@ -0,0 +1,264 @@
1/*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
4 *
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
8
9#include "XBMCTinyXML.h"
10
11#include "LangInfo.h"
12#include "RegExp.h"
13#include "filesystem/File.h"
14#include "utils/CharsetConverter.h"
15#include "utils/CharsetDetection.h"
16#include "utils/StringUtils.h"
17#include "utils/Utf8Utils.h"
18#include "utils/log.h"
19
// Number of characters InternalParse() lets the entity regex examine starting at each '&'.
20#define MAX_ENTITY_LENGTH 8 // size of largest entity "&#xNNNN;"
// Size in bytes of the stack buffer used to read a FILE* chunk by chunk in LoadFile(FILE*, ...).
21#define BUFFER_SIZE 4096
22
23CXBMCTinyXML::CXBMCTinyXML()
24: TiXmlDocument()
25{
26}
27
28CXBMCTinyXML::CXBMCTinyXML(const char *documentName)
29: TiXmlDocument(documentName)
30{
31}
32
33CXBMCTinyXML::CXBMCTinyXML(const std::string& documentName)
34: TiXmlDocument(documentName)
35{
36}
37
38CXBMCTinyXML::CXBMCTinyXML(const std::string& documentName, const std::string& documentCharset)
39: TiXmlDocument(documentName), m_SuggestedCharset(documentCharset)
40{
41 StringUtils::ToUpper(m_SuggestedCharset);
42}
43
44bool CXBMCTinyXML::LoadFile(TiXmlEncoding encoding)
45{
46 return LoadFile(value, encoding);
47}
48
49bool CXBMCTinyXML::LoadFile(const char *_filename, TiXmlEncoding encoding)
50{
51 return LoadFile(std::string(_filename), encoding);
52}
53
54bool CXBMCTinyXML::LoadFile(const std::string& _filename, TiXmlEncoding encoding)
55{
56 value = _filename.c_str();
57
58 XFILE::CFile file;
59 XFILE::auto_buffer buffer;
60
61 if (file.LoadFile(value, buffer) <= 0)
62 {
63 SetError(TIXML_ERROR_OPENING_FILE, NULL, NULL, TIXML_ENCODING_UNKNOWN);
64 return false;
65 }
66
67 // Delete the existing data:
68 Clear();
69 location.Clear();
70
71 std::string data(buffer.get(), buffer.length());
72 buffer.clear(); // free memory early
73
74 if (encoding == TIXML_ENCODING_UNKNOWN)
75 Parse(data, file.GetProperty(XFILE::FILE_PROPERTY_CONTENT_CHARSET));
76 else
77 Parse(data, encoding);
78
79 if (Error())
80 return false;
81 return true;
82}
83
84bool CXBMCTinyXML::LoadFile(const std::string& _filename, const std::string& documentCharset)
85{
86 m_SuggestedCharset = documentCharset;
87 StringUtils::ToUpper(m_SuggestedCharset);
88 return LoadFile(_filename, TIXML_ENCODING_UNKNOWN);
89}
90
91bool CXBMCTinyXML::LoadFile(FILE *f, TiXmlEncoding encoding)
92{
93 std::string data;
94 char buf[BUFFER_SIZE];
95 memset(buf, 0, BUFFER_SIZE);
96 int result;
97 while ((result = fread(buf, 1, BUFFER_SIZE, f)) > 0)
98 data.append(buf, result);
99 return Parse(data, encoding);
100}
101
102bool CXBMCTinyXML::SaveFile(const char *_filename) const
103{
104 return SaveFile(std::string(_filename));
105}
106
107bool CXBMCTinyXML::SaveFile(const std::string& filename) const
108{
109 XFILE::CFile file;
110 if (file.OpenForWrite(filename, true))
111 {
112 TiXmlPrinter printer;
113 Accept(&printer);
114 bool suc = file.Write(printer.CStr(), printer.Size()) == static_cast<ssize_t>(printer.Size());
115 if (suc)
116 file.Flush();
117
118 return suc;
119 }
120 return false;
121}
122
123bool CXBMCTinyXML::Parse(const std::string& data, const std::string& dataCharset)
124{
125 m_SuggestedCharset = dataCharset;
126 StringUtils::ToUpper(m_SuggestedCharset);
127 return Parse(data, TIXML_ENCODING_UNKNOWN);
128}
129
130bool CXBMCTinyXML::Parse(const std::string& data, TiXmlEncoding encoding /*= TIXML_DEFAULT_ENCODING */)
131{
132 m_UsedCharset.clear();
133 if (encoding != TIXML_ENCODING_UNKNOWN)
134 { // encoding != TIXML_ENCODING_UNKNOWN means "do not use m_SuggestedCharset and charset detection"
135 m_SuggestedCharset.clear();
136 if (encoding == TIXML_ENCODING_UTF8)
137 m_UsedCharset = "UTF-8";
138
139 return InternalParse(data, encoding);
140 }
141
142 if (!m_SuggestedCharset.empty() && TryParse(data, m_SuggestedCharset))
143 return true;
144
145 std::string detectedCharset;
146 if (CCharsetDetection::DetectXmlEncoding(data, detectedCharset) && TryParse(data, detectedCharset))
147 {
148 if (!m_SuggestedCharset.empty())
149 CLog::Log(LOGWARNING, "%s: \"%s\" charset was used instead of suggested charset \"%s\" for %s", __FUNCTION__, m_UsedCharset.c_str(), m_SuggestedCharset.c_str(),
150 (value.empty() ? "XML data" : ("file \"" + value + "\"").c_str()));
151
152 return true;
153 }
154
155 // check for valid UTF-8
156 if (m_SuggestedCharset != "UTF-8" && detectedCharset != "UTF-8" && CUtf8Utils::isValidUtf8(data) &&
157 TryParse(data, "UTF-8"))
158 {
159 if (!m_SuggestedCharset.empty())
160 CLog::Log(LOGWARNING, "%s: \"%s\" charset was used instead of suggested charset \"%s\" for %s", __FUNCTION__, m_UsedCharset.c_str(), m_SuggestedCharset.c_str(),
161 (value.empty() ? "XML data" : ("file \"" + value + "\"").c_str()));
162 else if (!detectedCharset.empty())
163 CLog::Log(LOGWARNING, "%s: \"%s\" charset was used instead of detected charset \"%s\" for %s", __FUNCTION__, m_UsedCharset.c_str(), detectedCharset.c_str(),
164 (value.empty() ? "XML data" : ("file \"" + value + "\"").c_str()));
165 return true;
166 }
167
168 // fallback: try user GUI charset
169 if (TryParse(data, g_langInfo.GetGuiCharSet()))
170 {
171 if (!m_SuggestedCharset.empty())
172 CLog::Log(LOGWARNING, "%s: \"%s\" charset was used instead of suggested charset \"%s\" for %s", __FUNCTION__, m_UsedCharset.c_str(), m_SuggestedCharset.c_str(),
173 (value.empty() ? "XML data" : ("file \"" + value + "\"").c_str()));
174 else if (!detectedCharset.empty())
175 CLog::Log(LOGWARNING, "%s: \"%s\" charset was used instead of detected charset \"%s\" for %s", __FUNCTION__, m_UsedCharset.c_str(), detectedCharset.c_str(),
176 (value.empty() ? "XML data" : ("file \"" + value + "\"").c_str()));
177 return true;
178 }
179
180 // can't detect correct data charset, try to process data as is
181 if (InternalParse(data, TIXML_ENCODING_UNKNOWN))
182 {
183 if (!m_SuggestedCharset.empty())
184 CLog::Log(LOGWARNING, "%s: Processed %s as unknown encoding instead of suggested \"%s\"", __FUNCTION__,
185 (value.empty() ? "XML data" : ("file \"" + value + "\"").c_str()), m_SuggestedCharset.c_str());
186 else if (!detectedCharset.empty())
187 CLog::Log(LOGWARNING, "%s: Processed %s as unknown encoding instead of detected \"%s\"", __FUNCTION__,
188 (value.empty() ? "XML data" : ("file \"" + value + "\"").c_str()), detectedCharset.c_str());
189 return true;
190 }
191
192 return false;
193}
194
195bool CXBMCTinyXML::TryParse(const std::string& data, const std::string& tryDataCharset)
196{
197 if (tryDataCharset == "UTF-8")
198 InternalParse(data, TIXML_ENCODING_UTF8); // process data without conversion
199 else if (!tryDataCharset.empty())
200 {
201 std::string converted;
202 /* some wrong conversions can leave US-ASCII XML header and structure untouched but break non-English data
203 * so conversion must fail on wrong character and then other encodings will be tried */
204 if (!g_charsetConverter.ToUtf8(tryDataCharset, data, converted, true) || converted.empty())
205 return false; // can't convert data
206
207 InternalParse(converted, TIXML_ENCODING_UTF8);
208 }
209 else
210 InternalParse(data, TIXML_ENCODING_LEGACY);
211
212 // 'Error()' contains result of last run of 'TiXmlDocument::Parse()'
213 if (Error())
214 {
215 Clear();
216 location.Clear();
217
218 return false;
219 }
220
221 m_UsedCharset = tryDataCharset;
222 return true;
223}
224
225bool CXBMCTinyXML::InternalParse(const std::string& rawdata, TiXmlEncoding encoding /*= TIXML_DEFAULT_ENCODING */)
226{
227 // Preprocess string, replacing '&' with '&amp; for invalid XML entities
228 size_t pos = rawdata.find('&');
229 if (pos == std::string::npos)
230 return (TiXmlDocument::Parse(rawdata.c_str(), NULL, encoding) != NULL); // nothing to fix, process data directly
231
232 std::string data(rawdata);
233 CRegExp re(false, CRegExp::asciiOnly, "^&(amp|lt|gt|quot|apos|#x[a-fA-F0-9]{1,4}|#[0-9]{1,5});.*");
234 do
235 {
236 if (re.RegFind(data, pos, MAX_ENTITY_LENGTH) < 0)
237 data.insert(pos + 1, "amp;");
238 pos = data.find('&', pos + 1);
239 } while (pos != std::string::npos);
240
241 return (TiXmlDocument::Parse(data.c_str(), NULL, encoding) != NULL);
242}
243
244bool CXBMCTinyXML::Test()
245{
246 // scraper results with unescaped &
247 CXBMCTinyXML doc;
248 std::string data("<details><url function=\"ParseTMDBRating\" "
249 "cache=\"tmdb-en-12244.json\">"
250 "http://api.themoviedb.org/3/movie/12244"
251 "?api_key=57983e31fb435df4df77afb854740ea9"
252 "&language=en&#x3f;&#x003F;&#0063;</url></details>");
253 doc.Parse(data, TIXML_DEFAULT_ENCODING);
254 TiXmlNode *root = doc.RootElement();
255 if (root && root->ValueStr() == "details")
256 {
257 TiXmlElement *url = root->FirstChildElement("url");
258 if (url && url->FirstChild())
259 {
260 return (url->FirstChild()->ValueStr() == "http://api.themoviedb.org/3/movie/12244?api_key=57983e31fb435df4df77afb854740ea9&language=en???");
261 }
262 }
263 return false;
264}