summary refs log tree commit diff stats
path: root/xbmc/utils/HTMLUtil.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'xbmc/utils/HTMLUtil.cpp')
-rw-r--r-- xbmc/utils/HTMLUtil.cpp 229
1 files changed, 229 insertions, 0 deletions
diff --git a/xbmc/utils/HTMLUtil.cpp b/xbmc/utils/HTMLUtil.cpp
new file mode 100644
index 0000000..dbe1843
--- /dev/null
+++ b/xbmc/utils/HTMLUtil.cpp
@@ -0,0 +1,229 @@
1/*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
4 *
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
8
9#include "HTMLUtil.h"
10
11#include "utils/StringUtils.h"
12
13#include <wctype.h>
14
15using namespace HTML;
16
17CHTMLUtil::CHTMLUtil(void) = default;
18
19CHTMLUtil::~CHTMLUtil(void) = default;
20
21void CHTMLUtil::RemoveTags(std::string& strHTML)
22{
23 int iNested = 0;
24 std::string strReturn = "";
25 for (int i = 0; i < (int) strHTML.size(); ++i)
26 {
27 if (strHTML[i] == '<') iNested++;
28 else if (strHTML[i] == '>') iNested--;
29 else
30 {
31 if (!iNested)
32 {
33 strReturn += strHTML[i];
34 }
35 }
36 }
37
38 strHTML = strReturn;
39}
40
//! One named HTML character reference and the character it stands for.
struct HTMLMapping
{
  const wchar_t* html; //!< entity text including '&' and ';', or null in the end marker
  const wchar_t w; //!< replacement character
};

/*!
 * Named entity table consumed by CHTMLUtil::ConvertHTMLToW.
 *
 * Entries are walked from the top until the entry whose html pointer is null,
 * so the order of the rows is the order in which entities are substituted.
 */
static const HTMLMapping mappings[] = {
    // a / A
    {L"&amp;", 0x0026},
    {L"&apos;", 0x0027},
    {L"&acute;", 0x00B4},
    {L"&agrave;", 0x00E0},
    {L"&aacute;", 0x00E1},
    {L"&acirc;", 0x00E2},
    {L"&atilde;", 0x00E3},
    {L"&auml;", 0x00E4},
    {L"&aring;", 0x00E5},
    {L"&aelig;", 0x00E6},
    {L"&Agrave;", 0x00C0},
    {L"&Aacute;", 0x00C1},
    {L"&Acirc;", 0x00C2},
    {L"&Atilde;", 0x00C3},
    {L"&Auml;", 0x00C4},
    {L"&Aring;", 0x00C5},
    {L"&AElig;", 0x00C6},
    // b
    {L"&bdquo;", 0x201E},
    {L"&brvbar;", 0x00A6},
    {L"&bull;", 0x2022},
    {L"&bullet;", 0x2022},
    // c / C
    {L"&cent;", 0x00A2},
    {L"&circ;", 0x02C6},
    {L"&curren;", 0x00A4},
    {L"&copy;", 0x00A9},
    {L"&cedil;", 0x00B8},
    {L"&Ccedil;", 0x00C7},
    {L"&ccedil;", 0x00E7},
    // d / D
    {L"&dagger;", 0x2020},
    {L"&deg;", 0x00B0},
    {L"&divide;", 0x00F7},
    {L"&Dagger;", 0x2021},
    // e / E
    {L"&egrave;", 0x00E8},
    {L"&eacute;", 0x00E9},
    {L"&ecirc;", 0x00EA},
    {L"&emsp;", 0x2003},
    {L"&ensp;", 0x2002},
    {L"&euml;", 0x00EB},
    {L"&eth;", 0x00F0},
    {L"&euro;", 0x20AC},
    {L"&Egrave;", 0x00C8},
    {L"&Eacute;", 0x00C9},
    {L"&Ecirc;", 0x00CA},
    {L"&Euml;", 0x00CB},
    {L"&ETH;", 0x00D0},
    {L"&quot;", 0x0022},
    // f
    {L"&frasl;", 0x2044},
    {L"&frac14;", 0x00BC},
    {L"&frac12;", 0x00BD},
    {L"&frac34;", 0x00BE},
    // g, h
    {L"&gt;", 0x003E},
    {L"&hellip;", 0x2026},
    // i / I
    {L"&iexcl;", 0x00A1},
    {L"&iquest;", 0x00BF},
    {L"&igrave;", 0x00EC},
    {L"&iacute;", 0x00ED},
    {L"&icirc;", 0x00EE},
    {L"&iuml;", 0x00EF},
    {L"&Igrave;", 0x00CC},
    {L"&Iacute;", 0x00CD},
    {L"&Icirc;", 0x00CE},
    {L"&Iuml;", 0x00CF},
    // l
    {L"&lrm;", 0x200E},
    {L"&lt;", 0x003C},
    {L"&laquo;", 0x00AB},
    {L"&ldquo;", 0x201C},
    {L"&lsaquo;", 0x2039},
    {L"&lsquo;", 0x2018},
    // m
    {L"&macr;", 0x00AF},
    {L"&micro;", 0x00B5},
    {L"&middot;", 0x00B7},
    {L"&mdash;", 0x2014},
    // n / N
    {L"&nbsp;", 0x00A0},
    {L"&ndash;", 0x2013},
    {L"&ntilde;", 0x00F1},
    {L"&not;", 0x00AC},
    {L"&Ntilde;", 0x00D1},
    // o / O
    {L"&ordf;", 0x00AA},
    {L"&ordm;", 0x00BA},
    {L"&oelig;", 0x0153},
    {L"&ograve;", 0x00F2},
    {L"&oacute;", 0x00F3},
    {L"&ocirc;", 0x00F4},
    {L"&otilde;", 0x00F5},
    {L"&ouml;", 0x00F6},
    {L"&oslash;", 0x00F8},
    {L"&OElig;", 0x0152},
    {L"&Ograve;", 0x00D2},
    {L"&Oacute;", 0x00D3},
    {L"&Ocirc;", 0x00D4},
    {L"&Otilde;", 0x00D5},
    {L"&Ouml;", 0x00D6},
    {L"&Oslash;", 0x00D8},
    // p
    {L"&para;", 0x00B6},
    {L"&permil;", 0x2030},
    {L"&plusmn;", 0x00B1},
    {L"&pound;", 0x00A3},
    // r
    {L"&raquo;", 0x00BB},
    {L"&rdquo;", 0x201D},
    {L"&reg;", 0x00AE},
    {L"&rlm;", 0x200F},
    {L"&rsaquo;", 0x203A},
    {L"&rsquo;", 0x2019},
    // s / S
    {L"&sbquo;", 0x201A},
    {L"&scaron;", 0x0161},
    {L"&sect;", 0x00A7},
    {L"&shy;", 0x00AD},
    {L"&sup1;", 0x00B9},
    {L"&sup2;", 0x00B2},
    {L"&sup3;", 0x00B3},
    {L"&szlig;", 0x00DF},
    {L"&Scaron;", 0x0160},
    // t / T
    {L"&thinsp;", 0x2009},
    {L"&thorn;", 0x00FE},
    {L"&tilde;", 0x02DC},
    {L"&times;", 0x00D7},
    {L"&trade;", 0x2122},
    {L"&THORN;", 0x00DE},
    // u / U
    {L"&uml;", 0x00A8},
    {L"&ugrave;", 0x00F9},
    {L"&uacute;", 0x00FA},
    {L"&ucirc;", 0x00FB},
    {L"&uuml;", 0x00FC},
    {L"&Ugrave;", 0x00D9},
    {L"&Uacute;", 0x00DA},
    {L"&Ucirc;", 0x00DB},
    {L"&Uuml;", 0x00DC},
    // y / Y
    {L"&yen;", 0x00A5},
    {L"&yuml;", 0x00FF},
    {L"&yacute;", 0x00FD},
    {L"&Yacute;", 0x00DD},
    {L"&Yuml;", 0x0178},
    // z
    {L"&zwj;", 0x200D},
    {L"&zwnj;", 0x200C},
    // end marker: stops the table walk
    {nullptr, L'\0'},
};
183
184void CHTMLUtil::ConvertHTMLToW(const std::wstring& strHTML, std::wstring& strStripped)
185{
186 //! @todo STRING_CLEANUP
187 if (strHTML.empty())
188 {
189 strStripped.clear();
190 return ;
191 }
192 size_t iPos = 0;
193 strStripped = strHTML;
194 while (mappings[iPos].html)
195 {
196 StringUtils::Replace(strStripped, mappings[iPos].html,std::wstring(1, mappings[iPos].w));
197 iPos++;
198 }
199
200 iPos = strStripped.find(L"&#");
201 while (iPos > 0 && iPos < strStripped.size() - 4)
202 {
203 size_t iStart = iPos + 1;
204 iPos += 2;
205 std::wstring num;
206 int base = 10;
207 if (strStripped[iPos] == L'x')
208 {
209 base = 16;
210 iPos++;
211 }
212
213 size_t i = iPos;
214 while (iPos < strStripped.size() &&
215 (base == 16 ? iswxdigit(strStripped[iPos]) : iswdigit(strStripped[iPos])))
216 iPos++;
217
218 num = strStripped.substr(i, iPos-i);
219 wchar_t val = (wchar_t)wcstol(num.c_str(),NULL,base);
220 if (base == 10)
221 num = StringUtils::Format(L"&#%ls;", num.c_str());
222 else
223 num = StringUtils::Format(L"&#x%ls;", num.c_str());
224
225 StringUtils::Replace(strStripped, num,std::wstring(1,val));
226 iPos = strStripped.find(L"&#", iStart);
227 }
228}
229