diff options
Diffstat (limited to 'xbmc/utils/HTMLUtil.cpp')
| -rw-r--r-- | xbmc/utils/HTMLUtil.cpp | 229 |
1 files changed, 229 insertions, 0 deletions
diff --git a/xbmc/utils/HTMLUtil.cpp b/xbmc/utils/HTMLUtil.cpp new file mode 100644 index 0000000..dbe1843 --- /dev/null +++ b/xbmc/utils/HTMLUtil.cpp | |||
| @@ -0,0 +1,229 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2005-2018 Team Kodi | ||
| 3 | * This file is part of Kodi - https://kodi.tv | ||
| 4 | * | ||
| 5 | * SPDX-License-Identifier: GPL-2.0-or-later | ||
| 6 | * See LICENSES/README.md for more information. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include "HTMLUtil.h" | ||
| 10 | |||
| 11 | #include "utils/StringUtils.h" | ||
| 12 | |||
| 13 | #include <wctype.h> | ||
| 14 | |||
| 15 | using namespace HTML; | ||
| 16 | |||
| 17 | CHTMLUtil::CHTMLUtil(void) = default; | ||
| 18 | |||
| 19 | CHTMLUtil::~CHTMLUtil(void) = default; | ||
| 20 | |||
| 21 | void CHTMLUtil::RemoveTags(std::string& strHTML) | ||
| 22 | { | ||
| 23 | int iNested = 0; | ||
| 24 | std::string strReturn = ""; | ||
| 25 | for (int i = 0; i < (int) strHTML.size(); ++i) | ||
| 26 | { | ||
| 27 | if (strHTML[i] == '<') iNested++; | ||
| 28 | else if (strHTML[i] == '>') iNested--; | ||
| 29 | else | ||
| 30 | { | ||
| 31 | if (!iNested) | ||
| 32 | { | ||
| 33 | strReturn += strHTML[i]; | ||
| 34 | } | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | strHTML = strReturn; | ||
| 39 | } | ||
| 40 | |||
//! One named HTML character reference and the character it stands for.
struct HTMLMapping
{
  const wchar_t* html; //!< entity as written in markup, e.g. L"&amp;"; nullptr ends the table
  const wchar_t w; //!< character the entity is replaced with
};

/*!
 * Named character references, each spelled out as "&name;" next to the code
 * point it decodes to. The list is terminated by an entry whose html member
 * is nullptr.
 *
 * The entity names must stay in their escaped form: a literal that contains
 * the decoded character itself would map that character onto itself and the
 * entry would no longer decode anything.
 */
static const HTMLMapping mappings[] =
  {{L"&amp;",     0x0026},
   {L"&apos;",    0x0027},
   {L"&acute;",   0x00B4},
   {L"&agrave;",  0x00E0},
   {L"&aacute;",  0x00E1},
   {L"&acirc;",   0x00E2},
   {L"&atilde;",  0x00E3},
   {L"&auml;",    0x00E4},
   {L"&aring;",   0x00E5},
   {L"&aelig;",   0x00E6},
   {L"&Agrave;",  0x00C0},
   {L"&Aacute;",  0x00C1},
   {L"&Acirc;",   0x00C2},
   {L"&Atilde;",  0x00C3},
   {L"&Auml;",    0x00C4},
   {L"&Aring;",   0x00C5},
   {L"&AElig;",   0x00C6},
   {L"&bdquo;",   0x201E},
   {L"&brvbar;",  0x00A6},
   {L"&bull;",    0x2022},
   {L"&bullet;",  0x2022},
   {L"&cent;",    0x00A2},
   {L"&circ;",    0x02C6},
   {L"&curren;",  0x00A4},
   {L"&copy;",    0x00A9},
   {L"&cedil;",   0x00B8},
   {L"&Ccedil;",  0x00C7},
   {L"&ccedil;",  0x00E7},
   {L"&dagger;",  0x2020},
   {L"&deg;",     0x00B0},
   {L"&divide;",  0x00F7},
   {L"&Dagger;",  0x2021},
   {L"&egrave;",  0x00E8},
   {L"&eacute;",  0x00E9},
   {L"&ecirc;",   0x00EA},
   {L"&emsp;",    0x2003},
   {L"&ensp;",    0x2002},
   {L"&euml;",    0x00EB},
   {L"&eth;",     0x00F0},
   {L"&euro;",    0x20AC},
   {L"&Egrave;",  0x00C8},
   {L"&Eacute;",  0x00C9},
   {L"&Ecirc;",   0x00CA},
   {L"&Euml;",    0x00CB},
   {L"&ETH;",     0x00D0},
   {L"&quot;",    0x0022},
   {L"&frasl;",   0x2044},
   {L"&frac14;",  0x00BC},
   {L"&frac12;",  0x00BD},
   {L"&frac34;",  0x00BE},
   {L"&gt;",      0x003E},
   {L"&hellip;",  0x2026},
   {L"&iexcl;",   0x00A1},
   {L"&iquest;",  0x00BF},
   {L"&igrave;",  0x00EC},
   {L"&iacute;",  0x00ED},
   {L"&icirc;",   0x00EE},
   {L"&iuml;",    0x00EF},
   {L"&Igrave;",  0x00CC},
   {L"&Iacute;",  0x00CD},
   {L"&Icirc;",   0x00CE},
   {L"&Iuml;",    0x00CF},
   {L"&lrm;",     0x200E},
   {L"&lt;",      0x003C},
   {L"&laquo;",   0x00AB},
   {L"&ldquo;",   0x201C},
   {L"&lsaquo;",  0x2039},
   {L"&lsquo;",   0x2018},
   {L"&macr;",    0x00AF},
   {L"&micro;",   0x00B5},
   {L"&middot;",  0x00B7},
   {L"&mdash;",   0x2014},
   {L"&nbsp;",    0x00A0},
   {L"&ndash;",   0x2013},
   {L"&ntilde;",  0x00F1},
   {L"&not;",     0x00AC},
   {L"&Ntilde;",  0x00D1},
   {L"&ordf;",    0x00AA},
   {L"&ordm;",    0x00BA},
   {L"&oelig;",   0x0153},
   {L"&ograve;",  0x00F2},
   {L"&oacute;",  0x00F3},
   {L"&ocirc;",   0x00F4},
   {L"&otilde;",  0x00F5},
   {L"&ouml;",    0x00F6},
   {L"&oslash;",  0x00F8},
   {L"&OElig;",   0x0152},
   {L"&Ograve;",  0x00D2},
   {L"&Oacute;",  0x00D3},
   {L"&Ocirc;",   0x00D4},
   {L"&Otilde;",  0x00D5},
   {L"&Ouml;",    0x00D6},
   {L"&Oslash;",  0x00D8},
   {L"&para;",    0x00B6},
   {L"&permil;",  0x2030},
   {L"&plusmn;",  0x00B1},
   {L"&pound;",   0x00A3},
   {L"&raquo;",   0x00BB},
   {L"&rdquo;",   0x201D},
   {L"&reg;",     0x00AE},
   {L"&rlm;",     0x200F},
   {L"&rsaquo;",  0x203A},
   {L"&rsquo;",   0x2019},
   {L"&sbquo;",   0x201A},
   {L"&scaron;",  0x0161},
   {L"&sect;",    0x00A7},
   {L"&shy;",     0x00AD},
   {L"&sup1;",    0x00B9},
   {L"&sup2;",    0x00B2},
   {L"&sup3;",    0x00B3},
   {L"&szlig;",   0x00DF},
   {L"&Scaron;",  0x0160},
   {L"&thinsp;",  0x2009},
   {L"&thorn;",   0x00FE},
   {L"&tilde;",   0x02DC},
   {L"&times;",   0x00D7},
   {L"&trade;",   0x2122},
   {L"&THORN;",   0x00DE},
   {L"&uml;",     0x00A8},
   {L"&ugrave;",  0x00F9},
   {L"&uacute;",  0x00FA},
   {L"&ucirc;",   0x00FB},
   {L"&uuml;",    0x00FC},
   {L"&Ugrave;",  0x00D9},
   {L"&Uacute;",  0x00DA},
   {L"&Ucirc;",   0x00DB},
   {L"&Uuml;",    0x00DC},
   {L"&yen;",     0x00A5},
   {L"&yuml;",    0x00FF},
   {L"&yacute;",  0x00FD},
   {L"&Yacute;",  0x00DD},
   {L"&Yuml;",    0x0178},
   {L"&zwj;",     0x200D},
   {L"&zwnj;",    0x200C},
   {nullptr,      L'\0'}};
| 183 | |||
| 184 | void CHTMLUtil::ConvertHTMLToW(const std::wstring& strHTML, std::wstring& strStripped) | ||
| 185 | { | ||
| 186 | //! @todo STRING_CLEANUP | ||
| 187 | if (strHTML.empty()) | ||
| 188 | { | ||
| 189 | strStripped.clear(); | ||
| 190 | return ; | ||
| 191 | } | ||
| 192 | size_t iPos = 0; | ||
| 193 | strStripped = strHTML; | ||
| 194 | while (mappings[iPos].html) | ||
| 195 | { | ||
| 196 | StringUtils::Replace(strStripped, mappings[iPos].html,std::wstring(1, mappings[iPos].w)); | ||
| 197 | iPos++; | ||
| 198 | } | ||
| 199 | |||
| 200 | iPos = strStripped.find(L"&#"); | ||
| 201 | while (iPos > 0 && iPos < strStripped.size() - 4) | ||
| 202 | { | ||
| 203 | size_t iStart = iPos + 1; | ||
| 204 | iPos += 2; | ||
| 205 | std::wstring num; | ||
| 206 | int base = 10; | ||
| 207 | if (strStripped[iPos] == L'x') | ||
| 208 | { | ||
| 209 | base = 16; | ||
| 210 | iPos++; | ||
| 211 | } | ||
| 212 | |||
| 213 | size_t i = iPos; | ||
| 214 | while (iPos < strStripped.size() && | ||
| 215 | (base == 16 ? iswxdigit(strStripped[iPos]) : iswdigit(strStripped[iPos]))) | ||
| 216 | iPos++; | ||
| 217 | |||
| 218 | num = strStripped.substr(i, iPos-i); | ||
| 219 | wchar_t val = (wchar_t)wcstol(num.c_str(),NULL,base); | ||
| 220 | if (base == 10) | ||
| 221 | num = StringUtils::Format(L"&#%ls;", num.c_str()); | ||
| 222 | else | ||
| 223 | num = StringUtils::Format(L"&#x%ls;", num.c_str()); | ||
| 224 | |||
| 225 | StringUtils::Replace(strStripped, num,std::wstring(1,val)); | ||
| 226 | iPos = strStripped.find(L"&#", iStart); | ||
| 227 | } | ||
| 228 | } | ||
| 229 | |||
