diff options
Diffstat (limited to 'xbmc/utils/CharsetConverter.cpp')
| -rw-r--r-- | xbmc/utils/CharsetConverter.cpp | 871 |
1 files changed, 871 insertions, 0 deletions
diff --git a/xbmc/utils/CharsetConverter.cpp b/xbmc/utils/CharsetConverter.cpp new file mode 100644 index 0000000..8dffd65 --- /dev/null +++ b/xbmc/utils/CharsetConverter.cpp | |||
| @@ -0,0 +1,871 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2005-2018 Team Kodi | ||
| 3 | * This file is part of Kodi - https://kodi.tv | ||
| 4 | * | ||
| 5 | * SPDX-License-Identifier: GPL-2.0-or-later | ||
| 6 | * See LICENSES/README.md for more information. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include "CharsetConverter.h" | ||
| 10 | |||
| 11 | #include "LangInfo.h" | ||
| 12 | #include "guilib/LocalizeStrings.h" | ||
| 13 | #include "log.h" | ||
| 14 | #include "settings/Settings.h" | ||
| 15 | #include "settings/lib/Setting.h" | ||
| 16 | #include "settings/lib/SettingDefinitions.h" | ||
| 17 | #include "utils/StringUtils.h" | ||
| 18 | #include "utils/Utf8Utils.h" | ||
| 19 | |||
| 20 | #include <algorithm> | ||
| 21 | |||
| 22 | #include <fribidi.h> | ||
| 23 | #include <iconv.h> | ||
| 24 | |||
/* Byte-order suffix appended to the UTF-16/UTF-32 charset names handed to iconv. */
#if defined(WORDS_BIGENDIAN)
  #define ENDIAN_SUFFIX "BE"
#else
  #define ENDIAN_SUFFIX "LE"
#endif

/* These two names are spelled the same way on every supported platform. */
#define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
#define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX

/* Name used when UTF-8 is the *source* of a conversion. */
#if defined(TARGET_DARWIN)
  #define UTF8_SOURCE "UTF-8-MAC"
#else
  #define UTF8_SOURCE "UTF-8"
#endif

/* Charset name matching wchar_t, plus a flag describing the wchar_t representation. */
#if defined(TARGET_DARWIN)
  #define WCHAR_IS_UCS_4 1
  #define WCHAR_CHARSET UTF32_CHARSET
#elif defined(TARGET_WINDOWS)
  #define WCHAR_IS_UTF16 1
  #define WCHAR_CHARSET UTF16_CHARSET
#elif defined(TARGET_FREEBSD) || defined(TARGET_ANDROID)
  #define WCHAR_IS_UCS_4 1
  #define WCHAR_CHARSET UTF32_CHARSET
#else
  /* Any other platform: let iconv resolve "WCHAR_T" itself and only set a
     width flag when the compiler advertises ISO 10646 wchar_t values. */
  #define WCHAR_CHARSET "WCHAR_T"
  #if __STDC_ISO_10646__
    #if defined(SIZEOF_WCHAR_T) && SIZEOF_WCHAR_T == 4
      #define WCHAR_IS_UCS_4 1
    #elif defined(SIZEOF_WCHAR_T) && SIZEOF_WCHAR_T == 2
      #define WCHAR_IS_UCS_2 1
    #endif
  #endif
#endif
| 70 | |||
/* Sentinel meaning "no conversion descriptor is open"; the code below compares
   iconv_open() results against it to detect failure. */
#define NO_ICONV ((iconv_t)-1)
| 72 | |||
/* Placeholders for charsets whose actual name is resolved when a converter is opened. */
enum SpecialCharset
{
  NotSpecialCharset = 0, // an explicit charset name is used
  SystemCharset,
  UserCharset,           // locale.charset
  SubtitleCharset,       // subtitles.charset
};
| 80 | |||
| 81 | class CConverterType : public CCriticalSection | ||
| 82 | { | ||
| 83 | public: | ||
| 84 | CConverterType(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1); | ||
| 85 | CConverterType(enum SpecialCharset sourceSpecialCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1); | ||
| 86 | CConverterType(const std::string& sourceCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen = 1); | ||
| 87 | CConverterType(enum SpecialCharset sourceSpecialCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen = 1); | ||
| 88 | CConverterType(const CConverterType& other); | ||
| 89 | ~CConverterType(); | ||
| 90 | |||
| 91 | iconv_t GetConverter(CSingleLock& converterLock); | ||
| 92 | |||
| 93 | void Reset(void); | ||
| 94 | void ReinitTo(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1); | ||
| 95 | std::string GetSourceCharset(void) const { return m_sourceCharset; } | ||
| 96 | std::string GetTargetCharset(void) const { return m_targetCharset; } | ||
| 97 | unsigned int GetTargetSingleCharMaxLen(void) const { return m_targetSingleCharMaxLen; } | ||
| 98 | |||
| 99 | private: | ||
| 100 | static std::string ResolveSpecialCharset(enum SpecialCharset charset); | ||
| 101 | |||
| 102 | enum SpecialCharset m_sourceSpecialCharset; | ||
| 103 | std::string m_sourceCharset; | ||
| 104 | enum SpecialCharset m_targetSpecialCharset; | ||
| 105 | std::string m_targetCharset; | ||
| 106 | iconv_t m_iconv; | ||
| 107 | unsigned int m_targetSingleCharMaxLen; | ||
| 108 | }; | ||
| 109 | |||
| 110 | CConverterType::CConverterType(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(), | ||
| 111 | m_sourceSpecialCharset(NotSpecialCharset), | ||
| 112 | m_sourceCharset(sourceCharset), | ||
| 113 | m_targetSpecialCharset(NotSpecialCharset), | ||
| 114 | m_targetCharset(targetCharset), | ||
| 115 | m_iconv(NO_ICONV), | ||
| 116 | m_targetSingleCharMaxLen(targetSingleCharMaxLen) | ||
| 117 | { | ||
| 118 | } | ||
| 119 | |||
| 120 | CConverterType::CConverterType(enum SpecialCharset sourceSpecialCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(), | ||
| 121 | m_sourceSpecialCharset(sourceSpecialCharset), | ||
| 122 | m_sourceCharset(), | ||
| 123 | m_targetSpecialCharset(NotSpecialCharset), | ||
| 124 | m_targetCharset(targetCharset), | ||
| 125 | m_iconv(NO_ICONV), | ||
| 126 | m_targetSingleCharMaxLen(targetSingleCharMaxLen) | ||
| 127 | { | ||
| 128 | } | ||
| 129 | |||
| 130 | CConverterType::CConverterType(const std::string& sourceCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(), | ||
| 131 | m_sourceSpecialCharset(NotSpecialCharset), | ||
| 132 | m_sourceCharset(sourceCharset), | ||
| 133 | m_targetSpecialCharset(targetSpecialCharset), | ||
| 134 | m_targetCharset(), | ||
| 135 | m_iconv(NO_ICONV), | ||
| 136 | m_targetSingleCharMaxLen(targetSingleCharMaxLen) | ||
| 137 | { | ||
| 138 | } | ||
| 139 | |||
| 140 | CConverterType::CConverterType(enum SpecialCharset sourceSpecialCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(), | ||
| 141 | m_sourceSpecialCharset(sourceSpecialCharset), | ||
| 142 | m_sourceCharset(), | ||
| 143 | m_targetSpecialCharset(targetSpecialCharset), | ||
| 144 | m_targetCharset(), | ||
| 145 | m_iconv(NO_ICONV), | ||
| 146 | m_targetSingleCharMaxLen(targetSingleCharMaxLen) | ||
| 147 | { | ||
| 148 | } | ||
| 149 | |||
| 150 | CConverterType::CConverterType(const CConverterType& other) : CCriticalSection(), | ||
| 151 | m_sourceSpecialCharset(other.m_sourceSpecialCharset), | ||
| 152 | m_sourceCharset(other.m_sourceCharset), | ||
| 153 | m_targetSpecialCharset(other.m_targetSpecialCharset), | ||
| 154 | m_targetCharset(other.m_targetCharset), | ||
| 155 | m_iconv(NO_ICONV), | ||
| 156 | m_targetSingleCharMaxLen(other.m_targetSingleCharMaxLen) | ||
| 157 | { | ||
| 158 | } | ||
| 159 | |||
| 160 | CConverterType::~CConverterType() | ||
| 161 | { | ||
| 162 | CSingleLock lock(*this); | ||
| 163 | if (m_iconv != NO_ICONV) | ||
| 164 | iconv_close(m_iconv); | ||
| 165 | lock.Leave(); // ensure unlocking before final destruction | ||
| 166 | } | ||
| 167 | |||
| 168 | iconv_t CConverterType::GetConverter(CSingleLock& converterLock) | ||
| 169 | { | ||
| 170 | // ensure that this unique instance is locked externally | ||
| 171 | if (&converterLock.get_underlying() != this) | ||
| 172 | return NO_ICONV; | ||
| 173 | |||
| 174 | if (m_iconv == NO_ICONV) | ||
| 175 | { | ||
| 176 | if (m_sourceSpecialCharset) | ||
| 177 | m_sourceCharset = ResolveSpecialCharset(m_sourceSpecialCharset); | ||
| 178 | if (m_targetSpecialCharset) | ||
| 179 | m_targetCharset = ResolveSpecialCharset(m_targetSpecialCharset); | ||
| 180 | |||
| 181 | m_iconv = iconv_open(m_targetCharset.c_str(), m_sourceCharset.c_str()); | ||
| 182 | |||
| 183 | if (m_iconv == NO_ICONV) | ||
| 184 | CLog::Log(LOGERROR, "%s: iconv_open() for \"%s\" -> \"%s\" failed, errno = %d (%s)", | ||
| 185 | __FUNCTION__, m_sourceCharset.c_str(), m_targetCharset.c_str(), errno, strerror(errno)); | ||
| 186 | } | ||
| 187 | |||
| 188 | return m_iconv; | ||
| 189 | } | ||
| 190 | |||
| 191 | void CConverterType::Reset(void) | ||
| 192 | { | ||
| 193 | CSingleLock lock(*this); | ||
| 194 | if (m_iconv != NO_ICONV) | ||
| 195 | { | ||
| 196 | iconv_close(m_iconv); | ||
| 197 | m_iconv = NO_ICONV; | ||
| 198 | } | ||
| 199 | |||
| 200 | if (m_sourceSpecialCharset) | ||
| 201 | m_sourceCharset.clear(); | ||
| 202 | if (m_targetSpecialCharset) | ||
| 203 | m_targetCharset.clear(); | ||
| 204 | |||
| 205 | } | ||
| 206 | |||
| 207 | void CConverterType::ReinitTo(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/) | ||
| 208 | { | ||
| 209 | CSingleLock lock(*this); | ||
| 210 | if (sourceCharset != m_sourceCharset || targetCharset != m_targetCharset) | ||
| 211 | { | ||
| 212 | if (m_iconv != NO_ICONV) | ||
| 213 | { | ||
| 214 | iconv_close(m_iconv); | ||
| 215 | m_iconv = NO_ICONV; | ||
| 216 | } | ||
| 217 | |||
| 218 | m_sourceSpecialCharset = NotSpecialCharset; | ||
| 219 | m_sourceCharset = sourceCharset; | ||
| 220 | m_targetSpecialCharset = NotSpecialCharset; | ||
| 221 | m_targetCharset = targetCharset; | ||
| 222 | m_targetSingleCharMaxLen = targetSingleCharMaxLen; | ||
| 223 | } | ||
| 224 | } | ||
| 225 | |||
| 226 | std::string CConverterType::ResolveSpecialCharset(enum SpecialCharset charset) | ||
| 227 | { | ||
| 228 | switch (charset) | ||
| 229 | { | ||
| 230 | case SystemCharset: | ||
| 231 | return ""; | ||
| 232 | case UserCharset: | ||
| 233 | return g_langInfo.GetGuiCharSet(); | ||
| 234 | case SubtitleCharset: | ||
| 235 | return g_langInfo.GetSubtitleCharSet(); | ||
| 236 | case NotSpecialCharset: | ||
| 237 | default: | ||
| 238 | return "UTF-8"; /* dummy value */ | ||
| 239 | } | ||
| 240 | } | ||
| 241 | |||
/*
 * Index of each predefined conversion.
 * Keep it in sync with CCharsetConverter::CInnerConverter::m_stdConversion:
 * the enumerator value is the position of the matching entry in that array.
 */
enum StdConversionType
{
  NoConversion = -1,

  Utf8ToUtf32 = 0,
  Utf32ToUtf8,
  Utf32ToW,
  WToUtf32,

  SubtitleCharsetToUtf8,
  Utf8ToUserCharset,
  UserCharsetToUtf8,
  Utf32ToUserCharset,

  WtoUtf8,
  Utf16LEtoW,
  Utf16BEtoUtf8,
  Utf16LEtoUtf8,
  Utf8toW,

  Utf8ToSystem,
  SystemToUtf8,
  Ucs2CharsetToUtf8,

  NumberOfStdConversionTypes /* Dummy sentinel entry: number of real conversions */
};
| 263 | |||
| 264 | /* We don't want to pollute header file with many additional includes and definitions, so put | ||
| 265 | here all staff that require usage of types defined in this file or in additional headers */ | ||
| 266 | class CCharsetConverter::CInnerConverter | ||
| 267 | { | ||
| 268 | public: | ||
| 269 | static bool logicalToVisualBiDi(const std::u32string& stringSrc, | ||
| 270 | std::u32string& stringDst, | ||
| 271 | FriBidiCharType base = FRIBIDI_TYPE_LTR, | ||
| 272 | const bool failOnBadString = false, | ||
| 273 | int* visualToLogicalMap = nullptr); | ||
| 274 | |||
| 275 | template<class INPUT,class OUTPUT> | ||
| 276 | static bool stdConvert(StdConversionType convertType, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false); | ||
| 277 | template<class INPUT,class OUTPUT> | ||
| 278 | static bool customConvert(const std::string& sourceCharset, const std::string& targetCharset, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false); | ||
| 279 | |||
| 280 | template<class INPUT,class OUTPUT> | ||
| 281 | static bool convert(iconv_t type, int multiplier, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false); | ||
| 282 | |||
| 283 | static CConverterType m_stdConversion[NumberOfStdConversionTypes]; | ||
| 284 | static CCriticalSection m_critSectionFriBiDi; | ||
| 285 | }; | ||
| 286 | |||
/* Minimum and maximum size of a single UTF-8 encoded symbol, counted in chars.
   The maximum is passed to converters that produce UTF-8 as their
   target-single-char length. */
const int CCharsetConverter::m_Utf8CharMinSize = 1;
const int CCharsetConverter::m_Utf8CharMaxSize = 4;
| 290 | |||
| 291 | CConverterType CCharsetConverter::CInnerConverter::m_stdConversion[NumberOfStdConversionTypes] = /* keep it in sync with enum StdConversionType */ | ||
| 292 | { | ||
| 293 | /* Utf8ToUtf32 */ CConverterType(UTF8_SOURCE, UTF32_CHARSET), | ||
| 294 | /* Utf32ToUtf8 */ CConverterType(UTF32_CHARSET, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), | ||
| 295 | /* Utf32ToW */ CConverterType(UTF32_CHARSET, WCHAR_CHARSET), | ||
| 296 | /* WToUtf32 */ CConverterType(WCHAR_CHARSET, UTF32_CHARSET), | ||
| 297 | /* SubtitleCharsetToUtf8*/CConverterType(SubtitleCharset, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), | ||
| 298 | /* Utf8ToUserCharset */ CConverterType(UTF8_SOURCE, UserCharset), | ||
| 299 | /* UserCharsetToUtf8 */ CConverterType(UserCharset, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), | ||
| 300 | /* Utf32ToUserCharset */ CConverterType(UTF32_CHARSET, UserCharset), | ||
| 301 | /* WtoUtf8 */ CConverterType(WCHAR_CHARSET, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), | ||
| 302 | /* Utf16LEtoW */ CConverterType("UTF-16LE", WCHAR_CHARSET), | ||
| 303 | /* Utf16BEtoUtf8 */ CConverterType("UTF-16BE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), | ||
| 304 | /* Utf16LEtoUtf8 */ CConverterType("UTF-16LE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), | ||
| 305 | /* Utf8toW */ CConverterType(UTF8_SOURCE, WCHAR_CHARSET), | ||
| 306 | /* Utf8ToSystem */ CConverterType(UTF8_SOURCE, SystemCharset), | ||
| 307 | /* SystemToUtf8 */ CConverterType(SystemCharset, UTF8_SOURCE), | ||
| 308 | /* Ucs2CharsetToUtf8 */ CConverterType("UCS-2LE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize) | ||
| 309 | }; | ||
| 310 | |||
/* Lock held by logicalToVisualBiDi() around its FriBidi calls (libfribidi is not threadsafe). */
CCriticalSection CCharsetConverter::CInnerConverter::m_critSectionFriBiDi;
| 312 | |||
| 313 | template<class INPUT,class OUTPUT> | ||
| 314 | bool CCharsetConverter::CInnerConverter::stdConvert(StdConversionType convertType, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/) | ||
| 315 | { | ||
| 316 | strDest.clear(); | ||
| 317 | if (strSource.empty()) | ||
| 318 | return true; | ||
| 319 | |||
| 320 | if (convertType < 0 || convertType >= NumberOfStdConversionTypes) | ||
| 321 | return false; | ||
| 322 | |||
| 323 | CConverterType& convType = m_stdConversion[convertType]; | ||
| 324 | CSingleLock converterLock(convType); | ||
| 325 | |||
| 326 | return convert(convType.GetConverter(converterLock), convType.GetTargetSingleCharMaxLen(), strSource, strDest, failOnInvalidChar); | ||
| 327 | } | ||
| 328 | |||
| 329 | template<class INPUT,class OUTPUT> | ||
| 330 | bool CCharsetConverter::CInnerConverter::customConvert(const std::string& sourceCharset, const std::string& targetCharset, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/) | ||
| 331 | { | ||
| 332 | strDest.clear(); | ||
| 333 | if (strSource.empty()) | ||
| 334 | return true; | ||
| 335 | |||
| 336 | iconv_t conv = iconv_open(targetCharset.c_str(), sourceCharset.c_str()); | ||
| 337 | if (conv == NO_ICONV) | ||
| 338 | { | ||
| 339 | CLog::Log(LOGERROR, "%s: iconv_open() for \"%s\" -> \"%s\" failed, errno = %d (%s)", | ||
| 340 | __FUNCTION__, sourceCharset.c_str(), targetCharset.c_str(), errno, strerror(errno)); | ||
| 341 | return false; | ||
| 342 | } | ||
| 343 | const int dstMultp = (targetCharset.compare(0, 5, "UTF-8") == 0) ? CCharsetConverter::m_Utf8CharMaxSize : 1; | ||
| 344 | const bool result = convert(conv, dstMultp, strSource, strDest, failOnInvalidChar); | ||
| 345 | iconv_close(conv); | ||
| 346 | |||
| 347 | return result; | ||
| 348 | } | ||
| 349 | |||
/* Depending on platform and version, iconv() declares its inbuf parameter either as
   char** or as const char**. This wrapper converts implicitly to whichever one is needed. */
struct charPtrPtrAdapter
{
  const char** pointer;

  explicit charPtrPtrAdapter(const char** p)
    : pointer(p)
  {
  }

  // For iconv() declarations taking a non-const buffer pointer.
  operator char**()
  {
    return const_cast<char**>(pointer);
  }

  // For iconv() declarations taking a const buffer pointer.
  operator const char**()
  {
    return pointer;
  }
};
| 362 | |||
| 363 | template<class INPUT,class OUTPUT> | ||
| 364 | bool CCharsetConverter::CInnerConverter::convert(iconv_t type, int multiplier, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/) | ||
| 365 | { | ||
| 366 | if (type == NO_ICONV) | ||
| 367 | return false; | ||
| 368 | |||
| 369 | //input buffer for iconv() is the buffer from strSource | ||
| 370 | size_t inBufSize = (strSource.length() + 1) * sizeof(typename INPUT::value_type); | ||
| 371 | const char* inBuf = (const char*)strSource.c_str(); | ||
| 372 | |||
| 373 | //allocate output buffer for iconv() | ||
| 374 | size_t outBufSize = (strSource.length() + 1) * sizeof(typename OUTPUT::value_type) * multiplier; | ||
| 375 | char* outBuf = (char*)malloc(outBufSize); | ||
| 376 | if (outBuf == NULL) | ||
| 377 | { | ||
| 378 | CLog::Log(LOGFATAL, "%s: malloc failed", __FUNCTION__); | ||
| 379 | return false; | ||
| 380 | } | ||
| 381 | |||
| 382 | size_t inBytesAvail = inBufSize; //how many bytes iconv() can read | ||
| 383 | size_t outBytesAvail = outBufSize; //how many bytes iconv() can write | ||
| 384 | const char* inBufStart = inBuf; //where in our input buffer iconv() should start reading | ||
| 385 | char* outBufStart = outBuf; //where in out output buffer iconv() should start writing | ||
| 386 | |||
| 387 | size_t returnV; | ||
| 388 | while(true) | ||
| 389 | { | ||
| 390 | //iconv() will update inBufStart, inBytesAvail, outBufStart and outBytesAvail | ||
| 391 | returnV = iconv(type, charPtrPtrAdapter(&inBufStart), &inBytesAvail, &outBufStart, &outBytesAvail); | ||
| 392 | |||
| 393 | if (returnV == (size_t)-1) | ||
| 394 | { | ||
| 395 | if (errno == E2BIG) //output buffer is not big enough | ||
| 396 | { | ||
| 397 | //save where iconv() ended converting, realloc might make outBufStart invalid | ||
| 398 | size_t bytesConverted = outBufSize - outBytesAvail; | ||
| 399 | |||
| 400 | //make buffer twice as big | ||
| 401 | outBufSize *= 2; | ||
| 402 | char* newBuf = (char*)realloc(outBuf, outBufSize); | ||
| 403 | if (!newBuf) | ||
| 404 | { | ||
| 405 | CLog::Log(LOGFATAL, "%s realloc failed with errno=%d(%s)", __FUNCTION__, errno, | ||
| 406 | strerror(errno)); | ||
| 407 | break; | ||
| 408 | } | ||
| 409 | outBuf = newBuf; | ||
| 410 | |||
| 411 | //update the buffer pointer and counter | ||
| 412 | outBufStart = outBuf + bytesConverted; | ||
| 413 | outBytesAvail = outBufSize - bytesConverted; | ||
| 414 | |||
| 415 | //continue in the loop and convert the rest | ||
| 416 | continue; | ||
| 417 | } | ||
| 418 | else if (errno == EILSEQ) //An invalid multibyte sequence has been encountered in the input | ||
| 419 | { | ||
| 420 | if (failOnInvalidChar) | ||
| 421 | break; | ||
| 422 | |||
| 423 | //skip invalid byte | ||
| 424 | inBufStart++; | ||
| 425 | inBytesAvail--; | ||
| 426 | //continue in the loop and convert the rest | ||
| 427 | continue; | ||
| 428 | } | ||
| 429 | else if (errno == EINVAL) /* Invalid sequence at the end of input buffer */ | ||
| 430 | { | ||
| 431 | if (!failOnInvalidChar) | ||
| 432 | returnV = 0; /* reset error status to use converted part */ | ||
| 433 | |||
| 434 | break; | ||
| 435 | } | ||
| 436 | else //iconv() had some other error | ||
| 437 | { | ||
| 438 | CLog::Log(LOGERROR, "%s: iconv() failed, errno=%d (%s)", | ||
| 439 | __FUNCTION__, errno, strerror(errno)); | ||
| 440 | } | ||
| 441 | } | ||
| 442 | break; | ||
| 443 | } | ||
| 444 | |||
| 445 | //complete the conversion (reset buffers), otherwise the current data will prefix the data on the next call | ||
| 446 | if (iconv(type, NULL, NULL, &outBufStart, &outBytesAvail) == (size_t)-1) | ||
| 447 | CLog::Log(LOGERROR, "%s failed cleanup errno=%d(%s)", __FUNCTION__, errno, strerror(errno)); | ||
| 448 | |||
| 449 | if (returnV == (size_t)-1) | ||
| 450 | { | ||
| 451 | free(outBuf); | ||
| 452 | return false; | ||
| 453 | } | ||
| 454 | //we're done | ||
| 455 | |||
| 456 | const typename OUTPUT::size_type sizeInChars = (typename OUTPUT::size_type) (outBufSize - outBytesAvail) / sizeof(typename OUTPUT::value_type); | ||
| 457 | typename OUTPUT::const_pointer strPtr = (typename OUTPUT::const_pointer) outBuf; | ||
| 458 | /* Make sure that all buffer is assigned and string is stopped at end of buffer */ | ||
| 459 | if (strPtr[sizeInChars-1] == 0 && strSource[strSource.length()-1] != 0) | ||
| 460 | strDest.assign(strPtr, sizeInChars-1); | ||
| 461 | else | ||
| 462 | strDest.assign(strPtr, sizeInChars); | ||
| 463 | |||
| 464 | free(outBuf); | ||
| 465 | |||
| 466 | return true; | ||
| 467 | } | ||
| 468 | |||
| 469 | bool CCharsetConverter::CInnerConverter::logicalToVisualBiDi( | ||
| 470 | const std::u32string& stringSrc, | ||
| 471 | std::u32string& stringDst, | ||
| 472 | FriBidiCharType base /*= FRIBIDI_TYPE_LTR*/, | ||
| 473 | const bool failOnBadString /*= false*/, | ||
| 474 | int* visualToLogicalMap /*= nullptr*/) | ||
| 475 | { | ||
| 476 | stringDst.clear(); | ||
| 477 | |||
| 478 | const size_t srcLen = stringSrc.length(); | ||
| 479 | if (srcLen == 0) | ||
| 480 | return true; | ||
| 481 | |||
| 482 | stringDst.reserve(srcLen); | ||
| 483 | size_t lineStart = 0; | ||
| 484 | |||
| 485 | // libfribidi is not threadsafe, so make sure we make it so | ||
| 486 | CSingleLock lock(m_critSectionFriBiDi); | ||
| 487 | do | ||
| 488 | { | ||
| 489 | size_t lineEnd = stringSrc.find('\n', lineStart); | ||
| 490 | if (lineEnd >= srcLen) // equal to 'lineEnd == std::string::npos' | ||
| 491 | lineEnd = srcLen; | ||
| 492 | else | ||
| 493 | lineEnd++; // include '\n' | ||
| 494 | |||
| 495 | const size_t lineLen = lineEnd - lineStart; | ||
| 496 | |||
| 497 | FriBidiChar* visual = (FriBidiChar*) malloc((lineLen + 1) * sizeof(FriBidiChar)); | ||
| 498 | if (visual == NULL) | ||
| 499 | { | ||
| 500 | free(visual); | ||
| 501 | CLog::Log(LOGFATAL, "%s: can't allocate memory", __FUNCTION__); | ||
| 502 | return false; | ||
| 503 | } | ||
| 504 | |||
| 505 | bool bidiFailed = false; | ||
| 506 | FriBidiCharType baseCopy = base; // preserve same value for all lines, required because fribidi_log2vis will modify parameter value | ||
| 507 | if (fribidi_log2vis(reinterpret_cast<const FriBidiChar*>(stringSrc.c_str() + lineStart), | ||
| 508 | lineLen, &baseCopy, visual, nullptr, | ||
| 509 | !visualToLogicalMap ? nullptr : visualToLogicalMap + lineStart, nullptr)) | ||
| 510 | { | ||
| 511 | // Removes bidirectional marks | ||
| 512 | const int newLen = fribidi_remove_bidi_marks( | ||
| 513 | visual, lineLen, nullptr, !visualToLogicalMap ? nullptr : visualToLogicalMap + lineStart, | ||
| 514 | nullptr); | ||
| 515 | if (newLen > 0) | ||
| 516 | stringDst.append((const char32_t*)visual, (size_t)newLen); | ||
| 517 | else if (newLen < 0) | ||
| 518 | bidiFailed = failOnBadString; | ||
| 519 | } | ||
| 520 | else | ||
| 521 | bidiFailed = failOnBadString; | ||
| 522 | |||
| 523 | free(visual); | ||
| 524 | |||
| 525 | if (bidiFailed) | ||
| 526 | return false; | ||
| 527 | |||
| 528 | lineStart = lineEnd; | ||
| 529 | } while (lineStart < srcLen); | ||
| 530 | |||
| 531 | return !stringDst.empty(); | ||
| 532 | } | ||
| 533 | |||
/* Selectable charsets: iconv charset name paired with its display caption.
   The list ends with an all-null sentinel entry that the lookup loops stop on. */
static struct SCharsetMapping
{
  const char* charset;
  const char* caption;
} g_charsets[] = {
  { "ISO-8859-1", "Western Europe (ISO)" },
  { "ISO-8859-2", "Central Europe (ISO)" },
  { "ISO-8859-3", "South Europe (ISO)" },
  { "ISO-8859-4", "Baltic (ISO)" },
  { "ISO-8859-5", "Cyrillic (ISO)" },
  { "ISO-8859-6", "Arabic (ISO)" },
  { "ISO-8859-7", "Greek (ISO)" },
  { "ISO-8859-8", "Hebrew (ISO)" },
  { "ISO-8859-9", "Turkish (ISO)" },
  { "CP1250", "Central Europe (Windows)" },
  { "CP1251", "Cyrillic (Windows)" },
  { "CP1252", "Western Europe (Windows)" },
  { "CP1253", "Greek (Windows)" },
  { "CP1254", "Turkish (Windows)" },
  { "CP1255", "Hebrew (Windows)" },
  { "CP1256", "Arabic (Windows)" },
  { "CP1257", "Baltic (Windows)" },
  { "CP1258", "Vietnamese (Windows)" },
  { "CP874", "Thai (Windows)" },
  { "BIG5", "Chinese Traditional (Big5)" },
  { "GBK", "Chinese Simplified (GBK)" },
  { "SHIFT_JIS", "Japanese (Shift-JIS)" },
  { "CP949", "Korean" },
  { "BIG5-HKSCS", "Hong Kong (Big5-HKSCS)" },
  { nullptr, nullptr } // sentinel
};
| 565 | |||
/* Defaulted constructor: no explicit initialisation is performed here. */
CCharsetConverter::CCharsetConverter() = default;
| 567 | |||
| 568 | void CCharsetConverter::OnSettingChanged(std::shared_ptr<const CSetting> setting) | ||
| 569 | { | ||
| 570 | if (setting == NULL) | ||
| 571 | return; | ||
| 572 | |||
| 573 | const std::string& settingId = setting->GetId(); | ||
| 574 | if (settingId == CSettings::SETTING_LOCALE_CHARSET) | ||
| 575 | resetUserCharset(); | ||
| 576 | else if (settingId == CSettings::SETTING_SUBTITLES_CHARSET) | ||
| 577 | resetSubtitleCharset(); | ||
| 578 | } | ||
| 579 | |||
/* Currently a no-op: the body is empty. */
void CCharsetConverter::clear()
{
}
| 583 | |||
| 584 | std::vector<std::string> CCharsetConverter::getCharsetLabels() | ||
| 585 | { | ||
| 586 | std::vector<std::string> lab; | ||
| 587 | for(SCharsetMapping* c = g_charsets; c->charset; c++) | ||
| 588 | lab.emplace_back(c->caption); | ||
| 589 | |||
| 590 | return lab; | ||
| 591 | } | ||
| 592 | |||
| 593 | std::string CCharsetConverter::getCharsetLabelByName(const std::string& charsetName) | ||
| 594 | { | ||
| 595 | for(SCharsetMapping* c = g_charsets; c->charset; c++) | ||
| 596 | { | ||
| 597 | if (StringUtils::EqualsNoCase(charsetName,c->charset)) | ||
| 598 | return c->caption; | ||
| 599 | } | ||
| 600 | |||
| 601 | return ""; | ||
| 602 | } | ||
| 603 | |||
| 604 | std::string CCharsetConverter::getCharsetNameByLabel(const std::string& charsetLabel) | ||
| 605 | { | ||
| 606 | for(SCharsetMapping* c = g_charsets; c->charset; c++) | ||
| 607 | { | ||
| 608 | if (StringUtils::EqualsNoCase(charsetLabel, c->caption)) | ||
| 609 | return c->charset; | ||
| 610 | } | ||
| 611 | |||
| 612 | return ""; | ||
| 613 | } | ||
| 614 | |||
| 615 | void CCharsetConverter::reset(void) | ||
| 616 | { | ||
| 617 | for (CConverterType& conversion : CInnerConverter::m_stdConversion) | ||
| 618 | conversion.Reset(); | ||
| 619 | } | ||
| 620 | |||
| 621 | void CCharsetConverter::resetSystemCharset(void) | ||
| 622 | { | ||
| 623 | CInnerConverter::m_stdConversion[Utf8ToSystem].Reset(); | ||
| 624 | CInnerConverter::m_stdConversion[SystemToUtf8].Reset(); | ||
| 625 | } | ||
| 626 | |||
| 627 | void CCharsetConverter::resetUserCharset(void) | ||
| 628 | { | ||
| 629 | CInnerConverter::m_stdConversion[UserCharsetToUtf8].Reset(); | ||
| 630 | CInnerConverter::m_stdConversion[UserCharsetToUtf8].Reset(); | ||
| 631 | CInnerConverter::m_stdConversion[Utf32ToUserCharset].Reset(); | ||
| 632 | resetSubtitleCharset(); | ||
| 633 | } | ||
| 634 | |||
| 635 | void CCharsetConverter::resetSubtitleCharset(void) | ||
| 636 | { | ||
| 637 | CInnerConverter::m_stdConversion[SubtitleCharsetToUtf8].Reset(); | ||
| 638 | } | ||
| 639 | |||
/* Resets the user-charset converters; resetUserCharset() is the only call made here. */
void CCharsetConverter::reinitCharsetsFromSettings(void)
{
  resetUserCharset(); // this will also reinit Subtitle charsets
}
| 644 | |||
| 645 | bool CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/) | ||
| 646 | { | ||
| 647 | return CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32StringDst, failOnBadChar); | ||
| 648 | } | ||
| 649 | |||
| 650 | std::u32string CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, bool failOnBadChar /*= true*/) | ||
| 651 | { | ||
| 652 | std::u32string converted; | ||
| 653 | utf8ToUtf32(utf8StringSrc, converted, failOnBadChar); | ||
| 654 | return converted; | ||
| 655 | } | ||
| 656 | |||
| 657 | bool CCharsetConverter::utf8ToUtf32Visual(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool bVisualBiDiFlip /*= false*/, bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/) | ||
| 658 | { | ||
| 659 | if (bVisualBiDiFlip) | ||
| 660 | { | ||
| 661 | std::u32string converted; | ||
| 662 | if (!CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, converted, failOnBadChar)) | ||
| 663 | return false; | ||
| 664 | |||
| 665 | return CInnerConverter::logicalToVisualBiDi(converted, utf32StringDst, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, failOnBadChar); | ||
| 666 | } | ||
| 667 | return CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32StringDst, failOnBadChar); | ||
| 668 | } | ||
| 669 | |||
| 670 | bool CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, std::string& utf8StringDst, bool failOnBadChar /*= true*/) | ||
| 671 | { | ||
| 672 | return CInnerConverter::stdConvert(Utf32ToUtf8, utf32StringSrc, utf8StringDst, failOnBadChar); | ||
| 673 | } | ||
| 674 | |||
| 675 | std::string CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, bool failOnBadChar /*= false*/) | ||
| 676 | { | ||
| 677 | std::string converted; | ||
| 678 | utf32ToUtf8(utf32StringSrc, converted, failOnBadChar); | ||
| 679 | return converted; | ||
| 680 | } | ||
| 681 | |||
| 682 | bool CCharsetConverter::utf32ToW(const std::u32string& utf32StringSrc, std::wstring& wStringDst, bool failOnBadChar /*= true*/) | ||
| 683 | { | ||
| 684 | #ifdef WCHAR_IS_UCS_4 | ||
| 685 | wStringDst.assign((const wchar_t*)utf32StringSrc.c_str(), utf32StringSrc.length()); | ||
| 686 | return true; | ||
| 687 | #else // !WCHAR_IS_UCS_4 | ||
| 688 | return CInnerConverter::stdConvert(Utf32ToW, utf32StringSrc, wStringDst, failOnBadChar); | ||
| 689 | #endif // !WCHAR_IS_UCS_4 | ||
| 690 | } | ||
| 691 | |||
| 692 | bool CCharsetConverter::utf32logicalToVisualBiDi(const std::u32string& logicalStringSrc, | ||
| 693 | std::u32string& visualStringDst, | ||
| 694 | bool forceLTRReadingOrder /*= false*/, | ||
| 695 | bool failOnBadString /*= false*/, | ||
| 696 | int* visualToLogicalMap /*= nullptr*/) | ||
| 697 | { | ||
| 698 | return CInnerConverter::logicalToVisualBiDi( | ||
| 699 | logicalStringSrc, visualStringDst, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, | ||
| 700 | failOnBadString, visualToLogicalMap); | ||
| 701 | } | ||
| 702 | |||
| 703 | bool CCharsetConverter::wToUtf32(const std::wstring& wStringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/) | ||
| 704 | { | ||
| 705 | #ifdef WCHAR_IS_UCS_4 | ||
| 706 | /* UCS-4 is almost equal to UTF-32, but UTF-32 has strict limits on possible values, while UCS-4 is usually unchecked. | ||
| 707 | * With this "conversion" we ensure that output will be valid UTF-32 string. */ | ||
| 708 | #endif | ||
| 709 | return CInnerConverter::stdConvert(WToUtf32, wStringSrc, utf32StringDst, failOnBadChar); | ||
| 710 | } | ||
| 711 | |||
| 712 | // The bVisualBiDiFlip forces a flip of characters for hebrew/arabic languages, only set to false if the flipping | ||
| 713 | // of the string is already made or the string is not displayed in the GUI | ||
| 714 | bool CCharsetConverter::utf8ToW(const std::string& utf8StringSrc, std::wstring& wStringDst, bool bVisualBiDiFlip /*= true*/, | ||
| 715 | bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/) | ||
| 716 | { | ||
| 717 | // Try to flip hebrew/arabic characters, if any | ||
| 718 | if (bVisualBiDiFlip) | ||
| 719 | { | ||
| 720 | wStringDst.clear(); | ||
| 721 | std::u32string utf32str; | ||
| 722 | if (!CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32str, failOnBadChar)) | ||
| 723 | return false; | ||
| 724 | |||
| 725 | std::u32string utf32flipped; | ||
| 726 | const bool bidiResult = CInnerConverter::logicalToVisualBiDi(utf32str, utf32flipped, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, failOnBadChar); | ||
| 727 | |||
| 728 | return CInnerConverter::stdConvert(Utf32ToW, utf32flipped, wStringDst, failOnBadChar) && bidiResult; | ||
| 729 | } | ||
| 730 | |||
| 731 | return CInnerConverter::stdConvert(Utf8toW, utf8StringSrc, wStringDst, failOnBadChar); | ||
| 732 | } | ||
| 733 | |||
| 734 | bool CCharsetConverter::subtitleCharsetToUtf8(const std::string& stringSrc, std::string& utf8StringDst) | ||
| 735 | { | ||
| 736 | return CInnerConverter::stdConvert(SubtitleCharsetToUtf8, stringSrc, utf8StringDst, false); | ||
| 737 | } | ||
| 738 | |||
| 739 | bool CCharsetConverter::fromW(const std::wstring& wStringSrc, | ||
| 740 | std::string& stringDst, const std::string& enc) | ||
| 741 | { | ||
| 742 | return CInnerConverter::customConvert(WCHAR_CHARSET, enc, wStringSrc, stringDst); | ||
| 743 | } | ||
| 744 | |||
| 745 | bool CCharsetConverter::toW(const std::string& stringSrc, | ||
| 746 | std::wstring& wStringDst, const std::string& enc) | ||
| 747 | { | ||
| 748 | return CInnerConverter::customConvert(enc, WCHAR_CHARSET, stringSrc, wStringDst); | ||
| 749 | } | ||
| 750 | |||
| 751 | bool CCharsetConverter::utf8ToStringCharset(const std::string& utf8StringSrc, std::string& stringDst) | ||
| 752 | { | ||
| 753 | return CInnerConverter::stdConvert(Utf8ToUserCharset, utf8StringSrc, stringDst); | ||
| 754 | } | ||
| 755 | |||
| 756 | bool CCharsetConverter::utf8ToStringCharset(std::string& stringSrcDst) | ||
| 757 | { | ||
| 758 | std::string strSrc(stringSrcDst); | ||
| 759 | return utf8ToStringCharset(strSrc, stringSrcDst); | ||
| 760 | } | ||
| 761 | |||
| 762 | bool CCharsetConverter::ToUtf8(const std::string& strSourceCharset, const std::string& stringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/) | ||
| 763 | { | ||
| 764 | if (strSourceCharset == "UTF-8") | ||
| 765 | { // simple case - no conversion necessary | ||
| 766 | utf8StringDst = stringSrc; | ||
| 767 | return true; | ||
| 768 | } | ||
| 769 | |||
| 770 | return CInnerConverter::customConvert(strSourceCharset, "UTF-8", stringSrc, utf8StringDst, failOnBadChar); | ||
| 771 | } | ||
| 772 | |||
| 773 | bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::string& stringDst) | ||
| 774 | { | ||
| 775 | if (strDestCharset == "UTF-8") | ||
| 776 | { // simple case - no conversion necessary | ||
| 777 | stringDst = utf8StringSrc; | ||
| 778 | return true; | ||
| 779 | } | ||
| 780 | |||
| 781 | return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, stringDst); | ||
| 782 | } | ||
| 783 | |||
| 784 | bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::u16string& utf16StringDst) | ||
| 785 | { | ||
| 786 | return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, utf16StringDst); | ||
| 787 | } | ||
| 788 | |||
| 789 | bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::u32string& utf32StringDst) | ||
| 790 | { | ||
| 791 | return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, utf32StringDst); | ||
| 792 | } | ||
| 793 | |||
| 794 | bool CCharsetConverter::unknownToUTF8(std::string& stringSrcDst) | ||
| 795 | { | ||
| 796 | std::string source(stringSrcDst); | ||
| 797 | return unknownToUTF8(source, stringSrcDst); | ||
| 798 | } | ||
| 799 | |||
| 800 | bool CCharsetConverter::unknownToUTF8(const std::string& stringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/) | ||
| 801 | { | ||
| 802 | // checks whether it's utf8 already, and if not converts using the sourceCharset if given, else the string charset | ||
| 803 | if (CUtf8Utils::isValidUtf8(stringSrc)) | ||
| 804 | { | ||
| 805 | utf8StringDst = stringSrc; | ||
| 806 | return true; | ||
| 807 | } | ||
| 808 | return CInnerConverter::stdConvert(UserCharsetToUtf8, stringSrc, utf8StringDst, failOnBadChar); | ||
| 809 | } | ||
| 810 | |||
| 811 | bool CCharsetConverter::wToUTF8(const std::wstring& wStringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/) | ||
| 812 | { | ||
| 813 | return CInnerConverter::stdConvert(WtoUtf8, wStringSrc, utf8StringDst, failOnBadChar); | ||
| 814 | } | ||
| 815 | |||
| 816 | bool CCharsetConverter::utf16BEtoUTF8(const std::u16string& utf16StringSrc, std::string& utf8StringDst) | ||
| 817 | { | ||
| 818 | return CInnerConverter::stdConvert(Utf16BEtoUtf8, utf16StringSrc, utf8StringDst); | ||
| 819 | } | ||
| 820 | |||
| 821 | bool CCharsetConverter::utf16LEtoUTF8(const std::u16string& utf16StringSrc, | ||
| 822 | std::string& utf8StringDst) | ||
| 823 | { | ||
| 824 | return CInnerConverter::stdConvert(Utf16LEtoUtf8, utf16StringSrc, utf8StringDst); | ||
| 825 | } | ||
| 826 | |||
| 827 | bool CCharsetConverter::ucs2ToUTF8(const std::u16string& ucs2StringSrc, std::string& utf8StringDst) | ||
| 828 | { | ||
| 829 | return CInnerConverter::stdConvert(Ucs2CharsetToUtf8, ucs2StringSrc,utf8StringDst); | ||
| 830 | } | ||
| 831 | |||
| 832 | bool CCharsetConverter::utf16LEtoW(const std::u16string& utf16String, std::wstring& wString) | ||
| 833 | { | ||
| 834 | return CInnerConverter::stdConvert(Utf16LEtoW, utf16String, wString); | ||
| 835 | } | ||
| 836 | |||
| 837 | bool CCharsetConverter::utf32ToStringCharset(const std::u32string& utf32StringSrc, std::string& stringDst) | ||
| 838 | { | ||
| 839 | return CInnerConverter::stdConvert(Utf32ToUserCharset, utf32StringSrc, stringDst); | ||
| 840 | } | ||
| 841 | |||
| 842 | bool CCharsetConverter::utf8ToSystem(std::string& stringSrcDst, bool failOnBadChar /*= false*/) | ||
| 843 | { | ||
| 844 | std::string strSrc(stringSrcDst); | ||
| 845 | return CInnerConverter::stdConvert(Utf8ToSystem, strSrc, stringSrcDst, failOnBadChar); | ||
| 846 | } | ||
| 847 | |||
| 848 | bool CCharsetConverter::systemToUtf8(const std::string& sysStringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/) | ||
| 849 | { | ||
| 850 | return CInnerConverter::stdConvert(SystemToUtf8, sysStringSrc, utf8StringDst, failOnBadChar); | ||
| 851 | } | ||
| 852 | |||
| 853 | bool CCharsetConverter::utf8logicalToVisualBiDi(const std::string& utf8StringSrc, std::string& utf8StringDst, bool failOnBadString /*= false*/) | ||
| 854 | { | ||
| 855 | utf8StringDst.clear(); | ||
| 856 | std::u32string utf32flipped; | ||
| 857 | if (!utf8ToUtf32Visual(utf8StringSrc, utf32flipped, true, true, failOnBadString)) | ||
| 858 | return false; | ||
| 859 | |||
| 860 | return CInnerConverter::stdConvert(Utf32ToUtf8, utf32flipped, utf8StringDst, failOnBadString); | ||
| 861 | } | ||
| 862 | |||
| 863 | void CCharsetConverter::SettingOptionsCharsetsFiller(SettingConstPtr setting, std::vector<StringSettingOption>& list, std::string& current, void *data) | ||
| 864 | { | ||
| 865 | std::vector<std::string> vecCharsets = g_charsetConverter.getCharsetLabels(); | ||
| 866 | sort(vecCharsets.begin(), vecCharsets.end(), sortstringbyname()); | ||
| 867 | |||
| 868 | list.emplace_back(g_localizeStrings.Get(13278), "DEFAULT"); // "Default" | ||
| 869 | for (int i = 0; i < (int) vecCharsets.size(); ++i) | ||
| 870 | list.emplace_back(vecCharsets[i], g_charsetConverter.getCharsetNameByLabel(vecCharsets[i])); | ||
| 871 | } | ||
