summaryrefslogtreecommitdiffstats
path: root/xbmc/utils/StringUtils.cpp
diff options
context:
space:
mode:
authormanuel <manuel@mausz.at>2020-10-19 00:52:24 +0200
committermanuel <manuel@mausz.at>2020-10-19 00:52:24 +0200
commitbe933ef2241d79558f91796cc5b3a161f72ebf9c (patch)
treefe3ab2f130e20c99001f2d7a81d610c78c96a3f4 /xbmc/utils/StringUtils.cpp
parent5f8335c1e49ce108ef3481863833c98efa00411b (diff)
downloadkodi-pvr-build-be933ef2241d79558f91796cc5b3a161f72ebf9c.tar.gz
kodi-pvr-build-be933ef2241d79558f91796cc5b3a161f72ebf9c.tar.bz2
kodi-pvr-build-be933ef2241d79558f91796cc5b3a161f72ebf9c.zip
sync with upstream
Diffstat (limited to 'xbmc/utils/StringUtils.cpp')
-rw-r--r--xbmc/utils/StringUtils.cpp1808
1 files changed, 1808 insertions, 0 deletions
diff --git a/xbmc/utils/StringUtils.cpp b/xbmc/utils/StringUtils.cpp
new file mode 100644
index 0000000..4195b18
--- /dev/null
+++ b/xbmc/utils/StringUtils.cpp
@@ -0,0 +1,1808 @@
1/*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
4 *
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
8//-----------------------------------------------------------------------
9//
10// File: StringUtils.cpp
11//
12// Purpose: ATL split string utility
13// Author: Paul J. Weiss
14//
15// Modified to use J O'Leary's std::string class by kraqh3d
16//
17//------------------------------------------------------------------------
18
19#ifdef HAVE_NEW_CROSSGUID
20#include <guid.hpp>
21#else
22#include <guid.h>
23#endif
24
25#if defined(TARGET_ANDROID)
26#include <androidjni/JNIThreading.h>
27#endif
28
29#include "CharsetConverter.h"
30#include "LangInfo.h"
31#include "StringUtils.h"
32#include "Util.h"
33
34#include <algorithm>
35#include <array>
36#include <assert.h>
37#include <functional>
38#include <inttypes.h>
39#include <iomanip>
40#include <math.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include <time.h>
45
46#include <fstrcmp.h>
47#include <memory.h>
48
49// don't move or std functions end up in PCRE namespace
50// clang-format off
51#include "utils/RegExp.h"
52// clang-format on
53
54#define FORMAT_BLOCK_SIZE 512 // # of bytes for initial allocation for printf
55
56static constexpr const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";
57
58/* empty string for use in returns by ref */
59const std::string StringUtils::Empty = "";
60
61// Copyright (c) Leigh Brasington 2012. All rights reserved.
62// This code may be used and reproduced without written permission.
63// http://www.leighb.com/tounicupper.htm
64//
65// The tables were constructed from
66// http://publib.boulder.ibm.com/infocenter/iseries/v7r1m0/index.jsp?topic=%2Fnls%2Frbagslowtoupmaptable.htm
67
68static constexpr wchar_t unicode_lowers[] = {
69 (wchar_t)0x0061, (wchar_t)0x0062, (wchar_t)0x0063, (wchar_t)0x0064, (wchar_t)0x0065, (wchar_t)0x0066, (wchar_t)0x0067, (wchar_t)0x0068, (wchar_t)0x0069,
70 (wchar_t)0x006A, (wchar_t)0x006B, (wchar_t)0x006C, (wchar_t)0x006D, (wchar_t)0x006E, (wchar_t)0x006F, (wchar_t)0x0070, (wchar_t)0x0071, (wchar_t)0x0072,
71 (wchar_t)0x0073, (wchar_t)0x0074, (wchar_t)0x0075, (wchar_t)0x0076, (wchar_t)0x0077, (wchar_t)0x0078, (wchar_t)0x0079, (wchar_t)0x007A, (wchar_t)0x00E0,
72 (wchar_t)0x00E1, (wchar_t)0x00E2, (wchar_t)0x00E3, (wchar_t)0x00E4, (wchar_t)0x00E5, (wchar_t)0x00E6, (wchar_t)0x00E7, (wchar_t)0x00E8, (wchar_t)0x00E9,
73 (wchar_t)0x00EA, (wchar_t)0x00EB, (wchar_t)0x00EC, (wchar_t)0x00ED, (wchar_t)0x00EE, (wchar_t)0x00EF, (wchar_t)0x00F0, (wchar_t)0x00F1, (wchar_t)0x00F2,
74 (wchar_t)0x00F3, (wchar_t)0x00F4, (wchar_t)0x00F5, (wchar_t)0x00F6, (wchar_t)0x00F8, (wchar_t)0x00F9, (wchar_t)0x00FA, (wchar_t)0x00FB, (wchar_t)0x00FC,
75 (wchar_t)0x00FD, (wchar_t)0x00FE, (wchar_t)0x00FF, (wchar_t)0x0101, (wchar_t)0x0103, (wchar_t)0x0105, (wchar_t)0x0107, (wchar_t)0x0109, (wchar_t)0x010B,
76 (wchar_t)0x010D, (wchar_t)0x010F, (wchar_t)0x0111, (wchar_t)0x0113, (wchar_t)0x0115, (wchar_t)0x0117, (wchar_t)0x0119, (wchar_t)0x011B, (wchar_t)0x011D,
77 (wchar_t)0x011F, (wchar_t)0x0121, (wchar_t)0x0123, (wchar_t)0x0125, (wchar_t)0x0127, (wchar_t)0x0129, (wchar_t)0x012B, (wchar_t)0x012D, (wchar_t)0x012F,
78 (wchar_t)0x0131, (wchar_t)0x0133, (wchar_t)0x0135, (wchar_t)0x0137, (wchar_t)0x013A, (wchar_t)0x013C, (wchar_t)0x013E, (wchar_t)0x0140, (wchar_t)0x0142,
79 (wchar_t)0x0144, (wchar_t)0x0146, (wchar_t)0x0148, (wchar_t)0x014B, (wchar_t)0x014D, (wchar_t)0x014F, (wchar_t)0x0151, (wchar_t)0x0153, (wchar_t)0x0155,
80 (wchar_t)0x0157, (wchar_t)0x0159, (wchar_t)0x015B, (wchar_t)0x015D, (wchar_t)0x015F, (wchar_t)0x0161, (wchar_t)0x0163, (wchar_t)0x0165, (wchar_t)0x0167,
81 (wchar_t)0x0169, (wchar_t)0x016B, (wchar_t)0x016D, (wchar_t)0x016F, (wchar_t)0x0171, (wchar_t)0x0173, (wchar_t)0x0175, (wchar_t)0x0177, (wchar_t)0x017A,
82 (wchar_t)0x017C, (wchar_t)0x017E, (wchar_t)0x0183, (wchar_t)0x0185, (wchar_t)0x0188, (wchar_t)0x018C, (wchar_t)0x0192, (wchar_t)0x0199, (wchar_t)0x01A1,
83 (wchar_t)0x01A3, (wchar_t)0x01A5, (wchar_t)0x01A8, (wchar_t)0x01AD, (wchar_t)0x01B0, (wchar_t)0x01B4, (wchar_t)0x01B6, (wchar_t)0x01B9, (wchar_t)0x01BD,
84 (wchar_t)0x01C6, (wchar_t)0x01C9, (wchar_t)0x01CC, (wchar_t)0x01CE, (wchar_t)0x01D0, (wchar_t)0x01D2, (wchar_t)0x01D4, (wchar_t)0x01D6, (wchar_t)0x01D8,
85 (wchar_t)0x01DA, (wchar_t)0x01DC, (wchar_t)0x01DF, (wchar_t)0x01E1, (wchar_t)0x01E3, (wchar_t)0x01E5, (wchar_t)0x01E7, (wchar_t)0x01E9, (wchar_t)0x01EB,
86 (wchar_t)0x01ED, (wchar_t)0x01EF, (wchar_t)0x01F3, (wchar_t)0x01F5, (wchar_t)0x01FB, (wchar_t)0x01FD, (wchar_t)0x01FF, (wchar_t)0x0201, (wchar_t)0x0203,
87 (wchar_t)0x0205, (wchar_t)0x0207, (wchar_t)0x0209, (wchar_t)0x020B, (wchar_t)0x020D, (wchar_t)0x020F, (wchar_t)0x0211, (wchar_t)0x0213, (wchar_t)0x0215,
88 (wchar_t)0x0217, (wchar_t)0x0253, (wchar_t)0x0254, (wchar_t)0x0257, (wchar_t)0x0258, (wchar_t)0x0259, (wchar_t)0x025B, (wchar_t)0x0260, (wchar_t)0x0263,
89 (wchar_t)0x0268, (wchar_t)0x0269, (wchar_t)0x026F, (wchar_t)0x0272, (wchar_t)0x0275, (wchar_t)0x0283, (wchar_t)0x0288, (wchar_t)0x028A, (wchar_t)0x028B,
90 (wchar_t)0x0292, (wchar_t)0x03AC, (wchar_t)0x03AD, (wchar_t)0x03AE, (wchar_t)0x03AF, (wchar_t)0x03B1, (wchar_t)0x03B2, (wchar_t)0x03B3, (wchar_t)0x03B4,
91 (wchar_t)0x03B5, (wchar_t)0x03B6, (wchar_t)0x03B7, (wchar_t)0x03B8, (wchar_t)0x03B9, (wchar_t)0x03BA, (wchar_t)0x03BB, (wchar_t)0x03BC, (wchar_t)0x03BD,
92 (wchar_t)0x03BE, (wchar_t)0x03BF, (wchar_t)0x03C0, (wchar_t)0x03C1, (wchar_t)0x03C3, (wchar_t)0x03C4, (wchar_t)0x03C5, (wchar_t)0x03C6, (wchar_t)0x03C7,
93 (wchar_t)0x03C8, (wchar_t)0x03C9, (wchar_t)0x03CA, (wchar_t)0x03CB, (wchar_t)0x03CC, (wchar_t)0x03CD, (wchar_t)0x03CE, (wchar_t)0x03E3, (wchar_t)0x03E5,
94 (wchar_t)0x03E7, (wchar_t)0x03E9, (wchar_t)0x03EB, (wchar_t)0x03ED, (wchar_t)0x03EF, (wchar_t)0x0430, (wchar_t)0x0431, (wchar_t)0x0432, (wchar_t)0x0433,
95 (wchar_t)0x0434, (wchar_t)0x0435, (wchar_t)0x0436, (wchar_t)0x0437, (wchar_t)0x0438, (wchar_t)0x0439, (wchar_t)0x043A, (wchar_t)0x043B, (wchar_t)0x043C,
96 (wchar_t)0x043D, (wchar_t)0x043E, (wchar_t)0x043F, (wchar_t)0x0440, (wchar_t)0x0441, (wchar_t)0x0442, (wchar_t)0x0443, (wchar_t)0x0444, (wchar_t)0x0445,
97 (wchar_t)0x0446, (wchar_t)0x0447, (wchar_t)0x0448, (wchar_t)0x0449, (wchar_t)0x044A, (wchar_t)0x044B, (wchar_t)0x044C, (wchar_t)0x044D, (wchar_t)0x044E,
98 (wchar_t)0x044F, (wchar_t)0x0451, (wchar_t)0x0452, (wchar_t)0x0453, (wchar_t)0x0454, (wchar_t)0x0455, (wchar_t)0x0456, (wchar_t)0x0457, (wchar_t)0x0458,
99 (wchar_t)0x0459, (wchar_t)0x045A, (wchar_t)0x045B, (wchar_t)0x045C, (wchar_t)0x045E, (wchar_t)0x045F, (wchar_t)0x0461, (wchar_t)0x0463, (wchar_t)0x0465,
100 (wchar_t)0x0467, (wchar_t)0x0469, (wchar_t)0x046B, (wchar_t)0x046D, (wchar_t)0x046F, (wchar_t)0x0471, (wchar_t)0x0473, (wchar_t)0x0475, (wchar_t)0x0477,
101 (wchar_t)0x0479, (wchar_t)0x047B, (wchar_t)0x047D, (wchar_t)0x047F, (wchar_t)0x0481, (wchar_t)0x0491, (wchar_t)0x0493, (wchar_t)0x0495, (wchar_t)0x0497,
102 (wchar_t)0x0499, (wchar_t)0x049B, (wchar_t)0x049D, (wchar_t)0x049F, (wchar_t)0x04A1, (wchar_t)0x04A3, (wchar_t)0x04A5, (wchar_t)0x04A7, (wchar_t)0x04A9,
103 (wchar_t)0x04AB, (wchar_t)0x04AD, (wchar_t)0x04AF, (wchar_t)0x04B1, (wchar_t)0x04B3, (wchar_t)0x04B5, (wchar_t)0x04B7, (wchar_t)0x04B9, (wchar_t)0x04BB,
104 (wchar_t)0x04BD, (wchar_t)0x04BF, (wchar_t)0x04C2, (wchar_t)0x04C4, (wchar_t)0x04C8, (wchar_t)0x04CC, (wchar_t)0x04D1, (wchar_t)0x04D3, (wchar_t)0x04D5,
105 (wchar_t)0x04D7, (wchar_t)0x04D9, (wchar_t)0x04DB, (wchar_t)0x04DD, (wchar_t)0x04DF, (wchar_t)0x04E1, (wchar_t)0x04E3, (wchar_t)0x04E5, (wchar_t)0x04E7,
106 (wchar_t)0x04E9, (wchar_t)0x04EB, (wchar_t)0x04EF, (wchar_t)0x04F1, (wchar_t)0x04F3, (wchar_t)0x04F5, (wchar_t)0x04F9, (wchar_t)0x0561, (wchar_t)0x0562,
107 (wchar_t)0x0563, (wchar_t)0x0564, (wchar_t)0x0565, (wchar_t)0x0566, (wchar_t)0x0567, (wchar_t)0x0568, (wchar_t)0x0569, (wchar_t)0x056A, (wchar_t)0x056B,
108 (wchar_t)0x056C, (wchar_t)0x056D, (wchar_t)0x056E, (wchar_t)0x056F, (wchar_t)0x0570, (wchar_t)0x0571, (wchar_t)0x0572, (wchar_t)0x0573, (wchar_t)0x0574,
109 (wchar_t)0x0575, (wchar_t)0x0576, (wchar_t)0x0577, (wchar_t)0x0578, (wchar_t)0x0579, (wchar_t)0x057A, (wchar_t)0x057B, (wchar_t)0x057C, (wchar_t)0x057D,
110 (wchar_t)0x057E, (wchar_t)0x057F, (wchar_t)0x0580, (wchar_t)0x0581, (wchar_t)0x0582, (wchar_t)0x0583, (wchar_t)0x0584, (wchar_t)0x0585, (wchar_t)0x0586,
111 (wchar_t)0x10D0, (wchar_t)0x10D1, (wchar_t)0x10D2, (wchar_t)0x10D3, (wchar_t)0x10D4, (wchar_t)0x10D5, (wchar_t)0x10D6, (wchar_t)0x10D7, (wchar_t)0x10D8,
112 (wchar_t)0x10D9, (wchar_t)0x10DA, (wchar_t)0x10DB, (wchar_t)0x10DC, (wchar_t)0x10DD, (wchar_t)0x10DE, (wchar_t)0x10DF, (wchar_t)0x10E0, (wchar_t)0x10E1,
113 (wchar_t)0x10E2, (wchar_t)0x10E3, (wchar_t)0x10E4, (wchar_t)0x10E5, (wchar_t)0x10E6, (wchar_t)0x10E7, (wchar_t)0x10E8, (wchar_t)0x10E9, (wchar_t)0x10EA,
114 (wchar_t)0x10EB, (wchar_t)0x10EC, (wchar_t)0x10ED, (wchar_t)0x10EE, (wchar_t)0x10EF, (wchar_t)0x10F0, (wchar_t)0x10F1, (wchar_t)0x10F2, (wchar_t)0x10F3,
115 (wchar_t)0x10F4, (wchar_t)0x10F5, (wchar_t)0x1E01, (wchar_t)0x1E03, (wchar_t)0x1E05, (wchar_t)0x1E07, (wchar_t)0x1E09, (wchar_t)0x1E0B, (wchar_t)0x1E0D,
116 (wchar_t)0x1E0F, (wchar_t)0x1E11, (wchar_t)0x1E13, (wchar_t)0x1E15, (wchar_t)0x1E17, (wchar_t)0x1E19, (wchar_t)0x1E1B, (wchar_t)0x1E1D, (wchar_t)0x1E1F,
117 (wchar_t)0x1E21, (wchar_t)0x1E23, (wchar_t)0x1E25, (wchar_t)0x1E27, (wchar_t)0x1E29, (wchar_t)0x1E2B, (wchar_t)0x1E2D, (wchar_t)0x1E2F, (wchar_t)0x1E31,
118 (wchar_t)0x1E33, (wchar_t)0x1E35, (wchar_t)0x1E37, (wchar_t)0x1E39, (wchar_t)0x1E3B, (wchar_t)0x1E3D, (wchar_t)0x1E3F, (wchar_t)0x1E41, (wchar_t)0x1E43,
119 (wchar_t)0x1E45, (wchar_t)0x1E47, (wchar_t)0x1E49, (wchar_t)0x1E4B, (wchar_t)0x1E4D, (wchar_t)0x1E4F, (wchar_t)0x1E51, (wchar_t)0x1E53, (wchar_t)0x1E55,
120 (wchar_t)0x1E57, (wchar_t)0x1E59, (wchar_t)0x1E5B, (wchar_t)0x1E5D, (wchar_t)0x1E5F, (wchar_t)0x1E61, (wchar_t)0x1E63, (wchar_t)0x1E65, (wchar_t)0x1E67,
121 (wchar_t)0x1E69, (wchar_t)0x1E6B, (wchar_t)0x1E6D, (wchar_t)0x1E6F, (wchar_t)0x1E71, (wchar_t)0x1E73, (wchar_t)0x1E75, (wchar_t)0x1E77, (wchar_t)0x1E79,
122 (wchar_t)0x1E7B, (wchar_t)0x1E7D, (wchar_t)0x1E7F, (wchar_t)0x1E81, (wchar_t)0x1E83, (wchar_t)0x1E85, (wchar_t)0x1E87, (wchar_t)0x1E89, (wchar_t)0x1E8B,
123 (wchar_t)0x1E8D, (wchar_t)0x1E8F, (wchar_t)0x1E91, (wchar_t)0x1E93, (wchar_t)0x1E95, (wchar_t)0x1EA1, (wchar_t)0x1EA3, (wchar_t)0x1EA5, (wchar_t)0x1EA7,
124 (wchar_t)0x1EA9, (wchar_t)0x1EAB, (wchar_t)0x1EAD, (wchar_t)0x1EAF, (wchar_t)0x1EB1, (wchar_t)0x1EB3, (wchar_t)0x1EB5, (wchar_t)0x1EB7, (wchar_t)0x1EB9,
125 (wchar_t)0x1EBB, (wchar_t)0x1EBD, (wchar_t)0x1EBF, (wchar_t)0x1EC1, (wchar_t)0x1EC3, (wchar_t)0x1EC5, (wchar_t)0x1EC7, (wchar_t)0x1EC9, (wchar_t)0x1ECB,
126 (wchar_t)0x1ECD, (wchar_t)0x1ECF, (wchar_t)0x1ED1, (wchar_t)0x1ED3, (wchar_t)0x1ED5, (wchar_t)0x1ED7, (wchar_t)0x1ED9, (wchar_t)0x1EDB, (wchar_t)0x1EDD,
127 (wchar_t)0x1EDF, (wchar_t)0x1EE1, (wchar_t)0x1EE3, (wchar_t)0x1EE5, (wchar_t)0x1EE7, (wchar_t)0x1EE9, (wchar_t)0x1EEB, (wchar_t)0x1EED, (wchar_t)0x1EEF,
128 (wchar_t)0x1EF1, (wchar_t)0x1EF3, (wchar_t)0x1EF5, (wchar_t)0x1EF7, (wchar_t)0x1EF9, (wchar_t)0x1F00, (wchar_t)0x1F01, (wchar_t)0x1F02, (wchar_t)0x1F03,
129 (wchar_t)0x1F04, (wchar_t)0x1F05, (wchar_t)0x1F06, (wchar_t)0x1F07, (wchar_t)0x1F10, (wchar_t)0x1F11, (wchar_t)0x1F12, (wchar_t)0x1F13, (wchar_t)0x1F14,
130 (wchar_t)0x1F15, (wchar_t)0x1F20, (wchar_t)0x1F21, (wchar_t)0x1F22, (wchar_t)0x1F23, (wchar_t)0x1F24, (wchar_t)0x1F25, (wchar_t)0x1F26, (wchar_t)0x1F27,
131 (wchar_t)0x1F30, (wchar_t)0x1F31, (wchar_t)0x1F32, (wchar_t)0x1F33, (wchar_t)0x1F34, (wchar_t)0x1F35, (wchar_t)0x1F36, (wchar_t)0x1F37, (wchar_t)0x1F40,
132 (wchar_t)0x1F41, (wchar_t)0x1F42, (wchar_t)0x1F43, (wchar_t)0x1F44, (wchar_t)0x1F45, (wchar_t)0x1F51, (wchar_t)0x1F53, (wchar_t)0x1F55, (wchar_t)0x1F57,
133 (wchar_t)0x1F60, (wchar_t)0x1F61, (wchar_t)0x1F62, (wchar_t)0x1F63, (wchar_t)0x1F64, (wchar_t)0x1F65, (wchar_t)0x1F66, (wchar_t)0x1F67, (wchar_t)0x1F80,
134 (wchar_t)0x1F81, (wchar_t)0x1F82, (wchar_t)0x1F83, (wchar_t)0x1F84, (wchar_t)0x1F85, (wchar_t)0x1F86, (wchar_t)0x1F87, (wchar_t)0x1F90, (wchar_t)0x1F91,
135 (wchar_t)0x1F92, (wchar_t)0x1F93, (wchar_t)0x1F94, (wchar_t)0x1F95, (wchar_t)0x1F96, (wchar_t)0x1F97, (wchar_t)0x1FA0, (wchar_t)0x1FA1, (wchar_t)0x1FA2,
136 (wchar_t)0x1FA3, (wchar_t)0x1FA4, (wchar_t)0x1FA5, (wchar_t)0x1FA6, (wchar_t)0x1FA7, (wchar_t)0x1FB0, (wchar_t)0x1FB1, (wchar_t)0x1FD0, (wchar_t)0x1FD1,
137 (wchar_t)0x1FE0, (wchar_t)0x1FE1, (wchar_t)0x24D0, (wchar_t)0x24D1, (wchar_t)0x24D2, (wchar_t)0x24D3, (wchar_t)0x24D4, (wchar_t)0x24D5, (wchar_t)0x24D6,
138 (wchar_t)0x24D7, (wchar_t)0x24D8, (wchar_t)0x24D9, (wchar_t)0x24DA, (wchar_t)0x24DB, (wchar_t)0x24DC, (wchar_t)0x24DD, (wchar_t)0x24DE, (wchar_t)0x24DF,
139 (wchar_t)0x24E0, (wchar_t)0x24E1, (wchar_t)0x24E2, (wchar_t)0x24E3, (wchar_t)0x24E4, (wchar_t)0x24E5, (wchar_t)0x24E6, (wchar_t)0x24E7, (wchar_t)0x24E8,
140 (wchar_t)0x24E9, (wchar_t)0xFF41, (wchar_t)0xFF42, (wchar_t)0xFF43, (wchar_t)0xFF44, (wchar_t)0xFF45, (wchar_t)0xFF46, (wchar_t)0xFF47, (wchar_t)0xFF48,
141 (wchar_t)0xFF49, (wchar_t)0xFF4A, (wchar_t)0xFF4B, (wchar_t)0xFF4C, (wchar_t)0xFF4D, (wchar_t)0xFF4E, (wchar_t)0xFF4F, (wchar_t)0xFF50, (wchar_t)0xFF51,
142 (wchar_t)0xFF52, (wchar_t)0xFF53, (wchar_t)0xFF54, (wchar_t)0xFF55, (wchar_t)0xFF56, (wchar_t)0xFF57, (wchar_t)0xFF58, (wchar_t)0xFF59, (wchar_t)0xFF5A
143};
144
145static const wchar_t unicode_uppers[] = {
146 (wchar_t)0x0041, (wchar_t)0x0042, (wchar_t)0x0043, (wchar_t)0x0044, (wchar_t)0x0045, (wchar_t)0x0046, (wchar_t)0x0047, (wchar_t)0x0048, (wchar_t)0x0049,
147 (wchar_t)0x004A, (wchar_t)0x004B, (wchar_t)0x004C, (wchar_t)0x004D, (wchar_t)0x004E, (wchar_t)0x004F, (wchar_t)0x0050, (wchar_t)0x0051, (wchar_t)0x0052,
148 (wchar_t)0x0053, (wchar_t)0x0054, (wchar_t)0x0055, (wchar_t)0x0056, (wchar_t)0x0057, (wchar_t)0x0058, (wchar_t)0x0059, (wchar_t)0x005A, (wchar_t)0x00C0,
149 (wchar_t)0x00C1, (wchar_t)0x00C2, (wchar_t)0x00C3, (wchar_t)0x00C4, (wchar_t)0x00C5, (wchar_t)0x00C6, (wchar_t)0x00C7, (wchar_t)0x00C8, (wchar_t)0x00C9,
150 (wchar_t)0x00CA, (wchar_t)0x00CB, (wchar_t)0x00CC, (wchar_t)0x00CD, (wchar_t)0x00CE, (wchar_t)0x00CF, (wchar_t)0x00D0, (wchar_t)0x00D1, (wchar_t)0x00D2,
151 (wchar_t)0x00D3, (wchar_t)0x00D4, (wchar_t)0x00D5, (wchar_t)0x00D6, (wchar_t)0x00D8, (wchar_t)0x00D9, (wchar_t)0x00DA, (wchar_t)0x00DB, (wchar_t)0x00DC,
152 (wchar_t)0x00DD, (wchar_t)0x00DE, (wchar_t)0x0178, (wchar_t)0x0100, (wchar_t)0x0102, (wchar_t)0x0104, (wchar_t)0x0106, (wchar_t)0x0108, (wchar_t)0x010A,
153 (wchar_t)0x010C, (wchar_t)0x010E, (wchar_t)0x0110, (wchar_t)0x0112, (wchar_t)0x0114, (wchar_t)0x0116, (wchar_t)0x0118, (wchar_t)0x011A, (wchar_t)0x011C,
154 (wchar_t)0x011E, (wchar_t)0x0120, (wchar_t)0x0122, (wchar_t)0x0124, (wchar_t)0x0126, (wchar_t)0x0128, (wchar_t)0x012A, (wchar_t)0x012C, (wchar_t)0x012E,
155 (wchar_t)0x0049, (wchar_t)0x0132, (wchar_t)0x0134, (wchar_t)0x0136, (wchar_t)0x0139, (wchar_t)0x013B, (wchar_t)0x013D, (wchar_t)0x013F, (wchar_t)0x0141,
156 (wchar_t)0x0143, (wchar_t)0x0145, (wchar_t)0x0147, (wchar_t)0x014A, (wchar_t)0x014C, (wchar_t)0x014E, (wchar_t)0x0150, (wchar_t)0x0152, (wchar_t)0x0154,
157 (wchar_t)0x0156, (wchar_t)0x0158, (wchar_t)0x015A, (wchar_t)0x015C, (wchar_t)0x015E, (wchar_t)0x0160, (wchar_t)0x0162, (wchar_t)0x0164, (wchar_t)0x0166,
158 (wchar_t)0x0168, (wchar_t)0x016A, (wchar_t)0x016C, (wchar_t)0x016E, (wchar_t)0x0170, (wchar_t)0x0172, (wchar_t)0x0174, (wchar_t)0x0176, (wchar_t)0x0179,
159 (wchar_t)0x017B, (wchar_t)0x017D, (wchar_t)0x0182, (wchar_t)0x0184, (wchar_t)0x0187, (wchar_t)0x018B, (wchar_t)0x0191, (wchar_t)0x0198, (wchar_t)0x01A0,
160 (wchar_t)0x01A2, (wchar_t)0x01A4, (wchar_t)0x01A7, (wchar_t)0x01AC, (wchar_t)0x01AF, (wchar_t)0x01B3, (wchar_t)0x01B5, (wchar_t)0x01B8, (wchar_t)0x01BC,
161 (wchar_t)0x01C4, (wchar_t)0x01C7, (wchar_t)0x01CA, (wchar_t)0x01CD, (wchar_t)0x01CF, (wchar_t)0x01D1, (wchar_t)0x01D3, (wchar_t)0x01D5, (wchar_t)0x01D7,
162 (wchar_t)0x01D9, (wchar_t)0x01DB, (wchar_t)0x01DE, (wchar_t)0x01E0, (wchar_t)0x01E2, (wchar_t)0x01E4, (wchar_t)0x01E6, (wchar_t)0x01E8, (wchar_t)0x01EA,
163 (wchar_t)0x01EC, (wchar_t)0x01EE, (wchar_t)0x01F1, (wchar_t)0x01F4, (wchar_t)0x01FA, (wchar_t)0x01FC, (wchar_t)0x01FE, (wchar_t)0x0200, (wchar_t)0x0202,
164 (wchar_t)0x0204, (wchar_t)0x0206, (wchar_t)0x0208, (wchar_t)0x020A, (wchar_t)0x020C, (wchar_t)0x020E, (wchar_t)0x0210, (wchar_t)0x0212, (wchar_t)0x0214,
165 (wchar_t)0x0216, (wchar_t)0x0181, (wchar_t)0x0186, (wchar_t)0x018A, (wchar_t)0x018E, (wchar_t)0x018F, (wchar_t)0x0190, (wchar_t)0x0193, (wchar_t)0x0194,
166 (wchar_t)0x0197, (wchar_t)0x0196, (wchar_t)0x019C, (wchar_t)0x019D, (wchar_t)0x019F, (wchar_t)0x01A9, (wchar_t)0x01AE, (wchar_t)0x01B1, (wchar_t)0x01B2,
167 (wchar_t)0x01B7, (wchar_t)0x0386, (wchar_t)0x0388, (wchar_t)0x0389, (wchar_t)0x038A, (wchar_t)0x0391, (wchar_t)0x0392, (wchar_t)0x0393, (wchar_t)0x0394,
168 (wchar_t)0x0395, (wchar_t)0x0396, (wchar_t)0x0397, (wchar_t)0x0398, (wchar_t)0x0399, (wchar_t)0x039A, (wchar_t)0x039B, (wchar_t)0x039C, (wchar_t)0x039D,
169 (wchar_t)0x039E, (wchar_t)0x039F, (wchar_t)0x03A0, (wchar_t)0x03A1, (wchar_t)0x03A3, (wchar_t)0x03A4, (wchar_t)0x03A5, (wchar_t)0x03A6, (wchar_t)0x03A7,
170 (wchar_t)0x03A8, (wchar_t)0x03A9, (wchar_t)0x03AA, (wchar_t)0x03AB, (wchar_t)0x038C, (wchar_t)0x038E, (wchar_t)0x038F, (wchar_t)0x03E2, (wchar_t)0x03E4,
171 (wchar_t)0x03E6, (wchar_t)0x03E8, (wchar_t)0x03EA, (wchar_t)0x03EC, (wchar_t)0x03EE, (wchar_t)0x0410, (wchar_t)0x0411, (wchar_t)0x0412, (wchar_t)0x0413,
172 (wchar_t)0x0414, (wchar_t)0x0415, (wchar_t)0x0416, (wchar_t)0x0417, (wchar_t)0x0418, (wchar_t)0x0419, (wchar_t)0x041A, (wchar_t)0x041B, (wchar_t)0x041C,
173 (wchar_t)0x041D, (wchar_t)0x041E, (wchar_t)0x041F, (wchar_t)0x0420, (wchar_t)0x0421, (wchar_t)0x0422, (wchar_t)0x0423, (wchar_t)0x0424, (wchar_t)0x0425,
174 (wchar_t)0x0426, (wchar_t)0x0427, (wchar_t)0x0428, (wchar_t)0x0429, (wchar_t)0x042A, (wchar_t)0x042B, (wchar_t)0x042C, (wchar_t)0x042D, (wchar_t)0x042E,
175 (wchar_t)0x042F, (wchar_t)0x0401, (wchar_t)0x0402, (wchar_t)0x0403, (wchar_t)0x0404, (wchar_t)0x0405, (wchar_t)0x0406, (wchar_t)0x0407, (wchar_t)0x0408,
176 (wchar_t)0x0409, (wchar_t)0x040A, (wchar_t)0x040B, (wchar_t)0x040C, (wchar_t)0x040E, (wchar_t)0x040F, (wchar_t)0x0460, (wchar_t)0x0462, (wchar_t)0x0464,
177 (wchar_t)0x0466, (wchar_t)0x0468, (wchar_t)0x046A, (wchar_t)0x046C, (wchar_t)0x046E, (wchar_t)0x0470, (wchar_t)0x0472, (wchar_t)0x0474, (wchar_t)0x0476,
178 (wchar_t)0x0478, (wchar_t)0x047A, (wchar_t)0x047C, (wchar_t)0x047E, (wchar_t)0x0480, (wchar_t)0x0490, (wchar_t)0x0492, (wchar_t)0x0494, (wchar_t)0x0496,
179 (wchar_t)0x0498, (wchar_t)0x049A, (wchar_t)0x049C, (wchar_t)0x049E, (wchar_t)0x04A0, (wchar_t)0x04A2, (wchar_t)0x04A4, (wchar_t)0x04A6, (wchar_t)0x04A8,
180 (wchar_t)0x04AA, (wchar_t)0x04AC, (wchar_t)0x04AE, (wchar_t)0x04B0, (wchar_t)0x04B2, (wchar_t)0x04B4, (wchar_t)0x04B6, (wchar_t)0x04B8, (wchar_t)0x04BA,
181 (wchar_t)0x04BC, (wchar_t)0x04BE, (wchar_t)0x04C1, (wchar_t)0x04C3, (wchar_t)0x04C7, (wchar_t)0x04CB, (wchar_t)0x04D0, (wchar_t)0x04D2, (wchar_t)0x04D4,
182 (wchar_t)0x04D6, (wchar_t)0x04D8, (wchar_t)0x04DA, (wchar_t)0x04DC, (wchar_t)0x04DE, (wchar_t)0x04E0, (wchar_t)0x04E2, (wchar_t)0x04E4, (wchar_t)0x04E6,
183 (wchar_t)0x04E8, (wchar_t)0x04EA, (wchar_t)0x04EE, (wchar_t)0x04F0, (wchar_t)0x04F2, (wchar_t)0x04F4, (wchar_t)0x04F8, (wchar_t)0x0531, (wchar_t)0x0532,
184 (wchar_t)0x0533, (wchar_t)0x0534, (wchar_t)0x0535, (wchar_t)0x0536, (wchar_t)0x0537, (wchar_t)0x0538, (wchar_t)0x0539, (wchar_t)0x053A, (wchar_t)0x053B,
185 (wchar_t)0x053C, (wchar_t)0x053D, (wchar_t)0x053E, (wchar_t)0x053F, (wchar_t)0x0540, (wchar_t)0x0541, (wchar_t)0x0542, (wchar_t)0x0543, (wchar_t)0x0544,
186 (wchar_t)0x0545, (wchar_t)0x0546, (wchar_t)0x0547, (wchar_t)0x0548, (wchar_t)0x0549, (wchar_t)0x054A, (wchar_t)0x054B, (wchar_t)0x054C, (wchar_t)0x054D,
187 (wchar_t)0x054E, (wchar_t)0x054F, (wchar_t)0x0550, (wchar_t)0x0551, (wchar_t)0x0552, (wchar_t)0x0553, (wchar_t)0x0554, (wchar_t)0x0555, (wchar_t)0x0556,
188 (wchar_t)0x10A0, (wchar_t)0x10A1, (wchar_t)0x10A2, (wchar_t)0x10A3, (wchar_t)0x10A4, (wchar_t)0x10A5, (wchar_t)0x10A6, (wchar_t)0x10A7, (wchar_t)0x10A8,
189 (wchar_t)0x10A9, (wchar_t)0x10AA, (wchar_t)0x10AB, (wchar_t)0x10AC, (wchar_t)0x10AD, (wchar_t)0x10AE, (wchar_t)0x10AF, (wchar_t)0x10B0, (wchar_t)0x10B1,
190 (wchar_t)0x10B2, (wchar_t)0x10B3, (wchar_t)0x10B4, (wchar_t)0x10B5, (wchar_t)0x10B6, (wchar_t)0x10B7, (wchar_t)0x10B8, (wchar_t)0x10B9, (wchar_t)0x10BA,
191 (wchar_t)0x10BB, (wchar_t)0x10BC, (wchar_t)0x10BD, (wchar_t)0x10BE, (wchar_t)0x10BF, (wchar_t)0x10C0, (wchar_t)0x10C1, (wchar_t)0x10C2, (wchar_t)0x10C3,
192 (wchar_t)0x10C4, (wchar_t)0x10C5, (wchar_t)0x1E00, (wchar_t)0x1E02, (wchar_t)0x1E04, (wchar_t)0x1E06, (wchar_t)0x1E08, (wchar_t)0x1E0A, (wchar_t)0x1E0C,
193 (wchar_t)0x1E0E, (wchar_t)0x1E10, (wchar_t)0x1E12, (wchar_t)0x1E14, (wchar_t)0x1E16, (wchar_t)0x1E18, (wchar_t)0x1E1A, (wchar_t)0x1E1C, (wchar_t)0x1E1E,
194 (wchar_t)0x1E20, (wchar_t)0x1E22, (wchar_t)0x1E24, (wchar_t)0x1E26, (wchar_t)0x1E28, (wchar_t)0x1E2A, (wchar_t)0x1E2C, (wchar_t)0x1E2E, (wchar_t)0x1E30,
195 (wchar_t)0x1E32, (wchar_t)0x1E34, (wchar_t)0x1E36, (wchar_t)0x1E38, (wchar_t)0x1E3A, (wchar_t)0x1E3C, (wchar_t)0x1E3E, (wchar_t)0x1E40, (wchar_t)0x1E42,
196 (wchar_t)0x1E44, (wchar_t)0x1E46, (wchar_t)0x1E48, (wchar_t)0x1E4A, (wchar_t)0x1E4C, (wchar_t)0x1E4E, (wchar_t)0x1E50, (wchar_t)0x1E52, (wchar_t)0x1E54,
197 (wchar_t)0x1E56, (wchar_t)0x1E58, (wchar_t)0x1E5A, (wchar_t)0x1E5C, (wchar_t)0x1E5E, (wchar_t)0x1E60, (wchar_t)0x1E62, (wchar_t)0x1E64, (wchar_t)0x1E66,
198 (wchar_t)0x1E68, (wchar_t)0x1E6A, (wchar_t)0x1E6C, (wchar_t)0x1E6E, (wchar_t)0x1E70, (wchar_t)0x1E72, (wchar_t)0x1E74, (wchar_t)0x1E76, (wchar_t)0x1E78,
199 (wchar_t)0x1E7A, (wchar_t)0x1E7C, (wchar_t)0x1E7E, (wchar_t)0x1E80, (wchar_t)0x1E82, (wchar_t)0x1E84, (wchar_t)0x1E86, (wchar_t)0x1E88, (wchar_t)0x1E8A,
200 (wchar_t)0x1E8C, (wchar_t)0x1E8E, (wchar_t)0x1E90, (wchar_t)0x1E92, (wchar_t)0x1E94, (wchar_t)0x1EA0, (wchar_t)0x1EA2, (wchar_t)0x1EA4, (wchar_t)0x1EA6,
201 (wchar_t)0x1EA8, (wchar_t)0x1EAA, (wchar_t)0x1EAC, (wchar_t)0x1EAE, (wchar_t)0x1EB0, (wchar_t)0x1EB2, (wchar_t)0x1EB4, (wchar_t)0x1EB6, (wchar_t)0x1EB8,
202 (wchar_t)0x1EBA, (wchar_t)0x1EBC, (wchar_t)0x1EBE, (wchar_t)0x1EC0, (wchar_t)0x1EC2, (wchar_t)0x1EC4, (wchar_t)0x1EC6, (wchar_t)0x1EC8, (wchar_t)0x1ECA,
203 (wchar_t)0x1ECC, (wchar_t)0x1ECE, (wchar_t)0x1ED0, (wchar_t)0x1ED2, (wchar_t)0x1ED4, (wchar_t)0x1ED6, (wchar_t)0x1ED8, (wchar_t)0x1EDA, (wchar_t)0x1EDC,
204 (wchar_t)0x1EDE, (wchar_t)0x1EE0, (wchar_t)0x1EE2, (wchar_t)0x1EE4, (wchar_t)0x1EE6, (wchar_t)0x1EE8, (wchar_t)0x1EEA, (wchar_t)0x1EEC, (wchar_t)0x1EEE,
205 (wchar_t)0x1EF0, (wchar_t)0x1EF2, (wchar_t)0x1EF4, (wchar_t)0x1EF6, (wchar_t)0x1EF8, (wchar_t)0x1F08, (wchar_t)0x1F09, (wchar_t)0x1F0A, (wchar_t)0x1F0B,
206 (wchar_t)0x1F0C, (wchar_t)0x1F0D, (wchar_t)0x1F0E, (wchar_t)0x1F0F, (wchar_t)0x1F18, (wchar_t)0x1F19, (wchar_t)0x1F1A, (wchar_t)0x1F1B, (wchar_t)0x1F1C,
207 (wchar_t)0x1F1D, (wchar_t)0x1F28, (wchar_t)0x1F29, (wchar_t)0x1F2A, (wchar_t)0x1F2B, (wchar_t)0x1F2C, (wchar_t)0x1F2D, (wchar_t)0x1F2E, (wchar_t)0x1F2F,
208 (wchar_t)0x1F38, (wchar_t)0x1F39, (wchar_t)0x1F3A, (wchar_t)0x1F3B, (wchar_t)0x1F3C, (wchar_t)0x1F3D, (wchar_t)0x1F3E, (wchar_t)0x1F3F, (wchar_t)0x1F48,
209 (wchar_t)0x1F49, (wchar_t)0x1F4A, (wchar_t)0x1F4B, (wchar_t)0x1F4C, (wchar_t)0x1F4D, (wchar_t)0x1F59, (wchar_t)0x1F5B, (wchar_t)0x1F5D, (wchar_t)0x1F5F,
210 (wchar_t)0x1F68, (wchar_t)0x1F69, (wchar_t)0x1F6A, (wchar_t)0x1F6B, (wchar_t)0x1F6C, (wchar_t)0x1F6D, (wchar_t)0x1F6E, (wchar_t)0x1F6F, (wchar_t)0x1F88,
211 (wchar_t)0x1F89, (wchar_t)0x1F8A, (wchar_t)0x1F8B, (wchar_t)0x1F8C, (wchar_t)0x1F8D, (wchar_t)0x1F8E, (wchar_t)0x1F8F, (wchar_t)0x1F98, (wchar_t)0x1F99,
212 (wchar_t)0x1F9A, (wchar_t)0x1F9B, (wchar_t)0x1F9C, (wchar_t)0x1F9D, (wchar_t)0x1F9E, (wchar_t)0x1F9F, (wchar_t)0x1FA8, (wchar_t)0x1FA9, (wchar_t)0x1FAA,
213 (wchar_t)0x1FAB, (wchar_t)0x1FAC, (wchar_t)0x1FAD, (wchar_t)0x1FAE, (wchar_t)0x1FAF, (wchar_t)0x1FB8, (wchar_t)0x1FB9, (wchar_t)0x1FD8, (wchar_t)0x1FD9,
214 (wchar_t)0x1FE8, (wchar_t)0x1FE9, (wchar_t)0x24B6, (wchar_t)0x24B7, (wchar_t)0x24B8, (wchar_t)0x24B9, (wchar_t)0x24BA, (wchar_t)0x24BB, (wchar_t)0x24BC,
215 (wchar_t)0x24BD, (wchar_t)0x24BE, (wchar_t)0x24BF, (wchar_t)0x24C0, (wchar_t)0x24C1, (wchar_t)0x24C2, (wchar_t)0x24C3, (wchar_t)0x24C4, (wchar_t)0x24C5,
216 (wchar_t)0x24C6, (wchar_t)0x24C7, (wchar_t)0x24C8, (wchar_t)0x24C9, (wchar_t)0x24CA, (wchar_t)0x24CB, (wchar_t)0x24CC, (wchar_t)0x24CD, (wchar_t)0x24CE,
217 (wchar_t)0x24CF, (wchar_t)0xFF21, (wchar_t)0xFF22, (wchar_t)0xFF23, (wchar_t)0xFF24, (wchar_t)0xFF25, (wchar_t)0xFF26, (wchar_t)0xFF27, (wchar_t)0xFF28,
218 (wchar_t)0xFF29, (wchar_t)0xFF2A, (wchar_t)0xFF2B, (wchar_t)0xFF2C, (wchar_t)0xFF2D, (wchar_t)0xFF2E, (wchar_t)0xFF2F, (wchar_t)0xFF30, (wchar_t)0xFF31,
219 (wchar_t)0xFF32, (wchar_t)0xFF33, (wchar_t)0xFF34, (wchar_t)0xFF35, (wchar_t)0xFF36, (wchar_t)0xFF37, (wchar_t)0xFF38, (wchar_t)0xFF39, (wchar_t)0xFF3A
220};
221
222
223std::string StringUtils::FormatV(const char *fmt, va_list args)
224{
225 if (!fmt || !fmt[0])
226 return "";
227
228 int size = FORMAT_BLOCK_SIZE;
229 va_list argCopy;
230
231 while (true)
232 {
233 char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
234 if (!cstr)
235 return "";
236
237 va_copy(argCopy, args);
238 int nActual = vsnprintf(cstr, size, fmt, argCopy);
239 va_end(argCopy);
240
241 if (nActual > -1 && nActual < size) // We got a valid result
242 {
243 std::string str(cstr, nActual);
244 free(cstr);
245 return str;
246 }
247 free(cstr);
248#ifndef TARGET_WINDOWS
249 if (nActual > -1) // Exactly what we will need (glibc 2.1)
250 size = nActual + 1;
251 else // Let's try to double the size (glibc 2.0)
252 size *= 2;
253#else // TARGET_WINDOWS
254 va_copy(argCopy, args);
255 size = _vscprintf(fmt, argCopy);
256 va_end(argCopy);
257 if (size < 0)
258 return "";
259 else
260 size++; // increment for null-termination
261#endif // TARGET_WINDOWS
262 }
263
264 return ""; // unreachable
265}
266
267std::wstring StringUtils::FormatV(const wchar_t *fmt, va_list args)
268{
269 if (!fmt || !fmt[0])
270 return L"";
271
272 int size = FORMAT_BLOCK_SIZE;
273 va_list argCopy;
274
275 while (true)
276 {
277 wchar_t *cstr = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * size));
278 if (!cstr)
279 return L"";
280
281 va_copy(argCopy, args);
282 int nActual = vswprintf(cstr, size, fmt, argCopy);
283 va_end(argCopy);
284
285 if (nActual > -1 && nActual < size) // We got a valid result
286 {
287 std::wstring str(cstr, nActual);
288 free(cstr);
289 return str;
290 }
291 free(cstr);
292
293#ifndef TARGET_WINDOWS
294 if (nActual > -1) // Exactly what we will need (glibc 2.1)
295 size = nActual + 1;
296 else // Let's try to double the size (glibc 2.0)
297 size *= 2;
298#else // TARGET_WINDOWS
299 va_copy(argCopy, args);
300 size = _vscwprintf(fmt, argCopy);
301 va_end(argCopy);
302 if (size < 0)
303 return L"";
304 else
305 size++; // increment for null-termination
306#endif // TARGET_WINDOWS
307 }
308
309 return L"";
310}
311
// Three-way comparison of two wchar_t values passed by address, in the shape
// expected by bsearch/qsort: -1 if *a < *b, 1 if *a > *b, 0 when equal.
int compareWchar (const void* a, const void* b)
{
  const wchar_t lhs = *static_cast<const wchar_t*>(a);
  const wchar_t rhs = *static_cast<const wchar_t*>(b);

  if (lhs == rhs)
    return 0;

  return lhs < rhs ? -1 : 1;
}
320
321wchar_t tolowerUnicode(const wchar_t& c)
322{
323 wchar_t* p = (wchar_t*) bsearch (&c, unicode_uppers, sizeof(unicode_uppers) / sizeof(wchar_t), sizeof(wchar_t), compareWchar);
324 if (p)
325 return *(unicode_lowers + (p - unicode_uppers));
326
327 return c;
328}
329
330wchar_t toupperUnicode(const wchar_t& c)
331{
332 wchar_t* p = (wchar_t*) bsearch (&c, unicode_lowers, sizeof(unicode_lowers) / sizeof(wchar_t), sizeof(wchar_t), compareWchar);
333 if (p)
334 return *(unicode_uppers + (p - unicode_lowers));
335
336 return c;
337}
338
339void StringUtils::ToUpper(std::string &str)
340{
341 std::transform(str.begin(), str.end(), str.begin(), ::toupper);
342}
343
344void StringUtils::ToUpper(std::wstring &str)
345{
346 transform(str.begin(), str.end(), str.begin(), toupperUnicode);
347}
348
349void StringUtils::ToLower(std::string &str)
350{
351 transform(str.begin(), str.end(), str.begin(), ::tolower);
352}
353
354void StringUtils::ToLower(std::wstring &str)
355{
356 transform(str.begin(), str.end(), str.begin(), tolowerUnicode);
357}
358
359void StringUtils::ToCapitalize(std::string &str)
360{
361 std::wstring wstr;
362 g_charsetConverter.utf8ToW(str, wstr);
363 ToCapitalize(wstr);
364 g_charsetConverter.wToUTF8(wstr, str);
365}
366
367void StringUtils::ToCapitalize(std::wstring &str)
368{
369 const std::locale& loc = g_langInfo.GetSystemLocale();
370 bool isFirstLetter = true;
371 for (std::wstring::iterator it = str.begin(); it < str.end(); ++it)
372 {
373 // capitalize after spaces and punctuation characters (except apostrophes)
374 if (std::isspace(*it, loc) || (std::ispunct(*it, loc) && *it != '\''))
375 isFirstLetter = true;
376 else if (isFirstLetter)
377 {
378 *it = std::toupper(*it, loc);
379 isFirstLetter = false;
380 }
381 }
382}
383
384bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
385{
386 // before we do the char-by-char comparison, first compare sizes of both strings.
387 // This led to a 33% improvement in benchmarking on average. (size() just returns a member of std::string)
388 if (str1.size() != str2.size())
389 return false;
390 return EqualsNoCase(str1.c_str(), str2.c_str());
391}
392
393bool StringUtils::EqualsNoCase(const std::string &str1, const char *s2)
394{
395 return EqualsNoCase(str1.c_str(), s2);
396}
397
398bool StringUtils::EqualsNoCase(const char *s1, const char *s2)
399{
400 char c2; // we need only one char outside the loop
401 do
402 {
403 const char c1 = *s1++; // const local variable should help compiler to optimize
404 c2 = *s2++;
405 if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
406 return false;
407 } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
408 return true;
409}
410
411int StringUtils::CompareNoCase(const std::string& str1, const std::string& str2, size_t n /* = 0 */)
412{
413 return CompareNoCase(str1.c_str(), str2.c_str(), n);
414}
415
416int StringUtils::CompareNoCase(const char* s1, const char* s2, size_t n /* = 0 */)
417{
418 char c2; // we need only one char outside the loop
419 size_t index = 0;
420 do
421 {
422 const char c1 = *s1++; // const local variable should help compiler to optimize
423 c2 = *s2++;
424 index++;
425 if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
426 return ::tolower(c1) - ::tolower(c2);
427 } while (c2 != '\0' &&
428 index != n); // At this point, we know c1 == c2, so there's no need to test them both.
429 return 0;
430}
431
432std::string StringUtils::Left(const std::string &str, size_t count)
433{
434 count = std::max((size_t)0, std::min(count, str.size()));
435 return str.substr(0, count);
436}
437
438std::string StringUtils::Mid(const std::string &str, size_t first, size_t count /* = string::npos */)
439{
440 if (first + count > str.size())
441 count = str.size() - first;
442
443 if (first > str.size())
444 return std::string();
445
446 assert(first + count <= str.size());
447
448 return str.substr(first, count);
449}
450
451std::string StringUtils::Right(const std::string &str, size_t count)
452{
453 count = std::max((size_t)0, std::min(count, str.size()));
454 return str.substr(str.size() - count);
455}
456
457std::string& StringUtils::Trim(std::string &str)
458{
459 TrimLeft(str);
460 return TrimRight(str);
461}
462
463std::string& StringUtils::Trim(std::string &str, const char* const chars)
464{
465 TrimLeft(str, chars);
466 return TrimRight(str, chars);
467}
468
// Whitespace test that is safe for the bytes of UTF-8 text.
// Bytes with the high bit set are rejected up front and never reach ::isspace();
// without this the "TrimX" functions failed on Win32 and OS X with UTF-8 strings.
// Returns 1 for a whitespace character, 0 otherwise.
static int isspace_c(char c)
{
  if ((c & 0x80) != 0)
    return 0;
  return (::isspace(c) != 0) ? 1 : 0;
}
475
476std::string& StringUtils::TrimLeft(std::string &str)
477{
478 str.erase(str.begin(), std::find_if(str.begin(), str.end(), std::not1(std::function<int(char)>(isspace_c))));
479 return str;
480}
481
482std::string& StringUtils::TrimLeft(std::string &str, const char* const chars)
483{
484 size_t nidx = str.find_first_not_of(chars);
485 str.erase(0, nidx);
486 return str;
487}
488
489std::string& StringUtils::TrimRight(std::string &str)
490{
491 str.erase(std::find_if(str.rbegin(), str.rend(), std::not1(std::function<int(char)>(isspace_c))).base(), str.end());
492 return str;
493}
494
495std::string& StringUtils::TrimRight(std::string &str, const char* const chars)
496{
497 size_t nidx = str.find_last_not_of(chars);
498 str.erase(str.npos == nidx ? 0 : ++nidx);
499 return str;
500}
501
502int StringUtils::ReturnDigits(const std::string& str)
503{
504 std::stringstream ss;
505 for (const auto& character : str)
506 {
507 if (isdigit(character))
508 ss << character;
509 }
510 return atoi(ss.str().c_str());
511}
512
513std::string& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string& str)
514{
515 std::string::iterator it = str.begin();
516 bool onSpace = false;
517 while(it != str.end())
518 {
519 if (*it == '\t')
520 *it = ' ';
521
522 if (*it == ' ')
523 {
524 if (onSpace)
525 {
526 it = str.erase(it);
527 continue;
528 }
529 else
530 onSpace = true;
531 }
532 else
533 onSpace = false;
534
535 ++it;
536 }
537 return str;
538}
539
540int StringUtils::Replace(std::string &str, char oldChar, char newChar)
541{
542 int replacedChars = 0;
543 for (std::string::iterator it = str.begin(); it != str.end(); ++it)
544 {
545 if (*it == oldChar)
546 {
547 *it = newChar;
548 replacedChars++;
549 }
550 }
551
552 return replacedChars;
553}
554
555int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
556{
557 if (oldStr.empty())
558 return 0;
559
560 int replacedChars = 0;
561 size_t index = 0;
562
563 while (index < str.size() && (index = str.find(oldStr, index)) != std::string::npos)
564 {
565 str.replace(index, oldStr.size(), newStr);
566 index += newStr.size();
567 replacedChars++;
568 }
569
570 return replacedChars;
571}
572
573int StringUtils::Replace(std::wstring &str, const std::wstring &oldStr, const std::wstring &newStr)
574{
575 if (oldStr.empty())
576 return 0;
577
578 int replacedChars = 0;
579 size_t index = 0;
580
581 while (index < str.size() && (index = str.find(oldStr, index)) != std::string::npos)
582 {
583 str.replace(index, oldStr.size(), newStr);
584 index += newStr.size();
585 replacedChars++;
586 }
587
588 return replacedChars;
589}
590
591bool StringUtils::StartsWith(const std::string &str1, const std::string &str2)
592{
593 return str1.compare(0, str2.size(), str2) == 0;
594}
595
596bool StringUtils::StartsWith(const std::string &str1, const char *s2)
597{
598 return StartsWith(str1.c_str(), s2);
599}
600
601bool StringUtils::StartsWith(const char *s1, const char *s2)
602{
603 while (*s2 != '\0')
604 {
605 if (*s1 != *s2)
606 return false;
607 s1++;
608 s2++;
609 }
610 return true;
611}
612
613bool StringUtils::StartsWithNoCase(const std::string &str1, const std::string &str2)
614{
615 return StartsWithNoCase(str1.c_str(), str2.c_str());
616}
617
618bool StringUtils::StartsWithNoCase(const std::string &str1, const char *s2)
619{
620 return StartsWithNoCase(str1.c_str(), s2);
621}
622
623bool StringUtils::StartsWithNoCase(const char *s1, const char *s2)
624{
625 while (*s2 != '\0')
626 {
627 if (::tolower(*s1) != ::tolower(*s2))
628 return false;
629 s1++;
630 s2++;
631 }
632 return true;
633}
634
635bool StringUtils::EndsWith(const std::string &str1, const std::string &str2)
636{
637 if (str1.size() < str2.size())
638 return false;
639 return str1.compare(str1.size() - str2.size(), str2.size(), str2) == 0;
640}
641
642bool StringUtils::EndsWith(const std::string &str1, const char *s2)
643{
644 size_t len2 = strlen(s2);
645 if (str1.size() < len2)
646 return false;
647 return str1.compare(str1.size() - len2, len2, s2) == 0;
648}
649
650bool StringUtils::EndsWithNoCase(const std::string &str1, const std::string &str2)
651{
652 if (str1.size() < str2.size())
653 return false;
654 const char *s1 = str1.c_str() + str1.size() - str2.size();
655 const char *s2 = str2.c_str();
656 while (*s2 != '\0')
657 {
658 if (::tolower(*s1) != ::tolower(*s2))
659 return false;
660 s1++;
661 s2++;
662 }
663 return true;
664}
665
666bool StringUtils::EndsWithNoCase(const std::string &str1, const char *s2)
667{
668 size_t len2 = strlen(s2);
669 if (str1.size() < len2)
670 return false;
671 const char *s1 = str1.c_str() + str1.size() - len2;
672 while (*s2 != '\0')
673 {
674 if (::tolower(*s1) != ::tolower(*s2))
675 return false;
676 s1++;
677 s2++;
678 }
679 return true;
680}
681
682std::vector<std::string> StringUtils::Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings)
683{
684 std::vector<std::string> result;
685 SplitTo(std::back_inserter(result), input, delimiter, iMaxStrings);
686 return result;
687}
688
689std::vector<std::string> StringUtils::Split(const std::string& input, const char delimiter, size_t iMaxStrings)
690{
691 std::vector<std::string> result;
692 SplitTo(std::back_inserter(result), input, delimiter, iMaxStrings);
693 return result;
694}
695
696std::vector<std::string> StringUtils::Split(const std::string& input, const std::vector<std::string>& delimiters)
697{
698 std::vector<std::string> result;
699 SplitTo(std::back_inserter(result), input, delimiters);
700 return result;
701}
702
703std::vector<std::string> StringUtils::SplitMulti(const std::vector<std::string> &input, const std::vector<std::string> &delimiters, unsigned int iMaxStrings /* = 0 */)
704{
705 if (input.empty())
706 return std::vector<std::string>();
707
708 std::vector<std::string> results(input);
709
710 if (delimiters.empty() || (iMaxStrings > 0 && iMaxStrings <= input.size()))
711 return results;
712
713 std::vector<std::string> strings1;
714 if (iMaxStrings == 0)
715 {
716 for (size_t di = 0; di < delimiters.size(); di++)
717 {
718 for (size_t i = 0; i < results.size(); i++)
719 {
720 std::vector<std::string> substrings = StringUtils::Split(results[i], delimiters[di]);
721 for (size_t j = 0; j < substrings.size(); j++)
722 strings1.push_back(substrings[j]);
723 }
724 results = strings1;
725 strings1.clear();
726 }
727 return results;
728 }
729
730 // Control the number of strings input is split into, keeping the original strings.
731 // Note iMaxStrings > input.size()
732 int iNew = iMaxStrings - results.size();
733 for (size_t di = 0; di < delimiters.size(); di++)
734 {
735 for (size_t i = 0; i < results.size(); i++)
736 {
737 if (iNew > 0)
738 {
739 std::vector<std::string> substrings = StringUtils::Split(results[i], delimiters[di], iNew + 1);
740 iNew = iNew - substrings.size() + 1;
741 for (size_t j = 0; j < substrings.size(); j++)
742 strings1.push_back(substrings[j]);
743 }
744 else
745 strings1.push_back(results[i]);
746 }
747 results = strings1;
748 iNew = iMaxStrings - results.size();
749 strings1.clear();
750 if ((iNew <= 0))
751 break; //Stop trying any more delimiters
752 }
753 return results;
754}
755
756// returns the number of occurrences of strFind in strInput.
757int StringUtils::FindNumber(const std::string& strInput, const std::string &strFind)
758{
759 size_t pos = strInput.find(strFind, 0);
760 int numfound = 0;
761 while (pos != std::string::npos)
762 {
763 numfound++;
764 pos = strInput.find(strFind, pos + 1);
765 }
766 return numfound;
767}
768
769// Plane maps for MySQL utf8_general_ci (now known as utf8mb3_general_ci) collation
770// Derived from https://github.com/MariaDB/server/blob/10.5/strings/ctype-utf8.c
771
772// clang-format off
// Table for code points 0x0000-0x00FF (stored at planemap[0x00]): 256 entries in
// code-point order; entry i is the value code point 0x0000 + i maps to. For
// example 0x0061-0x007A ('a'-'z') map to 0x0041-0x005A ('A'-'Z'), and both
// 0x00C0-0x00C5 and 0x00E0-0x00E5 map to 0x0041.
// NOTE(review): the lookup code is outside this excerpt - presumably the table
// is indexed by the low byte of the code point; confirm against the caller.
773static const uint16_t plane00[] = {
774 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
775 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
776 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
777 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
778 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
779 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
780 0x0060, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
781 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
782 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
783 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
784 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
785 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x039C, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
786 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
787 0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00D7, 0x00D8, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0053,
788 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
789 0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00F7, 0x00D8, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0059
790};
791
// Table for code points 0x0100-0x01FF (stored at planemap[0x01]): 256 entries in
// code-point order; entry i is the value code point 0x0100 + i maps to. For
// example 0x0100-0x0105 all map to 0x0041, while 0x0110 and 0x0111 both map to
// 0x0110.
// NOTE(review): the lookup code is outside this excerpt - presumably the table
// is indexed by the low byte of the code point; confirm against the caller.
792static const uint16_t plane01[] = {
793 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0044, 0x0044,
794 0x0110, 0x0110, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0047, 0x0047, 0x0047, 0x0047,
795 0x0047, 0x0047, 0x0047, 0x0047, 0x0048, 0x0048, 0x0126, 0x0126, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049,
796 0x0049, 0x0049, 0x0132, 0x0132, 0x004A, 0x004A, 0x004B, 0x004B, 0x0138, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x013F,
797 0x013F, 0x0141, 0x0141, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x0149, 0x014A, 0x014A, 0x004F, 0x004F, 0x004F, 0x004F,
798 0x004F, 0x004F, 0x0152, 0x0152, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053,
799 0x0053, 0x0053, 0x0054, 0x0054, 0x0054, 0x0054, 0x0166, 0x0166, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055,
800 0x0055, 0x0055, 0x0055, 0x0055, 0x0057, 0x0057, 0x0059, 0x0059, 0x0059, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x0053,
801 0x0180, 0x0181, 0x0182, 0x0182, 0x0184, 0x0184, 0x0186, 0x0187, 0x0187, 0x0189, 0x018A, 0x018B, 0x018B, 0x018D, 0x018E, 0x018F,
802 0x0190, 0x0191, 0x0191, 0x0193, 0x0194, 0x01F6, 0x0196, 0x0197, 0x0198, 0x0198, 0x019A, 0x019B, 0x019C, 0x019D, 0x019E, 0x019F,
803 0x004F, 0x004F, 0x01A2, 0x01A2, 0x01A4, 0x01A4, 0x01A6, 0x01A7, 0x01A7, 0x01A9, 0x01AA, 0x01AB, 0x01AC, 0x01AC, 0x01AE, 0x0055,
804 0x0055, 0x01B1, 0x01B2, 0x01B3, 0x01B3, 0x01B5, 0x01B5, 0x01B7, 0x01B8, 0x01B8, 0x01BA, 0x01BB, 0x01BC, 0x01BC, 0x01BE, 0x01F7,
805 0x01C0, 0x01C1, 0x01C2, 0x01C3, 0x01C4, 0x01C4, 0x01C4, 0x01C7, 0x01C7, 0x01C7, 0x01CA, 0x01CA, 0x01CA, 0x0041, 0x0041, 0x0049,
806 0x0049, 0x004F, 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x018E, 0x0041, 0x0041,
807 0x0041, 0x0041, 0x00C6, 0x00C6, 0x01E4, 0x01E4, 0x0047, 0x0047, 0x004B, 0x004B, 0x004F, 0x004F, 0x004F, 0x004F, 0x01B7, 0x01B7,
808 0x004A, 0x01F1, 0x01F1, 0x01F1, 0x0047, 0x0047, 0x01F6, 0x01F7, 0x004E, 0x004E, 0x0041, 0x0041, 0x00C6, 0x00C6, 0x00D8, 0x00D8
809};
810
// Table for code points 0x0200-0x02FF (stored at planemap[0x02]): 256 entries in
// code-point order; entry i is the value code point 0x0200 + i maps to. For
// example 0x0200-0x0203 map to 0x0041 and 0x0253 maps to 0x0181; from 0x02A0
// onwards every entry equals its own code point.
// NOTE(review): the lookup code is outside this excerpt - presumably the table
// is indexed by the low byte of the code point; confirm against the caller.
811static const uint16_t plane02[] = {
812 0x0041, 0x0041, 0x0041, 0x0041, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004F, 0x004F, 0x004F, 0x004F,
813 0x0052, 0x0052, 0x0052, 0x0052, 0x0055, 0x0055, 0x0055, 0x0055, 0x0053, 0x0053, 0x0054, 0x0054, 0x021C, 0x021C, 0x0048, 0x0048,
814 0x0220, 0x0221, 0x0222, 0x0222, 0x0224, 0x0224, 0x0041, 0x0041, 0x0045, 0x0045, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F,
815 0x004F, 0x004F, 0x0059, 0x0059, 0x0234, 0x0235, 0x0236, 0x0237, 0x0238, 0x0239, 0x023A, 0x023B, 0x023C, 0x023D, 0x023E, 0x023F,
816 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, 0x0248, 0x0249, 0x024A, 0x024B, 0x024C, 0x024D, 0x024E, 0x024F,
817 0x0250, 0x0251, 0x0252, 0x0181, 0x0186, 0x0255, 0x0189, 0x018A, 0x0258, 0x018F, 0x025A, 0x0190, 0x025C, 0x025D, 0x025E, 0x025F,
818 0x0193, 0x0261, 0x0262, 0x0194, 0x0264, 0x0265, 0x0266, 0x0267, 0x0197, 0x0196, 0x026A, 0x026B, 0x026C, 0x026D, 0x026E, 0x019C,
819 0x0270, 0x0271, 0x019D, 0x0273, 0x0274, 0x019F, 0x0276, 0x0277, 0x0278, 0x0279, 0x027A, 0x027B, 0x027C, 0x027D, 0x027E, 0x027F,
820 0x01A6, 0x0281, 0x0282, 0x01A9, 0x0284, 0x0285, 0x0286, 0x0287, 0x01AE, 0x0289, 0x01B1, 0x01B2, 0x028C, 0x028D, 0x028E, 0x028F,
821 0x0290, 0x0291, 0x01B7, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 0x0298, 0x0299, 0x029A, 0x029B, 0x029C, 0x029D, 0x029E, 0x029F,
822 0x02A0, 0x02A1, 0x02A2, 0x02A3, 0x02A4, 0x02A5, 0x02A6, 0x02A7, 0x02A8, 0x02A9, 0x02AA, 0x02AB, 0x02AC, 0x02AD, 0x02AE, 0x02AF,
823 0x02B0, 0x02B1, 0x02B2, 0x02B3, 0x02B4, 0x02B5, 0x02B6, 0x02B7, 0x02B8, 0x02B9, 0x02BA, 0x02BB, 0x02BC, 0x02BD, 0x02BE, 0x02BF,
824 0x02C0, 0x02C1, 0x02C2, 0x02C3, 0x02C4, 0x02C5, 0x02C6, 0x02C7, 0x02C8, 0x02C9, 0x02CA, 0x02CB, 0x02CC, 0x02CD, 0x02CE, 0x02CF,
825 0x02D0, 0x02D1, 0x02D2, 0x02D3, 0x02D4, 0x02D5, 0x02D6, 0x02D7, 0x02D8, 0x02D9, 0x02DA, 0x02DB, 0x02DC, 0x02DD, 0x02DE, 0x02DF,
826 0x02E0, 0x02E1, 0x02E2, 0x02E3, 0x02E4, 0x02E5, 0x02E6, 0x02E7, 0x02E8, 0x02E9, 0x02EA, 0x02EB, 0x02EC, 0x02ED, 0x02EE, 0x02EF,
827 0x02F0, 0x02F1, 0x02F2, 0x02F3, 0x02F4, 0x02F5, 0x02F6, 0x02F7, 0x02F8, 0x02F9, 0x02FA, 0x02FB, 0x02FC, 0x02FD, 0x02FE, 0x02FF
828};
829
// Table for code points 0x0300-0x03FF (stored at planemap[0x03]): 256 entries in
// code-point order; entry i is the value code point 0x0300 + i maps to. For
// example 0x03B1-0x03C1 map to 0x0391-0x03A1, and 0x0386 maps to 0x0391.
// NOTE(review): the lookup code is outside this excerpt - presumably the table
// is indexed by the low byte of the code point; confirm against the caller.
830static const uint16_t plane03[] = {
831 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 0x0308, 0x0309, 0x030A, 0x030B, 0x030C, 0x030D, 0x030E, 0x030F,
832 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 0x0318, 0x0319, 0x031A, 0x031B, 0x031C, 0x031D, 0x031E, 0x031F,
833 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 0x0328, 0x0329, 0x032A, 0x032B, 0x032C, 0x032D, 0x032E, 0x032F,
834 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 0x0338, 0x0339, 0x033A, 0x033B, 0x033C, 0x033D, 0x033E, 0x033F,
835 0x0340, 0x0341, 0x0342, 0x0343, 0x0344, 0x0399, 0x0346, 0x0347, 0x0348, 0x0349, 0x034A, 0x034B, 0x034C, 0x034D, 0x034E, 0x034F,
836 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 0x0358, 0x0359, 0x035A, 0x035B, 0x035C, 0x035D, 0x035E, 0x035F,
837 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F,
838 0x0370, 0x0371, 0x0372, 0x0373, 0x0374, 0x0375, 0x0376, 0x0377, 0x0378, 0x0379, 0x037A, 0x037B, 0x037C, 0x037D, 0x037E, 0x037F,
839 0x0380, 0x0381, 0x0382, 0x0383, 0x0384, 0x0385, 0x0391, 0x0387, 0x0395, 0x0397, 0x0399, 0x038B, 0x039F, 0x038D, 0x03A5, 0x03A9,
840 0x0399, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
841 0x03A0, 0x03A1, 0x03A2, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x0391, 0x0395, 0x0397, 0x0399,
842 0x03A5, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
843 0x03A0, 0x03A1, 0x03A3, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x039F, 0x03A5, 0x03A9, 0x03CF,
844 0x0392, 0x0398, 0x03D2, 0x03D2, 0x03D2, 0x03A6, 0x03A0, 0x03D7, 0x03D8, 0x03D9, 0x03DA, 0x03DA, 0x03DC, 0x03DC, 0x03DE, 0x03DE,
845 0x03E0, 0x03E0, 0x03E2, 0x03E2, 0x03E4, 0x03E4, 0x03E6, 0x03E6, 0x03E8, 0x03E8, 0x03EA, 0x03EA, 0x03EC, 0x03EC, 0x03EE, 0x03EE,
846 0x039A, 0x03A1, 0x03A3, 0x03F3, 0x03F4, 0x03F5, 0x03F6, 0x03F7, 0x03F8, 0x03F9, 0x03FA, 0x03FB, 0x03FC, 0x03FD, 0x03FE, 0x03FF
847};
848
// Table for code points 0x0400-0x04FF (stored at planemap[0x04]): 256 entries in
// code-point order; entry i is the value code point 0x0400 + i maps to. For
// example 0x0430-0x044F map to 0x0410-0x042F, and 0x0400, 0x0401, 0x0450 and
// 0x0451 all map to 0x0415.
// NOTE(review): the lookup code is outside this excerpt - presumably the table
// is indexed by the low byte of the code point; confirm against the caller.
849static const uint16_t plane04[] = {
850 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F,
851 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
852 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
853 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
854 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
855 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F,
856 0x0460, 0x0460, 0x0462, 0x0462, 0x0464, 0x0464, 0x0466, 0x0466, 0x0468, 0x0468, 0x046A, 0x046A, 0x046C, 0x046C, 0x046E, 0x046E,
857 0x0470, 0x0470, 0x0472, 0x0472, 0x0474, 0x0474, 0x0474, 0x0474, 0x0478, 0x0478, 0x047A, 0x047A, 0x047C, 0x047C, 0x047E, 0x047E,
858 0x0480, 0x0480, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 0x0488, 0x0489, 0x048A, 0x048B, 0x048C, 0x048C, 0x048E, 0x048E,
859 0x0490, 0x0490, 0x0492, 0x0492, 0x0494, 0x0494, 0x0496, 0x0496, 0x0498, 0x0498, 0x049A, 0x049A, 0x049C, 0x049C, 0x049E, 0x049E,
860 0x04A0, 0x04A0, 0x04A2, 0x04A2, 0x04A4, 0x04A4, 0x04A6, 0x04A6, 0x04A8, 0x04A8, 0x04AA, 0x04AA, 0x04AC, 0x04AC, 0x04AE, 0x04AE,
861 0x04B0, 0x04B0, 0x04B2, 0x04B2, 0x04B4, 0x04B4, 0x04B6, 0x04B6, 0x04B8, 0x04B8, 0x04BA, 0x04BA, 0x04BC, 0x04BC, 0x04BE, 0x04BE,
862 0x04C0, 0x0416, 0x0416, 0x04C3, 0x04C3, 0x04C5, 0x04C6, 0x04C7, 0x04C7, 0x04C9, 0x04CA, 0x04CB, 0x04CB, 0x04CD, 0x04CE, 0x04CF,
863 0x0410, 0x0410, 0x0410, 0x0410, 0x04D4, 0x04D4, 0x0415, 0x0415, 0x04D8, 0x04D8, 0x04D8, 0x04D8, 0x0416, 0x0416, 0x0417, 0x0417,
864 0x04E0, 0x04E0, 0x0418, 0x0418, 0x0418, 0x0418, 0x041E, 0x041E, 0x04E8, 0x04E8, 0x04E8, 0x04E8, 0x042D, 0x042D, 0x0423, 0x0423,
865 0x0423, 0x0423, 0x0423, 0x0423, 0x0427, 0x0427, 0x04F6, 0x04F7, 0x042B, 0x042B, 0x04FA, 0x04FB, 0x04FC, 0x04FD, 0x04FE, 0x04FF
866};
867
// Table for code points 0x0500-0x05FF (stored at planemap[0x05]): 256 entries in
// code-point order; entry i is the value code point 0x0500 + i maps to. Only
// 0x0561-0x0586 are remapped (to 0x0531-0x0556); every other entry equals its
// own code point.
// NOTE(review): the lookup code is outside this excerpt - presumably the table
// is indexed by the low byte of the code point; confirm against the caller.
868static const uint16_t plane05[] = {
869 0x0500, 0x0501, 0x0502, 0x0503, 0x0504, 0x0505, 0x0506, 0x0507, 0x0508, 0x0509, 0x050A, 0x050B, 0x050C, 0x050D, 0x050E, 0x050F,
870 0x0510, 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0516, 0x0517, 0x0518, 0x0519, 0x051A, 0x051B, 0x051C, 0x051D, 0x051E, 0x051F,
871 0x0520, 0x0521, 0x0522, 0x0523, 0x0524, 0x0525, 0x0526, 0x0527, 0x0528, 0x0529, 0x052A, 0x052B, 0x052C, 0x052D, 0x052E, 0x052F,
872 0x0530, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053A, 0x053B, 0x053C, 0x053D, 0x053E, 0x053F,
873 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B, 0x054C, 0x054D, 0x054E, 0x054F,
874 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0557, 0x0558, 0x0559, 0x055A, 0x055B, 0x055C, 0x055D, 0x055E, 0x055F,
875 0x0560, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053A, 0x053B, 0x053C, 0x053D, 0x053E, 0x053F,
876 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B, 0x054C, 0x054D, 0x054E, 0x054F,
877 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0587, 0x0588, 0x0589, 0x058A, 0x058B, 0x058C, 0x058D, 0x058E, 0x058F,
878 0x0590, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597, 0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F,
879 0x05A0, 0x05A1, 0x05A2, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7, 0x05A8, 0x05A9, 0x05AA, 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF,
880 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BA, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
881 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05C4, 0x05C5, 0x05C6, 0x05C7, 0x05C8, 0x05C9, 0x05CA, 0x05CB, 0x05CC, 0x05CD, 0x05CE, 0x05CF,
882 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
883 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x05EB, 0x05EC, 0x05ED, 0x05EE, 0x05EF,
884 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0x05F5, 0x05F6, 0x05F7, 0x05F8, 0x05F9, 0x05FA, 0x05FB, 0x05FC, 0x05FD, 0x05FE, 0x05FF
885};
886
// Table for code points 0x1E00-0x1EFF (stored at planemap[0x1E]): 256 entries in
// code-point order; entry i is the value code point 0x1E00 + i maps to. Most
// entries map to a value in the range 0x0041-0x005A, e.g. 0x1E00 and 0x1E01 map
// to 0x0041 and 0x1EA0-0x1EB7 all map to 0x0041.
// NOTE(review): the lookup code is outside this excerpt - presumably the table
// is indexed by the low byte of the code point; confirm against the caller.
887static const uint16_t plane1E[] = {
888 0x0041, 0x0041, 0x0042, 0x0042, 0x0042, 0x0042, 0x0042, 0x0042, 0x0043, 0x0043, 0x0044, 0x0044, 0x0044, 0x0044, 0x0044, 0x0044,
889 0x0044, 0x0044, 0x0044, 0x0044, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0046, 0x0046,
890 0x0047, 0x0047, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0049, 0x0049, 0x0049, 0x0049,
891 0x004B, 0x004B, 0x004B, 0x004B, 0x004B, 0x004B, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004D, 0x004D,
892 0x004D, 0x004D, 0x004D, 0x004D, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F,
893 0x004F, 0x004F, 0x004F, 0x004F, 0x0050, 0x0050, 0x0050, 0x0050, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052,
894 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0054, 0x0054, 0x0054, 0x0054, 0x0054, 0x0054,
895 0x0054, 0x0054, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0056, 0x0056, 0x0056, 0x0056,
896 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0058, 0x0058, 0x0058, 0x0058, 0x0059, 0x0059,
897 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x0048, 0x0054, 0x0057, 0x0059, 0x1E9A, 0x0053, 0x1E9C, 0x1E9D, 0x1E9E, 0x1E9F,
898 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041,
899 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045,
900 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004F, 0x004F, 0x004F, 0x004F,
901 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F,
902 0x004F, 0x004F, 0x004F, 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055,
903 0x0055, 0x0055, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x1EFA, 0x1EFB, 0x1EFC, 0x1EFD, 0x1EFE, 0x1EFF
904};
905
// Table for code points 0x1F00-0x1FFF (stored at planemap[0x1F]): 256 entries in
// code-point order; entry i is the value code point 0x1F00 + i maps to. Most
// entries map into the range 0x0391-0x03A9, e.g. 0x1F00-0x1F0F all map to
// 0x0391; a few entries (such as 0x1F16) equal their own code point.
// NOTE(review): the lookup code is outside this excerpt - presumably the table
// is indexed by the low byte of the code point; confirm against the caller.
906static const uint16_t plane1F[] = {
907 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391,
908 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x1F16, 0x1F17, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x1F1E, 0x1F1F,
909 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397,
910 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399,
911 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x1F46, 0x1F47, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x1F4E, 0x1F4F,
912 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x1F58, 0x03A5, 0x1F5A, 0x03A5, 0x1F5C, 0x03A5, 0x1F5E, 0x03A5,
913 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9,
914 0x0391, 0x1FBB, 0x0395, 0x1FC9, 0x0397, 0x1FCB, 0x0399, 0x1FDB, 0x039F, 0x1FF9, 0x03A5, 0x1FEB, 0x03A9, 0x1FFB, 0x1F7E, 0x1F7F,
915 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391,
916 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397,
917 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9,
918 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x1FB5, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x1FBB, 0x0391, 0x1FBD, 0x0399, 0x1FBF,
919 0x1FC0, 0x1FC1, 0x0397, 0x0397, 0x0397, 0x1FC5, 0x0397, 0x0397, 0x0395, 0x1FC9, 0x0397, 0x1FCB, 0x0397, 0x1FCD, 0x1FCE, 0x1FCF,
920 0x0399, 0x0399, 0x0399, 0x1FD3, 0x1FD4, 0x1FD5, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF,
921 0x03A5, 0x03A5, 0x03A5, 0x1FE3, 0x03A1, 0x03A1, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x1FEB, 0x03A1, 0x1FED, 0x1FEE, 0x1FEF,
922 0x1FF0, 0x1FF1, 0x03A9, 0x03A9, 0x03A9, 0x1FF5, 0x03A9, 0x03A9, 0x039F, 0x1FF9, 0x03A9, 0x1FFB, 0x03A9, 0x1FFD, 0x1FFE, 0x1FFF
923};
924
// Table for code points 0x2100-0x21FF (stored at planemap[0x21]): 256 entries in
// code-point order; entry i is the value code point 0x2100 + i maps to. Only
// 0x2170-0x217F are remapped (to 0x2160-0x216F); every other entry equals its
// own code point.
// NOTE(review): the lookup code is outside this excerpt - presumably the table
// is indexed by the low byte of the code point; confirm against the caller.
925static const uint16_t plane21[] = {
926 0x2100, 0x2101, 0x2102, 0x2103, 0x2104, 0x2105, 0x2106, 0x2107, 0x2108, 0x2109, 0x210A, 0x210B, 0x210C, 0x210D, 0x210E, 0x210F,
927 0x2110, 0x2111, 0x2112, 0x2113, 0x2114, 0x2115, 0x2116, 0x2117, 0x2118, 0x2119, 0x211A, 0x211B, 0x211C, 0x211D, 0x211E, 0x211F,
928 0x2120, 0x2121, 0x2122, 0x2123, 0x2124, 0x2125, 0x2126, 0x2127, 0x2128, 0x2129, 0x212A, 0x212B, 0x212C, 0x212D, 0x212E, 0x212F,
929 0x2130, 0x2131, 0x2132, 0x2133, 0x2134, 0x2135, 0x2136, 0x2137, 0x2138, 0x2139, 0x213A, 0x213B, 0x213C, 0x213D, 0x213E, 0x213F,
930 0x2140, 0x2141, 0x2142, 0x2143, 0x2144, 0x2145, 0x2146, 0x2147, 0x2148, 0x2149, 0x214A, 0x214B, 0x214C, 0x214D, 0x214E, 0x214F,
931 0x2150, 0x2151, 0x2152, 0x2153, 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159, 0x215A, 0x215B, 0x215C, 0x215D, 0x215E, 0x215F,
932 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A, 0x216B, 0x216C, 0x216D, 0x216E, 0x216F,
933 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A, 0x216B, 0x216C, 0x216D, 0x216E, 0x216F,
934 0x2180, 0x2181, 0x2182, 0x2183, 0x2184, 0x2185, 0x2186, 0x2187, 0x2188, 0x2189, 0x218A, 0x218B, 0x218C, 0x218D, 0x218E, 0x218F,
935 0x2190, 0x2191, 0x2192, 0x2193, 0x2194, 0x2195, 0x2196, 0x2197, 0x2198, 0x2199, 0x219A, 0x219B, 0x219C, 0x219D, 0x219E, 0x219F,
936 0x21A0, 0x21A1, 0x21A2, 0x21A3, 0x21A4, 0x21A5, 0x21A6, 0x21A7, 0x21A8, 0x21A9, 0x21AA, 0x21AB, 0x21AC, 0x21AD, 0x21AE, 0x21AF,
937 0x21B0, 0x21B1, 0x21B2, 0x21B3, 0x21B4, 0x21B5, 0x21B6, 0x21B7, 0x21B8, 0x21B9, 0x21BA, 0x21BB, 0x21BC, 0x21BD, 0x21BE, 0x21BF,
938 0x21C0, 0x21C1, 0x21C2, 0x21C3, 0x21C4, 0x21C5, 0x21C6, 0x21C7, 0x21C8, 0x21C9, 0x21CA, 0x21CB, 0x21CC, 0x21CD, 0x21CE, 0x21CF,
939 0x21D0, 0x21D1, 0x21D2, 0x21D3, 0x21D4, 0x21D5, 0x21D6, 0x21D7, 0x21D8, 0x21D9, 0x21DA, 0x21DB, 0x21DC, 0x21DD, 0x21DE, 0x21DF,
940 0x21E0, 0x21E1, 0x21E2, 0x21E3, 0x21E4, 0x21E5, 0x21E6, 0x21E7, 0x21E8, 0x21E9, 0x21EA, 0x21EB, 0x21EC, 0x21ED, 0x21EE, 0x21EF,
941 0x21F0, 0x21F1, 0x21F2, 0x21F3, 0x21F4, 0x21F5, 0x21F6, 0x21F7, 0x21F8, 0x21F9, 0x21FA, 0x21FB, 0x21FC, 0x21FD, 0x21FE, 0x21FF
942};
943
// Table for code points 0x2400-0x24FF (stored at planemap[0x24]): 256 entries in
// code-point order; entry i is the value code point 0x2400 + i maps to. Only
// 0x24D0-0x24E9 are remapped (to 0x24B6-0x24CF); every other entry equals its
// own code point.
// NOTE(review): the lookup code is outside this excerpt - presumably the table
// is indexed by the low byte of the code point; confirm against the caller.
944static const uint16_t plane24[] = {
945 0x2400, 0x2401, 0x2402, 0x2403, 0x2404, 0x2405, 0x2406, 0x2407, 0x2408, 0x2409, 0x240A, 0x240B, 0x240C, 0x240D, 0x240E, 0x240F,
946 0x2410, 0x2411, 0x2412, 0x2413, 0x2414, 0x2415, 0x2416, 0x2417, 0x2418, 0x2419, 0x241A, 0x241B, 0x241C, 0x241D, 0x241E, 0x241F,
947 0x2420, 0x2421, 0x2422, 0x2423, 0x2424, 0x2425, 0x2426, 0x2427, 0x2428, 0x2429, 0x242A, 0x242B, 0x242C, 0x242D, 0x242E, 0x242F,
948 0x2430, 0x2431, 0x2432, 0x2433, 0x2434, 0x2435, 0x2436, 0x2437, 0x2438, 0x2439, 0x243A, 0x243B, 0x243C, 0x243D, 0x243E, 0x243F,
949 0x2440, 0x2441, 0x2442, 0x2443, 0x2444, 0x2445, 0x2446, 0x2447, 0x2448, 0x2449, 0x244A, 0x244B, 0x244C, 0x244D, 0x244E, 0x244F,
950 0x2450, 0x2451, 0x2452, 0x2453, 0x2454, 0x2455, 0x2456, 0x2457, 0x2458, 0x2459, 0x245A, 0x245B, 0x245C, 0x245D, 0x245E, 0x245F,
951 0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466, 0x2467, 0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E, 0x246F,
952 0x2470, 0x2471, 0x2472, 0x2473, 0x2474, 0x2475, 0x2476, 0x2477, 0x2478, 0x2479, 0x247A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F,
953 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 0x2487, 0x2488, 0x2489, 0x248A, 0x248B, 0x248C, 0x248D, 0x248E, 0x248F,
954 0x2490, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 0x2497, 0x2498, 0x2499, 0x249A, 0x249B, 0x249C, 0x249D, 0x249E, 0x249F,
955 0x24A0, 0x24A1, 0x24A2, 0x24A3, 0x24A4, 0x24A5, 0x24A6, 0x24A7, 0x24A8, 0x24A9, 0x24AA, 0x24AB, 0x24AC, 0x24AD, 0x24AE, 0x24AF,
956 0x24B0, 0x24B1, 0x24B2, 0x24B3, 0x24B4, 0x24B5, 0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA, 0x24BB, 0x24BC, 0x24BD, 0x24BE, 0x24BF,
957 0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4, 0x24C5, 0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE, 0x24CF,
958 0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA, 0x24BB, 0x24BC, 0x24BD, 0x24BE, 0x24BF, 0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4, 0x24C5,
959 0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE, 0x24CF, 0x24EA, 0x24EB, 0x24EC, 0x24ED, 0x24EE, 0x24EF,
960 0x24F0, 0x24F1, 0x24F2, 0x24F3, 0x24F4, 0x24F5, 0x24F6, 0x24F7, 0x24F8, 0x24F9, 0x24FA, 0x24FB, 0x24FC, 0x24FD, 0x24FE, 0x24FF
961};
962
// Table for code points 0xFF00-0xFFFF: 256 entries in code-point order; entry i
// is the value code point 0xFF00 + i maps to. Only 0xFF41-0xFF5A are remapped
// (to 0xFF21-0xFF3A); every other entry equals its own code point.
// NOTE(review): neither the lookup code nor this table's planemap slot is
// visible in this excerpt - presumably it is stored at planemap[0xFF] and
// indexed by the low byte of the code point; confirm.
963static const uint16_t planeFF[] = {
964 0xFF00, 0xFF01, 0xFF02, 0xFF03, 0xFF04, 0xFF05, 0xFF06, 0xFF07, 0xFF08, 0xFF09, 0xFF0A, 0xFF0B, 0xFF0C, 0xFF0D, 0xFF0E, 0xFF0F,
965 0xFF10, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 0xFF18, 0xFF19, 0xFF1A, 0xFF1B, 0xFF1C, 0xFF1D, 0xFF1E, 0xFF1F,
966 0xFF20, 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28, 0xFF29, 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F,
967 0xFF30, 0xFF31, 0xFF32, 0xFF33, 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A, 0xFF3B, 0xFF3C, 0xFF3D, 0xFF3E, 0xFF3F,
968 0xFF40, 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28, 0xFF29, 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F,
969 0xFF30, 0xFF31, 0xFF32, 0xFF33, 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A, 0xFF5B, 0xFF5C, 0xFF5D, 0xFF5E, 0xFF5F,
970 0xFF60, 0xFF61, 0xFF62, 0xFF63, 0xFF64, 0xFF65, 0xFF66, 0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
971 0xFF70, 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF76, 0xFF77, 0xFF78, 0xFF79, 0xFF7A, 0xFF7B, 0xFF7C, 0xFF7D, 0xFF7E, 0xFF7F,
972 0xFF80, 0xFF81, 0xFF82, 0xFF83, 0xFF84, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8B, 0xFF8C, 0xFF8D, 0xFF8E, 0xFF8F,
973 0xFF90, 0xFF91, 0xFF92, 0xFF93, 0xFF94, 0xFF95, 0xFF96, 0xFF97, 0xFF98, 0xFF99, 0xFF9A, 0xFF9B, 0xFF9C, 0xFF9D, 0xFF9E, 0xFF9F,
974 0xFFA0, 0xFFA1, 0xFFA2, 0xFFA3, 0xFFA4, 0xFFA5, 0xFFA6, 0xFFA7, 0xFFA8, 0xFFA9, 0xFFAA, 0xFFAB, 0xFFAC, 0xFFAD, 0xFFAE, 0xFFAF,
975 0xFFB0, 0xFFB1, 0xFFB2, 0xFFB3, 0xFFB4, 0xFFB5, 0xFFB6, 0xFFB7, 0xFFB8, 0xFFB9, 0xFFBA, 0xFFBB, 0xFFBC, 0xFFBD, 0xFFBE, 0xFFBF,
976 0xFFC0, 0xFFC1, 0xFFC2, 0xFFC3, 0xFFC4, 0xFFC5, 0xFFC6, 0xFFC7, 0xFFC8, 0xFFC9, 0xFFCA, 0xFFCB, 0xFFCC, 0xFFCD, 0xFFCE, 0xFFCF,
977 0xFFD0, 0xFFD1, 0xFFD2, 0xFFD3, 0xFFD4, 0xFFD5, 0xFFD6, 0xFFD7, 0xFFD8, 0xFFD9, 0xFFDA, 0xFFDB, 0xFFDC, 0xFFDD, 0xFFDE, 0xFFDF,
978 0xFFE0, 0xFFE1, 0xFFE2, 0xFFE3, 0xFFE4, 0xFFE5, 0xFFE6, 0xFFE7, 0xFFE8, 0xFFE9, 0xFFEA, 0xFFEB, 0xFFEC, 0xFFED, 0xFFEE, 0xFFEF,
979 0xFFF0, 0xFFF1, 0xFFF2, 0xFFF3, 0xFFF4, 0xFFF5, 0xFFF6, 0xFFF7, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF
980};
981
982static const uint16_t* const planemap[256] = {
983 plane00, plane01, plane02, plane03, plane04, plane05, NULL, NULL, NULL, NULL, NULL,
984 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
985 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, plane1E, plane1F, NULL,
986 plane21, NULL, NULL, plane24, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
987 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
988 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
989 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
990 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
991 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
992 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
993 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
994 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
995 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
996 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
997 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
998 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
999 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1000 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1001 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1002 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1003 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1004 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1005 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1006 NULL, NULL, planeFF
1007};
1008// clang-format on
1009
1010static wchar_t GetCollationWeight(const wchar_t& r)
1011{
1012 // Lookup the "weight" of a UTF8 char, equivalent lowercase ascii letter, in the plane map,
1013 // the character comparison value used by using "accent folding" collation utf8_general_ci
1014 // in MySQL (AKA utf8mb3_general_ci in MariaDB 10)
1015 auto index = r >> 8;
1016 if (index > 255)
1017 return 0xFFFD;
1018 auto plane = planemap[index];
1019 if (plane == nullptr)
1020 return r;
1021 return static_cast<wchar_t>(plane[r & 0xFF]);
1022}
1023
1024// Compares separately the numeric and alphabetic parts of a wide string.
1025// returns negative if left < right, positive if left > right
1026// and 0 if they are identical.
1027// See also the equivalent StringUtils::AlphaNumericCollation() for UFT8 data
1028int64_t StringUtils::AlphaNumericCompare(const wchar_t* left, const wchar_t* right)
1029{
1030 const wchar_t *l = left;
1031 const wchar_t *r = right;
1032 const wchar_t *ld, *rd;
1033 wchar_t lc, rc;
1034 int64_t lnum, rnum;
1035 bool lsym, rsym;
1036 while (*l != 0 && *r != 0)
1037 {
1038 // check if we have a numerical value
1039 if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
1040 {
1041 ld = l;
1042 lnum = *ld++ - L'0';
1043 while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
1044 { // compare only up to 15 digits
1045 lnum *= 10;
1046 lnum += *ld++ - L'0';
1047 }
1048 rd = r;
1049 rnum = *rd++ - L'0';
1050 while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
1051 { // compare only up to 15 digits
1052 rnum *= 10;
1053 rnum += *rd++ - L'0';
1054 }
1055 // do we have numbers?
1056 if (lnum != rnum)
1057 { // yes - and they're different!
1058 return lnum - rnum;
1059 }
1060 l = ld;
1061 r = rd;
1062 continue;
1063 }
1064
1065 lc = *l;
1066 rc = *r;
1067 // Put ascii punctuation and symbols e.g. !#$&()*+,-./:;<=>?@[\]^_ `{|}~ above the other
1068 // alphanumeric ascii, rather than some being mixed between the numbers and letters, and
1069 // above all other unicode letters, symbols and punctuation.
1070 // (Locale collation of these chars varies across platforms)
1071 lsym = (lc >= 32 && lc < L'0') || (lc > L'9' && lc < L'A') ||
1072 (lc > L'Z' && lc < L'a') || (lc > L'z' && lc < 128);
1073 rsym = (rc >= 32 && rc < L'0') || (rc > L'9' && rc < L'A') ||
1074 (rc > L'Z' && rc < L'a') || (rc > L'z' && rc < 128);
1075 if (lsym && !rsym)
1076 return -1;
1077 if (!lsym && rsym)
1078 return 1;
1079 if (lsym && rsym)
1080 {
1081 if (lc != rc)
1082 return lc - rc;
1083 else
1084 { // Same symbol advance to next wchar
1085 l++;
1086 r++;
1087 continue;
1088 }
1089 }
1090 if (!g_langInfo.UseLocaleCollation())
1091 {
1092 // Apply case sensitive accent folding collation to non-ascii chars.
1093 // This mimics utf8_general_ci collation, and provides simple collation of LATIN-1 chars
1094 // for any platformthat doesn't have a language specific collate facet implemented
1095 if (lc > 128)
1096 lc = GetCollationWeight(lc);
1097 if (rc > 128)
1098 rc = GetCollationWeight(rc);
1099 }
1100 // Do case less comparison, convert ascii upper case to lower case
1101 if (lc >= L'A' && lc <= L'Z')
1102 lc += L'a' - L'A';
1103 if (rc >= L'A' && rc <= L'Z')
1104 rc += L'a' - L'A';
1105
1106 if (lc != rc)
1107 {
1108 if (!g_langInfo.UseLocaleCollation())
1109 {
1110 // Compare unicode (having applied accent folding collation to non-ascii chars).
1111 int i = wcsncmp(&lc, &rc, 1);
1112 return i;
1113 }
1114 else
1115 {
1116 // Fetch collation facet from locale to do comparison of wide char although on some
1117 // platforms this is not langauge specific but just compares unicode
1118 const std::collate<wchar_t>& coll =
1119 std::use_facet<std::collate<wchar_t>>(g_langInfo.GetSystemLocale());
1120 int cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1);
1121 if (cmp_res != 0)
1122 return cmp_res;
1123 }
1124 }
1125 l++; r++;
1126 }
1127 if (*r)
1128 { // r is longer
1129 return -1;
1130 }
1131 else if (*l)
1132 { // l is longer
1133 return 1;
1134 }
1135 return 0; // files are the same
1136}
1137
/*
  Convert the UTF8 character to which z points into a 31-bit Unicode point.

  z      points at the first byte of the character
  nKey   number of bytes readable from z (z[0] is always read)
  bytes  receives the number of continuation bytes consumed after z[0] (0 to 3)

  Returns the decoded code point; the results that are rejected below (overlong forms
  of ascii, surrogates, 0xFFFE and 0xFFFF) are replaced by 0xFFFD.

  This only works right if z points to a well-formed UTF8 string.
    Byte-0    Byte-1    Byte-2    Byte-3    Value
    0xxxxxxx                                00000000 00000000 0xxxxxxx
    110yyyyy  10xxxxxx                      00000000 00000yyy yyxxxxxx
    1110zzzz  10yyyyyy  10xxxxxx            00000000 zzzzyyyy yyxxxxxx
    11110uuu  10uuzzzz  10yyyyyy  10xxxxxx  000uuuuu zzzzyyyy yyxxxxxx
*/
static uint32_t UTF8ToUnicode(const unsigned char* z, int nKey, unsigned char& bytes)
{
  // Lookup table used to decode the first byte of a multi-byte UTF8 character
  // (indexed by lead byte - 0xc0, yields the payload bits of the lead byte)
  // clang-format off
  static const unsigned char utf8Trans1[] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
  };
  // clang-format on

  // A character is encoded by at most four bytes: the lead byte and three continuations
  constexpr int maxEncodedLength = 4;

  bytes = 0;
  uint32_t c = z[0];
  if (c < 0xc0)
    return c; // single byte (or a stray continuation byte), passed through as is

  c = utf8Trans1[c - 0xc0];
  int index = 1;
  // Stop after three continuation bytes even if malformed data supplies more: an unbounded
  // run would overflow c and could wrap the count stored in bytes.
  while (index < nKey && index < maxEncodedLength && (z[index] & 0xc0) == 0x80)
  {
    c = (c << 6) + (0x3f & z[index]);
    index++;
  }
  if (c < 0x80 || (c & 0xFFFFF800) == 0xD800 || (c & 0xFFFFFFFE) == 0xFFFE)
    c = 0xFFFD;
  bytes = static_cast<unsigned char>(index - 1);
  return c;
}
1182
1183/*
1184 SQLite collating function, see sqlite3_create_collation
1185 The equivalent of AlphaNumericCompare() but for comparing UTF8 encoded data
1186
1187 This only processes enough data to find a difference, and avoids expensive data conversions.
1188 When sorting in memory item data is converted once to wstring in advance prior to sorting, the
1189 SQLite callback function can not do that kind of preparation. Instead, in order to use
1190 AlphaNumericCompare(), it would have to repeatedly convert the full input data to wstring for
1191 every pair comparison made. That approach was found to be 10 times slower than using this
1192 separate routine.
1193*/
1194int StringUtils::AlphaNumericCollation(int nKey1, const void* pKey1, int nKey2, const void* pKey2)
1195{
1196 // Get exact matches of shorter text to start of larger test fast
1197 int n = std::min(nKey1, nKey2);
1198 int r = memcmp(pKey1, pKey2, n);
1199 if (r == 0)
1200 return nKey1 - nKey2;
1201
1202 //Not a binary match, so process character at a time
1203 const unsigned char* zA = static_cast<const unsigned char*>(pKey1);
1204 const unsigned char* zB = static_cast<const unsigned char*>(pKey2);
1205 wchar_t lc, rc;
1206 unsigned char bytes;
1207 int64_t lnum, rnum;
1208 bool lsym, rsym;
1209 int ld, rd;
1210 int i = 0;
1211 int j = 0;
1212 // Looping Unicode point at a time through potentially 1 to 4 multi-byte encoded UTF8 data
1213 while (i < nKey1 && j < nKey2)
1214 {
1215 // Check if we have numerical values, compare only up to 15 digits
1216 if (isdigit(zA[i]) && isdigit(zB[j]))
1217 {
1218 lnum = zA[i] - '0';
1219 ld = i + 1;
1220 while (ld < nKey1 && isdigit(zA[ld]) && ld < i + 15)
1221 {
1222 lnum *= 10;
1223 lnum += zA[ld] - '0';
1224 ld++;
1225 }
1226 rnum = zB[j] - '0';
1227 rd = j + 1;
1228 while (rd < nKey2 && isdigit(zB[rd]) && rd < j + 15)
1229 {
1230 rnum *= 10;
1231 rnum += zB[rd] - '0';
1232 rd++;
1233 }
1234 // do we have numbers?
1235 if (lnum != rnum)
1236 { // yes - and they're different!
1237 return lnum - rnum;
1238 }
1239 // Advance to after digits
1240 i = ld;
1241 j = rd;
1242 continue;
1243 }
1244 // Put ascii punctuation and symbols e.g. !#$&()*+,-./:;<=>?@[\]^_ `{|}~ before the other
1245 // alphanumeric ascii, rather than some being mixed between the numbers and letters, and
1246 // above all other unicode letters, symbols and punctuation.
1247 // (Locale collation of these chars varies across platforms)
1248 lsym = (zA[i] >= 32 && zA[i] < '0') || (zA[i] > '9' && zA[i] < 'A') ||
1249 (zA[i] > 'Z' && zA[i] < 'a') || (zA[i] > 'z' && zA[i] < 128);
1250 rsym = (zB[j] >= 32 && zB[j] < '0') || (zB[j] > '9' && zB[j] < 'A') ||
1251 (zB[j] > 'Z' && zB[j] < 'a') || (zB[j] > 'z' && zB[j] < 128);
1252 if (lsym && !rsym)
1253 return -1;
1254 if (!lsym && rsym)
1255 return 1;
1256 if (lsym && rsym)
1257 {
1258 if (zA[i] != zB[j])
1259 return zA[i] - zB[j];
1260 else
1261 { // Same symbol advance to next
1262 i++;
1263 j++;
1264 continue;
1265 }
1266 }
1267 //Decode single (1 to 4 bytes) UTF8 character to Unicode
1268 lc = UTF8ToUnicode(&zA[i], nKey1 - i, bytes);
1269 i += bytes;
1270 rc = UTF8ToUnicode(&zB[j], nKey2 - j, bytes);
1271 j += bytes;
1272 if (!g_langInfo.UseLocaleCollation())
1273 {
1274 // Apply case sensitive accent folding collation to non-ascii chars.
1275 // This mimics utf8_general_ci collation, and provides simple collation of LATIN-1 chars
1276 // for any platform that doesn't have a language specific collate facet implemented
1277 if (lc > 128)
1278 lc = GetCollationWeight(lc);
1279 if (rc > 128)
1280 rc = GetCollationWeight(rc);
1281 }
1282 // Caseless comparison so convert ascii upper case to lower case
1283 if (lc >= 'A' && lc <= 'Z')
1284 lc += 'a' - 'A';
1285 if (rc >= 'A' && rc <= 'Z')
1286 rc += 'a' - 'A';
1287
1288 if (lc != rc)
1289 {
1290 if (!g_langInfo.UseLocaleCollation() || (lc <= 128 && rc <= 128))
1291 // Compare unicode (having applied accent folding collation to non-ascii chars).
1292 return lc - rc;
1293 else
1294 {
1295 // Fetch collation facet from locale to do comparison of wide char although on some
1296 // platforms this is not langauge specific but just compares unicode
1297 const std::collate<wchar_t>& coll =
1298 std::use_facet<std::collate<wchar_t>>(g_langInfo.GetSystemLocale());
1299 int cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1);
1300 if (cmp_res != 0)
1301 return cmp_res;
1302 }
1303 }
1304 i++;
1305 j++;
1306 }
1307 // Compared characters of shortest are the same as longest, length determines order
1308 return (nKey1 - nKey2);
1309}
1310
1311int StringUtils::DateStringToYYYYMMDD(const std::string &dateString)
1312{
1313 std::vector<std::string> days = StringUtils::Split(dateString, '-');
1314 if (days.size() == 1)
1315 return atoi(days[0].c_str());
1316 else if (days.size() == 2)
1317 return atoi(days[0].c_str())*100+atoi(days[1].c_str());
1318 else if (days.size() == 3)
1319 return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
1320 else
1321 return -1;
1322}
1323
1324std::string StringUtils::ISODateToLocalizedDate(const std::string& strIsoDate)
1325{
1326 // Convert ISO8601 date strings YYYY, YYYY-MM, or YYYY-MM-DD to (partial) localized date strings
1327 CDateTime date;
1328 std::string formattedDate = strIsoDate;
1329 if (formattedDate.size() == 10)
1330 {
1331 date.SetFromDBDate(strIsoDate);
1332 formattedDate = date.GetAsLocalizedDate();
1333 }
1334 else if (formattedDate.size() == 7)
1335 {
1336 std::string strFormat = date.GetAsLocalizedDate(false);
1337 std::string tempdate;
1338 // find which date separator we are using. Can be -./
1339 size_t pos = strFormat.find_first_of("-./");
1340 if (pos != std::string::npos)
1341 {
1342 bool yearFirst = strFormat.find("1601") == 0; // true if year comes first
1343 std::string sep = strFormat.substr(pos, 1);
1344 if (yearFirst)
1345 { // build formatted date with year first, then separator and month
1346 tempdate = formattedDate.substr(0, 4);
1347 tempdate += sep;
1348 tempdate += formattedDate.substr(5, 2);
1349 }
1350 else
1351 {
1352 tempdate = formattedDate.substr(5, 2);
1353 tempdate += sep;
1354 tempdate += formattedDate.substr(0, 4);
1355 }
1356 formattedDate = tempdate;
1357 }
1358 // return either just the year or the locally formatted version of the ISO date
1359 }
1360 return formattedDate;
1361}
1362
1363long StringUtils::TimeStringToSeconds(const std::string &timeString)
1364{
1365 std::string strCopy(timeString);
1366 StringUtils::Trim(strCopy);
1367 if(StringUtils::EndsWithNoCase(strCopy, " min"))
1368 {
1369 // this is imdb format of "XXX min"
1370 return 60 * atoi(strCopy.c_str());
1371 }
1372 else
1373 {
1374 std::vector<std::string> secs = StringUtils::Split(strCopy, ':');
1375 int timeInSecs = 0;
1376 for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
1377 {
1378 timeInSecs *= 60;
1379 timeInSecs += atoi(secs[i].c_str());
1380 }
1381 return timeInSecs;
1382 }
1383}
1384
1385std::string StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
1386{
1387 bool isNegative = lSeconds < 0;
1388 lSeconds = std::abs(lSeconds);
1389
1390 std::string strHMS;
1391 if (format == TIME_FORMAT_SECS)
1392 strHMS = StringUtils::Format("%i", lSeconds);
1393 else if (format == TIME_FORMAT_MINS)
1394 strHMS = StringUtils::Format("%i", lrintf(static_cast<float>(lSeconds) / 60.0f));
1395 else if (format == TIME_FORMAT_HOURS)
1396 strHMS = StringUtils::Format("%i", lrintf(static_cast<float>(lSeconds) / 3600.0f));
1397 else if (format & TIME_FORMAT_M)
1398 strHMS += StringUtils::Format("%i", lSeconds % 3600 / 60);
1399 else
1400 {
1401 int hh = lSeconds / 3600;
1402 lSeconds = lSeconds % 3600;
1403 int mm = lSeconds / 60;
1404 int ss = lSeconds % 60;
1405
1406 if (format == TIME_FORMAT_GUESS)
1407 format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
1408 if (format & TIME_FORMAT_HH)
1409 strHMS += StringUtils::Format("%2.2i", hh);
1410 else if (format & TIME_FORMAT_H)
1411 strHMS += StringUtils::Format("%i", hh);
1412 if (format & TIME_FORMAT_MM)
1413 strHMS += StringUtils::Format(strHMS.empty() ? "%2.2i" : ":%2.2i", mm);
1414 if (format & TIME_FORMAT_SS)
1415 strHMS += StringUtils::Format(strHMS.empty() ? "%2.2i" : ":%2.2i", ss);
1416 }
1417
1418 if (isNegative)
1419 strHMS = "-" + strHMS;
1420
1421 return strHMS;
1422}
1423
1424bool StringUtils::IsNaturalNumber(const std::string& str)
1425{
1426 size_t i = 0, n = 0;
1427 // allow whitespace,digits,whitespace
1428 while (i < str.size() && isspace((unsigned char) str[i]))
1429 i++;
1430 while (i < str.size() && isdigit((unsigned char) str[i]))
1431 {
1432 i++; n++;
1433 }
1434 while (i < str.size() && isspace((unsigned char) str[i]))
1435 i++;
1436 return i == str.size() && n > 0;
1437}
1438
1439bool StringUtils::IsInteger(const std::string& str)
1440{
1441 size_t i = 0, n = 0;
1442 // allow whitespace,-,digits,whitespace
1443 while (i < str.size() && isspace((unsigned char) str[i]))
1444 i++;
1445 if (i < str.size() && str[i] == '-')
1446 i++;
1447 while (i < str.size() && isdigit((unsigned char) str[i]))
1448 {
1449 i++; n++;
1450 }
1451 while (i < str.size() && isspace((unsigned char) str[i]))
1452 i++;
1453 return i == str.size() && n > 0;
1454}
1455
1456int StringUtils::asciidigitvalue(char chr)
1457{
1458 if (!isasciidigit(chr))
1459 return -1;
1460
1461 return chr - '0';
1462}
1463
1464int StringUtils::asciixdigitvalue(char chr)
1465{
1466 int v = asciidigitvalue(chr);
1467 if (v >= 0)
1468 return v;
1469 if (chr >= 'a' && chr <= 'f')
1470 return chr - 'a' + 10;
1471 if (chr >= 'A' && chr <= 'F')
1472 return chr - 'A' + 10;
1473
1474 return -1;
1475}
1476
1477
1478void StringUtils::RemoveCRLF(std::string& strLine)
1479{
1480 StringUtils::TrimRight(strLine, "\n\r");
1481}
1482
1483std::string StringUtils::SizeToString(int64_t size)
1484{
1485 std::string strLabel;
1486 const char prefixes[] = {' ', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
1487 unsigned int i = 0;
1488 double s = (double)size;
1489 while (i < ARRAY_SIZE(prefixes) && s >= 1000.0)
1490 {
1491 s /= 1024.0;
1492 i++;
1493 }
1494
1495 if (!i)
1496 strLabel = StringUtils::Format("%.lf B", s);
1497 else if (i == ARRAY_SIZE(prefixes))
1498 {
1499 if (s >= 1000.0)
1500 strLabel = StringUtils::Format(">999.99 %cB", prefixes[i - 1]);
1501 else
1502 strLabel = StringUtils::Format("%.2lf %cB", s, prefixes[i - 1]);
1503 }
1504 else if (s >= 100.0)
1505 strLabel = StringUtils::Format("%.1lf %cB", s, prefixes[i]);
1506 else
1507 strLabel = StringUtils::Format("%.2lf %cB", s, prefixes[i]);
1508
1509 return strLabel;
1510}
1511
1512std::string StringUtils::BinaryStringToString(const std::string& in)
1513{
1514 std::string out;
1515 out.reserve(in.size() / 2);
1516 for (const char *cur = in.c_str(), *end = cur + in.size(); cur != end; ++cur) {
1517 if (*cur == '\\') {
1518 ++cur;
1519 if (cur == end) {
1520 break;
1521 }
1522 if (isdigit(*cur)) {
1523 char* end;
1524 unsigned long num = strtol(cur, &end, 10);
1525 cur = end - 1;
1526 out.push_back(num);
1527 continue;
1528 }
1529 }
1530 out.push_back(*cur);
1531 }
1532 return out;
1533}
1534
1535std::string StringUtils::ToHexadecimal(const std::string& in)
1536{
1537 std::ostringstream ss;
1538 ss << std::hex;
1539 for (unsigned char ch : in) {
1540 ss << std::setw(2) << std::setfill('0') << static_cast<unsigned long> (ch);
1541 }
1542 return ss.str();
1543}
1544
// Checks whether the null-terminated UTF8 data at str starts with a latin letter.
// Returns the length of the letter in bytes (1 or 2), or -1 if it is not a letter.
//
// reference:
// unicode -> utf8 table: http://www.utf8-chartable.de/
// latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
int IsUTF8Letter(const unsigned char *str)
{
  const unsigned char lead = str[0];
  if (lead == 0)
    return -1;

  const bool isLower = lead >= 'a' && lead <= 'z';
  const bool isUpper = lead >= 'A' && lead <= 'Z';
  if (isLower || isUpper)
    return 1;

  // Any other ascii character is not a letter
  if ((lead & 0x80) == 0)
    return -1;

  // All letters recognized below take two bytes, the second being 0x80..0xBF
  const unsigned char trail = str[1];
  if (trail < 0x80 || trail > 0xBF)
    return -1;

  switch (lead)
  {
    case 0xC3:
      // latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
      // (0x97 and 0xB7 are excluded)
      return (trail != 0x97 && trail != 0xB7) ? 2 : -1;

    case 0xC4:
    case 0xC5:
    case 0xC6:
    case 0xC7:
      // latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
      return 2;

    case 0xC8:
    case 0xC9:
      // latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
      // and International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
      return 2;

    case 0xCA:
      // as above, accepted up to a second byte of 0xAF only
      return trail <= 0xAF ? 2 : -1;

    default:
      return -1;
  }
}
1574
1575size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
1576{
1577 // NOTE: This assumes word is lowercase!
1578 const unsigned char *s = (const unsigned char *)str;
1579 do
1580 {
1581 // start with a compare
1582 const unsigned char *c = s;
1583 const unsigned char *w = (const unsigned char *)wordLowerCase;
1584 bool same = true;
1585 while (same && *c && *w)
1586 {
1587 unsigned char lc = *c++;
1588 if (lc >= 'A' && lc <= 'Z')
1589 lc += 'a'-'A';
1590
1591 if (lc != *w++) // different
1592 same = false;
1593 }
1594 if (same && *w == 0) // only the same if word has been exhausted
1595 return (const char *)s - str;
1596
1597 // otherwise, skip current word (composed by latin letters) or number
1598 int l;
1599 if (*s >= '0' && *s <= '9')
1600 {
1601 ++s;
1602 while (*s >= '0' && *s <= '9') ++s;
1603 }
1604 else if ((l = IsUTF8Letter(s)) > 0)
1605 {
1606 s += l;
1607 while ((l = IsUTF8Letter(s)) > 0) s += l;
1608 }
1609 else
1610 ++s;
1611 while (*s && *s == ' ') s++;
1612
1613 // and repeat until we're done
1614 } while (*s);
1615
1616 return std::string::npos;
1617}
1618
1619// assumes it is called from after the first open bracket is found
1620int StringUtils::FindEndBracket(const std::string &str, char opener, char closer, int startPos)
1621{
1622 int blocks = 1;
1623 for (unsigned int i = startPos; i < str.size(); i++)
1624 {
1625 if (str[i] == opener)
1626 blocks++;
1627 else if (str[i] == closer)
1628 {
1629 blocks--;
1630 if (!blocks)
1631 return i;
1632 }
1633 }
1634
1635 return (int)std::string::npos;
1636}
1637
1638void StringUtils::WordToDigits(std::string &word)
1639{
1640 static const char word_to_letter[] = "22233344455566677778889999";
1641 StringUtils::ToLower(word);
1642 for (unsigned int i = 0; i < word.size(); ++i)
1643 { // NB: This assumes ascii, which probably needs extending at some point.
1644 char letter = word[i];
1645 if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
1646 {
1647 word[i] = word_to_letter[letter-'a'];
1648 }
1649 else if (letter < '0' || letter > '9') // We want to keep 0-9!
1650 {
1651 word[i] = ' '; // replace everything else with a space
1652 }
1653 }
1654}
1655
1656std::string StringUtils::CreateUUID()
1657{
1658#ifdef HAVE_NEW_CROSSGUID
1659 return xg::newGuid().str();
1660#else
1661 static GuidGenerator guidGenerator;
1662 auto guid = guidGenerator.newGuid();
1663
1664 std::stringstream strGuid; strGuid << guid;
1665 return strGuid.str();
1666#endif
1667}
1668
1669bool StringUtils::ValidateUUID(const std::string &uuid)
1670{
1671 CRegExp guidRE;
1672 guidRE.RegComp(ADDON_GUID_RE);
1673 return (guidRE.RegFind(uuid.c_str()) == 0);
1674}
1675
1676double StringUtils::CompareFuzzy(const std::string &left, const std::string &right)
1677{
1678 return (0.5 + fstrcmp(left.c_str(), right.c_str()) * (left.length() + right.length())) / 2.0;
1679}
1680
1681int StringUtils::FindBestMatch(const std::string &str, const std::vector<std::string> &strings, double &matchscore)
1682{
1683 int best = -1;
1684 matchscore = 0;
1685
1686 int i = 0;
1687 for (std::vector<std::string>::const_iterator it = strings.begin(); it != strings.end(); ++it, i++)
1688 {
1689 int maxlength = std::max(str.length(), it->length());
1690 double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
1691 if (score > matchscore)
1692 {
1693 matchscore = score;
1694 best = i;
1695 }
1696 }
1697 return best;
1698}
1699
1700bool StringUtils::ContainsKeyword(const std::string &str, const std::vector<std::string> &keywords)
1701{
1702 for (std::vector<std::string>::const_iterator it = keywords.begin(); it != keywords.end(); ++it)
1703 {
1704 if (str.find(*it) != str.npos)
1705 return true;
1706 }
1707 return false;
1708}
1709
1710size_t StringUtils::utf8_strlen(const char *s)
1711{
1712 size_t length = 0;
1713 while (*s)
1714 {
1715 if ((*s++ & 0xC0) != 0x80)
1716 length++;
1717 }
1718 return length;
1719}
1720
1721std::string StringUtils::Paramify(const std::string &param)
1722{
1723 std::string result = param;
1724 // escape backspaces
1725 StringUtils::Replace(result, "\\", "\\\\");
1726 // escape double quotes
1727 StringUtils::Replace(result, "\"", "\\\"");
1728
1729 // add double quotes around the whole string
1730 return "\"" + result + "\"";
1731}
1732
1733std::vector<std::string> StringUtils::Tokenize(const std::string &input, const std::string &delimiters)
1734{
1735 std::vector<std::string> tokens;
1736 Tokenize(input, tokens, delimiters);
1737 return tokens;
1738}
1739
1740void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters)
1741{
1742 tokens.clear();
1743 // Skip delimiters at beginning.
1744 std::string::size_type dataPos = input.find_first_not_of(delimiters);
1745 while (dataPos != std::string::npos)
1746 {
1747 // Find next delimiter
1748 const std::string::size_type nextDelimPos = input.find_first_of(delimiters, dataPos);
1749 // Found a token, add it to the vector.
1750 tokens.push_back(input.substr(dataPos, nextDelimPos - dataPos));
1751 // Skip delimiters. Note the "not_of"
1752 dataPos = input.find_first_not_of(delimiters, nextDelimPos);
1753 }
1754}
1755
1756std::vector<std::string> StringUtils::Tokenize(const std::string &input, const char delimiter)
1757{
1758 std::vector<std::string> tokens;
1759 Tokenize(input, tokens, delimiter);
1760 return tokens;
1761}
1762
1763void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const char delimiter)
1764{
1765 tokens.clear();
1766 // Skip delimiters at beginning.
1767 std::string::size_type dataPos = input.find_first_not_of(delimiter);
1768 while (dataPos != std::string::npos)
1769 {
1770 // Find next delimiter
1771 const std::string::size_type nextDelimPos = input.find(delimiter, dataPos);
1772 // Found a token, add it to the vector.
1773 tokens.push_back(input.substr(dataPos, nextDelimPos - dataPos));
1774 // Skip delimiters. Note the "not_of"
1775 dataPos = input.find_first_not_of(delimiter, nextDelimPos);
1776 }
1777}
1778
1779uint64_t StringUtils::ToUint64(std::string str, uint64_t fallback) noexcept
1780{
1781 std::istringstream iss(str);
1782 uint64_t result(fallback);
1783 iss >> result;
1784 return result;
1785}
1786
1787std::string StringUtils::FormatFileSize(uint64_t bytes)
1788{
1789 const std::array<std::string, 6> units{{"B", "kB", "MB", "GB", "TB", "PB"}};
1790 if (bytes < 1000)
1791 return Format("%" PRIu64 "B", bytes);
1792
1793 size_t i = 0;
1794 double value = static_cast<double>(bytes);
1795 while (i + 1 < units.size() && value >= 999.5)
1796 {
1797 ++i;
1798 value /= 1024.0;
1799 }
1800 unsigned int decimals = value < 9.995 ? 2 : (value < 99.95 ? 1 : 0);
1801 auto frmt = "%." + Format("%u", decimals) + "f%s";
1802 return Format(frmt.c_str(), value, units[i].c_str());
1803}
1804
1805const std::locale& StringUtils::GetOriginalLocale() noexcept
1806{
1807 return g_langInfo.GetOriginalLocale();
1808}