summaryrefslogtreecommitdiffstats
path: root/xbmc/utils/test/TestCharsetConverter.cpp
diff options
context:
space:
mode:
authormanuel <manuel@mausz.at>2020-10-19 00:52:24 +0200
committermanuel <manuel@mausz.at>2020-10-19 00:52:24 +0200
commitbe933ef2241d79558f91796cc5b3a161f72ebf9c (patch)
treefe3ab2f130e20c99001f2d7a81d610c78c96a3f4 /xbmc/utils/test/TestCharsetConverter.cpp
parent5f8335c1e49ce108ef3481863833c98efa00411b (diff)
downloadkodi-pvr-build-be933ef2241d79558f91796cc5b3a161f72ebf9c.tar.gz
kodi-pvr-build-be933ef2241d79558f91796cc5b3a161f72ebf9c.tar.bz2
kodi-pvr-build-be933ef2241d79558f91796cc5b3a161f72ebf9c.zip
sync with upstream
Diffstat (limited to 'xbmc/utils/test/TestCharsetConverter.cpp')
-rw-r--r--xbmc/utils/test/TestCharsetConverter.cpp401
1 files changed, 401 insertions, 0 deletions
diff --git a/xbmc/utils/test/TestCharsetConverter.cpp b/xbmc/utils/test/TestCharsetConverter.cpp
new file mode 100644
index 0000000..f8736b7
--- /dev/null
+++ b/xbmc/utils/test/TestCharsetConverter.cpp
@@ -0,0 +1,401 @@
1/*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
4 *
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
8
9#include "ServiceBroker.h"
10#include "settings/Settings.h"
11#include "settings/SettingsComponent.h"
12#include "utils/CharsetConverter.h"
13#include "utils/Utf8Utils.h"
14
15#include <gtest/gtest.h>
16
#if 0
// Disabled reference data: zero-terminated UTF-16 code units taken from the
// 0xffXX range. In this file they are only referenced by commented-out tests.
static const uint16_t refutf16LE1[] = {
  0xff54, 0xff45, 0xff53, 0xff54, 0xff3f, 0xff55, 0xff54, 0xff46,
  0xff11, 0xff16, 0xff2c, 0xff25, 0xff54, 0xff4f, 0xff57, 0x0
};

static const uint16_t refutf16LE2[] = {
  0xff54, 0xff45, 0xff53, 0xff54, 0xff3f, 0xff55, 0xff54, 0xff46,
  0xff18, 0xff34, 0xff4f, 0xff1a, 0xff3f, 0xff43, 0xff48, 0xff41,
  0xff52, 0xff53, 0xff45, 0xff54, 0xff3f, 0xff35, 0xff34, 0xff26,
  0xff0d, 0xff11, 0xff16, 0xff2c, 0xff25, 0xff0c, 0xff3f, 0xff23,
  0xff33, 0xff54, 0xff44, 0xff33, 0xff54, 0xff52, 0xff49, 0xff4e,
  0xff47, 0xff11, 0xff16, 0x0
};
#endif
35
// Raw UTF-16LE byte stream used as conversion input: 24 code units, each
// written as "<low byte>\377" (high byte 0xFF), followed by the single NUL
// byte the string literal appends. Octal escapes are used on purpose: a hex
// escape such as "\xff" would swallow a following hex-digit character.
static const char refutf16LE3[] =
    "T\377" "E\377" "S\377" "T\377" "?\377"
    "S\377" "T\377" "R\377" "I\377" "N\377" "G\377"
    "#\377" "H\377" "A\377" "R\377" "S\377" "E\377" "T\377"
    "\064\377" "O\377"
    "\065\377" "T\377" "F\377" "\030\377";
40
#if 0
// Disabled reference data for tests that are commented out in this file.
// All arrays are zero-terminated lists of code units.
static const uint16_t refutf16LE4[] = {
  0xff54, 0xff45, 0xff53, 0xff54, 0xff3f, 0xff55, 0xff54, 0xff46,
  0xff11, 0xff16, 0xff2c, 0xff25, 0xff54, 0xff4f, 0xff35, 0xff34,
  0xff26, 0xff18, 0x0
};

// The two trailing code points above 0xffff are left out on TARGET_DARWIN.
static const uint32_t refutf32LE1[] = {
  0xff54, 0xff45, 0xff53, 0xff54, 0xff3f, 0xff55, 0xff54, 0xff46,
  0xff18, 0xff34, 0xff4f, 0xff1a, 0xff3f, 0xff43, 0xff48, 0xff41,
  0xff52, 0xff53, 0xff45, 0xff54, 0xff3f, 0xff35, 0xff34, 0xff26,
  0xff0d, 0xff13, 0xff12, 0xff2c, 0xff25, 0xff0c, 0xff3f, 0xff23,
  0xff33, 0xff54, 0xff44, 0xff33, 0xff54, 0xff52, 0xff49, 0xff4e,
  0xff47, 0xff13, 0xff12, 0xff3f,
#ifdef TARGET_DARWIN
  0x0
#else
  0x1f42d, 0x1f42e, 0x0
#endif
};

// Byte-swapped counterpart of the 0xffXX values used above.
// NOTE(review): 0x22ff looks like it may have been meant as 0x2cff (compare
// 0xff2c at the same position in refutf16LE4) - confirm before re-enabling.
static const uint16_t refutf16BE[] = {
  0x54ff, 0x45ff, 0x53ff, 0x54ff, 0x3fff, 0x55ff, 0x54ff, 0x46ff,
  0x11ff, 0x16ff, 0x22ff, 0x25ff, 0x54ff, 0x4fff, 0x35ff, 0x34ff,
  0x26ff, 0x18ff, 0x0
};

static const uint16_t refucs2[] = {
  0xff54, 0xff45, 0xff53, 0xff54, 0xff3f, 0xff55, 0xff43, 0xff53,
  0xff12, 0xff54, 0xff4f, 0xff35, 0xff34, 0xff26, 0xff18, 0x0
};
#endif
76
77class TestCharsetConverter : public testing::Test
78{
79protected:
80 TestCharsetConverter()
81 {
82 /* Add default settings for locale.
83 * Settings here are taken from CGUISettings::Initialize()
84 */
85 /*
86 //! @todo implement
87 CSettingsCategory *loc = CServiceBroker::GetSettingsComponent()->GetSettings()->AddCategory(7, "locale", 14090);
88 CServiceBroker::GetSettingsComponent()->GetSettings()->AddString(loc, CSettings::SETTING_LOCALE_LANGUAGE,248,"english",
89 SPIN_CONTROL_TEXT);
90 CServiceBroker::GetSettingsComponent()->GetSettings()->AddString(loc, CSettings::SETTING_LOCALE_COUNTRY, 20026, "USA",
91 SPIN_CONTROL_TEXT);
92 CServiceBroker::GetSettingsComponent()->GetSettings()->AddString(loc, CSettings::SETTING_LOCALE_CHARSET, 14091, "DEFAULT",
93 SPIN_CONTROL_TEXT); // charset is set by the
94 // language file
95
96 // Add default settings for subtitles
97 CSettingsCategory *sub = CServiceBroker::GetSettingsComponent()->GetSettings()->AddCategory(5, "subtitles", 287);
98 CServiceBroker::GetSettingsComponent()->GetSettings()->AddString(sub, CSettings::SETTING_SUBTITLES_CHARSET, 735, "DEFAULT",
99 SPIN_CONTROL_TEXT);
100 */
101 g_charsetConverter.reset();
102 g_charsetConverter.clear();
103 }
104
105 ~TestCharsetConverter() override
106 {
107 CServiceBroker::GetSettingsComponent()->GetSettings()->Unload();
108 }
109
110 std::string refstra1, refstra2, varstra1;
111 std::wstring refstrw1, varstrw1;
112 std::string refstr1;
113};
114
115TEST_F(TestCharsetConverter, utf8ToW)
116{
117 refstra1 = "test utf8ToW";
118 refstrw1 = L"test utf8ToW";
119 varstrw1.clear();
120 g_charsetConverter.utf8ToW(refstra1, varstrw1, true, false, false);
121 EXPECT_STREQ(refstrw1.c_str(), varstrw1.c_str());
122}
123
124
125//TEST_F(TestCharsetConverter, utf16LEtoW)
126//{
127// refstrw1 = L"test_utf16LEtow";
128// //! @todo Should be able to use '=' operator instead of assign()
129// std::wstring refstr16_1;
130// refstr16_1.assign(refutf16LE1);
131// varstrw1.clear();
132// g_charsetConverter.utf16LEtoW(refstr16_1, varstrw1);
133// EXPECT_STREQ(refstrw1.c_str(), varstrw1.c_str());
134//}
135
136TEST_F(TestCharsetConverter, subtitleCharsetToUtf8)
137{
138 refstra1 = "test subtitleCharsetToW";
139 varstra1.clear();
140 g_charsetConverter.subtitleCharsetToUtf8(refstra1, varstra1);
141
142 /* Assign refstra1 to refstrw1 so that we can compare */
143 EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
144}
145
146TEST_F(TestCharsetConverter, utf8ToStringCharset_1)
147{
148 refstra1 = "test utf8ToStringCharset";
149 varstra1.clear();
150 g_charsetConverter.utf8ToStringCharset(refstra1, varstra1);
151 EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
152}
153
154TEST_F(TestCharsetConverter, utf8ToStringCharset_2)
155{
156 refstra1 = "test utf8ToStringCharset";
157 varstra1 = "test utf8ToStringCharset";
158 g_charsetConverter.utf8ToStringCharset(varstra1);
159 EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
160}
161
162TEST_F(TestCharsetConverter, utf8ToSystem)
163{
164 refstra1 = "test utf8ToSystem";
165 varstra1 = "test utf8ToSystem";
166 g_charsetConverter.utf8ToSystem(varstra1);
167 EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
168}
169
170TEST_F(TestCharsetConverter, utf8To_ASCII)
171{
172 refstra1 = "test utf8To: charset ASCII, std::string";
173 varstra1.clear();
174 g_charsetConverter.utf8To("ASCII", refstra1, varstra1);
175 EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
176}
177
178/*
179TEST_F(TestCharsetConverter, utf8To_UTF16LE)
180{
181 refstra1 = "test_utf8To:_charset_UTF-16LE,_"
182 "CStdString16";
183 refstr16_1.assign(refutf16LE2);
184 varstr16_1.clear();
185 g_charsetConverter.utf8To("UTF-16LE", refstra1, varstr16_1);
186 EXPECT_TRUE(!memcmp(refstr16_1.c_str(), varstr16_1.c_str(),
187 refstr16_1.length() * sizeof(uint16_t)));
188}
189*/
190
191//TEST_F(TestCharsetConverter, utf8To_UTF32LE)
192//{
193// refstra1 = "test_utf8To:_charset_UTF-32LE,_"
194//#ifdef TARGET_DARWIN
195///* OSX has its own 'special' utf-8 charset which we use (see UTF8_SOURCE in CharsetConverter.cpp)
196// which is basically NFD (decomposed) utf-8. The trouble is, it fails on the COW FACE and MOUSE FACE
197// characters for some reason (possibly anything over 0x100000, or maybe there's a decomposed form of these
198// that I couldn't find???) If UTF8_SOURCE is switched to UTF-8 then this test would pass as-is, but then
199// some filenames stored in utf8-mac wouldn't display correctly in the UI. */
200// "CStdString32_";
201//#else
202// "CStdString32_🐭🐮";
203//#endif
204// refstr32_1.assign(refutf32LE1);
205// varstr32_1.clear();
206// g_charsetConverter.utf8To("UTF-32LE", refstra1, varstr32_1);
207// EXPECT_TRUE(!memcmp(refstr32_1.c_str(), varstr32_1.c_str(),
208// sizeof(refutf32LE1)));
209//}
210
211TEST_F(TestCharsetConverter, stringCharsetToUtf8)
212{
213 refstra1 = "test_stringCharsetToUtf8";
214 varstra1.clear();
215 g_charsetConverter.ToUtf8("UTF-16LE", refutf16LE3, varstra1);
216 EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
217}
218
219TEST_F(TestCharsetConverter, isValidUtf8_1)
220{
221 varstra1.clear();
222 g_charsetConverter.ToUtf8("UTF-16LE", refutf16LE3, varstra1);
223 EXPECT_TRUE(CUtf8Utils::isValidUtf8(varstra1.c_str()));
224}
225
226TEST_F(TestCharsetConverter, isValidUtf8_2)
227{
228 refstr1 = refutf16LE3;
229 EXPECT_FALSE(CUtf8Utils::isValidUtf8(refstr1));
230}
231
232TEST_F(TestCharsetConverter, isValidUtf8_3)
233{
234 varstra1.clear();
235 g_charsetConverter.ToUtf8("UTF-16LE", refutf16LE3, varstra1);
236 EXPECT_TRUE(CUtf8Utils::isValidUtf8(varstra1.c_str()));
237}
238
239TEST_F(TestCharsetConverter, isValidUtf8_4)
240{
241 EXPECT_FALSE(CUtf8Utils::isValidUtf8(refutf16LE3));
242}
243
244//! @todo Resolve correct input/output for this function
245// TEST_F(TestCharsetConverter, ucs2CharsetToStringCharset)
246// {
247// void ucs2CharsetToStringCharset(const std::wstring& strSource,
248// std::string& strDest, bool swap = false);
249// }
250
251TEST_F(TestCharsetConverter, wToUTF8)
252{
253 refstrw1 = L"test_wToUTF8";
254 refstra1 = u8"test_wToUTF8";
255 varstra1.clear();
256 g_charsetConverter.wToUTF8(refstrw1, varstra1);
257 EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
258}
259
260//TEST_F(TestCharsetConverter, utf16BEtoUTF8)
261//{
262// refstr16_1.assign(refutf16BE);
263// refstra1 = "test_utf16BEtoUTF8";
264// varstra1.clear();
265// g_charsetConverter.utf16BEtoUTF8(refstr16_1, varstra1);
266// EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
267//}
268
269//TEST_F(TestCharsetConverter, utf16LEtoUTF8)
270//{
271// refstr16_1.assign(refutf16LE4);
272// refstra1 = "test_utf16LEtoUTF8";
273// varstra1.clear();
274// g_charsetConverter.utf16LEtoUTF8(refstr16_1, varstra1);
275// EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
276//}
277
278//TEST_F(TestCharsetConverter, ucs2ToUTF8)
279//{
280// refstr16_1.assign(refucs2);
281// refstra1 = "test_ucs2toUTF8";
282// varstra1.clear();
283// g_charsetConverter.ucs2ToUTF8(refstr16_1, varstra1);
284// EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
285//}
286
287TEST_F(TestCharsetConverter, utf8logicalToVisualBiDi)
288{
289 refstra1 = "test_utf8logicalToVisualBiDi";
290 refstra2 = "test_utf8logicalToVisualBiDi";
291 varstra1.clear();
292 g_charsetConverter.utf8logicalToVisualBiDi(refstra1, varstra1);
293 EXPECT_STREQ(refstra2.c_str(), varstra1.c_str());
294}
295
296//! @todo Resolve correct input/output for this function
297// TEST_F(TestCharsetConverter, utf32ToStringCharset)
298// {
299// void utf32ToStringCharset(const unsigned long* strSource, std::string& strDest);
300// }
301
302TEST_F(TestCharsetConverter, getCharsetLabels)
303{
304 std::vector<std::string> reflabels;
305 reflabels.emplace_back("Western Europe (ISO)");
306 reflabels.emplace_back("Central Europe (ISO)");
307 reflabels.emplace_back("South Europe (ISO)");
308 reflabels.emplace_back("Baltic (ISO)");
309 reflabels.emplace_back("Cyrillic (ISO)");
310 reflabels.emplace_back("Arabic (ISO)");
311 reflabels.emplace_back("Greek (ISO)");
312 reflabels.emplace_back("Hebrew (ISO)");
313 reflabels.emplace_back("Turkish (ISO)");
314 reflabels.emplace_back("Central Europe (Windows)");
315 reflabels.emplace_back("Cyrillic (Windows)");
316 reflabels.emplace_back("Western Europe (Windows)");
317 reflabels.emplace_back("Greek (Windows)");
318 reflabels.emplace_back("Turkish (Windows)");
319 reflabels.emplace_back("Hebrew (Windows)");
320 reflabels.emplace_back("Arabic (Windows)");
321 reflabels.emplace_back("Baltic (Windows)");
322 reflabels.emplace_back("Vietnamese (Windows)");
323 reflabels.emplace_back("Thai (Windows)");
324 reflabels.emplace_back("Chinese Traditional (Big5)");
325 reflabels.emplace_back("Chinese Simplified (GBK)");
326 reflabels.emplace_back("Japanese (Shift-JIS)");
327 reflabels.emplace_back("Korean");
328 reflabels.emplace_back("Hong Kong (Big5-HKSCS)");
329
330 std::vector<std::string> varlabels = g_charsetConverter.getCharsetLabels();
331 ASSERT_EQ(reflabels.size(), varlabels.size());
332
333 size_t pos = 0;
334 for (const auto& it : varlabels)
335 {
336 EXPECT_STREQ((reflabels.at(pos++)).c_str(), it.c_str());
337 }
338}
339
340TEST_F(TestCharsetConverter, getCharsetLabelByName)
341{
342 std::string varstr =
343 g_charsetConverter.getCharsetLabelByName("ISO-8859-1");
344 EXPECT_STREQ("Western Europe (ISO)", varstr.c_str());
345 varstr.clear();
346 varstr = g_charsetConverter.getCharsetLabelByName("Bogus");
347 EXPECT_STREQ("", varstr.c_str());
348}
349
350TEST_F(TestCharsetConverter, getCharsetNameByLabel)
351{
352 std::string varstr =
353 g_charsetConverter.getCharsetNameByLabel("Western Europe (ISO)");
354 EXPECT_STREQ("ISO-8859-1", varstr.c_str());
355 varstr.clear();
356 varstr = g_charsetConverter.getCharsetNameByLabel("Bogus");
357 EXPECT_STREQ("", varstr.c_str());
358}
359
360TEST_F(TestCharsetConverter, unknownToUTF8_1)
361{
362 refstra1 = "test_unknownToUTF8";
363 varstra1 = "test_unknownToUTF8";
364 g_charsetConverter.unknownToUTF8(varstra1);
365 EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
366}
367
368TEST_F(TestCharsetConverter, unknownToUTF8_2)
369{
370 refstra1 = "test_unknownToUTF8";
371 varstra1.clear();
372 g_charsetConverter.unknownToUTF8(refstra1, varstra1);
373 EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
374}
375
376TEST_F(TestCharsetConverter, toW)
377{
378 refstra1 = "test_toW:_charset_UTF-16LE";
379 refstrw1 = L"\xBDEF\xEF94\x85BD\xBDEF\xEF93\x94BD\xBCEF\xEFBF"
380 L"\x94BD\xBDEF\xEF8F\xB7BC\xBCEF\xEF9A\xBFBC\xBDEF"
381 L"\xEF83\x88BD\xBDEF\xEF81\x92BD\xBDEF\xEF93\x85BD"
382 L"\xBDEF\xEF94\xBFBC\xBCEF\xEFB5\xB4BC\xBCEF\xEFA6"
383 L"\x8DBC\xBCEF\xEF91\x96BC\xBCEF\xEFAC\xA5BC";
384 varstrw1.clear();
385 g_charsetConverter.toW(refstra1, varstrw1, "UTF-16LE");
386 EXPECT_STREQ(refstrw1.c_str(), varstrw1.c_str());
387}
388
389TEST_F(TestCharsetConverter, fromW)
390{
391 refstrw1 = L"\xBDEF\xEF94\x85BD\xBDEF\xEF93\x94BD\xBCEF\xEFBF"
392 L"\x86BD\xBDEF\xEF92\x8FBD\xBDEF\xEF8D\xB7BC\xBCEF"
393 L"\xEF9A\xBFBC\xBDEF\xEF83\x88BD\xBDEF\xEF81\x92BD"
394 L"\xBDEF\xEF93\x85BD\xBDEF\xEF94\xBFBC\xBCEF\xEFB5"
395 L"\xB4BC\xBCEF\xEFA6\x8DBC\xBCEF\xEF91\x96BC\xBCEF"
396 L"\xEFAC\xA5BC";
397 refstra1 = "test_fromW:_charset_UTF-16LE";
398 varstra1.clear();
399 g_charsetConverter.fromW(refstrw1, varstra1, "UTF-16LE");
400 EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
401}