summaryrefslogtreecommitdiffstats
path: root/xbmc/utils/StringUtils.h
diff options
context:
space:
mode:
Diffstat (limited to 'xbmc/utils/StringUtils.h')
-rw-r--r--xbmc/utils/StringUtils.h403
1 files changed, 403 insertions, 0 deletions
diff --git a/xbmc/utils/StringUtils.h b/xbmc/utils/StringUtils.h
new file mode 100644
index 0000000..6aab4cd
--- /dev/null
+++ b/xbmc/utils/StringUtils.h
@@ -0,0 +1,403 @@
1/*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
4 *
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
8
9#pragma once
10
11//-----------------------------------------------------------------------
12//
13// File: StringUtils.h
14//
15// Purpose: ATL split string utility
16// Author: Paul J. Weiss
17//
18// Modified to support J O'Leary's std::string class by kraqh3d
19//
20//------------------------------------------------------------------------
21
22#include <stdarg.h>
23#include <stdint.h>
24#include <string>
25#include <vector>
26#include <sstream>
27#include <locale>
28
29// workaround for broken [[deprecated]] in coverity
30#if defined(__COVERITY__)
31#undef FMT_DEPRECATED
32#define FMT_DEPRECATED
33#endif
34#include <fmt/format.h>
35
36#if FMT_VERSION >= 40000
37#include <fmt/printf.h>
38#endif
39
40#include "XBDateTime.h"
41#include "utils/params_check_macros.h"
42
/*! \brief C preprocessor token stringification

The following macros turn a preprocessor token into a C style string
literal, either exactly as written or after macro expansion.

Example:

#define foo 4
DEF_TO_STR_NAME(foo) // yields "foo"
DEF_TO_STR_VALUE(foo) // yields "4"

*/

// Stringifies the argument exactly as written (the argument is not expanded).
#define DEF_TO_STR_NAME(token) #token
// Adds one level of indirection so the argument is macro-expanded before
// it is stringified.
#define DEF_TO_STR_VALUE(token) DEF_TO_STR_NAME(token)
58
/*! \brief Pass a formatting argument through, converting enums to int

Non-enum arguments are handed back unchanged by reference. Enum arguments
(scoped or unscoped) are converted to int.

The enum check is done on the decayed type. T is deduced from a forwarding
reference, so for an lvalue argument T is a reference type, and std::is_enum
is never true for a reference type. Without the decay an lvalue enum would
be routed to the pass-through overload and would not be converted.

\param arg Argument to pass through or convert
\return A reference to arg for non-enum types, the value cast to int for enum types
*/
template<typename T, std::enable_if_t<!std::is_enum<std::decay_t<T>>::value, int> = 0>
constexpr auto&& EnumToInt(T&& arg) noexcept
{
  return arg;
}
template<typename T, std::enable_if_t<std::is_enum<std::decay_t<T>>::value, int> = 0>
constexpr auto EnumToInt(T&& arg) noexcept
{
  return static_cast<int>(arg);
}
69
70class StringUtils
71{
72public:
73 /*! \brief Get a formatted string similar to sprintf
74
75 Beware that this does not support directly passing in
76 std::string objects. You need to call c_str() to pass
77 the const char* buffer representing the value of the
78 std::string object.
79
80 \param fmt Format of the resulting string
81 \param ... variable number of value type arguments
82 \return Formatted string
83 */
84 template<typename... Args>
85 static std::string Format(const std::string& fmt, Args&&... args)
86 {
87 // coverity[fun_call_w_exception : FALSE]
88 auto result = ::fmt::format(fmt, EnumToInt(std::forward<Args>(args))...);
89 if (result == fmt)
90 result = ::fmt::sprintf(fmt, EnumToInt(std::forward<Args>(args))...);
91
92 return result;
93 }
94 template<typename... Args>
95 static std::wstring Format(const std::wstring& fmt, Args&&... args)
96 {
97 // coverity[fun_call_w_exception : FALSE]
98 auto result = ::fmt::format(fmt, EnumToInt(std::forward<Args>(args))...);
99 if (result == fmt)
100 result = ::fmt::sprintf(fmt, EnumToInt(std::forward<Args>(args))...);
101
102 return result;
103 }
104
105 static std::string FormatV(PRINTF_FORMAT_STRING const char *fmt, va_list args);
106 static std::wstring FormatV(PRINTF_FORMAT_STRING const wchar_t *fmt, va_list args);
107 static void ToUpper(std::string &str);
108 static void ToUpper(std::wstring &str);
109 static void ToLower(std::string &str);
110 static void ToLower(std::wstring &str);
111 static void ToCapitalize(std::string &str);
112 static void ToCapitalize(std::wstring &str);
113 static bool EqualsNoCase(const std::string &str1, const std::string &str2);
114 static bool EqualsNoCase(const std::string &str1, const char *s2);
115 static bool EqualsNoCase(const char *s1, const char *s2);
116 static int CompareNoCase(const std::string& str1, const std::string& str2, size_t n = 0);
117 static int CompareNoCase(const char* s1, const char* s2, size_t n = 0);
118 static int ReturnDigits(const std::string &str);
119 static std::string Left(const std::string &str, size_t count);
120 static std::string Mid(const std::string &str, size_t first, size_t count = std::string::npos);
121 static std::string Right(const std::string &str, size_t count);
122 static std::string& Trim(std::string &str);
123 static std::string& Trim(std::string &str, const char* const chars);
124 static std::string& TrimLeft(std::string &str);
125 static std::string& TrimLeft(std::string &str, const char* const chars);
126 static std::string& TrimRight(std::string &str);
127 static std::string& TrimRight(std::string &str, const char* const chars);
128 static std::string& RemoveDuplicatedSpacesAndTabs(std::string& str);
129 static int Replace(std::string &str, char oldChar, char newChar);
130 static int Replace(std::string &str, const std::string &oldStr, const std::string &newStr);
131 static int Replace(std::wstring &str, const std::wstring &oldStr, const std::wstring &newStr);
132 static bool StartsWith(const std::string &str1, const std::string &str2);
133 static bool StartsWith(const std::string &str1, const char *s2);
134 static bool StartsWith(const char *s1, const char *s2);
135 static bool StartsWithNoCase(const std::string &str1, const std::string &str2);
136 static bool StartsWithNoCase(const std::string &str1, const char *s2);
137 static bool StartsWithNoCase(const char *s1, const char *s2);
138 static bool EndsWith(const std::string &str1, const std::string &str2);
139 static bool EndsWith(const std::string &str1, const char *s2);
140 static bool EndsWithNoCase(const std::string &str1, const std::string &str2);
141 static bool EndsWithNoCase(const std::string &str1, const char *s2);
142
/*! \brief Concatenate the elements of a container, separated by a delimiter

The delimiter is appended before every element except the first, so no
temporary string is built per element and nothing has to be erased from
the end afterwards.

\param strings Container whose elements are appended in iteration order
\param delimiter Text placed between consecutive elements
\return The joined string; empty if the container has no elements
*/
template<typename CONTAINER>
static std::string Join(const CONTAINER &strings, const std::string& delimiter)
{
  std::string result;
  bool first = true;
  for (const auto& str : strings)
  {
    if (!first)
      result += delimiter;
    result += str;
    first = false;
  }
  return result;
}
154
155 /*! \brief Splits the given input string using the given delimiter into separate strings.
156
157 If the given input string is empty the result will be an empty array (not
158 an array containing an empty string).
159
160 \param input Input string to be split
161 \param delimiter Delimiter to be used to split the input string
162 \param iMaxStrings (optional) Maximum number of split strings
163 */
164 static std::vector<std::string> Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings = 0);
165 static std::vector<std::string> Split(const std::string& input, const char delimiter, size_t iMaxStrings = 0);
166 static std::vector<std::string> Split(const std::string& input, const std::vector<std::string> &delimiters);
/*! \brief Splits the given input string using the given delimiter into separate strings.

If the given input string is empty nothing will be put into the target iterator.
If the delimiter is empty the whole input is written as one single element.

\param d_first the beginning of the destination range
\param input Input string to be split
\param delimiter Delimiter to be used to split the input string
\param iMaxStrings (optional) Maximum number of split strings, 0 meaning no limit.
                   When the limit is reached the rest of the input is written
                   unsplit as the last element.
\return output iterator to the element in the destination range, one past the last element
        that was put there
*/
template<typename OutputIt>
static OutputIt SplitTo(OutputIt d_first, const std::string& input, const std::string& delimiter, unsigned int iMaxStrings = 0)
{
  OutputIt dest = d_first;

  if (input.empty())
    return dest;
  if (delimiter.empty())
  {
    // Write through (and advance) dest, not d_first, so the iterator handed
    // back really points one past the element that was just stored.
    *dest++ = input;
    return dest;
  }

  const size_t delimLen = delimiter.length();
  size_t nextDelim;
  size_t textPos = 0;
  do
  {
    // With iMaxStrings == 0 the decrement wraps around to the largest
    // unsigned value, which is what makes 0 behave as "no limit".
    if (--iMaxStrings == 0)
    {
      // Limit reached: emit the remainder of the input as the last element.
      *dest++ = input.substr(textPos);
      break;
    }
    nextDelim = input.find(delimiter, textPos);
    // When no further delimiter exists nextDelim is npos and substr() simply
    // returns everything up to the end of the input.
    *dest++ = input.substr(textPos, nextDelim - textPos);
    textPos = nextDelim + delimLen;
  } while (nextDelim != std::string::npos);

  return dest;
}
/*! \brief Single-character delimiter variant of SplitTo.

Wraps the delimiter character in a one-character string and forwards the
call, together with iMaxStrings, to the SplitTo overload that takes a
string delimiter.

\param d_first the beginning of the destination range
\param input Input string to be split
\param delimiter Delimiter character to be used to split the input string
\param iMaxStrings (optional) Maximum number of split strings
\return the output iterator returned by the string-delimiter overload
*/
template<typename OutputIt>
static OutputIt SplitTo(OutputIt d_first, const std::string& input, const char delimiter, size_t iMaxStrings = 0)
{
  const std::string delimiterString(1, delimiter);
  return SplitTo(d_first, input, delimiterString, iMaxStrings);
}
213 template<typename OutputIt>
214 static OutputIt SplitTo(OutputIt d_first, const std::string& input, const std::vector<std::string> &delimiters)
215 {
216 OutputIt dest = d_first;
217 if (input.empty())
218 return dest;
219
220 if (delimiters.empty())
221 {
222 *dest++ = input;
223 return dest;
224 }
225 std::string str = input;
226 for (size_t di = 1; di < delimiters.size(); di++)
227 StringUtils::Replace(str, delimiters[di], delimiters[0]);
228 return SplitTo(dest, str, delimiters[0]);
229 }
230
231 /*! \brief Splits the given input strings using the given delimiters into further separate strings.
232
233 If the given input string vector is empty the result will be an empty array (not
234 an array containing an empty string).
235
236 Delimiter strings are applied in order, so once the (optional) maximum number of
237 items is produced no other delimiters are applied. This produces different results
238 to applying all delimiters at once e.g. "a/b#c/d" becomes "a", "b#c", "d" rather
239 than "a", "b", "c/d"
240
241 \param input Input vector of strings each to be split
242 \param delimiters Delimiter strings to be used to split the input strings
243 \param iMaxStrings (optional) Maximum number of resulting split strings
244 */
245 static std::vector<std::string> SplitMulti(const std::vector<std::string> &input, const std::vector<std::string> &delimiters, unsigned int iMaxStrings = 0);
246 static int FindNumber(const std::string& strInput, const std::string &strFind);
247 static int64_t AlphaNumericCompare(const wchar_t *left, const wchar_t *right);
248 static int AlphaNumericCollation(int nKey1, const void* pKey1, int nKey2, const void* pKey2);
249 static long TimeStringToSeconds(const std::string &timeString);
250 static void RemoveCRLF(std::string& strLine);
251
252 /*! \brief utf8 version of strlen - skips any non-starting bytes in the count, thus returning the number of utf8 characters
253 \param s c-string to find the length of.
254 \return the number of utf8 characters in the string.
255 */
256 static size_t utf8_strlen(const char *s);
257
258 /*! \brief convert a time in seconds to a string based on the given time format
259 \param seconds time in seconds
260 \param format the format we want the time in.
261 \return the formatted time
262 \sa TIME_FORMAT
263 */
264 static std::string SecondsToTimeString(long seconds, TIME_FORMAT format = TIME_FORMAT_GUESS);
265
266 /*! \brief check whether a string is a natural number.
267 Matches [ \t]*[0-9]+[ \t]*
268 \param str the string to check
269 \return true if the string is a natural number, false otherwise.
270 */
271 static bool IsNaturalNumber(const std::string& str);
272
273 /*! \brief check whether a string is an integer.
274 Matches [ \t]*[\-]*[0-9]+[ \t]*
275 \param str the string to check
276 \return true if the string is an integer, false otherwise.
277 */
278 static bool IsInteger(const std::string& str);
279
280 /* The next several isasciiXX and asciiXXvalue functions are locale independent (US-ASCII only),
281 * as opposed to standard ::isXX (::isalpha, ::isdigit...) which are locale dependent.
282 * Next functions get parameter as char and don't need double cast ((int)(unsigned char) is required for standard functions). */
// True only for the characters '0' through '9'.
inline static bool isasciidigit(char chr) // locale independent
{
  return '0' <= chr && chr <= '9';
}
// True for '0'-'9', 'a'-'f' and 'A'-'F'.
inline static bool isasciixdigit(char chr) // locale independent
{
  if (chr >= '0' && chr <= '9')
    return true;
  if (chr >= 'a' && chr <= 'f')
    return true;
  return chr >= 'A' && chr <= 'F';
}
291 static int asciidigitvalue(char chr); // locale independent
292 static int asciixdigitvalue(char chr); // locale independent
// True only for the characters 'A' through 'Z'.
inline static bool isasciiuppercaseletter(char chr) // locale independent
{
  return !(chr < 'A' || chr > 'Z');
}
// True only for the characters 'a' through 'z'.
inline static bool isasciilowercaseletter(char chr) // locale independent
{
  return 'a' <= chr && chr <= 'z';
}
301 inline static bool isasciialphanum(char chr) // locale independent
302 {
303 return isasciiuppercaseletter(chr) || isasciilowercaseletter(chr) || isasciidigit(chr);
304 }
305 static std::string SizeToString(int64_t size);
306 static const std::string Empty;
307 static size_t FindWords(const char *str, const char *wordLowerCase);
308 static int FindEndBracket(const std::string &str, char opener, char closer, int startPos = 0);
309 static int DateStringToYYYYMMDD(const std::string &dateString);
310 static std::string ISODateToLocalizedDate (const std::string& strIsoDate);
311 static void WordToDigits(std::string &word);
312 static std::string CreateUUID();
313 static bool ValidateUUID(const std::string &uuid); // NB only validates syntax
314 static double CompareFuzzy(const std::string &left, const std::string &right);
315 static int FindBestMatch(const std::string &str, const std::vector<std::string> &strings, double &matchscore);
316 static bool ContainsKeyword(const std::string &str, const std::vector<std::string> &keywords);
317
318 /*! \brief Convert the string of binary chars to the actual string.
319
320 Convert the string representation of binary chars to the actual string.
321 For example \1\2\3 is converted to a string with binary char \1, \2 and \3
322
323 \param in String to convert
324 \return Converted string
325 */
326 static std::string BinaryStringToString(const std::string& in);
327 /**
328 * Convert each character in the string to its hexadecimal
329 * representation and return the concatenated result
330 *
331 * example: "abc\n" -> "6162630a"
332 */
333 static std::string ToHexadecimal(const std::string& in);
334 /*! \brief Format the string with locale separators.
335
336 Format the string with locale separators.
337 For example 10000.57 in en-us is '10,000.57' but in italian is '10.000,57'
338
339 \param param String to format
340 \return Formatted string
341 */
342 template<typename T>
343 static std::string FormatNumber(T num)
344 {
345 std::stringstream ss;
346// ifdef is needed because when you set _ITERATOR_DEBUG_LEVEL=0 and you use custom numpunct you will get runtime error in debug mode
347// for more info https://connect.microsoft.com/VisualStudio/feedback/details/2655363
348#if !(defined(_DEBUG) && defined(TARGET_WINDOWS))
349 ss.imbue(GetOriginalLocale());
350#endif
351 ss.precision(1);
352 ss << std::fixed << num;
353 return ss.str();
354 }
355
356 /*! \brief Escapes the given string to be able to be used as a parameter.
357
358 Escapes backslashes and double-quotes with an additional backslash and
359 adds double-quotes around the whole string.
360
361 \param param String to escape/paramify
362 \return Escaped/Paramified string
363 */
364 static std::string Paramify(const std::string &param);
365
366 /*! \brief Split a string by the specified delimiters.
367 Splits a string using one or more delimiting characters, ignoring empty tokens.
368 Differs from Split() in two ways:
369 1. The delimiters are treated as individual characters, rather than a single delimiting string.
370 2. Empty tokens are ignored.
371 \return a vector of tokens
372 */
373 static std::vector<std::string> Tokenize(const std::string& input, const std::string& delimiters);
374 static void Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters);
375 static std::vector<std::string> Tokenize(const std::string& input, const char delimiter);
376 static void Tokenize(const std::string& input, std::vector<std::string>& tokens, const char delimiter);
377 static uint64_t ToUint64(std::string str, uint64_t fallback) noexcept;
378
379 /*!
380 * Returns bytes in a human readable format using the smallest unit that will fit `bytes` in at
381 * most three digits. The number of decimals are adjusted with significance such that 'small'
382 * numbers will have more decimals than larger ones.
383 *
384 * For example: 1024 bytes will be formatted as "1.00kB", 10240 bytes as "10.0kB" and
385 * 102400 bytes as "100kB". See TestStringUtils for more examples.
386 */
387 static std::string FormatFileSize(uint64_t bytes);
388
389private:
390 /*!
391 * Wrapper for CLangInfo::GetOriginalLocale() which allows us to
392 * avoid including LangInfo.h from this header.
393 */
394 static const std::locale& GetOriginalLocale() noexcept;
395};
396
397struct sortstringbyname
398{
399 bool operator()(const std::string& strItem1, const std::string& strItem2)
400 {
401 return StringUtils::CompareNoCase(strItem1, strItem2) < 0;
402 }
403};