diff options
Diffstat (limited to 'xbmc/utils/StringUtils.h')
| -rw-r--r-- | xbmc/utils/StringUtils.h | 403 |
1 files changed, 403 insertions, 0 deletions
diff --git a/xbmc/utils/StringUtils.h b/xbmc/utils/StringUtils.h new file mode 100644 index 0000000..6aab4cd --- /dev/null +++ b/xbmc/utils/StringUtils.h | |||
| @@ -0,0 +1,403 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2005-2018 Team Kodi | ||
| 3 | * This file is part of Kodi - https://kodi.tv | ||
| 4 | * | ||
| 5 | * SPDX-License-Identifier: GPL-2.0-or-later | ||
| 6 | * See LICENSES/README.md for more information. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #pragma once | ||
| 10 | |||
| 11 | //----------------------------------------------------------------------- | ||
| 12 | // | ||
| 13 | // File: StringUtils.h | ||
| 14 | // | ||
| 15 | // Purpose: ATL split string utility | ||
| 16 | // Author: Paul J. Weiss | ||
| 17 | // | ||
| 18 | // Modified to support J O'Leary's std::string class by kraqh3d | ||
| 19 | // | ||
| 20 | //------------------------------------------------------------------------ | ||
| 21 | |||
| 22 | #include <stdarg.h> | ||
| 23 | #include <stdint.h> | ||
| 24 | #include <string> | ||
| 25 | #include <vector> | ||
| 26 | #include <sstream> | ||
| 27 | #include <locale> | ||
| 28 | |||
| 29 | // workaround for broken [[deprecated]] in coverity | ||
| 30 | #if defined(__COVERITY__) | ||
| 31 | #undef FMT_DEPRECATED | ||
| 32 | #define FMT_DEPRECATED | ||
| 33 | #endif | ||
| 34 | #include <fmt/format.h> | ||
| 35 | |||
| 36 | #if FMT_VERSION >= 40000 | ||
| 37 | #include <fmt/printf.h> | ||
| 38 | #endif | ||
| 39 | |||
| 40 | #include "XBDateTime.h" | ||
| 41 | #include "utils/params_check_macros.h" | ||
| 42 | |||
| 43 | /*! \brief C preprocessor token stringification | ||
| 44 | |||
| 45 | The following macros can be used to stringify definitions to | ||
| 46 | C style strings. | ||
| 47 | |||
| 48 | Example: | ||
| 49 | |||
| 50 | #define foo 4 | ||
| 51 | DEF_TO_STR_NAME(foo) // outputs "foo" | ||
| 52 | DEF_TO_STR_VALUE(foo) // outputs "4" | ||
| 53 | |||
| 54 | */ | ||
| 55 | |||
| 56 | #define DEF_TO_STR_NAME(x) #x | ||
| 57 | #define DEF_TO_STR_VALUE(x) DEF_TO_STR_NAME(x) | ||
| 58 | |||
/*! \brief Pass-through overload of EnumToInt for non-enumeration arguments.

 Selected when the argument's type, with any reference removed, is not an
 enumeration. The argument is handed back by reference, unchanged.

 \param arg value to pass through
 \return reference to \p arg
 */
// The trait is applied to remove_reference_t<T>: for an lvalue argument T is
// deduced as "E&", and std::is_enum<E&> is always false.
template<typename T, std::enable_if_t<!std::is_enum<std::remove_reference_t<T>>::value, int> = 0>
constexpr auto&& EnumToInt(T&& arg) noexcept
{
  return arg;
}

/*! \brief Converts an enumeration value to int.

 Selected when the argument's type, with any reference removed, is an
 enumeration, so lvalue and rvalue enumerators are treated alike.

 \param arg enumeration value to convert
 \return \p arg converted with static_cast<int>
 */
template<typename T, std::enable_if_t<std::is_enum<std::remove_reference_t<T>>::value, int> = 0>
constexpr auto EnumToInt(T&& arg) noexcept
{
  return static_cast<int>(arg);
}
| 69 | |||
| 70 | class StringUtils | ||
| 71 | { | ||
| 72 | public: | ||
| 73 | /*! \brief Get a formatted string similar to sprintf | ||
| 74 | |||
| 75 | Beware that this does not support directly passing in | ||
| 76 | std::string objects. You need to call c_str() to pass | ||
| 77 | the const char* buffer representing the value of the | ||
| 78 | std::string object. | ||
| 79 | |||
| 80 | \param fmt Format of the resulting string | ||
| 81 | \param ... variable number of value type arguments | ||
| 82 | \return Formatted string | ||
| 83 | */ | ||
| 84 | template<typename... Args> | ||
| 85 | static std::string Format(const std::string& fmt, Args&&... args) | ||
| 86 | { | ||
| 87 | // coverity[fun_call_w_exception : FALSE] | ||
| 88 | auto result = ::fmt::format(fmt, EnumToInt(std::forward<Args>(args))...); | ||
| 89 | if (result == fmt) | ||
| 90 | result = ::fmt::sprintf(fmt, EnumToInt(std::forward<Args>(args))...); | ||
| 91 | |||
| 92 | return result; | ||
| 93 | } | ||
| 94 | template<typename... Args> | ||
| 95 | static std::wstring Format(const std::wstring& fmt, Args&&... args) | ||
| 96 | { | ||
| 97 | // coverity[fun_call_w_exception : FALSE] | ||
| 98 | auto result = ::fmt::format(fmt, EnumToInt(std::forward<Args>(args))...); | ||
| 99 | if (result == fmt) | ||
| 100 | result = ::fmt::sprintf(fmt, EnumToInt(std::forward<Args>(args))...); | ||
| 101 | |||
| 102 | return result; | ||
| 103 | } | ||
| 104 | |||
| 105 | static std::string FormatV(PRINTF_FORMAT_STRING const char *fmt, va_list args); | ||
| 106 | static std::wstring FormatV(PRINTF_FORMAT_STRING const wchar_t *fmt, va_list args); | ||
| 107 | static void ToUpper(std::string &str); | ||
| 108 | static void ToUpper(std::wstring &str); | ||
| 109 | static void ToLower(std::string &str); | ||
| 110 | static void ToLower(std::wstring &str); | ||
| 111 | static void ToCapitalize(std::string &str); | ||
| 112 | static void ToCapitalize(std::wstring &str); | ||
| 113 | static bool EqualsNoCase(const std::string &str1, const std::string &str2); | ||
| 114 | static bool EqualsNoCase(const std::string &str1, const char *s2); | ||
| 115 | static bool EqualsNoCase(const char *s1, const char *s2); | ||
| 116 | static int CompareNoCase(const std::string& str1, const std::string& str2, size_t n = 0); | ||
| 117 | static int CompareNoCase(const char* s1, const char* s2, size_t n = 0); | ||
| 118 | static int ReturnDigits(const std::string &str); | ||
| 119 | static std::string Left(const std::string &str, size_t count); | ||
| 120 | static std::string Mid(const std::string &str, size_t first, size_t count = std::string::npos); | ||
| 121 | static std::string Right(const std::string &str, size_t count); | ||
| 122 | static std::string& Trim(std::string &str); | ||
| 123 | static std::string& Trim(std::string &str, const char* const chars); | ||
| 124 | static std::string& TrimLeft(std::string &str); | ||
| 125 | static std::string& TrimLeft(std::string &str, const char* const chars); | ||
| 126 | static std::string& TrimRight(std::string &str); | ||
| 127 | static std::string& TrimRight(std::string &str, const char* const chars); | ||
| 128 | static std::string& RemoveDuplicatedSpacesAndTabs(std::string& str); | ||
| 129 | static int Replace(std::string &str, char oldChar, char newChar); | ||
| 130 | static int Replace(std::string &str, const std::string &oldStr, const std::string &newStr); | ||
| 131 | static int Replace(std::wstring &str, const std::wstring &oldStr, const std::wstring &newStr); | ||
| 132 | static bool StartsWith(const std::string &str1, const std::string &str2); | ||
| 133 | static bool StartsWith(const std::string &str1, const char *s2); | ||
| 134 | static bool StartsWith(const char *s1, const char *s2); | ||
| 135 | static bool StartsWithNoCase(const std::string &str1, const std::string &str2); | ||
| 136 | static bool StartsWithNoCase(const std::string &str1, const char *s2); | ||
| 137 | static bool StartsWithNoCase(const char *s1, const char *s2); | ||
| 138 | static bool EndsWith(const std::string &str1, const std::string &str2); | ||
| 139 | static bool EndsWith(const std::string &str1, const char *s2); | ||
| 140 | static bool EndsWithNoCase(const std::string &str1, const std::string &str2); | ||
| 141 | static bool EndsWithNoCase(const std::string &str1, const char *s2); | ||
| 142 | |||
/*! \brief Concatenates the elements of a container, separated by a delimiter.

 \param strings container whose elements are appended in iteration order
 \param delimiter text placed between consecutive elements
 \return the joined string; empty if the container is empty
 */
template<typename CONTAINER>
static std::string Join(const CONTAINER &strings, const std::string& delimiter)
{
  std::string result;
  for (const auto& str : strings)
  {
    // append in place instead of building a temporary "str + delimiter" per element
    result += str;
    result += delimiter;
  }

  // every element was followed by a delimiter, so drop the trailing one
  if (!result.empty())
    result.erase(result.size() - delimiter.size());
  return result;
}
| 154 | |||
| 155 | /*! \brief Splits the given input string using the given delimiter into separate strings. | ||
| 156 | |||
| 157 | If the given input string is empty the result will be an empty array (not | ||
| 158 | an array containing an empty string). | ||
| 159 | |||
| 160 | \param input Input string to be split | ||
| 161 | \param delimiter Delimiter to be used to split the input string | ||
| 162 | \param iMaxStrings (optional) Maximum number of split strings | ||
| 163 | */ | ||
| 164 | static std::vector<std::string> Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings = 0); | ||
| 165 | static std::vector<std::string> Split(const std::string& input, const char delimiter, size_t iMaxStrings = 0); | ||
| 166 | static std::vector<std::string> Split(const std::string& input, const std::vector<std::string> &delimiters); | ||
/*! \brief Splits the given input string using the given delimiter into separate strings.

 If the given input string is empty nothing will be put into the target iterator.
 If the delimiter is empty the whole input is written as a single element.

 \param d_first the beginning of the destination range
 \param input Input string to be split
 \param delimiter Delimiter to be used to split the input string
 \param iMaxStrings (optional) Maximum number of split strings; once the limit is
                    reached the remainder of the input is emitted as the last element
 \return output iterator to the element in the destination range, one past the last element
         that was put there
 */
template<typename OutputIt>
static OutputIt SplitTo(OutputIt d_first, const std::string& input, const std::string& delimiter, unsigned int iMaxStrings = 0)
{
  OutputIt dest = d_first;

  if (input.empty())
    return dest;
  if (delimiter.empty())
  {
    // write through (and advance) the iterator that is returned, so the caller
    // gets the position one past the element that was just written
    *dest++ = input;
    return dest;
  }

  const size_t delimLen = delimiter.length();
  size_t nextDelim;
  size_t textPos = 0;
  do
  {
    // iMaxStrings is unsigned: the default of 0 wraps around on the first
    // decrement, so the limit is effectively never reached
    if (--iMaxStrings == 0)
    {
      *dest++ = input.substr(textPos);
      break;
    }
    nextDelim = input.find(delimiter, textPos);
    // when no further delimiter exists nextDelim is npos and substr() takes the rest
    *dest++ = input.substr(textPos, nextDelim - textPos);
    textPos = nextDelim + delimLen;
  } while (nextDelim != std::string::npos);

  return dest;
}
/*! \brief Single-character delimiter variant of SplitTo().

 Turns the delimiter character into a one-character string and forwards to
 the string-delimiter overload.

 \param d_first the beginning of the destination range
 \param input Input string to be split
 \param delimiter Delimiter character to be used to split the input string
 \param iMaxStrings (optional) Maximum number of split strings; converted to
                    unsigned int for the string-delimiter overload
 \return the output iterator returned by the string-delimiter overload
 */
template<typename OutputIt>
static OutputIt SplitTo(OutputIt d_first, const std::string& input, const char delimiter, size_t iMaxStrings = 0)
{
  const std::string delimiterText(1, delimiter);
  return SplitTo(d_first, input, delimiterText, static_cast<unsigned int>(iMaxStrings));
}
| 213 | template<typename OutputIt> | ||
| 214 | static OutputIt SplitTo(OutputIt d_first, const std::string& input, const std::vector<std::string> &delimiters) | ||
| 215 | { | ||
| 216 | OutputIt dest = d_first; | ||
| 217 | if (input.empty()) | ||
| 218 | return dest; | ||
| 219 | |||
| 220 | if (delimiters.empty()) | ||
| 221 | { | ||
| 222 | *dest++ = input; | ||
| 223 | return dest; | ||
| 224 | } | ||
| 225 | std::string str = input; | ||
| 226 | for (size_t di = 1; di < delimiters.size(); di++) | ||
| 227 | StringUtils::Replace(str, delimiters[di], delimiters[0]); | ||
| 228 | return SplitTo(dest, str, delimiters[0]); | ||
| 229 | } | ||
| 230 | |||
| 231 | /*! \brief Splits the given input strings using the given delimiters into further separate strings. | ||
| 232 | |||
| 233 | If the given input string vector is empty the result will be an empty array (not | ||
| 234 | an array containing an empty string). | ||
| 235 | |||
| 236 | Delimiter strings are applied in order, so once the (optional) maximum number of | ||
| 237 | items is produced no other delimiters are applied. This produces different results | ||
| 238 | to applying all delimiters at once e.g. "a/b#c/d" becomes "a", "b#c", "d" rather | ||
| 239 | than "a", "b", "c/d" | ||
| 240 | |||
| 241 | \param input Input vector of strings each to be split | ||
| 242 | \param delimiters Delimiter strings to be used to split the input strings | ||
| 243 | \param iMaxStrings (optional) Maximum number of resulting split strings | ||
| 244 | */ | ||
| 245 | static std::vector<std::string> SplitMulti(const std::vector<std::string> &input, const std::vector<std::string> &delimiters, unsigned int iMaxStrings = 0); | ||
| 246 | static int FindNumber(const std::string& strInput, const std::string &strFind); | ||
| 247 | static int64_t AlphaNumericCompare(const wchar_t *left, const wchar_t *right); | ||
| 248 | static int AlphaNumericCollation(int nKey1, const void* pKey1, int nKey2, const void* pKey2); | ||
| 249 | static long TimeStringToSeconds(const std::string &timeString); | ||
| 250 | static void RemoveCRLF(std::string& strLine); | ||
| 251 | |||
| 252 | /*! \brief utf8 version of strlen - skips any non-starting bytes in the count, thus returning the number of utf8 characters | ||
| 253 | \param s c-string to find the length of. | ||
| 254 | \return the number of utf8 characters in the string. | ||
| 255 | */ | ||
| 256 | static size_t utf8_strlen(const char *s); | ||
| 257 | |||
| 258 | /*! \brief convert a time in seconds to a string based on the given time format | ||
| 259 | \param seconds time in seconds | ||
| 260 | \param format the format we want the time in. | ||
| 261 | \return the formatted time | ||
| 262 | \sa TIME_FORMAT | ||
| 263 | */ | ||
| 264 | static std::string SecondsToTimeString(long seconds, TIME_FORMAT format = TIME_FORMAT_GUESS); | ||
| 265 | |||
| 266 | /*! \brief check whether a string is a natural number. | ||
| 267 | Matches [ \t]*[0-9]+[ \t]* | ||
| 268 | \param str the string to check | ||
| 269 | \return true if the string is a natural number, false otherwise. | ||
| 270 | */ | ||
| 271 | static bool IsNaturalNumber(const std::string& str); | ||
| 272 | |||
| 273 | /*! \brief check whether a string is an integer. | ||
| 274 | Matches [ \t]*[\-]*[0-9]+[ \t]* | ||
| 275 | \param str the string to check | ||
| 276 | \return true if the string is an integer, false otherwise. | ||
| 277 | */ | ||
| 278 | static bool IsInteger(const std::string& str); | ||
| 279 | |||
/* The next several isasciiXX and asciiXXvalue functions are locale independent (US-ASCII only),
 * as opposed to standard ::isXX (::isalpha, ::isdigit...) which are locale dependent.
 * Next functions get parameter as char and don't need double cast ((int)(unsigned char) is required for standard functions).
 * The isasciiXX predicates are constexpr, so they can also be used in constant expressions. */

/*! \brief Check whether a character is one of '0'..'9'.
 \param chr the character to check
 \return true if \p chr is a decimal digit, false otherwise.
 */
inline static constexpr bool isasciidigit(char chr) // locale independent
{
  return chr >= '0' && chr <= '9';
}

/*! \brief Check whether a character is one of '0'..'9', 'a'..'f' or 'A'..'F'.
 \param chr the character to check
 \return true if \p chr is a hexadecimal digit, false otherwise.
 */
inline static constexpr bool isasciixdigit(char chr) // locale independent
{
  return (chr >= '0' && chr <= '9') || (chr >= 'a' && chr <= 'f') || (chr >= 'A' && chr <= 'F');
}
static int asciidigitvalue(char chr); // locale independent
static int asciixdigitvalue(char chr); // locale independent

/*! \brief Check whether a character is one of 'A'..'Z'.
 \param chr the character to check
 \return true if \p chr is an uppercase letter, false otherwise.
 */
inline static constexpr bool isasciiuppercaseletter(char chr) // locale independent
{
  return (chr >= 'A' && chr <= 'Z');
}

/*! \brief Check whether a character is one of 'a'..'z'.
 \param chr the character to check
 \return true if \p chr is a lowercase letter, false otherwise.
 */
inline static constexpr bool isasciilowercaseletter(char chr) // locale independent
{
  return (chr >= 'a' && chr <= 'z');
}

/*! \brief Check whether a character is a letter ('A'..'Z', 'a'..'z') or a digit ('0'..'9').
 \param chr the character to check
 \return true if \p chr is alphanumeric, false otherwise.
 */
inline static constexpr bool isasciialphanum(char chr) // locale independent
{
  return isasciiuppercaseletter(chr) || isasciilowercaseletter(chr) || isasciidigit(chr);
}
| 305 | static std::string SizeToString(int64_t size); | ||
| 306 | static const std::string Empty; | ||
| 307 | static size_t FindWords(const char *str, const char *wordLowerCase); | ||
| 308 | static int FindEndBracket(const std::string &str, char opener, char closer, int startPos = 0); | ||
| 309 | static int DateStringToYYYYMMDD(const std::string &dateString); | ||
| 310 | static std::string ISODateToLocalizedDate (const std::string& strIsoDate); | ||
| 311 | static void WordToDigits(std::string &word); | ||
| 312 | static std::string CreateUUID(); | ||
| 313 | static bool ValidateUUID(const std::string &uuid); // NB only validates syntax | ||
| 314 | static double CompareFuzzy(const std::string &left, const std::string &right); | ||
| 315 | static int FindBestMatch(const std::string &str, const std::vector<std::string> &strings, double &matchscore); | ||
| 316 | static bool ContainsKeyword(const std::string &str, const std::vector<std::string> &keywords); | ||
| 317 | |||
| 318 | /*! \brief Convert the string of binary chars to the actual string. | ||
| 319 | |||
| 320 | Convert the string representation of binary chars to the actual string. | ||
| 321 | For example \1\2\3 is converted to a string with binary char \1, \2 and \3 | ||
| 322 | |||
| 323 | \param in String to convert | ||
| 324 | \return Converted string | ||
| 325 | */ | ||
| 326 | static std::string BinaryStringToString(const std::string& in); | ||
| 327 | /** | ||
| 328 | * Convert each character in the string to its hexadecimal | ||
| 329 | * representation and return the concatenated result | ||
| 330 | * | ||
| 331 | * example: "abc\n" -> "6162630a" | ||
| 332 | */ | ||
| 333 | static std::string ToHexadecimal(const std::string& in); | ||
| 334 | /*! \brief Format the string with locale separators. | ||
| 335 | |||
| 336 | Format the string with locale separators. | ||
| 337 | For example 10000.57 in en-us is '10,000.57' but in italian is '10.000,57' | ||
| 338 | |||
| 339 | \param param String to format | ||
| 340 | \return Formatted string | ||
| 341 | */ | ||
| 342 | template<typename T> | ||
| 343 | static std::string FormatNumber(T num) | ||
| 344 | { | ||
| 345 | std::stringstream ss; | ||
| 346 | // ifdef is needed because when you set _ITERATOR_DEBUG_LEVEL=0 and you use custom numpunct you will get runtime error in debug mode | ||
| 347 | // for more info https://connect.microsoft.com/VisualStudio/feedback/details/2655363 | ||
| 348 | #if !(defined(_DEBUG) && defined(TARGET_WINDOWS)) | ||
| 349 | ss.imbue(GetOriginalLocale()); | ||
| 350 | #endif | ||
| 351 | ss.precision(1); | ||
| 352 | ss << std::fixed << num; | ||
| 353 | return ss.str(); | ||
| 354 | } | ||
| 355 | |||
| 356 | /*! \brief Escapes the given string to be able to be used as a parameter. | ||
| 357 | |||
| 358 | Escapes backslashes and double-quotes with an additional backslash and | ||
| 359 | adds double-quotes around the whole string. | ||
| 360 | |||
| 361 | \param param String to escape/paramify | ||
| 362 | \return Escaped/Paramified string | ||
| 363 | */ | ||
| 364 | static std::string Paramify(const std::string &param); | ||
| 365 | |||
| 366 | /*! \brief Split a string by the specified delimiters. | ||
| 367 | Splits a string using one or more delimiting characters, ignoring empty tokens. | ||
| 368 | Differs from Split() in two ways: | ||
| 369 | 1. The delimiters are treated as individual characters, rather than a single delimiting string. | ||
| 370 | 2. Empty tokens are ignored. | ||
| 371 | \return a vector of tokens | ||
| 372 | */ | ||
| 373 | static std::vector<std::string> Tokenize(const std::string& input, const std::string& delimiters); | ||
| 374 | static void Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters); | ||
| 375 | static std::vector<std::string> Tokenize(const std::string& input, const char delimiter); | ||
| 376 | static void Tokenize(const std::string& input, std::vector<std::string>& tokens, const char delimiter); | ||
| 377 | static uint64_t ToUint64(std::string str, uint64_t fallback) noexcept; | ||
| 378 | |||
| 379 | /*! | ||
| 380 | * Returns bytes in a human readable format using the smallest unit that will fit `bytes` in at | ||
| 381 | * most three digits. The number of decimals is adjusted with significance such that 'small' | ||
| 382 | * numbers will have more decimals than larger ones. | ||
| 383 | * | ||
| 384 | * For example: 1024 bytes will be formatted as "1.00kB", 10240 bytes as "10.0kB" and | ||
| 385 | * 102400 bytes as "100kB". See TestStringUtils for more examples. | ||
| 386 | */ | ||
| 387 | static std::string FormatFileSize(uint64_t bytes); | ||
| 388 | |||
| 389 | private: | ||
| 390 | /*! | ||
| 391 | * Wrapper for CLangInfo::GetOriginalLocale() which allows us to | ||
| 392 | * avoid including LangInfo.h from this header. | ||
| 393 | */ | ||
| 394 | static const std::locale& GetOriginalLocale() noexcept; | ||
| 395 | }; | ||
| 396 | |||
| 397 | struct sortstringbyname | ||
| 398 | { | ||
| 399 | bool operator()(const std::string& strItem1, const std::string& strItem2) | ||
| 400 | { | ||
| 401 | return StringUtils::CompareNoCase(strItem1, strItem2) < 0; | ||
| 402 | } | ||
| 403 | }; | ||
