From be933ef2241d79558f91796cc5b3a161f72ebf9c Mon Sep 17 00:00:00 2001 From: manuel Date: Mon, 19 Oct 2020 00:52:24 +0200 Subject: sync with upstream --- xbmc/utils/StringUtils.h | 403 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 403 insertions(+) create mode 100644 xbmc/utils/StringUtils.h (limited to 'xbmc/utils/StringUtils.h') diff --git a/xbmc/utils/StringUtils.h b/xbmc/utils/StringUtils.h new file mode 100644 index 0000000..6aab4cd --- /dev/null +++ b/xbmc/utils/StringUtils.h @@ -0,0 +1,403 @@ +/* + * Copyright (C) 2005-2018 Team Kodi + * This file is part of Kodi - https://kodi.tv + * + * SPDX-License-Identifier: GPL-2.0-or-later + * See LICENSES/README.md for more information. + */ + +#pragma once + +//----------------------------------------------------------------------- +// +// File: StringUtils.h +// +// Purpose: ATL split string utility +// Author: Paul J. Weiss +// +// Modified to support J O'Leary's std::string class by kraqh3d +// +//------------------------------------------------------------------------ + +#include +#include +#include +#include +#include +#include + +// workaround for broken [[depreciated]] in coverity +#if defined(__COVERITY__) +#undef FMT_DEPRECATED +#define FMT_DEPRECATED +#endif +#include + +#if FMT_VERSION >= 40000 +#include +#endif + +#include "XBDateTime.h" +#include "utils/params_check_macros.h" + +/*! \brief C-processor Token stringification + +The following macros can be used to stringify definitions to +C style strings. + +Example: + +#define foo 4 +DEF_TO_STR_NAME(foo) // outputs "foo" +DEF_TO_STR_VALUE(foo) // outputs "4" + +*/ + +#define DEF_TO_STR_NAME(x) #x +#define DEF_TO_STR_VALUE(x) DEF_TO_STR_NAME(x) + +template::value, int> = 0> +constexpr auto&& EnumToInt(T&& arg) noexcept +{ + return arg; +} +template::value, int> = 0> +constexpr auto EnumToInt(T&& arg) noexcept +{ + return static_cast(arg); +} + +class StringUtils +{ +public: + /*! \brief Get a formatted string similar to sprintf + + Beware that this does not support directly passing in + std::string objects. You need to call c_str() to pass + the const char* buffer representing the value of the + std::string object. + + \param fmt Format of the resulting string + \param ... variable number of value type arguments + \return Formatted string + */ + template + static std::string Format(const std::string& fmt, Args&&... args) + { + // coverity[fun_call_w_exception : FALSE] + auto result = ::fmt::format(fmt, EnumToInt(std::forward(args))...); + if (result == fmt) + result = ::fmt::sprintf(fmt, EnumToInt(std::forward(args))...); + + return result; + } + template + static std::wstring Format(const std::wstring& fmt, Args&&... args) + { + // coverity[fun_call_w_exception : FALSE] + auto result = ::fmt::format(fmt, EnumToInt(std::forward(args))...); + if (result == fmt) + result = ::fmt::sprintf(fmt, EnumToInt(std::forward(args))...); + + return result; + } + + static std::string FormatV(PRINTF_FORMAT_STRING const char *fmt, va_list args); + static std::wstring FormatV(PRINTF_FORMAT_STRING const wchar_t *fmt, va_list args); + static void ToUpper(std::string &str); + static void ToUpper(std::wstring &str); + static void ToLower(std::string &str); + static void ToLower(std::wstring &str); + static void ToCapitalize(std::string &str); + static void ToCapitalize(std::wstring &str); + static bool EqualsNoCase(const std::string &str1, const std::string &str2); + static bool EqualsNoCase(const std::string &str1, const char *s2); + static bool EqualsNoCase(const char *s1, const char *s2); + static int CompareNoCase(const std::string& str1, const std::string& str2, size_t n = 0); + static int CompareNoCase(const char* s1, const char* s2, size_t n = 0); + static int ReturnDigits(const std::string &str); + static std::string Left(const std::string &str, size_t count); + static std::string Mid(const std::string &str, size_t first, size_t count = std::string::npos); + static std::string Right(const std::string &str, size_t count); + static std::string& Trim(std::string &str); + static std::string& Trim(std::string &str, const char* const chars); + static std::string& TrimLeft(std::string &str); + static std::string& TrimLeft(std::string &str, const char* const chars); + static std::string& TrimRight(std::string &str); + static std::string& TrimRight(std::string &str, const char* const chars); + static std::string& RemoveDuplicatedSpacesAndTabs(std::string& str); + static int Replace(std::string &str, char oldChar, char newChar); + static int Replace(std::string &str, const std::string &oldStr, const std::string &newStr); + static int Replace(std::wstring &str, const std::wstring &oldStr, const std::wstring &newStr); + static bool StartsWith(const std::string &str1, const std::string &str2); + static bool StartsWith(const std::string &str1, const char *s2); + static bool StartsWith(const char *s1, const char *s2); + static bool StartsWithNoCase(const std::string &str1, const std::string &str2); + static bool StartsWithNoCase(const std::string &str1, const char *s2); + static bool StartsWithNoCase(const char *s1, const char *s2); + static bool EndsWith(const std::string &str1, const std::string &str2); + static bool EndsWith(const std::string &str1, const char *s2); + static bool EndsWithNoCase(const std::string &str1, const std::string &str2); + static bool EndsWithNoCase(const std::string &str1, const char *s2); + + template + static std::string Join(const CONTAINER &strings, const std::string& delimiter) + { + std::string result; + for (const auto& str : strings) + result += str + delimiter; + + if (!result.empty()) + result.erase(result.size() - delimiter.size()); + return result; + } + + /*! \brief Splits the given input string using the given delimiter into separate strings. + + If the given input string is empty the result will be an empty array (not + an array containing an empty string). + + \param input Input string to be split + \param delimiter Delimiter to be used to split the input string + \param iMaxStrings (optional) Maximum number of splitted strings + */ + static std::vector Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings = 0); + static std::vector Split(const std::string& input, const char delimiter, size_t iMaxStrings = 0); + static std::vector Split(const std::string& input, const std::vector &delimiters); + /*! \brief Splits the given input string using the given delimiter into separate strings. + + If the given input string is empty nothing will be put into the target iterator. + + \param d_first the beginning of the destination range + \param input Input string to be split + \param delimiter Delimiter to be used to split the input string + \param iMaxStrings (optional) Maximum number of splitted strings + \return output iterator to the element in the destination range, one past the last element + * that was put there + */ + template + static OutputIt SplitTo(OutputIt d_first, const std::string& input, const std::string& delimiter, unsigned int iMaxStrings = 0) + { + OutputIt dest = d_first; + + if (input.empty()) + return dest; + if (delimiter.empty()) + { + *d_first++ = input; + return dest; + } + + const size_t delimLen = delimiter.length(); + size_t nextDelim; + size_t textPos = 0; + do + { + if (--iMaxStrings == 0) + { + *dest++ = input.substr(textPos); + break; + } + nextDelim = input.find(delimiter, textPos); + *dest++ = input.substr(textPos, nextDelim - textPos); + textPos = nextDelim + delimLen; + } while (nextDelim != std::string::npos); + + return dest; + } + template + static OutputIt SplitTo(OutputIt d_first, const std::string& input, const char delimiter, size_t iMaxStrings = 0) + { + return SplitTo(d_first, input, std::string(1, delimiter), iMaxStrings); + } + template + static OutputIt SplitTo(OutputIt d_first, const std::string& input, const std::vector &delimiters) + { + OutputIt dest = d_first; + if (input.empty()) + return dest; + + if (delimiters.empty()) + { + *dest++ = input; + return dest; + } + std::string str = input; + for (size_t di = 1; di < delimiters.size(); di++) + StringUtils::Replace(str, delimiters[di], delimiters[0]); + return SplitTo(dest, str, delimiters[0]); + } + + /*! \brief Splits the given input strings using the given delimiters into further separate strings. + + If the given input string vector is empty the result will be an empty array (not + an array containing an empty string). + + Delimiter strings are applied in order, so once the (optional) maximum number of + items is produced no other delimiters are applied. This produces different results + to applying all delimiters at once e.g. "a/b#c/d" becomes "a", "b#c", "d" rather + than "a", "b", "c/d" + + \param input Input vector of strings each to be split + \param delimiters Delimiter strings to be used to split the input strings + \param iMaxStrings (optional) Maximum number of resulting split strings + */ + static std::vector SplitMulti(const std::vector &input, const std::vector &delimiters, unsigned int iMaxStrings = 0); + static int FindNumber(const std::string& strInput, const std::string &strFind); + static int64_t AlphaNumericCompare(const wchar_t *left, const wchar_t *right); + static int AlphaNumericCollation(int nKey1, const void* pKey1, int nKey2, const void* pKey2); + static long TimeStringToSeconds(const std::string &timeString); + static void RemoveCRLF(std::string& strLine); + + /*! \brief utf8 version of strlen - skips any non-starting bytes in the count, thus returning the number of utf8 characters + \param s c-string to find the length of. + \return the number of utf8 characters in the string. + */ + static size_t utf8_strlen(const char *s); + + /*! \brief convert a time in seconds to a string based on the given time format + \param seconds time in seconds + \param format the format we want the time in. + \return the formatted time + \sa TIME_FORMAT + */ + static std::string SecondsToTimeString(long seconds, TIME_FORMAT format = TIME_FORMAT_GUESS); + + /*! \brief check whether a string is a natural number. + Matches [ \t]*[0-9]+[ \t]* + \param str the string to check + \return true if the string is a natural number, false otherwise. + */ + static bool IsNaturalNumber(const std::string& str); + + /*! \brief check whether a string is an integer. + Matches [ \t]*[\-]*[0-9]+[ \t]* + \param str the string to check + \return true if the string is an integer, false otherwise. + */ + static bool IsInteger(const std::string& str); + + /* The next several isasciiXX and asciiXXvalue functions are locale independent (US-ASCII only), + * as opposed to standard ::isXX (::isalpha, ::isdigit...) which are locale dependent. + * Next functions get parameter as char and don't need double cast ((int)(unsigned char) is required for standard functions). */ + inline static bool isasciidigit(char chr) // locale independent + { + return chr >= '0' && chr <= '9'; + } + inline static bool isasciixdigit(char chr) // locale independent + { + return (chr >= '0' && chr <= '9') || (chr >= 'a' && chr <= 'f') || (chr >= 'A' && chr <= 'F'); + } + static int asciidigitvalue(char chr); // locale independent + static int asciixdigitvalue(char chr); // locale independent + inline static bool isasciiuppercaseletter(char chr) // locale independent + { + return (chr >= 'A' && chr <= 'Z'); + } + inline static bool isasciilowercaseletter(char chr) // locale independent + { + return (chr >= 'a' && chr <= 'z'); + } + inline static bool isasciialphanum(char chr) // locale independent + { + return isasciiuppercaseletter(chr) || isasciilowercaseletter(chr) || isasciidigit(chr); + } + static std::string SizeToString(int64_t size); + static const std::string Empty; + static size_t FindWords(const char *str, const char *wordLowerCase); + static int FindEndBracket(const std::string &str, char opener, char closer, int startPos = 0); + static int DateStringToYYYYMMDD(const std::string &dateString); + static std::string ISODateToLocalizedDate (const std::string& strIsoDate); + static void WordToDigits(std::string &word); + static std::string CreateUUID(); + static bool ValidateUUID(const std::string &uuid); // NB only validates syntax + static double CompareFuzzy(const std::string &left, const std::string &right); + static int FindBestMatch(const std::string &str, const std::vector &strings, double &matchscore); + static bool ContainsKeyword(const std::string &str, const std::vector &keywords); + + /*! \brief Convert the string of binary chars to the actual string. + + Convert the string representation of binary chars to the actual string. + For example \1\2\3 is converted to a string with binary char \1, \2 and \3 + + \param param String to convert + \return Converted string + */ + static std::string BinaryStringToString(const std::string& in); + /** + * Convert each character in the string to its hexadecimal + * representation and return the concatenated result + * + * example: "abc\n" -> "6162630a" + */ + static std::string ToHexadecimal(const std::string& in); + /*! \brief Format the string with locale separators. + + Format the string with locale separators. + For example 10000.57 in en-us is '10,000.57' but in italian is '10.000,57' + + \param param String to format + \return Formatted string + */ + template + static std::string FormatNumber(T num) + { + std::stringstream ss; +// ifdef is needed because when you set _ITERATOR_DEBUG_LEVEL=0 and you use custom numpunct you will get runtime error in debug mode +// for more info https://connect.microsoft.com/VisualStudio/feedback/details/2655363 +#if !(defined(_DEBUG) && defined(TARGET_WINDOWS)) + ss.imbue(GetOriginalLocale()); +#endif + ss.precision(1); + ss << std::fixed << num; + return ss.str(); + } + + /*! \brief Escapes the given string to be able to be used as a parameter. + + Escapes backslashes and double-quotes with an additional backslash and + adds double-quotes around the whole string. + + \param param String to escape/paramify + \return Escaped/Paramified string + */ + static std::string Paramify(const std::string ¶m); + + /*! \brief Split a string by the specified delimiters. + Splits a string using one or more delimiting characters, ignoring empty tokens. + Differs from Split() in two ways: + 1. The delimiters are treated as individual characters, rather than a single delimiting string. + 2. Empty tokens are ignored. + \return a vector of tokens + */ + static std::vector Tokenize(const std::string& input, const std::string& delimiters); + static void Tokenize(const std::string& input, std::vector& tokens, const std::string& delimiters); + static std::vector Tokenize(const std::string& input, const char delimiter); + static void Tokenize(const std::string& input, std::vector& tokens, const char delimiter); + static uint64_t ToUint64(std::string str, uint64_t fallback) noexcept; + + /*! + * Returns bytes in a human readable format using the smallest unit that will fit `bytes` in at + * most three digits. The number of decimals are adjusted with significance such that 'small' + * numbers will have more decimals than larger ones. + * + * For example: 1024 bytes will be formatted as "1.00kB", 10240 bytes as "10.0kB" and + * 102400 bytes as "100kB". See TestStringUtils for more examples. + */ + static std::string FormatFileSize(uint64_t bytes); + +private: + /*! + * Wrapper for CLangInfo::GetOriginalLocale() which allows us to + * avoid including LangInfo.h from this header. + */ + static const std::locale& GetOriginalLocale() noexcept; +}; + +struct sortstringbyname +{ + bool operator()(const std::string& strItem1, const std::string& strItem2) + { + return StringUtils::CompareNoCase(strItem1, strItem2) < 0; + } +}; -- cgit v1.2.3