diff options
Diffstat (limited to 'xbmc/utils/RegExp.h')
| -rw-r--r-- | xbmc/utils/RegExp.h | 165 |
1 files changed, 165 insertions, 0 deletions
diff --git a/xbmc/utils/RegExp.h b/xbmc/utils/RegExp.h new file mode 100644 index 0000000..53f6019 --- /dev/null +++ b/xbmc/utils/RegExp.h | |||
| @@ -0,0 +1,165 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2005-2018 Team Kodi | ||
| 3 | * This file is part of Kodi - https://kodi.tv | ||
| 4 | * | ||
| 5 | * SPDX-License-Identifier: GPL-2.0-or-later | ||
| 6 | * See LICENSES/README.md for more information. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #pragma once | ||
| 10 | |||
| 11 | //! @todo - move to std::regex (after switching to gcc 4.9 or higher) and get rid of CRegExp | ||
| 12 | |||
| 13 | #include <string> | ||
| 14 | #include <vector> | ||
| 15 | |||
| 16 | /* make sure stdlib.h is included before including pcre.h inside the | ||
| 17 | namespace; this works around stdlib.h definitions also living in | ||
| 18 | the PCRE namespace */ | ||
| 19 | #include <stdlib.h> | ||
| 20 | |||
| 21 | namespace PCRE { | ||
| 22 | struct real_pcre_jit_stack; // forward declaration so that pcre_jit_stack can be named even when pcre.h comes from a PCRE without JIT | ||
| 23 | typedef struct real_pcre_jit_stack pcre_jit_stack; | ||
| 24 | #include <pcre.h> | ||
| 25 | } | ||
| 26 | |||
| 27 | class CRegExp | ||
| 28 | { | ||
| 29 | public: | ||
| 30 | enum studyMode | ||
| 31 | { | ||
| 32 | NoStudy = 0, // do not study the expression | ||
| 33 | StudyRegExp = 1, // study the expression (slower compilation, faster find) | ||
| 34 | StudyWithJitComp // study the expression and JIT-compile it, if possible (heavyweight optimization) | ||
| 35 | }; | ||
| 36 | enum utf8Mode | ||
| 37 | { | ||
| 38 | autoUtf8 = -1, // analyze regexp for UTF-8 multi-byte chars, for Unicode codes > 0xFF | ||
| 39 | // or explicit Unicode properties (\p, \P and \X), enable UTF-8 mode if any of them are found | ||
| 40 | asciiOnly = 0, // process regexp and strings as single-byte encoded strings | ||
| 41 | forceUtf8 = 1 // enable UTF-8 mode (with Unicode properties) | ||
| 42 | }; | ||
| 43 | |||
| 44 | static const int m_MaxNumOfBackrefrences = 20; | ||
| 45 | /** Create new CRegExp object without a regular expression (none is passed in; see RegComp()) | ||
| 46 | * @param caseless (optional) Matching will be case insensitive if set to true | ||
| 47 | * or case sensitive if set to false | ||
| 48 | * @param utf8 (optional) Control UTF-8 processing | ||
| 49 | */ | ||
| 50 | CRegExp(bool caseless = false, utf8Mode utf8 = asciiOnly); | ||
| 51 | /** | ||
| 52 | * Create new CRegExp object and compile regexp expression in one step | ||
| 53 | * @warning Use only with hardcoded regexp when you're sure that regexp is compiled without errors | ||
| 54 | * @param caseless Matching will be case insensitive if set to true | ||
| 55 | * or case sensitive if set to false | ||
| 56 | * @param utf8 Control UTF-8 processing | ||
| 57 | * @param re The regular expression | ||
| 58 | * @param study (optional) Controls study of expression, useful if expression will be used | ||
| 59 | * several times | ||
| 60 | */ | ||
| 61 | CRegExp(bool caseless, utf8Mode utf8, const char *re, studyMode study = NoStudy); | ||
| 62 | |||
| 63 | CRegExp(const CRegExp& re); | ||
| 64 | ~CRegExp(); | ||
| 65 | |||
| 66 | /** | ||
| 67 | * Compile (prepare) regular expression | ||
| 68 | * @param re The regular expression | ||
| 69 | * @param study (optional) Controls study of expression, useful if expression will be used | ||
| 70 | * several times | ||
| 71 | * @return true on success, false on any error | ||
| 72 | */ | ||
| 73 | bool RegComp(const char *re, studyMode study = NoStudy); | ||
| 74 | |||
| 75 | /** | ||
| 76 | * Compile (prepare) regular expression (std::string overload, forwards re.c_str() to the const char* version) | ||
| 77 | * @param re The regular expression | ||
| 78 | * @param study (optional) Controls study of expression, useful if expression will be used | ||
| 79 | * several times | ||
| 80 | * @return true on success, false on any error | ||
| 81 | */ | ||
| 82 | bool RegComp(const std::string& re, studyMode study = NoStudy) | ||
| 83 | { return RegComp(re.c_str(), study); } | ||
| 84 | |||
| 85 | /** | ||
| 86 | * Find first match of regular expression in given string | ||
| 87 | * @param str The string to match against regular expression | ||
| 88 | * @param startoffset (optional) The string offset to start matching | ||
| 89 | * @param maxNumberOfCharsToTest (optional) The maximum number of characters to test (match) in | ||
| 90 | * string. If set to -1, the string is checked up to the end. | ||
| 91 | * @return starting position of match in string, negative value in case of error or no match | ||
| 92 | */ | ||
| 93 | int RegFind(const char* str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1); | ||
| 94 | /** | ||
| 95 | * Find first match of regular expression in given string (std::string overload, passes str.length() and str.c_str() to PrivateRegFind) | ||
| 96 | * @param str The string to match against regular expression | ||
| 97 | * @param startoffset (optional) The string offset to start matching | ||
| 98 | * @param maxNumberOfCharsToTest (optional) The maximum number of characters to test (match) in | ||
| 99 | * string. If set to -1, the string is checked up to the end. | ||
| 100 | * @return starting position of match in string, negative value in case of error or no match | ||
| 101 | */ | ||
| 102 | int RegFind(const std::string& str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1) | ||
| 103 | { return PrivateRegFind(str.length(), str.c_str(), startoffset, maxNumberOfCharsToTest); } | ||
| 104 | std::string GetReplaceString(const std::string& sReplaceExp) const; | ||
| 105 | int GetFindLen() const | ||
| 106 | { | ||
| 107 | if (!m_re || !m_bMatched) | ||
| 108 | return 0; | ||
| 109 | |||
| 110 | return (m_iOvector[1] - m_iOvector[0]); | ||
| 111 | }; | ||
| 112 | int GetSubCount() const { return m_iMatchCount - 1; } // PCRE returns the number of sub-patterns + 1 | ||
| 113 | int GetSubStart(int iSub) const; | ||
| 114 | int GetSubStart(const std::string& subName) const; | ||
| 115 | int GetSubLength(int iSub) const; | ||
| 116 | int GetSubLength(const std::string& subName) const; | ||
| 117 | int GetCaptureTotal() const; | ||
| 118 | std::string GetMatch(int iSub = 0) const; | ||
| 119 | std::string GetMatch(const std::string& subName) const; | ||
| 120 | const std::string& GetPattern() const { return m_pattern; } | ||
| 121 | bool GetNamedSubPattern(const char* strName, std::string& strMatch) const; | ||
| 122 | int GetNamedSubPatternNumber(const char* strName) const; | ||
| 123 | void DumpOvector(int iLog); | ||
| 124 | /** | ||
| 125 | * Check if RegExp object is ready for matching | ||
| 126 | * @return true if RegExp object is ready for matching (i.e. the stored pattern string is non-empty), false otherwise | ||
| 127 | */ | ||
| 128 | inline bool IsCompiled(void) const | ||
| 129 | { return !m_pattern.empty(); } | ||
| 130 | CRegExp& operator= (const CRegExp& re); | ||
| 131 | static bool IsUtf8Supported(void); | ||
| 132 | static bool AreUnicodePropertiesSupported(void); | ||
| 133 | static bool LogCheckUtf8Support(void); | ||
| 134 | static bool IsJitSupported(void); | ||
| 135 | |||
| 136 | private: | ||
| 137 | int PrivateRegFind(size_t bufferLen, const char *str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1); | ||
| 138 | void InitValues(bool caseless = false, CRegExp::utf8Mode utf8 = asciiOnly); | ||
| 139 | static bool requireUtf8(const std::string& regexp); | ||
| 140 | static int readCharXCode(const std::string& regexp, size_t& pos); | ||
| 141 | static bool isCharClassWithUnicode(const std::string& regexp, size_t& pos); | ||
| 142 | |||
| 143 | void Cleanup(); | ||
| 144 | inline bool IsValidSubNumber(int iSub) const; | ||
| 145 | |||
| 146 | PCRE::pcre* m_re; | ||
| 147 | PCRE::pcre_extra* m_sd; | ||
| 148 | static const int OVECCOUNT=(m_MaxNumOfBackrefrences + 1) * 3; | ||
| 149 | unsigned int m_offset; | ||
| 150 | int m_iOvector[OVECCOUNT]; | ||
| 151 | utf8Mode m_utf8Mode; | ||
| 152 | int m_iMatchCount; | ||
| 153 | int m_iOptions; | ||
| 154 | bool m_jitCompiled; | ||
| 155 | bool m_bMatched; | ||
| 156 | PCRE::pcre_jit_stack* m_jitStack; | ||
| 157 | std::string m_subject; | ||
| 158 | std::string m_pattern; | ||
| 159 | static int m_Utf8Supported; | ||
| 160 | static int m_UcpSupported; | ||
| 161 | static int m_JitSupported; | ||
| 162 | }; | ||
| 163 | |||
| 164 | using VECCREGEXP = std::vector<CRegExp>; | ||
| 165 | |||
