diff options
Diffstat (limited to 'xbmc/utils/POUtils.cpp')
| -rw-r--r-- | xbmc/utils/POUtils.cpp | 305 |
1 files changed, 305 insertions, 0 deletions
diff --git a/xbmc/utils/POUtils.cpp b/xbmc/utils/POUtils.cpp new file mode 100644 index 0000000..7d8afd3 --- /dev/null +++ b/xbmc/utils/POUtils.cpp | |||
| @@ -0,0 +1,305 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2012-2018 Team Kodi | ||
| 3 | * This file is part of Kodi - https://kodi.tv | ||
| 4 | * | ||
| 5 | * SPDX-License-Identifier: GPL-2.0-or-later | ||
| 6 | * See LICENSES/README.md for more information. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include "utils/POUtils.h" | ||
| 10 | |||
| 11 | #include "URL.h" | ||
| 12 | #include "filesystem/File.h" | ||
| 13 | #include "utils/log.h" | ||
| 14 | |||
| 15 | #include <stdlib.h> | ||
| 16 | |||
| 17 | CPODocument::CPODocument() | ||
| 18 | { | ||
| 19 | m_CursorPos = 0; | ||
| 20 | m_nextEntryPos = 0; | ||
| 21 | m_POfilelength = 0; | ||
| 22 | m_Entry.msgStrPlural.clear(); | ||
| 23 | m_Entry.msgStrPlural.resize(1); | ||
| 24 | } | ||
| 25 | |||
| 26 | CPODocument::~CPODocument() = default; | ||
| 27 | |||
| 28 | bool CPODocument::LoadFile(const std::string &pofilename) | ||
| 29 | { | ||
| 30 | CURL poFileUrl(pofilename); | ||
| 31 | if (!XFILE::CFile::Exists(poFileUrl)) | ||
| 32 | return false; | ||
| 33 | |||
| 34 | XFILE::CFile file; | ||
| 35 | XFILE::auto_buffer buf; | ||
| 36 | if (file.LoadFile(poFileUrl, buf) < 18) // at least a size of a minimalistic header | ||
| 37 | { | ||
| 38 | CLog::Log(LOGERROR, "%s: can't load file \"%s\" or file is too small", __FUNCTION__, pofilename.c_str()); | ||
| 39 | return false; | ||
| 40 | } | ||
| 41 | |||
| 42 | m_strBuffer = '\n'; | ||
| 43 | m_strBuffer.append(buf.get(), buf.size()); | ||
| 44 | buf.clear(); | ||
| 45 | |||
| 46 | ConvertLineEnds(pofilename); | ||
| 47 | |||
| 48 | // we make sure, to have an LF at the end of buffer | ||
| 49 | if (*m_strBuffer.rbegin() != '\n') | ||
| 50 | { | ||
| 51 | m_strBuffer += "\n"; | ||
| 52 | } | ||
| 53 | |||
| 54 | m_POfilelength = m_strBuffer.size(); | ||
| 55 | |||
| 56 | if (GetNextEntry() && m_Entry.Type == MSGID_FOUND) | ||
| 57 | return true; | ||
| 58 | |||
| 59 | CLog::Log(LOGERROR, "POParser: unable to read PO file header from file: %s", pofilename.c_str()); | ||
| 60 | return false; | ||
| 61 | } | ||
| 62 | |||
| 63 | bool CPODocument::GetNextEntry() | ||
| 64 | { | ||
| 65 | do | ||
| 66 | { | ||
| 67 | // if we don't find LFLF, we reached the end of the buffer and the last entry to check | ||
| 68 | // we indicate this with setting m_nextEntryPos to the end of the buffer | ||
| 69 | if ((m_nextEntryPos = m_strBuffer.find("\n\n", m_CursorPos)) == std::string::npos) | ||
| 70 | m_nextEntryPos = m_POfilelength-1; | ||
| 71 | |||
| 72 | // now we read the actual entry into a temp string for further processing | ||
| 73 | m_Entry.Content.assign(m_strBuffer, m_CursorPos, m_nextEntryPos - m_CursorPos +1); | ||
| 74 | m_CursorPos = m_nextEntryPos+1; // jump cursor to the second LF character | ||
| 75 | |||
| 76 | if (FindLineStart ("\nmsgid ", m_Entry.msgID.Pos)) | ||
| 77 | { | ||
| 78 | if (FindLineStart ("\nmsgctxt \"#", m_Entry.xIDPos) && ParseNumID()) | ||
| 79 | { | ||
| 80 | m_Entry.Type = ID_FOUND; // we found an entry with a valid numeric id | ||
| 81 | return true; | ||
| 82 | } | ||
| 83 | |||
| 84 | size_t plurPos; | ||
| 85 | if (FindLineStart ("\nmsgid_plural ", plurPos)) | ||
| 86 | { | ||
| 87 | m_Entry.Type = MSGID_PLURAL_FOUND; // we found a pluralized entry | ||
| 88 | return true; | ||
| 89 | } | ||
| 90 | |||
| 91 | m_Entry.Type = MSGID_FOUND; // we found a normal entry, with no numeric id | ||
| 92 | return true; | ||
| 93 | } | ||
| 94 | } | ||
| 95 | while (m_nextEntryPos != m_POfilelength-1); | ||
| 96 | // we reached the end of buffer AND we have not found a valid entry | ||
| 97 | |||
| 98 | return false; | ||
| 99 | } | ||
| 100 | |||
| 101 | void CPODocument::ParseEntry(bool bisSourceLang) | ||
| 102 | { | ||
| 103 | if (bisSourceLang) | ||
| 104 | { | ||
| 105 | if (m_Entry.Type == ID_FOUND) | ||
| 106 | GetString(m_Entry.msgID); | ||
| 107 | else | ||
| 108 | m_Entry.msgID.Str.clear(); | ||
| 109 | return; | ||
| 110 | } | ||
| 111 | |||
| 112 | if (m_Entry.Type != ID_FOUND) | ||
| 113 | { | ||
| 114 | GetString(m_Entry.msgID); | ||
| 115 | if (FindLineStart ("\nmsgctxt ", m_Entry.msgCtxt.Pos)) | ||
| 116 | GetString(m_Entry.msgCtxt); | ||
| 117 | else | ||
| 118 | m_Entry.msgCtxt.Str.clear(); | ||
| 119 | } | ||
| 120 | |||
| 121 | if (m_Entry.Type != MSGID_PLURAL_FOUND) | ||
| 122 | { | ||
| 123 | if (FindLineStart ("\nmsgstr ", m_Entry.msgStr.Pos)) | ||
| 124 | { | ||
| 125 | GetString(m_Entry.msgStr); | ||
| 126 | GetString(m_Entry.msgID); | ||
| 127 | } | ||
| 128 | else | ||
| 129 | { | ||
| 130 | CLog::Log(LOGERROR, "POParser: missing msgstr line in entry. Failed entry: %s", | ||
| 131 | m_Entry.Content.c_str()); | ||
| 132 | m_Entry.msgStr.Str.clear(); | ||
| 133 | } | ||
| 134 | return; | ||
| 135 | } | ||
| 136 | |||
| 137 | // We found a plural form entry. We read it into a vector of CStrEntry types | ||
| 138 | m_Entry.msgStrPlural.clear(); | ||
| 139 | std::string strPattern = "\nmsgstr[0] "; | ||
| 140 | CStrEntry strEntry; | ||
| 141 | |||
| 142 | for (int n=0; n<7 ; n++) | ||
| 143 | { | ||
| 144 | strPattern[8] = static_cast<char>(n+'0'); | ||
| 145 | if (FindLineStart (strPattern, strEntry.Pos)) | ||
| 146 | { | ||
| 147 | GetString(strEntry); | ||
| 148 | if (strEntry.Str.empty()) | ||
| 149 | break; | ||
| 150 | m_Entry.msgStrPlural.push_back(strEntry); | ||
| 151 | } | ||
| 152 | else | ||
| 153 | break; | ||
| 154 | } | ||
| 155 | |||
| 156 | if (m_Entry.msgStrPlural.empty()) | ||
| 157 | { | ||
| 158 | CLog::Log(LOGERROR, "POParser: msgstr[] plural lines have zero valid strings. " | ||
| 159 | "Failed entry: %s", m_Entry.Content.c_str()); | ||
| 160 | m_Entry.msgStrPlural.resize(1); // Put 1 element with an empty string into the vector | ||
| 161 | } | ||
| 162 | } | ||
| 163 | |||
| 164 | const std::string& CPODocument::GetPlurMsgstr(size_t plural) const | ||
| 165 | { | ||
| 166 | if (m_Entry.msgStrPlural.size() < plural+1) | ||
| 167 | { | ||
| 168 | CLog::Log(LOGERROR, "POParser: msgstr[%i] plural field requested, but not found in PO file. " | ||
| 169 | "Failed entry: %s", static_cast<int>(plural), m_Entry.Content.c_str()); | ||
| 170 | plural = m_Entry.msgStrPlural.size()-1; | ||
| 171 | } | ||
| 172 | return m_Entry.msgStrPlural[plural].Str; | ||
| 173 | } | ||
| 174 | |||
| 175 | std::string CPODocument::UnescapeString(const std::string &strInput) | ||
| 176 | { | ||
| 177 | std::string strOutput; | ||
| 178 | if (strInput.empty()) | ||
| 179 | return strOutput; | ||
| 180 | |||
| 181 | char oescchar; | ||
| 182 | strOutput.reserve(strInput.size()); | ||
| 183 | std::string::const_iterator it = strInput.begin(); | ||
| 184 | while (it < strInput.end()) | ||
| 185 | { | ||
| 186 | oescchar = *it++; | ||
| 187 | if (oescchar == '\\') | ||
| 188 | { | ||
| 189 | if (it == strInput.end()) | ||
| 190 | { | ||
| 191 | CLog::Log(LOGERROR, | ||
| 192 | "POParser: warning, unhandled escape character " | ||
| 193 | "at line-end. Problematic entry: %s", | ||
| 194 | m_Entry.Content.c_str()); | ||
| 195 | break; | ||
| 196 | } | ||
| 197 | switch (*it++) | ||
| 198 | { | ||
| 199 | case 'a': oescchar = '\a'; break; | ||
| 200 | case 'b': oescchar = '\b'; break; | ||
| 201 | case 'v': oescchar = '\v'; break; | ||
| 202 | case 'n': oescchar = '\n'; break; | ||
| 203 | case 't': oescchar = '\t'; break; | ||
| 204 | case 'r': oescchar = '\r'; break; | ||
| 205 | case '"': oescchar = '"' ; break; | ||
| 206 | case '0': oescchar = '\0'; break; | ||
| 207 | case 'f': oescchar = '\f'; break; | ||
| 208 | case '?': oescchar = '\?'; break; | ||
| 209 | case '\'': oescchar = '\''; break; | ||
| 210 | case '\\': oescchar = '\\'; break; | ||
| 211 | |||
| 212 | default: | ||
| 213 | { | ||
| 214 | CLog::Log(LOGERROR, | ||
| 215 | "POParser: warning, unhandled escape character. Problematic entry: %s", | ||
| 216 | m_Entry.Content.c_str()); | ||
| 217 | continue; | ||
| 218 | } | ||
| 219 | } | ||
| 220 | } | ||
| 221 | strOutput.push_back(oescchar); | ||
| 222 | } | ||
| 223 | return strOutput; | ||
| 224 | } | ||
| 225 | |||
| 226 | bool CPODocument::FindLineStart(const std::string &strToFind, size_t &FoundPos) | ||
| 227 | { | ||
| 228 | |||
| 229 | FoundPos = m_Entry.Content.find(strToFind); | ||
| 230 | |||
| 231 | if (FoundPos == std::string::npos || FoundPos + strToFind.size() + 2 > m_Entry.Content.size()) | ||
| 232 | return false; // if we don't find the string or if we don't have at least one char after it | ||
| 233 | |||
| 234 | FoundPos += strToFind.size(); // to set the pos marker to the exact start of the real data | ||
| 235 | return true; | ||
| 236 | } | ||
| 237 | |||
| 238 | bool CPODocument::ParseNumID() | ||
| 239 | { | ||
| 240 | if (isdigit(m_Entry.Content.at(m_Entry.xIDPos))) // verify if the first char is digit | ||
| 241 | { | ||
| 242 | // we check for the numeric id for the fist 10 chars (uint32) | ||
| 243 | m_Entry.xID = strtol(&m_Entry.Content[m_Entry.xIDPos], NULL, 10); | ||
| 244 | return true; | ||
| 245 | } | ||
| 246 | |||
| 247 | CLog::Log(LOGERROR, "POParser: found numeric id descriptor, but no valid id can be read, " | ||
| 248 | "entry was handled as normal msgid entry"); | ||
| 249 | CLog::Log(LOGERROR, "POParser: The problematic entry: %s", | ||
| 250 | m_Entry.Content.c_str()); | ||
| 251 | return false; | ||
| 252 | } | ||
| 253 | |||
| 254 | void CPODocument::GetString(CStrEntry &strEntry) | ||
| 255 | { | ||
| 256 | size_t nextLFPos; | ||
| 257 | size_t startPos = strEntry.Pos; | ||
| 258 | strEntry.Str.clear(); | ||
| 259 | |||
| 260 | while (startPos < m_Entry.Content.size()) | ||
| 261 | { | ||
| 262 | nextLFPos = m_Entry.Content.find("\n", startPos); | ||
| 263 | if (nextLFPos == std::string::npos) | ||
| 264 | nextLFPos = m_Entry.Content.size(); | ||
| 265 | |||
| 266 | // check syntax, if it really is a valid quoted string line | ||
| 267 | if (nextLFPos-startPos < 2 || m_Entry.Content[startPos] != '\"' || | ||
| 268 | m_Entry.Content[nextLFPos-1] != '\"') | ||
| 269 | break; | ||
| 270 | |||
| 271 | strEntry.Str.append(m_Entry.Content, startPos+1, nextLFPos-2-startPos); | ||
| 272 | startPos = nextLFPos+1; | ||
| 273 | } | ||
| 274 | |||
| 275 | strEntry.Str = UnescapeString(strEntry.Str); | ||
| 276 | } | ||
| 277 | |||
| 278 | void CPODocument::ConvertLineEnds(const std::string &filename) | ||
| 279 | { | ||
| 280 | size_t foundPos = m_strBuffer.find_first_of("\r"); | ||
| 281 | if (foundPos == std::string::npos) | ||
| 282 | return; // We have only Linux style line endings in the file, nothing to do | ||
| 283 | |||
| 284 | if (foundPos+1 >= m_strBuffer.size() || m_strBuffer[foundPos+1] != '\n') | ||
| 285 | CLog::Log(LOGDEBUG, "POParser: PO file has Mac Style Line Endings. " | ||
| 286 | "Converted in memory to Linux LF for file: %s", filename.c_str()); | ||
| 287 | else | ||
| 288 | CLog::Log(LOGDEBUG, "POParser: PO file has Win Style Line Endings. " | ||
| 289 | "Converted in memory to Linux LF for file: %s", filename.c_str()); | ||
| 290 | |||
| 291 | std::string strTemp; | ||
| 292 | strTemp.reserve(m_strBuffer.size()); | ||
| 293 | for (std::string::const_iterator it = m_strBuffer.begin(); it < m_strBuffer.end(); ++it) | ||
| 294 | { | ||
| 295 | if (*it == '\r') | ||
| 296 | { | ||
| 297 | if (it+1 == m_strBuffer.end() || *(it+1) != '\n') | ||
| 298 | strTemp.push_back('\n'); // convert Mac style line ending and continue | ||
| 299 | continue; // we have Win style line ending so we exclude this CR now | ||
| 300 | } | ||
| 301 | strTemp.push_back(*it); | ||
| 302 | } | ||
| 303 | m_strBuffer.swap(strTemp); | ||
| 304 | m_POfilelength = m_strBuffer.size(); | ||
| 305 | } | ||
