From be933ef2241d79558f91796cc5b3a161f72ebf9c Mon Sep 17 00:00:00 2001 From: manuel Date: Mon, 19 Oct 2020 00:52:24 +0200 Subject: sync with upstream --- xbmc/utils/POUtils.cpp | 305 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 305 insertions(+) create mode 100644 xbmc/utils/POUtils.cpp (limited to 'xbmc/utils/POUtils.cpp') diff --git a/xbmc/utils/POUtils.cpp b/xbmc/utils/POUtils.cpp new file mode 100644 index 0000000..7d8afd3 --- /dev/null +++ b/xbmc/utils/POUtils.cpp @@ -0,0 +1,305 @@ +/* + * Copyright (C) 2012-2018 Team Kodi + * This file is part of Kodi - https://kodi.tv + * + * SPDX-License-Identifier: GPL-2.0-or-later + * See LICENSES/README.md for more information. + */ + +#include "utils/POUtils.h" + +#include "URL.h" +#include "filesystem/File.h" +#include "utils/log.h" + +#include + +CPODocument::CPODocument() +{ + m_CursorPos = 0; + m_nextEntryPos = 0; + m_POfilelength = 0; + m_Entry.msgStrPlural.clear(); + m_Entry.msgStrPlural.resize(1); +} + +CPODocument::~CPODocument() = default; + +bool CPODocument::LoadFile(const std::string &pofilename) +{ + CURL poFileUrl(pofilename); + if (!XFILE::CFile::Exists(poFileUrl)) + return false; + + XFILE::CFile file; + XFILE::auto_buffer buf; + if (file.LoadFile(poFileUrl, buf) < 18) // at least a size of a minimalistic header + { + CLog::Log(LOGERROR, "%s: can't load file \"%s\" or file is too small", __FUNCTION__, pofilename.c_str()); + return false; + } + + m_strBuffer = '\n'; + m_strBuffer.append(buf.get(), buf.size()); + buf.clear(); + + ConvertLineEnds(pofilename); + + // we make sure, to have an LF at the end of buffer + if (*m_strBuffer.rbegin() != '\n') + { + m_strBuffer += "\n"; + } + + m_POfilelength = m_strBuffer.size(); + + if (GetNextEntry() && m_Entry.Type == MSGID_FOUND) + return true; + + CLog::Log(LOGERROR, "POParser: unable to read PO file header from file: %s", pofilename.c_str()); + return false; +} + +bool CPODocument::GetNextEntry() +{ + do + { + // if we don't find LFLF, we reached the end of the buffer and the last entry to check + // we indicate this with setting m_nextEntryPos to the end of the buffer + if ((m_nextEntryPos = m_strBuffer.find("\n\n", m_CursorPos)) == std::string::npos) + m_nextEntryPos = m_POfilelength-1; + + // now we read the actual entry into a temp string for further processing + m_Entry.Content.assign(m_strBuffer, m_CursorPos, m_nextEntryPos - m_CursorPos +1); + m_CursorPos = m_nextEntryPos+1; // jump cursor to the second LF character + + if (FindLineStart ("\nmsgid ", m_Entry.msgID.Pos)) + { + if (FindLineStart ("\nmsgctxt \"#", m_Entry.xIDPos) && ParseNumID()) + { + m_Entry.Type = ID_FOUND; // we found an entry with a valid numeric id + return true; + } + + size_t plurPos; + if (FindLineStart ("\nmsgid_plural ", plurPos)) + { + m_Entry.Type = MSGID_PLURAL_FOUND; // we found a pluralized entry + return true; + } + + m_Entry.Type = MSGID_FOUND; // we found a normal entry, with no numeric id + return true; + } + } + while (m_nextEntryPos != m_POfilelength-1); + // we reached the end of buffer AND we have not found a valid entry + + return false; +} + +void CPODocument::ParseEntry(bool bisSourceLang) +{ + if (bisSourceLang) + { + if (m_Entry.Type == ID_FOUND) + GetString(m_Entry.msgID); + else + m_Entry.msgID.Str.clear(); + return; + } + + if (m_Entry.Type != ID_FOUND) + { + GetString(m_Entry.msgID); + if (FindLineStart ("\nmsgctxt ", m_Entry.msgCtxt.Pos)) + GetString(m_Entry.msgCtxt); + else + m_Entry.msgCtxt.Str.clear(); + } + + if (m_Entry.Type != MSGID_PLURAL_FOUND) + { + if (FindLineStart ("\nmsgstr ", m_Entry.msgStr.Pos)) + { + GetString(m_Entry.msgStr); + GetString(m_Entry.msgID); + } + else + { + CLog::Log(LOGERROR, "POParser: missing msgstr line in entry. Failed entry: %s", + m_Entry.Content.c_str()); + m_Entry.msgStr.Str.clear(); + } + return; + } + + // We found a plural form entry. We read it into a vector of CStrEntry types + m_Entry.msgStrPlural.clear(); + std::string strPattern = "\nmsgstr[0] "; + CStrEntry strEntry; + + for (int n=0; n<7 ; n++) + { + strPattern[8] = static_cast(n+'0'); + if (FindLineStart (strPattern, strEntry.Pos)) + { + GetString(strEntry); + if (strEntry.Str.empty()) + break; + m_Entry.msgStrPlural.push_back(strEntry); + } + else + break; + } + + if (m_Entry.msgStrPlural.empty()) + { + CLog::Log(LOGERROR, "POParser: msgstr[] plural lines have zero valid strings. " + "Failed entry: %s", m_Entry.Content.c_str()); + m_Entry.msgStrPlural.resize(1); // Put 1 element with an empty string into the vector + } +} + +const std::string& CPODocument::GetPlurMsgstr(size_t plural) const +{ + if (m_Entry.msgStrPlural.size() < plural+1) + { + CLog::Log(LOGERROR, "POParser: msgstr[%i] plural field requested, but not found in PO file. " + "Failed entry: %s", static_cast(plural), m_Entry.Content.c_str()); + plural = m_Entry.msgStrPlural.size()-1; + } + return m_Entry.msgStrPlural[plural].Str; +} + +std::string CPODocument::UnescapeString(const std::string &strInput) +{ + std::string strOutput; + if (strInput.empty()) + return strOutput; + + char oescchar; + strOutput.reserve(strInput.size()); + std::string::const_iterator it = strInput.begin(); + while (it < strInput.end()) + { + oescchar = *it++; + if (oescchar == '\\') + { + if (it == strInput.end()) + { + CLog::Log(LOGERROR, + "POParser: warning, unhandled escape character " + "at line-end. Problematic entry: %s", + m_Entry.Content.c_str()); + break; + } + switch (*it++) + { + case 'a': oescchar = '\a'; break; + case 'b': oescchar = '\b'; break; + case 'v': oescchar = '\v'; break; + case 'n': oescchar = '\n'; break; + case 't': oescchar = '\t'; break; + case 'r': oescchar = '\r'; break; + case '"': oescchar = '"' ; break; + case '0': oescchar = '\0'; break; + case 'f': oescchar = '\f'; break; + case '?': oescchar = '\?'; break; + case '\'': oescchar = '\''; break; + case '\\': oescchar = '\\'; break; + + default: + { + CLog::Log(LOGERROR, + "POParser: warning, unhandled escape character. Problematic entry: %s", + m_Entry.Content.c_str()); + continue; + } + } + } + strOutput.push_back(oescchar); + } + return strOutput; +} + +bool CPODocument::FindLineStart(const std::string &strToFind, size_t &FoundPos) +{ + + FoundPos = m_Entry.Content.find(strToFind); + + if (FoundPos == std::string::npos || FoundPos + strToFind.size() + 2 > m_Entry.Content.size()) + return false; // if we don't find the string or if we don't have at least one char after it + + FoundPos += strToFind.size(); // to set the pos marker to the exact start of the real data + return true; +} + +bool CPODocument::ParseNumID() +{ + if (isdigit(m_Entry.Content.at(m_Entry.xIDPos))) // verify if the first char is digit + { + // we check for the numeric id for the fist 10 chars (uint32) + m_Entry.xID = strtol(&m_Entry.Content[m_Entry.xIDPos], NULL, 10); + return true; + } + + CLog::Log(LOGERROR, "POParser: found numeric id descriptor, but no valid id can be read, " + "entry was handled as normal msgid entry"); + CLog::Log(LOGERROR, "POParser: The problematic entry: %s", + m_Entry.Content.c_str()); + return false; +} + +void CPODocument::GetString(CStrEntry &strEntry) +{ + size_t nextLFPos; + size_t startPos = strEntry.Pos; + strEntry.Str.clear(); + + while (startPos < m_Entry.Content.size()) + { + nextLFPos = m_Entry.Content.find("\n", startPos); + if (nextLFPos == std::string::npos) + nextLFPos = m_Entry.Content.size(); + + // check syntax, if it really is a valid quoted string line + if (nextLFPos-startPos < 2 || m_Entry.Content[startPos] != '\"' || + m_Entry.Content[nextLFPos-1] != '\"') + break; + + strEntry.Str.append(m_Entry.Content, startPos+1, nextLFPos-2-startPos); + startPos = nextLFPos+1; + } + + strEntry.Str = UnescapeString(strEntry.Str); +} + +void CPODocument::ConvertLineEnds(const std::string &filename) +{ + size_t foundPos = m_strBuffer.find_first_of("\r"); + if (foundPos == std::string::npos) + return; // We have only Linux style line endings in the file, nothing to do + + if (foundPos+1 >= m_strBuffer.size() || m_strBuffer[foundPos+1] != '\n') + CLog::Log(LOGDEBUG, "POParser: PO file has Mac Style Line Endings. " + "Converted in memory to Linux LF for file: %s", filename.c_str()); + else + CLog::Log(LOGDEBUG, "POParser: PO file has Win Style Line Endings. " + "Converted in memory to Linux LF for file: %s", filename.c_str()); + + std::string strTemp; + strTemp.reserve(m_strBuffer.size()); + for (std::string::const_iterator it = m_strBuffer.begin(); it < m_strBuffer.end(); ++it) + { + if (*it == '\r') + { + if (it+1 == m_strBuffer.end() || *(it+1) != '\n') + strTemp.push_back('\n'); // convert Mac style line ending and continue + continue; // we have Win style line ending so we exclude this CR now + } + strTemp.push_back(*it); + } + m_strBuffer.swap(strTemp); + m_POfilelength = m_strBuffer.size(); +} -- cgit v1.2.3