diff -r 000000000000 -r 1918ee327afb tools/linguist/shared/ts.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/linguist/shared/ts.cpp Mon Jan 11 14:00:40 2010 +0000 @@ -0,0 +1,779 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the Qt Linguist of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. 
+** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "translator.h" + /* NOTE(review): the #include directives below carry no header names, and further down some template arguments and string literals look cut off; this copy seems to have lost text that stood between angle brackets -- confirm against the original file. */ +#include +#include +#include +#include + +#include +#include + /* STRING(s) declares a static QString named str<s> whose value is the spelling of s, e.g. STRING(byte) gives strbyte holding "byte". */ +#define STRINGIFY_INTERNAL(x) #x +#define STRINGIFY(x) STRINGIFY_INTERNAL(x) +#define STRING(s) static QString str##s(QLatin1String(STRINGIFY(s))) + +QT_BEGIN_NAMESPACE + +/* + * The encodings are a total mess. + * A Translator has a codecForTr(). Each message's text will be passed to tr() + * in that encoding or as UTF-8 to trUtf8() if it is flagged as such. + * For ts 2.0, the file content is always uniformly in UTF-8. The file stores + * the codecForTr default and marks deviating messages accordingly. + * For ts 1.1, the file content is in mixed encoding. Each message is encoded + * the way it will be passed to tr() (with 8-bit characters encoded as numeric + * entities) or trUtf8(). The file stores the encoding and codecForTr in one + * attribute, for both the default and each deviating message. 
+ */ + + /* Debug-stream helper: prints an attribute as [name,value]. */ +QDebug &operator<<(QDebug &d, const QXmlStreamAttribute &attr) +{ + return d << "[" << attr.name().toString() << "," << attr.value().toString() << "]"; +} + + /* QXmlStreamReader subclass that parses a TS document read from dev into a Translator. The ConversionData reference supplies the source file name used in error locations and receives the final error text. */ +class TSReader : public QXmlStreamReader +{ +public: + TSReader(QIODevice &dev, ConversionData &cd) + : QXmlStreamReader(&dev), m_cd(cd) + {} + + // the "real thing" + bool read(Translator &translator); + +private: /* true when the current token is a start element whose name equals str */ + bool elementStarts(const QString &str) const + { + return isStartElement() && name() == str; + } + /* true when the current token is character data that is empty after trimmed() */ + bool isWhiteSpace() const + { + return isCharacters() && text().toString().trimmed().isEmpty(); + } + + // needed to expand + QString readContents(); + // needed to join s + QString readTransContents(); + + void handleError(); + + ConversionData &m_cd; +}; + /* Raises a reader error that names the unexpected current token and its location. Does nothing for comment tokens or when a CustomError is already set. Character data longer than 30 characters is shortened to its first 30 followed by "[...]". */ +void TSReader::handleError() +{ + if (isComment()) + return; + if (hasError() && error() == CustomError) // raised by readContents + return; + + const QString loc = QString::fromLatin1("at %3:%1:%2") + .arg(lineNumber()).arg(columnNumber()).arg(m_cd.m_sourceFileName); /* placeholders are numbered so the text reads file:line:column */ + + switch (tokenType()) { + case NoToken: // Cannot happen + default: // likewise + case Invalid: + raiseError(QString::fromLatin1("Parse error %1: %2").arg(loc, errorString())); + break; + case StartElement: + raiseError(QString::fromLatin1("Unexpected tag <%1> %2").arg(name().toString(), loc)); + break; + case Characters: + { + QString tok = text().toString(); + if (tok.length() > 30) + tok = tok.left(30) + QLatin1String("[...]"); + raiseError(QString::fromLatin1("Unexpected characters '%1' %2").arg(tok, loc)); + } + break; + case EntityReference: + raiseError(QString::fromLatin1("Unexpected entity '&%1;' %2").arg(name().toString(), loc)); + break; + case ProcessingInstruction: + raiseError(QString::fromLatin1("Unexpected processing instruction %1").arg(loc)); + break; + } +} + /* Turns the text of a byte value attribute into a one-character string. A leading "x" selects base 16, otherwise base 10 is used. Returns an empty string when the parsed number is 0; toUInt() is called without an ok flag, so presumably text that fails to parse ends up here too -- confirm. */ +static QString byteValue(QString value) +{ + int base = 10; + if (value.startsWith(QLatin1String("x"))) { + base = 16; + value.remove(0, 1); + } + int n = value.toUInt(0, base); + return (n 
!= 0) ? QString(QChar(n)) : QString(); +} + /* Returns the text content of the current element, reading up to its end tag. Character data is appended as is; each nested byte element appends byteValue() of its value attribute and must be followed directly by its end tag. Any other token is passed to handleError() and ends the loop; the text gathered so far is still returned. */ +QString TSReader::readContents() +{ + STRING(byte); + STRING(value); + + QString result; + while (!atEnd()) { + readNext(); + if (isEndElement()) { + break; + } else if (isCharacters()) { + result += text(); + } else if (elementStarts(strbyte)) { + // + result += byteValue(attributes().value(strvalue).toString()); + readNext(); + if (!isEndElement()) { + handleError(); + break; + } + } else { + handleError(); + break; + } + } + //qDebug() << "TEXT: " << result; + return result; +} + /* Reads translation text from the current element. When its variants attribute is "yes", the contents of the lengthvariant children are joined with Translator::BinaryVariantSeparator (whitespace between them is skipped, anything else goes to handleError()); otherwise this is the same as readContents(). */ +QString TSReader::readTransContents() +{ + STRING(lengthvariant); + STRING(variants); + STRING(yes); + + if (attributes().value(strvariants) == stryes) { + QString result; + while (!atEnd()) { + readNext(); + if (isEndElement()) { + break; + } else if (isWhiteSpace()) { + // ignore these, just whitespace + } else if (elementStarts(strlengthvariant)) { + if (!result.isEmpty()) + result += QChar(Translator::BinaryVariantSeparator); + result += readContents(); + } else { + handleError(); + break; + } + } + return result; + } else { + return readContents(); + } +} + /* Parses the document into translator. Structure handled by the branches below: a TS root element (its language and sourcelanguage attributes are copied to translator) holding defaultcodec, extra-prefixed and context elements; a context holds a name and message elements; a message holds source, oldsource, oldcomment, extracomment, translatorcomment, location, comment, userdata, translation and extra-prefixed elements. Any other token goes to handleError(). A message is appended to translator when its end tag is reached. Returns false after appending errorString() to m_cd when the reader has an error, true otherwise. */ +bool TSReader::read(Translator &translator) +{ + STRING(both); + STRING(byte); + STRING(comment); + STRING(context); + STRING(defaultcodec); + STRING(encoding); + STRING(extracomment); + STRING(filename); + STRING(id); + STRING(language); + STRING(line); + STRING(location); + STRING(message); + STRING(name); + STRING(numerus); + STRING(numerusform); + STRING(obsolete); + STRING(oldcomment); + STRING(oldsource); + STRING(source); + STRING(sourcelanguage); + STRING(translation); + STRING(translatorcomment); + STRING(true); + STRING(TS); + STRING(type); + STRING(unfinished); + STRING(userdata); + STRING(utf8); + STRING(value); + //STRING(version); + STRING(yes); + + static const QString strextrans(QLatin1String("extra-")); + static const QString strUtf8(QLatin1String("UTF-8")); + + while (!atEnd()) { + readNext(); + if (isStartDocument()) { + // + //qDebug() << attributes(); + } else 
if (isEndDocument()) { + // + //qDebug() << attributes(); + } else if (isDTD()) { + // + //qDebug() << tokenString(); + } else if (elementStarts(strTS)) { + // + //qDebug() << "TS " << attributes(); + QHash currentLine; + QString currentFile; /* currentLine: running line number per file, used to resolve signed line attributes; currentFile: file name a message starts with when its location elements give no filename */ + + QXmlStreamAttributes atts = attributes(); + //QString version = atts.value(strversion).toString(); + translator.setLanguageCode(atts.value(strlanguage).toString()); + translator.setSourceLanguageCode(atts.value(strsourcelanguage).toString()); + while (!atEnd()) { + readNext(); + if (isEndElement()) { + // found, finish local loop + break; + } else if (isWhiteSpace()) { + // ignore these, just whitespace + } else if (elementStarts(strdefaultcodec)) { + // + const QString &codec = readElementText(); + if (!codec.isEmpty()) + translator.setCodecName(codec.toLatin1()); + // + } else if (isStartElement() + && name().toString().startsWith(strextrans)) { + // + QString tag = name().toString(); + translator.setExtra(tag.mid(6), readContents()); /* mid(6) skips the 6-character "extra-" prefix */ + // + } else if (elementStarts(strcontext)) { + // + QString context; + while (!atEnd()) { + readNext(); + if (isEndElement()) { + // found, finish local loop + break; + } else if (isWhiteSpace()) { + // ignore these, just whitespace + } else if (elementStarts(strname)) { + // + context = readElementText(); + // + } else if (elementStarts(strmessage)) { + // + TranslatorMessage::References refs; + QString currentMsgFile = currentFile; + + TranslatorMessage msg; + msg.setId(attributes().value(strid).toString()); + msg.setContext(context); + msg.setType(TranslatorMessage::Finished); /* default; the type attribute of the translation element may change it further down */ + msg.setPlural(attributes().value(strnumerus) == stryes); + const QStringRef &utf8Attr = attributes().value(strutf8); + msg.setNonUtf8(utf8Attr == strboth); /* utf8="both" sets the non-UTF-8 flag and, via the next statement, the UTF-8 flag as well */ + msg.setUtf8(msg.isNonUtf8() || utf8Attr == strtrue + || attributes().value(strencoding) == strUtf8); + while (!atEnd()) { + readNext(); + if (isEndElement()) { + // found, finish local loop + msg.setReferences(refs); + translator.append(msg); + break; + } 
else if (isWhiteSpace()) { + // ignore these, just whitespace + } else if (elementStarts(strsource)) { + // ... + msg.setSourceText(readContents()); + } else if (elementStarts(stroldsource)) { + // ... + msg.setOldSourceText(readContents()); + } else if (elementStarts(stroldcomment)) { + // ... + msg.setOldComment(readContents()); + } else if (elementStarts(strextracomment)) { + // ... + msg.setExtraComment(readContents()); + } else if (elementStarts(strtranslatorcomment)) { + // ... + msg.setTranslatorComment(readContents()); + } else if (elementStarts(strlocation)) { + // + QXmlStreamAttributes atts = attributes(); + QString fileName = atts.value(strfilename).toString(); + if (fileName.isEmpty()) { + fileName = currentMsgFile; /* no filename attribute: reuse the file of the previous location */ + } else { + if (refs.isEmpty()) + currentFile = fileName; /* only the first location of a message updates the document-wide current file */ + currentMsgFile = fileName; + } + const QString lin = atts.value(strline).toString(); + if (lin.isEmpty()) { + translator.setLocationsType(Translator::RelativeLocations); + refs.append(TranslatorMessage::Reference(fileName, -1)); /* no line attribute: recorded with line -1 */ + } else { + bool bOK; + int lineNo = lin.toInt(&bOK); + if (bOK) { /* a line attribute that is not an integer adds no reference */ + if (lin.startsWith(QLatin1Char('+')) || lin.startsWith(QLatin1Char('-'))) { + lineNo = (currentLine[fileName] += lineNo); /* a signed value is a delta added to the running line of this file */ + translator.setLocationsType(Translator::RelativeLocations); + } else { + translator.setLocationsType(Translator::AbsoluteLocations); + } + refs.append(TranslatorMessage::Reference(fileName, lineNo)); + } + } + readContents(); /* result discarded; reads on to the end tag of the location element */ + } else if (elementStarts(strcomment)) { + // ... + msg.setComment(readContents()); + } else if (elementStarts(struserdata)) { + // ... 
+ msg.setUserData(readContents()); + } else if (elementStarts(strtranslation)) { + // + QXmlStreamAttributes atts = attributes(); + QStringRef type = atts.value(strtype); + if (type == strunfinished) + msg.setType(TranslatorMessage::Unfinished); + else if (type == strobsolete) + msg.setType(TranslatorMessage::Obsolete); + if (msg.isPlural()) { /* plural messages collect one translation per numerusform child */ + QStringList translations; + while (!atEnd()) { + readNext(); + if (isEndElement()) { + break; + } else if (isWhiteSpace()) { + // ignore these, just whitespace + } else if (elementStarts(strnumerusform)) { + translations.append(readTransContents()); + } else { + handleError(); + break; + } + } + msg.setTranslations(translations); + } else { + msg.setTranslation(readTransContents()); + } + // + } else if (isStartElement() + && name().toString().startsWith(strextrans)) { + // + QString tag = name().toString(); + msg.setExtra(tag.mid(6), readContents()); /* mid(6) skips the 6-character "extra-" prefix */ + // + } else { + handleError(); + } + } + // + } else { + handleError(); + } + } + // + } else { + handleError(); + } + } // + } else { + handleError(); + } + } + if (hasError()) { + m_cd.appendError(errorString()); + return false; + } + return true; +} + /* Formats ch in base 16 into one of two templates: the first is used for ch <= 0x20, "&#x%1;" otherwise. NOTE(review): the first template appears as an empty literal in this copy and is presumably cut off -- confirm. */ +static QString numericEntity(int ch) +{ + return QString(ch <= 0x20 ? 
QLatin1String("") + : QLatin1String("&#x%1;")) .arg(ch, 0, 16); +} + /* protect(): returns a copy of str in which the double quote, ampersand, greater-than, less-than and single quote characters are replaced by the literals in the switch, and characters below 0x20 other than CR, LF and TAB are replaced by numericEntity(); everything else is copied unchanged. NOTE(review): the replacement literals shown here equal the raw characters, and the first one is not a valid literal; presumably they were XML entity references before this copy was made -- confirm. evilBytes(), defined right after protect(): returns protect(str) when isUtf8 is set, when format is 20, when codecName is "UTF-8" or when no codec of that name is found; otherwise the protected text is encoded with the codec, the bytes are read back with fromLatin1(), and every resulting character >= 0x7f is written through numericEntity(). NOTE(review): fromLatin1() is given data() without a length, so an encoded NUL byte would presumably end the text early -- confirm. */ +static QString protect(const QString &str) +{ + QString result; + result.reserve(str.length() * 12 / 10); + for (int i = 0; i != str.size(); ++i) { + uint c = str.at(i).unicode(); + switch (c) { + case '\"': + result += QLatin1String("""); + break; + case '&': + result += QLatin1String("&"); + break; + case '>': + result += QLatin1String(">"); + break; + case '<': + result += QLatin1String("<"); + break; + case '\'': + result += QLatin1String("'"); + break; + default: + if (c < 0x20 && c != '\r' && c != '\n' && c != '\t') + result += numericEntity(c); + else // this also covers surrogates + result += QChar(c); + } + } + return result; +} + +static QString evilBytes(const QString& str, + bool isUtf8, int format, const QByteArray &codecName) +{ + //qDebug() << "EVIL: " << str << isUtf8 << format << codecName; + if (isUtf8) + return protect(str); + if (format == 20) + return protect(str); + if (codecName == "UTF-8") + return protect(str); + QTextCodec *codec = QTextCodec::codecForName(codecName); + if (!codec) + return protect(str); + QString t = QString::fromLatin1(codec->fromUnicode(protect(str)).data()); + int len = (int) t.length(); + QString result; + // FIXME: Factor is sensible only for latin scripts, probably. 
+ result.reserve(t.length() * 2); /* t was built with fromLatin1() from the codec output; the loop below copies characters under 0x7f and writes the others through numericEntity() */ + for (int k = 0; k < len; k++) { + if (t[k].unicode() >= 0x7f) + result += numericEntity(t[k].unicode()); + else + result += t[k]; + } + return result; +} + /* writeExtras(): for every entry of extras whose key is not exactly matched by drops, writes one item to t that starts with indent and contains the value passed through protect(). writeVariants(), defined right after writeExtras(): if input contains Translator::BinaryVariantSeparator, writes variants="yes" and then each separator-delimited piece of input through protect(), and finishes with a newline followed by indent; otherwise writes ">" followed by protect(input). NOTE(review): the tag literals of both functions look cut off in this copy -- confirm. */ +static void writeExtras(QTextStream &t, const char *indent, + const TranslatorMessage::ExtraData &extras, const QRegExp &drops) +{ + for (Translator::ExtraData::ConstIterator it = extras.begin(); it != extras.end(); ++it) { + if (!drops.exactMatch(it.key())) { + t << indent << "' + << protect(it.value()) + << "\n"; + } + } +} + +static void writeVariants(QTextStream &t, const char *indent, const QString &input) +{ + int offset; + if ((offset = input.indexOf(QChar(Translator::BinaryVariantSeparator))) >= 0) { + t << " variants=\"yes\">"; + int start = 0; + forever { + t << "\n " << indent << "" + << protect(input.mid(start, offset - start)) + << ""; + if (offset == input.length()) + break; + start = offset + 1; + offset = input.indexOf(QChar(Translator::BinaryVariantSeparator), start); + if (offset < 0) + offset = input.length(); + } + t << "\n" << indent; + } else { + t << ">" << protect(input); + } +} + /* Writes translator to dev through a QTextStream set to UTF-8. The callers visible in this file pass format 11 or 20. Messages are grouped by context in order of first appearance, or sorted when cd.sortContexts() is true; obsolete messages with an empty translation are skipped. In the text visible here result is never changed after its initialization, so the function returns true. NOTE(review): several statements of this function look incomplete in this copy, e.g. an else branch without a visible matching if -- confirm against the original file. */ +bool saveTS(const Translator &translator, QIODevice &dev, ConversionData &cd, int format) +{ + bool result = true; + QTextStream t(&dev); + t.setCodec(QTextCodec::codecForName("UTF-8")); + bool trIsUtf8 = (translator.codecName() == "UTF-8"); + //qDebug() << translator.codecName(); + bool fileIsUtf8 = (format == 20 || trIsUtf8); + + // The xml prolog allows processors to easily detect the correct encoding + t << "\n\n"; + + if (format == 11) + t << "\n"; + + QByteArray codecName = translator.codecName(); + if (codecName != "ISO-8859-1") + t << "" << codecName << "\n"; + + QRegExp drops(cd.dropTags().join(QLatin1String("|"))); + + if (format == 20) + writeExtras(t, " ", translator.extras(), drops); + + QHash > messageOrder; + QList contextOrder; + foreach (const TranslatorMessage &msg, translator.messages()) { + // no need for such noise + if (msg.type() == 
TranslatorMessage::Obsolete && msg.translation().isEmpty()) + continue; + + QList &context = messageOrder[msg.context()]; + if (context.isEmpty()) + contextOrder.append(msg.context()); + context.append(msg); /* contextOrder receives each context once, when its first message is seen */ + } + if (cd.sortContexts()) + qSort(contextOrder); + + QHash currentLine; + QString currentFile; + foreach (const QString &context, contextOrder) { + const TranslatorMessage &firstMsg = messageOrder[context].first(); + t << "\n"; + + t << " " + << evilBytes(context, firstMsg.isUtf8() || fileIsUtf8, format, codecName) + << "\n"; + foreach (const TranslatorMessage &msg, messageOrder[context]) { + //msg.dump(); + + bool isUtf8 = msg.isUtf8(); + bool second = false; /* the forever loop runs a second time, with isUtf8 forced to false, only for format 11 messages that are flagged both UTF-8 and non-UTF-8; see the break condition at its end */ + forever { + + t << " \n"; + if (translator.locationsType() != Translator::NoLocations) { + QString cfile = currentFile; + bool first = true; + foreach (const TranslatorMessage::Reference &ref, msg.allReferences()) { + QString fn = cd.m_targetDir.relativeFilePath(ref.fileName()) + .replace(QLatin1Char('\\'),QLatin1Char('/')); + int ln = ref.lineNumber(); + QString ld; + if (translator.locationsType() == Translator::RelativeLocations) { + if (ln != -1) { + int dlt = ln - currentLine[fn]; + if (dlt >= 0) + ld.append(QLatin1Char('+')); + ld.append(QString::number(dlt)); + currentLine[fn] = ln; /* relative mode: ld holds the signed difference to the line last recorded for this file */ + } + + if (fn != cfile) { + if (first) + currentFile = fn; + cfile = fn; + } else { + fn.clear(); /* same file as the previous reference, so the name is cleared */ + } + first = false; + } else { + if (ln != -1) + ld = QString::number(ln); + } + t << " \n"; + } + } + + t << " " + << evilBytes(msg.sourceText(), isUtf8, format, codecName) + << "\n"; + + if (format != 11 && !msg.oldSourceText().isEmpty()) + t << " " << protect(msg.oldSourceText()) << "\n"; + + if (!msg.comment().isEmpty()) { + t << " " + << evilBytes(msg.comment(), isUtf8, format, codecName) + << "\n"; + } + + if (format != 11) { + + if (!msg.oldComment().isEmpty()) + t << " " << protect(msg.oldComment()) << "\n"; + + if (!msg.extraComment().isEmpty()) + t << " " << protect(msg.extraComment()) + << "\n"; + + if 
(!msg.translatorComment().isEmpty()) + t << " " << protect(msg.translatorComment()) + << "\n"; + + } + + t << " "; + const QStringList &translns = msg.translations(); + for (int j = 0; j < translns.count(); ++j) { + t << "\n "; + } + t << "\n "; + } else { + writeVariants(t, " ", msg.translation()); + } + t << "\n"; + + if (format != 11) + writeExtras(t, " ", msg.extras(), drops); + + if (!msg.userData().isEmpty()) + t << " " << msg.userData() << "\n"; /* NOTE(review): userData() is streamed without protect() or evilBytes(), unlike the other message texts -- confirm this is intended */ + t << " \n"; + + if (format != 11 || second || !msg.isUtf8() || !msg.isNonUtf8()) + break; + isUtf8 = false; + second = true; + } + } + t << "\n"; + } + + t << "\n"; + return result; +} + /* Loader entry point: sets the locations type of translator to NoLocations, then parses dev with a TSReader and returns its result. */ +bool loadTS(Translator &translator, QIODevice &dev, ConversionData &cd) +{ + translator.setLocationsType(Translator::NoLocations); + TSReader reader(dev, cd); + return reader.read(translator); +} + /* Saver that calls saveTS() with format 11. */ +bool saveTS11(const Translator &translator, QIODevice &dev, ConversionData &cd) +{ + return saveTS(translator, dev, cd, 11); +} + /* Saver that calls saveTS() with format 20. */ +bool saveTS20(const Translator &translator, QIODevice &dev, ConversionData &cd) +{ + return saveTS(translator, dev, cd, 20); +} + /* Registers three file formats with Translator, all loaded by loadTS: "ts11" (saveTS11, priority -1), "ts20" (saveTS20, priority -1) and "ts" (saveTS20, priority 0). Always returns 1. */ +int initTS() +{ + Translator::FileFormat format; + + format.extension = QLatin1String("ts11"); + format.fileType = Translator::FileFormat::TranslationSource; + format.priority = -1; + format.description = QObject::tr("Qt translation sources (format 1.1)"); + format.loader = &loadTS; + format.saver = &saveTS11; + Translator::registerFileFormat(format); + + format.extension = QLatin1String("ts20"); + format.fileType = Translator::FileFormat::TranslationSource; + format.priority = -1; + format.description = QObject::tr("Qt translation sources (format 2.0)"); + format.loader = &loadTS; + format.saver = &saveTS20; + Translator::registerFileFormat(format); + + // "ts" is always the latest. right now it's ts20. 
+ format.extension = QLatin1String("ts"); + format.fileType = Translator::FileFormat::TranslationSource; + format.priority = 0; + format.description = QObject::tr("Qt translation sources (latest format)"); + format.loader = &loadTS; + format.saver = &saveTS20; + Translator::registerFileFormat(format); + + return 1; +} + /* presumably arranges for initTS() to run during static initialization -- confirm against the definition of Q_CONSTRUCTOR_FUNCTION */ +Q_CONSTRUCTOR_FUNCTION(initTS) + +QT_END_NAMESPACE