diff -r 000000000000 -r 1918ee327afb tools/linguist/shared/qm.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/linguist/shared/qm.cpp Mon Jan 11 14:00:40 2010 +0000 @@ -0,0 +1,788 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the Qt Linguist of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. +** +** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "translator.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +QT_BEGIN_NAMESPACE + +// magic number for the file +static const int MagicLength = 16; +static const uchar magic[MagicLength] = { + 0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95, + 0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd +}; + + +namespace { + +enum Tag { + Tag_End = 1, + Tag_SourceText16 = 2, + Tag_Translation = 3, + Tag_Context16 = 4, + Tag_Obsolete1 = 5, + Tag_SourceText = 6, + Tag_Context = 7, + Tag_Comment = 8, + Tag_Obsolete2 = 9 +}; + +enum Prefix { + NoPrefix, + Hash, + HashContext, + HashContextSourceText, + HashContextSourceTextComment +}; + +} // namespace anon + +static uint elfHash(const QByteArray &ba) +{ + const uchar *k = (const uchar *)ba.data(); + uint h = 0; + uint g; + + if (k) { + while (*k) { + h = (h << 4) + *k++; + if ((g = (h & 0xf0000000)) != 0) + h ^= g >> 24; + h &= ~g; + } + } + if (!h) + h = 1; + return h; +} + +class ByteTranslatorMessage +{ +public: + ByteTranslatorMessage( + const QByteArray &context, + const QByteArray &sourceText, + const QByteArray &comment, + const QStringList &translations) : + m_context(context), + m_sourcetext(sourceText), + m_comment(comment), + m_translations(translations) + {} + const QByteArray &context() const { return m_context; } + const QByteArray &sourceText() const { return m_sourcetext; } + const QByteArray &comment() const { return m_comment; } + const QStringList &translations() const { return m_translations; } + bool operator<(const ByteTranslatorMessage& m) const; + +private: + QByteArray m_context; + QByteArray m_sourcetext; + QByteArray m_comment; + QStringList m_translations; +}; + +Q_DECLARE_TYPEINFO(ByteTranslatorMessage, Q_MOVABLE_TYPE); + +bool ByteTranslatorMessage::operator<(const ByteTranslatorMessage& m) const +{ + if (m_context != m.m_context) + return m_context < m.m_context; + if (m_sourcetext != m.m_sourcetext) + return m_sourcetext < m.m_sourcetext; + return m_comment < m.m_comment; +} + +class Releaser +{ +public: + struct Offset { + Offset() + : h(0), o(0) + {} + Offset(uint hash, uint offset) + : h(hash), o(offset) + {} + + bool operator<(const Offset &other) const { + return (h != other.h) ? h < other.h : o < other.o; + } + bool operator==(const Offset &other) const { + return h == other.h && o == other.o; + } + uint h; + uint o; + }; + + enum { Contexts = 0x2f, Hashes = 0x42, Messages = 0x69, NumerusRules = 0x88 }; + + Releaser() : m_codec(0) {} + + void setCodecName(const QByteArray &codecName) + { + m_codec = QTextCodec::codecForName(codecName); + } + + bool save(QIODevice *iod); + + void insert(const TranslatorMessage &msg, bool forceComment); + void insertIdBased(const TranslatorMessage &message); + + void squeeze(TranslatorSaveMode mode); + + void setNumerusRules(const QByteArray &rules); + +private: + Q_DISABLE_COPY(Releaser) + + // This should reproduce the byte array fetched from the source file, which + // on turn should be the same as passed to the actual tr(...) calls + QByteArray originalBytes(const QString &str, bool isUtf8) const; + + void insertInternal(const TranslatorMessage &message, bool forceComment, bool isUtf8); + + static Prefix commonPrefix(const ByteTranslatorMessage &m1, const ByteTranslatorMessage &m2); + + static uint msgHash(const ByteTranslatorMessage &msg); + + void writeMessage(const ByteTranslatorMessage & msg, QDataStream & stream, + TranslatorSaveMode strip, Prefix prefix) const; + + // for squeezed but non-file data, this is what needs to be deleted + QByteArray m_messageArray; + QByteArray m_offsetArray; + QByteArray m_contextArray; + QMap m_messages; + QByteArray m_numerusRules; + + // Used to reproduce the original bytes + QTextCodec *m_codec; +}; + +QByteArray Releaser::originalBytes(const QString &str, bool isUtf8) const +{ + if (str.isEmpty()) { + // Do not use QByteArray() here as the result of the serialization + // will be different. + return QByteArray(""); + } + if (isUtf8) + return str.toUtf8(); + return m_codec ? m_codec->fromUnicode(str) : str.toLatin1(); +} + +uint Releaser::msgHash(const ByteTranslatorMessage &msg) +{ + return elfHash(msg.sourceText() + msg.comment()); +} + +Prefix Releaser::commonPrefix(const ByteTranslatorMessage &m1, const ByteTranslatorMessage &m2) +{ + if (msgHash(m1) != msgHash(m2)) + return NoPrefix; + if (m1.context() != m2.context()) + return Hash; + if (m1.sourceText() != m2.sourceText()) + return HashContext; + if (m1.comment() != m2.comment()) + return HashContextSourceText; + return HashContextSourceTextComment; +} + +void Releaser::writeMessage(const ByteTranslatorMessage &msg, QDataStream &stream, + TranslatorSaveMode mode, Prefix prefix) const +{ + for (int i = 0; i < msg.translations().count(); ++i) + stream << quint8(Tag_Translation) << msg.translations().at(i); + + if (mode == SaveEverything) + prefix = HashContextSourceTextComment; + + // lrelease produces "wrong" QM files for QByteArrays that are .isNull(). + switch (prefix) { + default: + case HashContextSourceTextComment: + stream << quint8(Tag_Comment) << msg.comment(); + // fall through + case HashContextSourceText: + stream << quint8(Tag_SourceText) << msg.sourceText(); + // fall through + case HashContext: + stream << quint8(Tag_Context) << msg.context(); + break; + } + + stream << quint8(Tag_End); +} + + +bool Releaser::save(QIODevice *iod) +{ + QDataStream s(iod); + s.writeRawData((const char *)magic, MagicLength); + + if (!m_offsetArray.isEmpty()) { + quint32 oas = quint32(m_offsetArray.size()); + s << quint8(Hashes) << oas; + s.writeRawData(m_offsetArray.constData(), oas); + } + if (!m_messageArray.isEmpty()) { + quint32 mas = quint32(m_messageArray.size()); + s << quint8(Messages) << mas; + s.writeRawData(m_messageArray.constData(), mas); + } + if (!m_contextArray.isEmpty()) { + quint32 cas = quint32(m_contextArray.size()); + s << quint8(Contexts) << cas; + s.writeRawData(m_contextArray.constData(), cas); + } + if (!m_numerusRules.isEmpty()) { + quint32 nrs = m_numerusRules.size(); + s << quint8(NumerusRules) << nrs; + s.writeRawData(m_numerusRules.constData(), nrs); + } + return true; +} + +void Releaser::squeeze(TranslatorSaveMode mode) +{ + if (m_messages.isEmpty() && mode == SaveEverything) + return; + + QMap messages = m_messages; + + // re-build contents + m_messageArray.clear(); + m_offsetArray.clear(); + m_contextArray.clear(); + m_messages.clear(); + + QMap offsets; + + QDataStream ms(&m_messageArray, QIODevice::WriteOnly); + QMap::const_iterator it, next; + int cpPrev = 0, cpNext = 0; + for (it = messages.constBegin(); it != messages.constEnd(); ++it) { + cpPrev = cpNext; + next = it; + ++next; + if (next == messages.constEnd()) + cpNext = 0; + else + cpNext = commonPrefix(it.key(), next.key()); + offsets.insert(Offset(msgHash(it.key()), ms.device()->pos()), (void *)0); + writeMessage(it.key(), ms, mode, Prefix(qMax(cpPrev, cpNext + 1))); + } + + QMap::Iterator offset; + offset = offsets.begin(); + QDataStream ds(&m_offsetArray, QIODevice::WriteOnly); + while (offset != offsets.end()) { + Offset k = offset.key(); + ++offset; + ds << quint32(k.h) << quint32(k.o); + } + + if (mode == SaveStripped) { + QMap contextSet; + for (it = messages.constBegin(); it != messages.constEnd(); ++it) + ++contextSet[it.key().context()]; + + quint16 hTableSize; + if (contextSet.size() < 200) + hTableSize = (contextSet.size() < 60) ? 151 : 503; + else if (contextSet.size() < 2500) + hTableSize = (contextSet.size() < 750) ? 1511 : 5003; + else + hTableSize = (contextSet.size() < 10000) ? 15013 : 3 * contextSet.size() / 2; + + QMultiMap hashMap; + QMap::const_iterator c; + for (c = contextSet.constBegin(); c != contextSet.constEnd(); ++c) + hashMap.insert(elfHash(c.key()) % hTableSize, c.key()); + + /* + The contexts found in this translator are stored in a hash + table to provide fast lookup. The context array has the + following format: + + quint16 hTableSize; + quint16 hTable[hTableSize]; + quint8 contextPool[...]; + + The context pool stores the contexts as Pascal strings: + + quint8 len; + quint8 data[len]; + + Let's consider the look-up of context "FunnyDialog". A + hash value between 0 and hTableSize - 1 is computed, say h. + If hTable[h] is 0, "FunnyDialog" is not covered by this + translator. Else, we check in the contextPool at offset + 2 * hTable[h] to see if "FunnyDialog" is one of the + contexts stored there, until we find it or we meet the + empty string. + */ + m_contextArray.resize(2 + (hTableSize << 1)); + QDataStream t(&m_contextArray, QIODevice::WriteOnly); + + quint16 *hTable = new quint16[hTableSize]; + memset(hTable, 0, hTableSize * sizeof(quint16)); + + t << hTableSize; + t.device()->seek(2 + (hTableSize << 1)); + t << quint16(0); // the entry at offset 0 cannot be used + uint upto = 2; + + QMap::const_iterator entry = hashMap.constBegin(); + while (entry != hashMap.constEnd()) { + int i = entry.key(); + hTable[i] = quint16(upto >> 1); + + do { + const char *con = entry.value().constData(); + uint len = uint(entry.value().length()); + len = qMin(len, 255u); + t << quint8(len); + t.writeRawData(con, len); + upto += 1 + len; + ++entry; + } while (entry != hashMap.constEnd() && entry.key() == i); + if (upto & 0x1) { + // offsets have to be even + t << quint8(0); // empty string + ++upto; + } + } + t.device()->seek(2); + for (int j = 0; j < hTableSize; j++) + t << hTable[j]; + delete [] hTable; + + if (upto > 131072) { + qWarning("Releaser::squeeze: Too many contexts"); + m_contextArray.clear(); + } + } +} + +void Releaser::insertInternal(const TranslatorMessage &message, bool forceComment, bool isUtf8) +{ + ByteTranslatorMessage bmsg(originalBytes(message.context(), isUtf8), + originalBytes(message.sourceText(), isUtf8), + originalBytes(message.comment(), isUtf8), + message.translations()); + if (!forceComment) { + ByteTranslatorMessage bmsg2( + bmsg.context(), bmsg.sourceText(), QByteArray(""), bmsg.translations()); + if (!m_messages.contains(bmsg2)) { + m_messages.insert(bmsg2, 0); + return; + } + } + m_messages.insert(bmsg, 0); +} + +void Releaser::insert(const TranslatorMessage &message, bool forceComment) +{ + insertInternal(message, forceComment, message.isUtf8()); + if (message.isUtf8() && message.isNonUtf8()) + insertInternal(message, forceComment, false); +} + +void Releaser::insertIdBased(const TranslatorMessage &message) +{ + QStringList tlns = message.translations(); + if (message.type() == TranslatorMessage::Unfinished) + for (int i = 0; i < tlns.size(); ++i) + if (tlns.at(i).isEmpty()) + tlns[i] = message.sourceText(); + ByteTranslatorMessage bmsg("", originalBytes(message.id(), false), "", tlns); + m_messages.insert(bmsg, 0); +} + +void Releaser::setNumerusRules(const QByteArray &rules) +{ + m_numerusRules = rules; +} + +static quint8 read8(const uchar *data) +{ + return *data; +} + +static quint32 read32(const uchar *data) +{ + return (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | (data[3]); +} + +static void fromBytes(const char *str, int len, QTextCodec *codec, QTextCodec *utf8Codec, + QString *out, QString *utf8Out, + bool *isSystem, bool *isUtf8, bool *needs8Bit) +{ + for (int i = 0; i < len; ++i) + if (str[i] & 0x80) { + if (utf8Codec) { + QTextCodec::ConverterState cvtState; + *utf8Out = utf8Codec->toUnicode(str, len, &cvtState); + *isUtf8 = !cvtState.invalidChars; + } + QTextCodec::ConverterState cvtState; + *out = codec->toUnicode(str, len, &cvtState); + *isSystem = !cvtState.invalidChars; + *needs8Bit = true; + return; + } + *out = QString::fromLatin1(str, len); + *isSystem = true; + if (utf8Codec) { + *utf8Out = *out; + *isUtf8 = true; + } + *needs8Bit = false; +} + +bool loadQM(Translator &translator, QIODevice &dev, ConversionData &cd) +{ + QByteArray ba = dev.readAll(); + const uchar *data = (uchar*)ba.data(); + int len = ba.size(); + if (len < MagicLength || memcmp(data, magic, MagicLength) != 0) { + cd.appendError(QLatin1String("QM-Format error: magic marker missing")); + return false; + } + + enum { Contexts = 0x2f, Hashes = 0x42, Messages = 0x69, NumerusRules = 0x88 }; + + // for squeezed but non-file data, this is what needs to be deleted + const uchar *messageArray = 0; + const uchar *offsetArray = 0; + const uchar *contextArray = 0; + const uchar *numerusRulesArray = 0; + uint messageLength = 0; + uint offsetLength = 0; + uint contextLength = 0; + uint numerusRulesLength = 0; + + bool ok = true; + const uchar *end = data + len; + + data += MagicLength; + + while (data < end - 4) { + quint8 tag = read8(data++); + quint32 blockLen = read32(data); + //qDebug() << "TAG:" << tag << "BLOCKLEN:" << blockLen; + data += 4; + if (!tag || !blockLen) + break; + if (data + blockLen > end) { + ok = false; + break; + } + + if (tag == Contexts) { + contextArray = data; + contextLength = blockLen; + //qDebug() << "CONTEXTS: " << contextLength << QByteArray((const char *)contextArray, contextLength).toHex(); + } else if (tag == Hashes) { + offsetArray = data; + offsetLength = blockLen; + //qDebug() << "HASHES: " << offsetLength << QByteArray((const char *)offsetArray, offsetLength).toHex(); + } else if (tag == Messages) { + messageArray = data; + messageLength = blockLen; + //qDebug() << "MESSAGES: " << messageLength << QByteArray((const char *)messageArray, messageLength).toHex(); + } else if (tag == NumerusRules) { + numerusRulesArray = data; + numerusRulesLength = blockLen; + //qDebug() << "NUMERUSRULES: " << numerusRulesLength << QByteArray((const char *)numerusRulesArray, numerusRulesLength).toHex(); + } + + data += blockLen; + } + + + size_t numItems = offsetLength / (2 * sizeof(quint32)); + //qDebug() << "NUMITEMS: " << numItems; + + QTextCodec *codec = QTextCodec::codecForName( + cd.m_codecForSource.isEmpty() ? QByteArray("Latin1") : cd.m_codecForSource); + QTextCodec *utf8Codec = 0; + if (codec->name() != "UTF-8") + utf8Codec = QTextCodec::codecForName("UTF-8"); + + QString strProN = QLatin1String("%n"); + QLocale::Language l; + QLocale::Country c; + Translator::languageAndCountry(translator.languageCode(), &l, &c); + QStringList numerusForms; + bool guessPlurals = true; + if (getNumerusInfo(l, c, 0, &numerusForms)) + guessPlurals = (numerusForms.count() == 1); + + QString context, contextUtf8; + bool contextIsSystem, contextIsUtf8, contextNeeds8Bit; + QString sourcetext, sourcetextUtf8; + bool sourcetextIsSystem, sourcetextIsUtf8, sourcetextNeeds8Bit; + QString comment, commentUtf8; + bool commentIsSystem, commentIsUtf8, commentNeeds8Bit; + QStringList translations; + + for (const uchar *start = offsetArray; start != offsetArray + (numItems << 3); start += 8) { + //quint32 hash = read32(start); + quint32 ro = read32(start + 4); + //qDebug() << "\nHASH:" << hash; + const uchar *m = messageArray + ro; + + for (;;) { + uchar tag = read8(m++); + //qDebug() << "Tag:" << tag << " ADDR: " << m; + switch(tag) { + case Tag_End: + goto end; + case Tag_Translation: { + int len = read32(m); + if (len % 1) { + cd.appendError(QLatin1String("QM-Format error")); + return false; + } + m += 4; + QString str = QString::fromUtf16((const ushort *)m, len/2); + if (QSysInfo::ByteOrder == QSysInfo::LittleEndian) { + for (int i = 0; i < str.length(); ++i) + str[i] = QChar((str.at(i).unicode() >> 8) + + ((str.at(i).unicode() << 8) & 0xff00)); + } + translations << str; + m += len; + break; + } + case Tag_Obsolete1: + m += 4; + //qDebug() << "OBSOLETE"; + break; + case Tag_SourceText: { + quint32 len = read32(m); + m += 4; + //qDebug() << "SOURCE LEN: " << len; + //qDebug() << "SOURCE: " << QByteArray((const char*)m, len); + fromBytes((const char*)m, len, codec, utf8Codec, + &sourcetext, &sourcetextUtf8, + &sourcetextIsSystem, &sourcetextIsUtf8, &sourcetextNeeds8Bit); + m += len; + break; + } + case Tag_Context: { + quint32 len = read32(m); + m += 4; + //qDebug() << "CONTEXT LEN: " << len; + //qDebug() << "CONTEXT: " << QByteArray((const char*)m, len); + fromBytes((const char*)m, len, codec, utf8Codec, + &context, &contextUtf8, + &contextIsSystem, &contextIsUtf8, &contextNeeds8Bit); + m += len; + break; + } + case Tag_Comment: { + quint32 len = read32(m); + m += 4; + //qDebug() << "COMMENT LEN: " << len; + //qDebug() << "COMMENT: " << QByteArray((const char*)m, len); + fromBytes((const char*)m, len, codec, utf8Codec, + &comment, &commentUtf8, + &commentIsSystem, &commentIsUtf8, &commentNeeds8Bit); + m += len; + break; + } + default: + //qDebug() << "UNKNOWN TAG" << tag; + break; + } + } + end:; + TranslatorMessage msg; + msg.setType(TranslatorMessage::Finished); + if (translations.count() > 1) { + // If guessPlurals is not false here, plural form discard messages + // will be spewn out later. + msg.setPlural(true); + } else if (guessPlurals) { + // This might cause false positives, so it is a fallback only. + if (sourcetext.contains(strProN)) + msg.setPlural(true); + } + msg.setTranslations(translations); + translations.clear(); + if (contextNeeds8Bit || sourcetextNeeds8Bit || commentNeeds8Bit) { + if (utf8Codec && contextIsUtf8 && sourcetextIsUtf8 && commentIsUtf8) { + // The message is utf-8, but file is not. + msg.setUtf8(true); + msg.setContext(contextUtf8); + msg.setSourceText(sourcetextUtf8); + msg.setComment(commentUtf8); + translator.append(msg); + continue; + } + if (!(contextIsSystem && sourcetextIsSystem && commentIsSystem)) { + cd.appendError(QLatin1String( + "Cannot read file with specified input codec")); + return false; + } + // The message is 8-bit in the file's encoding (utf-8 or not). + } + msg.setContext(context); + msg.setSourceText(sourcetext); + msg.setComment(comment); + translator.append(msg); + } + return ok; +} + + + +static bool saveQM(const Translator &translator, QIODevice &dev, ConversionData &cd) +{ + Releaser releaser; + QLocale::Language l; + QLocale::Country c; + Translator::languageAndCountry(translator.languageCode(), &l, &c); + QByteArray rules; + if (getNumerusInfo(l, c, &rules, 0)) + releaser.setNumerusRules(rules); + releaser.setCodecName(translator.codecName()); + + int finished = 0; + int unfinished = 0; + int untranslated = 0; + int missingIds = 0; + int droppedData = 0; + + for (int i = 0; i != translator.messageCount(); ++i) { + const TranslatorMessage &msg = translator.message(i); + TranslatorMessage::Type typ = msg.type(); + if (typ != TranslatorMessage::Obsolete) { + if (cd.m_idBased && msg.id().isEmpty()) { + ++missingIds; + continue; + } + if (typ == TranslatorMessage::Unfinished) { + if (!cd.m_idBased && msg.translation().isEmpty()) { + ++untranslated; + continue; + } else { + if (cd.ignoreUnfinished()) + continue; + ++unfinished; + } + } else { + ++finished; + } + if (cd.m_idBased) { + if (!msg.context().isEmpty() || !msg.comment().isEmpty()) + ++droppedData; + releaser.insertIdBased(msg); + } else { + // Drop the comment in (context, sourceText, comment), + // unless the context is empty, + // unless (context, sourceText, "") already exists or + // unless we already dropped the comment of (context, + // sourceText, comment0). + bool forceComment = + msg.comment().isEmpty() + || msg.context().isEmpty() + || translator.contains(msg.context(), msg.sourceText(), QString()); + releaser.insert(msg, forceComment); + } + } + } + + if (missingIds) + cd.appendError(QCoreApplication::translate("LRelease", + "Dropped %n message(s) which had no ID.", 0, + QCoreApplication::CodecForTr, missingIds)); + if (droppedData) + cd.appendError(QCoreApplication::translate("LRelease", + "Excess context/disambiguation dropped from %n message(s).", 0, + QCoreApplication::CodecForTr, droppedData)); + + releaser.squeeze(cd.m_saveMode); + bool saved = releaser.save(&dev); + if (saved && cd.isVerbose()) { + int generatedCount = finished + unfinished; + cd.appendError(QCoreApplication::translate("LRelease", + " Generated %n translation(s) (%1 finished and %2 unfinished)\n", 0, + QCoreApplication::CodecForTr, generatedCount).arg(finished).arg(unfinished)); + if (untranslated) + cd.appendError(QCoreApplication::translate("LRelease", + " Ignored %n untranslated source text(s)\n", 0, + QCoreApplication::CodecForTr, untranslated)); + } + return saved; +} + +int initQM() +{ + Translator::FileFormat format; + + format.extension = QLatin1String("qm"); + format.description = QObject::tr("Compiled Qt translations"); + format.fileType = Translator::FileFormat::TranslationBinary; + format.priority = 0; + format.loader = &loadQM; + format.saver = &saveQM; + Translator::registerFileFormat(format); + + return 1; +} + +Q_CONSTRUCTOR_FUNCTION(initQM) + +QT_END_NAMESPACE