diff -r 000000000000 -r 1918ee327afb tools/linguist/lupdate/java.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/linguist/lupdate/java.cpp Mon Jan 11 14:00:40 2010 +0000 @@ -0,0 +1,642 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the Qt Linguist of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. +** +** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "lupdate.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +QT_BEGIN_NAMESPACE + +enum { Tok_Eof, Tok_class, Tok_return, Tok_tr, + Tok_translate, Tok_Ident, Tok_Package, + Tok_Comment, Tok_String, Tok_Colon, Tok_Dot, + Tok_LeftBrace, Tok_RightBrace, Tok_LeftParen, + Tok_RightParen, Tok_Comma, Tok_Semicolon, + Tok_Integer, Tok_Plus, Tok_PlusPlus, Tok_PlusEq, Tok_null }; + +class Scope +{ + public: + QString name; + enum Type {Clazz, Function, Other} type; + int line; + + Scope(const QString & name, Type type, int line) : + name(name), + type(type), + line(line) + {} + + ~Scope() + {} +}; + +/* + The tokenizer maintains the following global variables. The names + should be self-explanatory. +*/ + +static QString yyFileName; +static QChar yyCh; +static QString yyIdent; +static QString yyComment; +static QString yyString; + + +static qlonglong yyInteger; +static int yyParenDepth; +static int yyLineNo; +static int yyCurLineNo; +static int yyParenLineNo; +static int yyTok; + +// the string to read from and current position in the string +static QString yyInStr; +static int yyInPos; + +// The parser maintains the following global variables. +static QString yyPackage; +static QStack yyScope; +static QString yyDefaultContext; + +static QChar getChar() +{ + if (yyInPos >= yyInStr.size()) + return EOF; + QChar c = yyInStr[yyInPos++]; + if (c.unicode() == '\n') + ++yyCurLineNo; + return c.unicode(); +} + +static int getToken() +{ + const char tab[] = "bfnrt\"\'\\"; + const char backTab[] = "\b\f\n\r\t\"\'\\"; + + yyIdent.clear(); + yyComment.clear(); + yyString.clear(); + + while ( yyCh != EOF ) { + yyLineNo = yyCurLineNo; + + if ( yyCh.isLetter() || yyCh.toLatin1() == '_' ) { + do { + yyIdent.append(yyCh); + yyCh = getChar(); + } while ( yyCh.isLetterOrNumber() || yyCh.toLatin1() == '_' ); + + if (yyTok != Tok_Dot) { + switch ( yyIdent.at(0).toLatin1() ) { + case 'r': + if ( yyIdent == QLatin1String("return") ) + return Tok_return; + break; + case 'c': + if ( yyIdent == QLatin1String("class") ) + return Tok_class; + break; + case 'n': + if ( yyIdent == QLatin1String("null") ) + return Tok_null; + break; + } + } + switch ( yyIdent.at(0).toLatin1() ) { + case 'T': + // TR() for when all else fails + if ( yyIdent == QLatin1String("TR") ) + return Tok_tr; + break; + case 'p': + if( yyIdent == QLatin1String("package") ) + return Tok_Package; + break; + case 't': + if ( yyIdent == QLatin1String("tr") ) + return Tok_tr; + if ( yyIdent == QLatin1String("translate") ) + return Tok_translate; + } + return Tok_Ident; + } else { + switch ( yyCh.toLatin1() ) { + + case '/': + yyCh = getChar(); + if ( yyCh == QLatin1Char('/') ) { + do { + yyCh = getChar(); + if (yyCh == EOF) + break; + yyComment.append(yyCh); + } while (yyCh != QLatin1Char('\n')); + return Tok_Comment; + + } else if ( yyCh == QLatin1Char('*') ) { + bool metAster = false; + bool metAsterSlash = false; + + while ( !metAsterSlash ) { + yyCh = getChar(); + if ( yyCh == EOF ) { + qFatal( "%s: Unterminated Java comment starting at" + " line %d\n", + qPrintable(yyFileName), yyLineNo ); + + return Tok_Comment; + } + + yyComment.append( yyCh ); + + if ( yyCh == QLatin1Char('*') ) + metAster = true; + else if ( metAster && yyCh == QLatin1Char('/') ) + metAsterSlash = true; + else + metAster = false; + } + yyComment.chop(2); + yyCh = getChar(); + + return Tok_Comment; + } + break; + case '"': + yyCh = getChar(); + + while ( yyCh != EOF && yyCh != QLatin1Char('\n') && yyCh != QLatin1Char('"') ) { + if ( yyCh == QLatin1Char('\\') ) { + yyCh = getChar(); + if ( yyCh == QLatin1Char('u') ) { + yyCh = getChar(); + uint unicode(0); + for (int i = 4; i > 0; --i) { + unicode = unicode << 4; + if( yyCh.isDigit() ) { + unicode += yyCh.digitValue(); + } + else { + int sub(yyCh.toLower().toAscii() - 87); + if( sub > 15 || sub < 10) { + qFatal( "%s:%d: Invalid Unicode", + qPrintable(yyFileName), yyLineNo ); + } + unicode += sub; + } + yyCh = getChar(); + } + yyString.append(QChar(unicode)); + } + else if ( yyCh == QLatin1Char('\n') ) { + yyCh = getChar(); + } + else { + yyString.append( QLatin1Char(backTab[strchr( tab, yyCh.toAscii() ) - tab]) ); + yyCh = getChar(); + } + } else { + yyString.append(yyCh); + yyCh = getChar(); + } + } + + if ( yyCh != QLatin1Char('"') ) + qFatal( "%s:%d: Unterminated string", + qPrintable(yyFileName), yyLineNo ); + + yyCh = getChar(); + + return Tok_String; + + case ':': + yyCh = getChar(); + return Tok_Colon; + case '\'': + yyCh = getChar(); + + if ( yyCh == QLatin1Char('\\') ) + yyCh = getChar(); + do { + yyCh = getChar(); + } while ( yyCh != EOF && yyCh != QLatin1Char('\'') ); + yyCh = getChar(); + break; + case '{': + yyCh = getChar(); + return Tok_LeftBrace; + case '}': + yyCh = getChar(); + return Tok_RightBrace; + case '(': + if (yyParenDepth == 0) + yyParenLineNo = yyCurLineNo; + yyParenDepth++; + yyCh = getChar(); + return Tok_LeftParen; + case ')': + if (yyParenDepth == 0) + yyParenLineNo = yyCurLineNo; + yyParenDepth--; + yyCh = getChar(); + return Tok_RightParen; + case ',': + yyCh = getChar(); + return Tok_Comma; + case '.': + yyCh = getChar(); + return Tok_Dot; + case ';': + yyCh = getChar(); + return Tok_Semicolon; + case '+': + yyCh = getChar(); + if (yyCh == QLatin1Char('+')) { + yyCh = getChar(); + return Tok_PlusPlus; + } + if( yyCh == QLatin1Char('=') ){ + yyCh = getChar(); + return Tok_PlusEq; + } + return Tok_Plus; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + QByteArray ba; + ba += yyCh.toLatin1(); + yyCh = getChar(); + bool hex = yyCh == QLatin1Char('x'); + if ( hex ) { + ba += yyCh.toLatin1(); + yyCh = getChar(); + } + while ( hex ? isxdigit(yyCh.toLatin1()) : yyCh.isDigit() ) { + ba += yyCh.toLatin1(); + yyCh = getChar(); + } + bool ok; + yyInteger = ba.toLongLong(&ok); + if (ok) return Tok_Integer; + break; + } + default: + yyCh = getChar(); + } + } + } + return Tok_Eof; +} + +static bool match( int t ) +{ + bool matches = ( yyTok == t ); + if ( matches ) + yyTok = getToken(); + return matches; +} + +static bool matchString( QString &s ) +{ + if ( yyTok != Tok_String ) + return false; + + s = yyString; + yyTok = getToken(); + while ( yyTok == Tok_Plus ) { + yyTok = getToken(); + if (yyTok == Tok_String) + s += yyString; + else { + qWarning( "%s:%d: String used in translation can only contain strings" + " concatenated with other strings, not expressions or numbers.", + qPrintable(yyFileName), yyLineNo ); + return false; + } + yyTok = getToken(); + } + return true; +} + +static bool matchStringOrNull(QString &s) +{ + bool matches = matchString(s); + if (!matches) { + matches = (yyTok == Tok_null); + if (matches) + yyTok = getToken(); + } + return matches; +} + +/* + * match any expression that can return a number, which can be + * 1. Literal number (e.g. '11') + * 2. simple identifier (e.g. 'm_count') + * 3. simple function call (e.g. 'size()' ) + * 4. function call on an object (e.g. 'list.size()') + * 5. function call on an object (e.g. 'list->size()') + * + * Other cases: + * size(2,4) + * list().size() + * list(a,b).size(2,4) + * etc... + */ +static bool matchExpression() +{ + if (match(Tok_Integer)) { + return true; + } + + int parenlevel = 0; + while (match(Tok_Ident) || parenlevel > 0) { + if (yyTok == Tok_RightParen) { + if (parenlevel == 0) break; + --parenlevel; + yyTok = getToken(); + } else if (yyTok == Tok_LeftParen) { + yyTok = getToken(); + if (yyTok == Tok_RightParen) { + yyTok = getToken(); + } else { + ++parenlevel; + } + } else if (yyTok == Tok_Ident) { + continue; + } else if (parenlevel == 0) { + return false; + } + } + return true; +} + +static const QString context() +{ + QString context(yyPackage); + bool innerClass = false; + for (int i = 0; i < yyScope.size(); ++i) { + if (yyScope.at(i)->type == Scope::Clazz) { + if (innerClass) + context.append(QLatin1String("$")); + else + context.append(QLatin1String(".")); + + context.append(yyScope.at(i)->name); + innerClass = true; + } + } + return context.isEmpty() ? yyDefaultContext : context; +} + +static void recordMessage( + Translator *tor, const QString &context, const QString &text, const QString &comment, + const QString &extracomment, bool plural) +{ + TranslatorMessage msg( + context, text, comment, QString(), + yyFileName, yyLineNo, QStringList(), + TranslatorMessage::Unfinished, plural); + msg.setExtraComment(extracomment.simplified()); + tor->extend(msg); +} + +static void parse( Translator *tor ) +{ + QString text; + QString com; + QString extracomment; + + yyCh = getChar(); + + yyTok = getToken(); + while ( yyTok != Tok_Eof ) { + switch ( yyTok ) { + case Tok_class: + yyTok = getToken(); + if(yyTok == Tok_Ident) { + yyScope.push(new Scope(yyIdent, Scope::Clazz, yyLineNo)); + } + else { + qFatal( "%s:%d: Class must be followed by a classname", + qPrintable(yyFileName), yyLineNo ); + } + while (!match(Tok_LeftBrace)) { + yyTok = getToken(); + } + break; + + case Tok_tr: + yyTok = getToken(); + if ( match(Tok_LeftParen) && matchString(text) ) { + com.clear(); + bool plural = false; + + if ( match(Tok_RightParen) ) { + // no comment + } else if (match(Tok_Comma) && matchStringOrNull(com)) { //comment + if ( match(Tok_RightParen)) { + // ok, + } else if (match(Tok_Comma)) { + plural = true; + } + } + if (!text.isEmpty()) + recordMessage(tor, context(), text, com, extracomment, plural); + } + break; + case Tok_translate: + { + QString contextOverride; + yyTok = getToken(); + if ( match(Tok_LeftParen) && + matchString(contextOverride) && + match(Tok_Comma) && + matchString(text) ) { + + com.clear(); + bool plural = false; + if (!match(Tok_RightParen)) { + // look for comment + if ( match(Tok_Comma) && matchStringOrNull(com)) { + if (!match(Tok_RightParen)) { + if (match(Tok_Comma) && matchExpression() && match(Tok_RightParen)) { + plural = true; + } else { + break; + } + } + } else { + break; + } + } + if (!text.isEmpty()) + recordMessage(tor, contextOverride, text, com, extracomment, plural); + } + } + break; + + case Tok_Ident: + yyTok = getToken(); + break; + + case Tok_Comment: + if (yyComment.startsWith(QLatin1Char(':'))) { + yyComment.remove(0, 1); + extracomment.append(yyComment); + } + yyTok = getToken(); + break; + + case Tok_RightBrace: + if ( yyScope.isEmpty() ) { + qFatal( "%s:%d: Unbalanced right brace in Java code\n", + qPrintable(yyFileName), yyLineNo ); + } + else + delete (yyScope.pop()); + extracomment.clear(); + yyTok = getToken(); + break; + + case Tok_LeftBrace: + yyScope.push(new Scope(QString(), Scope::Other, yyLineNo)); + yyTok = getToken(); + break; + + case Tok_Semicolon: + extracomment.clear(); + yyTok = getToken(); + break; + + case Tok_Package: + yyTok = getToken(); + while(!match(Tok_Semicolon)) { + switch(yyTok) { + case Tok_Ident: + yyPackage.append(yyIdent); + break; + case Tok_Dot: + yyPackage.append(QLatin1String(".")); + break; + default: + qFatal( "%s:%d: Package keyword should be followed by com.package.name;", + qPrintable(yyFileName), yyLineNo ); + break; + } + yyTok = getToken(); + } + break; + + default: + yyTok = getToken(); + } + } + + if ( !yyScope.isEmpty() ) + qFatal( "%s:%d: Unbalanced braces in Java code\n", + qPrintable(yyFileName), yyScope.top()->line ); + else if ( yyParenDepth != 0 ) + qFatal( "%s:%d: Unbalanced parentheses in Java code\n", + qPrintable(yyFileName), yyParenLineNo ); +} + + +bool loadJava(Translator &translator, const QString &filename, ConversionData &cd) +{ + QFile file(filename); + if (!file.open(QIODevice::ReadOnly)) { + cd.appendError(QString::fromLatin1("Cannot open %1: %2") + .arg(filename, file.errorString())); + return false; + } + + yyDefaultContext = cd.m_defaultContext; + yyInPos = -1; + yyFileName = filename; + yyPackage.clear(); + yyScope.clear(); + yyTok = -1; + yyParenDepth = 0; + yyCurLineNo = 0; + yyParenLineNo = 1; + + QTextStream ts(&file); + QByteArray codecName; + if (!cd.m_codecForSource.isEmpty()) + codecName = cd.m_codecForSource; + else + codecName = translator.codecName(); // Just because it should be latin1 already + ts.setCodec(QTextCodec::codecForName(codecName)); + ts.setAutoDetectUnicode(true); + yyInStr = ts.readAll(); + yyInPos = 0; + yyFileName = filename; + yyCurLineNo = 1; + yyParenLineNo = 1; + + parse(&translator); + + // Java uses UTF-16 internally and Jambi makes UTF-8 for tr() purposes of it. + translator.setCodecName("UTF-8"); + return true; +} + +QT_END_NAMESPACE