tools/linguist/lupdate/java.cpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Fri, 19 Feb 2010 23:40:16 +0200
branchRCL_3
changeset 4 3b1da2848fc7
parent 0 1918ee327afb
permissions -rw-r--r--
Revision: 201003 Kit: 201007

/****************************************************************************
**
** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
** All rights reserved.
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** This file is part of the Qt Linguist of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** No Commercial Usage
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the Technology Preview License Agreement accompanying
** this package.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file.  Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights.  These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** If you have questions regarding the use of this file, please contact
** Nokia at qt-info@nokia.com.
**
**
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "lupdate.h"

#include <translator.h>

#include <QtCore/QDebug>
#include <QtCore/QFile>
#include <QtCore/QRegExp>
#include <QtCore/QStack>
#include <QtCore/QStack>
#include <QtCore/QString>
#include <QtCore/QTextCodec>

#include <ctype.h>

QT_BEGIN_NAMESPACE

enum { Tok_Eof, Tok_class, Tok_return, Tok_tr,
       Tok_translate, Tok_Ident, Tok_Package,
       Tok_Comment, Tok_String, Tok_Colon, Tok_Dot,
       Tok_LeftBrace, Tok_RightBrace, Tok_LeftParen,
       Tok_RightParen, Tok_Comma, Tok_Semicolon,
       Tok_Integer, Tok_Plus, Tok_PlusPlus, Tok_PlusEq, Tok_null };

class Scope
{
    public:
        QString name;
        enum Type {Clazz, Function, Other} type;
        int line;

        Scope(const QString & name, Type type, int line) :
                name(name),
                type(type),
                line(line)
        {}

        ~Scope()
        {}
};

/*
  The tokenizer maintains the following global variables. The names
  should be self-explanatory.
*/

static QString yyFileName;
static QChar yyCh;
static QString yyIdent;
static QString yyComment;
static QString yyString;


static qlonglong yyInteger;
static int yyParenDepth;
static int yyLineNo;
static int yyCurLineNo;
static int yyParenLineNo;
static int yyTok;

// the string to read from and current position in the string
static QString yyInStr;
static int yyInPos;

// The parser maintains the following global variables.
static QString yyPackage;
static QStack<Scope*> yyScope;
static QString yyDefaultContext;

static QChar getChar()
{
    if (yyInPos >= yyInStr.size())
        return EOF;
    QChar c = yyInStr[yyInPos++];
    if (c.unicode() == '\n')
        ++yyCurLineNo;
    return c.unicode();
}

static int getToken()
{
    const char tab[] = "bfnrt\"\'\\";
    const char backTab[] = "\b\f\n\r\t\"\'\\";

    yyIdent.clear();
    yyComment.clear();
    yyString.clear();

    while ( yyCh != EOF ) {
        yyLineNo = yyCurLineNo;

        if ( yyCh.isLetter() || yyCh.toLatin1() == '_' ) {
            do {
                yyIdent.append(yyCh);
                yyCh = getChar();
            } while ( yyCh.isLetterOrNumber() || yyCh.toLatin1() == '_' );

            if (yyTok != Tok_Dot) {
                switch ( yyIdent.at(0).toLatin1() ) {
                    case 'r':
                        if ( yyIdent == QLatin1String("return") )
                            return Tok_return;
                        break;
                     case 'c':
                        if ( yyIdent == QLatin1String("class") )
                            return Tok_class;
                        break;
                     case 'n':
                         if ( yyIdent == QLatin1String("null") )
                             return Tok_null;
                        break;
                }
            }
            switch ( yyIdent.at(0).toLatin1() ) {
            case 'T':
                // TR() for when all else fails
                if ( yyIdent == QLatin1String("TR") )
                    return Tok_tr;
                break;
            case 'p':
                if( yyIdent == QLatin1String("package") )
                    return Tok_Package;
                break;
            case 't':
                if ( yyIdent == QLatin1String("tr") )
                    return Tok_tr;
                if ( yyIdent == QLatin1String("translate") )
                    return Tok_translate;
                }
            return Tok_Ident;
        } else {
            switch ( yyCh.toLatin1() ) {

            case '/':
                yyCh = getChar();
                if ( yyCh == QLatin1Char('/') ) {
                    do {
                        yyCh = getChar();
                        if (yyCh == EOF)
                            break;
                        yyComment.append(yyCh);
                    } while (yyCh != QLatin1Char('\n'));
                    return Tok_Comment;

                } else if ( yyCh == QLatin1Char('*') ) {
                    bool metAster = false;
                    bool metAsterSlash = false;

                    while ( !metAsterSlash ) {
                        yyCh = getChar();
                        if ( yyCh == EOF ) {
                            qFatal( "%s: Unterminated Java comment starting at"
                                    " line %d\n",
                                    qPrintable(yyFileName), yyLineNo );

                            return Tok_Comment;
                        }

                        yyComment.append( yyCh );

                        if ( yyCh == QLatin1Char('*') )
                            metAster = true;
                        else if ( metAster && yyCh == QLatin1Char('/') )
                            metAsterSlash = true;
                        else
                            metAster = false;
                    }
                    yyComment.chop(2);
                    yyCh = getChar();

                    return Tok_Comment;
                }
                break;
            case '"':
                yyCh = getChar();

                while ( yyCh != EOF && yyCh != QLatin1Char('\n') && yyCh != QLatin1Char('"') ) {
                    if ( yyCh == QLatin1Char('\\') ) {
                        yyCh = getChar();
                        if ( yyCh == QLatin1Char('u') ) {
                            yyCh = getChar();
                            uint unicode(0);
                            for (int i = 4; i > 0; --i) {
                                unicode = unicode << 4;
                                if( yyCh.isDigit() ) {
                                    unicode += yyCh.digitValue();
                                }
                                else {
                                    int sub(yyCh.toLower().toAscii() - 87);
                                    if( sub > 15 || sub < 10) {
                                        qFatal( "%s:%d: Invalid Unicode",
                                            qPrintable(yyFileName), yyLineNo );
                                    }
                                    unicode += sub;
                                }
                                yyCh = getChar();
                            }
                            yyString.append(QChar(unicode));
                        }
                        else if ( yyCh == QLatin1Char('\n') ) {
                            yyCh = getChar();
                        }
                        else {
                            yyString.append( QLatin1Char(backTab[strchr( tab, yyCh.toAscii() ) - tab]) );
                            yyCh = getChar();
                        }
                    } else {
                        yyString.append(yyCh);
                        yyCh = getChar();
                    }
                }

                if ( yyCh != QLatin1Char('"') )
                    qFatal( "%s:%d: Unterminated string",
                        qPrintable(yyFileName), yyLineNo );

                yyCh = getChar();

                return Tok_String;

            case ':':
                yyCh = getChar();
                return Tok_Colon;
            case '\'':
                yyCh = getChar();

                if ( yyCh == QLatin1Char('\\') )
                    yyCh = getChar();
                do {
                    yyCh = getChar();
                } while ( yyCh != EOF && yyCh != QLatin1Char('\'') );
                yyCh = getChar();
                break;
            case '{':
                yyCh = getChar();
                return Tok_LeftBrace;
            case '}':
                yyCh = getChar();
                return Tok_RightBrace;
            case '(':
                if (yyParenDepth == 0)
                    yyParenLineNo = yyCurLineNo;
                yyParenDepth++;
                yyCh = getChar();
                return Tok_LeftParen;
            case ')':
                if (yyParenDepth == 0)
                    yyParenLineNo = yyCurLineNo;
                yyParenDepth--;
                yyCh = getChar();
                return Tok_RightParen;
            case ',':
                yyCh = getChar();
                return Tok_Comma;
            case '.':
                yyCh = getChar();
                return Tok_Dot;
            case ';':
                yyCh = getChar();
                return Tok_Semicolon;
            case '+':
                yyCh = getChar();
                if (yyCh == QLatin1Char('+')) {
                    yyCh = getChar();
                    return Tok_PlusPlus;
		}
                if( yyCh == QLatin1Char('=') ){
                    yyCh = getChar();
                    return Tok_PlusEq;
		}
                return Tok_Plus;
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                {
                    QByteArray ba;
                    ba += yyCh.toLatin1();
                    yyCh = getChar();
                    bool hex = yyCh == QLatin1Char('x');
                    if ( hex ) {
                        ba += yyCh.toLatin1();
                        yyCh = getChar();
                    }
                    while ( hex ? isxdigit(yyCh.toLatin1()) : yyCh.isDigit() ) {
                        ba += yyCh.toLatin1();
                        yyCh = getChar();
                    }
                    bool ok;
                    yyInteger = ba.toLongLong(&ok);
                    if (ok) return Tok_Integer;
                    break;
                }
            default:
                yyCh = getChar();
            }
        }
    }
    return Tok_Eof;
}

static bool match( int t )
{
    bool matches = ( yyTok == t );
    if ( matches )
        yyTok = getToken();
    return matches;
}

static bool matchString( QString &s )
{
    if ( yyTok != Tok_String )
        return false;

    s = yyString;
    yyTok = getToken();
    while ( yyTok == Tok_Plus ) {
        yyTok = getToken();
        if (yyTok == Tok_String)
            s += yyString;
        else {
            qWarning( "%s:%d: String used in translation can only contain strings"
                " concatenated with other strings, not expressions or numbers.",
                qPrintable(yyFileName), yyLineNo );
            return false;
        }
        yyTok = getToken();
    }
    return true;
}

static bool matchStringOrNull(QString &s)
{
    bool matches = matchString(s);
    if (!matches) {
        matches = (yyTok == Tok_null);
        if (matches)
            yyTok = getToken();
    }
    return matches;
}

/*
 * match any expression that can return a number, which can be
 * 1. Literal number (e.g. '11')
 * 2. simple identifier (e.g. 'm_count')
 * 3. simple function call (e.g. 'size()' )
 * 4. function call on an object (e.g. 'list.size()')
 * 5. function call on an object (e.g. 'list->size()')
 *
 * Other cases:
 * size(2,4)
 * list().size()
 * list(a,b).size(2,4)
 * etc...
 */
static bool matchExpression()
{
    if (match(Tok_Integer)) {
        return true;
    }

    int parenlevel = 0;
    while (match(Tok_Ident) || parenlevel > 0) {
        if (yyTok == Tok_RightParen) {
            if (parenlevel == 0) break;
            --parenlevel;
            yyTok = getToken();
        } else if (yyTok == Tok_LeftParen) {
            yyTok = getToken();
            if (yyTok == Tok_RightParen) {
                yyTok = getToken();
            } else {
                ++parenlevel;
            }
        } else if (yyTok == Tok_Ident) {
            continue;
        } else if (parenlevel == 0) {
            return false;
        }
    }
    return true;
}

static const QString context()
{
      QString context(yyPackage);
      bool innerClass = false;
      for (int i = 0; i < yyScope.size(); ++i) {
         if (yyScope.at(i)->type == Scope::Clazz) {
             if (innerClass)
                 context.append(QLatin1String("$"));
             else
                 context.append(QLatin1String("."));

             context.append(yyScope.at(i)->name);
             innerClass = true;
         }
     }
     return context.isEmpty() ? yyDefaultContext : context;
}

static void recordMessage(
    Translator *tor, const QString &context, const QString &text, const QString &comment,
    const QString &extracomment, bool plural)
{
    TranslatorMessage msg(
        context, text, comment, QString(),
        yyFileName, yyLineNo, QStringList(),
        TranslatorMessage::Unfinished, plural);
    msg.setExtraComment(extracomment.simplified());
    tor->extend(msg);
}

static void parse( Translator *tor )
{
    QString text;
    QString com;
    QString extracomment;

    yyCh = getChar();

    yyTok = getToken();
    while ( yyTok != Tok_Eof ) {
        switch ( yyTok ) {
        case Tok_class:
            yyTok = getToken();
            if(yyTok == Tok_Ident) {
                yyScope.push(new Scope(yyIdent, Scope::Clazz, yyLineNo));
            }
            else {
                qFatal( "%s:%d: Class must be followed by a classname",
                                          qPrintable(yyFileName), yyLineNo );
            }
            while (!match(Tok_LeftBrace)) {
                yyTok = getToken();
            }
            break;

        case Tok_tr:
            yyTok = getToken();
            if ( match(Tok_LeftParen) && matchString(text) ) {
                com.clear();
                bool plural = false;

                if ( match(Tok_RightParen) ) {
                    // no comment
                } else if (match(Tok_Comma) && matchStringOrNull(com)) {   //comment
                    if ( match(Tok_RightParen)) {
                        // ok,
                    } else if (match(Tok_Comma)) {
                        plural = true;
                    }
                }
                if (!text.isEmpty())
                    recordMessage(tor, context(), text, com, extracomment, plural);
            }
            break;
        case Tok_translate:
            {
                QString contextOverride;
                yyTok = getToken();
                if ( match(Tok_LeftParen) &&
                     matchString(contextOverride) &&
                     match(Tok_Comma) &&
                     matchString(text) ) {

                    com.clear();
                    bool plural = false;
                    if (!match(Tok_RightParen)) {
                        // look for comment
                        if ( match(Tok_Comma) && matchStringOrNull(com)) {
                            if (!match(Tok_RightParen)) {
                                if (match(Tok_Comma) && matchExpression() && match(Tok_RightParen)) {
                                    plural = true;
                                } else {
                                    break;
                                }
                            }
                        } else {
                            break;
                        }
                    }
                    if (!text.isEmpty())
                        recordMessage(tor, contextOverride, text, com, extracomment, plural);
                }
            }
            break;

        case Tok_Ident:
            yyTok = getToken();
            break;

        case Tok_Comment:
            if (yyComment.startsWith(QLatin1Char(':'))) {
                yyComment.remove(0, 1);
                extracomment.append(yyComment);
            }
            yyTok = getToken();
            break;

        case Tok_RightBrace:
            if ( yyScope.isEmpty() ) {
                qFatal( "%s:%d: Unbalanced right brace in Java code\n",
                        qPrintable(yyFileName), yyLineNo );
            }
            else
                delete (yyScope.pop());
            extracomment.clear();
            yyTok = getToken();
            break;

         case Tok_LeftBrace:
            yyScope.push(new Scope(QString(), Scope::Other, yyLineNo));
            yyTok = getToken();
            break;

        case Tok_Semicolon:
            extracomment.clear();
            yyTok = getToken();
            break;

        case Tok_Package:
            yyTok = getToken();
            while(!match(Tok_Semicolon)) {
                switch(yyTok) {
                    case Tok_Ident:
                        yyPackage.append(yyIdent);
                        break;
                    case Tok_Dot:
                        yyPackage.append(QLatin1String("."));
                        break;
                    default:
                         qFatal( "%s:%d: Package keyword should be followed by com.package.name;",
                                          qPrintable(yyFileName), yyLineNo );
                         break;
                }
                yyTok = getToken();
            }
            break;

        default:
            yyTok = getToken();
        }
    }

    if ( !yyScope.isEmpty() )
        qFatal( "%s:%d: Unbalanced braces in Java code\n",
                 qPrintable(yyFileName), yyScope.top()->line );
    else if ( yyParenDepth != 0 )
        qFatal( "%s:%d: Unbalanced parentheses in Java code\n",
                 qPrintable(yyFileName), yyParenLineNo );
}


bool loadJava(Translator &translator, const QString &filename, ConversionData &cd)
{
    QFile file(filename);
    if (!file.open(QIODevice::ReadOnly)) {
        cd.appendError(QString::fromLatin1("Cannot open %1: %2")
            .arg(filename, file.errorString()));
        return false;
    }

    yyDefaultContext = cd.m_defaultContext;
    yyInPos = -1;
    yyFileName = filename;
    yyPackage.clear();
    yyScope.clear();
    yyTok = -1;
    yyParenDepth = 0;
    yyCurLineNo = 0;
    yyParenLineNo = 1;

    QTextStream ts(&file);
    QByteArray codecName;
    if (!cd.m_codecForSource.isEmpty())
        codecName = cd.m_codecForSource;
    else
        codecName = translator.codecName(); // Just because it should be latin1 already
    ts.setCodec(QTextCodec::codecForName(codecName));
    ts.setAutoDetectUnicode(true);
    yyInStr = ts.readAll();
    yyInPos = 0;
    yyFileName = filename;
    yyCurLineNo = 1;
    yyParenLineNo = 1;

    parse(&translator);

    // Java uses UTF-16 internally and Jambi makes UTF-8 for tr() purposes of it.
    translator.setCodecName("UTF-8");
    return true;
}

QT_END_NAMESPACE