tools/linguist/lupdate/java.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the Qt Linguist of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 #include "lupdate.h"
       
    43 
       
    44 #include <translator.h>
       
    45 
       
    46 #include <QtCore/QDebug>
       
    47 #include <QtCore/QFile>
       
    48 #include <QtCore/QRegExp>
       
    49 #include <QtCore/QStack>
       
    50 #include <QtCore/QStack>
       
    51 #include <QtCore/QString>
       
    52 #include <QtCore/QTextCodec>
       
    53 
       
    54 #include <ctype.h>
       
    55 
       
    56 QT_BEGIN_NAMESPACE
       
    57 
       
    58 enum { Tok_Eof, Tok_class, Tok_return, Tok_tr,
       
    59        Tok_translate, Tok_Ident, Tok_Package,
       
    60        Tok_Comment, Tok_String, Tok_Colon, Tok_Dot,
       
    61        Tok_LeftBrace, Tok_RightBrace, Tok_LeftParen,
       
    62        Tok_RightParen, Tok_Comma, Tok_Semicolon,
       
    63        Tok_Integer, Tok_Plus, Tok_PlusPlus, Tok_PlusEq, Tok_null };
       
    64 
       
    65 class Scope
       
    66 {
       
    67     public:
       
    68         QString name;
       
    69         enum Type {Clazz, Function, Other} type;
       
    70         int line;
       
    71 
       
    72         Scope(const QString & name, Type type, int line) :
       
    73                 name(name),
       
    74                 type(type),
       
    75                 line(line)
       
    76         {}
       
    77 
       
    78         ~Scope()
       
    79         {}
       
    80 };
       
    81 
       
    82 /*
       
    83   The tokenizer maintains the following global variables. The names
       
    84   should be self-explanatory.
       
    85 */
       
    86 
       
    87 static QString yyFileName;
       
    88 static QChar yyCh;
       
    89 static QString yyIdent;
       
    90 static QString yyComment;
       
    91 static QString yyString;
       
    92 
       
    93 
       
    94 static qlonglong yyInteger;
       
    95 static int yyParenDepth;
       
    96 static int yyLineNo;
       
    97 static int yyCurLineNo;
       
    98 static int yyParenLineNo;
       
    99 static int yyTok;
       
   100 
       
   101 // the string to read from and current position in the string
       
   102 static QString yyInStr;
       
   103 static int yyInPos;
       
   104 
       
   105 // The parser maintains the following global variables.
       
   106 static QString yyPackage;
       
   107 static QStack<Scope*> yyScope;
       
   108 static QString yyDefaultContext;
       
   109 
       
   110 static QChar getChar()
       
   111 {
       
   112     if (yyInPos >= yyInStr.size())
       
   113         return EOF;
       
   114     QChar c = yyInStr[yyInPos++];
       
   115     if (c.unicode() == '\n')
       
   116         ++yyCurLineNo;
       
   117     return c.unicode();
       
   118 }
       
   119 
       
   120 static int getToken()
       
   121 {
       
   122     const char tab[] = "bfnrt\"\'\\";
       
   123     const char backTab[] = "\b\f\n\r\t\"\'\\";
       
   124 
       
   125     yyIdent.clear();
       
   126     yyComment.clear();
       
   127     yyString.clear();
       
   128 
       
   129     while ( yyCh != EOF ) {
       
   130         yyLineNo = yyCurLineNo;
       
   131 
       
   132         if ( yyCh.isLetter() || yyCh.toLatin1() == '_' ) {
       
   133             do {
       
   134                 yyIdent.append(yyCh);
       
   135                 yyCh = getChar();
       
   136             } while ( yyCh.isLetterOrNumber() || yyCh.toLatin1() == '_' );
       
   137 
       
   138             if (yyTok != Tok_Dot) {
       
   139                 switch ( yyIdent.at(0).toLatin1() ) {
       
   140                     case 'r':
       
   141                         if ( yyIdent == QLatin1String("return") )
       
   142                             return Tok_return;
       
   143                         break;
       
   144                      case 'c':
       
   145                         if ( yyIdent == QLatin1String("class") )
       
   146                             return Tok_class;
       
   147                         break;
       
   148                      case 'n':
       
   149                          if ( yyIdent == QLatin1String("null") )
       
   150                              return Tok_null;
       
   151                         break;
       
   152                 }
       
   153             }
       
   154             switch ( yyIdent.at(0).toLatin1() ) {
       
   155             case 'T':
       
   156                 // TR() for when all else fails
       
   157                 if ( yyIdent == QLatin1String("TR") )
       
   158                     return Tok_tr;
       
   159                 break;
       
   160             case 'p':
       
   161                 if( yyIdent == QLatin1String("package") )
       
   162                     return Tok_Package;
       
   163                 break;
       
   164             case 't':
       
   165                 if ( yyIdent == QLatin1String("tr") )
       
   166                     return Tok_tr;
       
   167                 if ( yyIdent == QLatin1String("translate") )
       
   168                     return Tok_translate;
       
   169                 }
       
   170             return Tok_Ident;
       
   171         } else {
       
   172             switch ( yyCh.toLatin1() ) {
       
   173 
       
   174             case '/':
       
   175                 yyCh = getChar();
       
   176                 if ( yyCh == QLatin1Char('/') ) {
       
   177                     do {
       
   178                         yyCh = getChar();
       
   179                         if (yyCh == EOF)
       
   180                             break;
       
   181                         yyComment.append(yyCh);
       
   182                     } while (yyCh != QLatin1Char('\n'));
       
   183                     return Tok_Comment;
       
   184 
       
   185                 } else if ( yyCh == QLatin1Char('*') ) {
       
   186                     bool metAster = false;
       
   187                     bool metAsterSlash = false;
       
   188 
       
   189                     while ( !metAsterSlash ) {
       
   190                         yyCh = getChar();
       
   191                         if ( yyCh == EOF ) {
       
   192                             qFatal( "%s: Unterminated Java comment starting at"
       
   193                                     " line %d\n",
       
   194                                     qPrintable(yyFileName), yyLineNo );
       
   195 
       
   196                             return Tok_Comment;
       
   197                         }
       
   198 
       
   199                         yyComment.append( yyCh );
       
   200 
       
   201                         if ( yyCh == QLatin1Char('*') )
       
   202                             metAster = true;
       
   203                         else if ( metAster && yyCh == QLatin1Char('/') )
       
   204                             metAsterSlash = true;
       
   205                         else
       
   206                             metAster = false;
       
   207                     }
       
   208                     yyComment.chop(2);
       
   209                     yyCh = getChar();
       
   210 
       
   211                     return Tok_Comment;
       
   212                 }
       
   213                 break;
       
   214             case '"':
       
   215                 yyCh = getChar();
       
   216 
       
   217                 while ( yyCh != EOF && yyCh != QLatin1Char('\n') && yyCh != QLatin1Char('"') ) {
       
   218                     if ( yyCh == QLatin1Char('\\') ) {
       
   219                         yyCh = getChar();
       
   220                         if ( yyCh == QLatin1Char('u') ) {
       
   221                             yyCh = getChar();
       
   222                             uint unicode(0);
       
   223                             for (int i = 4; i > 0; --i) {
       
   224                                 unicode = unicode << 4;
       
   225                                 if( yyCh.isDigit() ) {
       
   226                                     unicode += yyCh.digitValue();
       
   227                                 }
       
   228                                 else {
       
   229                                     int sub(yyCh.toLower().toAscii() - 87);
       
   230                                     if( sub > 15 || sub < 10) {
       
   231                                         qFatal( "%s:%d: Invalid Unicode",
       
   232                                             qPrintable(yyFileName), yyLineNo );
       
   233                                     }
       
   234                                     unicode += sub;
       
   235                                 }
       
   236                                 yyCh = getChar();
       
   237                             }
       
   238                             yyString.append(QChar(unicode));
       
   239                         }
       
   240                         else if ( yyCh == QLatin1Char('\n') ) {
       
   241                             yyCh = getChar();
       
   242                         }
       
   243                         else {
       
   244                             yyString.append( QLatin1Char(backTab[strchr( tab, yyCh.toAscii() ) - tab]) );
       
   245                             yyCh = getChar();
       
   246                         }
       
   247                     } else {
       
   248                         yyString.append(yyCh);
       
   249                         yyCh = getChar();
       
   250                     }
       
   251                 }
       
   252 
       
   253                 if ( yyCh != QLatin1Char('"') )
       
   254                     qFatal( "%s:%d: Unterminated string",
       
   255                         qPrintable(yyFileName), yyLineNo );
       
   256 
       
   257                 yyCh = getChar();
       
   258 
       
   259                 return Tok_String;
       
   260 
       
   261             case ':':
       
   262                 yyCh = getChar();
       
   263                 return Tok_Colon;
       
   264             case '\'':
       
   265                 yyCh = getChar();
       
   266 
       
   267                 if ( yyCh == QLatin1Char('\\') )
       
   268                     yyCh = getChar();
       
   269                 do {
       
   270                     yyCh = getChar();
       
   271                 } while ( yyCh != EOF && yyCh != QLatin1Char('\'') );
       
   272                 yyCh = getChar();
       
   273                 break;
       
   274             case '{':
       
   275                 yyCh = getChar();
       
   276                 return Tok_LeftBrace;
       
   277             case '}':
       
   278                 yyCh = getChar();
       
   279                 return Tok_RightBrace;
       
   280             case '(':
       
   281                 if (yyParenDepth == 0)
       
   282                     yyParenLineNo = yyCurLineNo;
       
   283                 yyParenDepth++;
       
   284                 yyCh = getChar();
       
   285                 return Tok_LeftParen;
       
   286             case ')':
       
   287                 if (yyParenDepth == 0)
       
   288                     yyParenLineNo = yyCurLineNo;
       
   289                 yyParenDepth--;
       
   290                 yyCh = getChar();
       
   291                 return Tok_RightParen;
       
   292             case ',':
       
   293                 yyCh = getChar();
       
   294                 return Tok_Comma;
       
   295             case '.':
       
   296                 yyCh = getChar();
       
   297                 return Tok_Dot;
       
   298             case ';':
       
   299                 yyCh = getChar();
       
   300                 return Tok_Semicolon;
       
   301             case '+':
       
   302                 yyCh = getChar();
       
   303                 if (yyCh == QLatin1Char('+')) {
       
   304                     yyCh = getChar();
       
   305                     return Tok_PlusPlus;
       
   306 		}
       
   307                 if( yyCh == QLatin1Char('=') ){
       
   308                     yyCh = getChar();
       
   309                     return Tok_PlusEq;
       
   310 		}
       
   311                 return Tok_Plus;
       
   312             case '0':
       
   313             case '1':
       
   314             case '2':
       
   315             case '3':
       
   316             case '4':
       
   317             case '5':
       
   318             case '6':
       
   319             case '7':
       
   320             case '8':
       
   321             case '9':
       
   322                 {
       
   323                     QByteArray ba;
       
   324                     ba += yyCh.toLatin1();
       
   325                     yyCh = getChar();
       
   326                     bool hex = yyCh == QLatin1Char('x');
       
   327                     if ( hex ) {
       
   328                         ba += yyCh.toLatin1();
       
   329                         yyCh = getChar();
       
   330                     }
       
   331                     while ( hex ? isxdigit(yyCh.toLatin1()) : yyCh.isDigit() ) {
       
   332                         ba += yyCh.toLatin1();
       
   333                         yyCh = getChar();
       
   334                     }
       
   335                     bool ok;
       
   336                     yyInteger = ba.toLongLong(&ok);
       
   337                     if (ok) return Tok_Integer;
       
   338                     break;
       
   339                 }
       
   340             default:
       
   341                 yyCh = getChar();
       
   342             }
       
   343         }
       
   344     }
       
   345     return Tok_Eof;
       
   346 }
       
   347 
       
   348 static bool match( int t )
       
   349 {
       
   350     bool matches = ( yyTok == t );
       
   351     if ( matches )
       
   352         yyTok = getToken();
       
   353     return matches;
       
   354 }
       
   355 
       
   356 static bool matchString( QString &s )
       
   357 {
       
   358     if ( yyTok != Tok_String )
       
   359         return false;
       
   360 
       
   361     s = yyString;
       
   362     yyTok = getToken();
       
   363     while ( yyTok == Tok_Plus ) {
       
   364         yyTok = getToken();
       
   365         if (yyTok == Tok_String)
       
   366             s += yyString;
       
   367         else {
       
   368             qWarning( "%s:%d: String used in translation can only contain strings"
       
   369                 " concatenated with other strings, not expressions or numbers.",
       
   370                 qPrintable(yyFileName), yyLineNo );
       
   371             return false;
       
   372         }
       
   373         yyTok = getToken();
       
   374     }
       
   375     return true;
       
   376 }
       
   377 
       
   378 static bool matchStringOrNull(QString &s)
       
   379 {
       
   380     bool matches = matchString(s);
       
   381     if (!matches) {
       
   382         matches = (yyTok == Tok_null);
       
   383         if (matches)
       
   384             yyTok = getToken();
       
   385     }
       
   386     return matches;
       
   387 }
       
   388 
       
   389 /*
       
   390  * match any expression that can return a number, which can be
       
   391  * 1. Literal number (e.g. '11')
       
   392  * 2. simple identifier (e.g. 'm_count')
       
   393  * 3. simple function call (e.g. 'size()' )
       
   394  * 4. function call on an object (e.g. 'list.size()')
       
   395  * 5. function call on an object (e.g. 'list->size()')
       
   396  *
       
   397  * Other cases:
       
   398  * size(2,4)
       
   399  * list().size()
       
   400  * list(a,b).size(2,4)
       
   401  * etc...
       
   402  */
       
   403 static bool matchExpression()
       
   404 {
       
   405     if (match(Tok_Integer)) {
       
   406         return true;
       
   407     }
       
   408 
       
   409     int parenlevel = 0;
       
   410     while (match(Tok_Ident) || parenlevel > 0) {
       
   411         if (yyTok == Tok_RightParen) {
       
   412             if (parenlevel == 0) break;
       
   413             --parenlevel;
       
   414             yyTok = getToken();
       
   415         } else if (yyTok == Tok_LeftParen) {
       
   416             yyTok = getToken();
       
   417             if (yyTok == Tok_RightParen) {
       
   418                 yyTok = getToken();
       
   419             } else {
       
   420                 ++parenlevel;
       
   421             }
       
   422         } else if (yyTok == Tok_Ident) {
       
   423             continue;
       
   424         } else if (parenlevel == 0) {
       
   425             return false;
       
   426         }
       
   427     }
       
   428     return true;
       
   429 }
       
   430 
       
   431 static const QString context()
       
   432 {
       
   433       QString context(yyPackage);
       
   434       bool innerClass = false;
       
   435       for (int i = 0; i < yyScope.size(); ++i) {
       
   436          if (yyScope.at(i)->type == Scope::Clazz) {
       
   437              if (innerClass)
       
   438                  context.append(QLatin1String("$"));
       
   439              else
       
   440                  context.append(QLatin1String("."));
       
   441 
       
   442              context.append(yyScope.at(i)->name);
       
   443              innerClass = true;
       
   444          }
       
   445      }
       
   446      return context.isEmpty() ? yyDefaultContext : context;
       
   447 }
       
   448 
       
   449 static void recordMessage(
       
   450     Translator *tor, const QString &context, const QString &text, const QString &comment,
       
   451     const QString &extracomment, bool plural)
       
   452 {
       
   453     TranslatorMessage msg(
       
   454         context, text, comment, QString(),
       
   455         yyFileName, yyLineNo, QStringList(),
       
   456         TranslatorMessage::Unfinished, plural);
       
   457     msg.setExtraComment(extracomment.simplified());
       
   458     tor->extend(msg);
       
   459 }
       
   460 
       
   461 static void parse( Translator *tor )
       
   462 {
       
   463     QString text;
       
   464     QString com;
       
   465     QString extracomment;
       
   466 
       
   467     yyCh = getChar();
       
   468 
       
   469     yyTok = getToken();
       
   470     while ( yyTok != Tok_Eof ) {
       
   471         switch ( yyTok ) {
       
   472         case Tok_class:
       
   473             yyTok = getToken();
       
   474             if(yyTok == Tok_Ident) {
       
   475                 yyScope.push(new Scope(yyIdent, Scope::Clazz, yyLineNo));
       
   476             }
       
   477             else {
       
   478                 qFatal( "%s:%d: Class must be followed by a classname",
       
   479                                           qPrintable(yyFileName), yyLineNo );
       
   480             }
       
   481             while (!match(Tok_LeftBrace)) {
       
   482                 yyTok = getToken();
       
   483             }
       
   484             break;
       
   485 
       
   486         case Tok_tr:
       
   487             yyTok = getToken();
       
   488             if ( match(Tok_LeftParen) && matchString(text) ) {
       
   489                 com.clear();
       
   490                 bool plural = false;
       
   491 
       
   492                 if ( match(Tok_RightParen) ) {
       
   493                     // no comment
       
   494                 } else if (match(Tok_Comma) && matchStringOrNull(com)) {   //comment
       
   495                     if ( match(Tok_RightParen)) {
       
   496                         // ok,
       
   497                     } else if (match(Tok_Comma)) {
       
   498                         plural = true;
       
   499                     }
       
   500                 }
       
   501                 if (!text.isEmpty())
       
   502                     recordMessage(tor, context(), text, com, extracomment, plural);
       
   503             }
       
   504             break;
       
   505         case Tok_translate:
       
   506             {
       
   507                 QString contextOverride;
       
   508                 yyTok = getToken();
       
   509                 if ( match(Tok_LeftParen) &&
       
   510                      matchString(contextOverride) &&
       
   511                      match(Tok_Comma) &&
       
   512                      matchString(text) ) {
       
   513 
       
   514                     com.clear();
       
   515                     bool plural = false;
       
   516                     if (!match(Tok_RightParen)) {
       
   517                         // look for comment
       
   518                         if ( match(Tok_Comma) && matchStringOrNull(com)) {
       
   519                             if (!match(Tok_RightParen)) {
       
   520                                 if (match(Tok_Comma) && matchExpression() && match(Tok_RightParen)) {
       
   521                                     plural = true;
       
   522                                 } else {
       
   523                                     break;
       
   524                                 }
       
   525                             }
       
   526                         } else {
       
   527                             break;
       
   528                         }
       
   529                     }
       
   530                     if (!text.isEmpty())
       
   531                         recordMessage(tor, contextOverride, text, com, extracomment, plural);
       
   532                 }
       
   533             }
       
   534             break;
       
   535 
       
   536         case Tok_Ident:
       
   537             yyTok = getToken();
       
   538             break;
       
   539 
       
   540         case Tok_Comment:
       
   541             if (yyComment.startsWith(QLatin1Char(':'))) {
       
   542                 yyComment.remove(0, 1);
       
   543                 extracomment.append(yyComment);
       
   544             }
       
   545             yyTok = getToken();
       
   546             break;
       
   547 
       
   548         case Tok_RightBrace:
       
   549             if ( yyScope.isEmpty() ) {
       
   550                 qFatal( "%s:%d: Unbalanced right brace in Java code\n",
       
   551                         qPrintable(yyFileName), yyLineNo );
       
   552             }
       
   553             else
       
   554                 delete (yyScope.pop());
       
   555             extracomment.clear();
       
   556             yyTok = getToken();
       
   557             break;
       
   558 
       
   559          case Tok_LeftBrace:
       
   560             yyScope.push(new Scope(QString(), Scope::Other, yyLineNo));
       
   561             yyTok = getToken();
       
   562             break;
       
   563 
       
   564         case Tok_Semicolon:
       
   565             extracomment.clear();
       
   566             yyTok = getToken();
       
   567             break;
       
   568 
       
   569         case Tok_Package:
       
   570             yyTok = getToken();
       
   571             while(!match(Tok_Semicolon)) {
       
   572                 switch(yyTok) {
       
   573                     case Tok_Ident:
       
   574                         yyPackage.append(yyIdent);
       
   575                         break;
       
   576                     case Tok_Dot:
       
   577                         yyPackage.append(QLatin1String("."));
       
   578                         break;
       
   579                     default:
       
   580                          qFatal( "%s:%d: Package keyword should be followed by com.package.name;",
       
   581                                           qPrintable(yyFileName), yyLineNo );
       
   582                          break;
       
   583                 }
       
   584                 yyTok = getToken();
       
   585             }
       
   586             break;
       
   587 
       
   588         default:
       
   589             yyTok = getToken();
       
   590         }
       
   591     }
       
   592 
       
   593     if ( !yyScope.isEmpty() )
       
   594         qFatal( "%s:%d: Unbalanced braces in Java code\n",
       
   595                  qPrintable(yyFileName), yyScope.top()->line );
       
   596     else if ( yyParenDepth != 0 )
       
   597         qFatal( "%s:%d: Unbalanced parentheses in Java code\n",
       
   598                  qPrintable(yyFileName), yyParenLineNo );
       
   599 }
       
   600 
       
   601 
       
   602 bool loadJava(Translator &translator, const QString &filename, ConversionData &cd)
       
   603 {
       
   604     QFile file(filename);
       
   605     if (!file.open(QIODevice::ReadOnly)) {
       
   606         cd.appendError(QString::fromLatin1("Cannot open %1: %2")
       
   607             .arg(filename, file.errorString()));
       
   608         return false;
       
   609     }
       
   610 
       
   611     yyDefaultContext = cd.m_defaultContext;
       
   612     yyInPos = -1;
       
   613     yyFileName = filename;
       
   614     yyPackage.clear();
       
   615     yyScope.clear();
       
   616     yyTok = -1;
       
   617     yyParenDepth = 0;
       
   618     yyCurLineNo = 0;
       
   619     yyParenLineNo = 1;
       
   620 
       
   621     QTextStream ts(&file);
       
   622     QByteArray codecName;
       
   623     if (!cd.m_codecForSource.isEmpty())
       
   624         codecName = cd.m_codecForSource;
       
   625     else
       
   626         codecName = translator.codecName(); // Just because it should be latin1 already
       
   627     ts.setCodec(QTextCodec::codecForName(codecName));
       
   628     ts.setAutoDetectUnicode(true);
       
   629     yyInStr = ts.readAll();
       
   630     yyInPos = 0;
       
   631     yyFileName = filename;
       
   632     yyCurLineNo = 1;
       
   633     yyParenLineNo = 1;
       
   634 
       
   635     parse(&translator);
       
   636 
       
   637     // Java uses UTF-16 internally and Jambi makes UTF-8 for tr() purposes of it.
       
   638     translator.setCodecName("UTF-8");
       
   639     return true;
       
   640 }
       
   641 
       
   642 QT_END_NAMESPACE