tools/porting/src/rpplexer.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2001-2004 Roberto Raggi
       
     4 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     5 ** All rights reserved.
       
     6 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     7 **
       
     8 ** This file is part of the qt3to4 porting application of the Qt Toolkit.
       
     9 **
       
    10 ** $QT_BEGIN_LICENSE:LGPL$
       
    11 ** No Commercial Usage
       
    12 ** This file contains pre-release code and may not be distributed.
       
    13 ** You may use this file in accordance with the terms and conditions
       
    14 ** contained in the Technology Preview License Agreement accompanying
       
    15 ** this package.
       
    16 **
       
    17 ** GNU Lesser General Public License Usage
       
    18 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    19 ** General Public License version 2.1 as published by the Free Software
       
    20 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    21 ** packaging of this file.  Please review the following information to
       
    22 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    24 **
       
    25 ** In addition, as a special exception, Nokia gives you certain additional
       
    26 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    28 **
       
    29 ** If you have questions regarding the use of this file, please contact
       
    30 ** Nokia at qt-info@nokia.com.
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 **
       
    39 ** $QT_END_LICENSE$
       
    40 **
       
    41 ****************************************************************************/
       
    42 
       
    43 #include "rpplexer.h"
       
    44 #include <QChar>
       
    45 #include <ctype.h>
       
    46 
       
    47 QT_BEGIN_NAMESPACE
       
    48 
       
    49 using namespace TokenEngine;
       
    50 
       
    51 namespace Rpp {
       
    52 RppLexer::RppLexer()
       
    53     : m_buffer(0), m_ptr(0), m_len(0)
       
    54 {
       
    55     setupScanTable();
       
    56 }
       
    57 
       
    58 void RppLexer::setupScanTable()
       
    59 {
       
    60     memset(s_attr_table, 0, 256);
       
    61 
       
    62     for (int i=0; i<128; ++i) {
       
    63         switch (i) {
       
    64         case ':':
       
    65         case '*':
       
    66         case '%':
       
    67         case '^':
       
    68         case '=':
       
    69         case '!':
       
    70         case '&':
       
    71         case '|':
       
    72         case '+':
       
    73         case '<':
       
    74         case '>':
       
    75         case '-':
       
    76         case '.':
       
    77             s_scan_table[i] = &RppLexer::scanOperator;
       
    78             break;
       
    79 
       
    80         case '\r':
       
    81 		case '\n':
       
    82             s_scan_table[i] = &RppLexer::scanNewline;
       
    83             break;
       
    84 
       
    85         case '\'':
       
    86             s_scan_table[i] = &RppLexer::scanCharLiteral;
       
    87             break;
       
    88 
       
    89         case '"':
       
    90             s_scan_table[i] = &RppLexer::scanStringLiteral;
       
    91             break;
       
    92         case '#':
       
    93             s_scan_table[i] = &RppLexer::scanPreprocessor;
       
    94             break;
       
    95 
       
    96         case '/':
       
    97             s_scan_table[i] = &RppLexer::scanComment;
       
    98             break;
       
    99 
       
   100         default:
       
   101             if (isspace(i)) {
       
   102                 s_scan_table[i] = &RppLexer::scanWhiteSpaces;
       
   103                 s_attr_table[i] |= A_Whitespace;
       
   104             } else if (isalpha(i) || i == '_') {
       
   105                 s_scan_table[i] = &RppLexer::scanKeyword;
       
   106                 s_attr_table[i] |= A_Alpha;
       
   107             } else if (isdigit(i)) {
       
   108                 s_scan_table[i] = &RppLexer::scanNumberLiteral;
       
   109                 s_attr_table[i] |= A_Digit;
       
   110             } else
       
   111                 s_scan_table[i] = &RppLexer::scanChar;
       
   112         }
       
   113     }
       
   114 
       
   115     s_scan_table[128] = &RppLexer::scanUnicodeChar;
       
   116 }
       
   117 
       
   118 QVector<Type> RppLexer::lex(const TokenContainer &tokenContainer)
       
   119 {
       
   120     QVector<Type> tokenTypes;
       
   121     const int numTokens = tokenContainer.count();
       
   122     tokenTypes.reserve(numTokens);
       
   123     QByteArray text = tokenContainer.fullText();
       
   124     m_buffer = text.constData();
       
   125     for(int t=0; t<numTokens; ++t) {
       
   126         TokenEngine::Token token = tokenContainer.token(t);
       
   127         tokenTypes.append(indentify(token.start, token.length));
       
   128     }
       
   129     return tokenTypes;
       
   130 }
       
   131 
       
   132 Type RppLexer::indentify(int pos, int length)
       
   133 {
       
   134     Q_ASSERT(length > 0);
       
   135     m_ptr = pos;
       
   136     m_len = length;
       
   137     int kind = 0;
       
   138     const unsigned char ch = m_buffer[pos];
       
   139     (this->*s_scan_table[ch < 128 ? ch : 128])(&kind);
       
   140     return (Type)kind;
       
   141 }
       
   142 
       
   143 void RppLexer::scanChar(int *kind)
       
   144 {
       
   145     *kind = m_buffer[m_ptr];
       
   146 }
       
   147 
       
   148 void RppLexer::scanWhiteSpaces(int *kind)
       
   149 {
       
   150     *kind = Token_whitespaces;
       
   151 
       
   152     while (unsigned char ch = m_buffer[m_ptr]) {
       
   153         if (s_attr_table[ch] & A_Whitespace)
       
   154             ++m_ptr;
       
   155         else
       
   156             break;
       
   157     }
       
   158 }
       
   159 
       
   160 void RppLexer::scanNewline(int *kind)
       
   161 {
       
   162     *kind = '\n';
       
   163 }
       
   164 
       
   165 void RppLexer::scanUnicodeChar(int *kind)
       
   166 {
       
   167     *kind = m_buffer[m_ptr];
       
   168 }
       
   169 
       
   170 void RppLexer::scanCharLiteral(int *kind)
       
   171 {
       
   172     *kind = Token_char_literal;
       
   173 }
       
   174 
       
   175 void RppLexer::scanStringLiteral(int *kind)
       
   176 {
       
   177     *kind = Token_string_literal;
       
   178 }
       
   179 
       
   180 void RppLexer::scanIdentifier(int *kind)
       
   181 {
       
   182     *kind = Token_identifier;
       
   183 }
       
   184 
       
   185 void RppLexer::scanNumberLiteral(int *kind)
       
   186 {
       
   187     *kind = Token_number_literal;
       
   188 }
       
   189 
       
   190 void RppLexer::scanPreprocessor(int *kind)
       
   191 {
       
   192     *kind = Token_preproc;
       
   193 }
       
   194 
       
   195 void RppLexer::scanComment(int *kind)
       
   196 {
       
   197     switch(m_buffer[m_ptr + 1]) {
       
   198     case '/':
       
   199         *kind = Token_line_comment;
       
   200         break;
       
   201     case '*':
       
   202         *kind = Token_multiline_comment;
       
   203         break;
       
   204     default:
       
   205         scanOperator(kind);
       
   206     }
       
   207 }
       
   208 
       
   209 void RppLexer::scanOperator(int *kind)
       
   210 {
       
   211     switch (m_buffer[m_ptr]) {
       
   212     case ':':
       
   213         if (m_buffer[m_ptr+1] == ':') {
       
   214             *kind = Token_scope;
       
   215             return;
       
   216         }
       
   217         break;
       
   218 
       
   219     case '*':
       
   220     case '/':
       
   221     case '%':
       
   222     case '^':
       
   223         if (m_buffer[m_ptr+1] == '=') {
       
   224             *kind = Token_assign;
       
   225             return;
       
   226         }
       
   227         break;
       
   228 
       
   229     case '=':
       
   230        if (m_buffer[m_ptr+1] == '=') {
       
   231             *kind = Token_eq;
       
   232             return;
       
   233         }
       
   234         break;
       
   235     case '!':
       
   236        if (m_buffer[m_ptr+1] == '=') {
       
   237             *kind = Token_not_eq;
       
   238             return;
       
   239         }
       
   240         break;
       
   241 
       
   242     case '&':
       
   243         if (m_buffer[m_ptr+1] == '&') {
       
   244             *kind = Token_and;
       
   245             return;
       
   246         } else if (m_buffer[m_ptr+1] == '=') {
       
   247             *kind = Token_assign;
       
   248             return;
       
   249         }
       
   250         break;
       
   251 
       
   252     case '|':
       
   253         if (m_buffer[m_ptr+1] == '|' ) {
       
   254             *kind = Token_or;
       
   255             return;
       
   256         } else if (m_buffer[m_ptr+1] == '=') {
       
   257             *kind = Token_assign;
       
   258             return;
       
   259         }
       
   260         break;
       
   261 
       
   262     case '+':
       
   263         if (m_buffer[m_ptr+1] == '+' ) {
       
   264             *kind = Token_incr;
       
   265             return;
       
   266         } else if (m_buffer[m_ptr+1] == '=') {
       
   267             *kind = Token_assign;
       
   268             return;
       
   269         }
       
   270         break;
       
   271 
       
   272     case '<':
       
   273         if (m_buffer[m_ptr+1] == '<') {
       
   274             if (m_buffer[m_ptr+2] == '=') {
       
   275                 *kind = Token_assign;
       
   276                 return;
       
   277             }
       
   278             *kind = Token_left_shift;
       
   279             return;
       
   280         } else if (m_buffer[m_ptr+1] == '=') {
       
   281             *kind = Token_leq;
       
   282             return;
       
   283         }
       
   284         break;
       
   285 
       
   286     case '>':
       
   287         if (m_buffer[m_ptr+1] == '>') {
       
   288             if (m_buffer[m_ptr+2] == '=') {
       
   289                 *kind = Token_assign;
       
   290                 return;
       
   291             }
       
   292             *kind = Token_right_shift;
       
   293             return;
       
   294         } else if (m_buffer[m_ptr+1] == '=') {
       
   295             *kind = Token_geq;
       
   296             return;
       
   297         }
       
   298         break;
       
   299 
       
   300     case '-':
       
   301         if (m_buffer[m_ptr+1] == '>') {
       
   302             if (m_buffer[m_ptr+2] == '*') {
       
   303                 *kind = Token_ptrmem;
       
   304                 return;
       
   305             }
       
   306             *kind = Token_arrow;
       
   307             return;
       
   308         } else if (m_buffer[m_ptr+1] == '-') {
       
   309             *kind = Token_decr;
       
   310             return;
       
   311         } else if (m_buffer[m_ptr+1] == '=') {
       
   312             *kind = Token_assign;
       
   313             return;
       
   314         }
       
   315         break;
       
   316 
       
   317     case '.':
       
   318         if (m_buffer[m_ptr+1] == '.' && m_buffer[m_ptr+2] == '.') {
       
   319             *kind = Token_ellipsis;
       
   320             return;
       
   321         } else if (m_buffer[m_ptr+1] == '*') {
       
   322             *kind = Token_ptrmem;
       
   323             return;
       
   324         }
       
   325         break;
       
   326 
       
   327     }
       
   328 
       
   329     *kind = m_buffer[m_ptr++];
       
   330 }
       
   331 
       
   332 bool RppLexer::match(const char *buf, int len)
       
   333 {
       
   334     if (m_len != len)
       
   335         return false;
       
   336     for (int i = 0; i < len; ++i) {
       
   337         if(m_buffer[m_ptr + i] != buf[i])
       
   338             return false;
       
   339     }
       
   340     return true;
       
   341 }
       
   342 
       
   343 void RppLexer::scanKeyword(int *kind)
       
   344 {
       
   345     if(match("if", 2))
       
   346         *kind = Token_directive_if;
       
   347     else if(match("elif", 4))
       
   348         *kind = Token_directive_elif;
       
   349     else if(match("else", 4))
       
   350         *kind = Token_directive_else;
       
   351     else if(match("line", 4))
       
   352         *kind = Token_directive_line;
       
   353     else if(match("else", 4))
       
   354         *kind = Token_directive_else;
       
   355     else if(match("line", 4))
       
   356         *kind = Token_directive_line;
       
   357     else if(match("endif", 5))
       
   358         *kind = Token_directive_endif;
       
   359     else if(match("ifdef", 5))
       
   360         *kind = Token_directive_ifdef;
       
   361     else if(match("error", 5))
       
   362         *kind = Token_directive_error;
       
   363     else if(match("undef", 5))
       
   364         *kind = Token_directive_undef;
       
   365     else if(match("pragma", 6))
       
   366         *kind = Token_directive_pragma;
       
   367     else if(match("ifndef", 6))
       
   368         *kind = Token_directive_ifndef;
       
   369     else if(match("define", 6))
       
   370         *kind = Token_directive_define;
       
   371     else if(match("include", 7))
       
   372         *kind = Token_directive_include;
       
   373     else if(match("defined", 7))
       
   374         *kind = Token_defined;
       
   375     else
       
   376         *kind = Token_identifier;
       
   377 }
       
   378 
       
   379 } //namespace Rpp
       
   380 
       
   381 QT_END_NAMESPACE