FCL/sf/mw/qt: comparison tools/porting/src/tokenizer.cpp

equal deleted inserted replaced

--1:000000000000
+:1918ee327afb
+/****************************************************************************
+**
+** Copyright (C) 2001-2004 Roberto Raggi
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the qt3to4 porting application of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#include "tokenizer.h"
+#include "tokens.h"
+#include <QDateTime>
+#include <QHash>
+#include <ctype.h>
+QT_BEGIN_NAMESPACE
+using TokenEngine::Token;
+static QHash<QByteArray, bool> preprocessed; // NOTE(review): not referenced anywhere in the visible part of this file - confirm before removing
+bool Tokenizer::s_initialized = false; // set to true by setupScanTable(); the constructor checks it and skips rebuilding the tables
+Tokenizer::scan_fun_ptr Tokenizer::s_scan_table[128 + 1]; // one scanner per 7-bit character value; slot 128 is used for every byte >= 128
+int Tokenizer::s_attr_table[256]; // A_* character-class flag bits, indexed by unsigned char value
+// Creates a tokenizer with no input attached and read position 0.
+// The first instance ever constructed also builds the shared static
+// dispatch/attribute tables; later instances find them already set up.
+Tokenizer::Tokenizer()
+    : m_buffer(0),
+      m_ptr(0)
+{
+    if (s_initialized)
+        return;
+    setupScanTable();
+}
+// Destroys the tokenizer. The body is intentionally empty: nothing is
+// released explicitly here.
+Tokenizer::~Tokenizer() {}
+// Character-class flag bits stored in Tokenizer::s_attr_table.
+// A_Alphanum is the union of the letter and digit bits, so a single
+// mask test answers "can this character continue an identifier?".
+enum
+{
+    A_Alpha      = 1 << 0,
+    A_Digit      = 1 << 1,
+    A_Whitespace = 1 << 2,
+    A_Alphanum   = A_Alpha | A_Digit
+};
+// Builds the static tables shared by all Tokenizer instances and marks
+// them as initialized.
+//
+//  - s_scan_table[c] (c in 0..127) is the member function that scans a
+//    token starting with character c; s_scan_table[128] is the scanner
+//    used for every byte value >= 128.
+//  - s_attr_table[c] receives the A_* flags for whitespace, letters
+//    (including '_') and digits. Entries for 128..255 stay 0.
+//
+// Note that '\r' and '\n' are routed to scanNewline() before the
+// isspace() test, so they never get the A_Whitespace flag.
+void Tokenizer::setupScanTable()
+{
+    s_initialized = true;
+
+    // Clear the table element by element: s_attr_table holds ints, so a
+    // memset with a byte count of 256 would only reach the first 256
+    // bytes rather than all 256 entries.
+    for (int i = 0; i < 256; ++i)
+        s_attr_table[i] = 0;
+
+    for (int i = 0; i < 128; ++i) {
+        switch (i) {
+        case ':':
+        case '*':
+        case '%':
+        case '^':
+        case '=':
+        case '!':
+        case '&':
+        case '|':
+        case '+':
+        case '<':
+        case '>':
+        case '-':
+        case '.':
+            s_scan_table[i] = &Tokenizer::scanOperator;
+            break;
+        case '\r':
+        case '\n':
+            s_scan_table[i] = &Tokenizer::scanNewline;
+            break;
+        case '#':
+            s_scan_table[i] = &Tokenizer::scanPreprocessor;
+            break;
+        case '/':
+            // '/' may start a comment or be an operator; scanComment decides.
+            s_scan_table[i] = &Tokenizer::scanComment;
+            break;
+        case '\'':
+            s_scan_table[i] = &Tokenizer::scanCharLiteral;
+            break;
+        case '"':
+            s_scan_table[i] = &Tokenizer::scanStringLiteral;
+            break;
+        default:
+            if (isspace(i)) {
+                s_scan_table[i] = &Tokenizer::scanWhiteSpaces;
+                s_attr_table[i] |= A_Whitespace;
+            } else if (isalpha(i) || i == '_') {
+                s_scan_table[i] = &Tokenizer::scanIdentifier;
+                s_attr_table[i] |= A_Alpha;
+            } else if (isdigit(i)) {
+                s_scan_table[i] = &Tokenizer::scanNumberLiteral;
+                s_attr_table[i] |= A_Digit;
+            } else {
+                // Everything else (including NUL) is a one-character token.
+                s_scan_table[i] = &Tokenizer::scanChar;
+            }
+            break;
+        }
+    }
+
+    s_scan_table[128] = &Tokenizer::scanUnicodeChar;
+}
+// Splits \a text into tokens and returns them in input order.
+//
+// Any tokens from a previous call are discarded first. Scanning stops
+// as soon as nextToken() reports the end of the input; the token that
+// signalled the end is not added to the result.
+QVector<TokenEngine::Token> Tokenizer::tokenize(QByteArray text)
+{
+    m_tokens.clear();
+    m_buffer = text;
+    m_ptr = 0;
+
+    bool reachedEnd = false;
+    while (!reachedEnd) {
+        Token current;
+        reachedEnd = nextToken(current);
+        if (!reachedEnd)
+            m_tokens.append(current);
+    }
+    return m_tokens;
+}
+// Scans one token at the current read position and stores its extent
+// (start offset and length) in \a tok. The token kind reported by the
+// scanner is not stored in \a tok; it is only used for the return value.
+//
+// Returns true when the scanner reported kind 0, which tokenize()
+// treats as the end of the input.
+bool Tokenizer::nextToken(Token &tok)
+{
+    const int tokenStart = m_ptr;
+    const unsigned char lead = m_buffer[m_ptr];
+
+    // Bytes outside the 7-bit range all share the extra slot 128.
+    const int slot = (lead < 128) ? lead : 128;
+
+    int kind = 0;
+    (this->*s_scan_table[slot])(&kind);
+
+    tok.start = tokenStart;
+    tok.length = m_ptr - tokenStart;
+    return kind == 0;
+}
+// Consumes exactly one character and reports that character's value as
+// the token kind.
+void Tokenizer::scanChar(int *kind)
+{
+    const char consumed = m_buffer[m_ptr];
+    ++m_ptr;
+    *kind = consumed;
+}
+// Consumes a run of characters flagged A_Whitespace and reports
+// Token_whitespaces. The run ends at the first NUL or at the first
+// character without the flag.
+void Tokenizer::scanWhiteSpaces(int *kind)
+{
+    *kind = Token_whitespaces;
+    for (;;) {
+        const unsigned char current = m_buffer[m_ptr];
+        if (current == 0 || !(s_attr_table[current] & A_Whitespace))
+            return;
+        ++m_ptr;
+    }
+}
+// Consumes one line terminator and reports its kind through \a kind.
+//
+//  - "\n"   -> consumes 1 character, kind '\n'
+//  - "\r\n" -> consumes 2 characters, kind '\n'
+//  - any other first character (in practice a lone '\r') -> consumes
+//    1 character, kind is that character
+//
+// \a kind is written on every path, so it is not marked as unused.
+void Tokenizer::scanNewline(int *kind)
+{
+    const unsigned char first = m_buffer[m_ptr++];
+
+    // Fold a CR LF pair into a single '\n' token.
+    if (first == '\r' && m_buffer[m_ptr] == '\n') {
+        ++m_ptr;
+        *kind = '\n';
+        return;
+    }
+
+    // Either a plain '\n' or a character that is reported as itself.
+    *kind = first;
+}
+// Scanner installed in slot 128 of the dispatch table, i.e. used for
+// bytes >= 128. Consumes a single byte and reports its value as the
+// token kind.
+void Tokenizer::scanUnicodeChar(int *kind)
+{
+    const char consumed = m_buffer[m_ptr];
+    ++m_ptr;
+    *kind = consumed;
+}
+// Scans a character literal starting at the opening single quote and
+// always reports Token_char_literal.
+//
+// The literal ends after the closing quote. If a newline or NUL is
+// reached first, scanning stops in front of that character and the
+// unterminated text is still reported as a character literal.
+// Only the escapes \' and \\ are skipped as a pair; after any other
+// backslash just the backslash itself is skipped.
+void Tokenizer::scanCharLiteral(int *kind)
+{
+    // Every exit reports the same kind, so set it once up front.
+    *kind = Token_char_literal;
+
+    ++m_ptr; // step over the opening quote
+    for (;;) {
+        const unsigned char ch = m_buffer[m_ptr];
+        switch (ch) {
+        case '\0':
+        case '\n':
+            // ### error: unterminated literal; the terminator is not consumed
+            return;
+        case '\\':
+            if (m_buffer[m_ptr + 1] == '\'' || m_buffer[m_ptr + 1] == '\\')
+                m_ptr += 2;
+            else
+                ++m_ptr;
+            break;
+        case '\'':
+            ++m_ptr; // closing quote belongs to the token
+            return;
+        default:
+            ++m_ptr;
+            break;
+        }
+    }
+}
+// Scans a string literal starting at the opening double quote and
+// always reports Token_string_literal.
+//
+// The literal ends after the closing quote. A newline or NUL before
+// the closing quote stops the scan in front of that character (### error
+// case: the unterminated text is still reported as a string literal).
+// Only the escapes \" and \\ are skipped as a pair; after any other
+// backslash just the backslash itself is skipped.
+void Tokenizer::scanStringLiteral(int *kind)
+{
+    *kind = Token_string_literal;
+
+    ++m_ptr; // step over the opening quote
+    for (;;) {
+        const char current = m_buffer[m_ptr];
+
+        if (current == '\0' || current == '\n')
+            return; // ### error: unterminated literal
+
+        if (current == '"') {
+            ++m_ptr;
+            return;
+        }
+
+        if (current == '\\') {
+            const char escaped = m_buffer[m_ptr + 1];
+            m_ptr += (escaped == '"' || escaped == '\\') ? 2 : 1;
+            continue;
+        }
+
+        ++m_ptr;
+    }
+}
+// Consumes a run of characters flagged as letters or digits in
+// s_attr_table ('_' is flagged as a letter by setupScanTable) and
+// reports Token_identifier.
+void Tokenizer::scanIdentifier(int *kind)
+{
+    for (unsigned char c = m_buffer[m_ptr];
+         s_attr_table[c] & A_Alphanum;
+         c = m_buffer[m_ptr]) {
+        ++m_ptr;
+    }
+    *kind = Token_identifier;
+}
+// Consumes a run of letters, digits and '.' characters and reports
+// Token_number_literal. No further validation of the number's syntax
+// is performed.
+void Tokenizer::scanNumberLiteral(int *kind)
+{
+    for (;;) {
+        const unsigned char c = m_buffer[m_ptr];
+        const bool partOfNumber =
+            (c == '.') || ((s_attr_table[c] & A_Alphanum) != 0);
+        if (!partOfNumber)
+            break;
+        ++m_ptr;
+    }
+    // ### finish to implement me!!
+    *kind = Token_number_literal;
+}
+// Scanner for tokens that start with '/'.
+//
+// If the next character is neither '/' nor '*', this is not a comment
+// and the work is handed to scanOperator(). Otherwise the comment is
+// consumed and Token_comment is reported:
+//  - a "//" comment ends in front of the first '\r' or '\n', which is
+//    left unconsumed;
+//  - a "/* */" comment ends after the closing "*/".
+// Reaching NUL first also ends the comment (### error case).
+void Tokenizer::scanComment(int *kind)
+{
+    const char second = m_buffer[m_ptr + 1];
+    if (second != '/' && second != '*') {
+        scanOperator(kind);
+        return;
+    }
+
+    const bool multiLineComment = (second == '*');
+    m_ptr += 2; // skip the "//" or "/*" opener
+
+    for (char ch = m_buffer[m_ptr]; ch != '\0'; ch = m_buffer[m_ptr]) {
+        if (ch == '\r' || ch == '\n') {
+            if (!multiLineComment)
+                break;
+            // Inside a block comment a line terminator is just content;
+            // scanNewline() is used only to step over it, so the kind it
+            // reports is thrown away.
+            int ignoredKind = 0;
+            scanNewline(&ignoredKind);
+        } else if (ch == '*' && multiLineComment && m_buffer[m_ptr + 1] == '/') {
+            m_ptr += 2;
+            break;
+        } else {
+            ++m_ptr;
+        }
+    }
+
+    *kind = Token_comment;
+}
+// Consumes only the single character at the read position (this scanner
+// is installed for '#') and reports Token_preproc. The rest of the
+// directive is not consumed here.
+void Tokenizer::scanPreprocessor(int *kind)
+{
+    *kind = Token_preproc;
+    m_ptr += 1;
+}
+// Scans an operator starting at the read position.
+//
+// Recognised two- and three-character operators are consumed as a unit
+// and reported with their Token_* kind:
+//   ::                                   Token_scope
+//   *= /= %= ^= &= |= += -= <<= >>=      Token_assign
+//   == !=                                Token_eq
+//   && ||                                Token_and / Token_or
+//   ++ --                                Token_incr / Token_decr
+//   << >>                                Token_shift
+//   <= >=                                Token_leq / Token_geq
+//   -> ->* .*                            Token_arrow / Token_ptrmem
+//   ...                                  Token_ellipsis
+// Anything else is consumed as a single character whose value becomes
+// the token kind.
+void Tokenizer::scanOperator(int *kind)
+{
+    const char next = m_buffer[m_ptr + 1];
+
+    int matched = 0; // length of the multi-character operator, 0 if none
+    int token = 0;   // kind to report when matched > 0
+
+    switch (m_buffer[m_ptr]) {
+    case ':':
+        if (next == ':') {
+            matched = 2;
+            token = Token_scope;
+        }
+        break;
+
+    case '*':
+    case '/':
+    case '%':
+    case '^':
+        if (next == '=') {
+            matched = 2;
+            token = Token_assign;
+        }
+        break;
+
+    case '=':
+    case '!':
+        if (next == '=') {
+            matched = 2;
+            token = Token_eq;
+        }
+        break;
+
+    case '&':
+        if (next == '&') {
+            matched = 2;
+            token = Token_and;
+        } else if (next == '=') {
+            matched = 2;
+            token = Token_assign;
+        }
+        break;
+
+    case '|':
+        if (next == '|') {
+            matched = 2;
+            token = Token_or;
+        } else if (next == '=') {
+            matched = 2;
+            token = Token_assign;
+        }
+        break;
+
+    case '+':
+        if (next == '+') {
+            matched = 2;
+            token = Token_incr;
+        } else if (next == '=') {
+            matched = 2;
+            token = Token_assign;
+        }
+        break;
+
+    case '<':
+        if (next == '<') {
+            // "<<=" takes precedence over "<<".
+            if (m_buffer[m_ptr + 2] == '=') {
+                matched = 3;
+                token = Token_assign;
+            } else {
+                matched = 2;
+                token = Token_shift;
+            }
+        } else if (next == '=') {
+            matched = 2;
+            token = Token_leq;
+        }
+        break;
+
+    case '>':
+        if (next == '>') {
+            // ">>=" takes precedence over ">>".
+            if (m_buffer[m_ptr + 2] == '=') {
+                matched = 3;
+                token = Token_assign;
+            } else {
+                matched = 2;
+                token = Token_shift;
+            }
+        } else if (next == '=') {
+            matched = 2;
+            token = Token_geq;
+        }
+        break;
+
+    case '-':
+        if (next == '>') {
+            // "->*" takes precedence over "->".
+            if (m_buffer[m_ptr + 2] == '*') {
+                matched = 3;
+                token = Token_ptrmem;
+            } else {
+                matched = 2;
+                token = Token_arrow;
+            }
+        } else if (next == '-') {
+            matched = 2;
+            token = Token_decr;
+        } else if (next == '=') {
+            matched = 2;
+            token = Token_assign;
+        }
+        break;
+
+    case '.':
+        if (next == '.' && m_buffer[m_ptr + 2] == '.') {
+            matched = 3;
+            token = Token_ellipsis;
+        } else if (next == '*') {
+            matched = 2;
+            token = Token_ptrmem;
+        }
+        break;
+    }
+
+    if (matched > 0) {
+        m_ptr += matched;
+        *kind = token;
+        return;
+    }
+
+    // No multi-character operator: the character stands for itself.
+    *kind = m_buffer[m_ptr++];
+}
+QT_END_NAMESPACE

changeset 0	1918ee327afb
child 4	3b1da2848fc7