--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/qdoc3/tokenizer.cpp Mon Jan 11 14:00:40 2010 +0000
@@ -0,0 +1,753 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the tools applications of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "config.h"
+#include "tokenizer.h"
+
+#include <qdebug.h>
+#include <qfile.h>
+#include <qhash.h>
+#include <qregexp.h>
+#include <qstring.h>
+
+#include <ctype.h>
+#include <string.h>
+
+QT_BEGIN_NAMESPACE
+
+#define LANGUAGE_CPP "Cpp"
+
+/* qmake ignore Q_OBJECT */
+
+/*
+ Keep in sync with tokenizer.h.
+*/
+static const char *kwords[] = {
+ "char", "class", "const", "double", "enum", "explicit",
+ "friend", "inline", "int", "long", "namespace", "operator",
+ "private", "protected", "public", "short", "signals", "signed",
+ "slots", "static", "struct", "template", "typedef", "typename",
+ "union", "unsigned", "using", "virtual", "void", "volatile",
+ "__int64", "Q_OBJECT", "Q_OVERRIDE", "Q_PROPERTY",
+ "Q_DECLARE_SEQUENTIAL_ITERATOR",
+ "Q_DECLARE_MUTABLE_SEQUENTIAL_ITERATOR",
+ "Q_DECLARE_ASSOCIATIVE_ITERATOR",
+ "Q_DECLARE_MUTABLE_ASSOCIATIVE_ITERATOR",
+ "Q_DECLARE_FLAGS",
+ "Q_SIGNALS",
+ "Q_SLOTS",
+ "QT_COMPAT",
+ "QT_COMPAT_CONSTRUCTOR",
+ "QT_DEPRECATED",
+ "QT_MOC_COMPAT",
+ "QT_MODULE",
+ "QT3_SUPPORT",
+ "QT3_SUPPORT_CONSTRUCTOR",
+ "QT3_MOC_SUPPORT",
+ "QDOC_PROPERTY"
+};
+
+static const int KwordHashTableSize = 4096;
+static int kwordHashTable[KwordHashTableSize];
+
+static QHash<QByteArray, bool> *ignoredTokensAndDirectives = 0;
+
+static QRegExp *comment = 0;
+static QRegExp *versionX = 0;
+static QRegExp *definedX = 0;
+
+static QRegExp *defines = 0;
+static QRegExp *falsehoods = 0;
+
+/*
+ This function is a perfect hash function for the 37 keywords of C99
+ (with a hash table size of 512). It should perform well on our
+ Qt-enhanced C++ subset.
+*/
+static int hashKword(const char *s, int len)
+{
+ return (((uchar) s[0]) + (((uchar) s[2]) << 5) +
+ (((uchar) s[len - 1]) << 3)) % KwordHashTableSize;
+}
+
+static void insertKwordIntoHash(const char *s, int number)
+{
+ int k = hashKword(s, strlen(s));
+ while (kwordHashTable[k]) {
+ if (++k == KwordHashTableSize)
+ k = 0;
+ }
+ kwordHashTable[k] = number;
+}
+
+Tokenizer::Tokenizer(const Location& loc, FILE *in)
+{
+ init();
+ QFile file;
+ file.open(in, QIODevice::ReadOnly);
+ yyIn = file.readAll();
+ file.close();
+ yyPos = 0;
+ start(loc);
+}
+
+Tokenizer::Tokenizer(const Location& loc, const QByteArray &in)
+ : yyIn(in)
+{
+ init();
+ yyPos = 0;
+ start(loc);
+}
+
+Tokenizer::~Tokenizer()
+{
+ delete[] yyLexBuf1;
+ delete[] yyLexBuf2;
+}
+
+int Tokenizer::getToken()
+{
+ char *t = yyPrevLex;
+ yyPrevLex = yyLex;
+ yyLex = t;
+
+ while (yyCh != EOF) {
+ yyTokLoc = yyCurLoc;
+ yyLexLen = 0;
+
+ if (isspace(yyCh)) {
+ do {
+ yyCh = getChar();
+ } while (isspace(yyCh));
+ }
+ else if (isalpha(yyCh) || yyCh == '_') {
+ do {
+ yyCh = getChar();
+ } while (isalnum(yyCh) || yyCh == '_');
+
+ int k = hashKword(yyLex, yyLexLen);
+ for (;;) {
+ int i = kwordHashTable[k];
+ if (i == 0) {
+ return Tok_Ident;
+ }
+ else if (i == -1) {
+ if (!parsingMacro && ignoredTokensAndDirectives->contains(yyLex)) {
+ if (ignoredTokensAndDirectives->value(yyLex)) { // it's a directive
+ int parenDepth = 0;
+ while (yyCh != EOF && (yyCh != ')' || parenDepth > 1)) {
+ if (yyCh == '(')
+ ++parenDepth;
+ else if (yyCh == ')')
+ --parenDepth;
+ yyCh = getChar();
+ }
+ if (yyCh == ')')
+ yyCh = getChar();
+ }
+ break;
+ }
+ }
+ else if (strcmp(yyLex, kwords[i - 1]) == 0) {
+ int ret = (int) Tok_FirstKeyword + i - 1;
+ if (ret != Tok_explicit && ret != Tok_inline && ret != Tok_typename)
+ return ret;
+ break;
+ }
+
+ if (++k == KwordHashTableSize)
+ k = 0;
+ }
+ }
+ else if (isdigit(yyCh)) {
+ do {
+ yyCh = getChar();
+ } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' ||
+ yyCh == '-');
+ return Tok_Number;
+ }
+ else {
+ switch (yyCh) {
+ case '!':
+ case '%':
+ yyCh = getChar();
+ if (yyCh == '=')
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ case '"':
+ yyCh = getChar();
+
+ while (yyCh != EOF && yyCh != '"') {
+ if (yyCh == '\\')
+ yyCh = getChar();
+ yyCh = getChar();
+ }
+ yyCh = getChar();
+
+ if (yyCh == EOF)
+ yyTokLoc.warning(tr("Unterminated C++ string literal"),
+ tr("Maybe you forgot '/*!' at the beginning of the file?"));
+ else
+ return Tok_String;
+ break;
+ case '#':
+ return getTokenAfterPreprocessor();
+ case '&':
+ yyCh = getChar();
+ if (yyCh == '&' || yyCh == '=') {
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ }
+ else {
+ return Tok_Ampersand;
+ }
+ case '\'':
+ yyCh = getChar();
+ if (yyCh == '\\')
+ yyCh = getChar();
+ do {
+ yyCh = getChar();
+ } while (yyCh != EOF && yyCh != '\'');
+
+ if (yyCh == EOF) {
+ yyTokLoc.warning(tr("Unterminated C++ character"
+ " literal"));
+ }
+ else {
+ yyCh = getChar();
+ return Tok_Number;
+ }
+ break;
+ case '(':
+ yyCh = getChar();
+ if (yyNumPreprocessorSkipping == 0)
+ yyParenDepth++;
+ if (isspace(yyCh)) {
+ do {
+ yyCh = getChar();
+ } while (isspace(yyCh));
+ yyLexLen = 1;
+ yyLex[1] = '\0';
+ }
+ if (yyCh == '*') {
+ yyCh = getChar();
+ return Tok_LeftParenAster;
+ }
+ return Tok_LeftParen;
+ case ')':
+ yyCh = getChar();
+ if (yyNumPreprocessorSkipping == 0)
+ yyParenDepth--;
+ return Tok_RightParen;
+ case '*':
+ yyCh = getChar();
+ if (yyCh == '=') {
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ } else {
+ return Tok_Aster;
+ }
+ case '^':
+ yyCh = getChar();
+ if (yyCh == '=') {
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ } else {
+ return Tok_Caret;
+ }
+ case '+':
+ yyCh = getChar();
+ if (yyCh == '+' || yyCh == '=')
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ case ',':
+ yyCh = getChar();
+ return Tok_Comma;
+ case '-':
+ yyCh = getChar();
+ if (yyCh == '-' || yyCh == '=') {
+ yyCh = getChar();
+ } else if (yyCh == '>') {
+ yyCh = getChar();
+ if (yyCh == '*')
+ yyCh = getChar();
+ }
+ return Tok_SomeOperator;
+ case '.':
+ yyCh = getChar();
+ if (yyCh == '*') {
+ yyCh = getChar();
+ } else if (yyCh == '.') {
+ do {
+ yyCh = getChar();
+ } while (yyCh == '.');
+ return Tok_Ellipsis;
+ } else if (isdigit(yyCh)) {
+ do {
+ yyCh = getChar();
+ } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' ||
+ yyCh == '-');
+ return Tok_Number;
+ }
+ return Tok_SomeOperator;
+ case '/':
+ yyCh = getChar();
+ if (yyCh == '/') {
+ do {
+ yyCh = getChar();
+ } while (yyCh != EOF && yyCh != '\n');
+ } else if (yyCh == '*') {
+ bool metDoc = false; // empty doc is no doc
+ bool metSlashAsterBang = false;
+ bool metAster = false;
+ bool metAsterSlash = false;
+
+ yyCh = getChar();
+ if (yyCh == '!')
+ metSlashAsterBang = true;
+
+ while (!metAsterSlash) {
+ if (yyCh == EOF) {
+ yyTokLoc.warning(tr("Unterminated C++ comment"));
+ break;
+ } else {
+ if (yyCh == '*') {
+ metAster = true;
+ } else if (metAster && yyCh == '/') {
+ metAsterSlash = true;
+ } else {
+ metAster = false;
+ if (isgraph(yyCh))
+ metDoc = true;
+ }
+ }
+ yyCh = getChar();
+ }
+ if (metSlashAsterBang && metDoc)
+ return Tok_Doc;
+ else if (yyParenDepth > 0)
+ return Tok_Comment;
+ } else {
+ if (yyCh == '=')
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ }
+ break;
+ case ':':
+ yyCh = getChar();
+ if (yyCh == ':') {
+ yyCh = getChar();
+ return Tok_Gulbrandsen;
+ } else {
+ return Tok_Colon;
+ }
+ case ';':
+ yyCh = getChar();
+ return Tok_Semicolon;
+ case '<':
+ yyCh = getChar();
+ if (yyCh == '<') {
+ yyCh = getChar();
+ if (yyCh == '=')
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ } else if (yyCh == '=') {
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ } else {
+ return Tok_LeftAngle;
+ }
+ case '=':
+ yyCh = getChar();
+ if (yyCh == '=') {
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ } else {
+ return Tok_Equal;
+ }
+ case '>':
+ yyCh = getChar();
+ if (yyCh == '>') {
+ yyCh = getChar();
+ if (yyCh == '=')
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ } else if (yyCh == '=') {
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ } else {
+ return Tok_RightAngle;
+ }
+ case '?':
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ case '[':
+ yyCh = getChar();
+ if (yyNumPreprocessorSkipping == 0)
+ yyBracketDepth++;
+ return Tok_LeftBracket;
+ case '\\':
+ yyCh = getChar();
+ yyCh = getChar(); // skip one character
+ break;
+ case ']':
+ yyCh = getChar();
+ if (yyNumPreprocessorSkipping == 0)
+ yyBracketDepth--;
+ return Tok_RightBracket;
+ case '{':
+ yyCh = getChar();
+ if (yyNumPreprocessorSkipping == 0)
+ yyBraceDepth++;
+ return Tok_LeftBrace;
+ case '}':
+ yyCh = getChar();
+ if (yyNumPreprocessorSkipping == 0)
+ yyBraceDepth--;
+ return Tok_RightBrace;
+ case '|':
+ yyCh = getChar();
+ if (yyCh == '|' || yyCh == '=')
+ yyCh = getChar();
+ return Tok_SomeOperator;
+ case '~':
+ yyCh = getChar();
+ return Tok_Tilde;
+ case '@':
+ yyCh = getChar();
+ return Tok_At;
+ default:
+ // ### We should really prevent qdoc from looking at snippet files rather than
+ // ### suppress warnings when reading them.
+ if (yyNumPreprocessorSkipping == 0 && !yyTokLoc.fileName().endsWith(".qdoc")) {
+ yyTokLoc.warning(tr("Hostile character 0x%1 in C++ source")
+ .arg((uchar)yyCh, 1, 16));
+ }
+ yyCh = getChar();
+ }
+ }
+ }
+
+ if (yyPreprocessorSkipping.count() > 1) {
+ yyTokLoc.warning(tr("Expected #endif before end of file"));
+ // clear it out or we get an infinite loop!
+ while (!yyPreprocessorSkipping.isEmpty()) {
+ popSkipping();
+ }
+ }
+
+ strcpy(yyLex, "end-of-input");
+ yyLexLen = strlen(yyLex);
+ return Tok_Eoi;
+}
+
+void Tokenizer::initialize(const Config &config)
+{
+ QString versionSym = config.getString(CONFIG_VERSIONSYM);
+
+ comment = new QRegExp("/(?:\\*.*\\*/|/.*\n|/[^\n]*$)");
+ comment->setMinimal(true);
+ versionX = new QRegExp("$cannot possibly match^");
+ if (!versionSym.isEmpty())
+ versionX->setPattern("[ \t]*(?:" + QRegExp::escape(versionSym)
+ + ")[ \t]+\"([^\"]*)\"[ \t]*");
+ definedX = new QRegExp("defined ?\\(?([A-Z_0-9a-z]+) ?\\)");
+
+ QStringList d = config.getStringList(CONFIG_DEFINES);
+ d += "qdoc";
+ defines = new QRegExp(d.join("|"));
+ falsehoods = new QRegExp(config.getStringList(CONFIG_FALSEHOODS).join("|"));
+
+ memset(kwordHashTable, 0, sizeof(kwordHashTable));
+ for (int i = 0; i < Tok_LastKeyword - Tok_FirstKeyword + 1; i++)
+ insertKwordIntoHash(kwords[i], i + 1);
+
+ ignoredTokensAndDirectives = new QHash<QByteArray, bool>;
+
+ QStringList tokens = config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNORETOKENS);
+ foreach (const QString &t, tokens) {
+ const QByteArray tb = t.toAscii();
+ ignoredTokensAndDirectives->insert(tb, false);
+ insertKwordIntoHash(tb.data(), -1);
+ }
+
+ QStringList directives = config.getStringList(LANGUAGE_CPP + Config::dot
+ + CONFIG_IGNOREDIRECTIVES);
+ foreach (const QString &d, directives) {
+ const QByteArray db = d.toAscii();
+ ignoredTokensAndDirectives->insert(db, true);
+ insertKwordIntoHash(db.data(), -1);
+ }
+}
+
+void Tokenizer::terminate()
+{
+ delete comment;
+ comment = 0;
+ delete versionX;
+ versionX = 0;
+ delete definedX;
+ definedX = 0;
+ delete defines;
+ defines = 0;
+ delete falsehoods;
+ falsehoods = 0;
+ delete ignoredTokensAndDirectives;
+ ignoredTokensAndDirectives = 0;
+}
+
+void Tokenizer::init()
+{
+ yyLexBuf1 = new char[(int) yyLexBufSize];
+ yyLexBuf2 = new char[(int) yyLexBufSize];
+ yyPrevLex = yyLexBuf1;
+ yyPrevLex[0] = '\0';
+ yyLex = yyLexBuf2;
+ yyLex[0] = '\0';
+ yyLexLen = 0;
+ yyPreprocessorSkipping.push(false);
+ yyNumPreprocessorSkipping = 0;
+ yyBraceDepth = 0;
+ yyParenDepth = 0;
+ yyBracketDepth = 0;
+ yyCh = '\0';
+ parsingMacro = false;
+}
+
+void Tokenizer::start(const Location& loc)
+{
+ yyTokLoc = loc;
+ yyCurLoc = loc;
+ yyCurLoc.start();
+ strcpy(yyPrevLex, "beginning-of-input");
+ strcpy(yyLex, "beginning-of-input");
+ yyLexLen = strlen(yyLex);
+ yyBraceDepth = 0;
+ yyParenDepth = 0;
+ yyBracketDepth = 0;
+ yyCh = '\0';
+ yyCh = getChar();
+}
+
+/*
+ Returns the next token, if # was met. This function interprets the
+ preprocessor directive, skips over any #ifdef'd out tokens, and returns the
+ token after all of that.
+*/
+int Tokenizer::getTokenAfterPreprocessor()
+{
+ yyCh = getChar();
+ while (isspace(yyCh) && yyCh != '\n')
+ yyCh = getChar();
+
+ /*
+ #directive condition
+ */
+ QString directive;
+ QString condition;
+
+ while (isalpha(yyCh)) {
+ directive += QChar(yyCh);
+ yyCh = getChar();
+ }
+ if (!directive.isEmpty()) {
+ while (yyCh != EOF && yyCh != '\n') {
+ if (yyCh == '\\')
+ yyCh = getChar();
+ condition += yyCh;
+ yyCh = getChar();
+ }
+ condition.replace(*comment, "");
+ condition = condition.simplified();
+
+ /*
+ The #if, #ifdef, #ifndef, #elif, #else, and #endif
+ directives have an effect on the skipping stack. For
+ instance, if the code processed so far is
+
+ #if 1
+ #if 0
+ #if 1
+ // ...
+ #else
+
+ the skipping stack contains, from bottom to top, false true
+ true (assuming 0 is false and 1 is true). If at least one
+ entry of the stack is true, the tokens are skipped.
+
+ This mechanism is simple yet hard to understand.
+ */
+ if (directive[0] == QChar('i')) {
+ if (directive == QString("if"))
+ pushSkipping(!isTrue(condition));
+ else if (directive == QString("ifdef"))
+ pushSkipping(!defines->exactMatch(condition));
+ else if (directive == QString("ifndef"))
+ pushSkipping(defines->exactMatch(condition));
+ } else if (directive[0] == QChar('e')) {
+ if (directive == QString("elif")) {
+ bool old = popSkipping();
+ if (old)
+ pushSkipping(!isTrue(condition));
+ else
+ pushSkipping(true);
+ } else if (directive == QString("else")) {
+ pushSkipping(!popSkipping());
+ } else if (directive == QString("endif")) {
+ popSkipping();
+ }
+ } else if (directive == QString("define")) {
+ if (versionX->exactMatch(condition))
+ yyVersion = versionX->cap(1);
+ }
+ }
+
+ int tok;
+ do {
+ /*
+ We set yyLex now, and after getToken() this will be
+ yyPrevLex. This way, we skip over the preprocessor
+ directive.
+ */
+ qstrcpy(yyLex, yyPrevLex);
+
+ /*
+ If getToken() meets another #, it will call
+ getTokenAfterPreprocessor() once again, which could in turn
+ call getToken() again, etc. Unless there are 10,000 or so
+ preprocessor directives in a row, this shouldn't overflow
+ the stack.
+ */
+ tok = getToken();
+ } while (yyNumPreprocessorSkipping > 0);
+ return tok;
+}
+
+/*
+ Pushes a new skipping value onto the stack. This corresponds to entering a
+ new #if block.
+*/
+void Tokenizer::pushSkipping(bool skip)
+{
+ yyPreprocessorSkipping.push(skip);
+ if (skip)
+ yyNumPreprocessorSkipping++;
+}
+
+/*
+ Pops a skipping value from the stack. This corresponds to reaching a #endif.
+*/
+bool Tokenizer::popSkipping()
+{
+ if (yyPreprocessorSkipping.isEmpty()) {
+ yyTokLoc.warning(tr("Unexpected #elif, #else or #endif"));
+ return true;
+ }
+
+ bool skip = yyPreprocessorSkipping.pop();
+ if (skip)
+ yyNumPreprocessorSkipping--;
+ return skip;
+}
+
+/*
+ Returns true if the condition evaluates as true, otherwise false. The
+ condition is represented by a string. Unsophisticated parsing techniques are
+ used. The preprocessing method could be named StriNg-Oriented PreProcessing,
+ as SNOBOL stands for StriNg-Oriented symBOlic Language.
+*/
+bool Tokenizer::isTrue(const QString &condition)
+{
+ int firstOr = -1;
+ int firstAnd = -1;
+ int parenDepth = 0;
+
+ /*
+ Find the first logical operator at top level, but be careful
+ about precedence. Examples:
+
+ X || Y // the or
+ X || Y || Z // the leftmost or
+ X || Y && Z // the or
+ X && Y || Z // the or
+ (X || Y) && Z // the and
+ */
+ for (int i = 0; i < (int) condition.length() - 1; i++) {
+ QChar ch = condition[i];
+ if (ch == QChar('(')) {
+ parenDepth++;
+ } else if (ch == QChar(')')) {
+ parenDepth--;
+ } else if (parenDepth == 0) {
+ if (condition[i + 1] == ch) {
+ if (ch == QChar('|')) {
+ firstOr = i;
+ break;
+ } else if (ch == QChar('&')) {
+ if (firstAnd == -1)
+ firstAnd = i;
+ }
+ }
+ }
+ }
+ if (firstOr != -1)
+ return isTrue(condition.left(firstOr)) ||
+ isTrue(condition.mid(firstOr + 2));
+ if (firstAnd != -1)
+ return isTrue(condition.left(firstAnd)) &&
+ isTrue(condition.mid(firstAnd + 2));
+
+ QString t = condition.simplified();
+ if (t.isEmpty())
+ return true;
+
+ if (t[0] == QChar('!'))
+ return !isTrue(t.mid(1));
+ if (t[0] == QChar('(') && t.right(1)[0] == QChar(')'))
+ return isTrue(t.mid(1, t.length() - 2));
+
+ if (definedX->exactMatch(t))
+ return defines->exactMatch(definedX->cap(1));
+ else
+ return !falsehoods->exactMatch(t);
+}
+
+QT_END_NAMESPACE