/****************************************************************************
**
** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
** All rights reserved.
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** This file is part of the tools applications of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** No Commercial Usage
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the Technology Preview License Agreement accompanying
** this package.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights. These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** If you have questions regarding the use of this file, please contact
** Nokia at qt-info@nokia.com.
**
**
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include "config.h"
#include "tokenizer.h"
#include <qdebug.h>
#include <qfile.h>
#include <qhash.h>
#include <qregexp.h>
#include <qstring.h>
#include <ctype.h>
#include <string.h>
QT_BEGIN_NAMESPACE
#define LANGUAGE_CPP "Cpp"
/* qmake ignore Q_OBJECT */
/*
Keep in sync with tokenizer.h.
*/
static const char *kwords[] = {
"char", "class", "const", "double", "enum", "explicit",
"friend", "inline", "int", "long", "namespace", "operator",
"private", "protected", "public", "short", "signals", "signed",
"slots", "static", "struct", "template", "typedef", "typename",
"union", "unsigned", "using", "virtual", "void", "volatile",
"__int64", "Q_OBJECT", "Q_OVERRIDE", "Q_PROPERTY",
"Q_DECLARE_SEQUENTIAL_ITERATOR",
"Q_DECLARE_MUTABLE_SEQUENTIAL_ITERATOR",
"Q_DECLARE_ASSOCIATIVE_ITERATOR",
"Q_DECLARE_MUTABLE_ASSOCIATIVE_ITERATOR",
"Q_DECLARE_FLAGS",
"Q_SIGNALS",
"Q_SLOTS",
"QT_COMPAT",
"QT_COMPAT_CONSTRUCTOR",
"QT_DEPRECATED",
"QT_MOC_COMPAT",
"QT_MODULE",
"QT3_SUPPORT",
"QT3_SUPPORT_CONSTRUCTOR",
"QT3_MOC_SUPPORT",
"QDOC_PROPERTY"
};
static const int KwordHashTableSize = 4096;
static int kwordHashTable[KwordHashTableSize];
static QHash<QByteArray, bool> *ignoredTokensAndDirectives = 0;
static QRegExp *comment = 0;
static QRegExp *versionX = 0;
static QRegExp *definedX = 0;
static QRegExp *defines = 0;
static QRegExp *falsehoods = 0;
/*
This function is a perfect hash function for the 37 keywords of C99
(with a hash table size of 512). It should perform well on our
Qt-enhanced C++ subset.
*/
static int hashKword(const char *s, int len)
{
return (((uchar) s[0]) + (((uchar) s[2]) << 5) +
(((uchar) s[len - 1]) << 3)) % KwordHashTableSize;
}
static void insertKwordIntoHash(const char *s, int number)
{
int k = hashKword(s, strlen(s));
while (kwordHashTable[k]) {
if (++k == KwordHashTableSize)
k = 0;
}
kwordHashTable[k] = number;
}
Tokenizer::Tokenizer(const Location& loc, FILE *in)
{
init();
QFile file;
file.open(in, QIODevice::ReadOnly);
yyIn = file.readAll();
file.close();
yyPos = 0;
start(loc);
}
Tokenizer::Tokenizer(const Location& loc, const QByteArray &in)
: yyIn(in)
{
init();
yyPos = 0;
start(loc);
}
Tokenizer::~Tokenizer()
{
delete[] yyLexBuf1;
delete[] yyLexBuf2;
}
int Tokenizer::getToken()
{
char *t = yyPrevLex;
yyPrevLex = yyLex;
yyLex = t;
while (yyCh != EOF) {
yyTokLoc = yyCurLoc;
yyLexLen = 0;
if (isspace(yyCh)) {
do {
yyCh = getChar();
} while (isspace(yyCh));
}
else if (isalpha(yyCh) || yyCh == '_') {
do {
yyCh = getChar();
} while (isalnum(yyCh) || yyCh == '_');
int k = hashKword(yyLex, yyLexLen);
for (;;) {
int i = kwordHashTable[k];
if (i == 0) {
return Tok_Ident;
}
else if (i == -1) {
if (!parsingMacro && ignoredTokensAndDirectives->contains(yyLex)) {
if (ignoredTokensAndDirectives->value(yyLex)) { // it's a directive
int parenDepth = 0;
while (yyCh != EOF && (yyCh != ')' || parenDepth > 1)) {
if (yyCh == '(')
++parenDepth;
else if (yyCh == ')')
--parenDepth;
yyCh = getChar();
}
if (yyCh == ')')
yyCh = getChar();
}
break;
}
}
else if (strcmp(yyLex, kwords[i - 1]) == 0) {
int ret = (int) Tok_FirstKeyword + i - 1;
if (ret != Tok_explicit && ret != Tok_inline && ret != Tok_typename)
return ret;
break;
}
if (++k == KwordHashTableSize)
k = 0;
}
}
else if (isdigit(yyCh)) {
do {
yyCh = getChar();
} while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' ||
yyCh == '-');
return Tok_Number;
}
else {
switch (yyCh) {
case '!':
case '%':
yyCh = getChar();
if (yyCh == '=')
yyCh = getChar();
return Tok_SomeOperator;
case '"':
yyCh = getChar();
while (yyCh != EOF && yyCh != '"') {
if (yyCh == '\\')
yyCh = getChar();
yyCh = getChar();
}
yyCh = getChar();
if (yyCh == EOF)
yyTokLoc.warning(tr("Unterminated C++ string literal"),
tr("Maybe you forgot '/*!' at the beginning of the file?"));
else
return Tok_String;
break;
case '#':
return getTokenAfterPreprocessor();
case '&':
yyCh = getChar();
if (yyCh == '&' || yyCh == '=') {
yyCh = getChar();
return Tok_SomeOperator;
}
else {
return Tok_Ampersand;
}
case '\'':
yyCh = getChar();
if (yyCh == '\\')
yyCh = getChar();
do {
yyCh = getChar();
} while (yyCh != EOF && yyCh != '\'');
if (yyCh == EOF) {
yyTokLoc.warning(tr("Unterminated C++ character"
" literal"));
}
else {
yyCh = getChar();
return Tok_Number;
}
break;
case '(':
yyCh = getChar();
if (yyNumPreprocessorSkipping == 0)
yyParenDepth++;
if (isspace(yyCh)) {
do {
yyCh = getChar();
} while (isspace(yyCh));
yyLexLen = 1;
yyLex[1] = '\0';
}
if (yyCh == '*') {
yyCh = getChar();
return Tok_LeftParenAster;
}
return Tok_LeftParen;
case ')':
yyCh = getChar();
if (yyNumPreprocessorSkipping == 0)
yyParenDepth--;
return Tok_RightParen;
case '*':
yyCh = getChar();
if (yyCh == '=') {
yyCh = getChar();
return Tok_SomeOperator;
} else {
return Tok_Aster;
}
case '^':
yyCh = getChar();
if (yyCh == '=') {
yyCh = getChar();
return Tok_SomeOperator;
} else {
return Tok_Caret;
}
case '+':
yyCh = getChar();
if (yyCh == '+' || yyCh == '=')
yyCh = getChar();
return Tok_SomeOperator;
case ',':
yyCh = getChar();
return Tok_Comma;
case '-':
yyCh = getChar();
if (yyCh == '-' || yyCh == '=') {
yyCh = getChar();
} else if (yyCh == '>') {
yyCh = getChar();
if (yyCh == '*')
yyCh = getChar();
}
return Tok_SomeOperator;
case '.':
yyCh = getChar();
if (yyCh == '*') {
yyCh = getChar();
} else if (yyCh == '.') {
do {
yyCh = getChar();
} while (yyCh == '.');
return Tok_Ellipsis;
} else if (isdigit(yyCh)) {
do {
yyCh = getChar();
} while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' ||
yyCh == '-');
return Tok_Number;
}
return Tok_SomeOperator;
case '/':
yyCh = getChar();
if (yyCh == '/') {
do {
yyCh = getChar();
} while (yyCh != EOF && yyCh != '\n');
} else if (yyCh == '*') {
bool metDoc = false; // empty doc is no doc
bool metSlashAsterBang = false;
bool metAster = false;
bool metAsterSlash = false;
yyCh = getChar();
if (yyCh == '!')
metSlashAsterBang = true;
while (!metAsterSlash) {
if (yyCh == EOF) {
yyTokLoc.warning(tr("Unterminated C++ comment"));
break;
} else {
if (yyCh == '*') {
metAster = true;
} else if (metAster && yyCh == '/') {
metAsterSlash = true;
} else {
metAster = false;
if (isgraph(yyCh))
metDoc = true;
}
}
yyCh = getChar();
}
if (metSlashAsterBang && metDoc)
return Tok_Doc;
else if (yyParenDepth > 0)
return Tok_Comment;
} else {
if (yyCh == '=')
yyCh = getChar();
return Tok_SomeOperator;
}
break;
case ':':
yyCh = getChar();
if (yyCh == ':') {
yyCh = getChar();
return Tok_Gulbrandsen;
} else {
return Tok_Colon;
}
case ';':
yyCh = getChar();
return Tok_Semicolon;
case '<':
yyCh = getChar();
if (yyCh == '<') {
yyCh = getChar();
if (yyCh == '=')
yyCh = getChar();
return Tok_SomeOperator;
} else if (yyCh == '=') {
yyCh = getChar();
return Tok_SomeOperator;
} else {
return Tok_LeftAngle;
}
case '=':
yyCh = getChar();
if (yyCh == '=') {
yyCh = getChar();
return Tok_SomeOperator;
} else {
return Tok_Equal;
}
case '>':
yyCh = getChar();
if (yyCh == '>') {
yyCh = getChar();
if (yyCh == '=')
yyCh = getChar();
return Tok_SomeOperator;
} else if (yyCh == '=') {
yyCh = getChar();
return Tok_SomeOperator;
} else {
return Tok_RightAngle;
}
case '?':
yyCh = getChar();
return Tok_SomeOperator;
case '[':
yyCh = getChar();
if (yyNumPreprocessorSkipping == 0)
yyBracketDepth++;
return Tok_LeftBracket;
case '\\':
yyCh = getChar();
yyCh = getChar(); // skip one character
break;
case ']':
yyCh = getChar();
if (yyNumPreprocessorSkipping == 0)
yyBracketDepth--;
return Tok_RightBracket;
case '{':
yyCh = getChar();
if (yyNumPreprocessorSkipping == 0)
yyBraceDepth++;
return Tok_LeftBrace;
case '}':
yyCh = getChar();
if (yyNumPreprocessorSkipping == 0)
yyBraceDepth--;
return Tok_RightBrace;
case '|':
yyCh = getChar();
if (yyCh == '|' || yyCh == '=')
yyCh = getChar();
return Tok_SomeOperator;
case '~':
yyCh = getChar();
return Tok_Tilde;
case '@':
yyCh = getChar();
return Tok_At;
default:
// ### We should really prevent qdoc from looking at snippet files rather than
// ### suppress warnings when reading them.
if (yyNumPreprocessorSkipping == 0 && !yyTokLoc.fileName().endsWith(".qdoc")) {
yyTokLoc.warning(tr("Hostile character 0x%1 in C++ source")
.arg((uchar)yyCh, 1, 16));
}
yyCh = getChar();
}
}
}
if (yyPreprocessorSkipping.count() > 1) {
yyTokLoc.warning(tr("Expected #endif before end of file"));
// clear it out or we get an infinite loop!
while (!yyPreprocessorSkipping.isEmpty()) {
popSkipping();
}
}
strcpy(yyLex, "end-of-input");
yyLexLen = strlen(yyLex);
return Tok_Eoi;
}
void Tokenizer::initialize(const Config &config)
{
QString versionSym = config.getString(CONFIG_VERSIONSYM);
comment = new QRegExp("/(?:\\*.*\\*/|/.*\n|/[^\n]*$)");
comment->setMinimal(true);
versionX = new QRegExp("$cannot possibly match^");
if (!versionSym.isEmpty())
versionX->setPattern("[ \t]*(?:" + QRegExp::escape(versionSym)
+ ")[ \t]+\"([^\"]*)\"[ \t]*");
definedX = new QRegExp("defined ?\\(?([A-Z_0-9a-z]+) ?\\)");
QStringList d = config.getStringList(CONFIG_DEFINES);
d += "qdoc";
defines = new QRegExp(d.join("|"));
falsehoods = new QRegExp(config.getStringList(CONFIG_FALSEHOODS).join("|"));
memset(kwordHashTable, 0, sizeof(kwordHashTable));
for (int i = 0; i < Tok_LastKeyword - Tok_FirstKeyword + 1; i++)
insertKwordIntoHash(kwords[i], i + 1);
ignoredTokensAndDirectives = new QHash<QByteArray, bool>;
QStringList tokens = config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNORETOKENS);
foreach (const QString &t, tokens) {
const QByteArray tb = t.toAscii();
ignoredTokensAndDirectives->insert(tb, false);
insertKwordIntoHash(tb.data(), -1);
}
QStringList directives = config.getStringList(LANGUAGE_CPP + Config::dot
+ CONFIG_IGNOREDIRECTIVES);
foreach (const QString &d, directives) {
const QByteArray db = d.toAscii();
ignoredTokensAndDirectives->insert(db, true);
insertKwordIntoHash(db.data(), -1);
}
}
void Tokenizer::terminate()
{
delete comment;
comment = 0;
delete versionX;
versionX = 0;
delete definedX;
definedX = 0;
delete defines;
defines = 0;
delete falsehoods;
falsehoods = 0;
delete ignoredTokensAndDirectives;
ignoredTokensAndDirectives = 0;
}
void Tokenizer::init()
{
yyLexBuf1 = new char[(int) yyLexBufSize];
yyLexBuf2 = new char[(int) yyLexBufSize];
yyPrevLex = yyLexBuf1;
yyPrevLex[0] = '\0';
yyLex = yyLexBuf2;
yyLex[0] = '\0';
yyLexLen = 0;
yyPreprocessorSkipping.push(false);
yyNumPreprocessorSkipping = 0;
yyBraceDepth = 0;
yyParenDepth = 0;
yyBracketDepth = 0;
yyCh = '\0';
parsingMacro = false;
}
void Tokenizer::start(const Location& loc)
{
yyTokLoc = loc;
yyCurLoc = loc;
yyCurLoc.start();
strcpy(yyPrevLex, "beginning-of-input");
strcpy(yyLex, "beginning-of-input");
yyLexLen = strlen(yyLex);
yyBraceDepth = 0;
yyParenDepth = 0;
yyBracketDepth = 0;
yyCh = '\0';
yyCh = getChar();
}
/*
Returns the next token, if # was met. This function interprets the
preprocessor directive, skips over any #ifdef'd out tokens, and returns the
token after all of that.
*/
int Tokenizer::getTokenAfterPreprocessor()
{
yyCh = getChar();
while (isspace(yyCh) && yyCh != '\n')
yyCh = getChar();
/*
#directive condition
*/
QString directive;
QString condition;
while (isalpha(yyCh)) {
directive += QChar(yyCh);
yyCh = getChar();
}
if (!directive.isEmpty()) {
while (yyCh != EOF && yyCh != '\n') {
if (yyCh == '\\')
yyCh = getChar();
condition += yyCh;
yyCh = getChar();
}
condition.replace(*comment, "");
condition = condition.simplified();
/*
The #if, #ifdef, #ifndef, #elif, #else, and #endif
directives have an effect on the skipping stack. For
instance, if the code processed so far is
#if 1
#if 0
#if 1
// ...
#else
the skipping stack contains, from bottom to top, false true
true (assuming 0 is false and 1 is true). If at least one
entry of the stack is true, the tokens are skipped.
This mechanism is simple yet hard to understand.
*/
if (directive[0] == QChar('i')) {
if (directive == QString("if"))
pushSkipping(!isTrue(condition));
else if (directive == QString("ifdef"))
pushSkipping(!defines->exactMatch(condition));
else if (directive == QString("ifndef"))
pushSkipping(defines->exactMatch(condition));
} else if (directive[0] == QChar('e')) {
if (directive == QString("elif")) {
bool old = popSkipping();
if (old)
pushSkipping(!isTrue(condition));
else
pushSkipping(true);
} else if (directive == QString("else")) {
pushSkipping(!popSkipping());
} else if (directive == QString("endif")) {
popSkipping();
}
} else if (directive == QString("define")) {
if (versionX->exactMatch(condition))
yyVersion = versionX->cap(1);
}
}
int tok;
do {
/*
We set yyLex now, and after getToken() this will be
yyPrevLex. This way, we skip over the preprocessor
directive.
*/
qstrcpy(yyLex, yyPrevLex);
/*
If getToken() meets another #, it will call
getTokenAfterPreprocessor() once again, which could in turn
call getToken() again, etc. Unless there are 10,000 or so
preprocessor directives in a row, this shouldn't overflow
the stack.
*/
tok = getToken();
} while (yyNumPreprocessorSkipping > 0);
return tok;
}
/*
Pushes a new skipping value onto the stack. This corresponds to entering a
new #if block.
*/
void Tokenizer::pushSkipping(bool skip)
{
yyPreprocessorSkipping.push(skip);
if (skip)
yyNumPreprocessorSkipping++;
}
/*
Pops a skipping value from the stack. This corresponds to reaching a #endif.
*/
bool Tokenizer::popSkipping()
{
if (yyPreprocessorSkipping.isEmpty()) {
yyTokLoc.warning(tr("Unexpected #elif, #else or #endif"));
return true;
}
bool skip = yyPreprocessorSkipping.pop();
if (skip)
yyNumPreprocessorSkipping--;
return skip;
}
/*
Returns true if the condition evaluates as true, otherwise false. The
condition is represented by a string. Unsophisticated parsing techniques are
used. The preprocessing method could be named StriNg-Oriented PreProcessing,
as SNOBOL stands for StriNg-Oriented symBOlic Language.
*/
bool Tokenizer::isTrue(const QString &condition)
{
int firstOr = -1;
int firstAnd = -1;
int parenDepth = 0;
/*
Find the first logical operator at top level, but be careful
about precedence. Examples:
X || Y // the or
X || Y || Z // the leftmost or
X || Y && Z // the or
X && Y || Z // the or
(X || Y) && Z // the and
*/
for (int i = 0; i < (int) condition.length() - 1; i++) {
QChar ch = condition[i];
if (ch == QChar('(')) {
parenDepth++;
} else if (ch == QChar(')')) {
parenDepth--;
} else if (parenDepth == 0) {
if (condition[i + 1] == ch) {
if (ch == QChar('|')) {
firstOr = i;
break;
} else if (ch == QChar('&')) {
if (firstAnd == -1)
firstAnd = i;
}
}
}
}
if (firstOr != -1)
return isTrue(condition.left(firstOr)) ||
isTrue(condition.mid(firstOr + 2));
if (firstAnd != -1)
return isTrue(condition.left(firstAnd)) &&
isTrue(condition.mid(firstAnd + 2));
QString t = condition.simplified();
if (t.isEmpty())
return true;
if (t[0] == QChar('!'))
return !isTrue(t.mid(1));
if (t[0] == QChar('(') && t.right(1)[0] == QChar(')'))
return isTrue(t.mid(1, t.length() - 2));
if (definedX->exactMatch(t))
return defines->exactMatch(definedX->cap(1));
else
return !falsehoods->exactMatch(t);
}
QT_END_NAMESPACE