diff -r 000000000000 -r 1918ee327afb src/xmlpatterns/acceltree/qcompressedwhitespace.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/xmlpatterns/acceltree/qcompressedwhitespace.cpp Mon Jan 11 14:00:40 2010 +0000 @@ -0,0 +1,197 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the QtXmlPatterns module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. +** +** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include + +#include "qcompressedwhitespace_p.h" + +QT_BEGIN_NAMESPACE + +using namespace QPatternist; + +CompressedWhitespace::CharIdentifier CompressedWhitespace::toIdentifier(const QChar ch) +{ + switch(ch.unicode()) + { + case ' ': + return Space; + case '\n': + return LF; + case '\r': + return CR; + case '\t': + return Tab; + default: + { + Q_ASSERT_X(false, Q_FUNC_INFO, + "The caller must guarantee only whitespace is passed."); + return Tab; + } + } +} + +bool CompressedWhitespace::isEven(const int number) +{ + Q_ASSERT(number >= 0); + return number % 2 == 0; +} + +quint8 CompressedWhitespace::toCompressedChar(const QChar ch, const int len) +{ + Q_ASSERT(len > 0); + Q_ASSERT(len <= MaxCharCount); + + return len + toIdentifier(ch); +} + +QChar CompressedWhitespace::toChar(const CharIdentifier id) +{ + switch(id) + { + case Space: return QLatin1Char(' '); + case CR: return QLatin1Char('\r'); + case LF: return QLatin1Char('\n'); + case Tab: return QLatin1Char('\t'); + default: + { + Q_ASSERT_X(false, Q_FUNC_INFO, "Unexpected input"); + return QChar(); + } + } +} + +QString CompressedWhitespace::compress(const QStringRef &input) +{ + Q_ASSERT(!isEven(1) && isEven(0) && isEven(2)); + Q_ASSERT(!input.isEmpty()); + + QString result; + const int len = input.length(); + + /* The amount of compressed characters. For instance, if input is + * four spaces followed by one tab, compressedChars will be 2, and the resulting + * QString will have a length of 1, two compressedChars stored in one QChar. */ + int compressedChars = 0; + + for(int i = 0; i < len; ++i) + { + const QChar c(input.at(i)); + + int start = i; + + while(true) + { + if(i + 1 == input.length() || input.at(i + 1) != c) + break; + else + ++i; + } + + /* The length of subsequent whitespace characters in the input. */ + int wsLen = (i - start) + 1; + + /* We might get a sequence of whitespace that is so long, that we can't + * store it in one unit/byte. In that case we chop it into as many subsequent + * ones that is needed. */ + while(true) + { + const int unitLength = qMin(wsLen, int(MaxCharCount)); + wsLen -= unitLength; + + ushort resultCP = toCompressedChar(c, unitLength); + + if(isEven(compressedChars)) + result += QChar(resultCP); + else + { + resultCP = resultCP << 8; + resultCP |= result.at(result.size() - 1).unicode(); + result[result.size() - 1] = resultCP; + } + + ++compressedChars; + + if(wsLen == 0) + break; + } + } + + return result; +} + +QString CompressedWhitespace::decompress(const QString &input) +{ + Q_ASSERT(!input.isEmpty()); + const int len = input.length() * 2; + QString retval; + + for(int i = 0; i < len; ++i) + { + ushort cp = input.at(i / 2).unicode(); + + if(isEven(i)) + cp &= Lower8Bits; + else + { + cp = cp >> 8; + + if(cp == 0) + return retval; + } + + const quint8 wsLen = cp & Lower6Bits; + const quint8 id = cp & UpperTwoBits; + + /* Resize retval, and fill in on the top. */ + const int oldSize = retval.size(); + const int newSize = retval.size() + wsLen; + retval.resize(newSize); + const QChar ch(toChar(CharIdentifier(id))); + + for(int f = oldSize; f < newSize; ++f) + retval[f] = ch; + } + + return retval; +} + +QT_END_NAMESPACE +