/****************************************************************************
**
** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
** All rights reserved.
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** This file is part of the QtXmlPatterns module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** No Commercial Usage
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the Technology Preview License Agreement accompanying
** this package.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights. These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** If you have questions regarding the use of this file, please contact
** Nokia at qt-info@nokia.com.
**
**
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <QString>
#include "qcompressedwhitespace_p.h"
QT_BEGIN_NAMESPACE
using namespace QPatternist;
/*!
  Translates the whitespace character \a ch into its CharIdentifier.

  Only space, line feed, carriage return and horizontal tab are accepted.
  Any other character trips an assertion; when assertions are compiled out,
  \c Tab is returned as the fallback.
 */
CompressedWhitespace::CharIdentifier CompressedWhitespace::toIdentifier(const QChar ch)
{
    const ushort codeUnit = ch.unicode();

    if(codeUnit == ' ')
        return Space;

    if(codeUnit == '\n')
        return LF;

    if(codeUnit == '\r')
        return CR;

    if(codeUnit == '\t')
        return Tab;

    /* Not one of the four whitespace characters: a caller bug. */
    Q_ASSERT_X(false, Q_FUNC_INFO,
               "The caller must guarantee only whitespace is passed.");
    return Tab;
}
/*!
  Returns \c true if \a number is divisible by two, otherwise \c false.

  \a number is expected to be non-negative; this is asserted.
 */
bool CompressedWhitespace::isEven(const int number)
{
    Q_ASSERT(number >= 0);

    const int remainder = number % 2;
    return remainder == 0;
}
/*!
  Builds one compressed unit describing \a len consecutive occurrences of
  the whitespace character \a ch.

  The unit is the sum of the run length and the identifier that
  toIdentifier() yields for \a ch. \a len must be in the range
  1..MaxCharCount; both bounds are asserted.
 */
quint8 CompressedWhitespace::toCompressedChar(const QChar ch, const int len)
{
    Q_ASSERT(len > 0);
    Q_ASSERT(len <= MaxCharCount);

    const CharIdentifier identifier = toIdentifier(ch);
    return len + identifier;
}
/*!
  The inverse of toIdentifier(): returns the whitespace character that
  \a id stands for.

  If \a id is none of Space, CR, LF or Tab, an assertion is triggered; when
  assertions are compiled out, a default constructed QChar is returned.
 */
QChar CompressedWhitespace::toChar(const CharIdentifier id)
{
    if(id == Space)
        return QLatin1Char(' ');

    if(id == CR)
        return QLatin1Char('\r');

    if(id == LF)
        return QLatin1Char('\n');

    if(id == Tab)
        return QLatin1Char('\t');

    /* No known identifier matched. */
    Q_ASSERT_X(false, Q_FUNC_INFO, "Unexpected input");
    return QChar();
}
/*!
  Compresses the whitespace in \a input and returns the packed result.

  Each run of identical characters is turned into one or more compressed
  units by toCompressedChar(). Two units are stored per QChar of the result:
  the first in the lower eight bits, the second in the upper eight bits.

  \a input must not be empty, and must consist only of characters accepted
  by toIdentifier().
 */
QString CompressedWhitespace::compress(const QStringRef &input)
{
    Q_ASSERT(!isEven(1) && isEven(0) && isEven(2));
    Q_ASSERT(!input.isEmpty());

    const int inputLength = input.length();
    QString packed;

    /* How many compressed units have been emitted so far. Since two units
     * share one QChar, four spaces followed by one tab gives a unit count
     * of 2 but a packed string of length 1. */
    int unitCount = 0;

    int pos = 0;
    while(pos < inputLength)
    {
        const QChar current(input.at(pos));

        /* Locate the end of the run of characters equal to current;
         * runEnd is one past the last character of the run. */
        int runEnd = pos + 1;
        while(runEnd != input.length() && input.at(runEnd) == current)
            ++runEnd;

        int remaining = runEnd - pos;
        pos = runEnd;

        /* A run may be longer than what fits in a single unit. If so, it is
         * emitted as several consecutive units, each of at most
         * MaxCharCount characters. */
        do
        {
            const int chunk = qMin(remaining, int(MaxCharCount));
            remaining -= chunk;

            const ushort unit = toCompressedChar(current, chunk);

            if(isEven(unitCount))
            {
                /* First unit of a pair: it opens a new QChar and occupies
                 * its lower byte. */
                packed += QChar(unit);
            }
            else
            {
                /* Second unit of a pair: it goes into the upper byte of the
                 * QChar that was appended last. */
                const int last = packed.size() - 1;
                const ushort merged = ushort(unit << 8) | packed.at(last).unicode();
                packed[last] = merged;
            }

            ++unitCount;
        }
        while(remaining != 0);
    }

    return packed;
}
/*!
  Expands \a input, a string of packed units of the kind compress()
  produces, back into the whitespace it represents.

  Every QChar in \a input holds two units: one in its lower eight bits and
  one in its upper eight bits. An upper unit of zero means no second unit
  was stored, and ends the decompression. \a input must not be empty.
 */
QString CompressedWhitespace::decompress(const QString &input)
{
    Q_ASSERT(!input.isEmpty());

    QString expanded;

    /* Two units per QChar. */
    const int unitTotal = input.length() * 2;

    for(int unit = 0; unit < unitTotal; ++unit)
    {
        const ushort packed = input.at(unit / 2).unicode();
        ushort code;

        if(isEven(unit))
            code = packed & Lower8Bits;
        else
        {
            code = packed >> 8;

            /* An empty upper half: the unit count was odd and we're done. */
            if(code == 0)
                return expanded;
        }

        /* Split the unit into its run length and character identifier. */
        const quint8 runLength = code & Lower6Bits;
        const quint8 identifier = code & UpperTwoBits;

        /* Grow the result by the run length, then fill the new tail. */
        const int fillBegin = expanded.size();
        const int fillEnd = fillBegin + runLength;
        expanded.resize(fillEnd);

        const QChar filler(toChar(CharIdentifier(identifier)));

        for(int at = fillBegin; at < fillEnd; ++at)
            expanded[at] = filler;
    }

    return expanded;
}
QT_END_NAMESPACE