src/xmlpatterns/acceltree/qcompressedwhitespace.cpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Fri, 22 Jan 2010 10:32:13 +0200
changeset 1 ae9c8dab0e3e
parent 0 1918ee327afb
child 4 3b1da2848fc7
permissions -rw-r--r--
Revision: 201001 Kit: 201003

/****************************************************************************
**
** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
** All rights reserved.
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** This file is part of the QtXmlPatterns module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** No Commercial Usage
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the Technology Preview License Agreement accompanying
** this package.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file.  Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights.  These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** If you have questions regarding the use of this file, please contact
** Nokia at qt-info@nokia.com.
**
**
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include <QString>

#include "qcompressedwhitespace_p.h"

QT_BEGIN_NAMESPACE

using namespace QPatternist;

/* Translates the whitespace character ch into its CharIdentifier.
 *
 * Space, line feed, carriage return and tab are the only recognized
 * characters. Anything else trips the assertion; if execution continues past
 * it, Tab is returned as the fallback. */
CompressedWhitespace::CharIdentifier CompressedWhitespace::toIdentifier(const QChar ch)
{
    /* Tab doubles as the fallback value, so it is the initial value. */
    CharIdentifier identifier = Tab;

    switch(ch.unicode())
    {
        case ' ':
            identifier = Space;
            break;
        case '\n':
            identifier = LF;
            break;
        case '\r':
            identifier = CR;
            break;
        case '\t':
            break;
        default:
            Q_ASSERT_X(false, Q_FUNC_INFO,
                       "The caller must guarantee only whitespace is passed.");
            break;
    }

    return identifier;
}

/* Returns true if number is divisible by two. Callers are expected to pass
 * a non-negative value, which is asserted. */
bool CompressedWhitespace::isEven(const int number)
{
    Q_ASSERT(number >= 0);

    /* An even number has its lowest bit cleared. */
    return (number & 1) == 0;
}

/* Builds the single-byte unit that represents len consecutive occurrences of
 * the whitespace character ch: the identifier of ch added to the count.
 *
 * len must be in the range 1..MaxCharCount, which is asserted. */
quint8 CompressedWhitespace::toCompressedChar(const QChar ch, const int len)
{
    Q_ASSERT(len > 0);
    Q_ASSERT(len <= MaxCharCount);

    const int identifier = toIdentifier(ch);
    return len + identifier;
}

/* The inverse of toIdentifier(): returns the whitespace character that id
 * stands for.
 *
 * An id that is none of Space, CR, LF or Tab trips the assertion; if
 * execution continues past it, a default-constructed QChar is returned. */
QChar CompressedWhitespace::toChar(const CharIdentifier id)
{
    if(id == Space)
        return QLatin1Char(' ');

    if(id == CR)
        return QLatin1Char('\r');

    if(id == LF)
        return QLatin1Char('\n');

    if(id == Tab)
        return QLatin1Char('\t');

    Q_ASSERT_X(false, Q_FUNC_INFO, "Unexpected input");
    return QChar();
}

/* Compresses input, which must be non-empty and consist of whitespace only,
 * into a run-length encoded QString.
 *
 * Each run of identical characters becomes one or more single-byte units
 * (see toCompressedChar()), and two units are packed into every QChar of the
 * result: the first in the lower eight bits, the second in the upper eight
 * bits. With an odd number of units, the upper eight bits of the last QChar
 * stay zero. */
QString CompressedWhitespace::compress(const QStringRef &input)
{
    Q_ASSERT(!isEven(1) && isEven(0) && isEven(2));
    Q_ASSERT(!input.isEmpty());

    const int inputLength = input.length();
    QString packed;

    /* How many units that have been emitted so far. For instance, four
     * spaces followed by one tab yields two units, both stored in a single
     * QChar, so the returned string has a length of 1. */
    int unitCount = 0;

    int pos = 0;

    while(pos < inputLength)
    {
        const QChar current(input.at(pos));

        /* Locate the end of the run of characters equal to current. */
        int runEnd = pos + 1;

        while(runEnd < inputLength && input.at(runEnd) == current)
            ++runEnd;

        /* The number of characters in the run that remain to be encoded. */
        int remaining = runEnd - pos;
        pos = runEnd;

        /* A run can be longer than what fits in one unit. If so, it is
         * split into as many consecutive units as needed. */
        do
        {
            const int chunk = qMin(remaining, int(MaxCharCount));
            remaining -= chunk;

            const ushort unit = toCompressedChar(current, chunk);

            if(isEven(unitCount))
            {
                /* Starts a new QChar, occupying its lower eight bits. */
                packed += QChar(unit);
            }
            else
            {
                /* Goes into the upper eight bits of the QChar that the
                 * previous unit started. */
                const int last = packed.size() - 1;
                const ushort merged = (unit << 8) | packed.at(last).unicode();
                packed[last] = QChar(merged);
            }

            ++unitCount;
        }
        while(remaining != 0);
    }

    return packed;
}

/* Expands a string produced by compress() back into the whitespace it
 * represents. input must be non-empty, which is asserted.
 *
 * Each QChar of input carries two units, the first in its lower eight bits
 * and the second in its upper eight bits. A unit in the upper eight bits
 * that is zero ends the decoding. */
QString CompressedWhitespace::decompress(const QString &input)
{
    Q_ASSERT(!input.isEmpty());

    const int unitCount = input.length() * 2;
    QString expanded;

    for(int unit = 0; unit < unitCount; ++unit)
    {
        const ushort packed = input.at(unit / 2).unicode();
        ushort encoded;

        if(isEven(unit))
            encoded = packed & Lower8Bits;
        else
        {
            encoded = packed >> 8;

            /* Nothing was stored in the upper half, so we are done. */
            if(encoded == 0)
                return expanded;
        }

        /* Split the unit into the run length and the character identifier. */
        const quint8 runLength = encoded & Lower6Bits;
        const quint8 id = encoded & UpperTwoBits;

        const QChar ch(toChar(CharIdentifier(id)));

        /* Append runLength copies of ch. */
        expanded.append(QString(int(runLength), ch));
    }

    return expanded;
}

QT_END_NAMESPACE