diff -r 000000000000 -r 044383f39525 bintools/rcomp/src/UNICODE_COMPRESSOR.CPP --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bintools/rcomp/src/UNICODE_COMPRESSOR.CPP Tue Oct 27 16:36:35 2009 +0000 @@ -0,0 +1,453 @@ +/* +* Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of the License "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + + +#include "UNICODE_COMPRESSOR.H" + +void CompressUnicode(unsigned char* aOutputBuffer, int& aOutputLength, int aMaximumOutputLength, const UTF16* aInputBuffer, int aInputLength) + { + TUnicodeCompressor unicodeCompressor; + TMemoryUnicodeSource decompressedUnicode(aInputBuffer); + TInt numberOfInputElementsConsumed; + unicodeCompressor.CompressL(aOutputBuffer, decompressedUnicode, aMaximumOutputLength, aInputLength, &aOutputLength, &numberOfInputElementsConsumed); + TInt temp; + unicodeCompressor.FlushL(aOutputBuffer, aMaximumOutputLength, temp); + aOutputLength+=temp; + if (aOutputLength quote from window 0 +const TUint8 SDX = 0x0B; // define window in expansion area +const TUint8 SQU = 0x0E; // quote Unicode value +const TUint8 SCU = 0x0F; // switch to Unicode mode +const TUint8 SC0 = 0x10; // select dynamic window 0 +const TUint8 SD0 = 0x18; // set dynamic window 0 index to and select it + +// Unicode mode tag values +const TUint8 UC0 = 0xE0; // select dynamic window 0 and switch to single-byte mode +const TUint8 UD0 = 0xE8; // set dynamic window 0 index to , select it and switch to + // single-byte mode +const TUint8 UQU = 0xF0; // , quote Unicode value +const TUint8 UDX = 0xF1; // , define window in expansion area and switch to single-byte mode + +TUnicodeCompressionState::TUnicodeCompressionState(): + iUnicodeWords(0), + iMaxUnicodeWords(0), + iCompressedBytes(0), + iMaxCompressedBytes(0) + { + Reset(); + } + +void TUnicodeCompressionState::Reset() + { + iUnicodeMode = FALSE; + iActiveWindowBase = 0x0080; + for (int i = 0; i < EDynamicWindows; i++) + iDynamicWindow[i] = iDynamicWindowDefault[i]; + } + + +// Return the index of the static window that contains this code, if any, or -1 if there is none. +TInt TUnicodeCompressionState::StaticWindowIndex(TUint16 aCode) + { + for (TInt i = 0; i < EStaticWindows; i++) + if (aCode >= iStaticWindow[i] && aCode < iStaticWindow[i] + 128) + return i; + return -1; + } + +/* +If aCode can be accommodated in one of the legal dynamic windows, return the index of that window +in the offset table. If not return KErrNotFound. +*/ +TInt TUnicodeCompressionState::DynamicWindowOffsetIndex(TUint16 aCode) + { + if (aCode < 0x0080) + return KErrNotFound; + if (aCode >= 0x3400 && aCode <= 0xDFFF) + return KErrNotFound; + + /* + Prefer sections that cross half-block boundaries. These are better adapted to actual text. + They are represented by offset indices 0xf9..0xff. + */ + for (int i = 0; i < ESpecialBases; i++) + if (aCode >= iSpecialBase[i] && aCode < iSpecialBase[i] + 128) + return 0xF9 + i; + + /* + Offset indices 0x01..0x67 represent half blocks from 0x0080 to 0x3380 and + 0x68..0xA7 represent half blocks from 0xE000 to 0xFF80. + */ + if (aCode >= 0xE000) + aCode -= 0xAC00; + return aCode / 0x80; + } + +// Return the base of the window represented by offset index . Return 0 if the offset index is illegal. +TUint32 TUnicodeCompressionState::DynamicWindowBase(TInt aOffsetIndex) + { + if (aOffsetIndex >= 0xF9 && aOffsetIndex <= 0xFF) + { + /* + WARNING: don't optimise the following two lines by replacing them with + 'return iSpecialBase[aOffsetIndex - 0xF9];'. To do so would re-introduce a defect + in ARM builds caused by optimisation and consequent erroneous fixing up + of the array base: see defect EDNGASR-4AGJQX in ER5U defects. + */ + int special_base_index = aOffsetIndex - 0xF9; + return iSpecialBase[special_base_index]; + } + if (aOffsetIndex >= 0x01 && aOffsetIndex <= 0x67) + return aOffsetIndex * 0x80; + if (aOffsetIndex >= 0x68 && aOffsetIndex <= 0xA7) + return aOffsetIndex * 0x80 + 0xAC00; + return 0; + } + +TBool TUnicodeCompressionState::EncodeAsIs(TUint16 aCode) + { + return aCode == 0x0000 || aCode == 0x0009 || aCode == 0x000A || aCode == 0x000D || + (aCode >= 0x0020 && aCode <= 0x007F); + } + +void TUnicodeCompressionState::Panic(TPanic aPanic) + { + ::Panic(100+aPanic); + } + +EXPORT_C TUnicodeCompressor::TUnicodeCompressor(): + iInputBufferStart(0), + iInputBufferSize(0), + iOutputBufferStart(0), + iOutputBufferSize(0), + iDynamicWindowIndex(0), + iOutputStream(NULL), + iOutputPointer(NULL), + iInput(NULL) + { + } + +EXPORT_C void TUnicodeCompressor::CompressL(TUint8* aOutput,MUnicodeSource& aInput, + TInt aMaxOutputBytes,TInt aMaxInputWords, + TInt* aOutputBytes,TInt* aInputWords) + { + DoCompressL(NULL,aOutput,&aInput,aMaxOutputBytes,aMaxInputWords,aOutputBytes,aInputWords); + } + +EXPORT_C TInt TUnicodeCompressor::FlushL(TUint8* aOutput,TInt aMaxOutputBytes,TInt& aOutputBytes) + { + DoCompressL(NULL,aOutput,NULL,aMaxOutputBytes,0,&aOutputBytes,NULL); + return iOutputBufferSize; + } + +EXPORT_C TInt TUnicodeCompressor::CompressedSizeL(MUnicodeSource& aInput,TInt aInputWords) + { + TInt bytes; + TUnicodeCompressor c; + c.DoCompressL(NULL,NULL,&aInput,KMaxTInt,aInputWords,&bytes,NULL); + return bytes; + } + +// Compress until input or output is exhausted or an exception occurs. +void TUnicodeCompressor::DoCompressL(RWriteStream* aOutputStream,TUint8* aOutputPointer,MUnicodeSource* aInput, + TInt aMaxOutputBytes,TInt aMaxInputWords, + TInt* aOutputBytes,TInt* aInputWords) + { + iOutputStream = aOutputStream; + iOutputPointer = aOutputPointer; + iInput = aInput; + iMaxCompressedBytes = aMaxOutputBytes; + iMaxUnicodeWords = aMaxInputWords; + iCompressedBytes = iUnicodeWords = 0; + FlushOutputBufferL(); + if (iInput) + { + while (iUnicodeWords < iMaxUnicodeWords && iCompressedBytes < iMaxCompressedBytes) + { + TUint16 x = iInput->ReadUnicodeValueL(); + TAction action(x); + iInputBuffer[(iInputBufferStart + iInputBufferSize) % EMaxInputBufferSize] = action; + iInputBufferSize++; + iUnicodeWords++; + if (iInputBufferSize == EMaxInputBufferSize) + WriteRunL(); + } + } + FlushInputBufferL(); + if (aOutputBytes) + *aOutputBytes = iCompressedBytes; + if (aInputWords) + *aInputWords = iUnicodeWords; + } + +TUnicodeCompressor::TAction::TAction(TUint16 aCode): + iCode(aCode) + { + if (TUnicodeCompressionState::EncodeAsIs(aCode)) + iTreatment = EPlainASCII; + else + { + iTreatment = TUnicodeCompressionState::DynamicWindowOffsetIndex(aCode); + if (iTreatment == -1) + { + iTreatment = TUnicodeCompressionState::StaticWindowIndex(aCode); + if (iTreatment == -1) + iTreatment = EPlainUnicode; + else + iTreatment += EFirstStatic; + } + } + } + +void TUnicodeCompressor::WriteCharacterFromBuffer() + { + const TAction& action = iInputBuffer[iInputBufferStart]; + iInputBufferSize--; + iInputBufferStart = (iInputBufferStart + 1) % EMaxInputBufferSize; + WriteCharacter(action); + } + +void TUnicodeCompressor::FlushInputBufferL() + { + while (iInputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes) + WriteRunL(); + } + +void TUnicodeCompressor::WriteRunL() + { + // Write out any leading characters that can be passed through. + if (!iUnicodeMode) + while (iInputBufferSize > 0) + { + const TAction& action = iInputBuffer[iInputBufferStart]; + if (action.iTreatment == TAction::EPlainASCII || + (action.iCode >= iActiveWindowBase && action.iCode < iActiveWindowBase + 128)) + WriteCharacterFromBuffer(); + else + break; + } + + // Write a run of characters that cannot be passed through. + int i; + if (iInputBufferSize > 0) + { + /* + Find a run of characters with the same treatment and select that treatment + if the run has more than one character. + */ + int treatment = iInputBuffer[iInputBufferStart].iTreatment; + int next_treatment = treatment; + int run_size = 1; + for (i = 1; i < iInputBufferSize; i++) + { + int index = (iInputBufferStart + i) % EMaxInputBufferSize; + next_treatment = iInputBuffer[index].iTreatment; + if (next_treatment != treatment) + break; + run_size++; + } + if (run_size > 1) + SelectTreatment(treatment); + for (i = 0; i < run_size; i++) + WriteCharacterFromBuffer(); + } + + FlushOutputBufferL(); + } + +void TUnicodeCompressor::FlushOutputBufferL() + { + while (iOutputBufferSize > 0 && iCompressedBytes < iMaxCompressedBytes) + { + TUint8 byte = iOutputBuffer[iOutputBufferStart]; + if (iOutputPointer) + *iOutputPointer++ = byte; + else if (iOutputStream) + Panic(ECannotUseStreams); + iCompressedBytes++; + iOutputBufferSize--; + iOutputBufferStart = (iOutputBufferStart + 1) % EMaxOutputBufferSize; + } + } + +void TUnicodeCompressor::SelectTreatment(TInt aTreatment) + { + if (aTreatment == TAction::EPlainUnicode) + { + // Switch to Unicode mode if not there already. + if (!iUnicodeMode) + { + WriteByte(SCU); + iUnicodeMode = TRUE; + } + return; + } + + if (aTreatment == TAction::EPlainASCII) + { + // Switch to single-byte mode, using the current dynamic window, if not there already. + if (iUnicodeMode) + { + WriteByte(UC0 + iDynamicWindowIndex); + iUnicodeMode = FALSE; + } + return; + } + + if (aTreatment >= TAction::EFirstDynamic && aTreatment <= TAction::ELastDynamic) + { + TUint32 base = DynamicWindowBase(aTreatment); + + // Switch to the appropriate dynamic window if it is available; if not, redefine and select dynamic window 4. + for (int i = 0; i < EDynamicWindows; i++) + if (base == iDynamicWindow[i]) + { + if (iUnicodeMode) + WriteByte(UC0 + i); + else if (i != iDynamicWindowIndex) + WriteByte(SC0 + i); + iUnicodeMode = FALSE; + iDynamicWindowIndex = i; + iActiveWindowBase = base; + return; + } + if (iUnicodeMode) + WriteByte(UD0 + 4); + else + WriteByte(SD0 + 4); + iDynamicWindowIndex = 4; + iUnicodeMode = FALSE; + WriteByte(aTreatment); + iDynamicWindow[4] = base; + iActiveWindowBase = base; + return; + } + } + +// Write a character without changing mode or window. +void TUnicodeCompressor::WriteCharacter(const TAction& aAction) + { + if (iUnicodeMode) + WriteUCharacter(aAction.iCode); + else + WriteSCharacter(aAction); + } + +void TUnicodeCompressor::WriteUCharacter(TUint16 aCode) + { + // Emit the 'quote Unicode' tag if the character would conflict with a tag. + if (aCode >= 0xE000 && aCode <= 0xF2FF) + WriteByte(UQU); + + // Write the Unicode value big-end first. + WriteByte((aCode >> 8) & 0xFF); + WriteByte(aCode & 0xFF); + } + +void TUnicodeCompressor::WriteByte(TUint aByte) + { + if (iOutputBufferSize >= EMaxOutputBufferSize) + Panic(EOutputBufferOverflow); + iOutputBuffer[(iOutputBufferStart + iOutputBufferSize) % EMaxOutputBufferSize] = (TUint8)aByte; + iOutputBufferSize++; + } + +void TUnicodeCompressor::WriteSCharacter(const TAction& aAction) + { + // Characters in the range 0x0020..0x007F, plus nul, tab, cr, and lf, can be emitted as their low bytes. + if (aAction.iTreatment == TAction::EPlainASCII) + { + WriteByte(aAction.iCode); + return; + } + + // Characters in a static window can be written using SQ plus a byte in the range 0x00-0x7F + if (aAction.iTreatment >= TAction::EFirstStatic && aAction.iTreatment <= TAction::ELastStatic) + { + int window = aAction.iTreatment - TAction::EFirstStatic; + WriteByte(SQ0 + window); + WriteByte(aAction.iCode); + return; + } + + // Characters in the current dynamic window can be written as a byte in the range 0x80-0xFF. + if (aAction.iCode >= iActiveWindowBase && aAction.iCode < iActiveWindowBase + 128) + { + WriteByte(aAction.iCode - iActiveWindowBase + 0x80); + return; + } + + // Characters in another dynamic window can be written using SQ plus a byte in the range 0x80-0xFF + int i; + for (i = 0; i < EDynamicWindows; i++) + if (aAction.iCode >= iDynamicWindow[i] && aAction.iCode < iDynamicWindow[i] + 128) + { + WriteByte(SQ0 + i); + WriteByte(aAction.iCode - iDynamicWindow[i] + 0x80); + return; + } + + // Other characters can be quoted. + WriteByte(SQU); + WriteByte((aAction.iCode >> 8) & 0xFF); + WriteByte(aAction.iCode & 0xFF); + return; + } +