--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bintools/rcomp/src/ASTRING.CPP Tue Oct 27 16:36:35 2009 +0000
@@ -0,0 +1,530 @@
+/*
+* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of the License "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+* ASTRING.CPP
+*
+*/
+
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <ctype.h>
+#include "CTABLE.H" // character code mapping classes
+#include "ASTRING.H"
+
+
+extern Mapping_range CP1252_exceptions;
+// Table of exceptions from the CP1252 1:1 mapping with Unicode (defined elsewhere; used by String::Export).
+
+
+#undef STRING_DEBUG
+
+String::String()
+	{	// Default constructor: an empty string owns no buffer at all.
+	iRep=NULL;
+	iLength=0;
+	}
+
+String::String(const char* aText)	// Copy a C string; NULL is treated as "".
+	{	// NB: the former "ArrayItem();" statement here only built and discarded a temporary.
+	const char* text=(aText!=NULL) ? aText : "";
+	iLength=strlen(text);
+	iRep=new char[iLength+1];
+	assert(iRep!=NULL);
+	memcpy(iRep,text,iLength+1);	// +1 copies the terminating NUL too
+	}
+
+String::String(const String& SourceString): ArrayItem(SourceString)
+	{	// Copy constructor: deep copy, so the new object always owns a buffer.
+	iLength=SourceString.iLength;
+	iRep=new char[ iLength + 1];
+	assert( iRep != 0);
+	iRep[0]='\0';	// an empty source may have no buffer to copy from
+	if(iLength!=0) strcpy( iRep, SourceString.iRep);
+	}
+
+String::~String()
+	{	// Destructor: release the owned buffer (delete[] on NULL is a no-op).
+	delete [] iRep;
+	}
+
+void String::Reset()
+	{	// Return to the empty state: free the buffer and forget its length.
+	delete [] iRep;
+	iRep=NULL;
+	iLength=0;
+	}
+
+char& String::operator[] (unsigned long CharIndex) const
+	{
+	// Indices 0..iLength are accepted, so the terminating NUL is addressable.
+	// Anything beyond that asserts in debug builds and is clamped to the NUL.
+	unsigned long position = CharIndex;
+	if ( !( position <= iLength))
+		{ assert( 0); position = iLength; }
+	return iRep[ position];
+	}
+
+String& String::operator=(const String& SourceString)
+	{	// Assignment: replace this text with a private copy of the source's text.
+	// Self-assignment is a no-op; it must not free the buffer being read.
+	if(this!=&SourceString)
+		{
+		delete [] iRep;
+		iLength=SourceString.iLength;
+		iRep=( iLength != 0) ? new char [ iLength + 1] : NULL;	// empty => no buffer
+		if ( iLength != 0)
+			{
+			assert( iRep != NULL);
+			strcpy( iRep, SourceString.iRep);
+			}
+		}
+	return *this;
+	}
+
+String& String::operator+= (const String & SourceString)
+	{	// Append SourceString; either side may be empty with no buffer (iRep==NULL).
+	const unsigned long oldLength=iLength, addLength=SourceString.iLength;
+	char * joined=NULL;	// an empty result keeps no buffer, as before
+	if ( oldLength + addLength != 0)
+		{
+		joined=new char [ oldLength + addLength + 1];
+		assert( joined != NULL);
+		if ( oldLength != 0) memcpy( joined, iRep, oldLength);	// NULL-safe, unlike strcpy
+		if ( addLength != 0) memcpy( joined + oldLength, SourceString.iRep, addLength);
+		joined[ oldLength + addLength]='\0';
+		}
+	delete [] iRep;	// only after copying: SourceString may be *this
+	iRep=joined; iLength=oldLength + addLength;
+	return *this;
+	}
+
+int String::operator== (const String & CompareString) const
+	{	// Non-zero when the texts are equal. A missing buffer (iRep==NULL) compares as "".
+	return(!strcmp( iRep ? iRep : "", CompareString.iRep ? CompareString.iRep : ""));
+	}
+
+int String::operator!= (const String & CompareString) const
+	{	// Non-zero (strcmp's result) when the texts differ. A missing buffer compares as "".
+	return(strcmp( iRep ? iRep : "", CompareString.iRep ? CompareString.iRep : ""));
+	}
+
+unsigned long String::Length() const
+	{	// Number of characters held, not counting the terminating NUL.
+	return iLength;
+	}
+
+ostream& operator<< ( ostream& os, const String & a)
+	{	// Stream the text, or the placeholder "<empty>" for a zero-length string.
+	return os << ( ( a.iLength > 0) ? a.iRep : "<empty>");
+	}
+
+const char * String::GetBuffer() const
+	{	// Read-only access to the text; asserts that a buffer has been allocated.
+	assert (iRep != NULL);
+	return iRep;
+	}
+
+const char * String::GetAssertedNonEmptyBuffer() const
+	{	// As GetBuffer(), but additionally asserts that the string is not empty.
+	assert( iRep != NULL);
+	assert( iLength > 0);
+	return iRep;
+	}
+
+int String::IsDecNatural() const
+	{
+	// Returns 1 when the text is an optional leading '-' followed only by
+	// decimal digits, and 0 as soon as any other character is found. The
+	// string must not be empty (asserted).
+	// NOTE(review): a lone "-" is still accepted, exactly as before; confirm
+	// whether any caller relies on that before tightening it.
+	assert( iLength > 0);
+	for( unsigned long i=( iRep[0] == '-') ? 1 : 0; i < iLength; i++)
+		if (!isdigit( (unsigned char)iRep[i]) )	// cast: isdigit() is undefined for negative chars
+			return 0;	// Non-digit found.
+	return 1;	// Successful - all numeric.
+	}
+
+String & String::Upper()
+	{	// Upper-case the text in place using toupper() and return *this.
+	for(unsigned long i=0;i<iLength;i++)
+		iRep[i]=char(toupper((unsigned char)iRep[i]));	// cast: toupper() is undefined for negative chars
+	return *this;
+	}
+
+String String::operator+ (const String & SecondString) const
+	{	// Concatenation into a new String; an operand with no buffer (iRep==NULL) counts as "".
+	String s;
+	s.iLength=iLength + SecondString.iLength;
+	s.iRep=new char[ s.iLength + 1];
+	strcpy( s.iRep, iRep ? iRep : "");	// strcpy from a NULL buffer crashed here
+	strcpy( s.iRep + iLength, SecondString.iRep ? SecondString.iRep : "");
+	return s;
+	}
+
+bool StringLess::operator()(const String& aLeft, const String& aRight) const
+	{
+	// Ordering predicate: walks both texts in step and compares the first
+	// pair of characters that differ (as plain char values). Returns true
+	// when aLeft sorts before aRight; equal texts give false. Both strings
+	// must have a buffer, since GetBuffer() asserts on a missing one.
+	const char* left = aLeft.GetBuffer();
+	const char* right = aRight.GetBuffer();
+	while (*left == *right && *left != 0)	// skip the common prefix
+		++left, ++right;
+	return *left < *right;
+	}
+
+const unsigned char* String::UCRep (unsigned long aIndex) const
+	{	// Address of the byte at aIndex, viewed as unsigned char; no bounds check is made.
+	return reinterpret_cast<const unsigned char*>(iRep + aIndex);
+	}
+
+// Consume one UTF-8 continuation byte (bit pattern 10xxxxxx) if one is present.
+// aNext    points at the candidate byte and is advanced only on success.
+// aIndex   is the candidate's index in the caller's buffer, advanced on success.
+// aMax     is the buffer length; nothing is read once aIndex has reached it.
+// aPayload receives the six payload bits on success.
+//
+// Returns true when a continuation byte was consumed, false otherwise.
+static bool takeUTF8Continuation(const unsigned char*& aNext, unsigned int& aIndex, unsigned int aMax, UTF32& aPayload)
+	{
+	if (aIndex >= aMax || (*aNext & 0xc0) != 0x80)
+		return false;
+	aPayload = (UTF32)(*aNext & 0x3f);
+	++aNext;
+	++aIndex;
+	return true;
+	}
+
+// Decode the UTF-8 sequence whose lead byte is at aUtfByte and return the
+// resulting code point.
+//
+// aUtfByte points at the lead byte, which is always consumed.
+// aIndex   is the lead byte's index in the caller's buffer; on return it has
+//          been advanced past every byte that was consumed.
+// aMax     is the buffer length, i.e. the first index that must not be read.
+//
+// Decoding is deliberately lenient ("slightly cavalier"): something is always
+// returned, and a byte that does not fit the expected pattern is never
+// consumed.
+//  - A byte that is not a 2-, 3- or 4-byte lead (ASCII, a stray continuation
+//    byte, or an invalid lead) is returned unchanged.
+//  - If a sequence is cut short, by the end of the buffer or by a byte that is
+//    not a continuation byte, the bits gathered so far are returned with the
+//    missing low-order bits left as zero.
+//
+// The previous implementation advanced its read pointer even when it rejected
+// a byte, so after one bad continuation byte the following check examined a
+// different byte from the one aIndex then accounted for; an ordinary character
+// in the middle of a malformed sequence could be skipped as a result. Decoding
+// now stops at the first byte that does not fit, leaving it for the next call.
+static UTF32 getUTF8(const unsigned char* aUtfByte, unsigned int& aIndex, unsigned int aMax)
+	{
+	const unsigned char leadByte = *aUtfByte++;
+	aIndex +=1;
+
+	// The lead byte announces how many continuation bytes follow and carries
+	// the most significant payload bits.
+	unsigned int expected;
+	UTF32 unicodeChar;
+	if ((leadByte & 0xe0) == 0xc0)			// 110xxxxx: 2-byte sequence
+		{
+		expected = 1;
+		unicodeChar = (UTF32)(leadByte & 0x1f);
+		}
+	else if ((leadByte & 0xf0) == 0xe0)		// 1110xxxx: 3-byte sequence
+		{
+		expected = 2;
+		unicodeChar = (UTF32)(leadByte & 0x0f);
+		}
+	else if ((leadByte & 0xF8) == 0xF0)		// 11110xxx: 4-byte sequence
+		{
+		expected = 3;
+		unicodeChar = (UTF32)(leadByte & 0x07);
+		}
+	else
+		return (UTF32)leadByte;			// not a multi-byte lead: pass through
+
+	// Move the lead bits to their final position, then OR each continuation
+	// byte's payload in below them, most significant first.
+	unicodeChar <<= 6 * expected;
+	for (unsigned int remaining = expected; remaining > 0; --remaining)
+		{
+		UTF32 payload = 0;
+		if (!takeUTF8Continuation(aUtfByte, aIndex, aMax, payload))
+			break;				// truncated or malformed: stop here
+		unicodeChar |= payload << (6 * (remaining - 1));
+		}
+	return unicodeChar;
+	}
+
+int String::FindSubString(String aSubString, int aStart)
+	{	// First index >= aStart at which aSubString occurs, else EStringNotFound.
+	const unsigned long subLength = aSubString.Length();
+	if (subLength > iLength) return EStringNotFound;	// guard: iLength - subLength below would wrap
+	for(unsigned int j=aStart; j<iLength - subLength + 1; j++)
+		{
+		unsigned long k=0;
+		while (k<subLength && iRep[j+k]==aSubString[k]) k++;	// stop at the first mismatch
+		if(k==subLength) return j;
+		}
+	return EStringNotFound;
+	}
+
+
+String String::ExtractSubString(const unsigned int aStart, const unsigned int aFinish)
+	{
+	// Returns a copy of the characters at indices aStart..aFinish, both
+	// inclusive. Out-of-range bounds yield an empty string; aFinish > iLength
+	// and aStart > aFinish additionally assert in debug builds.
+	//
+	// The earlier implementation cut the range out by temporarily writing a
+	// NUL at iRep[aFinish + 1]. With aFinish == iLength that write landed one
+	// byte past the buffer, and an empty string without a buffer was
+	// dereferenced through NULL. The range is now copied directly and *this is
+	// never modified.
+	if( aStart > iLength )
+		return String("");
+	if( aFinish > iLength || aStart > aFinish )
+		{
+		assert( !"This condition should never happen" );
+		return String("");
+		}
+	if( aStart == iLength )
+		return String("");	// only the terminating NUL was asked for
+	// aFinish == iLength addresses the terminating NUL; copy up to the last
+	// real character instead.
+	const unsigned long last = ( aFinish < iLength ) ? aFinish : iLength - 1;
+	String substr;
+	substr.iLength = last - aStart + 1;
+	substr.iRep = new char[ substr.iLength + 1];
+	memcpy( substr.iRep, iRep + aStart, substr.iLength);
+	substr.iRep[ substr.iLength] = '\0';
+	return substr;
+	}
+
+String& String::operator+= (char * SourceChar)
+	{	// Append a NUL-terminated C string; the current buffer may be NULL (empty string).
+	char * joined = new char [iLength + strlen(SourceChar) + 1];
+	if (iRep != NULL) memcpy( joined, iRep, iLength);	// strcpy from a NULL buffer crashed here
+	strcpy( joined + iLength, SourceChar);	// also writes the terminating NUL
+	iLength += strlen(SourceChar);
+	delete [] iRep;	// only now: SourceChar may point into the old buffer
+	iRep = joined;
+	return * this;
+	}
+
+int String::Atoi()
+	{	// Parse the text with atoi(). atoi(NULL) is undefined, so a string with no buffer gives 0.
+	return (iRep != NULL) ? atoi(iRep) : 0;
+	}
+
+int String::Export( UTF16 *buffer, int& length, CharacterSet fromchset ) const
+// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+//
+// Export the string from its internal form to the caller supplied buffer
+// of UTF16 units. On entry, length is the capacity of the buffer in units;
+// the whole buffer is zero-filled first. On exit, length is set to the
+// number of units actually written. fromchset names the character set of
+// the internal bytes: CP1252, UTF8, or anything else (bytes copied 1:1).
+//
+// Returns 1 if every internal byte was consumed, or 0 if the export stopped
+// early because the buffer filled up, i.e. truncation has occurred.
+//
+// NB, this function is currently under development and character set
+// mapping is not yet properly implemented.
+// NB2. it's also largely duplicated in String::ExportLength, which should
+// be updated to match...
+// ---------------------------------------------------------------------------
+	{
+	unsigned int index = 0; // index into internal buffer
+	int outcount = 0; // count of characters written to export buffer
+	for(int i=0;i<length;i++)
+		{
+		buffer[i] = 0;
+		}
+
+	// Because of multibyte character sets, the general code pattern for
+	// copying the characters has to work left to right to allow for
+	// byte sequence interpretation. The raw byte count of such a string
+	// can be greater than the number of characters it represents.
+	switch ( fromchset )
+		{
+		case CP1252:
+			// In this case, we know that there is only a narrow range
+			// of characters that aren't a direct mapping.
+
+			while ( (index < iLength) && ( outcount < length ) )
+				{
+				// Each pass through this loop produces exactly one output unit.
+				// To allow for direct Unicode characters in CP1252 strings, we
+				// insert a special character followed by the UTF8 sequence
+				// (an escape with nothing after it leaves the slot at its zero fill).
+				if (*UCRep(index) == UnicodeEscape)
+					{
+					index +=1;
+					if (index < iLength)
+						{
+						buffer[outcount] = getUTF8(UCRep(index), index, iLength);
+						}
+					}
+				else
+					{
+					buffer[outcount] = *UCRep(index);
+					index +=1;
+					}
+
+				// Now, see if the character ended up in the forbidden range. If so, map
+				// it to the correct Unicode character.
+				// NOTE(review): the test is "< 255", so 255 itself never reaches map().
+				if ( buffer[outcount] < 255 )
+					{
+					unsigned char temp;
+					temp = (unsigned char)buffer[outcount];
+					CP1252_exceptions.map(temp, buffer[outcount]);
+					}
+
+				outcount += 1;
+				// NOTE(review): getUTF8() returns UTF32; an escaped value above 0xFFFF
+				// would be narrowed when stored, if UTF16 is 16 bits wide - confirm.
+				} // end of loop to export characters
+			break;
+
+
+
+		case UTF8:
+			// Values up to 0xFFFF take one unit; larger ones are written as a surrogate pair.
+			while ( (index < iLength) && ( outcount < length ) )
+				{
+				UTF32 tu32 = getUTF8(UCRep(index), index, iLength);
+				if (tu32 <= 0xFFFF)
+					{
+					buffer[outcount] = tu32;
+					outcount +=1;
+					}
+				else
+					{
+					if ( tu32 > 0x10ffff )
+						{
+						printf("Surrogate character code must be a number in the range 0x10000 to 0x10ffff\n");
+						printf("Error: rcomp.exe line %d\n", __LINE__);
+						}
+					// 0xD7C0 == 0xD800 - (0x10000 >> 10): the 0x10000 offset is folded in.
+					buffer[outcount] = (UTF16)(0xD7C0 + (tu32 >> 10)); // high surrogate
+					outcount++;
+					if (outcount < length)
+						{
+						buffer[outcount] = (UTF16)(0xDC00 | (tu32 & 0x3FF)); // low surrogate
+						outcount++;
+						}
+					else // no room left for the low surrogate; the high one stays in place
+						{
+						printf("Error: rcomp.exe line %d\n", __LINE__);
+						}
+					}
+				} // end of loop to export characters
+			break;
+
+
+		default: // this should eventually become an exception
+			// Any other source set: each byte is widened to one unit unchanged.
+			while ( (index < iLength) && ( outcount < length ) )
+				{
+				buffer[outcount] = *UCRep(index);
+				outcount +=1;
+				index += 1;
+				} // end of loop to export characters
+			break;
+
+
+		} // end of switch on character set.
+
+	length = outcount;
+
+	// If the index is not now equal to the internal length then
+	// the string was truncated on export.
+
+	if ( index != iLength ) return 0; else return 1;
+
+
+
+	} // end of Export to Unicode function.
+
+
+// What length of exported text does this String represent?
+
+unsigned long String::ExportLength (CharacterSet tochset, CharacterSet fromchset) const
+//
+// Reports how many output units an export of this string would produce.
+//
+// tochset   - target character set. Anything other than Unicode is answered
+//             with the stored length, without inspecting the text.
+// fromchset - character set of the stored bytes: CP1252, UTF8, or any other
+//             value, for which every byte counts as one unit.
+//
+// Returns the number of units; the string itself is not modified.
+//
+// For a Unicode target the text is walked the same way String::Export walks
+// it, counting units rather than writing them, so the two functions have to
+// be kept in step with each other.
+//
+	{
+	if (tochset != Unicode)
+		return iLength;
+
+	unsigned int srcPos = 0;		// read position in the internal buffer
+	unsigned long unitCount = 0;	// output units counted so far
+
+	if (fromchset == CP1252)
+		{
+		// Every byte stands for one output unit, except that a UnicodeEscape
+		// marker and the UTF8 sequence following it together count as one.
+		// An escape that is the very last byte still counts as one unit.
+		for ( ; srcPos < iLength; unitCount += 1)
+			{
+			const bool escaped = (*UCRep(srcPos) == UnicodeEscape);
+			srcPos +=1;
+			if (escaped && srcPos < iLength)
+				{
+				// Only the advance of srcPos matters here; the decoded
+				// value itself is not needed for counting.
+				(void) getUTF8(UCRep(srcPos), srcPos, iLength);
+				}
+			}
+		}
+	else if (fromchset == UTF8)
+		{
+		// One unit per decoded code point, or two when it lies above 0xFFFF
+		// and would therefore be exported as a surrogate pair.
+		while (srcPos < iLength)
+			{
+			const UTF32 codePoint = getUTF8(UCRep(srcPos), srcPos, iLength);
+			unitCount += (codePoint > 0xFFFF) ? 2 : 1;
+			}
+		}
+	else
+		{
+		// this should eventually become an exception
+		unitCount = iLength;
+		}
+
+	return unitCount;
+	}
+
+
+
+// end of ASTRING.CPP