/** Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).* All rights reserved.* This component and the accompanying materials are made available* under the terms of the License "Eclipse Public License v1.0"* which accompanies this distribution, and is available* at the URL "http://www.eclipse.org/legal/epl-v10.html".** Initial Contributors:* Nokia Corporation - initial contribution.** Contributors:** Description: * STRING.CPP**/#include <stdio.h>#include <string.h>#include <assert.h>#include <ctype.h>#include "CTABLE.H" // character code mapping classes#include "ASTRING.H"extern Mapping_range CP1252_exceptions;// table of exceptions from CP1252 1:1 mapping with Unicode.#undef STRING_DEBUGString::String() { iLength=0; iRep=NULL; }String::String(const char* aText) { ArrayItem(); iLength=strlen(aText); iRep=new char[iLength+1]; assert(iRep!=NULL); strcpy(iRep,aText); }String::String(const String& SourceString): ArrayItem(SourceString) { iLength=SourceString.iLength; iRep=new char[ iLength + 1]; assert( iRep != 0); if(iLength==0) *iRep='\0'; else strcpy( iRep, SourceString.iRep); }String::~String() { delete [] iRep; }void String::Reset() { iLength=0; delete [] iRep; iRep=NULL; }char& String::operator[] (unsigned long CharIndex) const { if ( CharIndex > iLength) { assert( 0); return iRep[ iLength]; // i.e. \0 } return iRep[ CharIndex]; }String& String::operator=(const String& SourceString) { if(&SourceString==this) return *this; delete [] iRep; iLength=SourceString.iLength; if ( iLength == 0) { iRep=NULL; return * this; } iRep=new char [ iLength + 1]; assert( iRep != NULL); strcpy( iRep, SourceString.iRep); return *this; }String& String::operator+= (const String & SourceString) { char * pOldRep=iRep; iLength += SourceString.iLength; if ( iLength == 0) iRep=NULL; else { iRep=new char [ iLength + 1]; assert( iRep != NULL); strcpy( iRep, pOldRep); strcpy( iRep + strlen( pOldRep), SourceString.iRep); } delete [] pOldRep; return *this; }int String::operator== (const String & CompareString) const { return(!strcmp( iRep, CompareString.iRep)); }int String::operator!= (const String & CompareString) const { return(strcmp( iRep, CompareString.iRep)); }unsigned long String::Length() const { return iLength; }ostream& operator<< ( ostream& os, const String & a) { return ( os << ( ( a.iLength <= 0) ? "<empty>" : a.iRep) ); }const char * String::GetBuffer() const { assert (iRep != NULL); return iRep; }const char * String::GetAssertedNonEmptyBuffer() const { assert( iRep != NULL); assert( iLength > 0); return iRep; }int String::IsDecNatural() const { assert( iLength > 0); unsigned long i=0; if ( iRep[0] == '-') i++; for( ; i < iLength; i++) { if (!isdigit( iRep[i]) ) return 0; // Non-digit found. } return 1; // Successful - all numeric. }String & String::Upper() { for(unsigned long i=0;i<iLength;i++) iRep[i]=char(toupper(iRep[i])); return *this; }String String::operator+ (const String & SecondString) const { String s; s.iLength=iLength + SecondString.iLength; s.iRep=new char[ s.iLength + 1]; strcpy( s.iRep, iRep); strcpy( s.iRep + iLength, SecondString.iRep); return s; }bool StringLess::operator()(const String& aLeft, const String& aRight) const { const char* bufferLeft = aLeft.GetBuffer(); const char* bufferRight = aRight.GetBuffer(); for (;;) { if (*bufferLeft != *bufferRight || *bufferLeft == 0) return *bufferLeft < *bufferRight; ++bufferLeft; ++bufferRight; } }const unsigned char* String::UCRep (unsigned long aIndex) const { return (const unsigned char*)&iRep[aIndex]; }static UTF32 getUTF8(const unsigned char* aUtfByte, unsigned int& aIndex, unsigned int aMax) { unsigned char utfByte = *aUtfByte++; aIndex +=1; UTF32 unicodeChar = (UTF32) utfByte; // Slightly cavalier decoding - always write something // and don't consume bytes which don't fit the pattern! if ((utfByte & 0xe0) == 0xc0) { unicodeChar = (UTF16)((utfByte&0x1f)<<6); if (aIndex < aMax) { utfByte = (unsigned char)(*aUtfByte++); if ((utfByte&0xc0)==0x80) { unicodeChar |= (utfByte&0x3f); aIndex +=1; } } } else if ((utfByte & 0xf0) == 0xe0) { unicodeChar = (UTF16)((utfByte&0x0f)<<12); if (aIndex < aMax) { utfByte = (unsigned char)(*aUtfByte++); if ((utfByte&0xc0)==0x80) { unicodeChar |= (utfByte&0x3f)<<6; aIndex +=1; } } if (aIndex < aMax) { utfByte = (unsigned char)(*aUtfByte++); if ((utfByte&0xc0)==0x80) { unicodeChar |= (utfByte&0x3f); aIndex +=1; } } } else if ((utfByte & 0xF8) == 0xF0) // 4 bytes UTF-8 { unicodeChar = (UTF32)((utfByte & 0x07) << 18); if (aIndex < aMax) { utfByte = (unsigned char)(*aUtfByte++); if ((utfByte&0xc0)==0x80) { unicodeChar |= (utfByte&0x3f)<<12; aIndex +=1; } } if (aIndex < aMax) { utfByte = (unsigned char)(*aUtfByte++); if ((utfByte&0xc0)==0x80) { unicodeChar |= (utfByte&0x3f)<<6; aIndex +=1; } } if (aIndex < aMax) { utfByte = (unsigned char)(*aUtfByte++); if ((utfByte&0xc0)==0x80) { unicodeChar |= (utfByte&0x3f); aIndex +=1; } } } return unicodeChar; }int String::FindSubString(String aSubString, int aStart) { for(unsigned int j=aStart; j<iLength - aSubString.Length() + 1; j++) { bool match = true; for(unsigned int k=0; k< aSubString.Length(); k++) if(iRep[j+k]!=aSubString[k]) match = false; if(match) return j; } return EStringNotFound; }String String::ExtractSubString(const unsigned int aStart, const unsigned int aFinish) { // ensure that the passed bounds are valid if( aStart > iLength ) { String substr = ""; return substr; } else if( aFinish > iLength ) { assert( !"This condition should never happen" ); String substr = ""; return substr; } else if( aStart > aFinish ) { assert( !"This condition should never happen" ); String substr = ""; return substr; } // if valid - go and copy everything else { char *char_substr = &(iRep[aStart]); char temp_char = iRep[aFinish + 1]; iRep[aFinish + 1] = 0; String substr( char_substr ); iRep[aFinish + 1] = temp_char; return substr; } }String& String::operator+= (char * SourceChar) { char * pOldRep = iRep; iLength += strlen(SourceChar); iRep = new char [iLength + 1]; strcpy( iRep, pOldRep); strcpy( iRep + iLength-strlen(SourceChar), SourceChar); delete [] pOldRep; return * this; }int String::Atoi() { return atoi(iRep); }int String::Export( UTF16 *buffer, int& length, CharacterSet fromchset ) const// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++//// Export the string from its internal form to the caller supplied buffer// (which in this case is meant to be Unicode). On entry, length indicates// the number of characters in the buffer. On exit, this is set to the number// of characters actually written. The export involves mapping from the// specified character set to Unicode.//// The return value is normally TRUE. If not, truncation has occurred.////// NB, this function is currently under development and character set// mapping is not yet properly implemented.// NB2. it's also largely duplicated in String::ExportLength, which should// be updated to match...// --------------------------------------------------------------------------- { unsigned int index = 0; // index into internal buffer int outcount = 0; // count of characters written to export buffer for(int i=0;i<length;i++) { buffer[i] = 0; } // Because of multibyte character sets, the general code pattern for // copying the characters has to work left to right to allow for // byte sequence interpretation. The raw byte count of such a string // can be greater than the number of characters it represents. switch ( fromchset ) { case CP1252: // In this case, we know that there is only a narrow range // of characters that aren't a direct mapping. while ( (index < iLength) && ( outcount < length ) ) { // To allow for direct Unicode characters in CP1252 strings, we // insert a special character followed by the UTF8 sequence if (*UCRep(index) == UnicodeEscape) { index +=1; if (index < iLength) { buffer[outcount] = getUTF8(UCRep(index), index, iLength); } } else { buffer[outcount] = *UCRep(index); index +=1; } // Now, see if the character ended up in the forbidden range. If so, map // it to the correct Unicode character. if ( buffer[outcount] < 255 ) { unsigned char temp; temp = (unsigned char)buffer[outcount]; CP1252_exceptions.map(temp, buffer[outcount]); } outcount += 1; } // end of loop to export characters break; case UTF8: while ( (index < iLength) && ( outcount < length ) ) { UTF32 tu32 = getUTF8(UCRep(index), index, iLength); if (tu32 <= 0xFFFF) { buffer[outcount] = tu32; outcount +=1; } else { if ( tu32 > 0x10ffff ) { printf("Surrogate character code must be a number in the range 0x10000 to 0x10ffff\n"); printf("Error: rcomp.exe line %d\n", __LINE__); } buffer[outcount] = (UTF16)(0xD7C0 + (tu32 >> 10)); // high surrogate outcount++; if (outcount < length) { buffer[outcount] = (UTF16)(0xDC00 | (tu32 & 0x3FF)); // low surrogate outcount++; } else { printf("Error: rcomp.exe line %d\n", __LINE__); } } } // end of loop to export characters break; default: // this should eventually become an exception while ( (index < iLength) && ( outcount < length ) ) { buffer[outcount] = *UCRep(index); outcount +=1; index += 1; } // end of loop to export characters break; } // end of switch on character set. length = outcount; // If the index is not now equal to the internal length then // the string was truncated on export. if ( index != iLength ) return 0; else return 1; } // end of Export to Unicode function.// What length of exported text does this String represent?unsigned long String::ExportLength (CharacterSet tochset, CharacterSet fromchset) const { if (tochset != Unicode) return iLength; unsigned int index = 0; // index into internal buffer unsigned long outcount = 0; // count of output characters switch ( fromchset ) { case CP1252: // In this case, we know that there is only a narrow range // of characters that aren't a direct mapping. while ( (index < iLength) ) { // To allow for direct Unicode characters in CP1252 strings, we // insert a special character followed by the UTF8 sequence if (*UCRep(index) == UnicodeEscape) { index +=1; if (index < iLength) { (void) getUTF8(UCRep(index), index, iLength); } } else { index +=1; } outcount += 1; } break; case UTF8: while ( (index < iLength) ) { UTF32 tu32 = getUTF8(UCRep(index), index, iLength); outcount +=1; if (tu32 > 0xFFFF) ++outcount; } break; default: // this should eventually become an exception outcount = iLength; break; } // end of switch on character set. return outcount; }// end of ASTRING.CPP