bintools/rcomp/src/ASTRING.CPP
changeset 0 044383f39525
equal deleted inserted replaced
-1:000000000000 0:044383f39525
       
     1 /*
       
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of the License "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 * STRING.CPP
       
    16 *
       
    17 */
       
    18 
       
    19 
       
    20 #include <stdio.h>
       
    21 #include <string.h>
       
    22 #include <assert.h>
       
    23 #include <ctype.h>
       
    24 #include "CTABLE.H"     // character code mapping classes
       
    25 #include "ASTRING.H"
       
    26 
       
    27 
       
    28 extern Mapping_range CP1252_exceptions;
       
    29 // Table of the exceptions to the otherwise 1:1 mapping between CP1252 and Unicode (defined elsewhere).
       
    30 
       
    31 
       
    32 #undef STRING_DEBUG
       
    33 
       
    34 String::String()
       
    35 	{
       
    36 	iLength=0;
       
    37 	iRep=NULL;
       
    38 	}
       
    39 
       
    40 String::String(const char* aText)
       
    41 	{
       
    42 	ArrayItem();
       
    43 	iLength=strlen(aText);
       
    44 	iRep=new char[iLength+1];
       
    45 	assert(iRep!=NULL);
       
    46 	strcpy(iRep,aText);
       
    47 	}
       
    48 
       
    49 String::String(const String& SourceString): ArrayItem(SourceString) 
       
    50 	{
       
    51 	iLength=SourceString.iLength;
       
    52 	iRep=new char[ iLength + 1];
       
    53 	assert( iRep != 0);
       
    54 	if(iLength==0) *iRep='\0';
       
    55 	else strcpy( iRep, SourceString.iRep);
       
    56 	}
       
    57 
       
    58 String::~String()
       
    59 	{
       
    60 	delete [] iRep;
       
    61 	}
       
    62 
       
    63 void String::Reset()
       
    64 	{
       
    65 	iLength=0;
       
    66 	delete [] iRep;
       
    67 	iRep=NULL;
       
    68 	}
       
    69 
       
    70 char& String::operator[] (unsigned long CharIndex) const
       
    71 	{
       
    72 	if ( CharIndex > iLength)
       
    73 		{
       
    74 		assert( 0);
       
    75 		return iRep[ iLength];	// i.e. \0
       
    76 		}
       
    77 	return iRep[ CharIndex];
       
    78 	}
       
    79 
       
    80 String& String::operator=(const String& SourceString)
       
    81 	{
       
    82 	if(&SourceString==this)
       
    83 		return *this;
       
    84 	delete [] iRep;
       
    85 	iLength=SourceString.iLength;
       
    86 	if ( iLength == 0)
       
    87 		{
       
    88 		iRep=NULL;
       
    89 		return * this;
       
    90 		}
       
    91 	iRep=new char [ iLength + 1];
       
    92 	assert( iRep != NULL);
       
    93 	strcpy( iRep, SourceString.iRep);
       
    94 	return *this;
       
    95 	}
       
    96 
       
    97 String& String::operator+= (const String & SourceString)
       
    98 	{
       
    99 	char * pOldRep=iRep;
       
   100 	iLength += SourceString.iLength;
       
   101 	if ( iLength == 0)
       
   102 		iRep=NULL;
       
   103 	else
       
   104 		{
       
   105 		iRep=new char [ iLength + 1];
       
   106 		assert( iRep != NULL);
       
   107 		strcpy( iRep, pOldRep);
       
   108 		strcpy( iRep + strlen( pOldRep), SourceString.iRep);
       
   109 		}
       
   110 	delete [] pOldRep;
       
   111 	return *this;
       
   112 	}
       
   113 
       
   114 int String::operator== (const String & CompareString) const
       
   115 	{
       
   116 	return(!strcmp( iRep, CompareString.iRep));
       
   117 	}
       
   118 
       
   119 int String::operator!= (const String & CompareString) const
       
   120 	{
       
   121 	return(strcmp( iRep, CompareString.iRep));
       
   122 	}
       
   123 
       
   124 unsigned long String::Length() const
       
   125 	{
       
   126 	return iLength;
       
   127 	}
       
   128 
       
   129 ostream& operator<< ( ostream& os, const String & a)
       
   130 	{
       
   131 	return ( os << ( ( a.iLength <= 0) ? "<empty>" : a.iRep) );
       
   132 	}
       
   133 
       
   134 const char * String::GetBuffer() const
       
   135 	{
       
   136 	assert (iRep != NULL);
       
   137 	return iRep;
       
   138 	}
       
   139 
       
   140 const char * String::GetAssertedNonEmptyBuffer() const
       
   141 	{
       
   142 	assert( iRep != NULL);
       
   143 	assert( iLength > 0);
       
   144 	return iRep;
       
   145 	}
       
   146 
       
   147 int String::IsDecNatural() const
       
   148 	{
       
   149 	assert( iLength > 0);
       
   150 	unsigned long i=0;	
       
   151 	if ( iRep[0] == '-')
       
   152 		i++;
       
   153 	for( ; i < iLength; i++)
       
   154 		{
       
   155 		if (!isdigit( iRep[i]) )
       
   156 			return 0;	// Non-digit found.
       
   157 		}
       
   158 	return 1; // Successful - all numeric.
       
   159 	}
       
   160 
       
   161 String & String::Upper()
       
   162 	{
       
   163 	for(unsigned long i=0;i<iLength;i++)
       
   164 		iRep[i]=char(toupper(iRep[i]));
       
   165 	return *this;
       
   166 	}
       
   167 
       
   168 String String::operator+ (const String & SecondString) const
       
   169 	{
       
   170 	String	s;
       
   171 	s.iLength=iLength + SecondString.iLength;
       
   172 	s.iRep=new char[ s.iLength + 1];
       
   173 	strcpy( s.iRep, iRep);
       
   174 	strcpy( s.iRep + iLength, SecondString.iRep);
       
   175 	return s;	
       
   176 	}
       
   177 
       
   178 bool StringLess::operator()(const String& aLeft, const String& aRight) const
       
   179 	{
       
   180 	const char* bufferLeft = aLeft.GetBuffer();
       
   181 	const char* bufferRight = aRight.GetBuffer();
       
   182 	for (;;)
       
   183 		{
       
   184 		if (*bufferLeft != *bufferRight || *bufferLeft == 0)
       
   185 			return *bufferLeft < *bufferRight;
       
   186 		++bufferLeft;
       
   187 		++bufferRight;
       
   188 		}
       
   189 	}
       
   190 
       
   191 const unsigned char* String::UCRep (unsigned long aIndex) const
       
   192 	{
       
   193 	return (const unsigned char*)&iRep[aIndex];
       
   194 	}
       
   195 
       
   196 static UTF32 getUTF8(const unsigned char* aUtfByte, unsigned int& aIndex, unsigned int aMax)
       
   197 	{
       
   198 	unsigned char utfByte = *aUtfByte++;
       
   199 	aIndex +=1;
       
   200 	UTF32 unicodeChar = (UTF32) utfByte;
       
   201 
       
   202 	// Slightly cavalier decoding - always write something
       
   203 	// and don't consume bytes which don't fit the pattern!
       
   204 	if ((utfByte & 0xe0) == 0xc0)
       
   205 		{
       
   206 		unicodeChar = (UTF16)((utfByte&0x1f)<<6);
       
   207 		if (aIndex < aMax)
       
   208 			{
       
   209 			utfByte = (unsigned char)(*aUtfByte++);
       
   210 			if ((utfByte&0xc0)==0x80)
       
   211 				{
       
   212 				unicodeChar |= (utfByte&0x3f);
       
   213 				aIndex +=1;
       
   214 				}
       
   215 			}
       
   216 		}
       
   217 	else
       
   218 	if ((utfByte & 0xf0) == 0xe0)
       
   219 		{
       
   220 		unicodeChar = (UTF16)((utfByte&0x0f)<<12);
       
   221 		if (aIndex < aMax)
       
   222 			{
       
   223 			utfByte = (unsigned char)(*aUtfByte++);
       
   224 			if ((utfByte&0xc0)==0x80)
       
   225 				{
       
   226 				unicodeChar |= (utfByte&0x3f)<<6;
       
   227 				aIndex +=1;
       
   228 				}
       
   229 			}
       
   230 		if (aIndex < aMax)
       
   231 			{
       
   232 			utfByte = (unsigned char)(*aUtfByte++);
       
   233 			if ((utfByte&0xc0)==0x80)
       
   234 				{
       
   235 				unicodeChar |= (utfByte&0x3f);
       
   236 				aIndex +=1;
       
   237 				}
       
   238 			}
       
   239 		}
       
   240 	else if ((utfByte & 0xF8) == 0xF0)	// 4 bytes UTF-8
       
   241 		{
       
   242 		unicodeChar = (UTF32)((utfByte & 0x07) << 18);
       
   243 		if (aIndex < aMax)
       
   244 			{
       
   245 			utfByte = (unsigned char)(*aUtfByte++);
       
   246 			if ((utfByte&0xc0)==0x80)
       
   247 				{
       
   248 				unicodeChar |= (utfByte&0x3f)<<12;
       
   249 				aIndex +=1;
       
   250 				}
       
   251 			}
       
   252 		if (aIndex < aMax)
       
   253 			{
       
   254 			utfByte = (unsigned char)(*aUtfByte++);
       
   255 			if ((utfByte&0xc0)==0x80)
       
   256 				{
       
   257 				unicodeChar |= (utfByte&0x3f)<<6;
       
   258 				aIndex +=1;
       
   259 				}
       
   260 			}
       
   261 		if (aIndex < aMax)
       
   262 			{
       
   263 			utfByte = (unsigned char)(*aUtfByte++);
       
   264 			if ((utfByte&0xc0)==0x80)
       
   265 				{
       
   266 				unicodeChar |= (utfByte&0x3f);
       
   267 				aIndex +=1;
       
   268 				}
       
   269 			}
       
   270 		}
       
   271 		
       
   272 	return unicodeChar;
       
   273 	}
       
   274 
       
   275 int String::FindSubString(String aSubString, int aStart)
       
   276 	{
       
   277 	for(unsigned int j=aStart; j<iLength - aSubString.Length() + 1; j++)
       
   278 		{
       
   279 		bool match = true;
       
   280 		for(unsigned int k=0; k< aSubString.Length(); k++)
       
   281 			if(iRep[j+k]!=aSubString[k])
       
   282 				match = false;
       
   283 		if(match) return j;
       
   284 		}
       
   285 	return EStringNotFound;
       
   286 	}
       
   287 
       
   288 
       
   289 String String::ExtractSubString(const unsigned int aStart, const unsigned int aFinish)
       
   290 	{
       
   291 	// ensure that the passed bounds are valid 
       
   292 	
       
   293 	if( aStart > iLength ) 
       
   294 		{
       
   295 		String substr = "";
       
   296 		return substr;
       
   297 		}
       
   298 	else if( aFinish > iLength ) {
       
   299 		assert( !"This condition should never happen" );
       
   300 		String substr = "";
       
   301 		return substr;
       
   302 		}
       
   303 	else if( aStart > aFinish ) {
       
   304 		assert( !"This condition should never happen" );
       
   305 		String substr = "";
       
   306 		return substr;
       
   307 		}
       
   308 
       
   309 	// if valid - go and copy everything
       
   310 	else {
       
   311 		char *char_substr = &(iRep[aStart]);
       
   312 		char temp_char = iRep[aFinish + 1];
       
   313 		iRep[aFinish + 1] = 0;
       
   314 		String substr( char_substr );
       
   315 		iRep[aFinish + 1] = temp_char;
       
   316 		return substr;
       
   317 		}
       
   318 	}
       
   319 
       
   320 String& String::operator+= (char * SourceChar)
       
   321 	{
       
   322 	char * pOldRep = iRep;
       
   323 	iLength += strlen(SourceChar);
       
   324 	iRep = new char [iLength + 1];
       
   325 	strcpy( iRep, pOldRep);
       
   326 	strcpy( iRep + iLength-strlen(SourceChar),  SourceChar);
       
   327 	delete [] pOldRep;
       
   328 	return * this;
       
   329 	}
       
   330 
       
   331 int String::Atoi()
       
   332 	{
       
   333 	return atoi(iRep);
       
   334 	}
       
   335 
       
   336 int String::Export( UTF16 *buffer, int& length, CharacterSet fromchset ) const
       
   337 // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       
   338 //
       
   339 // Export the string from its internal form to the caller supplied buffer
       
   340 // (which in this case is meant to be Unicode).  On entry, length indicates
       
   341 // the number of characters in the buffer.  On exit, this is set to the number
       
   342 // of characters actually written. The export involves mapping from the
       
   343 // specified character set to Unicode.
       
   344 //
       
   345 // The return value is normally TRUE.  If not, truncation has occurred.
       
   346 //
       
   347 //
       
   348 // NB, this function is currently under development and character set
       
   349 // mapping is not yet properly implemented.
       
   350 // NB2. it's also largely duplicated in String::ExportLength, which should
       
   351 // be updated to match...
       
   352 // ---------------------------------------------------------------------------
       
   353    {
       
   354    unsigned int index = 0;           // index into internal buffer
       
   355    int outcount = 0;        // count of characters written to export buffer
       
   356    for(int i=0;i<length;i++)
       
   357      {
       
   358 	   buffer[i] = 0;
       
   359      }
       
   360 
       
   361    // Because of multibyte character sets, the general code pattern for
       
   362    // copying the characters has to work left to right to allow for
       
   363    // byte sequence interpretation.  The raw byte count of such a string
       
   364    // can be greater than the number of characters it represents.
       
   365    switch ( fromchset )
       
   366       {
       
   367    case CP1252:
       
   368       // In this case, we know that there is only a narrow range
       
   369       // of characters that aren't a direct mapping.
       
   370 
       
   371       while ( (index < iLength) && ( outcount < length ) )
       
   372          {
       
   373 
       
   374          // To allow for direct Unicode characters in CP1252 strings, we
       
   375          // insert a special character followed by the UTF8 sequence
       
   376 
       
   377          if (*UCRep(index) == UnicodeEscape)
       
   378             {
       
   379             index +=1;
       
   380             if (index < iLength)
       
   381                {
       
   382                buffer[outcount] = getUTF8(UCRep(index), index, iLength);
       
   383                }
       
   384             }
       
   385          else
       
   386             {
       
   387             buffer[outcount] = *UCRep(index);
       
   388             index +=1;
       
   389             }
       
   390 
       
   391       // Now, see if the character ended up in the forbidden range.  If so, map
       
   392       // it to the correct Unicode character.
       
   393 
       
   394       if ( buffer[outcount] < 255 )
       
   395          {
       
   396          unsigned char temp;
       
   397          temp = (unsigned char)buffer[outcount];
       
   398          CP1252_exceptions.map(temp, buffer[outcount]);
       
   399          }
       
   400 
       
   401       outcount += 1;
       
   402 
       
   403 
       
   404          } // end of loop to export characters
       
   405     break;
       
   406 
       
   407 
       
   408 
       
   409    case UTF8: 
       
   410 
       
   411       while ( (index < iLength) && ( outcount < length ) )
       
   412          {
       
   413 		UTF32 tu32 = getUTF8(UCRep(index), index, iLength);
       
   414 		if (tu32 <= 0xFFFF)
       
   415 			{
       
   416 			buffer[outcount] = tu32;
       
   417 			outcount +=1;
       
   418 			}
       
   419 		else
       
   420 			{
       
   421 			if ( tu32 > 0x10ffff )
       
   422 			{
       
   423 			printf("Surrogate character code must be a number in the range 0x10000 to 0x10ffff\n");
       
   424 			printf("Error: rcomp.exe line %d\n", __LINE__);
       
   425 			}
       
   426 			
       
   427 			buffer[outcount] = (UTF16)(0xD7C0 + (tu32 >> 10));			// high surrogate
       
   428 			outcount++;
       
   429 			if (outcount < length)
       
   430 				{
       
   431 				buffer[outcount] = (UTF16)(0xDC00 | (tu32 & 0x3FF));		// low surrogate
       
   432 				outcount++;
       
   433 				}
       
   434 			else
       
   435 				{
       
   436 				printf("Error: rcomp.exe line %d\n", __LINE__);
       
   437 				}
       
   438 		}
       
   439          } // end of loop to export characters
       
   440    break;
       
   441 
       
   442 
       
   443    default: // this should eventually become an exception
       
   444 
       
   445       while ( (index < iLength) && ( outcount < length ) )
       
   446          {
       
   447           buffer[outcount] = *UCRep(index);
       
   448           outcount +=1;
       
   449           index += 1;
       
   450          } // end of loop to export characters
       
   451    break;
       
   452 
       
   453 
       
   454       } // end of switch on character set.
       
   455 
       
   456    length = outcount;
       
   457 
       
   458    // If the index is not now equal to the internal length then
       
   459    // the string was truncated on export.
       
   460 
       
   461    if ( index != iLength ) return 0; else return 1;
       
   462 
       
   463 
       
   464 
       
   465    } // end of Export to Unicode function.
       
   466 
       
   467 
       
   468 // What length of exported text does this String represent?
       
   469 
       
   470 unsigned long String::ExportLength (CharacterSet tochset, CharacterSet fromchset) const
       
   471 	{
       
   472 	if (tochset != Unicode)
       
   473 		return iLength;
       
   474 
       
   475 	unsigned int index = 0;           // index into internal buffer
       
   476 	unsigned long outcount = 0;       // count of output characters
       
   477 
       
   478 	switch ( fromchset )
       
   479 		{
       
   480 	case CP1252:
       
   481 		// In this case, we know that there is only a narrow range
       
   482 		// of characters that aren't a direct mapping.
       
   483 
       
   484 		while ( (index < iLength) )
       
   485 			{
       
   486 
       
   487 			// To allow for direct Unicode characters in CP1252 strings, we
       
   488 			// insert a special character followed by the UTF8 sequence
       
   489 
       
   490 			if (*UCRep(index) == UnicodeEscape)
       
   491 				{
       
   492 				index +=1;
       
   493 				if (index < iLength)
       
   494 					{
       
   495 					(void) getUTF8(UCRep(index), index, iLength);
       
   496 					}
       
   497 				}
       
   498 			else
       
   499 				{
       
   500 				index +=1;
       
   501 				}
       
   502 			outcount += 1;
       
   503 			}
       
   504 		break;
       
   505 
       
   506 	case UTF8: 
       
   507 
       
   508 		while ( (index < iLength) )
       
   509 			{
       
   510 			UTF32 tu32 = getUTF8(UCRep(index), index, iLength);
       
   511 			outcount +=1;
       
   512 			if (tu32 > 0xFFFF)
       
   513 				++outcount;
       
   514 			}
       
   515 		break;
       
   516 
       
   517 
       
   518 	default: // this should eventually become an exception
       
   519 
       
   520 		outcount = iLength;
       
   521 		break;
       
   522 
       
   523 		} // end of switch on character set.
       
   524 
       
   525 	return outcount;
       
   526 	}
       
   527 
       
   528 
       
   529 
       
   530 // end of ASTRING.CPP