|
1 /* |
|
2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the License "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * STRING.CPP |
|
16 * |
|
17 */ |
|
18 |
|
19 |
|
20 #include <stdio.h> |
|
21 #include <string.h> |
|
22 #include <assert.h> |
|
23 #include <ctype.h> |
|
24 #include "CTABLE.H" // character code mapping classes |
|
25 #include "ASTRING.H" |
|
26 |
|
27 |
|
28 extern Mapping_range CP1252_exceptions; |
|
29 // table of exceptions from CP1252 1:1 mapping with Unicode. |
|
30 |
|
31 |
|
32 #undef STRING_DEBUG |
|
33 |
|
34 String::String() |
|
35 { |
|
36 iLength=0; |
|
37 iRep=NULL; |
|
38 } |
|
39 |
|
40 String::String(const char* aText) |
|
41 { |
|
42 ArrayItem(); |
|
43 iLength=strlen(aText); |
|
44 iRep=new char[iLength+1]; |
|
45 assert(iRep!=NULL); |
|
46 strcpy(iRep,aText); |
|
47 } |
|
48 |
|
49 String::String(const String& SourceString): ArrayItem(SourceString) |
|
50 { |
|
51 iLength=SourceString.iLength; |
|
52 iRep=new char[ iLength + 1]; |
|
53 assert( iRep != 0); |
|
54 if(iLength==0) *iRep='\0'; |
|
55 else strcpy( iRep, SourceString.iRep); |
|
56 } |
|
57 |
|
58 String::~String() |
|
59 { |
|
60 delete [] iRep; |
|
61 } |
|
62 |
|
63 void String::Reset() |
|
64 { |
|
65 iLength=0; |
|
66 delete [] iRep; |
|
67 iRep=NULL; |
|
68 } |
|
69 |
|
70 char& String::operator[] (unsigned long CharIndex) const |
|
71 { |
|
72 if ( CharIndex > iLength) |
|
73 { |
|
74 assert( 0); |
|
75 return iRep[ iLength]; // i.e. \0 |
|
76 } |
|
77 return iRep[ CharIndex]; |
|
78 } |
|
79 |
|
80 String& String::operator=(const String& SourceString) |
|
81 { |
|
82 if(&SourceString==this) |
|
83 return *this; |
|
84 delete [] iRep; |
|
85 iLength=SourceString.iLength; |
|
86 if ( iLength == 0) |
|
87 { |
|
88 iRep=NULL; |
|
89 return * this; |
|
90 } |
|
91 iRep=new char [ iLength + 1]; |
|
92 assert( iRep != NULL); |
|
93 strcpy( iRep, SourceString.iRep); |
|
94 return *this; |
|
95 } |
|
96 |
|
97 String& String::operator+= (const String & SourceString) |
|
98 { |
|
99 char * pOldRep=iRep; |
|
100 iLength += SourceString.iLength; |
|
101 if ( iLength == 0) |
|
102 iRep=NULL; |
|
103 else |
|
104 { |
|
105 iRep=new char [ iLength + 1]; |
|
106 assert( iRep != NULL); |
|
107 strcpy( iRep, pOldRep); |
|
108 strcpy( iRep + strlen( pOldRep), SourceString.iRep); |
|
109 } |
|
110 delete [] pOldRep; |
|
111 return *this; |
|
112 } |
|
113 |
|
114 int String::operator== (const String & CompareString) const |
|
115 { |
|
116 return(!strcmp( iRep, CompareString.iRep)); |
|
117 } |
|
118 |
|
119 int String::operator!= (const String & CompareString) const |
|
120 { |
|
121 return(strcmp( iRep, CompareString.iRep)); |
|
122 } |
|
123 |
|
124 unsigned long String::Length() const |
|
125 { |
|
126 return iLength; |
|
127 } |
|
128 |
|
129 ostream& operator<< ( ostream& os, const String & a) |
|
130 { |
|
131 return ( os << ( ( a.iLength <= 0) ? "<empty>" : a.iRep) ); |
|
132 } |
|
133 |
|
134 const char * String::GetBuffer() const |
|
135 { |
|
136 assert (iRep != NULL); |
|
137 return iRep; |
|
138 } |
|
139 |
|
140 const char * String::GetAssertedNonEmptyBuffer() const |
|
141 { |
|
142 assert( iRep != NULL); |
|
143 assert( iLength > 0); |
|
144 return iRep; |
|
145 } |
|
146 |
|
147 int String::IsDecNatural() const |
|
148 { |
|
149 assert( iLength > 0); |
|
150 unsigned long i=0; |
|
151 if ( iRep[0] == '-') |
|
152 i++; |
|
153 for( ; i < iLength; i++) |
|
154 { |
|
155 if (!isdigit( iRep[i]) ) |
|
156 return 0; // Non-digit found. |
|
157 } |
|
158 return 1; // Successful - all numeric. |
|
159 } |
|
160 |
|
161 String & String::Upper() |
|
162 { |
|
163 for(unsigned long i=0;i<iLength;i++) |
|
164 iRep[i]=char(toupper(iRep[i])); |
|
165 return *this; |
|
166 } |
|
167 |
|
168 String String::operator+ (const String & SecondString) const |
|
169 { |
|
170 String s; |
|
171 s.iLength=iLength + SecondString.iLength; |
|
172 s.iRep=new char[ s.iLength + 1]; |
|
173 strcpy( s.iRep, iRep); |
|
174 strcpy( s.iRep + iLength, SecondString.iRep); |
|
175 return s; |
|
176 } |
|
177 |
|
178 bool StringLess::operator()(const String& aLeft, const String& aRight) const |
|
179 { |
|
180 const char* bufferLeft = aLeft.GetBuffer(); |
|
181 const char* bufferRight = aRight.GetBuffer(); |
|
182 for (;;) |
|
183 { |
|
184 if (*bufferLeft != *bufferRight || *bufferLeft == 0) |
|
185 return *bufferLeft < *bufferRight; |
|
186 ++bufferLeft; |
|
187 ++bufferRight; |
|
188 } |
|
189 } |
|
190 |
|
191 const unsigned char* String::UCRep (unsigned long aIndex) const |
|
192 { |
|
193 return (const unsigned char*)&iRep[aIndex]; |
|
194 } |
|
195 |
|
196 static UTF32 getUTF8(const unsigned char* aUtfByte, unsigned int& aIndex, unsigned int aMax) |
|
197 { |
|
198 unsigned char utfByte = *aUtfByte++; |
|
199 aIndex +=1; |
|
200 UTF32 unicodeChar = (UTF32) utfByte; |
|
201 |
|
202 // Slightly cavalier decoding - always write something |
|
203 // and don't consume bytes which don't fit the pattern! |
|
204 if ((utfByte & 0xe0) == 0xc0) |
|
205 { |
|
206 unicodeChar = (UTF16)((utfByte&0x1f)<<6); |
|
207 if (aIndex < aMax) |
|
208 { |
|
209 utfByte = (unsigned char)(*aUtfByte++); |
|
210 if ((utfByte&0xc0)==0x80) |
|
211 { |
|
212 unicodeChar |= (utfByte&0x3f); |
|
213 aIndex +=1; |
|
214 } |
|
215 } |
|
216 } |
|
217 else |
|
218 if ((utfByte & 0xf0) == 0xe0) |
|
219 { |
|
220 unicodeChar = (UTF16)((utfByte&0x0f)<<12); |
|
221 if (aIndex < aMax) |
|
222 { |
|
223 utfByte = (unsigned char)(*aUtfByte++); |
|
224 if ((utfByte&0xc0)==0x80) |
|
225 { |
|
226 unicodeChar |= (utfByte&0x3f)<<6; |
|
227 aIndex +=1; |
|
228 } |
|
229 } |
|
230 if (aIndex < aMax) |
|
231 { |
|
232 utfByte = (unsigned char)(*aUtfByte++); |
|
233 if ((utfByte&0xc0)==0x80) |
|
234 { |
|
235 unicodeChar |= (utfByte&0x3f); |
|
236 aIndex +=1; |
|
237 } |
|
238 } |
|
239 } |
|
240 else if ((utfByte & 0xF8) == 0xF0) // 4 bytes UTF-8 |
|
241 { |
|
242 unicodeChar = (UTF32)((utfByte & 0x07) << 18); |
|
243 if (aIndex < aMax) |
|
244 { |
|
245 utfByte = (unsigned char)(*aUtfByte++); |
|
246 if ((utfByte&0xc0)==0x80) |
|
247 { |
|
248 unicodeChar |= (utfByte&0x3f)<<12; |
|
249 aIndex +=1; |
|
250 } |
|
251 } |
|
252 if (aIndex < aMax) |
|
253 { |
|
254 utfByte = (unsigned char)(*aUtfByte++); |
|
255 if ((utfByte&0xc0)==0x80) |
|
256 { |
|
257 unicodeChar |= (utfByte&0x3f)<<6; |
|
258 aIndex +=1; |
|
259 } |
|
260 } |
|
261 if (aIndex < aMax) |
|
262 { |
|
263 utfByte = (unsigned char)(*aUtfByte++); |
|
264 if ((utfByte&0xc0)==0x80) |
|
265 { |
|
266 unicodeChar |= (utfByte&0x3f); |
|
267 aIndex +=1; |
|
268 } |
|
269 } |
|
270 } |
|
271 |
|
272 return unicodeChar; |
|
273 } |
|
274 |
|
275 int String::FindSubString(String aSubString, int aStart) |
|
276 { |
|
277 for(unsigned int j=aStart; j<iLength - aSubString.Length() + 1; j++) |
|
278 { |
|
279 bool match = true; |
|
280 for(unsigned int k=0; k< aSubString.Length(); k++) |
|
281 if(iRep[j+k]!=aSubString[k]) |
|
282 match = false; |
|
283 if(match) return j; |
|
284 } |
|
285 return EStringNotFound; |
|
286 } |
|
287 |
|
288 |
|
289 String String::ExtractSubString(const unsigned int aStart, const unsigned int aFinish) |
|
290 { |
|
291 // ensure that the passed bounds are valid |
|
292 |
|
293 if( aStart > iLength ) |
|
294 { |
|
295 String substr = ""; |
|
296 return substr; |
|
297 } |
|
298 else if( aFinish > iLength ) { |
|
299 assert( !"This condition should never happen" ); |
|
300 String substr = ""; |
|
301 return substr; |
|
302 } |
|
303 else if( aStart > aFinish ) { |
|
304 assert( !"This condition should never happen" ); |
|
305 String substr = ""; |
|
306 return substr; |
|
307 } |
|
308 |
|
309 // if valid - go and copy everything |
|
310 else { |
|
311 char *char_substr = &(iRep[aStart]); |
|
312 char temp_char = iRep[aFinish + 1]; |
|
313 iRep[aFinish + 1] = 0; |
|
314 String substr( char_substr ); |
|
315 iRep[aFinish + 1] = temp_char; |
|
316 return substr; |
|
317 } |
|
318 } |
|
319 |
|
320 String& String::operator+= (char * SourceChar) |
|
321 { |
|
322 char * pOldRep = iRep; |
|
323 iLength += strlen(SourceChar); |
|
324 iRep = new char [iLength + 1]; |
|
325 strcpy( iRep, pOldRep); |
|
326 strcpy( iRep + iLength-strlen(SourceChar), SourceChar); |
|
327 delete [] pOldRep; |
|
328 return * this; |
|
329 } |
|
330 |
|
331 int String::Atoi() |
|
332 { |
|
333 return atoi(iRep); |
|
334 } |
|
335 |
|
336 int String::Export( UTF16 *buffer, int& length, CharacterSet fromchset ) const |
|
337 // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
|
338 // |
|
339 // Export the string from its internal form to the caller supplied buffer |
|
340 // (which in this case is meant to be Unicode). On entry, length indicates |
|
341 // the number of characters in the buffer. On exit, this is set to the number |
|
342 // of characters actually written. The export involves mapping from the |
|
343 // specified character set to Unicode. |
|
344 // |
|
345 // The return value is normally TRUE. If not, truncation has occurred. |
|
346 // |
|
347 // |
|
348 // NB, this function is currently under development and character set |
|
349 // mapping is not yet properly implemented. |
|
350 // NB2. it's also largely duplicated in String::ExportLength, which should |
|
351 // be updated to match... |
|
352 // --------------------------------------------------------------------------- |
|
353 { |
|
354 unsigned int index = 0; // index into internal buffer |
|
355 int outcount = 0; // count of characters written to export buffer |
|
356 for(int i=0;i<length;i++) |
|
357 { |
|
358 buffer[i] = 0; |
|
359 } |
|
360 |
|
361 // Because of multibyte character sets, the general code pattern for |
|
362 // copying the characters has to work left to right to allow for |
|
363 // byte sequence interpretation. The raw byte count of such a string |
|
364 // can be greater than the number of characters it represents. |
|
365 switch ( fromchset ) |
|
366 { |
|
367 case CP1252: |
|
368 // In this case, we know that there is only a narrow range |
|
369 // of characters that aren't a direct mapping. |
|
370 |
|
371 while ( (index < iLength) && ( outcount < length ) ) |
|
372 { |
|
373 |
|
374 // To allow for direct Unicode characters in CP1252 strings, we |
|
375 // insert a special character followed by the UTF8 sequence |
|
376 |
|
377 if (*UCRep(index) == UnicodeEscape) |
|
378 { |
|
379 index +=1; |
|
380 if (index < iLength) |
|
381 { |
|
382 buffer[outcount] = getUTF8(UCRep(index), index, iLength); |
|
383 } |
|
384 } |
|
385 else |
|
386 { |
|
387 buffer[outcount] = *UCRep(index); |
|
388 index +=1; |
|
389 } |
|
390 |
|
391 // Now, see if the character ended up in the forbidden range. If so, map |
|
392 // it to the correct Unicode character. |
|
393 |
|
394 if ( buffer[outcount] < 255 ) |
|
395 { |
|
396 unsigned char temp; |
|
397 temp = (unsigned char)buffer[outcount]; |
|
398 CP1252_exceptions.map(temp, buffer[outcount]); |
|
399 } |
|
400 |
|
401 outcount += 1; |
|
402 |
|
403 |
|
404 } // end of loop to export characters |
|
405 break; |
|
406 |
|
407 |
|
408 |
|
409 case UTF8: |
|
410 |
|
411 while ( (index < iLength) && ( outcount < length ) ) |
|
412 { |
|
413 UTF32 tu32 = getUTF8(UCRep(index), index, iLength); |
|
414 if (tu32 <= 0xFFFF) |
|
415 { |
|
416 buffer[outcount] = tu32; |
|
417 outcount +=1; |
|
418 } |
|
419 else |
|
420 { |
|
421 if ( tu32 > 0x10ffff ) |
|
422 { |
|
423 printf("Surrogate character code must be a number in the range 0x10000 to 0x10ffff\n"); |
|
424 printf("Error: rcomp.exe line %d\n", __LINE__); |
|
425 } |
|
426 |
|
427 buffer[outcount] = (UTF16)(0xD7C0 + (tu32 >> 10)); // high surrogate |
|
428 outcount++; |
|
429 if (outcount < length) |
|
430 { |
|
431 buffer[outcount] = (UTF16)(0xDC00 | (tu32 & 0x3FF)); // low surrogate |
|
432 outcount++; |
|
433 } |
|
434 else |
|
435 { |
|
436 printf("Error: rcomp.exe line %d\n", __LINE__); |
|
437 } |
|
438 } |
|
439 } // end of loop to export characters |
|
440 break; |
|
441 |
|
442 |
|
443 default: // this should eventually become an exception |
|
444 |
|
445 while ( (index < iLength) && ( outcount < length ) ) |
|
446 { |
|
447 buffer[outcount] = *UCRep(index); |
|
448 outcount +=1; |
|
449 index += 1; |
|
450 } // end of loop to export characters |
|
451 break; |
|
452 |
|
453 |
|
454 } // end of switch on character set. |
|
455 |
|
456 length = outcount; |
|
457 |
|
458 // If the index is not now equal to the internal length then |
|
459 // the string was truncated on export. |
|
460 |
|
461 if ( index != iLength ) return 0; else return 1; |
|
462 |
|
463 |
|
464 |
|
465 } // end of Export to Unicode function. |
|
466 |
|
467 |
|
468 // What length of exported text does this String represent? |
|
469 |
|
470 unsigned long String::ExportLength (CharacterSet tochset, CharacterSet fromchset) const |
|
471 { |
|
472 if (tochset != Unicode) |
|
473 return iLength; |
|
474 |
|
475 unsigned int index = 0; // index into internal buffer |
|
476 unsigned long outcount = 0; // count of output characters |
|
477 |
|
478 switch ( fromchset ) |
|
479 { |
|
480 case CP1252: |
|
481 // In this case, we know that there is only a narrow range |
|
482 // of characters that aren't a direct mapping. |
|
483 |
|
484 while ( (index < iLength) ) |
|
485 { |
|
486 |
|
487 // To allow for direct Unicode characters in CP1252 strings, we |
|
488 // insert a special character followed by the UTF8 sequence |
|
489 |
|
490 if (*UCRep(index) == UnicodeEscape) |
|
491 { |
|
492 index +=1; |
|
493 if (index < iLength) |
|
494 { |
|
495 (void) getUTF8(UCRep(index), index, iLength); |
|
496 } |
|
497 } |
|
498 else |
|
499 { |
|
500 index +=1; |
|
501 } |
|
502 outcount += 1; |
|
503 } |
|
504 break; |
|
505 |
|
506 case UTF8: |
|
507 |
|
508 while ( (index < iLength) ) |
|
509 { |
|
510 UTF32 tu32 = getUTF8(UCRep(index), index, iLength); |
|
511 outcount +=1; |
|
512 if (tu32 > 0xFFFF) |
|
513 ++outcount; |
|
514 } |
|
515 break; |
|
516 |
|
517 |
|
518 default: // this should eventually become an exception |
|
519 |
|
520 outcount = iLength; |
|
521 break; |
|
522 |
|
523 } // end of switch on character set. |
|
524 |
|
525 return outcount; |
|
526 } |
|
527 |
|
528 |
|
529 |
|
530 // end of ASTRING.CPP |