kernel/eka/euser/unicode/Compare.cpp
changeset 0 a41df078684a
child 90 947f0dc9f7a8
equal deleted inserted replaced
-1:000000000000 0:a41df078684a
       
     1 // Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of the License "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // Folding and decomposition implementation
       
    15 // 
       
    16 //
       
    17 
       
    18 #include "FoldDecomp.inl"
       
    19 #include "CompareImp.h"
       
    20 #include "u32std.h"
       
    21 
       
    22 #define ARRAY_LENGTH(a) (static_cast<TInt>(sizeof(a)/sizeof(a[0])))
       
    23 
       
    24 ////////////////////////////////////////////////////////////////////////////////////////////
       
    25 // Global functions
       
    26 ////////////////////////////////////////////////////////////////////////////////////////////
       
    27 
       
    28 /**
       
    29 @internalComponent
       
    30 */
       
    31 TChar UTF16ToChar(const TText16* a)
       
    32 	{
       
    33 	if (0xD800 <= a[0])
       
    34 		{
       
    35 		if (a[0] < 0xE000)
       
    36 			{
       
    37             if (a[0] < 0xDC00 && ::IsLowSurrogate(a[1]))
       
    38 				{
       
    39                 TChar c = ::PairSurrogates(a[0], a[1]);
       
    40 				if ((c & 0xFFFE) != 0xFFFE)
       
    41 					return c;
       
    42 				}
       
    43 			return 0xFFFF;
       
    44 			}
       
    45 		if (a[0] == 0xFFFE)
       
    46 			return 0xFFFF;
       
    47 		}
       
    48 	return a[0];
       
    49 	}
       
    50 
       
    51 /**
       
    52 Is a character a base character (ETrue) or a combiner (EFalse)?
       
    53 For now, we will treat all control characters as base characters.
       
    54 @internalComponent
       
    55 */
       
    56 TBool IsBaseCharacter(TChar a)
       
    57 	{
       
    58 	if(a < 0x220)
       
    59         {
       
    60 		// These Unicode characters are all assigned
       
    61 		// and none of them is a combining character
       
    62 		return ETrue;
       
    63         }
       
    64 	return (a.GetCategory() & 0xFFF0) - TChar::EMarkGroup;
       
    65 	}
       
    66 
       
    67 /**
       
    68 @internalComponent
       
    69 */
       
    70 inline TInt GetDecompositionIndex(TChar a)
       
    71 	{
       
    72     TInt i = DecompositionHashStart(a);
       
    73 	TUint32 v = KUnicodeToIndexHash[i];
       
    74 	if (!v)
       
    75 		return -1;
       
    76 	if ((v & 0xFFFFF) != a)
       
    77 		{
       
    78         TInt step = DecompositionHashStep(a);
       
    79 		do	{
       
    80 			i = (i + step) & KDecompositionHashBitmask;
       
    81 			v = KUnicodeToIndexHash[i];
       
    82 			if (!v)
       
    83 				return -1;
       
    84 			} while ((v & 0xFFFFF) != a);
       
    85 		}
       
    86 // it is important that this is an unsigned shift
       
    87 	return static_cast<TInt>(v >> 20);
       
    88 	}
       
    89 
       
    90 /**
       
    91 Will not work if an invalid index is passed.
       
    92 @internalComponent
       
    93 */
       
    94 static TUTF32Iterator GetFoldedDecomposition(TInt aIndex)
       
    95 	{
       
    96 	TInt singletonIndex = aIndex - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
       
    97 	if (0 <= singletonIndex)
       
    98 		return TUTF32Iterator(KSingletonFolds + singletonIndex);
       
    99 	const TText* start = KNonSingletonFolds + aIndex * 2;
       
   100 	if (*start != KLongD)
       
   101 		return TUTF32Iterator(start, start + 2,
       
   102 			TUTF32Iterator::EStartsWithValidCharacter);
       
   103 	TInt longDecompIndex = start[1];
       
   104 	start = KLongDecompositions + (longDecompIndex & 0xFFF);
       
   105 	return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3,
       
   106 		TUTF32Iterator::EStartsWithValidCharacter);
       
   107 	}
       
   108 
       
   109 /**
       
   110 @internalComponent
       
   111 */
       
   112 static TChar GetFirstFoldedChar(TChar a)
       
   113 	{
       
   114     TInt index = ::GetDecompositionIndex(a);
       
   115     return index < 0? a : ::GetFoldedDecomposition(index).Current();
       
   116 	}
       
   117 
       
   118 /**
       
   119 @internalComponent
       
   120 */
       
   121 static TBool FirstFoldedCodeIsNot(TInt aNonSurrogate, TInt aFoldedNonSurrogate)
       
   122 	{
       
   123     TInt index = ::GetDecompositionIndex(aNonSurrogate);
       
   124 	if (index < 0)
       
   125 		return aNonSurrogate - aFoldedNonSurrogate;
       
   126 	TInt singletonIndex = index - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
       
   127 	if (0 < singletonIndex)
       
   128 		return KSingletonFolds[singletonIndex] - aFoldedNonSurrogate;
       
   129 	const TText* start = KNonSingletonFolds + index * 2;
       
   130 	if (start[0] == KLongD)
       
   131 		start = KLongDecompositions + (start[1] & 0xFFF);
       
   132 	return *start - aFoldedNonSurrogate;
       
   133 	}
       
   134 
       
   135 /**
       
   136 @internalComponent
       
   137 */
       
   138 static TInt GetClass(TFoldedDecompIterator& a)
       
   139 	{
       
   140 	ASSERT(!a.AtEnd());
       
   141 	a.EnterFoldedSequence();
       
   142 	TChar ch = a.Current();
       
   143 	TInt cl = ch.GetCombiningClass();
       
   144 	if (cl == 0)
       
   145 		// Assume starters have been folded from ypogegrammeni
       
   146 		cl = 240;
       
   147 	return cl;
       
   148 	}
       
   149 
       
   150 ////////////////////////////////////////////////////////////////////////////////////////////
       
   151 // TUTF32Iterator
       
   152 ////////////////////////////////////////////////////////////////////////////////////////////
       
   153 
       
   154 /**
       
   155 @internalComponent
       
   156 */
       
   157 void TUTF32Iterator::Next()
       
   158 	{
       
   159 	ASSERT(iStart != iEnd);
       
   160 	while (++iStart != iEnd)
       
   161 		{
       
   162         iCurrent = ::UTF16ToChar(iStart);
       
   163 		if (iCurrent != 0xFFFF)
       
   164 			return;
       
   165 		}
       
   166 	}
       
   167 
       
   168 /**
       
   169 Locates a base character in a string using a folded comparision. Will not find combining 
       
   170 characters, nor will it consider Korean combining Jamo to be equivalent to Hangul.
       
   171 @internalComponent
       
   172 */
       
   173 TBool TUTF32Iterator::LocateFoldedBaseCharacter(TChar aChar)
       
   174 	{
       
   175 	// A quick shortcut for simple rejections
       
   176 	if (aChar < 0xFFFF)
       
   177 		{
       
   178         while (iStart != iEnd && *iStart < 0xD800 && ::FirstFoldedCodeIsNot(*iStart, aChar))
       
   179 			++iStart;
       
   180 		if (iStart != iEnd)
       
   181 			{
       
   182             iCurrent = ::UTF16ToChar(iStart);
       
   183 			if (iCurrent == 0xFFFF)
       
   184 				Next();
       
   185 			}
       
   186 		}
       
   187 	while (!AtEnd())
       
   188 		{
       
   189         TChar foldedChar = ::GetFirstFoldedChar(iCurrent);
       
   190 		if (aChar == foldedChar)
       
   191 			return ETrue;
       
   192 		Next();
       
   193 		}
       
   194 	return EFalse;
       
   195 	}
       
   196 
       
   197 ////////////////////////////////////////////////////////////////////////////////////////////
       
   198 // TFoldedDecompIterator
       
   199 ////////////////////////////////////////////////////////////////////////////////////////////
       
   200 
       
   201 /**
       
   202 @internalComponent
       
   203 */
       
   204 TFoldedDecompIterator::TFoldedDecompIterator(const TUTF32Iterator& a)
       
   205 	{
       
   206 	Set(a);
       
   207 	}
       
   208 
       
   209 /**
       
   210 @internalComponent
       
   211 */
       
   212 TBool TFoldedDecompIterator::AtEnd() const
       
   213 	{
       
   214 	return iOriginal.AtEnd();
       
   215 	}
       
   216 
       
   217 /**
       
   218 @internalComponent
       
   219 */
       
   220 TBool TFoldedDecompIterator::AtEndOrWildcard() const
       
   221 	{
       
   222 	// neither '?' nor '*' have decomposition sequences, so we can assume that
       
   223 	// if we are pointing at one or the other, we don't need to check if we
       
   224 	// are in a decomposition sequence or not.
       
   225 	return iOriginal.AtEnd() || iOriginal.Current() == '?' || iOriginal.Current() == '*';
       
   226 	}
       
   227 
       
   228 /**
       
   229 @internalComponent
       
   230 */
       
   231 TBool TFoldedDecompIterator::EnterFoldedSequence()
       
   232 	{
       
   233 	ASSERT(!AtEnd());
       
   234 	return !IsInFoldedSequence() && StrictEnterFoldedSequence();
       
   235 	}
       
   236 
       
   237 /**
       
   238 Enter folded sequence, assuming that we are not already in one
       
   239 @internalComponent
       
   240 */
       
   241 TBool TFoldedDecompIterator::StrictEnterFoldedSequence()
       
   242 	{
       
   243 	ASSERT(!AtEnd());
       
   244 	ASSERT(!IsInFoldedSequence());
       
   245     TInt index = ::GetDecompositionIndex(iOriginal.Current());
       
   246 	if (index < 0)
       
   247 		return EFalse;
       
   248 	iFolded = ::GetFoldedDecomposition(index);
       
   249 	return ETrue;
       
   250 	}
       
   251 
       
   252 /**
       
   253 An iota might have folded from a combining ypogegrammeni.
       
   254 If the current character is a base character, this function will
       
   255 detect whether it was folded from a combining character (and
       
   256 therefore does not constitute a grapheme boundary).
       
   257 @internalComponent
       
   258 */
       
   259 TBool TFoldedDecompIterator::CurrentIsBaseFoldedFromCombiner() const
       
   260 	{
       
   261 	if (!IsInFoldedSequence())
       
   262 		return EFalse;
       
   263 	// The only character that does this is Ypogerammeni, which folds to iota
       
   264 	if (iFolded.Current() != 0x3B9)
       
   265 		return EFalse;
       
   266 	// If the unfolded character is a combiner then it cannot contain an iota,
       
   267 	// so it must be an ypogegrammeni that has been folded to one.
       
   268 	// This will catch 0x345, the ypogegrammeni itself.
       
   269     if (!::IsBaseCharacter(iOriginal.Current()))
       
   270 		return ETrue;
       
   271 	// Otherwise the base character will be at the start of the decomposition
       
   272 	// sequence. We will assume that it is folded from a genuine iota if and
       
   273 	// only if there is an iota at the start of the folded decomposition
       
   274 	// sequence. (In theory there might be an iota with a combining
       
   275 	// ypogegrammeni, but in practice this will not occur).
       
   276 	TInt index = ::GetDecompositionIndex(iOriginal.Current());
       
   277 	ASSERT(0 <= index);
       
   278 	TUTF32Iterator folded = ::GetFoldedDecomposition(index);
       
   279 	return folded.Current() != 0x3B9;
       
   280 	}
       
   281 
       
   282 /**
       
   283 @internalComponent
       
   284 */
       
   285 TChar TFoldedDecompIterator::Current() const
       
   286 	{
       
   287 	ASSERT(!AtEnd());
       
   288 	return IsInFoldedSequence()? iFolded.Current() : iOriginal.Current();
       
   289 	}
       
   290 
       
   291 /** 
       
   292 Move past this code if it matches unfolded or folded 
       
   293 @internalComponent
       
   294 */
       
   295 TBool TFoldedDecompIterator::Match(TChar aCode)
       
   296 	{
       
   297 	ASSERT(!AtEnd());
       
   298 	if (!IsInFoldedSequence())
       
   299 		{
       
   300 		if (aCode == iOriginal.Current())
       
   301 			{
       
   302 			iOriginal.Next();
       
   303 			return ETrue;
       
   304 			}
       
   305 		if (!StrictEnterFoldedSequence())
       
   306 			return EFalse;
       
   307 		}
       
   308 	ASSERT(IsInFoldedSequence());
       
   309 	if (aCode == iFolded.Current())
       
   310 		{
       
   311 		iFolded.Next();
       
   312 		if (iFolded.AtEnd())
       
   313 			iOriginal.Next();
       
   314 		return ETrue;
       
   315 		}
       
   316 	return EFalse;
       
   317 	}
       
   318 
       
   319 /** 
       
   320 Move this and argument past matching character. 
       
   321 @internalComponent
       
   322 */
       
   323 TBool TFoldedDecompIterator::Match(TFoldedDecompIterator& aThat)
       
   324 	{
       
   325 	ASSERT(!AtEnd());
       
   326 	if (!IsInFoldedSequence())
       
   327 		{
       
   328 		if ( aThat.Match(iOriginal.Current()) )
       
   329 			{
       
   330 			iOriginal.Next();
       
   331 			return ETrue;
       
   332 			}
       
   333 		if (!StrictEnterFoldedSequence())
       
   334 			return EFalse;
       
   335 		}
       
   336 	ASSERT(IsInFoldedSequence());
       
   337 	if ( aThat.Match(iFolded.Current()) )
       
   338 		{
       
   339 		iFolded.Next();
       
   340 		if (iFolded.AtEnd())
       
   341 			iOriginal.Next();
       
   342 		return ETrue;
       
   343 		}
       
   344 	return EFalse;
       
   345 	}
       
   346 
       
   347 /** 
       
   348 @internalComponent
       
   349 */
       
   350 void TFoldedDecompIterator::Next()
       
   351 	{
       
   352 	ASSERT(!AtEnd());
       
   353 	if (IsInFoldedSequence())
       
   354 		{
       
   355 		iFolded.Next();
       
   356 		if (IsInFoldedSequence())
       
   357 			return;
       
   358 		}
       
   359 	iOriginal.Next();
       
   360 	}
       
   361 
       
   362 ////////////////////////////////////////////////////////////////////////////////////////////
       
   363 // TFoldedSortedDecompIterator
       
   364 ////////////////////////////////////////////////////////////////////////////////////////////
       
   365 
       
   366 /**
       
   367 Set this iterator to iterate over the next combining characters.
       
   368 Iotas in folded sequences are assumed to be character class 240
       
   369 (combining ypogegrammeni). Next() must be used once before
       
   370 the first call to Current(), as long as AtEnd() returns false.
       
   371 @param aBase Sets the start of the iteration. This value is advanced to the
       
   372              end of the iteration.
       
   373 @return The number of codes in the iteration.
       
   374 @internalComponent
       
   375 */
       
   376 TInt TFoldedSortedDecompIterator::Set(TFoldedDecompIterator &aBase)
       
   377 	{
       
   378 	iStart = aBase;
       
   379 	TInt length = 0;
       
   380 	iCurrentClass = 256;
       
   381 
       
   382 	const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet;
       
   383 
       
   384 	// Find the next starter (i.e. character with combining class 0).
       
   385 	// We must not count iota folded from ypogegrammeni.
       
   386 	// Ypogegrammeni has combining class 240.
       
   387 	// Iota has combining class 0.
       
   388 	// Also we will be searching for the character with the smallest
       
   389 	// combining class to start at.
       
   390 	while (!aBase.AtEnd())
       
   391 		{
       
   392 		aBase.EnterFoldedSequence();
       
   393 		TChar ch = aBase.Current();
       
   394 		TInt cl = TUnicode(TUint(ch)).GetCombiningClass(charDataSet);
       
   395 		if (cl == 0)
       
   396 			{
       
   397 			if (aBase.CurrentIsBaseFoldedFromCombiner())
       
   398 				cl = 240;
       
   399 			else
       
   400 				break;
       
   401 			}
       
   402 		if (cl < iCurrentClass)
       
   403 			{
       
   404 			iCurrentClass = cl;
       
   405 			iCurrent = aBase;
       
   406 			iCurrentCount = length;
       
   407 			}
       
   408 		++length;
       
   409 		aBase.Next();
       
   410 		}
       
   411 	iRemaining = length;
       
   412 	iLength = length;
       
   413 	ASSERT(iLength == 0 || iCurrentClass < 256);
       
   414 	return length;
       
   415 	}
       
   416 
       
   417 /** 
       
   418 Set this iterator so that AtEnd() returns ETrue. 
       
   419 @internalComponent
       
   420 */
       
   421 void TFoldedSortedDecompIterator::Set()
       
   422 	{
       
   423 	iRemaining = 0;
       
   424 	}
       
   425 
       
   426 /** 
       
   427 @internalComponent
       
   428 */
       
   429 TBool TFoldedSortedDecompIterator::AtEnd() const
       
   430 	{
       
   431 	return iRemaining == 0;
       
   432 	}
       
   433 
       
   434 /** 
       
   435 @internalComponent
       
   436 */
       
   437 TChar TFoldedSortedDecompIterator::Current() const
       
   438 	{
       
   439 	ASSERT(!AtEnd());
       
   440 	return iCurrent.Current();
       
   441 	}
       
   442 
       
   443 /** 
       
   444 @internalComponent
       
   445 */
       
   446 void TFoldedSortedDecompIterator::Next()
       
   447 	{
       
   448 	ASSERT(!AtEnd());
       
   449 	--iRemaining;
       
   450 	while (++iCurrentCount != iLength)
       
   451 		{
       
   452 		iCurrent.Next();
       
   453         if (::GetClass(iCurrent) == iCurrentClass)
       
   454 			return;
       
   455 		}
       
   456 	// We have not found any more of the same class, so we will look
       
   457 	// for the smallest one larger.
       
   458 	TInt minClass = 256;
       
   459 	TFoldedDecompIterator searcher(iStart);
       
   460 	TInt searchCount = 0;
       
   461 	while (searchCount != iLength)
       
   462 		{
       
   463         TInt cl = ::GetClass(searcher);
       
   464 		if (iCurrentClass < cl && cl < minClass)
       
   465 			{
       
   466 			minClass = cl;
       
   467 			iCurrentCount = searchCount;
       
   468 			iCurrent = searcher;
       
   469 			}
       
   470 		++searchCount;
       
   471 		searcher.Next();
       
   472 		}
       
   473 	iCurrentClass = minClass;
       
   474 	ASSERT(minClass < 256 || AtEnd());
       
   475 	}
       
   476 
       
   477 ////////////////////////////////////////////////////////////////////////////////////////////
       
   478 // TFoldedCanonicalIterator
       
   479 ////////////////////////////////////////////////////////////////////////////////////////////
       
   480 
       
   481 /** 
       
   482 @internalComponent
       
   483 */
       
   484 TFoldedCanonicalIterator::TFoldedCanonicalIterator(const TUTF32Iterator& a)
       
   485 	{
       
   486 	iBase.Set(a);
       
   487 	iSorted.Set();
       
   488     if(!iBase.AtEnd())
       
   489         {
       
   490 	    iBase.EnterFoldedSequence();
       
   491 	    if (iBase.Current().GetCombiningClass() != 0 || iBase.CurrentIsBaseFoldedFromCombiner())
       
   492             {
       
   493 		    iSorted.Set(iBase);
       
   494             }
       
   495         }
       
   496 	}
       
   497 
       
   498 /** 
       
   499 @internalComponent
       
   500 */
       
   501 TBool TFoldedCanonicalIterator::AtEnd() const
       
   502 	{
       
   503 	return iSorted.AtEnd() && iBase.AtEnd();
       
   504 	}
       
   505 
       
   506 /** 
       
   507 @internalComponent
       
   508 */
       
   509 TChar TFoldedCanonicalIterator::Current() const
       
   510 	{
       
   511 	ASSERT(!iBase.AtEnd() || !iSorted.AtEnd());
       
   512 	return iSorted.AtEnd()? iBase.Current() : iSorted.Current();
       
   513 	}
       
   514 
       
   515 /** 
       
   516 @internalComponent
       
   517 */
       
   518 void TFoldedCanonicalIterator::Next(const TUnicodeDataSet* aCharDataSet)
       
   519 	{
       
   520 	ASSERT(!iBase.AtEnd() || !iSorted.AtEnd());
       
   521 	if (!iSorted.AtEnd())
       
   522 		{
       
   523 		iSorted.Next();
       
   524 		return;
       
   525 		}
       
   526 	iBase.Next();
       
   527     if(!iBase.AtEnd())
       
   528         {
       
   529 	    iBase.EnterFoldedSequence();
       
   530  	    if (TUnicode(TUint(iBase.Current())).GetCombiningClass(aCharDataSet) != 0 || iBase.CurrentIsBaseFoldedFromCombiner())
       
   531            {
       
   532 		    iSorted.Set(iBase);
       
   533             }
       
   534         }
       
   535 	}
       
   536 
       
   537 ////////////////////////////////////////////////////////////////////////////////////////////
       
   538 // Folding - Global functions and structures
       
   539 ////////////////////////////////////////////////////////////////////////////////////////////
       
   540 
       
   541 /** 
       
   542 @internalComponent
       
   543 */
       
   544 struct TEndTester 
       
   545     { 
       
   546     typedef enum {EGenuine, EWildcard} TType;
       
   547 
       
   548     inline TEndTester(TType aType) :
       
   549         iType(aType)
       
   550         {
       
   551         }
       
   552 
       
   553     inline TBool operator()(const TFoldedDecompIterator& a) const
       
   554         {
       
   555         return iType == EGenuine ? a.AtEnd() : a.AtEndOrWildcard();
       
   556         } 
       
   557 
       
   558 private:
       
   559     TType iType;
       
   560 
       
   561     };
       
   562 
       
   563 /**
       
   564 Consumes as much of aCandidate as matches aSearchTerm up to the next '?' or
       
   565 '*' wildcard or the end of aSearchTerm.
       
   566 It is assumed that the search term begins with a base character.
       
   567 Returns true if and only if the whole search term was matched
       
   568 with a whole number of UTF16s in the candidate string.
       
   569 On return of ETrue the candidate string iterator will have consumed the
       
   570 matching characters the search term will have had all its matching characters
       
   571 consumed.
       
   572 @internalComponent
       
   573 */
       
   574 TBool ConsumeFoldedMatch(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm,
       
   575                          const TEndTester& aEndTester)
       
   576 	{
       
   577 	TBool assumeBase = ETrue;
       
   578 	TFoldedDecompIterator st(aSearchTerm);
       
   579 	TFoldedDecompIterator cs(aCandidateString);
       
   580 	while (!aEndTester(st))
       
   581 		{
       
   582 		if (cs.AtEnd())
       
   583 			return EFalse;
       
   584 		if (st.Match(cs))
       
   585 			{
       
   586 			assumeBase = EFalse;
       
   587 			if (aEndTester(st))
       
   588 				{
       
   589                 // We have a match...
       
   590                 if (cs.IsInFoldedSequence())
       
   591                     // but it was against only part of a character
       
   592                     // in the original string, so we fail.
       
   593                     return EFalse;
       
   594 				aCandidateString = cs.BaseIterator();
       
   595 				aSearchTerm = st.BaseIterator();
       
   596 				return ETrue;
       
   597 				}
       
   598 			continue;
       
   599 			}
       
   600 		// did not match. We need to re-order canonically.
       
   601 		if (assumeBase)
       
   602 			// The first characters did not match.. do not bother
       
   603 			// to re-order.
       
   604 			return EFalse;
       
   605 		TFoldedSortedDecompIterator csc;
       
   606 		TInt cscLength = csc.Set(cs);
       
   607 		if (cscLength < 2)
       
   608 			// If there are less than two characters to be reordered,
       
   609 			// there is no hope of getting a match by re-ordering
       
   610 			return EFalse;
       
   611 		TFoldedSortedDecompIterator stc;
       
   612 		if (cscLength != stc.Set(st))
       
   613 			// if the strings have differing numbers of characters,
       
   614 			// there can be no match
       
   615 			return EFalse;
       
   616 		while (!stc.AtEnd())
       
   617 			{
       
   618 			ASSERT(!csc.AtEnd());
       
   619 			if (stc.Current() != csc.Current())
       
   620 				// well, we tried.
       
   621 				return EFalse;
       
   622 			stc.Next();
       
   623 			csc.Next();
       
   624 			}
       
   625 		}
       
   626 	// If the candidate string is in a folded sequence, then
       
   627 	// we should not accept the match, as we require all matches
       
   628 	// to be for a whole number of characters in the original string.
       
   629 	if (cs.IsInFoldedSequence())
       
   630 		return EFalse;
       
   631 	aCandidateString = cs.BaseIterator();
       
   632 	aSearchTerm = st.BaseIterator();
       
   633 	return ETrue;
       
   634 	}
       
   635 
       
   636 /** 
       
   637 @internalComponent
       
   638 */
       
   639 TBool ConsumeFoldedMatchWholeGraphemes(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm, 
       
   640                                        const TEndTester& aEndTester)
       
   641 	{
       
   642     if (!::ConsumeFoldedMatch(aCandidateString, aSearchTerm, aEndTester))
       
   643 		return EFalse;
       
   644     return aCandidateString.AtEnd() || ::IsBaseCharacter(aCandidateString.Current());
       
   645 	}
       
   646 
       
   647 /** 
       
   648 @internalComponent
       
   649 */
       
   650 static TBool ConsumeGrapheme(TUTF32Iterator& a)
       
   651 	{
       
   652 	if (a.AtEnd())
       
   653 		return EFalse;
       
   654 	a.Next();
       
   655     while (!a.AtEnd() && !::IsBaseCharacter(a.Current()))
       
   656 		a.Next();
       
   657 	return ETrue;
       
   658 	}
       
   659 
       
   660 /** 
       
   661 @internalComponent
       
   662 */
       
   663 TBool MatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
       
   664 	{
       
   665     TEndTester endTester(TEndTester::EWildcard);
       
   666     while(::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester))
       
   667 		{
       
   668 		if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*')
       
   669 			return ETrue;
       
   670 		ASSERT(aSearchTerm.Current() == '?');
       
   671 		aSearchTerm.Next();
       
   672         if (!::ConsumeGrapheme(aCandidateString))
       
   673 			return EFalse;
       
   674 		}
       
   675 	return EFalse;
       
   676 	}
       
   677 
       
   678 /** 
       
   679 @internalComponent
       
   680 */
       
   681 TBool FindMatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
       
   682 	{
       
   683 	// match as many graphemes as there are leading ?s
       
   684 	while (!aSearchTerm.AtEnd()
       
   685 		&& aSearchTerm.Current() != '*' && aSearchTerm.Current() == '?')
       
   686 		{
       
   687         if (!::ConsumeGrapheme(aCandidateString))
       
   688 			return EFalse;
       
   689 		aSearchTerm.Next();
       
   690 		}
       
   691 	if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*')
       
   692 		return ETrue;
       
   693     TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current());
       
   694 	TUTF32Iterator savedST(aSearchTerm);
       
   695 	while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm))
       
   696 		{
       
   697 		TUTF32Iterator savedCS = aCandidateString;
       
   698 		if (::MatchSectionFolded(aCandidateString, aSearchTerm))
       
   699 			return ETrue;
       
   700 		aSearchTerm = savedST;
       
   701 		aCandidateString = savedCS;
       
   702 		aCandidateString.Next();
       
   703 		}
       
   704 	return EFalse;
       
   705 	}
       
   706 
       
   707 /** 
       
   708 @internalComponent
       
   709 */
       
   710 TBool MatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
       
   711                         const TText16* aSearchTermStart, const TText16* aSearchTermEnd)
       
   712 	{
       
   713 	TUTF32Iterator candidate(aCandidateStringStart, aCandidateStringEnd);
       
   714 	TUTF32Iterator searchTerm(aSearchTermStart, aSearchTermEnd);
       
   715 	// First section of search term must match exactly at the start of the
       
   716 	// candidate string.
       
   717 	if (!::MatchSectionFolded(candidate, searchTerm))
       
   718 		return EFalse;
       
   719 
       
   720 	// If there was only one section, it must match the whole candidate string.
       
   721 	if (searchTerm.AtEnd())
       
   722 		return candidate.AtEnd();
       
   723 
       
   724 	while (!searchTerm.AtEnd())
       
   725 		{
       
   726 		searchTerm.Next();
       
   727 		if (!::FindMatchSectionFolded(candidate, searchTerm))
       
   728 			return EFalse;
       
   729 		}
       
   730 
       
   731 	// The last section must match exactly at the end of the candidate string.
       
   732 	if (candidate.AtEnd())	// shortcut
       
   733 		return ETrue;
       
   734 	const TText16* searchTermLastSection = aSearchTermEnd;
       
   735 	while (searchTermLastSection != aSearchTermStart
       
   736 		&& searchTermLastSection[-1] != '*')
       
   737 		--searchTermLastSection;
       
   738 	if (searchTermLastSection == aSearchTermEnd)
       
   739 		// last section is null, so trivially matches
       
   740 		return ETrue;
       
   741 	// Count graphemes by counting the number of base characters.
       
   742 	// The first one is assumed to start a grapheme.
       
   743 	TInt graphemeCount = 1;
       
   744 	for (const TText16* s = searchTermLastSection + 1; s != aSearchTermEnd; ++s)
       
   745 		{
       
   746         if (::IsBaseCharacter(*s))
       
   747 			++graphemeCount;
       
   748 		}
       
   749 	// Count this many graphemes back in the candidate string
       
   750 	const TText16* candidateLastSection = aCandidateStringEnd;
       
   751 	while (graphemeCount != 0
       
   752 		&& candidateLastSection != aCandidateStringStart)
       
   753 		{
       
   754         if (::IsBaseCharacter(*--candidateLastSection))
       
   755 			--graphemeCount;
       
   756 		}
       
   757 	TUTF32Iterator last(candidateLastSection, aCandidateStringEnd);
       
   758 	TUTF32Iterator st(searchTermLastSection, aSearchTermEnd);
       
   759 	return ::MatchSectionFolded(last, st);
       
   760 	}
       
   761 
       
   762 /** 
       
   763 Implementation of MatchF
       
   764 (slow if there is a match: MatchStringFolded is better, but does not return
       
   765 the position of the match)
       
   766 @internalComponent
       
   767 */
       
   768 TInt LocateMatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
       
   769                              const TText16* aSearchTermStart, const TText16* aSearchTermEnd)
       
   770 	{
       
   771 	// Quick shortcut for simple non-match
       
   772 	if (aSearchTermStart != aSearchTermEnd && *aSearchTermStart != '*')
       
   773 		{
       
   774 		if (aCandidateStringStart == aCandidateStringEnd)
       
   775 			return KErrNotFound;
       
   776 		// We won't bother with non-characters and surrogate pairs.
       
   777 		if (*aSearchTermStart != '?'
       
   778 			&& *aSearchTermStart < 0xD800
       
   779 			&& *aCandidateStringStart < 0xD800
       
   780             && ::GetFirstFoldedChar(*aSearchTermStart) != ::GetFirstFoldedChar(*aCandidateStringStart))
       
   781 			return KErrNotFound;
       
   782 		}
       
   783     if (!::MatchStringFolded(aCandidateStringStart, aCandidateStringEnd,
       
   784 		aSearchTermStart, aSearchTermEnd))
       
   785 		return KErrNotFound;
       
   786 	// find where it matches
       
   787 	while (aSearchTermStart != aSearchTermEnd && *aSearchTermStart == '*')
       
   788 		++aSearchTermStart;
       
   789 	const TText16* end = aSearchTermStart;
       
   790 	while (end != aSearchTermEnd && *end != '*')
       
   791 		++end;
       
   792 	// To preserve existing behaviour, a search term consisting of nothing
       
   793 	// but stars is considered to match at 0.
       
   794 	if (end == aSearchTermStart)
       
   795 		return 0;
       
   796 	for (const TText16* csSection = aCandidateStringStart;
       
   797 		csSection <= aCandidateStringEnd; ++csSection)
       
   798 		{
       
   799 		TUTF32Iterator cs(csSection, aCandidateStringEnd);
       
   800 		TUTF32Iterator st(aSearchTermStart, end);
       
   801         if (::MatchSectionFolded(cs, st))
       
   802 			{
       
   803 			// If this match must match exactly at the end, we must keep
       
   804 			// going.
       
   805 			// This could be a lot faster, with some optimizations.
       
   806 			if (end == aSearchTermEnd)
       
   807 				{
       
   808 				if (!cs.AtEnd())
       
   809 					continue;
       
   810 				}
       
   811 			return csSection - aCandidateStringStart;
       
   812 			}
       
   813         // Because this function is using TUTF32Iterator, which means the
       
   814         // original author want to support surrogate. Take it as a defect and
       
   815         // fix it, while do not define a new LocateMatchStringFoldedSurrogate().
       
   816         if (IsSurrogate(*csSection))
       
   817         	++csSection;
       
   818 		}
       
   819 	// this should never happen!
       
   820 	ASSERT(0);
       
   821 	return KErrNotFound;
       
   822 	}
       
   823 
       
   824 /** 
       
   825 Implementation of FindF
       
   826 @internalComponent
       
   827 */
       
   828 TInt FindFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
       
   829 	{
       
   830     //Empty aSearchTerm string? - Then return 0 - keep the new implementation functionally 
       
   831     //compatible with the old one.
       
   832     if(aSearchTerm.Length() == 0)
       
   833         {
       
   834         return 0;
       
   835         }
       
   836 	const TText16* candidateStartPosition = aCandidateString.CurrentPosition();
       
   837     TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current());
       
   838 	TUTF32Iterator savedST(aSearchTerm);
       
   839 	while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm))
       
   840 		{
       
   841 		TUTF32Iterator savedCS = aCandidateString;
       
   842         TEndTester endTester(TEndTester::EGenuine);
       
   843         if (::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester))
       
   844 			return savedCS.CurrentPosition() - candidateStartPosition;
       
   845 		aSearchTerm = savedST;
       
   846 		aCandidateString = savedCS;
       
   847 		aCandidateString.Next();
       
   848 		}
       
   849 	return KErrNotFound;
       
   850 	}
       
   851 
       
   852 /** 
       
   853 Implementation of boolean CompareF
       
   854 @internalComponent
       
   855 */
       
   856 TBool EqualsFolded(TUTF32Iterator& aLeft, TUTF32Iterator& aRight)
       
   857 	{
       
   858     TEndTester endTester(TEndTester::EGenuine);
       
   859     if (::ConsumeFoldedMatchWholeGraphemes(aLeft, aRight, endTester))
       
   860 		return aLeft.AtEnd();
       
   861 	return EFalse;
       
   862 	}
       
   863 
       
   864 /** 
       
   865 Implementation of tri-state CompareF
       
   866 @internalComponent
       
   867 */
       
   868 TInt CompareFolded(const TUTF32Iterator& aLeft, const TUTF32Iterator& aRight)
       
   869 	{
       
   870 	TFoldedCanonicalIterator left(aLeft);
       
   871 	TFoldedCanonicalIterator right(aRight);
       
   872 
       
   873 	const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet;
       
   874 
       
   875 	while (!left.AtEnd())
       
   876 		{
       
   877 		if (right.AtEnd())
       
   878 			return 1;
       
   879 		TChar cr = right.Current();
       
   880 		TChar cl = left.Current();
       
   881 		if (cr != cl)
       
   882 			return cl - cr;
       
   883         right.Next(charDataSet);
       
   884         left.Next(charDataSet);
       
   885 		}
       
   886 	return right.AtEnd()? 0 : -1;
       
   887 	}
       
   888 
       
   889 ////////////////////////////////////////////////////////////////////////////////////////////
       
   890 // Composition/Decomposition - Global functions and structures
       
   891 ////////////////////////////////////////////////////////////////////////////////////////////
       
   892 
       
   893 /** 
       
   894 @internalComponent
       
   895 */
       
   896 static TUTF32Iterator IndexToNonSingletonDecomposition(TInt aIndex)
       
   897 	{
       
   898 	const TText* start = KNonSingletonDecompositions + aIndex * 2;
       
   899 	if (*start != KLongD)
       
   900 		return TUTF32Iterator(start, start + 2, TUTF32Iterator::EStartsWithValidCharacter);
       
   901 	TInt longDecompIndex = start[1];
       
   902 	start = KLongDecompositions + (longDecompIndex & 0xFFF);
       
   903 	return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3, TUTF32Iterator::EStartsWithValidCharacter);
       
   904 	}
       
   905 
       
   906 /** 
       
   907 Come up with a decomposition for the current value pointed at by the iterator
       
   908 @internalComponent
       
   909 */
       
   910 static TBool Decompose(const TUTF32Iterator& aUTF32It, TUTF32Iterator& aOutIt)
       
   911 	{
       
   912     TInt index = ::GetDecompositionIndex(aUTF32It.Current());
       
   913 	TInt singletonIndex = index - (sizeof(KNonSingletonDecompositions)/sizeof(KNonSingletonDecompositions[0])/2);
       
   914 	const TInt SizeOfSingletonTable = sizeof(KSingletonDecompositions)/sizeof(KSingletonDecompositions[0]);
       
   915 	if(index < 0 || SizeOfSingletonTable <= singletonIndex)
       
   916         {
       
   917         aOutIt = aUTF32It.CurrentAsIterator();
       
   918 		return EFalse;//There is not any valid decomposition
       
   919         }
       
   920 	if(0 <= singletonIndex)
       
   921         {
       
   922         // KSingletonDecompositions contains some items that come from "ShortDecompsLongFolds".
       
   923         // "ShortDecompsLongFolds" contains some characters that have no composition, but have "long fold".
       
   924         // This basically specific to non-BMP characters with fold also outside BMP.
       
   925         // For example:
       
   926         // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
       
   927         // In Unicode 5.0.0, totally, there are 40 similar characters, which are U+10400-U+10427.
       
   928         if (KSingletonDecompositions[singletonIndex] == 0xFFFF)
       
   929         	return EFalse;
       
   930 		aOutIt = TUTF32Iterator(KSingletonDecompositions + singletonIndex);
       
   931         }
       
   932     else
       
   933         {
       
   934         aOutIt = ::IndexToNonSingletonDecomposition(index);
       
   935         }
       
   936     return ETrue;//Valid decomposition
       
   937 	}
       
   938 
       
   939 /** 
       
   940 @internalComponent
       
   941 */
       
   942 static TUTF32Iterator CharToUTF32Iterator(TChar aChar, TDes16& aBuf)
       
   943 	{
       
   944 	aBuf.Zero();
       
   945 	if (aChar < 0x10000)
       
   946 		aBuf.Append(aChar);
       
   947 	else
       
   948 		{
       
   949 		aBuf.Append((aChar >> 10) + 0xD7C0);
       
   950 		aBuf.Append((aChar & 0x3FF) + 0xDC00);
       
   951 		}
       
   952 	const TText16* t = aBuf.Ptr();
       
   953 	return TUTF32Iterator(t, t + aBuf.Length());
       
   954 	}
       
   955 
       
   956 /** 
       
   957 @internalComponent
       
   958 */
       
   959 TBool DecomposeChar(TChar aCh, TPtrC16& aResult)
       
   960     {
       
   961     aResult.Set(NULL, 0);
       
   962     TBuf16<2> srcBuf;
       
   963     TUTF32Iterator it = ::CharToUTF32Iterator(aCh, srcBuf);
       
   964     TBool res = ::Decompose(it, it);
       
   965     if(res)
       
   966         {
       
   967         aResult.Set(it.CurrentPosition(), it.Length());
       
   968         }
       
   969     return res;
       
   970     }
       
   971 
       
   972 /** 
       
   973 Turn an index into the hash table known to point to a non-singleton
       
   974 decomposition into that decomposition.
       
   975 @internalComponent
       
   976 */
       
   977 static TUTF32Iterator HashIndexToDecomposition(TInt aIndex)
       
   978 	{
       
   979 	TUint32 v = KUnicodeToIndexHash[aIndex];
       
   980 	ASSERT(v != 0);
       
   981 	TInt index = static_cast<TInt>(v >> 20);
       
   982 	ASSERT(index < ARRAY_LENGTH(KNonSingletonDecompositions)/2);
       
   983     return ::IndexToNonSingletonDecomposition(index);
       
   984 	}
       
   985 
       
   986 /**
       
   987 Takes a start and (one past the) end index into KCompostionMapping
       
   988 and a number of UTF16 characters (aLengthSoFar). All of the compositions
       
   989 within the range must have their first aLengthSoFar UTF16 characters
       
   990 matching.
       
   991 
       
   992 On entry, if aStart == aEnd then there is no possibility of a match so return
       
   993 immediately with EFalse. To continue, aStart must be strictly less than aEnd.
       
   994 
       
   995 Afterwards, aStart and aEnd will be narrowed to all those compositions
       
   996 where the aLengthSoFar'th UTF16 character matches aNextCharacter.
       
   997 No further compositions existing is indicated by aStart == aEnd.
       
   998 
       
   999 @return ETrue if the composition at aStart is exactly of length aLengthSoFar + 1.
       
  1000 @internalComponent
       
  1001 */
       
  1002 static TBool RefineComposition(TInt& aStart, TInt& aEnd, TInt aLengthSoFar, TInt aNextCharacter)
       
  1003 	{
       
  1004 	if (aStart == aEnd)
       
  1005 		return EFalse;
       
  1006 	ASSERT((TUint)aStart < (TUint)aEnd);
       
  1007  	ASSERT((TUint)aEnd <= (TUint)ARRAY_LENGTH(KCompositionMapping));
       
  1008     TUTF32Iterator startIterator(::HashIndexToDecomposition(KCompositionMapping[aStart]));
       
  1009 	if (startIterator.Length() == aLengthSoFar)
       
  1010 		++aStart;
       
  1011 
       
  1012 	// Find a single example of a decomposition that is suitable
       
  1013 	TInt mid;
       
  1014 	TUTF32Iterator midIt;
       
  1015 	for (;;)
       
  1016 		{
       
  1017 		if (aStart == aEnd)
       
  1018 			return EFalse;
       
  1019 		mid = aStart + ((aEnd - aStart) >> 1);
       
  1020         midIt = ::HashIndexToDecomposition(KCompositionMapping[mid]);
       
  1021 		ASSERT(aLengthSoFar < midIt.Length());
       
  1022 		TInt midItChar = midIt[aLengthSoFar];
       
  1023 		if (midItChar < aNextCharacter)
       
  1024 			aStart = mid + 1;
       
  1025 		else if (aNextCharacter < midItChar)
       
  1026 			aEnd = mid;
       
  1027 		else
       
  1028 			{
       
  1029 			startIterator = midIt;
       
  1030 			break;
       
  1031 			}
       
  1032 		}
       
  1033 
       
  1034 	// FInd the first decomposition that does not match
       
  1035 	TInt start2 = mid + 1;
       
  1036 	while (start2 != aEnd)
       
  1037 		{
       
  1038 		ASSERT(start2 < aEnd);
       
  1039 		TInt mid2 = start2 + ((aEnd - start2) >> 1);
       
  1040         midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]);
       
  1041 		ASSERT(aLengthSoFar < midIt.Length());
       
  1042 		TInt midItChar = midIt[aLengthSoFar];
       
  1043 		ASSERT(aNextCharacter <= midItChar);
       
  1044 		if (aNextCharacter < midItChar)
       
  1045 			aEnd = mid2;
       
  1046 		else
       
  1047 			start2 = mid2 + 1;
       
  1048 		}
       
  1049 
       
  1050 	// Find the first decomposition that matches
       
  1051 	while (aStart != mid)
       
  1052 		{
       
  1053 		ASSERT(aStart < mid);
       
  1054 		TInt mid2 = aStart + ((mid - aStart) >> 1);
       
  1055         midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]);
       
  1056 		ASSERT(aLengthSoFar < midIt.Length());
       
  1057 		TInt midItChar = midIt[aLengthSoFar];
       
  1058 		ASSERT(midItChar <= aNextCharacter);
       
  1059 		if (midItChar < aNextCharacter)
       
  1060 			aStart = mid2 + 1;
       
  1061 		else
       
  1062 			{
       
  1063 			startIterator = midIt;
       
  1064 			mid = mid2;
       
  1065 			}
       
  1066 		}
       
  1067 
       
  1068 	return startIterator.Length() == (aLengthSoFar + 1);
       
  1069 	}
       
  1070 
       
  1071 /** 
       
  1072 @internalComponent
       
  1073 */
       
  1074 static TBool RefineCompositionUTF32(TInt& aStart, TInt& aEnd, TInt& aLengthSoFar, TChar aChar)
       
  1075 	{
       
  1076 	if (aChar < 0x10000)
       
  1077         return ::RefineComposition(aStart, aEnd, aLengthSoFar++, aChar);
       
  1078     ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar >> 10) + 0xD7C0);
       
  1079 	if (aStart == aEnd)
       
  1080 		return EFalse;
       
  1081     return ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar & 0x3FF) + 0xDC00);
       
  1082 	}
       
  1083 
       
  1084 /**
       
  1085 Combine as many of the characters presented as possible into a single
       
  1086 character.
       
  1087 @return The number of characters successfully combined.
       
  1088 @param aCombined If a nonzero value is returned, this contains
       
  1089             the character that is that number of characters from the start of
       
  1090             aDes combined.
       
  1091 @internalComponent
       
  1092 */
       
  1093 TInt CombineAsMuchAsPossible(const TDesC16& aDes, TChar& aCombined)
       
  1094 	{
       
  1095 	TInt start = 0;
       
  1096 	TInt end = sizeof(KCompositionMapping)/sizeof(KCompositionMapping[0]);
       
  1097 	TInt length = 0;
       
  1098 	TInt bestIndex = 0;
       
  1099 	TInt bestLength = 0;
       
  1100 	const TText16* ptr = aDes.Ptr();
       
  1101 	TUTF32Iterator input(ptr, ptr + aDes.Length());
       
  1102 	while (!input.AtEnd())
       
  1103 		{
       
  1104         if (::RefineCompositionUTF32(start, end, length, input.Current()))
       
  1105 			{
       
  1106 			bestIndex = start;
       
  1107 			bestLength = length;
       
  1108 			}
       
  1109 		input.Next();
       
  1110 		}
       
  1111 	if (bestLength == 0)
       
  1112 		return 0;
       
  1113 	aCombined = KUnicodeToIndexHash[KCompositionMapping[bestIndex]] & 0xFFFFF;
       
  1114 	return bestLength;
       
  1115 	}
       
  1116 
       
  1117 //////////////////////////////////////////////////////////////////////////////////////////////
       
  1118 // COLLATION
       
  1119 //////////////////////////////////////////////////////////////////////////////////////////////
       
  1120 
       
  1121 //////////////////////////////////////////////////////////////////////////////////////////////
       
  1122 // TDecompositionIterator class
       
  1123 
       
  1124 /** 
       
  1125 @internalComponent
       
  1126 */
       
  1127 void TDecompositionIterator::Set(const TUTF32Iterator& a)
       
  1128 	{
       
  1129 	iBase = a;
       
  1130 	if (!iBase.AtEnd())
       
  1131         {
       
  1132         (void)::Decompose(iBase, iDecomposition);
       
  1133         }
       
  1134 	}
       
  1135 
       
  1136 /** 
       
  1137 @internalComponent
       
  1138 */
       
  1139 TDecompositionIterator::TDecompositionIterator(const TUTF32Iterator& a)
       
  1140 	{
       
  1141 	Set(a);
       
  1142 	}
       
  1143 
       
  1144 /** 
       
  1145 @internalComponent
       
  1146 */
       
  1147 TBool TDecompositionIterator::AtEnd() const
       
  1148 	{
       
  1149 	return iBase.AtEnd();
       
  1150 	}
       
  1151 
       
  1152 /** 
       
  1153 @internalComponent
       
  1154 */
       
  1155 TChar TDecompositionIterator::Current() const
       
  1156 	{
       
  1157 	return iDecomposition.Current();
       
  1158 	}
       
  1159 
       
  1160 /** 
       
  1161 @internalComponent
       
  1162 */
       
  1163 void TDecompositionIterator::Next()
       
  1164 	{
       
  1165 	ASSERT(!iBase.AtEnd() && !iDecomposition.AtEnd());
       
  1166 	iDecomposition.Next();
       
  1167 	if (!iDecomposition.AtEnd())
       
  1168 		return;
       
  1169 	iBase.Next();
       
  1170 	if (!iBase.AtEnd())
       
  1171         {
       
  1172         (void)::Decompose(iBase, iDecomposition);
       
  1173         }
       
  1174 	}
       
  1175 
       
  1176 /** 
       
  1177 @internalComponent
       
  1178 */
       
  1179 const TText16* TDecompositionIterator::CurrentPosition() const
       
  1180 	{
       
  1181 	return iBase.CurrentPosition();
       
  1182 	}
       
  1183 
       
  1184 /** 
       
  1185 Find out the length and minimum combining class of
       
  1186 the current run of characters of nonzero combining class.
       
  1187 aMinClass and aMinClassPos are not written to if the return
       
  1188 value is 0.
       
  1189 aEndOfRun is written to with the final position of the iteration
       
  1190 if 0 is returned and aEndOfRun is non-null
       
  1191 @internalComponent
       
  1192 */
       
  1193 static TInt ReorderingRun(TInt& aMinClass, TDecompositionIterator& aMinClassPos,
       
  1194                           const TDecompositionIterator& aStart, TBool* aOpenSequence, 
       
  1195                           TInt aMaxDisallowedClass = 0, TDecompositionIterator* aEndOfRun = 0)
       
  1196 	{
       
  1197 	TInt comclass = aStart.AtEnd()? 0 : aStart.Current().GetCombiningClass();
       
  1198 	if (comclass == 0)
       
  1199 		{
       
  1200 		if (aEndOfRun)
       
  1201 			*aEndOfRun = aStart;
       
  1202 		if (aOpenSequence)
       
  1203 			*aOpenSequence = aStart.AtEnd();
       
  1204 		return 0;
       
  1205 		}
       
  1206 	aMinClass = 256;
       
  1207 	TDecompositionIterator i = aStart;
       
  1208 	TInt count = 0;
       
  1209 	while (comclass != 0)
       
  1210 		{
       
  1211 		if (aMaxDisallowedClass < comclass)
       
  1212 			{
       
  1213 			if (comclass < aMinClass)
       
  1214 				{
       
  1215 				aMinClass = comclass;
       
  1216 				aMinClassPos = i;
       
  1217 				}
       
  1218 			++count;
       
  1219 			}
       
  1220 		i.Next();
       
  1221 		comclass = i.AtEnd()? 0 : i.Current().GetCombiningClass();
       
  1222 		}
       
  1223 	if (count == 0 && aEndOfRun)
       
  1224 		*aEndOfRun = i;
       
  1225 	if (aOpenSequence)
       
  1226 		*aOpenSequence = i.AtEnd();
       
  1227 	return count;
       
  1228 	}
       
  1229 
       
  1230 //////////////////////////////////////////////////////////////////////////////////////////////
       
  1231 // TCanonicalDecompositionIterator class
       
  1232 
       
  1233 /** 
       
  1234 @internalComponent
       
  1235 */
       
  1236 void TCanonicalDecompositionIterator::Set(const TUTF32Iterator& a)
       
  1237 	{
       
  1238 	iBase.Set(a);
       
  1239 	iLastPosition = 0;
       
  1240 	if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2)
       
  1241 		iCurrentCombiningClass = 0;
       
  1242 	}
       
  1243 
       
  1244 /** 
       
  1245 @internalComponent
       
  1246 */
       
  1247 TBool TCanonicalDecompositionIterator::AtEnd() const
       
  1248 	{
       
  1249 	return iBase.AtEnd();
       
  1250 	}
       
  1251 
       
  1252 /** 
       
  1253 @internalComponent
       
  1254 */
       
  1255 TChar TCanonicalDecompositionIterator::Current() const
       
  1256 	{
       
  1257 	return iCurrentCombiningClass? iCurrent.Current() : iBase.Current();
       
  1258 	}
       
  1259 
       
  1260 /** 
       
  1261 @internalComponent
       
  1262 */
       
  1263 void TCanonicalDecompositionIterator::Next()
       
  1264 	{
       
  1265 	iLastPosition = iBase.CurrentPosition();
       
  1266 	if (iCurrentCombiningClass == 0)
       
  1267 		{
       
  1268 		iBase.Next();
       
  1269 		if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2)
       
  1270 			iCurrentCombiningClass = 0;
       
  1271 		return;
       
  1272 		}
       
  1273 	// Find the next character in the run with the same combining class
       
  1274 	iCurrent.Next();
       
  1275 	TInt curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass();
       
  1276 	while (curclass != 0)
       
  1277 		{
       
  1278 		if (curclass == iCurrentCombiningClass)
       
  1279 			// success
       
  1280 			return;
       
  1281 		iCurrent.Next();
       
  1282 		curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass();
       
  1283 		}
       
  1284 	// There are none left in the current class. Find out what the next one is.
       
  1285 	if (0 == ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, 0, iCurrentCombiningClass, &iBase))
       
  1286 		iCurrentCombiningClass = 0;
       
  1287 	}
       
  1288 
       
  1289 /** 
       
  1290 @internalComponent
       
  1291 */
       
  1292 const TText16* TCanonicalDecompositionIterator::CurrentPositionIfAtCharacter() const
       
  1293 	{
       
  1294 	if (iCurrentCombiningClass != 0)
       
  1295 		return 0;
       
  1296 	const TText16* p = iBase.CurrentPosition();
       
  1297 	return iLastPosition == p? 0 : p;
       
  1298 	}
       
  1299 
       
  1300 /** 
       
  1301 @internalComponent
       
  1302 */
       
  1303 TBool TCanonicalDecompositionIterator::IsInOpenSequence() const
       
  1304 	{
       
  1305 	return iInOpenSequence;
       
  1306 	}
       
  1307 
       
  1308 //////////////////////////////////////////////////////////////////////////////////////////////
       
  1309 // TCanonicalDecompositionIteratorCached class
       
  1310 
       
  1311 /** 
       
  1312 @internalComponent
       
  1313 */
       
  1314 void TCanonicalDecompositionIteratorCached::Set(const TUTF32Iterator& a)
       
  1315 	{
       
  1316 	iBase.Set(a);
       
  1317 	iCacheStart = 0;
       
  1318 	iCacheSize = 0;
       
  1319 	}
       
  1320 
       
  1321 /** 
       
  1322 @internalComponent
       
  1323 */
       
  1324 TBool TCanonicalDecompositionIteratorCached::AtEnd() const
       
  1325 	{
       
  1326 	return iCacheSize == 0 && iBase.AtEnd();
       
  1327 	}
       
  1328 
       
  1329 /** 
       
  1330 @internalComponent
       
  1331 */
       
  1332 void TCanonicalDecompositionIteratorCached::Next(TInt aOffset)
       
  1333 	{
       
  1334 	ASSERT(0 <= aOffset);
       
  1335 	ASSERT(0 <= iCacheSize);
       
  1336 	ASSERT(0 != iCacheSize || !iBase.AtEnd());
       
  1337 	if (aOffset <= iCacheSize)
       
  1338 		{
       
  1339 		iCacheSize -= aOffset;
       
  1340 		iCacheStart = (iCacheStart + aOffset) & (KMaxLookAhead - 1);
       
  1341 		return;
       
  1342 		}
       
  1343 	aOffset -= iCacheSize;
       
  1344 	iCacheSize = 0;
       
  1345 	while (aOffset != 0)
       
  1346 		{
       
  1347 		iBase.Next();
       
  1348 		--aOffset;
       
  1349 		}
       
  1350 	}
       
  1351 
       
  1352 /** 
       
  1353 Get the character at the position of the iterator plus aOffset steps.
       
  1354 Returns -1 if we are looking too far ahead.
       
  1355 @internalComponent
       
  1356 */
       
  1357 TChar TCanonicalDecompositionIteratorCached::Get(TInt aOffset)
       
  1358 	{
       
  1359 	// should be assert debug: there is a chance this could go off with
       
  1360 	// bad collation tables
       
  1361 	ASSERT(aOffset <= KMaxLookAhead);
       
  1362 	while (iCacheSize <= aOffset)
       
  1363 		{
       
  1364 		if (iBase.AtEnd())
       
  1365 			return TChar(static_cast <TUint> (-1));
       
  1366 		TInt cachePos = (iCacheStart + iCacheSize) & (KMaxLookAhead - 1);
       
  1367 		iCache[cachePos].iChar = iBase.Current();
       
  1368 		iCache[cachePos].iPos = iBase.CurrentPositionIfAtCharacter();
       
  1369 		++iCacheSize;
       
  1370 		iBase.Next();
       
  1371 		}
       
  1372 	return iCacheSize == aOffset? iBase.Current() : iCache[(iCacheStart + aOffset) & (KMaxLookAhead - 1)].iChar;
       
  1373 	}
       
  1374 
       
  1375 /** 
       
  1376 If the current position in the original string is representable
       
  1377 as a pointer into it and we know what it is, return it.
       
  1378 @internalComponent
       
  1379 */
       
  1380 const TText16* TCanonicalDecompositionIteratorCached::CurrentPositionIfAtCharacter() const
       
  1381 	{
       
  1382 	if(iCacheSize == 0)
       
  1383 		{
       
  1384 		return iBase.CurrentPositionIfAtCharacter();
       
  1385 		}
       
  1386 	return iCache[iCacheStart & (KMaxLookAhead - 1)].iPos;
       
  1387 	}
       
  1388