symport/e32/euser/unicode/compare.cpp
changeset 1 0a7b44b10206
child 2 806186ab5e14
equal deleted inserted replaced
0:c55016431358 1:0a7b44b10206
       
     1 // Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of the License "Symbian Foundation License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.symbianfoundation.org/legal/sfl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // Folding and decomposition implementation
       
    15 // 
       
    16 //
       
    17 
       
    18 #include "folddecomp.inl"
       
    19 #include "compareimp.h"
       
    20 #include "u32std.h"
       
    21 
       
    22 #define ARRAY_LENGTH(a) (static_cast<TInt>(sizeof(a)/sizeof(a[0])))
       
    23 
       
    24 ////////////////////////////////////////////////////////////////////////////////////////////
       
    25 // Global functions
       
    26 ////////////////////////////////////////////////////////////////////////////////////////////
       
    27 
       
    28 /**
       
    29 @internalComponent
       
    30 */
       
    31 TChar UTF16ToChar(const TText16* a)
       
    32 	{
       
    33 	if (0xD800 <= a[0])
       
    34 		{
       
    35 		if (a[0] < 0xE000)
       
    36 			{
       
    37             if (a[0] < 0xDC00 && ::IsLowSurrogate(a[1]))
       
    38 				{
       
    39                 TChar c = ::PairSurrogates(a[0], a[1]);
       
    40 				if ((c & 0xFFFE) != 0xFFFE)
       
    41 					return c;
       
    42 				}
       
    43 			return 0xFFFF;
       
    44 			}
       
    45 		if (a[0] == 0xFFFE)
       
    46 			return 0xFFFF;
       
    47 		}
       
    48 	return a[0];
       
    49 	}
       
    50 
       
    51 /**
       
    52 Is a character a base character (ETrue) or a combiner (EFalse)?
       
    53 For now, we will treat all control characters as base characters.
       
    54 @internalComponent
       
    55 */
       
    56 TBool IsBaseCharacter(TChar a)
       
    57 	{
       
    58 	if(a < 0x220)
       
    59         {
       
    60 		// These Unicode characters are all assigned
       
    61 		// and none of them is a combining character
       
    62 		return ETrue;
       
    63         }
       
    64 	return (a.GetCategory() & 0xFFF0) - TChar::EMarkGroup;
       
    65 	}
       
    66 
       
    67 /**
       
    68 @internalComponent
       
    69 */
       
    70 inline TInt GetDecompositionIndex(TChar a)
       
    71 	{
       
    72     TInt i = DecompositionHashStart(a);
       
    73 	TUint32 v = KUnicodeToIndexHash[i];
       
    74 	if (!v)
       
    75 		return -1;
       
    76 	if ((v & 0xFFFFF) != a)
       
    77 		{
       
    78         TInt step = DecompositionHashStep(a);
       
    79 		do	{
       
    80 			i = (i + step) & KDecompositionHashBitmask;
       
    81 			v = KUnicodeToIndexHash[i];
       
    82 			if (!v)
       
    83 				return -1;
       
    84 			} while ((v & 0xFFFFF) != a);
       
    85 		}
       
    86 // it is important that this is an unsigned shift
       
    87 	return static_cast<TInt>(v >> 20);
       
    88 	}
       
    89 
       
    90 /**
       
    91 Will not work if an invalid index is passed.
       
    92 @internalComponent
       
    93 */
       
    94 static TUTF32Iterator GetFoldedDecomposition(TInt aIndex)
       
    95 	{
       
    96 	TInt singletonIndex = aIndex - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
       
    97 	if (0 <= singletonIndex)
       
    98 		return TUTF32Iterator(KSingletonFolds + singletonIndex);
       
    99 	const TText* start = KNonSingletonFolds + aIndex * 2;
       
   100 	if (*start != KLongD)
       
   101 		return TUTF32Iterator(start, start + 2,
       
   102 			TUTF32Iterator::EStartsWithValidCharacter);
       
   103 	TInt longDecompIndex = start[1];
       
   104 	start = KLongDecompositions + (longDecompIndex & 0xFFF);
       
   105 	return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3,
       
   106 		TUTF32Iterator::EStartsWithValidCharacter);
       
   107 	}
       
   108 
       
   109 /**
       
   110 @internalComponent
       
   111 */
       
   112 static TChar GetFirstFoldedChar(TChar a)
       
   113 	{
       
   114     TInt index = ::GetDecompositionIndex(a);
       
   115     return index < 0? a : ::GetFoldedDecomposition(index).Current();
       
   116 	}
       
   117 
       
   118 /**
       
   119 @internalComponent
       
   120 */
       
   121 static TBool FirstFoldedCodeIsNot(TInt aNonSurrogate, TInt aFoldedNonSurrogate)
       
   122 	{
       
   123     TInt index = ::GetDecompositionIndex(aNonSurrogate);
       
   124 	if (index < 0)
       
   125 		return aNonSurrogate - aFoldedNonSurrogate;
       
   126 	TInt singletonIndex = index - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
       
   127 	if (0 < singletonIndex)
       
   128 		return KSingletonFolds[singletonIndex] - aFoldedNonSurrogate;
       
   129 	const TText* start = KNonSingletonFolds + index * 2;
       
   130 	if (start[0] == KLongD)
       
   131 		start = KLongDecompositions + (start[1] & 0xFFF);
       
   132 	return *start - aFoldedNonSurrogate;
       
   133 	}
       
   134 
       
   135 /**
       
   136 @internalComponent
       
   137 */
       
   138 static TInt GetClass(TFoldedDecompIterator& a)
       
   139 	{
       
   140 	ASSERT(!a.AtEnd());
       
   141 	a.EnterFoldedSequence();
       
   142 	TChar ch = a.Current();
       
   143 	TInt cl = ch.GetCombiningClass();
       
   144 	if (cl == 0)
       
   145 		// Assume starters have been folded from ypogegrammeni
       
   146 		cl = 240;
       
   147 	return cl;
       
   148 	}
       
   149 
       
   150 ////////////////////////////////////////////////////////////////////////////////////////////
       
   151 // TUTF32Iterator
       
   152 ////////////////////////////////////////////////////////////////////////////////////////////
       
   153 
       
   154 /**
       
   155 @internalComponent
       
   156 */
       
   157 void TUTF32Iterator::Next()
       
   158 	{
       
   159 	ASSERT(iStart != iEnd);
       
   160 	while (++iStart != iEnd)
       
   161 		{
       
   162         iCurrent = ::UTF16ToChar(iStart);
       
   163 		if (iCurrent != 0xFFFF)
       
   164 			return;
       
   165 		}
       
   166 	}
       
   167 
       
   168 /**
       
   169 Locates a base character in a string using a folded comparision. Will not find combining 
       
   170 characters, nor will it consider Korean combining Jamo to be equivalent to Hangul.
       
   171 @internalComponent
       
   172 */
       
   173 TBool TUTF32Iterator::LocateFoldedBaseCharacter(TChar aChar)
       
   174 	{
       
   175 	// A quick shortcut for simple rejections
       
   176 	if (aChar < 0xFFFF)
       
   177 		{
       
   178         while (iStart != iEnd && *iStart < 0xD800 && ::FirstFoldedCodeIsNot(*iStart, aChar))
       
   179 			++iStart;
       
   180 		if (iStart != iEnd)
       
   181 			{
       
   182             iCurrent = ::UTF16ToChar(iStart);
       
   183 			if (iCurrent == 0xFFFF)
       
   184 				Next();
       
   185 			}
       
   186 		}
       
   187 	while (!AtEnd())
       
   188 		{
       
   189         TChar foldedChar = ::GetFirstFoldedChar(iCurrent);
       
   190 		if (aChar == foldedChar)
       
   191 			return ETrue;
       
   192 		Next();
       
   193 		}
       
   194 	return EFalse;
       
   195 	}
       
   196 
       
   197 ////////////////////////////////////////////////////////////////////////////////////////////
       
   198 // TFoldedDecompIterator
       
   199 ////////////////////////////////////////////////////////////////////////////////////////////
       
   200 
       
   201 /**
       
   202 @internalComponent
       
   203 */
       
   204 TFoldedDecompIterator::TFoldedDecompIterator(const TUTF32Iterator& a)
       
   205 	{
       
   206 	Set(a);
       
   207 	}
       
   208 
       
   209 /**
       
   210 @internalComponent
       
   211 */
       
   212 TBool TFoldedDecompIterator::AtEnd() const
       
   213 	{
       
   214 	return iOriginal.AtEnd();
       
   215 	}
       
   216 
       
   217 /**
       
   218 @internalComponent
       
   219 */
       
   220 TBool TFoldedDecompIterator::AtEndOrWildcard() const
       
   221 	{
       
   222 	// neither '?' nor '*' have decomposition sequences, so we can assume that
       
   223 	// if we are pointing at one or the other, we don't need to check if we
       
   224 	// are in a decomposition sequence or not.
       
   225 	return iOriginal.AtEnd() || iOriginal.Current() == '?' || iOriginal.Current() == '*';
       
   226 	}
       
   227 
       
   228 /**
       
   229 @internalComponent
       
   230 */
       
   231 TBool TFoldedDecompIterator::EnterFoldedSequence()
       
   232 	{
       
   233 	ASSERT(!AtEnd());
       
   234 	return !IsInFoldedSequence() && StrictEnterFoldedSequence();
       
   235 	}
       
   236 
       
   237 /**
       
   238 Enter folded sequence, assuming that we are not already in one
       
   239 @internalComponent
       
   240 */
       
   241 TBool TFoldedDecompIterator::StrictEnterFoldedSequence()
       
   242 	{
       
   243 	ASSERT(!AtEnd());
       
   244 	ASSERT(!IsInFoldedSequence());
       
   245     TInt index = ::GetDecompositionIndex(iOriginal.Current());
       
   246 	if (index < 0)
       
   247 		return EFalse;
       
   248 	iFolded = ::GetFoldedDecomposition(index);
       
   249 	return ETrue;
       
   250 	}
       
   251 
       
   252 /**
       
   253 An iota might have folded from a combining ypogegrammeni.
       
   254 If the current character is a base character, this function will
       
   255 detect whether it was folded from a combining character (and
       
   256 therefore does not constitute a grapheme boundary).
       
   257 @internalComponent
       
   258 */
       
   259 TBool TFoldedDecompIterator::CurrentIsBaseFoldedFromCombiner() const
       
   260 	{
       
   261 	if (!IsInFoldedSequence())
       
   262 		return EFalse;
       
   263 	// The only character that does this is Ypogerammeni, which folds to iota
       
   264 	if (iFolded.Current() != 0x3B9)
       
   265 		return EFalse;
       
   266 	// If the unfolded character is a combiner then it cannot contain an iota,
       
   267 	// so it must be an ypogegrammeni that has been folded to one.
       
   268 	// This will catch 0x345, the ypogegrammeni itself.
       
   269     if (!::IsBaseCharacter(iOriginal.Current()))
       
   270 		return ETrue;
       
   271 	// Otherwise the base character will be at the start of the decomposition
       
   272 	// sequence. We will assume that it is folded from a genuine iota if and
       
   273 	// only if there is an iota at the start of the folded decomposition
       
   274 	// sequence. (In theory there might be an iota with a combining
       
   275 	// ypogegrammeni, but in practice this will not occur).
       
   276 	TInt index = ::GetDecompositionIndex(iOriginal.Current());
       
   277 	ASSERT(0 <= index);
       
   278 	TUTF32Iterator folded = ::GetFoldedDecomposition(index);
       
   279 	return folded.Current() != 0x3B9;
       
   280 	}
       
   281 
       
   282 /**
       
   283 @internalComponent
       
   284 */
       
   285 TChar TFoldedDecompIterator::Current() const
       
   286 	{
       
   287 	ASSERT(!AtEnd());
       
   288 	return IsInFoldedSequence()? iFolded.Current() : iOriginal.Current();
       
   289 	}
       
   290 
       
   291 /** 
       
   292 Move past this code if it matches unfolded or folded 
       
   293 @internalComponent
       
   294 */
       
   295 TBool TFoldedDecompIterator::Match(TChar aCode)
       
   296 	{
       
   297 	ASSERT(!AtEnd());
       
   298 	if (!IsInFoldedSequence())
       
   299 		{
       
   300 		if (aCode == iOriginal.Current())
       
   301 			{
       
   302 			iOriginal.Next();
       
   303 			return ETrue;
       
   304 			}
       
   305 		if (!StrictEnterFoldedSequence())
       
   306 			return EFalse;
       
   307 		}
       
   308 	ASSERT(IsInFoldedSequence());
       
   309 	if (aCode == iFolded.Current())
       
   310 		{
       
   311 		iFolded.Next();
       
   312 		if (iFolded.AtEnd())
       
   313 			iOriginal.Next();
       
   314 		return ETrue;
       
   315 		}
       
   316 	return EFalse;
       
   317 	}
       
   318 
       
   319 /** 
       
   320 Move this and argument past matching character. 
       
   321 @internalComponent
       
   322 */
       
   323 TBool TFoldedDecompIterator::Match(TFoldedDecompIterator& aThat)
       
   324 	{
       
   325 	ASSERT(!AtEnd());
       
   326 	if (!IsInFoldedSequence())
       
   327 		{
       
   328 		if ( aThat.Match(iOriginal.Current()) )
       
   329 			{
       
   330 			iOriginal.Next();
       
   331 			return ETrue;
       
   332 			}
       
   333 		if (!StrictEnterFoldedSequence())
       
   334 			return EFalse;
       
   335 		}
       
   336 	ASSERT(IsInFoldedSequence());
       
   337 	if ( aThat.Match(iFolded.Current()) )
       
   338 		{
       
   339 		iFolded.Next();
       
   340 		if (iFolded.AtEnd())
       
   341 			iOriginal.Next();
       
   342 		return ETrue;
       
   343 		}
       
   344 	return EFalse;
       
   345 	}
       
   346 
       
   347 /** 
       
   348 @internalComponent
       
   349 */
       
   350 void TFoldedDecompIterator::Next()
       
   351 	{
       
   352 	ASSERT(!AtEnd());
       
   353 	if (IsInFoldedSequence())
       
   354 		{
       
   355 		iFolded.Next();
       
   356 		if (IsInFoldedSequence())
       
   357 			return;
       
   358 		}
       
   359 	iOriginal.Next();
       
   360 	}
       
   361 
       
   362 ////////////////////////////////////////////////////////////////////////////////////////////
       
   363 // TFoldedSortedDecompIterator
       
   364 ////////////////////////////////////////////////////////////////////////////////////////////
       
   365 
       
   366 /**
       
   367 Set this iterator to iterate over the next combining characters.
       
   368 Iotas in folded sequences are assumed to be character class 240
       
   369 (combining ypogegrammeni). Next() must be used once before
       
   370 the first call to Current(), as long as AtEnd() returns false.
       
   371 @param aBase Sets the start of the iteration. This value is advanced to the
       
   372              end of the iteration.
       
   373 @return The number of codes in the iteration.
       
   374 @internalComponent
       
   375 */
       
   376 TInt TFoldedSortedDecompIterator::Set(TFoldedDecompIterator &aBase)
       
   377 	{
       
   378 	iStart = aBase;
       
   379 	TInt length = 0;
       
   380 	iCurrentClass = 256;
       
   381 
       
   382 	const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet;
       
   383 
       
   384 	// Find the next starter (i.e. character with combining class 0).
       
   385 	// We must not count iota folded from ypogegrammeni.
       
   386 	// Ypogegrammeni has combining class 240.
       
   387 	// Iota has combining class 0.
       
   388 	// Also we will be searching for the character with the smallest
       
   389 	// combining class to start at.
       
   390 	while (!aBase.AtEnd())
       
   391 		{
       
   392 		aBase.EnterFoldedSequence();
       
   393 		TChar ch = aBase.Current();
       
   394 		TInt cl = TUnicode(TUint(ch)).GetCombiningClass(charDataSet);
       
   395 		if (cl == 0)
       
   396 			{
       
   397 			if (aBase.CurrentIsBaseFoldedFromCombiner())
       
   398 				cl = 240;
       
   399 			else
       
   400 				break;
       
   401 			}
       
   402 		if (cl < iCurrentClass)
       
   403 			{
       
   404 			iCurrentClass = cl;
       
   405 			iCurrent = aBase;
       
   406 			iCurrentCount = length;
       
   407 			}
       
   408 		++length;
       
   409 		aBase.Next();
       
   410 		}
       
   411 	iRemaining = length;
       
   412 	iLength = length;
       
   413 	ASSERT(iLength == 0 || iCurrentClass < 256);
       
   414 	return length;
       
   415 	}
       
   416 
       
   417 /** 
       
   418 Set this iterator so that AtEnd() returns ETrue. 
       
   419 @internalComponent
       
   420 */
       
   421 void TFoldedSortedDecompIterator::Set()
       
   422 	{
       
   423 	iRemaining = 0;
       
   424 	}
       
   425 
       
   426 /** 
       
   427 @internalComponent
       
   428 */
       
   429 TBool TFoldedSortedDecompIterator::AtEnd() const
       
   430 	{
       
   431 	return iRemaining == 0;
       
   432 	}
       
   433 
       
   434 /** 
       
   435 @internalComponent
       
   436 */
       
   437 TChar TFoldedSortedDecompIterator::Current() const
       
   438 	{
       
   439 	ASSERT(!AtEnd());
       
   440 	return iCurrent.Current();
       
   441 	}
       
   442 
       
   443 /** 
       
   444 @internalComponent
       
   445 */
       
   446 void TFoldedSortedDecompIterator::Next()
       
   447 	{
       
   448 	ASSERT(!AtEnd());
       
   449 	--iRemaining;
       
   450 	while (++iCurrentCount != iLength)
       
   451 		{
       
   452 		iCurrent.Next();
       
   453         if (::GetClass(iCurrent) == iCurrentClass)
       
   454 			return;
       
   455 		}
       
   456 	// We have not found any more of the same class, so we will look
       
   457 	// for the smallest one larger.
       
   458 	TInt minClass = 256;
       
   459 	TFoldedDecompIterator searcher(iStart);
       
   460 	TInt searchCount = 0;
       
   461 	while (searchCount != iLength)
       
   462 		{
       
   463         TInt cl = ::GetClass(searcher);
       
   464 		if (iCurrentClass < cl && cl < minClass)
       
   465 			{
       
   466 			minClass = cl;
       
   467 			iCurrentCount = searchCount;
       
   468 			iCurrent = searcher;
       
   469 			}
       
   470 		++searchCount;
       
   471 		searcher.Next();
       
   472 		}
       
   473 	iCurrentClass = minClass;
       
   474 	ASSERT(minClass < 256 || AtEnd());
       
   475 	}
       
   476 
       
   477 ////////////////////////////////////////////////////////////////////////////////////////////
       
   478 // TFoldedCanonicalIterator
       
   479 ////////////////////////////////////////////////////////////////////////////////////////////
       
   480 
       
   481 /** 
       
   482 @internalComponent
       
   483 */
       
   484 TFoldedCanonicalIterator::TFoldedCanonicalIterator(const TUTF32Iterator& a)
       
   485 	{
       
   486 	iBase.Set(a);
       
   487 	iSorted.Set();
       
   488     if(!iBase.AtEnd())
       
   489         {
       
   490 	    iBase.EnterFoldedSequence();
       
   491 	    if (iBase.Current().GetCombiningClass() != 0 || iBase.CurrentIsBaseFoldedFromCombiner())
       
   492             {
       
   493 		    iSorted.Set(iBase);
       
   494             }
       
   495         }
       
   496 	}
       
   497 
       
   498 /** 
       
   499 @internalComponent
       
   500 */
       
   501 TBool TFoldedCanonicalIterator::AtEnd() const
       
   502 	{
       
   503 	return iSorted.AtEnd() && iBase.AtEnd();
       
   504 	}
       
   505 
       
   506 /** 
       
   507 @internalComponent
       
   508 */
       
   509 TChar TFoldedCanonicalIterator::Current() const
       
   510 	{
       
   511 	ASSERT(!iBase.AtEnd() || !iSorted.AtEnd());
       
   512 	return iSorted.AtEnd()? iBase.Current() : iSorted.Current();
       
   513 	}
       
   514 
       
   515 /** 
       
   516 @internalComponent
       
   517 */
       
   518 void TFoldedCanonicalIterator::Next(const TUnicodeDataSet* aCharDataSet)
       
   519 	{
       
   520 	ASSERT(!iBase.AtEnd() || !iSorted.AtEnd());
       
   521 	if (!iSorted.AtEnd())
       
   522 		{
       
   523 		iSorted.Next();
       
   524 		return;
       
   525 		}
       
   526 	iBase.Next();
       
   527     if(!iBase.AtEnd())
       
   528         {
       
   529 	    iBase.EnterFoldedSequence();
       
   530  	    if (TUnicode(TUint(iBase.Current())).GetCombiningClass(aCharDataSet) != 0 || iBase.CurrentIsBaseFoldedFromCombiner())
       
   531            {
       
   532 		    iSorted.Set(iBase);
       
   533             }
       
   534         }
       
   535 	}
       
   536 
       
   537 ////////////////////////////////////////////////////////////////////////////////////////////
       
   538 // Folding - Global functions and structures
       
   539 ////////////////////////////////////////////////////////////////////////////////////////////
       
   540 
       
   541 /** 
       
   542 @internalComponent
       
   543 */
       
   544 struct TEndTester 
       
   545     { 
       
   546     typedef enum {EGenuine, EWildcard} TType;
       
   547 
       
   548     inline TEndTester(TType aType) :
       
   549         iType(aType)
       
   550         {
       
   551         }
       
   552 
       
   553     inline TBool operator()(const TFoldedDecompIterator& a) const
       
   554         {
       
   555         return iType == EGenuine ? a.AtEnd() : a.AtEndOrWildcard();
       
   556         } 
       
   557 
       
   558 private:
       
   559     TType iType;
       
   560 
       
   561     };
       
   562 
       
   563 /**
       
   564 Consumes as much of aCandidate as matches aSearchTerm up to the next '?' or
       
   565 '*' wildcard or the end of aSearchTerm.
       
   566 It is assumed that the search term begins with a base character.
       
   567 Returns true if and only if the whole search term was matched
       
   568 with a whole number of UTF16s in the candidate string.
       
   569 On return of ETrue the candidate string iterator will have consumed the
       
   570 matching characters the search term will have had all its matching characters
       
   571 consumed.
       
   572 @internalComponent
       
   573 */
       
   574 TBool ConsumeFoldedMatch(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm,
       
   575                          const TEndTester& aEndTester)
       
   576 	{
       
   577 	TBool assumeBase = ETrue;
       
   578 	TFoldedDecompIterator st(aSearchTerm);
       
   579 	TFoldedDecompIterator cs(aCandidateString);
       
   580 	while (!aEndTester(st))
       
   581 		{
       
   582 		if (cs.AtEnd())
       
   583 			return EFalse;
       
   584 		if (st.Match(cs))
       
   585 			{
       
   586 			assumeBase = EFalse;
       
   587 			if (aEndTester(st))
       
   588 				{
       
   589                 // We have a match...
       
   590                 if (cs.IsInFoldedSequence())
       
   591                     // but it was against only part of a character
       
   592                     // in the original string, so we fail.
       
   593                     return EFalse;
       
   594 				aCandidateString = cs.BaseIterator();
       
   595 				aSearchTerm = st.BaseIterator();
       
   596 				return ETrue;
       
   597 				}
       
   598 			continue;
       
   599 			}
       
   600 		// did not match. We need to re-order canonically.
       
   601 		if (assumeBase)
       
   602 			// The first characters did not match.. do not bother
       
   603 			// to re-order.
       
   604 			return EFalse;
       
   605 		TFoldedSortedDecompIterator csc;
       
   606 		TInt cscLength = csc.Set(cs);
       
   607 		if (cscLength < 2)
       
   608 			// If there are less than two characters to be reordered,
       
   609 			// there is no hope of getting a match by re-ordering
       
   610 			return EFalse;
       
   611 		TFoldedSortedDecompIterator stc;
       
   612 		if (cscLength != stc.Set(st))
       
   613 			// if the strings have differing numbers of characters,
       
   614 			// there can be no match
       
   615 			return EFalse;
       
   616 		while (!stc.AtEnd())
       
   617 			{
       
   618 			ASSERT(!csc.AtEnd());
       
   619 			if (stc.Current() != csc.Current())
       
   620 				// well, we tried.
       
   621 				return EFalse;
       
   622 			stc.Next();
       
   623 			csc.Next();
       
   624 			}
       
   625 		}
       
   626 	// If the candidate string is in a folded sequence, then
       
   627 	// we should not accept the match, as we require all matches
       
   628 	// to be for a whole number of characters in the original string.
       
   629 	if (cs.IsInFoldedSequence())
       
   630 		return EFalse;
       
   631 	aCandidateString = cs.BaseIterator();
       
   632 	aSearchTerm = st.BaseIterator();
       
   633 	return ETrue;
       
   634 	}
       
   635 
       
   636 /** 
       
   637 @internalComponent
       
   638 */
       
   639 TBool ConsumeFoldedMatchWholeGraphemes(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm, 
       
   640                                        const TEndTester& aEndTester)
       
   641 	{
       
   642     if (!::ConsumeFoldedMatch(aCandidateString, aSearchTerm, aEndTester))
       
   643 		return EFalse;
       
   644     return aCandidateString.AtEnd() || ::IsBaseCharacter(aCandidateString.Current());
       
   645 	}
       
   646 
       
   647 /** 
       
   648 @internalComponent
       
   649 */
       
   650 static TBool ConsumeGrapheme(TUTF32Iterator& a)
       
   651 	{
       
   652 	if (a.AtEnd())
       
   653 		return EFalse;
       
   654 	a.Next();
       
   655     while (!a.AtEnd() && !::IsBaseCharacter(a.Current()))
       
   656 		a.Next();
       
   657 	return ETrue;
       
   658 	}
       
   659 
       
   660 /** 
       
   661 @internalComponent
       
   662 */
       
   663 TBool MatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
       
   664 	{
       
   665     TEndTester endTester(TEndTester::EWildcard);
       
   666     while(::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester))
       
   667 		{
       
   668 		if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*')
       
   669 			return ETrue;
       
   670 		ASSERT(aSearchTerm.Current() == '?');
       
   671 		aSearchTerm.Next();
       
   672         if (!::ConsumeGrapheme(aCandidateString))
       
   673 			return EFalse;
       
   674 		}
       
   675 	return EFalse;
       
   676 	}
       
   677 
       
   678 /** 
       
   679 @internalComponent
       
   680 */
       
   681 TBool FindMatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
       
   682 	{
       
   683 	// match as many graphemes as there are leading ?s
       
   684 	while (!aSearchTerm.AtEnd()
       
   685 		&& aSearchTerm.Current() != '*' && aSearchTerm.Current() == '?')
       
   686 		{
       
   687         if (!::ConsumeGrapheme(aCandidateString))
       
   688 			return EFalse;
       
   689 		aSearchTerm.Next();
       
   690 		}
       
   691 	if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*')
       
   692 		return ETrue;
       
   693     TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current());
       
   694 	TUTF32Iterator savedST(aSearchTerm);
       
   695 	while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm))
       
   696 		{
       
   697 		TUTF32Iterator savedCS = aCandidateString;
       
   698 		if (::MatchSectionFolded(aCandidateString, aSearchTerm))
       
   699 			return ETrue;
       
   700 		aSearchTerm = savedST;
       
   701 		aCandidateString = savedCS;
       
   702 		aCandidateString.Next();
       
   703 		}
       
   704 	return EFalse;
       
   705 	}
       
   706 
       
   707 /** 
       
   708 @internalComponent
       
   709 */
       
   710 TBool MatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
       
   711                         const TText16* aSearchTermStart, const TText16* aSearchTermEnd)
       
   712 	{
       
   713 	TUTF32Iterator candidate(aCandidateStringStart, aCandidateStringEnd);
       
   714 	TUTF32Iterator searchTerm(aSearchTermStart, aSearchTermEnd);
       
   715 	// First section of search term must match exactly at the start of the
       
   716 	// candidate string.
       
   717 	if (!::MatchSectionFolded(candidate, searchTerm))
       
   718 		return EFalse;
       
   719 
       
   720 	// If there was only one section, it must match the whole candidate string.
       
   721 	if (searchTerm.AtEnd())
       
   722 		return candidate.AtEnd();
       
   723 
       
   724 	while (!searchTerm.AtEnd())
       
   725 		{
       
   726 		searchTerm.Next();
       
   727 		if (!::FindMatchSectionFolded(candidate, searchTerm))
       
   728 			return EFalse;
       
   729 		}
       
   730 
       
   731 	// The last section must match exactly at the end of the candidate string.
       
   732 	if (candidate.AtEnd())	// shortcut
       
   733 		return ETrue;
       
   734 	const TText16* searchTermLastSection = aSearchTermEnd;
       
   735 	while (searchTermLastSection != aSearchTermStart
       
   736 		&& searchTermLastSection[-1] != '*')
       
   737 		--searchTermLastSection;
       
   738 	if (searchTermLastSection == aSearchTermEnd)
       
   739 		// last section is null, so trivially matches
       
   740 		return ETrue;
       
   741 	// Count graphemes by counting the number of base characters.
       
   742 	// The first one is assumed to start a grapheme.
       
   743 	TInt graphemeCount = 1;
       
   744 	for (const TText16* s = searchTermLastSection + 1; s != aSearchTermEnd; ++s)
       
   745 		{
       
   746         if (::IsBaseCharacter(*s))
       
   747 			++graphemeCount;
       
   748 		}
       
   749 	// Count this many graphemes back in the candidate string
       
   750 	const TText16* candidateLastSection = aCandidateStringEnd;
       
   751 	while (graphemeCount != 0
       
   752 		&& candidateLastSection != aCandidateStringStart)
       
   753 		{
       
   754         if (::IsBaseCharacter(*--candidateLastSection))
       
   755 			--graphemeCount;
       
   756 		}
       
   757 	TUTF32Iterator last(candidateLastSection, aCandidateStringEnd);
       
   758 	TUTF32Iterator st(searchTermLastSection, aSearchTermEnd);
       
   759 	return ::MatchSectionFolded(last, st);
       
   760 	}
       
   761 
       
   762 /** 
       
   763 Implementation of MatchF
       
   764 (slow if there is a match: MatchStringFolded is better, but does not return
       
   765 the position of the match)
       
   766 @internalComponent
       
   767 */
       
   768 TInt LocateMatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
       
   769                              const TText16* aSearchTermStart, const TText16* aSearchTermEnd)
       
   770 	{
       
   771 	// Quick shortcut for simple non-match
       
   772 	if (aSearchTermStart != aSearchTermEnd && *aSearchTermStart != '*')
       
   773 		{
       
   774 		if (aCandidateStringStart == aCandidateStringEnd)
       
   775 			return KErrNotFound;
       
   776 		// We won't bother with non-characters and surrogate pairs.
       
   777 		if (*aSearchTermStart != '?'
       
   778 			&& *aSearchTermStart < 0xD800
       
   779 			&& *aCandidateStringStart < 0xD800
       
   780             && ::GetFirstFoldedChar(*aSearchTermStart) != ::GetFirstFoldedChar(*aCandidateStringStart))
       
   781 			return KErrNotFound;
       
   782 		}
       
   783     if (!::MatchStringFolded(aCandidateStringStart, aCandidateStringEnd,
       
   784 		aSearchTermStart, aSearchTermEnd))
       
   785 		return KErrNotFound;
       
   786 	// find where it matches
       
   787 	while (aSearchTermStart != aSearchTermEnd && *aSearchTermStart == '*')
       
   788 		++aSearchTermStart;
       
   789 	const TText16* end = aSearchTermStart;
       
   790 	while (end != aSearchTermEnd && *end != '*')
       
   791 		++end;
       
   792 	// To preserve existing behaviour, a search term consisting of nothing
       
   793 	// but stars is considered to match at 0.
       
   794 	if (end == aSearchTermStart)
       
   795 		return 0;
       
   796 	for (const TText16* csSection = aCandidateStringStart;
       
   797 		csSection <= aCandidateStringEnd; ++csSection)
       
   798 		{
       
   799 		TUTF32Iterator cs(csSection, aCandidateStringEnd);
       
   800 		TUTF32Iterator st(aSearchTermStart, end);
       
   801         if (::MatchSectionFolded(cs, st))
       
   802 			{
       
   803 			// If this match must match exactly at the end, we must keep
       
   804 			// going.
       
   805 			// This could be a lot faster, with some optimizations.
       
   806 			if (end == aSearchTermEnd)
       
   807 				{
       
   808 				if (!cs.AtEnd())
       
   809 					continue;
       
   810 				}
       
   811 			return csSection - aCandidateStringStart;
       
   812 			}
       
   813 		}
       
   814 	// this should never happen!
       
   815 	ASSERT(0);
       
   816 	return KErrNotFound;
       
   817 	}
       
   818 
       
   819 /** 
       
   820 Implementation of FindF
       
   821 @internalComponent
       
   822 */
       
   823 TInt FindFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
       
   824 	{
       
   825     //Empty aSearchTerm string? - Then return 0 - keep the new implementation functionally 
       
   826     //compatible with the old one.
       
   827     if(aSearchTerm.Length() == 0)
       
   828         {
       
   829         return 0;
       
   830         }
       
   831 	const TText16* candidateStartPosition = aCandidateString.CurrentPosition();
       
   832     TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current());
       
   833 	TUTF32Iterator savedST(aSearchTerm);
       
   834 	while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm))
       
   835 		{
       
   836 		TUTF32Iterator savedCS = aCandidateString;
       
   837         TEndTester endTester(TEndTester::EGenuine);
       
   838         if (::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester))
       
   839 			return savedCS.CurrentPosition() - candidateStartPosition;
       
   840 		aSearchTerm = savedST;
       
   841 		aCandidateString = savedCS;
       
   842 		aCandidateString.Next();
       
   843 		}
       
   844 	return KErrNotFound;
       
   845 	}
       
   846 
       
   847 /** 
       
   848 Implementation of boolean CompareF
       
   849 @internalComponent
       
   850 */
       
   851 TBool EqualsFolded(TUTF32Iterator& aLeft, TUTF32Iterator& aRight)
       
   852 	{
       
   853     TEndTester endTester(TEndTester::EGenuine);
       
   854     if (::ConsumeFoldedMatchWholeGraphemes(aLeft, aRight, endTester))
       
   855 		return aLeft.AtEnd();
       
   856 	return EFalse;
       
   857 	}
       
   858 
       
   859 /** 
       
   860 Implementation of tri-state CompareF
       
   861 @internalComponent
       
   862 */
       
   863 TInt CompareFolded(const TUTF32Iterator& aLeft, const TUTF32Iterator& aRight)
       
   864 	{
       
   865 	TFoldedCanonicalIterator left(aLeft);
       
   866 	TFoldedCanonicalIterator right(aRight);
       
   867 
       
   868 	const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet;
       
   869 
       
   870 	while (!left.AtEnd())
       
   871 		{
       
   872 		if (right.AtEnd())
       
   873 			return 1;
       
   874 		TChar cr = right.Current();
       
   875 		TChar cl = left.Current();
       
   876 		if (cr != cl)
       
   877 			return cl - cr;
       
   878         right.Next(charDataSet);
       
   879         left.Next(charDataSet);
       
   880 		}
       
   881 	return right.AtEnd()? 0 : -1;
       
   882 	}
       
   883 
       
   884 ////////////////////////////////////////////////////////////////////////////////////////////
       
   885 // Composition/Decomposition - Global functions and structures
       
   886 ////////////////////////////////////////////////////////////////////////////////////////////
       
   887 
       
   888 /** 
       
   889 @internalComponent
       
   890 */
       
   891 static TUTF32Iterator IndexToNonSingletonDecomposition(TInt aIndex)
       
   892 	{
       
   893 	const TText* start = KNonSingletonDecompositions + aIndex * 2;
       
   894 	if (*start != KLongD)
       
   895 		return TUTF32Iterator(start, start + 2, TUTF32Iterator::EStartsWithValidCharacter);
       
   896 	TInt longDecompIndex = start[1];
       
   897 	start = KLongDecompositions + (longDecompIndex & 0xFFF);
       
   898 	return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3, TUTF32Iterator::EStartsWithValidCharacter);
       
   899 	}
       
   900 
       
   901 /** 
       
   902 Come up with a decomposition for the current value pointed at by the iterator
       
   903 @internalComponent
       
   904 */
       
   905 static TBool Decompose(const TUTF32Iterator& aUTF32It, TUTF32Iterator& aOutIt)
       
   906 	{
       
   907     TInt index = ::GetDecompositionIndex(aUTF32It.Current());
       
   908 	TInt singletonIndex = index - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
       
   909 	const TInt SizeOfSingletonTable = sizeof(KSingletonDecompositions)/sizeof(KSingletonDecompositions[0])/2;
       
   910 	if(index < 0 || SizeOfSingletonTable <= singletonIndex)
       
   911         {
       
   912         aOutIt = aUTF32It.CurrentAsIterator();
       
   913 		return EFalse;//There is not any valid decomposition
       
   914         }
       
   915 	if(0 <= singletonIndex)
       
   916         {
       
   917 		aOutIt = TUTF32Iterator(KSingletonDecompositions + singletonIndex);
       
   918         }
       
   919     else
       
   920         {
       
   921         aOutIt = ::IndexToNonSingletonDecomposition(index);
       
   922         }
       
   923     return ETrue;//Valid decomposition
       
   924 	}
       
   925 
       
   926 /** 
       
   927 @internalComponent
       
   928 */
       
   929 static TUTF32Iterator CharToUTF32Iterator(TChar aChar, TDes16& aBuf)
       
   930 	{
       
   931 	aBuf.Zero();
       
   932 	if (aChar < 0x10000)
       
   933 		aBuf.Append(aChar);
       
   934 	else
       
   935 		{
       
   936 		aBuf.Append((aChar >> 10) + 0xD7C0);
       
   937 		aBuf.Append((aChar & 0x3FF) + 0xDC00);
       
   938 		}
       
   939 	const TText16* t = aBuf.Ptr();
       
   940 	return TUTF32Iterator(t, t + aBuf.Length());
       
   941 	}
       
   942 
       
   943 /** 
       
   944 @internalComponent
       
   945 */
       
   946 TBool DecomposeChar(TChar aCh, TPtrC16& aResult)
       
   947     {
       
   948     aResult.Set(NULL, 0);
       
   949     TBuf16<2> srcBuf;
       
   950     TUTF32Iterator it = ::CharToUTF32Iterator(aCh, srcBuf);
       
   951     TBool res = ::Decompose(it, it);
       
   952     if(res)
       
   953         {
       
   954         aResult.Set(it.CurrentPosition(), it.Length());
       
   955         }
       
   956     return res;
       
   957     }
       
   958 
       
   959 /** 
       
   960 Turn an index into the hash table known to point to a non-singleton
       
   961 decomposition into that decomposition.
       
   962 @internalComponent
       
   963 */
       
   964 static TUTF32Iterator HashIndexToDecomposition(TInt aIndex)
       
   965 	{
       
   966 	TUint32 v = KUnicodeToIndexHash[aIndex];
       
   967 	ASSERT(v != 0);
       
   968 	TInt index = static_cast<TInt>(v >> 20);
       
   969 	ASSERT(index < ARRAY_LENGTH(KNonSingletonDecompositions)/2);
       
   970     return ::IndexToNonSingletonDecomposition(index);
       
   971 	}
       
   972 
       
   973 /**
       
   974 Takes a start and (one past the) end index into KCompostionMapping
       
   975 and a number of UTF16 characters (aLengthSoFar). All of the compositions
       
   976 within the range must have their first aLengthSoFar UTF16 characters
       
   977 matching.
       
   978 
       
   979 On entry, if aStart == aEnd then there is no possibility of a match so return
       
   980 immediately with EFalse. To continue, aStart must be strictly less than aEnd.
       
   981 
       
   982 Afterwards, aStart and aEnd will be narrowed to all those compositions
       
   983 where the aLengthSoFar'th UTF16 character matches aNextCharacter.
       
   984 No further compositions existing is indicated by aStart == aEnd.
       
   985 
       
   986 @return ETrue if the composition at aStart is exactly of length aLengthSoFar + 1.
       
   987 @internalComponent
       
   988 */
       
   989 static TBool RefineComposition(TInt& aStart, TInt& aEnd, TInt aLengthSoFar, TInt aNextCharacter)
       
   990 	{
       
   991 	if (aStart == aEnd)
       
   992 		return EFalse;
       
   993 	ASSERT((TUint)aStart < (TUint)aEnd);
       
   994  	ASSERT((TUint)aEnd <= (TUint)ARRAY_LENGTH(KCompositionMapping));
       
   995     TUTF32Iterator startIterator(::HashIndexToDecomposition(KCompositionMapping[aStart]));
       
   996 	if (startIterator.Length() == aLengthSoFar)
       
   997 		++aStart;
       
   998 
       
   999 	// Find a single example of a decomposition that is suitable
       
  1000 	TInt mid;
       
  1001 	TUTF32Iterator midIt;
       
  1002 	for (;;)
       
  1003 		{
       
  1004 		if (aStart == aEnd)
       
  1005 			return EFalse;
       
  1006 		mid = aStart + ((aEnd - aStart) >> 1);
       
  1007         midIt = ::HashIndexToDecomposition(KCompositionMapping[mid]);
       
  1008 		ASSERT(aLengthSoFar < midIt.Length());
       
  1009 		TInt midItChar = midIt[aLengthSoFar];
       
  1010 		if (midItChar < aNextCharacter)
       
  1011 			aStart = mid + 1;
       
  1012 		else if (aNextCharacter < midItChar)
       
  1013 			aEnd = mid;
       
  1014 		else
       
  1015 			{
       
  1016 			startIterator = midIt;
       
  1017 			break;
       
  1018 			}
       
  1019 		}
       
  1020 
       
  1021 	// FInd the first decomposition that does not match
       
  1022 	TInt start2 = mid + 1;
       
  1023 	while (start2 != aEnd)
       
  1024 		{
       
  1025 		ASSERT(start2 < aEnd);
       
  1026 		TInt mid2 = start2 + ((aEnd - start2) >> 1);
       
  1027         midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]);
       
  1028 		ASSERT(aLengthSoFar < midIt.Length());
       
  1029 		TInt midItChar = midIt[aLengthSoFar];
       
  1030 		ASSERT(aNextCharacter <= midItChar);
       
  1031 		if (aNextCharacter < midItChar)
       
  1032 			aEnd = mid2;
       
  1033 		else
       
  1034 			start2 = mid2 + 1;
       
  1035 		}
       
  1036 
       
  1037 	// Find the first decomposition that matches
       
  1038 	while (aStart != mid)
       
  1039 		{
       
  1040 		ASSERT(aStart < mid);
       
  1041 		TInt mid2 = aStart + ((mid - aStart) >> 1);
       
  1042         midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]);
       
  1043 		ASSERT(aLengthSoFar < midIt.Length());
       
  1044 		TInt midItChar = midIt[aLengthSoFar];
       
  1045 		ASSERT(midItChar <= aNextCharacter);
       
  1046 		if (midItChar < aNextCharacter)
       
  1047 			aStart = mid2 + 1;
       
  1048 		else
       
  1049 			{
       
  1050 			startIterator = midIt;
       
  1051 			mid = mid2;
       
  1052 			}
       
  1053 		}
       
  1054 
       
  1055 	return startIterator.Length() == (aLengthSoFar + 1);
       
  1056 	}
       
  1057 
       
  1058 /** 
       
  1059 @internalComponent
       
  1060 */
       
  1061 static TBool RefineCompositionUTF32(TInt& aStart, TInt& aEnd, TInt& aLengthSoFar, TChar aChar)
       
  1062 	{
       
  1063 	if (aChar < 0x10000)
       
  1064         return ::RefineComposition(aStart, aEnd, aLengthSoFar++, aChar);
       
  1065     ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar >> 10) + 0xD7C0);
       
  1066 	if (aStart == aEnd)
       
  1067 		return EFalse;
       
  1068     return ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar & 0x3FF) + 0xDC00);
       
  1069 	}
       
  1070 
       
  1071 /**
       
  1072 Combine as many of the characters presented as possible into a single
       
  1073 character.
       
  1074 @return The number of characters successfully combined.
       
  1075 @param aCombined If a nonzero value is returned, this contains
       
  1076             the character that is that number of characters from the start of
       
  1077             aDes combined.
       
  1078 @internalComponent
       
  1079 */
       
  1080 TInt CombineAsMuchAsPossible(const TDesC16& aDes, TChar& aCombined)
       
  1081 	{
       
  1082 	TInt start = 0;
       
  1083 	TInt end = sizeof(KCompositionMapping)/sizeof(KCompositionMapping[0]);
       
  1084 	TInt length = 0;
       
  1085 	TInt bestIndex = 0;
       
  1086 	TInt bestLength = 0;
       
  1087 	const TText16* ptr = aDes.Ptr();
       
  1088 	TUTF32Iterator input(ptr, ptr + aDes.Length());
       
  1089 	while (!input.AtEnd())
       
  1090 		{
       
  1091         if (::RefineCompositionUTF32(start, end, length, input.Current()))
       
  1092 			{
       
  1093 			bestIndex = start;
       
  1094 			bestLength = length;
       
  1095 			}
       
  1096 		input.Next();
       
  1097 		}
       
  1098 	if (bestLength == 0)
       
  1099 		return 0;
       
  1100 	aCombined = KUnicodeToIndexHash[KCompositionMapping[bestIndex]] & 0xFFFFF;
       
  1101 	return bestLength;
       
  1102 	}
       
  1103 
       
  1104 //////////////////////////////////////////////////////////////////////////////////////////////
       
  1105 // COLLATION
       
  1106 //////////////////////////////////////////////////////////////////////////////////////////////
       
  1107 
       
  1108 //////////////////////////////////////////////////////////////////////////////////////////////
       
  1109 // TDecompositionIterator class
       
  1110 
       
  1111 /** 
       
  1112 @internalComponent
       
  1113 */
       
  1114 void TDecompositionIterator::Set(const TUTF32Iterator& a)
       
  1115 	{
       
  1116 	iBase = a;
       
  1117 	if (!iBase.AtEnd())
       
  1118         {
       
  1119         (void)::Decompose(iBase, iDecomposition);
       
  1120         }
       
  1121 	}
       
  1122 
       
  1123 /** 
       
  1124 @internalComponent
       
  1125 */
       
  1126 TDecompositionIterator::TDecompositionIterator(const TUTF32Iterator& a)
       
  1127 	{
       
  1128 	Set(a);
       
  1129 	}
       
  1130 
       
  1131 /** 
       
  1132 @internalComponent
       
  1133 */
       
  1134 TBool TDecompositionIterator::AtEnd() const
       
  1135 	{
       
  1136 	return iBase.AtEnd();
       
  1137 	}
       
  1138 
       
  1139 /** 
       
  1140 @internalComponent
       
  1141 */
       
  1142 TChar TDecompositionIterator::Current() const
       
  1143 	{
       
  1144 	return iDecomposition.Current();
       
  1145 	}
       
  1146 
       
  1147 /** 
       
  1148 @internalComponent
       
  1149 */
       
  1150 void TDecompositionIterator::Next()
       
  1151 	{
       
  1152 	ASSERT(!iBase.AtEnd() && !iDecomposition.AtEnd());
       
  1153 	iDecomposition.Next();
       
  1154 	if (!iDecomposition.AtEnd())
       
  1155 		return;
       
  1156 	iBase.Next();
       
  1157 	if (!iBase.AtEnd())
       
  1158         {
       
  1159         (void)::Decompose(iBase, iDecomposition);
       
  1160         }
       
  1161 	}
       
  1162 
       
  1163 /** 
       
  1164 @internalComponent
       
  1165 */
       
  1166 const TText16* TDecompositionIterator::CurrentPosition() const
       
  1167 	{
       
  1168 	return iBase.CurrentPosition();
       
  1169 	}
       
  1170 
       
  1171 /** 
       
  1172 Find out the length and minimum combining class of
       
  1173 the current run of characters of nonzero combining class.
       
  1174 aMinClass and aMinClassPos are not written to if the return
       
  1175 value is 0.
       
  1176 aEndOfRun is written to with the final position of the iteration
       
  1177 if 0 is returned and aEndOfRun is non-null
       
  1178 @internalComponent
       
  1179 */
       
  1180 static TInt ReorderingRun(TInt& aMinClass, TDecompositionIterator& aMinClassPos,
       
  1181                           const TDecompositionIterator& aStart, TBool* aOpenSequence, 
       
  1182                           TInt aMaxDisallowedClass = 0, TDecompositionIterator* aEndOfRun = 0)
       
  1183 	{
       
  1184 	TInt comclass = aStart.AtEnd()? 0 : aStart.Current().GetCombiningClass();
       
  1185 	if (comclass == 0)
       
  1186 		{
       
  1187 		if (aEndOfRun)
       
  1188 			*aEndOfRun = aStart;
       
  1189 		if (aOpenSequence)
       
  1190 			*aOpenSequence = aStart.AtEnd();
       
  1191 		return 0;
       
  1192 		}
       
  1193 	aMinClass = 256;
       
  1194 	TDecompositionIterator i = aStart;
       
  1195 	TInt count = 0;
       
  1196 	while (comclass != 0)
       
  1197 		{
       
  1198 		if (aMaxDisallowedClass < comclass)
       
  1199 			{
       
  1200 			if (comclass < aMinClass)
       
  1201 				{
       
  1202 				aMinClass = comclass;
       
  1203 				aMinClassPos = i;
       
  1204 				}
       
  1205 			++count;
       
  1206 			}
       
  1207 		i.Next();
       
  1208 		comclass = i.AtEnd()? 0 : i.Current().GetCombiningClass();
       
  1209 		}
       
  1210 	if (count == 0 && aEndOfRun)
       
  1211 		*aEndOfRun = i;
       
  1212 	if (aOpenSequence)
       
  1213 		*aOpenSequence = i.AtEnd();
       
  1214 	return count;
       
  1215 	}
       
  1216 
       
  1217 //////////////////////////////////////////////////////////////////////////////////////////////
       
  1218 // TCanonicalDecompositionIterator class
       
  1219 
       
  1220 /** 
       
  1221 @internalComponent
       
  1222 */
       
  1223 void TCanonicalDecompositionIterator::Set(const TUTF32Iterator& a)
       
  1224 	{
       
  1225 	iBase.Set(a);
       
  1226 	iLastPosition = 0;
       
  1227 	if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2)
       
  1228 		iCurrentCombiningClass = 0;
       
  1229 	}
       
  1230 
       
  1231 /** 
       
  1232 @internalComponent
       
  1233 */
       
  1234 TBool TCanonicalDecompositionIterator::AtEnd() const
       
  1235 	{
       
  1236 	return iBase.AtEnd();
       
  1237 	}
       
  1238 
       
  1239 /** 
       
  1240 @internalComponent
       
  1241 */
       
  1242 TChar TCanonicalDecompositionIterator::Current() const
       
  1243 	{
       
  1244 	return iCurrentCombiningClass? iCurrent.Current() : iBase.Current();
       
  1245 	}
       
  1246 
       
  1247 /** 
       
  1248 @internalComponent
       
  1249 */
       
  1250 void TCanonicalDecompositionIterator::Next()
       
  1251 	{
       
  1252 	iLastPosition = iBase.CurrentPosition();
       
  1253 	if (iCurrentCombiningClass == 0)
       
  1254 		{
       
  1255 		iBase.Next();
       
  1256 		if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2)
       
  1257 			iCurrentCombiningClass = 0;
       
  1258 		return;
       
  1259 		}
       
  1260 	// Find the next character in the run with the same combining class
       
  1261 	iCurrent.Next();
       
  1262 	TInt curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass();
       
  1263 	while (curclass != 0)
       
  1264 		{
       
  1265 		if (curclass == iCurrentCombiningClass)
       
  1266 			// success
       
  1267 			return;
       
  1268 		iCurrent.Next();
       
  1269 		curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass();
       
  1270 		}
       
  1271 	// There are none left in the current class. Find out what the next one is.
       
  1272 	if (0 == ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, 0, iCurrentCombiningClass, &iBase))
       
  1273 		iCurrentCombiningClass = 0;
       
  1274 	}
       
  1275 
       
  1276 /** 
       
  1277 @internalComponent
       
  1278 */
       
  1279 const TText16* TCanonicalDecompositionIterator::CurrentPositionIfAtCharacter() const
       
  1280 	{
       
  1281 	if (iCurrentCombiningClass != 0)
       
  1282 		return 0;
       
  1283 	const TText16* p = iBase.CurrentPosition();
       
  1284 	return iLastPosition == p? 0 : p;
       
  1285 	}
       
  1286 
       
  1287 /** 
       
  1288 @internalComponent
       
  1289 */
       
  1290 TBool TCanonicalDecompositionIterator::IsInOpenSequence() const
       
  1291 	{
       
  1292 	return iInOpenSequence;
       
  1293 	}
       
  1294 
       
  1295 //////////////////////////////////////////////////////////////////////////////////////////////
       
  1296 // TCanonicalDecompositionIteratorCached class
       
  1297 
       
  1298 /** 
       
  1299 @internalComponent
       
  1300 */
       
  1301 void TCanonicalDecompositionIteratorCached::Set(const TUTF32Iterator& a)
       
  1302 	{
       
  1303 	iBase.Set(a);
       
  1304 	iCacheStart = 0;
       
  1305 	iCacheSize = 0;
       
  1306 	}
       
  1307 
       
  1308 /** 
       
  1309 @internalComponent
       
  1310 */
       
  1311 TBool TCanonicalDecompositionIteratorCached::AtEnd() const
       
  1312 	{
       
  1313 	return iCacheSize == 0 && iBase.AtEnd();
       
  1314 	}
       
  1315 
       
  1316 /** 
       
  1317 @internalComponent
       
  1318 */
       
  1319 void TCanonicalDecompositionIteratorCached::Next(TInt aOffset)
       
  1320 	{
       
  1321 	ASSERT(0 <= aOffset);
       
  1322 	ASSERT(0 <= iCacheSize);
       
  1323 	ASSERT(0 != iCacheSize || !iBase.AtEnd());
       
  1324 	if (aOffset <= iCacheSize)
       
  1325 		{
       
  1326 		iCacheSize -= aOffset;
       
  1327 		iCacheStart = (iCacheStart + aOffset) & (KMaxLookAhead - 1);
       
  1328 		return;
       
  1329 		}
       
  1330 	aOffset -= iCacheSize;
       
  1331 	iCacheSize = 0;
       
  1332 	while (aOffset != 0)
       
  1333 		{
       
  1334 		iBase.Next();
       
  1335 		--aOffset;
       
  1336 		}
       
  1337 	}
       
  1338 
       
  1339 /** 
       
  1340 Get the character at the position of the iterator plus aOffset steps.
       
  1341 Returns -1 if we are looking too far ahead.
       
  1342 @internalComponent
       
  1343 */
       
  1344 TChar TCanonicalDecompositionIteratorCached::Get(TInt aOffset)
       
  1345 	{
       
  1346 	// should be assert debug: there is a chance this could go off with
       
  1347 	// bad collation tables
       
  1348 	ASSERT(aOffset <= KMaxLookAhead);
       
  1349 	while (iCacheSize <= aOffset)
       
  1350 		{
       
  1351 		if (iBase.AtEnd())
       
  1352 			return TChar(static_cast <TUint> (-1));
       
  1353 		TInt cachePos = (iCacheStart + iCacheSize) & (KMaxLookAhead - 1);
       
  1354 		iCache[cachePos].iChar = iBase.Current();
       
  1355 		iCache[cachePos].iPos = iBase.CurrentPositionIfAtCharacter();
       
  1356 		++iCacheSize;
       
  1357 		iBase.Next();
       
  1358 		}
       
  1359 	return iCacheSize == aOffset? iBase.Current() : iCache[(iCacheStart + aOffset) & (KMaxLookAhead - 1)].iChar;
       
  1360 	}
       
  1361 
       
  1362 /** 
       
  1363 If the current position in the original string is representable
       
  1364 as a pointer into it and we know what it is, return it.
       
  1365 @internalComponent
       
  1366 */
       
  1367 const TText16* TCanonicalDecompositionIteratorCached::CurrentPositionIfAtCharacter() const
       
  1368 	{
       
  1369 	if(iCacheSize == 0)
       
  1370 		{
       
  1371 		return iBase.CurrentPositionIfAtCharacter();
       
  1372 		}
       
  1373 	return iCache[iCacheStart & (KMaxLookAhead - 1)].iPos;
       
  1374 	}
       
  1375