kernel/eka/euser/unicode/Compare.cpp
changeset 0 a41df078684a
child 90 947f0dc9f7a8
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kernel/eka/euser/unicode/Compare.cpp	Mon Oct 19 15:55:17 2009 +0100
@@ -0,0 +1,1388 @@
+// Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of the License "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+// Folding and decomposition implementation
+// 
+//
+
+#include "FoldDecomp.inl"
+#include "CompareImp.h"
+#include "u32std.h"
+
+#define ARRAY_LENGTH(a) (static_cast<TInt>(sizeof(a)/sizeof(a[0])))
+
+////////////////////////////////////////////////////////////////////////////////////////////
+// Global functions
+////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+@internalComponent
+*/
+TChar UTF16ToChar(const TText16* a)
+	{
+	if (0xD800 <= a[0])
+		{
+		if (a[0] < 0xE000)
+			{
+            if (a[0] < 0xDC00 && ::IsLowSurrogate(a[1]))
+				{
+                TChar c = ::PairSurrogates(a[0], a[1]);
+				if ((c & 0xFFFE) != 0xFFFE)
+					return c;
+				}
+			return 0xFFFF;
+			}
+		if (a[0] == 0xFFFE)
+			return 0xFFFF;
+		}
+	return a[0];
+	}
+
+/**
+Is a character a base character (ETrue) or a combiner (EFalse)?
+For now, we will treat all control characters as base characters.
+@internalComponent
+*/
+TBool IsBaseCharacter(TChar a)
+	{
+	if(a < 0x220)
+        {
+		// These Unicode characters are all assigned
+		// and none of them is a combining character
+		return ETrue;
+        }
+	return (a.GetCategory() & 0xFFF0) - TChar::EMarkGroup;
+	}
+
+/**
+@internalComponent
+*/
+inline TInt GetDecompositionIndex(TChar a)
+	{
+    TInt i = DecompositionHashStart(a);
+	TUint32 v = KUnicodeToIndexHash[i];
+	if (!v)
+		return -1;
+	if ((v & 0xFFFFF) != a)
+		{
+        TInt step = DecompositionHashStep(a);
+		do	{
+			i = (i + step) & KDecompositionHashBitmask;
+			v = KUnicodeToIndexHash[i];
+			if (!v)
+				return -1;
+			} while ((v & 0xFFFFF) != a);
+		}
+// it is important that this is an unsigned shift
+	return static_cast<TInt>(v >> 20);
+	}
+
+/**
+Will not work if an invalid index is passed.
+@internalComponent
+*/
+static TUTF32Iterator GetFoldedDecomposition(TInt aIndex)
+	{
+	TInt singletonIndex = aIndex - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
+	if (0 <= singletonIndex)
+		return TUTF32Iterator(KSingletonFolds + singletonIndex);
+	const TText* start = KNonSingletonFolds + aIndex * 2;
+	if (*start != KLongD)
+		return TUTF32Iterator(start, start + 2,
+			TUTF32Iterator::EStartsWithValidCharacter);
+	TInt longDecompIndex = start[1];
+	start = KLongDecompositions + (longDecompIndex & 0xFFF);
+	return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3,
+		TUTF32Iterator::EStartsWithValidCharacter);
+	}
+
+/**
+@internalComponent
+*/
+static TChar GetFirstFoldedChar(TChar a)
+	{
+    TInt index = ::GetDecompositionIndex(a);
+    return index < 0? a : ::GetFoldedDecomposition(index).Current();
+	}
+
+/**
+@internalComponent
+*/
+static TBool FirstFoldedCodeIsNot(TInt aNonSurrogate, TInt aFoldedNonSurrogate)
+	{
+    TInt index = ::GetDecompositionIndex(aNonSurrogate);
+	if (index < 0)
+		return aNonSurrogate - aFoldedNonSurrogate;
+	TInt singletonIndex = index - (sizeof(KNonSingletonFolds)/sizeof(KNonSingletonFolds[0])/2);
+	if (0 < singletonIndex)
+		return KSingletonFolds[singletonIndex] - aFoldedNonSurrogate;
+	const TText* start = KNonSingletonFolds + index * 2;
+	if (start[0] == KLongD)
+		start = KLongDecompositions + (start[1] & 0xFFF);
+	return *start - aFoldedNonSurrogate;
+	}
+
+/**
+@internalComponent
+*/
+static TInt GetClass(TFoldedDecompIterator& a)
+	{
+	ASSERT(!a.AtEnd());
+	a.EnterFoldedSequence();
+	TChar ch = a.Current();
+	TInt cl = ch.GetCombiningClass();
+	if (cl == 0)
+		// Assume starters have been folded from ypogegrammeni
+		cl = 240;
+	return cl;
+	}
+
+////////////////////////////////////////////////////////////////////////////////////////////
+// TUTF32Iterator
+////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+@internalComponent
+*/
+void TUTF32Iterator::Next()
+	{
+	ASSERT(iStart != iEnd);
+	while (++iStart != iEnd)
+		{
+        iCurrent = ::UTF16ToChar(iStart);
+		if (iCurrent != 0xFFFF)
+			return;
+		}
+	}
+
+/**
+Locates a base character in a string using a folded comparision. Will not find combining 
+characters, nor will it consider Korean combining Jamo to be equivalent to Hangul.
+@internalComponent
+*/
+TBool TUTF32Iterator::LocateFoldedBaseCharacter(TChar aChar)
+	{
+	// A quick shortcut for simple rejections
+	if (aChar < 0xFFFF)
+		{
+        while (iStart != iEnd && *iStart < 0xD800 && ::FirstFoldedCodeIsNot(*iStart, aChar))
+			++iStart;
+		if (iStart != iEnd)
+			{
+            iCurrent = ::UTF16ToChar(iStart);
+			if (iCurrent == 0xFFFF)
+				Next();
+			}
+		}
+	while (!AtEnd())
+		{
+        TChar foldedChar = ::GetFirstFoldedChar(iCurrent);
+		if (aChar == foldedChar)
+			return ETrue;
+		Next();
+		}
+	return EFalse;
+	}
+
+////////////////////////////////////////////////////////////////////////////////////////////
+// TFoldedDecompIterator
+////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+@internalComponent
+*/
+TFoldedDecompIterator::TFoldedDecompIterator(const TUTF32Iterator& a)
+	{
+	Set(a);
+	}
+
+/**
+@internalComponent
+*/
+TBool TFoldedDecompIterator::AtEnd() const
+	{
+	return iOriginal.AtEnd();
+	}
+
+/**
+@internalComponent
+*/
+TBool TFoldedDecompIterator::AtEndOrWildcard() const
+	{
+	// neither '?' nor '*' have decomposition sequences, so we can assume that
+	// if we are pointing at one or the other, we don't need to check if we
+	// are in a decomposition sequence or not.
+	return iOriginal.AtEnd() || iOriginal.Current() == '?' || iOriginal.Current() == '*';
+	}
+
+/**
+@internalComponent
+*/
+TBool TFoldedDecompIterator::EnterFoldedSequence()
+	{
+	ASSERT(!AtEnd());
+	return !IsInFoldedSequence() && StrictEnterFoldedSequence();
+	}
+
+/**
+Enter folded sequence, assuming that we are not already in one
+@internalComponent
+*/
+TBool TFoldedDecompIterator::StrictEnterFoldedSequence()
+	{
+	ASSERT(!AtEnd());
+	ASSERT(!IsInFoldedSequence());
+    TInt index = ::GetDecompositionIndex(iOriginal.Current());
+	if (index < 0)
+		return EFalse;
+	iFolded = ::GetFoldedDecomposition(index);
+	return ETrue;
+	}
+
+/**
+An iota might have folded from a combining ypogegrammeni.
+If the current character is a base character, this function will
+detect whether it was folded from a combining character (and
+therefore does not constitute a grapheme boundary).
+@internalComponent
+*/
+TBool TFoldedDecompIterator::CurrentIsBaseFoldedFromCombiner() const
+	{
+	if (!IsInFoldedSequence())
+		return EFalse;
+	// The only character that does this is Ypogerammeni, which folds to iota
+	if (iFolded.Current() != 0x3B9)
+		return EFalse;
+	// If the unfolded character is a combiner then it cannot contain an iota,
+	// so it must be an ypogegrammeni that has been folded to one.
+	// This will catch 0x345, the ypogegrammeni itself.
+    if (!::IsBaseCharacter(iOriginal.Current()))
+		return ETrue;
+	// Otherwise the base character will be at the start of the decomposition
+	// sequence. We will assume that it is folded from a genuine iota if and
+	// only if there is an iota at the start of the folded decomposition
+	// sequence. (In theory there might be an iota with a combining
+	// ypogegrammeni, but in practice this will not occur).
+	TInt index = ::GetDecompositionIndex(iOriginal.Current());
+	ASSERT(0 <= index);
+	TUTF32Iterator folded = ::GetFoldedDecomposition(index);
+	return folded.Current() != 0x3B9;
+	}
+
+/**
+@internalComponent
+*/
+TChar TFoldedDecompIterator::Current() const
+	{
+	ASSERT(!AtEnd());
+	return IsInFoldedSequence()? iFolded.Current() : iOriginal.Current();
+	}
+
+/** 
+Move past this code if it matches unfolded or folded 
+@internalComponent
+*/
+TBool TFoldedDecompIterator::Match(TChar aCode)
+	{
+	ASSERT(!AtEnd());
+	if (!IsInFoldedSequence())
+		{
+		if (aCode == iOriginal.Current())
+			{
+			iOriginal.Next();
+			return ETrue;
+			}
+		if (!StrictEnterFoldedSequence())
+			return EFalse;
+		}
+	ASSERT(IsInFoldedSequence());
+	if (aCode == iFolded.Current())
+		{
+		iFolded.Next();
+		if (iFolded.AtEnd())
+			iOriginal.Next();
+		return ETrue;
+		}
+	return EFalse;
+	}
+
+/** 
+Move this and argument past matching character. 
+@internalComponent
+*/
+TBool TFoldedDecompIterator::Match(TFoldedDecompIterator& aThat)
+	{
+	ASSERT(!AtEnd());
+	if (!IsInFoldedSequence())
+		{
+		if ( aThat.Match(iOriginal.Current()) )
+			{
+			iOriginal.Next();
+			return ETrue;
+			}
+		if (!StrictEnterFoldedSequence())
+			return EFalse;
+		}
+	ASSERT(IsInFoldedSequence());
+	if ( aThat.Match(iFolded.Current()) )
+		{
+		iFolded.Next();
+		if (iFolded.AtEnd())
+			iOriginal.Next();
+		return ETrue;
+		}
+	return EFalse;
+	}
+
+/** 
+@internalComponent
+*/
+void TFoldedDecompIterator::Next()
+	{
+	ASSERT(!AtEnd());
+	if (IsInFoldedSequence())
+		{
+		iFolded.Next();
+		if (IsInFoldedSequence())
+			return;
+		}
+	iOriginal.Next();
+	}
+
+////////////////////////////////////////////////////////////////////////////////////////////
+// TFoldedSortedDecompIterator
+////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+Set this iterator to iterate over the next combining characters.
+Iotas in folded sequences are assumed to be character class 240
+(combining ypogegrammeni). Next() must be used once before
+the first call to Current(), as long as AtEnd() returns false.
+@param aBase Sets the start of the iteration. This value is advanced to the
+             end of the iteration.
+@return The number of codes in the iteration.
+@internalComponent
+*/
+TInt TFoldedSortedDecompIterator::Set(TFoldedDecompIterator &aBase)
+	{
+	iStart = aBase;
+	TInt length = 0;
+	iCurrentClass = 256;
+
+	const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet;
+
+	// Find the next starter (i.e. character with combining class 0).
+	// We must not count iota folded from ypogegrammeni.
+	// Ypogegrammeni has combining class 240.
+	// Iota has combining class 0.
+	// Also we will be searching for the character with the smallest
+	// combining class to start at.
+	while (!aBase.AtEnd())
+		{
+		aBase.EnterFoldedSequence();
+		TChar ch = aBase.Current();
+		TInt cl = TUnicode(TUint(ch)).GetCombiningClass(charDataSet);
+		if (cl == 0)
+			{
+			if (aBase.CurrentIsBaseFoldedFromCombiner())
+				cl = 240;
+			else
+				break;
+			}
+		if (cl < iCurrentClass)
+			{
+			iCurrentClass = cl;
+			iCurrent = aBase;
+			iCurrentCount = length;
+			}
+		++length;
+		aBase.Next();
+		}
+	iRemaining = length;
+	iLength = length;
+	ASSERT(iLength == 0 || iCurrentClass < 256);
+	return length;
+	}
+
+/** 
+Set this iterator so that AtEnd() returns ETrue. 
+@internalComponent
+*/
+void TFoldedSortedDecompIterator::Set()
+	{
+	iRemaining = 0;
+	}
+
+/** 
+@internalComponent
+*/
+TBool TFoldedSortedDecompIterator::AtEnd() const
+	{
+	return iRemaining == 0;
+	}
+
+/** 
+@internalComponent
+*/
+TChar TFoldedSortedDecompIterator::Current() const
+	{
+	ASSERT(!AtEnd());
+	return iCurrent.Current();
+	}
+
+/** 
+@internalComponent
+*/
+void TFoldedSortedDecompIterator::Next()
+	{
+	ASSERT(!AtEnd());
+	--iRemaining;
+	while (++iCurrentCount != iLength)
+		{
+		iCurrent.Next();
+        if (::GetClass(iCurrent) == iCurrentClass)
+			return;
+		}
+	// We have not found any more of the same class, so we will look
+	// for the smallest one larger.
+	TInt minClass = 256;
+	TFoldedDecompIterator searcher(iStart);
+	TInt searchCount = 0;
+	while (searchCount != iLength)
+		{
+        TInt cl = ::GetClass(searcher);
+		if (iCurrentClass < cl && cl < minClass)
+			{
+			minClass = cl;
+			iCurrentCount = searchCount;
+			iCurrent = searcher;
+			}
+		++searchCount;
+		searcher.Next();
+		}
+	iCurrentClass = minClass;
+	ASSERT(minClass < 256 || AtEnd());
+	}
+
+////////////////////////////////////////////////////////////////////////////////////////////
+// TFoldedCanonicalIterator
+////////////////////////////////////////////////////////////////////////////////////////////
+
+/** 
+@internalComponent
+*/
+TFoldedCanonicalIterator::TFoldedCanonicalIterator(const TUTF32Iterator& a)
+	{
+	iBase.Set(a);
+	iSorted.Set();
+    if(!iBase.AtEnd())
+        {
+	    iBase.EnterFoldedSequence();
+	    if (iBase.Current().GetCombiningClass() != 0 || iBase.CurrentIsBaseFoldedFromCombiner())
+            {
+		    iSorted.Set(iBase);
+            }
+        }
+	}
+
+/** 
+@internalComponent
+*/
+TBool TFoldedCanonicalIterator::AtEnd() const
+	{
+	return iSorted.AtEnd() && iBase.AtEnd();
+	}
+
+/** 
+@internalComponent
+*/
+TChar TFoldedCanonicalIterator::Current() const
+	{
+	ASSERT(!iBase.AtEnd() || !iSorted.AtEnd());
+	return iSorted.AtEnd()? iBase.Current() : iSorted.Current();
+	}
+
+/** 
+@internalComponent
+*/
+void TFoldedCanonicalIterator::Next(const TUnicodeDataSet* aCharDataSet)
+	{
+	ASSERT(!iBase.AtEnd() || !iSorted.AtEnd());
+	if (!iSorted.AtEnd())
+		{
+		iSorted.Next();
+		return;
+		}
+	iBase.Next();
+    if(!iBase.AtEnd())
+        {
+	    iBase.EnterFoldedSequence();
+ 	    if (TUnicode(TUint(iBase.Current())).GetCombiningClass(aCharDataSet) != 0 || iBase.CurrentIsBaseFoldedFromCombiner())
+           {
+		    iSorted.Set(iBase);
+            }
+        }
+	}
+
+////////////////////////////////////////////////////////////////////////////////////////////
+// Folding - Global functions and structures
+////////////////////////////////////////////////////////////////////////////////////////////
+
+/** 
+@internalComponent
+*/
+struct TEndTester 
+    { 
+    typedef enum {EGenuine, EWildcard} TType;
+
+    inline TEndTester(TType aType) :
+        iType(aType)
+        {
+        }
+
+    inline TBool operator()(const TFoldedDecompIterator& a) const
+        {
+        return iType == EGenuine ? a.AtEnd() : a.AtEndOrWildcard();
+        } 
+
+private:
+    TType iType;
+
+    };
+
+/**
+Consumes as much of aCandidate as matches aSearchTerm up to the next '?' or
+'*' wildcard or the end of aSearchTerm.
+It is assumed that the search term begins with a base character.
+Returns true if and only if the whole search term was matched
+with a whole number of UTF16s in the candidate string.
+On return of ETrue the candidate string iterator will have consumed the
+matching characters the search term will have had all its matching characters
+consumed.
+@internalComponent
+*/
+TBool ConsumeFoldedMatch(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm,
+                         const TEndTester& aEndTester)
+	{
+	TBool assumeBase = ETrue;
+	TFoldedDecompIterator st(aSearchTerm);
+	TFoldedDecompIterator cs(aCandidateString);
+	while (!aEndTester(st))
+		{
+		if (cs.AtEnd())
+			return EFalse;
+		if (st.Match(cs))
+			{
+			assumeBase = EFalse;
+			if (aEndTester(st))
+				{
+                // We have a match...
+                if (cs.IsInFoldedSequence())
+                    // but it was against only part of a character
+                    // in the original string, so we fail.
+                    return EFalse;
+				aCandidateString = cs.BaseIterator();
+				aSearchTerm = st.BaseIterator();
+				return ETrue;
+				}
+			continue;
+			}
+		// did not match. We need to re-order canonically.
+		if (assumeBase)
+			// The first characters did not match.. do not bother
+			// to re-order.
+			return EFalse;
+		TFoldedSortedDecompIterator csc;
+		TInt cscLength = csc.Set(cs);
+		if (cscLength < 2)
+			// If there are less than two characters to be reordered,
+			// there is no hope of getting a match by re-ordering
+			return EFalse;
+		TFoldedSortedDecompIterator stc;
+		if (cscLength != stc.Set(st))
+			// if the strings have differing numbers of characters,
+			// there can be no match
+			return EFalse;
+		while (!stc.AtEnd())
+			{
+			ASSERT(!csc.AtEnd());
+			if (stc.Current() != csc.Current())
+				// well, we tried.
+				return EFalse;
+			stc.Next();
+			csc.Next();
+			}
+		}
+	// If the candidate string is in a folded sequence, then
+	// we should not accept the match, as we require all matches
+	// to be for a whole number of characters in the original string.
+	if (cs.IsInFoldedSequence())
+		return EFalse;
+	aCandidateString = cs.BaseIterator();
+	aSearchTerm = st.BaseIterator();
+	return ETrue;
+	}
+
+/** 
+@internalComponent
+*/
+TBool ConsumeFoldedMatchWholeGraphemes(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm, 
+                                       const TEndTester& aEndTester)
+	{
+    if (!::ConsumeFoldedMatch(aCandidateString, aSearchTerm, aEndTester))
+		return EFalse;
+    return aCandidateString.AtEnd() || ::IsBaseCharacter(aCandidateString.Current());
+	}
+
+/** 
+@internalComponent
+*/
+static TBool ConsumeGrapheme(TUTF32Iterator& a)
+	{
+	if (a.AtEnd())
+		return EFalse;
+	a.Next();
+    while (!a.AtEnd() && !::IsBaseCharacter(a.Current()))
+		a.Next();
+	return ETrue;
+	}
+
+/** 
+@internalComponent
+*/
+TBool MatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
+	{
+    TEndTester endTester(TEndTester::EWildcard);
+    while(::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester))
+		{
+		if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*')
+			return ETrue;
+		ASSERT(aSearchTerm.Current() == '?');
+		aSearchTerm.Next();
+        if (!::ConsumeGrapheme(aCandidateString))
+			return EFalse;
+		}
+	return EFalse;
+	}
+
+/** 
+@internalComponent
+*/
+TBool FindMatchSectionFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
+	{
+	// match as many graphemes as there are leading ?s
+	while (!aSearchTerm.AtEnd()
+		&& aSearchTerm.Current() != '*' && aSearchTerm.Current() == '?')
+		{
+        if (!::ConsumeGrapheme(aCandidateString))
+			return EFalse;
+		aSearchTerm.Next();
+		}
+	if (aSearchTerm.AtEnd() || aSearchTerm.Current() == '*')
+		return ETrue;
+    TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current());
+	TUTF32Iterator savedST(aSearchTerm);
+	while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm))
+		{
+		TUTF32Iterator savedCS = aCandidateString;
+		if (::MatchSectionFolded(aCandidateString, aSearchTerm))
+			return ETrue;
+		aSearchTerm = savedST;
+		aCandidateString = savedCS;
+		aCandidateString.Next();
+		}
+	return EFalse;
+	}
+
+/** 
+@internalComponent
+*/
+TBool MatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
+                        const TText16* aSearchTermStart, const TText16* aSearchTermEnd)
+	{
+	TUTF32Iterator candidate(aCandidateStringStart, aCandidateStringEnd);
+	TUTF32Iterator searchTerm(aSearchTermStart, aSearchTermEnd);
+	// First section of search term must match exactly at the start of the
+	// candidate string.
+	if (!::MatchSectionFolded(candidate, searchTerm))
+		return EFalse;
+
+	// If there was only one section, it must match the whole candidate string.
+	if (searchTerm.AtEnd())
+		return candidate.AtEnd();
+
+	while (!searchTerm.AtEnd())
+		{
+		searchTerm.Next();
+		if (!::FindMatchSectionFolded(candidate, searchTerm))
+			return EFalse;
+		}
+
+	// The last section must match exactly at the end of the candidate string.
+	if (candidate.AtEnd())	// shortcut
+		return ETrue;
+	const TText16* searchTermLastSection = aSearchTermEnd;
+	while (searchTermLastSection != aSearchTermStart
+		&& searchTermLastSection[-1] != '*')
+		--searchTermLastSection;
+	if (searchTermLastSection == aSearchTermEnd)
+		// last section is null, so trivially matches
+		return ETrue;
+	// Count graphemes by counting the number of base characters.
+	// The first one is assumed to start a grapheme.
+	TInt graphemeCount = 1;
+	for (const TText16* s = searchTermLastSection + 1; s != aSearchTermEnd; ++s)
+		{
+        if (::IsBaseCharacter(*s))
+			++graphemeCount;
+		}
+	// Count this many graphemes back in the candidate string
+	const TText16* candidateLastSection = aCandidateStringEnd;
+	while (graphemeCount != 0
+		&& candidateLastSection != aCandidateStringStart)
+		{
+        if (::IsBaseCharacter(*--candidateLastSection))
+			--graphemeCount;
+		}
+	TUTF32Iterator last(candidateLastSection, aCandidateStringEnd);
+	TUTF32Iterator st(searchTermLastSection, aSearchTermEnd);
+	return ::MatchSectionFolded(last, st);
+	}
+
+/** 
+Implementation of MatchF
+(slow if there is a match: MatchStringFolded is better, but does not return
+the position of the match)
+@internalComponent
+*/
+TInt LocateMatchStringFolded(const TText16* aCandidateStringStart, const TText16* aCandidateStringEnd,
+                             const TText16* aSearchTermStart, const TText16* aSearchTermEnd)
+	{
+	// Quick shortcut for simple non-match
+	if (aSearchTermStart != aSearchTermEnd && *aSearchTermStart != '*')
+		{
+		if (aCandidateStringStart == aCandidateStringEnd)
+			return KErrNotFound;
+		// We won't bother with non-characters and surrogate pairs.
+		if (*aSearchTermStart != '?'
+			&& *aSearchTermStart < 0xD800
+			&& *aCandidateStringStart < 0xD800
+            && ::GetFirstFoldedChar(*aSearchTermStart) != ::GetFirstFoldedChar(*aCandidateStringStart))
+			return KErrNotFound;
+		}
+    if (!::MatchStringFolded(aCandidateStringStart, aCandidateStringEnd,
+		aSearchTermStart, aSearchTermEnd))
+		return KErrNotFound;
+	// find where it matches
+	while (aSearchTermStart != aSearchTermEnd && *aSearchTermStart == '*')
+		++aSearchTermStart;
+	const TText16* end = aSearchTermStart;
+	while (end != aSearchTermEnd && *end != '*')
+		++end;
+	// To preserve existing behaviour, a search term consisting of nothing
+	// but stars is considered to match at 0.
+	if (end == aSearchTermStart)
+		return 0;
+	for (const TText16* csSection = aCandidateStringStart;
+		csSection <= aCandidateStringEnd; ++csSection)
+		{
+		TUTF32Iterator cs(csSection, aCandidateStringEnd);
+		TUTF32Iterator st(aSearchTermStart, end);
+        if (::MatchSectionFolded(cs, st))
+			{
+			// If this match must match exactly at the end, we must keep
+			// going.
+			// This could be a lot faster, with some optimizations.
+			if (end == aSearchTermEnd)
+				{
+				if (!cs.AtEnd())
+					continue;
+				}
+			return csSection - aCandidateStringStart;
+			}
+        // Because this function is using TUTF32Iterator, which means the
+        // original author want to support surrogate. Take it as a defect and
+        // fix it, while do not define a new LocateMatchStringFoldedSurrogate().
+        if (IsSurrogate(*csSection))
+        	++csSection;
+		}
+	// this should never happen!
+	ASSERT(0);
+	return KErrNotFound;
+	}
+
+/** 
+Implementation of FindF
+@internalComponent
+*/
+TInt FindFolded(TUTF32Iterator& aCandidateString, TUTF32Iterator& aSearchTerm)
+	{
+    //Empty aSearchTerm string? - Then return 0 - keep the new implementation functionally 
+    //compatible with the old one.
+    if(aSearchTerm.Length() == 0)
+        {
+        return 0;
+        }
+	const TText16* candidateStartPosition = aCandidateString.CurrentPosition();
+    TChar firstCharOfSearchTerm = ::GetFirstFoldedChar(aSearchTerm.Current());
+	TUTF32Iterator savedST(aSearchTerm);
+	while (aCandidateString.LocateFoldedBaseCharacter(firstCharOfSearchTerm))
+		{
+		TUTF32Iterator savedCS = aCandidateString;
+        TEndTester endTester(TEndTester::EGenuine);
+        if (::ConsumeFoldedMatchWholeGraphemes(aCandidateString, aSearchTerm, endTester))
+			return savedCS.CurrentPosition() - candidateStartPosition;
+		aSearchTerm = savedST;
+		aCandidateString = savedCS;
+		aCandidateString.Next();
+		}
+	return KErrNotFound;
+	}
+
+/** 
+Implementation of boolean CompareF
+@internalComponent
+*/
+TBool EqualsFolded(TUTF32Iterator& aLeft, TUTF32Iterator& aRight)
+	{
+    TEndTester endTester(TEndTester::EGenuine);
+    if (::ConsumeFoldedMatchWholeGraphemes(aLeft, aRight, endTester))
+		return aLeft.AtEnd();
+	return EFalse;
+	}
+
+/** 
+Implementation of tri-state CompareF
+@internalComponent
+*/
+TInt CompareFolded(const TUTF32Iterator& aLeft, const TUTF32Iterator& aRight)
+	{
+	TFoldedCanonicalIterator left(aLeft);
+	TFoldedCanonicalIterator right(aRight);
+
+	const TUnicodeDataSet* charDataSet = GetLocaleCharSet()->iCharDataSet;
+
+	while (!left.AtEnd())
+		{
+		if (right.AtEnd())
+			return 1;
+		TChar cr = right.Current();
+		TChar cl = left.Current();
+		if (cr != cl)
+			return cl - cr;
+        right.Next(charDataSet);
+        left.Next(charDataSet);
+		}
+	return right.AtEnd()? 0 : -1;
+	}
+
+////////////////////////////////////////////////////////////////////////////////////////////
+// Composition/Decomposition - Global functions and structures
+////////////////////////////////////////////////////////////////////////////////////////////
+
+/** 
+@internalComponent
+*/
+static TUTF32Iterator IndexToNonSingletonDecomposition(TInt aIndex)
+	{
+	const TText* start = KNonSingletonDecompositions + aIndex * 2;
+	if (*start != KLongD)
+		return TUTF32Iterator(start, start + 2, TUTF32Iterator::EStartsWithValidCharacter);
+	TInt longDecompIndex = start[1];
+	start = KLongDecompositions + (longDecompIndex & 0xFFF);
+	return TUTF32Iterator(start, start + (longDecompIndex >> 12) + 3, TUTF32Iterator::EStartsWithValidCharacter);
+	}
+
+/** 
+Come up with a decomposition for the current value pointed at by the iterator
+@internalComponent
+*/
+static TBool Decompose(const TUTF32Iterator& aUTF32It, TUTF32Iterator& aOutIt)
+	{
+    TInt index = ::GetDecompositionIndex(aUTF32It.Current());
+	TInt singletonIndex = index - (sizeof(KNonSingletonDecompositions)/sizeof(KNonSingletonDecompositions[0])/2);
+	const TInt SizeOfSingletonTable = sizeof(KSingletonDecompositions)/sizeof(KSingletonDecompositions[0]);
+	if(index < 0 || SizeOfSingletonTable <= singletonIndex)
+        {
+        aOutIt = aUTF32It.CurrentAsIterator();
+		return EFalse;//There is not any valid decomposition
+        }
+	if(0 <= singletonIndex)
+        {
+        // KSingletonDecompositions contains some items that come from "ShortDecompsLongFolds".
+        // "ShortDecompsLongFolds" contains some characters that have no composition, but have "long fold".
+        // This basically specific to non-BMP characters with fold also outside BMP.
+        // For example:
+        // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
+        // In Unicode 5.0.0, totally, there are 40 similar characters, which are U+10400-U+10427.
+        if (KSingletonDecompositions[singletonIndex] == 0xFFFF)
+        	return EFalse;
+		aOutIt = TUTF32Iterator(KSingletonDecompositions + singletonIndex);
+        }
+    else
+        {
+        aOutIt = ::IndexToNonSingletonDecomposition(index);
+        }
+    return ETrue;//Valid decomposition
+	}
+
+/** 
+@internalComponent
+*/
+static TUTF32Iterator CharToUTF32Iterator(TChar aChar, TDes16& aBuf)
+	{
+	aBuf.Zero();
+	if (aChar < 0x10000)
+		aBuf.Append(aChar);
+	else
+		{
+		aBuf.Append((aChar >> 10) + 0xD7C0);
+		aBuf.Append((aChar & 0x3FF) + 0xDC00);
+		}
+	const TText16* t = aBuf.Ptr();
+	return TUTF32Iterator(t, t + aBuf.Length());
+	}
+
+/** 
+@internalComponent
+*/
+TBool DecomposeChar(TChar aCh, TPtrC16& aResult)
+    {
+    aResult.Set(NULL, 0);
+    TBuf16<2> srcBuf;
+    TUTF32Iterator it = ::CharToUTF32Iterator(aCh, srcBuf);
+    TBool res = ::Decompose(it, it);
+    if(res)
+        {
+        aResult.Set(it.CurrentPosition(), it.Length());
+        }
+    return res;
+    }
+
+/** 
+Turn an index into the hash table known to point to a non-singleton
+decomposition into that decomposition.
+@internalComponent
+*/
+static TUTF32Iterator HashIndexToDecomposition(TInt aIndex)
+	{
+	TUint32 v = KUnicodeToIndexHash[aIndex];
+	ASSERT(v != 0);
+	TInt index = static_cast<TInt>(v >> 20);
+	ASSERT(index < ARRAY_LENGTH(KNonSingletonDecompositions)/2);
+    return ::IndexToNonSingletonDecomposition(index);
+	}
+
+/**
+Takes a start and (one past the) end index into KCompostionMapping
+and a number of UTF16 characters (aLengthSoFar). All of the compositions
+within the range must have their first aLengthSoFar UTF16 characters
+matching.
+
+On entry, if aStart == aEnd then there is no possibility of a match so return
+immediately with EFalse. To continue, aStart must be strictly less than aEnd.
+
+Afterwards, aStart and aEnd will be narrowed to all those compositions
+where the aLengthSoFar'th UTF16 character matches aNextCharacter.
+No further compositions existing is indicated by aStart == aEnd.
+
+@return ETrue if the composition at aStart is exactly of length aLengthSoFar + 1.
+@internalComponent
+*/
+static TBool RefineComposition(TInt& aStart, TInt& aEnd, TInt aLengthSoFar, TInt aNextCharacter)
+	{
+	if (aStart == aEnd)
+		return EFalse;
+	ASSERT((TUint)aStart < (TUint)aEnd);
+ 	ASSERT((TUint)aEnd <= (TUint)ARRAY_LENGTH(KCompositionMapping));
+    TUTF32Iterator startIterator(::HashIndexToDecomposition(KCompositionMapping[aStart]));
+	if (startIterator.Length() == aLengthSoFar)
+		++aStart;
+
+	// Find a single example of a decomposition that is suitable
+	TInt mid;
+	TUTF32Iterator midIt;
+	for (;;)
+		{
+		if (aStart == aEnd)
+			return EFalse;
+		mid = aStart + ((aEnd - aStart) >> 1);
+        midIt = ::HashIndexToDecomposition(KCompositionMapping[mid]);
+		ASSERT(aLengthSoFar < midIt.Length());
+		TInt midItChar = midIt[aLengthSoFar];
+		if (midItChar < aNextCharacter)
+			aStart = mid + 1;
+		else if (aNextCharacter < midItChar)
+			aEnd = mid;
+		else
+			{
+			startIterator = midIt;
+			break;
+			}
+		}
+
+	// FInd the first decomposition that does not match
+	TInt start2 = mid + 1;
+	while (start2 != aEnd)
+		{
+		ASSERT(start2 < aEnd);
+		TInt mid2 = start2 + ((aEnd - start2) >> 1);
+        midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]);
+		ASSERT(aLengthSoFar < midIt.Length());
+		TInt midItChar = midIt[aLengthSoFar];
+		ASSERT(aNextCharacter <= midItChar);
+		if (aNextCharacter < midItChar)
+			aEnd = mid2;
+		else
+			start2 = mid2 + 1;
+		}
+
+	// Find the first decomposition that matches
+	while (aStart != mid)
+		{
+		ASSERT(aStart < mid);
+		TInt mid2 = aStart + ((mid - aStart) >> 1);
+        midIt = ::HashIndexToDecomposition(KCompositionMapping[mid2]);
+		ASSERT(aLengthSoFar < midIt.Length());
+		TInt midItChar = midIt[aLengthSoFar];
+		ASSERT(midItChar <= aNextCharacter);
+		if (midItChar < aNextCharacter)
+			aStart = mid2 + 1;
+		else
+			{
+			startIterator = midIt;
+			mid = mid2;
+			}
+		}
+
+	return startIterator.Length() == (aLengthSoFar + 1);
+	}
+
+/** 
+@internalComponent
+*/
+static TBool RefineCompositionUTF32(TInt& aStart, TInt& aEnd, TInt& aLengthSoFar, TChar aChar)
+	{
+	if (aChar < 0x10000)
+        return ::RefineComposition(aStart, aEnd, aLengthSoFar++, aChar);
+    ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar >> 10) + 0xD7C0);
+	if (aStart == aEnd)
+		return EFalse;
+    return ::RefineComposition(aStart, aEnd, aLengthSoFar++, (aChar & 0x3FF) + 0xDC00);
+	}
+
+/**
+Combine as many of the characters presented as possible into a single
+character.
+@return The number of characters successfully combined.
+@param aCombined If a nonzero value is returned, this contains
+            the character that is that number of characters from the start of
+            aDes combined.
+@internalComponent
+*/
+TInt CombineAsMuchAsPossible(const TDesC16& aDes, TChar& aCombined)
+	{
+	TInt start = 0;
+	TInt end = sizeof(KCompositionMapping)/sizeof(KCompositionMapping[0]);
+	TInt length = 0;
+	TInt bestIndex = 0;
+	TInt bestLength = 0;
+	const TText16* ptr = aDes.Ptr();
+	TUTF32Iterator input(ptr, ptr + aDes.Length());
+	while (!input.AtEnd())
+		{
+        if (::RefineCompositionUTF32(start, end, length, input.Current()))
+			{
+			bestIndex = start;
+			bestLength = length;
+			}
+		input.Next();
+		}
+	if (bestLength == 0)
+		return 0;
+	aCombined = KUnicodeToIndexHash[KCompositionMapping[bestIndex]] & 0xFFFFF;
+	return bestLength;
+	}
+
+//////////////////////////////////////////////////////////////////////////////////////////////
+// COLLATION
+//////////////////////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////////////////////
+// TDecompositionIterator class
+
+/** 
+@internalComponent
+*/
+void TDecompositionIterator::Set(const TUTF32Iterator& a)
+	{
+	iBase = a;
+	if (!iBase.AtEnd())
+        {
+        (void)::Decompose(iBase, iDecomposition);
+        }
+	}
+
+/** 
+@internalComponent
+*/
+TDecompositionIterator::TDecompositionIterator(const TUTF32Iterator& a)
+	{
+	Set(a);
+	}
+
+/** 
+@internalComponent
+*/
+TBool TDecompositionIterator::AtEnd() const
+	{
+	return iBase.AtEnd();
+	}
+
+/** 
+@internalComponent
+*/
+TChar TDecompositionIterator::Current() const
+	{
+	return iDecomposition.Current();
+	}
+
+/** 
+@internalComponent
+*/
+void TDecompositionIterator::Next()
+	{
+	ASSERT(!iBase.AtEnd() && !iDecomposition.AtEnd());
+	iDecomposition.Next();
+	if (!iDecomposition.AtEnd())
+		return;
+	iBase.Next();
+	if (!iBase.AtEnd())
+        {
+        (void)::Decompose(iBase, iDecomposition);
+        }
+	}
+
+/** 
+@internalComponent
+*/
+const TText16* TDecompositionIterator::CurrentPosition() const
+	{
+	return iBase.CurrentPosition();
+	}
+
+/** 
+Find out the length and minimum combining class of
+the current run of characters of nonzero combining class.
+aMinClass and aMinClassPos are not written to if the return
+value is 0.
+aEndOfRun is written to with the final position of the iteration
+if 0 is returned and aEndOfRun is non-null
+@internalComponent
+*/
+static TInt ReorderingRun(TInt& aMinClass, TDecompositionIterator& aMinClassPos,
+                          const TDecompositionIterator& aStart, TBool* aOpenSequence, 
+                          TInt aMaxDisallowedClass = 0, TDecompositionIterator* aEndOfRun = 0)
+	{
+	TInt comclass = aStart.AtEnd()? 0 : aStart.Current().GetCombiningClass();
+	if (comclass == 0)
+		{
+		if (aEndOfRun)
+			*aEndOfRun = aStart;
+		if (aOpenSequence)
+			*aOpenSequence = aStart.AtEnd();
+		return 0;
+		}
+	aMinClass = 256;
+	TDecompositionIterator i = aStart;
+	TInt count = 0;
+	while (comclass != 0)
+		{
+		if (aMaxDisallowedClass < comclass)
+			{
+			if (comclass < aMinClass)
+				{
+				aMinClass = comclass;
+				aMinClassPos = i;
+				}
+			++count;
+			}
+		i.Next();
+		comclass = i.AtEnd()? 0 : i.Current().GetCombiningClass();
+		}
+	if (count == 0 && aEndOfRun)
+		*aEndOfRun = i;
+	if (aOpenSequence)
+		*aOpenSequence = i.AtEnd();
+	return count;
+	}
+
+//////////////////////////////////////////////////////////////////////////////////////////////
+// TCanonicalDecompositionIterator class
+
+/** 
+@internalComponent
+*/
+void TCanonicalDecompositionIterator::Set(const TUTF32Iterator& a)
+	{
+	iBase.Set(a);
+	iLastPosition = 0;
+	if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2)
+		iCurrentCombiningClass = 0;
+	}
+
+/** 
+@internalComponent
+*/
+TBool TCanonicalDecompositionIterator::AtEnd() const
+	{
+	return iBase.AtEnd();
+	}
+
+/** 
+@internalComponent
+*/
+TChar TCanonicalDecompositionIterator::Current() const
+	{
+	return iCurrentCombiningClass? iCurrent.Current() : iBase.Current();
+	}
+
+/** 
+@internalComponent
+*/
+void TCanonicalDecompositionIterator::Next()
+	{
+	iLastPosition = iBase.CurrentPosition();
+	if (iCurrentCombiningClass == 0)
+		{
+		iBase.Next();
+		if (ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, &iInOpenSequence) < 2)
+			iCurrentCombiningClass = 0;
+		return;
+		}
+	// Find the next character in the run with the same combining class
+	iCurrent.Next();
+	TInt curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass();
+	while (curclass != 0)
+		{
+		if (curclass == iCurrentCombiningClass)
+			// success
+			return;
+		iCurrent.Next();
+		curclass = iCurrent.AtEnd()? 0 : iCurrent.Current().GetCombiningClass();
+		}
+	// There are none left in the current class. Find out what the next one is.
+	if (0 == ReorderingRun(iCurrentCombiningClass, iCurrent, iBase, 0, iCurrentCombiningClass, &iBase))
+		iCurrentCombiningClass = 0;
+	}
+
+/** 
+@internalComponent
+*/
+const TText16* TCanonicalDecompositionIterator::CurrentPositionIfAtCharacter() const
+	{
+	if (iCurrentCombiningClass != 0)
+		return 0;
+	const TText16* p = iBase.CurrentPosition();
+	return iLastPosition == p? 0 : p;
+	}
+
+/** 
+@internalComponent
+*/
+TBool TCanonicalDecompositionIterator::IsInOpenSequence() const
+	{
+	return iInOpenSequence;
+	}
+
+//////////////////////////////////////////////////////////////////////////////////////////////
+// TCanonicalDecompositionIteratorCached class
+
+/** 
+@internalComponent
+*/
+void TCanonicalDecompositionIteratorCached::Set(const TUTF32Iterator& a)
+	{
+	iBase.Set(a);
+	iCacheStart = 0;
+	iCacheSize = 0;
+	}
+
+/** 
+@internalComponent
+*/
+TBool TCanonicalDecompositionIteratorCached::AtEnd() const
+	{
+	return iCacheSize == 0 && iBase.AtEnd();
+	}
+
+/** 
+@internalComponent
+*/
+void TCanonicalDecompositionIteratorCached::Next(TInt aOffset)
+	{
+	ASSERT(0 <= aOffset);
+	ASSERT(0 <= iCacheSize);
+	ASSERT(0 != iCacheSize || !iBase.AtEnd());
+	if (aOffset <= iCacheSize)
+		{
+		iCacheSize -= aOffset;
+		iCacheStart = (iCacheStart + aOffset) & (KMaxLookAhead - 1);
+		return;
+		}
+	aOffset -= iCacheSize;
+	iCacheSize = 0;
+	while (aOffset != 0)
+		{
+		iBase.Next();
+		--aOffset;
+		}
+	}
+
+/** 
+Get the character at the position of the iterator plus aOffset steps.
+Returns -1 if we are looking too far ahead.
+@internalComponent
+*/
+TChar TCanonicalDecompositionIteratorCached::Get(TInt aOffset)
+	{
+	// should be assert debug: there is a chance this could go off with
+	// bad collation tables
+	ASSERT(aOffset <= KMaxLookAhead);
+	while (iCacheSize <= aOffset)
+		{
+		if (iBase.AtEnd())
+			return TChar(static_cast <TUint> (-1));
+		TInt cachePos = (iCacheStart + iCacheSize) & (KMaxLookAhead - 1);
+		iCache[cachePos].iChar = iBase.Current();
+		iCache[cachePos].iPos = iBase.CurrentPositionIfAtCharacter();
+		++iCacheSize;
+		iBase.Next();
+		}
+	return iCacheSize == aOffset? iBase.Current() : iCache[(iCacheStart + aOffset) & (KMaxLookAhead - 1)].iChar;
+	}
+
+/** 
+If the current position in the original string is representable
+as a pointer into it and we know what it is, return it.
+@internalComponent
+*/
+const TText16* TCanonicalDecompositionIteratorCached::CurrentPositionIfAtCharacter() const
+	{
+	if(iCacheSize == 0)
+		{
+		return iBase.CurrentPositionIfAtCharacter();
+		}
+	return iCache[iCacheStart & (KMaxLookAhead - 1)].iPos;
+	}
+