fontservices/textbase/sgdi/LinebreakComplex.cpp
changeset 45 662fa7de7023
equal deleted inserted replaced
41:ea44a32a96bc 45:662fa7de7023
       
     1 // Copyright (c) 2003-2010 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 //
       
    15 
       
    16 
       
    17 #include <e32std.h>
       
    18 #include <e32svr.h>
       
    19 #include "LineBreak.h"
       
    20 #include "LineBreakImp.h"
       
    21 #include "GlyphSel.h"
       
    22 
       
    23 const TText16 KThaiCodePageStart = 0x0E00;
       
    24 const TText16 KThaiCodePageEnd   = 0x0E5C;
       
    25 const TUint KNumThaiCharacters = KThaiCodePageEnd - KThaiCodePageStart;
       
    26 
       
    27 /**
       
    28  Ecanpsulates rules for when Thai character sequence line breaking.
       
    29 @internalComponent
       
    30 */
       
    31 class ThaiLinebreakRules
       
    32 	{
       
    33 public:
       
    34 	enum TCharClassification
       
    35 		{
       
    36 		EOutOfRange,
       
    37 
       
    38 		EConsOAng,
       
    39 		EConsYoYak,
       
    40 		EConsHoHip,
       
    41 		EConsWoWean,
       
    42 		EConsDigraph,
       
    43 		EConsOther,
       
    44 
       
    45 		EPostVowelA,
       
    46 		EPostVowelAA,
       
    47 		EPostVowelOther,
       
    48 		EPreVowel,
       
    49 
       
    50 		EDepMaiHanAkat,
       
    51 		EDepSaraI,
       
    52 		EDepOther,
       
    53 
       
    54 		// marker for end
       
    55 		EMaxClassification
       
    56 		};
       
    57 	enum
       
    58 		{
       
    59 		KOutOfRangeFlag = 1 << EOutOfRange,
       
    60 		KConsOAngFlag = 1 << EConsOAng,
       
    61 		KConsYoYakFlag = 1 << EConsYoYak,
       
    62 		KConsHoHipFlag = 1 << EConsHoHip,
       
    63 		KConsWoWeanFlag = 1 << EConsWoWean,
       
    64 		KConsDigraphFlag = 1 << EConsDigraph,
       
    65 		KConsOtherFlag = 1 << EConsOther,
       
    66 		KPostVowelAFlag = 1 << EPostVowelA,
       
    67 		KPostVowelAAFlag = 1 << EPostVowelAA,
       
    68 		KPostVowelOtherFlag = 1 << EPostVowelOther,
       
    69 		KPreVowelFlag = 1 << EPreVowel,
       
    70 		KDepMaiHanAkatFlag = 1 << EDepMaiHanAkat,
       
    71 		KDepSaraIFlag = 1 << EDepSaraI,
       
    72 		KDepOtherFlag = 1 << EDepOther,
       
    73 
       
    74 		KSpecialDepFlags = KDepMaiHanAkatFlag | KDepSaraIFlag,
       
    75 		KPostVowelFlags = KPostVowelAFlag | KPostVowelAAFlag | KPostVowelOtherFlag,
       
    76 		KConsFlags = KConsOtherFlag | KConsDigraphFlag | KConsWoWeanFlag
       
    77 			| KConsHoHipFlag | KConsYoYakFlag | KConsOAngFlag,
       
    78 		KAllFlags = KOutOfRangeFlag | KConsOAngFlag | KConsYoYakFlag
       
    79 			| KConsHoHipFlag | KConsWoWeanFlag | KConsDigraphFlag
       
    80 			| KConsOtherFlag | KPostVowelAFlag | KPostVowelAAFlag
       
    81 			| KPostVowelOtherFlag | KPreVowelFlag | KDepMaiHanAkatFlag
       
    82 			| KDepSaraIFlag | KDepOtherFlag
       
    83 		};
       
    84 
       
    85 	/** Returns the Thai linebreaking class of the character. */
       
    86 	static TCharClassification Class(TInt aChar);
       
    87 	/** Returns true if aChar is a combining character according to aBreaker. */
       
    88 	static TBool IsCombiningChar(TInt aChar, const MLineBreaker& aBreaker);
       
    89 	/** Returns the Thai linebreaking class of the character at (*aChar),
       
    90 	searching backwards for the base character if it is foreign and of type CM.
       
    91 	@param aChar The position of the character within the string.
       
    92 	@param aStart The start of the string.
       
    93 	@param aBreaker The line breaker to query for line breaking class.
       
    94 	*/
       
    95 	static TCharClassification DerivedClass(const TText* aChar,
       
    96 		const TText* aStart, const MLineBreaker& aBreaker);
       
    97 	/**
       
    98 	 Gets the line break rule for the previous and current character pair
       
    99 	@param aPrevClass Class of the previous character.
       
   100 	@param aClass Class of the current character.
       
   101 	@return the rule code corresponding to the input pair.
       
   102 	*/
       
   103 	static TBool BreakAllowedBetween(
       
   104 		TCharClassification aPrevClass, TCharClassification aClass);
       
   105 	/** Returns whether a line break is allowed before the SA character at
       
   106 	(*aChar).
       
   107 	@param aChar The position of the character within the string.
       
   108 	@param aStart The start of the string.
       
   109 	@param aBreaker The line breaker to query for line breaking class.
       
   110 	*/
       
   111 	static TBool BreakAllowedAt(const TText* aChar,
       
   112 		const TText* aStart, const MLineBreaker& aBreaker);
       
   113 
       
   114 	/**
       
   115 	 Classification of each character
       
   116 	*/
       
   117 	static const TUint8 KCharClassifications[KNumThaiCharacters];
       
   118 
       
   119 	/**
       
   120 	 Rules table of prev to next character
       
   121 	*/
       
   122 	static const TUint32 KRules[EMaxClassification];
       
   123 	};
       
   124 
       
   125 const TUint8 ThaiLinebreakRules::KCharClassifications[KNumThaiCharacters] = 
       
   126 	{
       
   127 	EOutOfRange,	// 0x0E00
       
   128 	EConsOther,		// 0x0E01
       
   129 	EConsOther,		// 0x0E02
       
   130 	EConsOther,		// 0x0E03
       
   131 	EConsOther,		// 0x0E04
       
   132 	EConsOther,		// 0x0E05
       
   133 	EConsOther,		// 0x0E06
       
   134 	EConsDigraph,	// 0x0E07
       
   135 	EConsOther,		// 0x0E08
       
   136 	EConsOther,		// 0x0E09
       
   137 	EConsOther,		// 0x0E0A
       
   138 	EConsOther,		// 0x0E0B
       
   139 	EConsOther,		// 0x0E0C
       
   140 	EConsOther,		// 0x0E0D
       
   141 	EConsOther,		// 0x0E0E
       
   142 	EConsOther,		// 0x0E0F
       
   143 
       
   144 	EConsOther,		// 0x0E10
       
   145 	EConsOther,		// 0x0E11
       
   146 	EConsOther,		// 0x0E12
       
   147 	EConsOther,		// 0x0E13
       
   148 	EConsOther,		// 0x0E14
       
   149 	EConsOther,		// 0x0E15
       
   150 	EConsOther,		// 0x0E16
       
   151 	EConsOther,		// 0x0E17
       
   152 	EConsOther,		// 0x0E18
       
   153 	EConsDigraph,	// 0x0E19
       
   154 	EConsOther,		// 0x0E1A
       
   155 	EConsOther,		// 0x0E1B
       
   156 	EConsOther,		// 0x0E1C
       
   157 	EConsOther,		// 0x0E1D
       
   158 	EConsOther,		// 0x0E1E
       
   159 	EConsOther,		// 0x0E1F
       
   160 
       
   161 	EConsOther,		// 0x0E20
       
   162 	EConsDigraph,	// 0x0E21
       
   163 	EConsYoYak,		// 0x0E22
       
   164 	EConsDigraph,	// 0x0E23
       
   165 	EConsOther,		// 0x0E24
       
   166 	EConsDigraph,	// 0x0E25
       
   167 	EConsOther,		// 0x0E26
       
   168 	EConsWoWean,	// 0x0E27
       
   169 	EConsOther,		// 0x0E28
       
   170 	EConsOther,		// 0x0E29
       
   171 	EConsOther,		// 0x0E2A
       
   172 	EConsHoHip,		// 0x0E2B
       
   173 	EConsOther,		// 0x0E2C
       
   174 	EConsOAng,		// 0x0E2D
       
   175 	EConsOther,		// 0x0E2E
       
   176 	EOutOfRange,	// 0x0E2F
       
   177 
       
   178 	EPostVowelA,	// 0x0E30
       
   179 	EDepMaiHanAkat,	// 0x0E31
       
   180 	EPostVowelAA,	// 0x0E32
       
   181 	EPostVowelOther,// 0x0E33
       
   182 
       
   183 	EDepSaraI,		// 0x0E34
       
   184 	EDepOther,		// 0x0E35
       
   185 	EDepOther,		// 0x0E36
       
   186 	EDepOther,		// 0x0E37
       
   187 	EDepOther,		// 0x0E38
       
   188 	EDepOther,		// 0x0E39
       
   189 	EDepOther,		// 0x0E3A
       
   190 
       
   191 	EOutOfRange,	// 0x0E3B
       
   192 	EOutOfRange,	// 0x0E3C
       
   193 	EOutOfRange,	// 0x0E3D
       
   194 	EOutOfRange,	// 0x0E3E
       
   195 	EOutOfRange,	// 0x0E3F
       
   196 
       
   197 	EPreVowel,		// 0x0E40
       
   198 	EPreVowel,		// 0x0E41
       
   199 	EPreVowel,		// 0x0E42
       
   200 	EPreVowel,		// 0x0E43
       
   201 	EPreVowel,		// 0x0E44
       
   202 
       
   203 	EPostVowelOther,// 0x0E45
       
   204 	EOutOfRange,	// 0x0E46
       
   205 
       
   206 	EDepOther,		// 0x0E47
       
   207 	EDepOther,		// 0x0E48
       
   208 	EDepOther,		// 0x0E49
       
   209 	EDepOther,		// 0x0E4A
       
   210 	EDepOther,		// 0x0E4B
       
   211 	EDepOther,		// 0x0E4C
       
   212 	EDepOther,		// 0x0E4D
       
   213 	EDepOther,		// 0x0E4E
       
   214 	EOutOfRange,	// 0x0E4F
       
   215 
       
   216 	EOutOfRange,		// 0x0E50
       
   217 	EOutOfRange,		// 0x0E51
       
   218 	EOutOfRange,		// 0x0E52
       
   219 	EOutOfRange,		// 0x0E53
       
   220 	EOutOfRange,		// 0x0E54
       
   221 	EOutOfRange,		// 0x0E55
       
   222 	EOutOfRange,		// 0x0E56
       
   223 	EOutOfRange,		// 0x0E57
       
   224 	EOutOfRange,		// 0x0E58
       
   225 	EOutOfRange,		// 0x0E59
       
   226 	EOutOfRange,		// 0x0E5A
       
   227 	EOutOfRange			// 0x0E5B
       
   228 	};
       
   229 
       
   230 const TUint32 KNormalBreaksBeforeCons =
       
   231 	ThaiLinebreakRules::KPreVowelFlag
       
   232 	| ThaiLinebreakRules::KConsOtherFlag
       
   233 	| ThaiLinebreakRules::KConsDigraphFlag
       
   234 	| ThaiLinebreakRules::KConsHoHipFlag
       
   235 	| ThaiLinebreakRules::KOutOfRangeFlag;
       
   236 const TUint32 KNormalBreaksBeforePostVowel =
       
   237 	ThaiLinebreakRules::KPreVowelFlag
       
   238 	| ThaiLinebreakRules::KPostVowelFlags
       
   239 	| ThaiLinebreakRules::KConsFlags
       
   240 	| ThaiLinebreakRules::KOutOfRangeFlag;
       
   241 const TUint32 ThaiLinebreakRules::KRules[EMaxClassification] =
       
   242 	{
       
   243 	/* Prev Char EOutOfRange */
       
   244 	KAllFlags - KOutOfRangeFlag,
       
   245 	/* Prev Char EConsOAng */
       
   246 	KNormalBreaksBeforeCons,
       
   247 	/* Prev Char EConsYoYak */
       
   248 	KNormalBreaksBeforeCons,
       
   249 	/* Prev Char EConsHoHip */
       
   250 	KNormalBreaksBeforeCons - KConsDigraphFlag,
       
   251 	/* Prev Char EConsWoWean */
       
   252 	KNormalBreaksBeforeCons - KConsDigraphFlag - KConsHoHipFlag,
       
   253 	/* Prev Char EConsDigraph */
       
   254 	KNormalBreaksBeforeCons,
       
   255 	/* Prev Char EConsOther */
       
   256 	KNormalBreaksBeforeCons,
       
   257 	/* Prev Char EPostVowelA */
       
   258 	KNormalBreaksBeforePostVowel,
       
   259 	/* Prev Char EPostVowelAA */
       
   260 	KNormalBreaksBeforePostVowel - KPostVowelAFlag,
       
   261 	/* Prev Char EPostVowelOther */
       
   262 	KNormalBreaksBeforePostVowel,
       
   263 	/* Prev Char EPreVowel */
       
   264 	KPreVowelFlag | KPostVowelFlags | KOutOfRangeFlag,
       
   265 	/* Prev Char EDepMaiHanAkat */
       
   266 	KSpecialDepFlags | KPreVowelFlag | KPostVowelAAFlag
       
   267 		| KPostVowelAFlag | KOutOfRangeFlag,
       
   268 	/* Prev Char EDepSaraI */
       
   269 	KSpecialDepFlags | KPreVowelFlag | KPostVowelAAFlag
       
   270 		| KPostVowelAFlag | KOutOfRangeFlag,
       
   271 	/* Prev Char EDepOther */
       
   272 	KSpecialDepFlags | KPreVowelFlag | KPostVowelAAFlag
       
   273 		| KPostVowelAFlag | KConsOtherFlag | KConsDigraphFlag
       
   274 		| KConsWoWeanFlag | KConsHoHipFlag | KOutOfRangeFlag
       
   275 	};
       
   276 
       
   277 ThaiLinebreakRules::TCharClassification ThaiLinebreakRules::Class(TInt a)
       
   278     {
       
   279 	return static_cast<TCharClassification>(
       
   280 		(KThaiCodePageStart <= a && a < KThaiCodePageEnd) ?
       
   281 			KCharClassifications[a - KThaiCodePageStart] :
       
   282 			EOutOfRange);
       
   283     }
       
   284 
       
   285 TBool ThaiLinebreakRules::IsCombiningChar(TInt aChar,
       
   286 	const MLineBreaker& aBreaker)
       
   287 	{
       
   288 	TUint dummy1, dummy2;
       
   289 	return aBreaker.LineBreakClass(aChar, dummy1, dummy2) == MLineBreaker::ECmLineBreakClass;
       
   290 	}
       
   291 
       
   292 ThaiLinebreakRules::TCharClassification ThaiLinebreakRules::DerivedClass(
       
   293 	const TText* aChar, const TText* aStart, const MLineBreaker& aBreaker)
       
   294 	{
       
   295 	ThaiLinebreakRules::TCharClassification c = Class(*aChar);
       
   296 	while (c == EOutOfRange && aChar != aStart
       
   297 		&& IsCombiningChar(*aChar, aBreaker))
       
   298 		{
       
   299 		--aChar;
       
   300 		c = Class(*aChar);
       
   301 		}
       
   302 	return c;
       
   303 	}
       
   304 
       
   305 TBool ThaiLinebreakRules::BreakAllowedBetween(
       
   306 	ThaiLinebreakRules::TCharClassification aClass,
       
   307 	ThaiLinebreakRules::TCharClassification aNextClass)
       
   308 	{
       
   309 	return KRules[aClass] & (1 << aNextClass);
       
   310 	}
       
   311 
       
   312 TBool ThaiLinebreakRules::BreakAllowedAt(const TText* aChar,
       
   313 	const TText* aStart, const MLineBreaker& aBreaker)
       
   314 	{
       
   315 	__ASSERT_DEBUG(aStart < aChar, User::Invariant());
       
   316 	TCharClassification c = Class(*aChar);
       
   317 	if (c == EOutOfRange && IsCombiningChar(*aChar, aBreaker))
       
   318 		return EFalse;
       
   319 	return BreakAllowedBetween(DerivedClass(aChar - 1, aStart, aBreaker), c);
       
   320 	}
       
   321 
       
   322 /**
       
   323  Returns whether a line break is possible within a run of characters all having
       
   324  the class ESaLineBreakClass (Complex content). Languages with Unicocde
       
   325  characters having such a class include: Thai, Lao, Myanmar and Khmer. This
       
   326  default implementation of the GetLineBreakInContext() method only supports the
       
   327  Thai script. Breaks are determined in Thai based on a simple understanding of
       
   328  syllable boundaries. When characters from the other unsupported Sa class
       
   329  languages are found the method exits with EFalse.
       
   330 @param aText
       
   331  The text to be searched, which is a contiguous run of characters of class SA
       
   332  (or CM attatched to SA). The break position may be restricted further by
       
   333  aMinBreakPos and aMaxBreakPos, but more text is provided for extra context
       
   334  should it be needed.
       
   335 @param aMinBreakPos
       
   336  The start of the text to be considered for line breaks.
       
   337 @param aMaxBreakPos
       
   338  The end of the text to be considered for line breaks.
       
   339 @param aForwards
       
   340  ETrue if aBreakPos is to be set with the first legal break position,
       
   341  EFalse if aBreakPos is to be set with the last legal break position.
       
   342 @param aBreakPos
       
   343  If break position found on exit its value is >= Min and <= Max 
       
   344  positions supplied.
       
   345 @return TBool
       
   346  ETrue if and only if a legal break was found, EFalse otherwise.
       
   347 @publishedAll
       
   348 @released
       
   349 */
       
   350 EXPORT_C TBool MLineBreaker::GetLineBreakInContext(const TDesC16& aText,
       
   351 		TInt aMinBreakPos, TInt aMaxBreakPos, TBool aForwards,
       
   352 		TInt& aBreakPos) const
       
   353 	{
       
   354 	__ASSERT_DEBUG (0 <= aMinBreakPos && aMaxBreakPos <= aText.Length(), 
       
   355 			Panic(ELineBreakPanic_InvalidInputParam));
       
   356 
       
   357 	TInt length = aText.Length();
       
   358 
       
   359 	if (aMinBreakPos < 1)
       
   360 		aMinBreakPos = 1;
       
   361 	if (length - 1 < aMaxBreakPos)
       
   362 		aMaxBreakPos = length - 1;
       
   363 	if (aMaxBreakPos < aMinBreakPos)
       
   364 		return EFalse;
       
   365 
       
   366 	const TText16* text = aText.Ptr();
       
   367 
       
   368 	if (*text == KZeroWidthSpace)
       
   369 		{
       
   370 		aBreakPos = aMinBreakPos;
       
   371 		return ETrue;
       
   372 		}
       
   373 	else if (*(text+length-1) == KZeroWidthSpace)
       
   374 		return EFalse;
       
   375 	
       
   376 	TInt start = aForwards? aMinBreakPos : aMaxBreakPos;
       
   377 	TInt end = aForwards? aMaxBreakPos + 1 : aMinBreakPos - 1;
       
   378 	TInt direction = aForwards? 1 : -1;
       
   379 	for (TInt i = start; i != end; i += direction)
       
   380 		{
       
   381 		if (ThaiLinebreakRules::BreakAllowedAt(text + i, text, *this))
       
   382 			{
       
   383 			aBreakPos = i;
       
   384 			return ETrue;
       
   385 			}
       
   386 		}
       
   387 	return EFalse;
       
   388 	}