textrendering/texthandling/stext/TXTSCAN.CPP
changeset 0 1fb32624e06b
child 40 91ef7621b7fc
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #include "TXTETEXT.H"
       
    20 #include "TXTSTD.H"
       
    21 
       
    22 
       
    23 EXPORT_C TInt CPlainText::CharPosOfParagraph(TInt& aLength,TInt aParaOffset)const
       
    24 /** Finds the length and the start position of a paragraph identified by its 
       
    25 paragraph number the first paragraph is numbered zero.
       
    26 
       
    27 Notes:
       
    28 
       
    29 if aParaOffset is invalid, (equal to or greater than the total number of 
       
    30 paragraphs), the function's return value is EScanEndOfData
       
    31 
       
    32 @param aLength On return contains the length of the specified paragraph. 
       
    33 @param aParaOffset The paragraph number. The first paragraph is numbered zero. 
       
    34 @return The document position of the first character in the paragraph. */
       
    35 	{
       
    36 	__TEST_INVARIANT;
       
    37 
       
    38 	TInt startPos=aLength=0;
       
    39 	TUint scanMask=(EScanToUnitStart);
       
    40 	aLength=ScanParas(startPos,scanMask);
       
    41 	for (TInt offset=1;offset<=aParaOffset;offset++)
       
    42 		{
       
    43 		if (startPos<=EScanEndOfData)
       
    44 			return EScanEndOfData;
       
    45 		aLength=ScanParas(startPos,scanMask);
       
    46 		}
       
    47 	if (startPos==EScanEndOfData)
       
    48 		startPos=iByteStore->Size()/sizeof(TText);
       
    49 	return startPos-aLength;
       
    50 	}
       
    51 
       
    52 EXPORT_C TInt CPlainText::ParagraphNumberForPos(TInt& aPos)const
       
    53 /** Gets the number of the paragraph which contains a document position. 
       
    54 Paragraph numbering begins at zero.
       
    55 
       
    56 @param aPos A document position. Must be valid or a panic occurs. On return, 
       
    57 contains the document position of the first character in the paragraph in 
       
    58 which it is located. 
       
    59 @return The number of the paragraph containing the specified document position.
       
    60 The first paragraph is numbered zero. */
       
    61 	{
       
    62 	__TEST_INVARIANT;
       
    63 
       
    64 	__ASSERT_ALWAYS(aPos>=0 && aPos<=DocumentLength(),Panic(ECharPosBeyondDocument));
       
    65 	TUint scanMask=(EScanBackwards|EScanStayIfBoundary|EScanToUnitStart);
       
    66 	ScanParas(aPos,scanMask);
       
    67 	// aPos now holds the 1st char of the paragraph.
       
    68 	TInt paraOffset=0,currentPos=0;
       
    69 	scanMask=EScanToUnitStart;
       
    70 	while (currentPos<aPos)
       
    71 		{// Move forwards, counting paragraphs until we get to the current position.
       
    72 		paraOffset++;
       
    73 		ScanParas(currentPos,scanMask);
       
    74 		}
       
    75 	return paraOffset;
       
    76 	}
       
    77 
       
    78 EXPORT_C void CPlainText::GetWordInfo(TInt aCurrentPos,TInt& aStartPos,TInt& aLength,
       
    79 									  TBool aPictureIsDelimiter,TBool aPunctuationIsDelimiter)const
       
    80 /** Gets the start position and length of the word that contains the document 
       
    81 position specified.
       
    82 
       
    83 @param aCurrentPos A document position. Must be valid or a panic occurs. 
       
    84 @param aStartPos On return, the document position of the first character in 
       
    85 the word containing the position aCurrentPos.
       
    86 @param aLength On return, the length of the word containing document position 
       
    87 aCurrentPos.
       
    88 @param aPictureIsDelimiter ETrue if picture characters should be treated as 
       
    89 word delimiters, false if not. For example, when navigating text, this might 
       
    90 be EFalse, but when spell checking, it might be ETrue. 
       
    91 @param aPunctuationIsDelimiter ETrue if punctuation characters should be treated 
       
    92 as word delimiters, EFalse if not. */
       
    93 	{
       
    94 	__TEST_INVARIANT;
       
    95 
       
    96 	__ASSERT_ALWAYS(aCurrentPos>=0 && aCurrentPos<=DocumentLength(),Panic(ECharPosBeyondDocument));
       
    97 	aStartPos=aLength=aCurrentPos;
       
    98 	// First find the start of the word.
       
    99 	TUint scanMask=(EScanBackwards|
       
   100 					EScanStayIfBoundary|
       
   101 					EScanToUnitStart);
       
   102 	if (aPictureIsDelimiter)
       
   103 		scanMask|=EScanPictureIsDelimiter;
       
   104 	if (aPunctuationIsDelimiter)
       
   105 		scanMask|=EScanPunctuationIsDelimiter;
       
   106 	ScanWords(aStartPos,scanMask);
       
   107 	// Next find the end of the word.
       
   108 	scanMask=(EScanStayIfBoundary|EScanToUnitEnd);
       
   109 	if (aPictureIsDelimiter)
       
   110 		scanMask|=EScanPictureIsDelimiter;
       
   111 	if (aPunctuationIsDelimiter)
       
   112 		scanMask|=EScanPunctuationIsDelimiter;
       
   113 	ScanWords(aLength,scanMask);
       
   114 	aLength-=aStartPos;
       
   115 	__ASSERT_DEBUG(aLength+aStartPos<=(DocumentLength()+1),Panic(ECharPosBeyondDocument));
       
   116 
       
   117 	__TEST_INVARIANT;
       
   118 	}
       
   119 
       
   120 EXPORT_C TInt CPlainText::ToParagraphStart(TInt& aPos) const
       
   121 /** Gets the document position of the start of the paragraph containing the 
       
   122 specified document position.
       
   123 
       
   124 @param aPos A document position. Must be valid or a panic occurs. On return, 
       
   125 contains the document position of the first character in the paragraph in 
       
   126 which it is located.
       
   127 @return The number of characters skipped in scanning to the start of the 
       
   128 paragraph. */
       
   129 	{
       
   130 	__TEST_INVARIANT;
       
   131 	__ASSERT_ALWAYS(aPos>=0 && aPos<=DocumentLength(),Panic(ECharPosBeyondDocument));
       
   132 
       
   133 	int skipped = 0;
       
   134 	while (aPos > 0)
       
   135 		{
       
   136 		TPtrC8 p = iByteStore->BackPtr(aPos * sizeof(TText));
       
   137 		const TText* start = (TText*)p.Ptr();
       
   138 		const TText* end = start + p.Length() / sizeof(TText);
       
   139 		while (end > start)
       
   140 			{
       
   141 			end--;
       
   142 			if (*end == EParagraphDelimiter)
       
   143 				return skipped;
       
   144 			aPos--;
       
   145 			skipped++;
       
   146 			}
       
   147 		}
       
   148 	return skipped;
       
   149 	}
       
   150 
       
   151 EXPORT_C TInt CPlainText::WordCount()const
       
   152 /** Gets a count of the number of words in the text object.
       
   153 
       
   154 @return The number of words in the text object. */
       
   155 	{return CountUnits(EUnitIsWord);}
       
   156 
       
   157 EXPORT_C TInt CPlainText::ParagraphCount()const
       
   158 /** Gets a count of the number of paragraphs in the text object.
       
   159 
       
   160 Note: the paragraph delimiter which terminates every text object means this 
       
   161 function always returns a count of at least one.
       
   162 
       
   163 @return The number of paragraphs contained in the text object. */
       
   164 	{//return CountUnits(EUnitIsParagraph);}
       
   165 	TInt pos=0,unitCount=0;
       
   166 	TUint scanMask=(EScanToUnitStart);
       
   167 	while (pos!=EScanEndOfData)
       
   168 		{
       
   169 		unitCount++;
       
   170 		ScanParas(pos,scanMask);
       
   171 		}
       
   172 	return unitCount;
       
   173 	}
       
   174 
       
   175 TInt CPlainText::CountUnits(TUnitOfText aContext)const
       
   176 // Returns the number of units in the document, where
       
   177 // units are defined by the parameter aContext.
       
   178 //
       
   179 	{
       
   180 	SScanData scanData;
       
   181 	TInt pos=0,unitCount=0;
       
   182 	TUint scanMask=(EScanToUnitStart|EScanJoinDelimiters|EScanStayIfBoundary);
       
   183 	InitScanControl(pos,scanMask,aContext,scanData);
       
   184 	ScanUnit(pos,aContext,scanData);
       
   185 	// The above code skips over leading white space
       
   186 	scanData.scanMask &=~ EScanStayIfBoundary;
       
   187 	while(pos!=EScanEndOfData)
       
   188 		{
       
   189 		unitCount++;
       
   190 		ScanUnit(pos,aContext,scanData);
       
   191 		}//	Count complete, so return pos to the beginning of the data
       
   192 	pos=0;
       
   193 	return unitCount;
       
   194 	}
       
   195 
       
   196 
       
   197 
       
   198 
       
   199 EXPORT_C TInt CPlainText::ScanWords(TInt& aPos,TUint& aScanMask)const
       
   200 /** Scans from a document position to the beginning or end of a word. The 
       
   201 destination is determined by a scan mask. The scan can either be forwards 
       
   202 (the default) or backwards, and the destination may be the first or last 
       
   203 character in the word containing the position, or the first character in 
       
   204 the next word.
       
   205 
       
   206 Note: If an attempt is made to scan beyond the end of text delimiter, on return, 
       
   207 aPos is set to EScanEndOfData  and the function's return value indicates 
       
   208 the number of characters skipped in passing the end of text delimiter.
       
   209 
       
   210 @param aPos The document position from which to scan. Must be valid, or a 
       
   211 panic occurs. On return, contains the new document position. 
       
   212 @param aScanMask The scan mask. See the enumeration whose values begin with 
       
   213 CPlainText::EScanBackwards. 
       
   214 @return The number of characters skipped to reach the new document position. */
       
   215 	{
       
   216 	__ASSERT_ALWAYS(aPos>=0 && aPos<=DocumentLength(),Panic(ECharPosBeyondDocument));
       
   217 	SScanData scanData;
       
   218 	InitScanControl(aPos,aScanMask,EUnitIsWord,scanData);
       
   219 	return ScanUnit(aPos,EUnitIsWord,scanData);
       
   220 	}
       
   221 
       
   222 EXPORT_C TInt CPlainText::ScanParas(TInt& aPos,TUint& aScanMask)const
       
   223 /** Scans from a document position to the beginning or end of a paragraph. The 
       
   224 destination is determined by a scan mask. The scan can either be forwards 
       
   225 (the default) or backwards, and the destination may be the first or last 
       
   226 character in the paragraph containing the position, or the first character in 
       
   227 the next paragraph. 
       
   228 
       
   229 Note: if an attempt is made to scan beyond the end of text delimiter, on return, 
       
   230 aPos is set to EScanEndOfData ) and the function's return value indicates 
       
   231 the number of characters skipped in passing the end of text delimiter.
       
   232 
       
   233 @param aPos The document position to scan from. Must be valid or a panic occurs. 
       
   234 On return, contains the new document position. 
       
   235 @param aScanMask The scan mask. See the enumeration whose values begin with 
       
   236 CPlainText::EScanBackwards. 
       
   237 @return The number of characters skipped to reach the new document position. */
       
   238 	{
       
   239 	__ASSERT_ALWAYS(aPos>=0 && aPos<=DocumentLength(),Panic(ECharPosBeyondDocument));
       
   240 	SScanData scanData;
       
   241 	InitScanControl(aPos,aScanMask,EUnitIsParagraph,scanData);
       
   242 	return ScanUnit(aPos,EUnitIsParagraph,scanData);
       
   243 	}
       
   244 
       
   245 
       
   246 void CPlainText::InitScanControl(TInt& aPos,TUint& aScanMask,TUnitOfText aContext,SScanData& aScanData)const
       
   247 // Initialises the control segment of the bitmask
       
   248 // used to determine when scan has completed.
       
   249 // Calls the initialisation of aScanData.
       
   250 //
       
   251 	{
       
   252 	aScanData.pos=aPos;
       
   253 	aScanData.scanMask=0;
       
   254 	aScanData.scanMask|=aScanMask;
       
   255 	if (aScanData.scanMask & EScanStayIfBoundary)
       
   256 		{//Scan one unit the other way first.
       
   257 			aScanData.scanMask ^= EScanBackwards;  // Alter scan direction.
       
   258 			InitScanData(aScanData.pos,aScanData);				
       
   259 			TestForDelimiter(aContext,aScanData);
       
   260 			aScanData.scanMask ^= EScanBackwards;  // Restore original scan direction.
       
   261 		}
       
   262 	InitScanData(aScanData.pos,aScanData);
       
   263 	if(!aPos && (aScanData.scanMask&(EScanStayIfBoundary|EScanBackwards))==EScanStayIfBoundary)
       
   264 		{//At the start of the buffer and scanning forwards and ScanStayIfBoundary set.
       
   265 		aScanData.scanMask &=~aScanData.EInsideUnit;
       
   266 		}
       
   267 	else 
       
   268 		{
       
   269 		TestForDelimiter(aContext,aScanData);
       
   270 		if (aScanData.scanMask & aScanData.EIsDelimiter)
       
   271 			aScanData.scanMask &=~ aScanData.EInsideUnit;
       
   272 		else
       
   273  			aScanData.scanMask |= aScanData.EInsideUnit;
       
   274 		}
       
   275 	if (aScanData.scanMask & EScanBackwards)
       
   276 		{//Set stop flags relative to scan direction
       
   277 		if (aScanData.scanMask &  EScanToUnitStart)
       
   278 			aScanData.scanMask |= aScanData.EStopEnd;
       
   279 		if (aScanData.scanMask &  EScanToUnitEnd)
       
   280 			aScanData.scanMask |= aScanData.EStopBegin;
       
   281 		}
       
   282 	else
       
   283 		{//Set stop flags relative to scan direction
       
   284 		if (aScanData.scanMask &  EScanToUnitStart)
       
   285 			aScanData.scanMask |= aScanData.EStopBegin;
       
   286 		if (aScanData.scanMask &  EScanToUnitEnd)
       
   287 			aScanData.scanMask |= aScanData.EStopEnd;
       
   288 		}
       
   289 	}
       
   290 
       
   291 
       
   292 void CPlainText::InitScanData(TInt aPos,SScanData& aScanData)const
       
   293 // Initialises the necessary elements of the scan structure, for ScanXxx 
       
   294 // methods.  These are used to track scan progress.
       
   295 //
       
   296 	{
       
   297 	aScanData.pos=aPos;
       
   298 	aScanData.oldPos=aPos;
       
   299 	aScanData.currentSegLen=0;
       
   300 	if (aScanData.scanMask & EScanBackwards)
       
   301 		{//Scanning backwards.
       
   302 		aScanData.delta=-1;
       
   303 		aScanData.totalBufLen=aPos;
       
   304 		}
       
   305 	else
       
   306 		{//Scanning forwards.
       
   307 		aScanData.delta=+1;
       
   308 		aScanData.totalBufLen=((iByteStore->Size()/sizeof(TText))-aScanData.pos);
       
   309 		}
       
   310 	}	
       
   311 
       
   312 
       
   313 TInt CPlainText::ScanUnit(TInt& aPos,TUnitOfText aContext,SScanData& aScanData)const
       
   314 // Scan from position aPos, by one unit.
       
   315 // Update aPos to the current position, and return the number
       
   316 // of characters skipped or EScanEndOfData if at the end of the buffer.
       
   317 //
       
   318 	{
       
   319 	while (TestForDelimiter(aContext,aScanData))
       
   320 		{//More data has been read
       
   321 		if (aScanData.scanMask & aScanData.EInsideUnit)
       
   322 			{
       
   323 			if (aScanData.scanMask & aScanData.EIsDelimiter)
       
   324 				{
       
   325 				aScanData.scanMask &=~aScanData.EInsideUnit;
       
   326 				if (aScanData.scanMask & aScanData.EStopEnd)
       
   327 					break;
       
   328 				}
       
   329 			 }
       
   330 		else
       
   331 			{
       
   332 			if (aScanData.scanMask & aScanData.EIsDelimiter)
       
   333 				{
       
   334 				if (!(aScanData.scanMask & EScanJoinDelimiters))
       
   335 					break;
       
   336 				}
       
   337 	   		else
       
   338 				{
       
   339 				aScanData.scanMask |= aScanData.EInsideUnit;
       
   340 				if (aScanData.scanMask & aScanData.EStopBegin)
       
   341 					break;
       
   342 				}
       
   343 			}
       
   344 		}
       
   345 	TInt charsSkipped=(aScanData.scanMask & EScanBackwards)? aPos-aScanData.oldPos : 
       
   346 										aScanData.oldPos-aPos;
       
   347 	aPos=(aScanData.oldPos<(TInt)(iByteStore->Size()/sizeof(TText)))? aScanData.oldPos : EScanEndOfData;
       
   348 	return charsSkipped;
       
   349 	}
       
   350 
       
   351 
       
   352 TBool CPlainText::TestForDelimiter(TUnitOfText aContext,SScanData& aScanData) const
       
   353 /** Return ETrue if a character was read successfully.
       
   354 Set EIsDelimiter flag if the character read
       
   355 is a delimiter of the appropriate type.*/	
       
   356     {
       
   357 	aScanData.scanMask |= aScanData.EIsDelimiter;
       
   358 	aScanData.oldPos = aScanData.pos;
       
   359 	TChar c;
       
   360 	if (!GetChar(aScanData,c))
       
   361 		return FALSE;
       
   362 
       
   363 	TBool found = FALSE;
       
   364 	if (aContext == EUnitIsParagraph)
       
   365 		found = c == EParagraphDelimiter;
       
   366 	else if (aContext == EUnitIsWord)
       
   367 		{
       
   368 		if (c == EParagraphDelimiter ||
       
   369 			c == ELineBreak ||
       
   370 			c == EPageBreak ||
       
   371 			c == ESpace ||
       
   372 			c == ETabCharacter)
       
   373 			found = TRUE;
       
   374 		if (!found && (aScanData.scanMask & EScanPictureIsDelimiter))
       
   375 			{
       
   376 			if (c == EPictureCharacter)
       
   377 				found = TRUE;
       
   378 			}
       
   379 		if (!found)
       
   380 			{
       
   381 			TChar::TCategory cat = c.GetCategory();
       
   382 			if (cat == TChar::EZsCategory) // it's a space
       
   383 				found = TRUE;
       
   384 			if (!found &&
       
   385 				(aScanData.scanMask & EScanPunctuationIsDelimiter) &&
       
   386 				(cat & TChar::EPunctuationGroup))
       
   387 				{
       
   388 				if (c != EHyphenMinus &&
       
   389 					c != EApostrophe &&
       
   390 					c != EPotentialHyphen &&
       
   391 					c != EHyphen && 
       
   392 					c != ERightSingleQuote)
       
   393 					found = TRUE;
       
   394 				}
       
   395 			}
       
   396 		}
       
   397 
       
   398 	if (found)
       
   399 		aScanData.scanMask |= aScanData.EIsDelimiter;
       
   400 	else
       
   401 		aScanData.scanMask &= ~aScanData.EIsDelimiter;
       
   402 	return TRUE;
       
   403 	}
       
   404 
       
   405 
       
   406 TBool CPlainText::GetChar(SScanData& aScanData,TChar& aChar)const
       
   407 // If there is one, write the next character to aChar, and return ETrue,
       
   408 // otherwise return EFalse.
       
   409 //
       
   410 	{
       
   411 	TPtrC view(_S("a"));  // Dummy value cos no default constructor.
       
   412 	if (!aScanData.currentSegLen)	
       
   413 		{//First time in, or new segment.
       
   414 		if (!aScanData.totalBufLen)
       
   415 			{//No data left to read.
       
   416 			return EFalse;	
       
   417 			}
       
   418 		if (aScanData.delta < 0)
       
   419 			{//Scanning backwards
       
   420 			TPtrC8 tempView=iByteStore->BackPtr(aScanData.pos*sizeof(TText));
       
   421 			view.Set((TText*)tempView.Ptr(),tempView.Length()/sizeof(TText));
       
   422 			}
       
   423 		else
       
   424 			{//Scanning forwards
       
   425 			TPtrC8 tempView=iByteStore->Ptr(aScanData.pos*sizeof(TText));
       
   426 			view.Set((TText*)tempView.Ptr(),tempView.Length()/sizeof(TText));
       
   427 			}//Ptr now returns a TDes.
       
   428 		aScanData.currentSegLen=(TInt)Min(aScanData.totalBufLen,(TInt)view.Length());
       
   429 		aScanData.totalBufLen-=aScanData.currentSegLen;
       
   430 		aScanData.buf=(TText*)view.Ptr();
       
   431 		if (aScanData.delta < 0)
       
   432 			aScanData.buf+=aScanData.currentSegLen-1;
       
   433 		}
       
   434 	aChar=*(aScanData.buf);  // aChar takes the character just read.
       
   435 	aScanData.buf+=aScanData.delta;  // pText increments correctly for both ASCII and Unicode
       
   436 	aScanData.pos+=aScanData.delta;
       
   437 	aScanData.currentSegLen-=1;  // len holds the number of chars left, for ASCII and Unicode
       
   438 	return ETrue;
       
   439 	}
       
   440 
       
   441