textrendering/texthandling/stext/TXTPLAIN.CPP
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 * Implementation of the classes that import and export plain text.
       
    16 *
       
    17 */
       
    18 
       
    19 
       
    20 #include "TXTSTD.H"
       
    21 #include "TXTPLAIN.H"
       
    22 #include "charconv.h"
       
    23 
       
    24 TPlainTextIOState::	TPlainTextIOState(const CPlainText::TImportExportParam& aParam,
       
    25 									  CPlainText::TImportExportResult& aResult,
       
    26 									  RWriteStream& aOutput,RReadStream& aInput):
       
    27 	iParam(aParam),
       
    28 	iResult(aResult),
       
    29 	iOutput(aOutput),
       
    30 	iInput(aInput),
       
    31 	iConverter(NULL),
       
    32 	iSwapInput(FALSE),
       
    33 	iCheckByteOrder(FALSE)
       
    34 	{
       
    35 	aResult = CPlainText::TImportExportResult();  // zero output counters; aResult may be re-used.
       
    36 	}
       
    37 
       
    38 TText TPlainTextIOState::ReadRawCharL()
       
    39 	{
       
    40 	TText c;
       
    41 	if (iParam.iInputInternal)
       
    42 		iInput.ReadL((TUint8*)&c,sizeof(TText));
       
    43 	else
       
    44 		c = iInput.ReadUint16L();
       
    45 	if (iSwapInput)
       
    46 		c = (TText)(((c << 8) & 0xFF00) | ((c >> 8) & 0xFF));
       
    47 	if (iCheckByteOrder)
       
    48 		{
       
    49 		if (c == CEditableText::EReversedByteOrderMark)
       
    50 			{
       
    51 			c = CEditableText::EByteOrderMark;
       
    52 			iSwapInput = !iSwapInput;
       
    53 			}
       
    54 		iCheckByteOrder = FALSE;
       
    55 		}
       
    56 	iResult.iInputChars++;
       
    57 	return c;
       
    58 	}
       
    59 
       
    60 void TPlainTextIOState::WriteRawCharL(TText aChar)
       
    61 	{
       
    62 	if (iResult.iOutputChars < iParam.iMaxOutputChars)
       
    63 		{
       
    64 		if (iParam.iOutputInternal)
       
    65 			iOutput.WriteL((TUint8*)&aChar,sizeof(TText));
       
    66 		else
       
    67 			iOutput.WriteUint16L(aChar);
       
    68 		iResult.iOutputChars++;
       
    69 		}
       
    70 	}
       
    71 
       
    72 CPlainTextConverter* CPlainTextConverter::NewLC()
       
    73 	{
       
    74 	CPlainTextConverter* c = new(ELeave) CPlainTextConverter;
       
    75 	CleanupStack::PushL(c);
       
    76 	c->iConversionBuffer = new(ELeave) TUint8[EConversionBufferSize];
       
    77 	return c;
       
    78 	}
       
    79 
       
    80 CPlainTextConverter::~CPlainTextConverter()
       
    81 	{
       
    82 	delete iConverter;
       
    83 	delete [] iConversionBuffer;
       
    84 	}
       
    85 
       
    86 /*
       
    87 Prepare to convert between Unicode and a foreign encoding.
       
    88 If aSample is non-null it can be used to guess the foreign encoding, but only if iParam.iGuessForeignEncoding is true.
       
    89 */
       
    90 void CPlainTextConverter::PrepareToConvertL(TPlainTextIOState& aState,const TDesC8* aSample)
       
    91 	{
       
    92 	RFs rfs;
       
    93 
       
    94 	iConverter = CCnvCharacterSetConverter::NewL();
       
    95 	if (aState.iParam.iFileSession == NULL)
       
    96 		{
       
    97 		TInt error = rfs.Connect();
       
    98 		User::LeaveIfError(error);
       
    99 				
       
   100 		CleanupClosePushL(rfs);
       
   101 		}
       
   102 	else
       
   103 		rfs = *aState.iParam.iFileSession;
       
   104 
       
   105 	TUint foreign_encoding = aState.iParam.iForeignEncoding;
       
   106 	
       
   107 	// Try to guess the foreign encoding.
       
   108 	if (aSample && aState.iParam.iGuessForeignEncoding)
       
   109 		{
       
   110 		CArrayFix<CCnvCharacterSetConverter::SCharacterSet>* charsets =
       
   111 			CCnvCharacterSetConverter::CreateArrayOfCharacterSetsAvailableLC(rfs);
       
   112 		TInt confidence = 0;
       
   113 		CCnvCharacterSetConverter::AutoDetectCharacterSetL(confidence,foreign_encoding,*charsets,*aSample);
       
   114 		CleanupStack::PopAndDestroy(charsets);
       
   115 		if (confidence < 50)
       
   116 			User::Leave(KErrNotSupported);
       
   117 		}
       
   118 
       
   119 	if (iConverter->PrepareToConvertToOrFromL(foreign_encoding,rfs) != CCnvCharacterSetConverter::EAvailable)
       
   120 		User::Leave(KErrNotSupported);
       
   121 	aState.iResult.iForeignEncoding = foreign_encoding;
       
   122 	if (aState.iParam.iFileSession == NULL)
       
   123 		{
       
   124 		CleanupStack::Pop(); // rfs
       
   125 		rfs.Close();
       
   126 		}
       
   127 	}
       
   128 
       
   129 void TPlainTextWriter::TranslateL(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult,
       
   130 								  RWriteStream& aOutput,RReadStream& aInput)
       
   131 	{
       
   132 	TPlainTextWriter writer(aParam,aResult,aOutput,aInput);
       
   133 	writer.TranslateHelperL();
       
   134 	}
       
   135 
       
   136 TPlainTextWriter::TPlainTextWriter(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult,
       
   137 								   RWriteStream& aOutput,RReadStream& aInput):
       
   138 	TPlainTextIOState(aParam,aResult,aOutput,aInput),
       
   139 	iLineLength(0),
       
   140 	iLineBuffer(NULL),
       
   141 	iMaxLineBufferLength(0)
       
   142 	{
       
   143 	}
       
   144 
       
   145 void TPlainTextWriter::TranslateHelperL()
       
   146 	{
       
   147 	if (iParam.iForeignEncoding)
       
   148 		{
       
   149 		iConverter = CPlainTextConverter::NewLC();
       
   150 		iConverter->PrepareToConvertL(*this,NULL);
       
   151 		}
       
   152 
       
   153 	if (iParam.iOrganisation == CPlainText::EOrganiseByLine)
       
   154 		iMaxLineLength = iParam.iMaxLineLength;
       
   155 	else
       
   156 		iMaxLineLength = KMaxTInt; // when exporting by paragraph, the wrapping width has no effect
       
   157 	if (iMaxLineLength <= 0)
       
   158 		iMaxLineLength = KMaxTInt;
       
   159 	iLineLength = 0;
       
   160 	if (iMaxLineLength < KMaxTInt)
       
   161 		iMaxLineBufferLength = iMaxLineLength;
       
   162 	else if (iParam.iForeignEncoding)
       
   163 		iMaxLineBufferLength = EDefaultLineBufferSize;
       
   164 	if (iMaxLineBufferLength)
       
   165 		iLineBuffer = new(ELeave) TText[iMaxLineBufferLength];
       
   166 	else
       
   167 		iLineBuffer = NULL;
       
   168 	CleanupStack::PushL(iLineBuffer);
       
   169 	TRAPD(error,TranslateToEofL());
       
   170 	if (error == KErrEof)
       
   171 		error = KErrNone;
       
   172 	if (error == KErrNone)
       
   173 		{
       
   174 		FlushL();
       
   175 		iOutput.CommitL();
       
   176 		}
       
   177 	CleanupStack::Pop(iLineBuffer);
       
   178 	delete [] iLineBuffer;
       
   179 	if (iConverter)
       
   180 		CleanupStack::PopAndDestroy(iConverter);
       
   181 	User::LeaveIfError(error);
       
   182 	}
       
   183 
       
   184 void TPlainTextWriter::TranslateToEofL()
       
   185 	{
       
   186 	while (!Finished())
       
   187 		{
       
   188 		TText c = ReadRawCharL();
       
   189 		switch (c)
       
   190 			{
       
   191 			// Write a CR-LF at a forced line break if organising by line.
       
   192 			case CEditableText::ELineBreak:
       
   193 				if (iParam.iOrganisation == CPlainText::EOrganiseByLine)
       
   194 					{
       
   195 					FlushL();
       
   196 					WriteNewLineL();
       
   197 					}
       
   198 				else
       
   199 					WriteCharL(c);
       
   200 				break;
       
   201 
       
   202 			// Write a CR-LF at the end of the paragraph, then an extra one if lines are split by CR-LFs.
       
   203 			case CEditableText::EParagraphDelimiter:
       
   204 				FlushL();
       
   205 				WriteNewLineL();
       
   206 				if (iParam.iOrganisation == CPlainText::EOrganiseByLine)
       
   207 					WriteNewLineL();
       
   208 				break;
       
   209 
       
   210 			default:
       
   211 				WriteCharL(c);
       
   212 			}
       
   213 		}
       
   214 	}
       
   215 
       
   216 void TPlainTextWriter::FlushL()
       
   217 	{
       
   218 	if (iLineBuffer)
       
   219 		WriteAndConvertL(iLineBuffer,iLineLength);
       
   220 	iLineLength = 0;
       
   221 	}
       
   222 
       
   223 void TPlainTextWriter::WriteCharL(TText aChar)
       
   224 	{
       
   225 	if (iLineBuffer)
       
   226 		{
       
   227 		if (iLineLength >= iMaxLineBufferLength)
       
   228 			{
       
   229 			int linebreak = iMaxLineBufferLength;
       
   230 			int stripped_linebreak = iMaxLineBufferLength;
       
   231 
       
   232 			if (iLineLength >= iMaxLineLength)
       
   233 				{
       
   234 				for (linebreak = iMaxLineLength; linebreak > 0; linebreak--)
       
   235 					if (iLineBuffer[linebreak - 1] == ' ')
       
   236 						break;
       
   237 				if (linebreak == 0)
       
   238 					linebreak = iMaxLineLength;
       
   239 
       
   240 				// Strip a single trailing space if any; it is added when text is imported.
       
   241 				stripped_linebreak = linebreak;
       
   242 				if (iLineBuffer[linebreak - 1] == ' ')
       
   243 					stripped_linebreak = linebreak - 1;
       
   244 				}
       
   245 
       
   246 			WriteAndConvertL(iLineBuffer,stripped_linebreak);
       
   247 			if (iLineLength >= iMaxLineLength)
       
   248 				WriteNewLineL();
       
   249 			int i = linebreak;
       
   250 			int j = 0;
       
   251 			while (i < iMaxLineBufferLength)
       
   252 				iLineBuffer[j++] = iLineBuffer[i++];
       
   253 			iLineLength = j;
       
   254 			}
       
   255 		iLineBuffer[iLineLength++] = aChar;
       
   256 		}
       
   257 	else
       
   258 		WriteRawCharL(aChar);
       
   259 	}
       
   260 
       
   261 void TPlainTextWriter::WriteNewLineL()
       
   262 	{
       
   263 	WriteAndConvertL(_S("\x0d\x0a"),2);
       
   264 	}
       
   265 
       
   266 void TPlainTextWriter::WriteAndConvertL(const TText* aText,TInt aLength)
       
   267 	{
       
   268 	if (iConverter)
       
   269 		{
       
   270 		while (aLength > 0)
       
   271 			{
       
   272 			TPtrC source(aText,aLength);
       
   273 			TPtr8 dest(iConverter->iConversionBuffer,CPlainTextConverter::EConversionBufferSize);
       
   274 			int remainder = iConverter->iConverter->ConvertFromUnicode(dest,source);
       
   275 			if (remainder < 0)
       
   276 				User::Leave(KErrCorrupt);
       
   277 			int available = iParam.iMaxOutputChars - iResult.iOutputChars;
       
   278 			if (available < dest.Length())
       
   279 				dest.SetLength(available);
       
   280 			if (dest.Length() > 0)
       
   281 				{
       
   282 				iOutput.WriteL(dest);
       
   283 				iResult.iOutputChars += dest.Length();
       
   284 				}
       
   285 			int converted = aLength - remainder;
       
   286 			aText += converted;
       
   287 			aLength -= converted;
       
   288 			}
       
   289 		}
       
   290 	else
       
   291 		{
       
   292 		while (aLength-- > 0)
       
   293 			WriteRawCharL(*aText++);
       
   294 		}
       
   295 	}
       
   296 
       
   297 TPlainTextReader::TPlainTextReader(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult,
       
   298 								   RWriteStream& aOutput,RReadStream& aInput):
       
   299 	TPlainTextIOState(aParam,aResult,aOutput,aInput),
       
   300 	iInputBuffer(NULL),
       
   301 	iInputLength(0),
       
   302 	iInputPos(0),
       
   303 	iConversionState(CCnvCharacterSetConverter::KStateDefault)
       
   304 	{
       
   305 	iCheckByteOrder = TRUE;
       
   306 	}
       
   307 
       
   308 void TPlainTextReader::TranslateL(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult,
       
   309 								  RWriteStream& aOutput,RReadStream& aInput)
       
   310 	{
       
   311 	TPlainTextReader reader(aParam,aResult,aOutput,aInput);
       
   312 	if(reader.iParam.iOrganisation == CPlainText::EOrganiseByLine)
       
   313 		{
       
   314 		TLineTextWriter txtWriter(reader);
       
   315 		TSLBTransaltor slbTranslator(txtWriter);
       
   316 		reader.TranslateHelperL(slbTranslator);
       
   317 		}
       
   318 	else
       
   319 		{
       
   320 		TParagraphTextWriter txtWriter(reader);
       
   321 		TSLBTransaltor slbTranslator(txtWriter);
       
   322 		reader.TranslateHelperL(slbTranslator);
       
   323 		}
       
   324 	}
       
   325 
       
   326 void TPlainTextReader::TranslateHelperL(TSLBTransaltor& aSLBTranslator)
       
   327 	{
       
   328 	if (iParam.iForeignEncoding || iParam.iGuessForeignEncoding)
       
   329 		{
       
   330 		iConverter = CPlainTextConverter::NewLC();
       
   331 		iInputBuffer = new(ELeave) TText[EInputBufferSize];
       
   332 		CleanupStack::PushL(iInputBuffer);
       
   333 		}
       
   334 	else
       
   335 		iInputBuffer = NULL;
       
   336 	TRAPD(error,TranslateToEofL(aSLBTranslator));
       
   337 	if (error == KErrEof)
       
   338 		error = KErrNone;
       
   339 	if (error == KErrNone)
       
   340 		iOutput.CommitL();
       
   341 	if (iConverter)
       
   342 		{
       
   343 		CleanupStack::Pop(iInputBuffer);
       
   344 		delete [] iInputBuffer;
       
   345 		CleanupStack::PopAndDestroy(iConverter);
       
   346 		}
       
   347 	User::LeaveIfError(error);
       
   348 	}
       
   349 
       
   350 void TPlainTextReader::TranslateToEofL(TSLBTransaltor& aSLBTranslator)
       
   351 	{
       
   352 	while(!Finished())
       
   353 		{
       
   354 		TText c = ReadAndConvertL();
       
   355 		aSLBTranslator.ProcessL(c);
       
   356 		}
       
   357 	aSLBTranslator.FlushL();
       
   358 	}
       
   359 
       
   360 TText TPlainTextReader::ReadAndConvertL()
       
   361 	{
       
   362 	// Read EConversionBufferSize bytes into a conversion buffer (iConversionBuffer).
       
   363 	// Using CharConv convert this into unicode and place in a destination buffer (dest).
       
   364 	// This may result in some bytes that cannot be converted (remainder) as that
       
   365 	// character encoding is truncated.
       
   366 	// This remainder is then moved to the begining of the conversion buffer and more
       
   367 	// data read in after it, in effect untruncating that last character.
       
   368 	// Before this next read takes place the next converted unicode character is returned
       
   369 	// until the destination buffer positional pointers reach the end where more data is
       
   370 	// required for processing.
       
   371 	//  
       
   372 	if (iConverter && iInputBuffer)
       
   373 		{
       
   374 		if (iInputPos >= iInputLength)
       
   375 			{
       
   376 			/*
       
   377 			Attempt to read more foreign characters if there are less than 20,
       
   378 			which is the current maximum length of a multibyte character sequence for CHARCONV.
       
   379 			Use MStreamBuf::ReadL, which doesn't leave on EOF, rather than RReadStream::ReadL,
       
   380 			which does, and doesn't tell you how much was actually read.
       
   381 			*/
       
   382 			if (iConverter->iConversionBufferLength < 20)
       
   383 				iConverter->iConversionBufferLength +=
       
   384 					iInput.Source()->ReadL(iConverter->iConversionBuffer + iConverter->iConversionBufferLength,
       
   385 										   CPlainTextConverter::EConversionBufferSize - iConverter->iConversionBufferLength);
       
   386 
       
   387 			// Create the converter late so we have a sample of foreign text for auto-detection of the encoding.
       
   388 			if (!iConverter->iConverter)
       
   389 				{
       
   390 				TPtrC8 sample(iConverter->iConversionBuffer,iConverter->iConversionBufferLength);
       
   391 				iConverter->PrepareToConvertL(*this,&sample);
       
   392 				}
       
   393 
       
   394 			// Translate from the foreign encoding to Unicode.
       
   395 			TPtr dest(iInputBuffer,0,EInputBufferSize);
       
   396 			TPtrC8 source(iConverter->iConversionBuffer,iConverter->iConversionBufferLength);
       
   397 			int remainder = iConverter->iConverter->ConvertToUnicode(dest,source,iConversionState);
       
   398 			if (remainder < 0)
       
   399 				User::Leave(KErrCorrupt);
       
   400 
       
   401 			// Move the remaining foreign characters if any to the start of the buffer
       
   402 			// so that on the next read it can be joined with its truncated part.
       
   403 			for (int i = 0, j = iConverter->iConversionBufferLength - remainder; i < remainder; ++i, ++j)
       
   404 				iConverter->iConversionBuffer[i] = iConverter->iConversionBuffer[j];
       
   405 			iConverter->iConversionBufferLength = remainder;
       
   406 
       
   407 			iInputPos = 0;
       
   408 			iInputLength = dest.Length();
       
   409 			if (iInputLength == 0)
       
   410 				User::Leave(KErrEof);
       
   411 			}
       
   412 		iResult.iInputChars++;
       
   413 		return iInputBuffer[iInputPos++];
       
   414 		}
       
   415 	else
       
   416 		return ReadRawCharL();
       
   417 	}
       
   418 
       
   419 /**
       
   420 The method processes the imput characters, writing them to the output, but skipping 
       
   421 the picture characters (CEditableText::EPictureCharacter).
       
   422 The method is not called directly and should not be called. It implements 
       
   423 MOutputChar::OutputCharL(TChar aChar) and is called from TParagraphTextWriter and 
       
   424 TLineTextWriter implementations.
       
   425 @param aChar Character to be processed.
       
   426 */
       
   427 void TPlainTextReader::OutputCharL(TText aChar)
       
   428 	{
       
   429 	switch(aChar)
       
   430 		{
       
   431 		case CEditableText::EByteOrderMark :
       
   432 			// leading byte order marks are ignored
       
   433 			if(iResult.iInputChars > 1)
       
   434 				{
       
   435 				WriteRawCharL(aChar);
       
   436 				}
       
   437 			break;
       
   438 		case CEditableText::EPictureCharacter:
       
   439 			//Picture characters are ignored because they would cause ETEXT to panic when it attempted to find
       
   440 			//the picture corresponding to the character.
       
   441 			break;
       
   442 		default:
       
   443 			WriteRawCharL(aChar);
       
   444 			break;
       
   445 		}
       
   446 	}
       
   447