|
1 /* |
|
2 * Copyright (c) 1998-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * Implementation of the classes that import and export plain text. |
|
16 * |
|
17 */ |
|
18 |
|
19 |
|
20 #include "TXTSTD.H" |
|
21 #include "TXTPLAIN.H" |
|
22 #include "charconv.h" |
|
23 |
|
24 TPlainTextIOState:: TPlainTextIOState(const CPlainText::TImportExportParam& aParam, |
|
25 CPlainText::TImportExportResult& aResult, |
|
26 RWriteStream& aOutput,RReadStream& aInput): |
|
27 iParam(aParam), |
|
28 iResult(aResult), |
|
29 iOutput(aOutput), |
|
30 iInput(aInput), |
|
31 iConverter(NULL), |
|
32 iSwapInput(FALSE), |
|
33 iCheckByteOrder(FALSE) |
|
34 { |
|
35 aResult = CPlainText::TImportExportResult(); // zero output counters; aResult may be re-used. |
|
36 } |
|
37 |
|
38 TText TPlainTextIOState::ReadRawCharL() |
|
39 { |
|
40 TText c; |
|
41 if (iParam.iInputInternal) |
|
42 iInput.ReadL((TUint8*)&c,sizeof(TText)); |
|
43 else |
|
44 c = iInput.ReadUint16L(); |
|
45 if (iSwapInput) |
|
46 c = (TText)(((c << 8) & 0xFF00) | ((c >> 8) & 0xFF)); |
|
47 if (iCheckByteOrder) |
|
48 { |
|
49 if (c == CEditableText::EReversedByteOrderMark) |
|
50 { |
|
51 c = CEditableText::EByteOrderMark; |
|
52 iSwapInput = !iSwapInput; |
|
53 } |
|
54 iCheckByteOrder = FALSE; |
|
55 } |
|
56 iResult.iInputChars++; |
|
57 return c; |
|
58 } |
|
59 |
|
60 void TPlainTextIOState::WriteRawCharL(TText aChar) |
|
61 { |
|
62 if (iResult.iOutputChars < iParam.iMaxOutputChars) |
|
63 { |
|
64 if (iParam.iOutputInternal) |
|
65 iOutput.WriteL((TUint8*)&aChar,sizeof(TText)); |
|
66 else |
|
67 iOutput.WriteUint16L(aChar); |
|
68 iResult.iOutputChars++; |
|
69 } |
|
70 } |
|
71 |
|
72 CPlainTextConverter* CPlainTextConverter::NewLC() |
|
73 { |
|
74 CPlainTextConverter* c = new(ELeave) CPlainTextConverter; |
|
75 CleanupStack::PushL(c); |
|
76 c->iConversionBuffer = new(ELeave) TUint8[EConversionBufferSize]; |
|
77 return c; |
|
78 } |
|
79 |
|
80 CPlainTextConverter::~CPlainTextConverter() |
|
81 { |
|
82 delete iConverter; |
|
83 delete [] iConversionBuffer; |
|
84 } |
|
85 |
|
86 /* |
|
87 Prepare to convert between Unicode and a foreign encoding. |
|
88 If aSample is non-null it can be used to guess the foreign encoding, but only if iParam.iGuessForeignEncoding is true. |
|
89 */ |
|
90 void CPlainTextConverter::PrepareToConvertL(TPlainTextIOState& aState,const TDesC8* aSample) |
|
91 { |
|
92 RFs rfs; |
|
93 |
|
94 iConverter = CCnvCharacterSetConverter::NewL(); |
|
95 if (aState.iParam.iFileSession == NULL) |
|
96 { |
|
97 TInt error = rfs.Connect(); |
|
98 User::LeaveIfError(error); |
|
99 |
|
100 CleanupClosePushL(rfs); |
|
101 } |
|
102 else |
|
103 rfs = *aState.iParam.iFileSession; |
|
104 |
|
105 TUint foreign_encoding = aState.iParam.iForeignEncoding; |
|
106 |
|
107 // Try to guess the foreign encoding. |
|
108 if (aSample && aState.iParam.iGuessForeignEncoding) |
|
109 { |
|
110 CArrayFix<CCnvCharacterSetConverter::SCharacterSet>* charsets = |
|
111 CCnvCharacterSetConverter::CreateArrayOfCharacterSetsAvailableLC(rfs); |
|
112 TInt confidence = 0; |
|
113 CCnvCharacterSetConverter::AutoDetectCharacterSetL(confidence,foreign_encoding,*charsets,*aSample); |
|
114 CleanupStack::PopAndDestroy(charsets); |
|
115 if (confidence < 50) |
|
116 User::Leave(KErrNotSupported); |
|
117 } |
|
118 |
|
119 if (iConverter->PrepareToConvertToOrFromL(foreign_encoding,rfs) != CCnvCharacterSetConverter::EAvailable) |
|
120 User::Leave(KErrNotSupported); |
|
121 aState.iResult.iForeignEncoding = foreign_encoding; |
|
122 if (aState.iParam.iFileSession == NULL) |
|
123 { |
|
124 CleanupStack::Pop(); // rfs |
|
125 rfs.Close(); |
|
126 } |
|
127 } |
|
128 |
|
129 void TPlainTextWriter::TranslateL(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult, |
|
130 RWriteStream& aOutput,RReadStream& aInput) |
|
131 { |
|
132 TPlainTextWriter writer(aParam,aResult,aOutput,aInput); |
|
133 writer.TranslateHelperL(); |
|
134 } |
|
135 |
|
136 TPlainTextWriter::TPlainTextWriter(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult, |
|
137 RWriteStream& aOutput,RReadStream& aInput): |
|
138 TPlainTextIOState(aParam,aResult,aOutput,aInput), |
|
139 iLineLength(0), |
|
140 iLineBuffer(NULL), |
|
141 iMaxLineBufferLength(0) |
|
142 { |
|
143 } |
|
144 |
|
145 void TPlainTextWriter::TranslateHelperL() |
|
146 { |
|
147 if (iParam.iForeignEncoding) |
|
148 { |
|
149 iConverter = CPlainTextConverter::NewLC(); |
|
150 iConverter->PrepareToConvertL(*this,NULL); |
|
151 } |
|
152 |
|
153 if (iParam.iOrganisation == CPlainText::EOrganiseByLine) |
|
154 iMaxLineLength = iParam.iMaxLineLength; |
|
155 else |
|
156 iMaxLineLength = KMaxTInt; // when exporting by paragraph, the wrapping width has no effect |
|
157 if (iMaxLineLength <= 0) |
|
158 iMaxLineLength = KMaxTInt; |
|
159 iLineLength = 0; |
|
160 if (iMaxLineLength < KMaxTInt) |
|
161 iMaxLineBufferLength = iMaxLineLength; |
|
162 else if (iParam.iForeignEncoding) |
|
163 iMaxLineBufferLength = EDefaultLineBufferSize; |
|
164 if (iMaxLineBufferLength) |
|
165 iLineBuffer = new(ELeave) TText[iMaxLineBufferLength]; |
|
166 else |
|
167 iLineBuffer = NULL; |
|
168 CleanupStack::PushL(iLineBuffer); |
|
169 TRAPD(error,TranslateToEofL()); |
|
170 if (error == KErrEof) |
|
171 error = KErrNone; |
|
172 if (error == KErrNone) |
|
173 { |
|
174 FlushL(); |
|
175 iOutput.CommitL(); |
|
176 } |
|
177 CleanupStack::Pop(iLineBuffer); |
|
178 delete [] iLineBuffer; |
|
179 if (iConverter) |
|
180 CleanupStack::PopAndDestroy(iConverter); |
|
181 User::LeaveIfError(error); |
|
182 } |
|
183 |
|
184 void TPlainTextWriter::TranslateToEofL() |
|
185 { |
|
186 while (!Finished()) |
|
187 { |
|
188 TText c = ReadRawCharL(); |
|
189 switch (c) |
|
190 { |
|
191 // Write a CR-LF at a forced line break if organising by line. |
|
192 case CEditableText::ELineBreak: |
|
193 if (iParam.iOrganisation == CPlainText::EOrganiseByLine) |
|
194 { |
|
195 FlushL(); |
|
196 WriteNewLineL(); |
|
197 } |
|
198 else |
|
199 WriteCharL(c); |
|
200 break; |
|
201 |
|
202 // Write a CR-LF at the end of the paragraph, then an extra one if lines are split by CR-LFs. |
|
203 case CEditableText::EParagraphDelimiter: |
|
204 FlushL(); |
|
205 WriteNewLineL(); |
|
206 if (iParam.iOrganisation == CPlainText::EOrganiseByLine) |
|
207 WriteNewLineL(); |
|
208 break; |
|
209 |
|
210 default: |
|
211 WriteCharL(c); |
|
212 } |
|
213 } |
|
214 } |
|
215 |
|
216 void TPlainTextWriter::FlushL() |
|
217 { |
|
218 if (iLineBuffer) |
|
219 WriteAndConvertL(iLineBuffer,iLineLength); |
|
220 iLineLength = 0; |
|
221 } |
|
222 |
|
223 void TPlainTextWriter::WriteCharL(TText aChar) |
|
224 { |
|
225 if (iLineBuffer) |
|
226 { |
|
227 if (iLineLength >= iMaxLineBufferLength) |
|
228 { |
|
229 int linebreak = iMaxLineBufferLength; |
|
230 int stripped_linebreak = iMaxLineBufferLength; |
|
231 |
|
232 if (iLineLength >= iMaxLineLength) |
|
233 { |
|
234 for (linebreak = iMaxLineLength; linebreak > 0; linebreak--) |
|
235 if (iLineBuffer[linebreak - 1] == ' ') |
|
236 break; |
|
237 if (linebreak == 0) |
|
238 linebreak = iMaxLineLength; |
|
239 |
|
240 // Strip a single trailing space if any; it is added when text is imported. |
|
241 stripped_linebreak = linebreak; |
|
242 if (iLineBuffer[linebreak - 1] == ' ') |
|
243 stripped_linebreak = linebreak - 1; |
|
244 } |
|
245 |
|
246 WriteAndConvertL(iLineBuffer,stripped_linebreak); |
|
247 if (iLineLength >= iMaxLineLength) |
|
248 WriteNewLineL(); |
|
249 int i = linebreak; |
|
250 int j = 0; |
|
251 while (i < iMaxLineBufferLength) |
|
252 iLineBuffer[j++] = iLineBuffer[i++]; |
|
253 iLineLength = j; |
|
254 } |
|
255 iLineBuffer[iLineLength++] = aChar; |
|
256 } |
|
257 else |
|
258 WriteRawCharL(aChar); |
|
259 } |
|
260 |
|
261 void TPlainTextWriter::WriteNewLineL() |
|
262 { |
|
263 WriteAndConvertL(_S("\x0d\x0a"),2); |
|
264 } |
|
265 |
|
266 void TPlainTextWriter::WriteAndConvertL(const TText* aText,TInt aLength) |
|
267 { |
|
268 if (iConverter) |
|
269 { |
|
270 while (aLength > 0) |
|
271 { |
|
272 TPtrC source(aText,aLength); |
|
273 TPtr8 dest(iConverter->iConversionBuffer,CPlainTextConverter::EConversionBufferSize); |
|
274 int remainder = iConverter->iConverter->ConvertFromUnicode(dest,source); |
|
275 if (remainder < 0) |
|
276 User::Leave(KErrCorrupt); |
|
277 int available = iParam.iMaxOutputChars - iResult.iOutputChars; |
|
278 if (available < dest.Length()) |
|
279 dest.SetLength(available); |
|
280 if (dest.Length() > 0) |
|
281 { |
|
282 iOutput.WriteL(dest); |
|
283 iResult.iOutputChars += dest.Length(); |
|
284 } |
|
285 int converted = aLength - remainder; |
|
286 aText += converted; |
|
287 aLength -= converted; |
|
288 } |
|
289 } |
|
290 else |
|
291 { |
|
292 while (aLength-- > 0) |
|
293 WriteRawCharL(*aText++); |
|
294 } |
|
295 } |
|
296 |
|
297 TPlainTextReader::TPlainTextReader(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult, |
|
298 RWriteStream& aOutput,RReadStream& aInput): |
|
299 TPlainTextIOState(aParam,aResult,aOutput,aInput), |
|
300 iInputBuffer(NULL), |
|
301 iInputLength(0), |
|
302 iInputPos(0), |
|
303 iConversionState(CCnvCharacterSetConverter::KStateDefault) |
|
304 { |
|
305 iCheckByteOrder = TRUE; |
|
306 } |
|
307 |
|
308 void TPlainTextReader::TranslateL(const CPlainText::TImportExportParam& aParam,CPlainText::TImportExportResult& aResult, |
|
309 RWriteStream& aOutput,RReadStream& aInput) |
|
310 { |
|
311 TPlainTextReader reader(aParam,aResult,aOutput,aInput); |
|
312 if(reader.iParam.iOrganisation == CPlainText::EOrganiseByLine) |
|
313 { |
|
314 TLineTextWriter txtWriter(reader); |
|
315 TSLBTransaltor slbTranslator(txtWriter); |
|
316 reader.TranslateHelperL(slbTranslator); |
|
317 } |
|
318 else |
|
319 { |
|
320 TParagraphTextWriter txtWriter(reader); |
|
321 TSLBTransaltor slbTranslator(txtWriter); |
|
322 reader.TranslateHelperL(slbTranslator); |
|
323 } |
|
324 } |
|
325 |
|
326 void TPlainTextReader::TranslateHelperL(TSLBTransaltor& aSLBTranslator) |
|
327 { |
|
328 if (iParam.iForeignEncoding || iParam.iGuessForeignEncoding) |
|
329 { |
|
330 iConverter = CPlainTextConverter::NewLC(); |
|
331 iInputBuffer = new(ELeave) TText[EInputBufferSize]; |
|
332 CleanupStack::PushL(iInputBuffer); |
|
333 } |
|
334 else |
|
335 iInputBuffer = NULL; |
|
336 TRAPD(error,TranslateToEofL(aSLBTranslator)); |
|
337 if (error == KErrEof) |
|
338 error = KErrNone; |
|
339 if (error == KErrNone) |
|
340 iOutput.CommitL(); |
|
341 if (iConverter) |
|
342 { |
|
343 CleanupStack::Pop(iInputBuffer); |
|
344 delete [] iInputBuffer; |
|
345 CleanupStack::PopAndDestroy(iConverter); |
|
346 } |
|
347 User::LeaveIfError(error); |
|
348 } |
|
349 |
|
350 void TPlainTextReader::TranslateToEofL(TSLBTransaltor& aSLBTranslator) |
|
351 { |
|
352 while(!Finished()) |
|
353 { |
|
354 TText c = ReadAndConvertL(); |
|
355 aSLBTranslator.ProcessL(c); |
|
356 } |
|
357 aSLBTranslator.FlushL(); |
|
358 } |
|
359 |
|
360 TText TPlainTextReader::ReadAndConvertL() |
|
361 { |
|
362 // Read EConversionBufferSize bytes into a conversion buffer (iConversionBuffer). |
|
363 // Using CharConv convert this into unicode and place in a destination buffer (dest). |
|
364 // This may result in some bytes that cannot be converted (remainder) as that |
|
365 // character encoding is truncated. |
|
366 // This remainder is then moved to the begining of the conversion buffer and more |
|
367 // data read in after it, in effect untruncating that last character. |
|
368 // Before this next read takes place the next converted unicode character is returned |
|
369 // until the destination buffer positional pointers reach the end where more data is |
|
370 // required for processing. |
|
371 // |
|
372 if (iConverter && iInputBuffer) |
|
373 { |
|
374 if (iInputPos >= iInputLength) |
|
375 { |
|
376 /* |
|
377 Attempt to read more foreign characters if there are less than 20, |
|
378 which is the current maximum length of a multibyte character sequence for CHARCONV. |
|
379 Use MStreamBuf::ReadL, which doesn't leave on EOF, rather than RReadStream::ReadL, |
|
380 which does, and doesn't tell you how much was actually read. |
|
381 */ |
|
382 if (iConverter->iConversionBufferLength < 20) |
|
383 iConverter->iConversionBufferLength += |
|
384 iInput.Source()->ReadL(iConverter->iConversionBuffer + iConverter->iConversionBufferLength, |
|
385 CPlainTextConverter::EConversionBufferSize - iConverter->iConversionBufferLength); |
|
386 |
|
387 // Create the converter late so we have a sample of foreign text for auto-detection of the encoding. |
|
388 if (!iConverter->iConverter) |
|
389 { |
|
390 TPtrC8 sample(iConverter->iConversionBuffer,iConverter->iConversionBufferLength); |
|
391 iConverter->PrepareToConvertL(*this,&sample); |
|
392 } |
|
393 |
|
394 // Translate from the foreign encoding to Unicode. |
|
395 TPtr dest(iInputBuffer,0,EInputBufferSize); |
|
396 TPtrC8 source(iConverter->iConversionBuffer,iConverter->iConversionBufferLength); |
|
397 int remainder = iConverter->iConverter->ConvertToUnicode(dest,source,iConversionState); |
|
398 if (remainder < 0) |
|
399 User::Leave(KErrCorrupt); |
|
400 |
|
401 // Move the remaining foreign characters if any to the start of the buffer |
|
402 // so that on the next read it can be joined with its truncated part. |
|
403 for (int i = 0, j = iConverter->iConversionBufferLength - remainder; i < remainder; ++i, ++j) |
|
404 iConverter->iConversionBuffer[i] = iConverter->iConversionBuffer[j]; |
|
405 iConverter->iConversionBufferLength = remainder; |
|
406 |
|
407 iInputPos = 0; |
|
408 iInputLength = dest.Length(); |
|
409 if (iInputLength == 0) |
|
410 User::Leave(KErrEof); |
|
411 } |
|
412 iResult.iInputChars++; |
|
413 return iInputBuffer[iInputPos++]; |
|
414 } |
|
415 else |
|
416 return ReadRawCharL(); |
|
417 } |
|
418 |
|
419 /** |
|
420 The method processes the imput characters, writing them to the output, but skipping |
|
421 the picture characters (CEditableText::EPictureCharacter). |
|
422 The method is not called directly and should not be called. It implements |
|
423 MOutputChar::OutputCharL(TChar aChar) and is called from TParagraphTextWriter and |
|
424 TLineTextWriter implementations. |
|
425 @param aChar Character to be processed. |
|
426 */ |
|
427 void TPlainTextReader::OutputCharL(TText aChar) |
|
428 { |
|
429 switch(aChar) |
|
430 { |
|
431 case CEditableText::EByteOrderMark : |
|
432 // leading byte order marks are ignored |
|
433 if(iResult.iInputChars > 1) |
|
434 { |
|
435 WriteRawCharL(aChar); |
|
436 } |
|
437 break; |
|
438 case CEditableText::EPictureCharacter: |
|
439 //Picture characters are ignored because they would cause ETEXT to panic when it attempted to find |
|
440 //the picture corresponding to the character. |
|
441 break; |
|
442 default: |
|
443 WriteRawCharL(aChar); |
|
444 break; |
|
445 } |
|
446 } |
|
447 |