|
1 // Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // |
|
15 |
|
16 #include <s32file.h> |
|
17 |
|
18 #include "CHtmlToCrtConvCharsetConvert.h" |
|
19 #include "CHtmlToCrtConvBuffer.h" |
|
20 #include "CHtmlToCrtConverter.h" |
|
21 #include "MHtmlToCrtConvResourceFile.h" |
|
22 #include <chtmltocrtconverter.rsg> |
|
23 |
|
24 const TInt KSampleBufferSize = 256; |
|
25 const TInt KMetaTagCharsetValueBufferLength = 40; |
|
26 const TInt KMimimumConfidenceLevel = 50; |
|
27 |
|
28 CHtmlToCrtConvCharsetConvert* CHtmlToCrtConvCharsetConvert::NewL(CHtmlToCrtConvBuffer& aBuffer, RFs& aFsSession, CCnvCharacterSetConverter& aCnvCharacterSetConverter, MHtmlToCrtConvResourceFile& aResourceFile, MConverterUiObserver* aObserver) |
|
29 { |
|
30 CHtmlToCrtConvCharsetConvert* self=new(ELeave) CHtmlToCrtConvCharsetConvert(aBuffer, aObserver, aFsSession, aCnvCharacterSetConverter, aResourceFile); |
|
31 CleanupStack::PushL(self); |
|
32 self->ConstructL(); |
|
33 CleanupStack::Pop(self); |
|
34 return self; |
|
35 } |
|
36 |
|
37 void CHtmlToCrtConvCharsetConvert::ConstructL() |
|
38 { |
|
39 iArrayOfCharacterSetsAvailable=CCnvCharacterSetConverter::CreateArrayOfCharacterSetsAvailableL(iFsSession); |
|
40 } |
|
41 |
|
42 CHtmlToCrtConvCharsetConvert::CHtmlToCrtConvCharsetConvert(CHtmlToCrtConvBuffer& aBuffer, MConverterUiObserver* aObserver, RFs& aFsSession, CCnvCharacterSetConverter& aCnvCharacterSetConverter, MHtmlToCrtConvResourceFile& aResourceFile) |
|
43 :iCnvCharacterSetConverter(aCnvCharacterSetConverter) |
|
44 ,iBuffer(aBuffer) |
|
45 ,iObserver(aObserver) |
|
46 ,iResourceFile(aResourceFile) |
|
47 ,iFsSession(aFsSession) |
|
48 { |
|
49 } |
|
50 |
|
51 CHtmlToCrtConvCharsetConvert::~CHtmlToCrtConvCharsetConvert() |
|
52 { |
|
53 delete iArrayOfCharacterSetsAvailable; |
|
54 } |
|
55 |
|
56 //================================================================================== |
|
57 //GetCharSetFromPasswordL - uses MConverterUiObserver::QueryPasswordL |
|
58 //================================================================================== |
|
59 TBool CHtmlToCrtConvCharsetConvert::GetCharSetFromPasswordL() |
|
60 { |
|
61 if(iObserver) |
|
62 { |
|
63 HBufC16* unicodeCharsetName=iObserver->QueryPasswordL(KHtmlToCrtRequestCharset); |
|
64 |
|
65 if(unicodeCharsetName) |
|
66 { |
|
67 //convert from unicode |
|
68 HBufC8* charsetName=HBufC8::NewLC(unicodeCharsetName->Length()); |
|
69 ConvertFromUnicodeL(*unicodeCharsetName, *charsetName); |
|
70 //convert standard name of character set to TUint identifier |
|
71 iCharacterSetIdentifier=iCnvCharacterSetConverter.ConvertStandardNameOfCharacterSetToIdentifierL(*(charsetName), iFsSession); |
|
72 CleanupStack::PopAndDestroy(charsetName); |
|
73 } |
|
74 } |
|
75 return iCharacterSetIdentifier; |
|
76 } |
|
77 |
|
78 //================================================================================== |
|
79 //GetCharSetFromMetaTagL - searches for META tag, then extracts charset value |
|
80 //<META http-equiv="Content-Type" content="text/html; charset = us-ascii"> |
|
81 //================================================================================== |
|
82 TBool CHtmlToCrtConvCharsetConvert::GetCharSetFromMetaTagL() |
|
83 { |
|
84 HBufC8* temp=HBufC8::NewLC(KSampleBufferSize); |
|
85 TPtr8 buffer(temp->Des()); |
|
86 TInt offset=0; |
|
87 iBuffer.GetSampleOfTextFromFileL(buffer, KSampleBufferSize, offset); |
|
88 |
|
89 _LIT8(KMeta, "<meta"); |
|
90 _LIT8(KClosingTagIndicator, ">"); |
|
91 _LIT8(KCharset, "charset"); |
|
92 _LIT8(KEquals, "="); |
|
93 |
|
94 offset=FindAndDeleteString(buffer, KMeta); |
|
95 if(offset>=0)//meta found |
|
96 { |
|
97 //find > |
|
98 TInt offset=buffer.FindF(KClosingTagIndicator); |
|
99 if(offset>=0)//> found |
|
100 { |
|
101 //delete > onwards - Delete() adjusts length so as not to delete beyond end of buffer |
|
102 buffer.Delete(offset, buffer.Length()); |
|
103 |
|
104 //now buffer only contains META tag |
|
105 offset=FindAndDeleteString(buffer, KCharset); |
|
106 if(offset>=0)//charset found |
|
107 { |
|
108 offset=FindAndDeleteString(buffer, KEquals); |
|
109 if(offset>=0)//= found |
|
110 { |
|
111 ParseCharsetValue(buffer); |
|
112 //ConvertStandardNameOfCharacterSetToIdentifierL returns the UID of a |
|
113 //character set for a given Internet-standard name |
|
114 //zero is returned if none found |
|
115 iCharacterSetIdentifier=iCnvCharacterSetConverter.ConvertStandardNameOfCharacterSetToIdentifierL(buffer, iFsSession); |
|
116 } |
|
117 } |
|
118 } |
|
119 } |
|
120 CleanupStack::PopAndDestroy(temp); |
|
121 return iCharacterSetIdentifier; |
|
122 } |
|
123 |
|
124 //================================================================================== |
|
125 //FindAndDeleteString - deletes up to and including string |
|
126 //================================================================================== |
|
127 TInt CHtmlToCrtConvCharsetConvert::FindAndDeleteString(TDes8& aBuffer, const TDesC8& aString) |
|
128 { |
|
129 TInt offset=aBuffer.FindF(aString); |
|
130 TInt length=aString.Length(); |
|
131 |
|
132 if(offset>=0)//string found |
|
133 { |
|
134 //delete up to and including string |
|
135 ASSERT(offset+length <= aBuffer.Length()); |
|
136 aBuffer.Delete(0, offset+length); |
|
137 } |
|
138 return offset; |
|
139 } |
|
140 //================================================================================== |
|
141 //ParseCharsetValue - charset value read until " or white space character seen |
|
142 //================================================================================== |
|
143 void CHtmlToCrtConvCharsetConvert::ParseCharsetValue(TDes8& aBuffer) |
|
144 { |
|
145 aBuffer.TrimLeft(); |
|
146 TInt length=aBuffer.Length(); |
|
147 if(length) |
|
148 { |
|
149 TChar character; |
|
150 TBuf8<KMetaTagCharsetValueBufferLength> charsetBuffer; |
|
151 TBool finishedReadingValue=EFalse; |
|
152 TInt ii=0; |
|
153 while(ii<length && !finishedReadingValue) |
|
154 { |
|
155 character=aBuffer[ii++]; |
|
156 if(character.IsSpace() || character=='"') |
|
157 { |
|
158 finishedReadingValue=ETrue; |
|
159 } |
|
160 else |
|
161 { |
|
162 if(charsetBuffer.Length()<KMetaTagCharsetValueBufferLength) |
|
163 { |
|
164 charsetBuffer.Append(character); |
|
165 } |
|
166 else |
|
167 { |
|
168 ii=length; |
|
169 } |
|
170 } |
|
171 } |
|
172 aBuffer.Copy(charsetBuffer); |
|
173 } |
|
174 } |
|
175 //================================================================================== |
|
176 //GetCharSetUsingAutoDetectL |
|
177 //================================================================================== |
|
178 TBool CHtmlToCrtConvCharsetConvert::GetCharSetUsingAutoDetectL() |
|
179 { |
|
180 HBufC8* temp=HBufC8::NewLC(KSampleBufferSize); |
|
181 TPtr8 buffer(temp->Des()); |
|
182 TInt offset=256; |
|
183 //it's likely that the start of the file will contain the header, so offset |
|
184 //is used to read from a position within the file |
|
185 iBuffer.GetSampleOfTextFromFileL(buffer, KSampleBufferSize, offset); |
|
186 |
|
187 TInt confidence=0; |
|
188 |
|
189 if(buffer.Length()) |
|
190 { |
|
191 iCnvCharacterSetConverter.AutoDetectCharacterSetL(confidence, iCharacterSetIdentifier, *iArrayOfCharacterSetsAvailable, buffer); |
|
192 } |
|
193 |
|
194 CleanupStack::PopAndDestroy(temp); |
|
195 |
|
196 if(iCharacterSetIdentifier == KCharacterSetIdentifierAscii) |
|
197 { |
|
198 //In order to support Latin letters, the character set needs to |
|
199 //be widened from ASCII to Code Page 1252 |
|
200 iCharacterSetIdentifier = KCharacterSetIdentifierCodePage1252; |
|
201 } |
|
202 |
|
203 //confidence in range 0-100 |
|
204 if(confidence<KMimimumConfidenceLevel) |
|
205 { |
|
206 //low level of confidence in the chosen character set, so reset |
|
207 // - this method of determining character set not successful |
|
208 iCharacterSetIdentifier=0; |
|
209 return EFalse; |
|
210 } |
|
211 else |
|
212 return ETrue; |
|
213 } |
|
214 |
|
215 //============================================================= |
|
216 //PrepareForConvertToUnicodeL |
|
217 //============================================================= |
|
218 void CHtmlToCrtConvCharsetConvert::PrepareForConvertToUnicodeL() |
|
219 { |
|
220 if(!iCharacterSetIdentifier) |
|
221 { |
|
222 //get default character set for locale from resource file |
|
223 HBufC* unicodeDefaultCharacterSet=iResourceFile.ReadResourceHBufCLC(R_CNV_DEFAULT_CHARACTER_SET); |
|
224 |
|
225 //convert from unicode |
|
226 HBufC8* defaultCharacterSet=HBufC8::NewLC(unicodeDefaultCharacterSet->Length()); |
|
227 ConvertFromUnicodeL(*unicodeDefaultCharacterSet, *defaultCharacterSet); |
|
228 |
|
229 //convert standard name of character set to TUint identifier |
|
230 iCharacterSetIdentifier=iCnvCharacterSetConverter.ConvertStandardNameOfCharacterSetToIdentifierL(*(defaultCharacterSet), iFsSession); |
|
231 CleanupStack::PopAndDestroy(2); //defaultCharacterSet, unicodeDefaultCharacterSet |
|
232 } |
|
233 |
|
234 iCnvCharacterSetConverter.PrepareToConvertToOrFromL(iCharacterSetIdentifier, *iArrayOfCharacterSetsAvailable, iFsSession); |
|
235 } |
|
236 //============================================================= |
|
237 //ConvertFromUnicodeL |
|
238 //============================================================= |
|
239 void CHtmlToCrtConvCharsetConvert::ConvertFromUnicodeL(const TDesC& aUnicode, HBufC8& aNarrow) |
|
240 { |
|
241 TPtr8 ptrNarrow(aNarrow.Des()); |
|
242 iCnvCharacterSetConverter.PrepareToConvertToOrFromL(KCharacterSetIdentifierAscii, *iArrayOfCharacterSetsAvailable, iFsSession); |
|
243 iCnvCharacterSetConverter.ConvertFromUnicode(ptrNarrow, aUnicode); |
|
244 } |