|
1 /* |
|
2 * Copyright (c) 2007-2010 Sebastian Brannstrom, Lars Persson, EmbedDev AB |
|
3 * |
|
4 * All rights reserved. |
|
5 * This component and the accompanying materials are made available |
|
6 * under the terms of the License "Eclipse Public License v1.0" |
|
7 * which accompanies this distribution, and is available |
|
8 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
9 * |
|
10 * Initial Contributors: |
|
11 * EmbedDev AB - initial contribution. |
|
12 * |
|
13 * Contributors: |
|
14 * |
|
15 * Description: |
|
16 * |
|
17 */ |
|
18 |
|
19 #include "FeedParser.h" |
|
20 #include <f32file.h> |
|
21 #include <bautils.h> |
|
22 #include <s32file.h> |
|
23 #include <charconv.h> |
|
24 #include <xml/stringdictionarycollection.h> |
|
25 #include <utf.h> |
|
26 #include <tinternetdate.h> |
|
27 #include "debug.h" |
|
28 |
|
29 using namespace Xml; |
|
30 const TInt KMaxParseBuffer = 1024; |
|
31 const TInt KMaxStringBuffer = 100; |
|
32 |
|
33 CFeedParser::CFeedParser(MFeedParserObserver& aCallbacks, RFs& aFs) : iCallbacks(aCallbacks), iRfs(aFs) |
|
34 { |
|
35 } |
|
36 |
|
37 CFeedParser::~CFeedParser() |
|
38 { |
|
39 } |
|
40 |
|
41 void CFeedParser::ParseFeedL(const TFileName &feedFileName, CFeedInfo *info, TUint aMaxItems) |
|
42 { |
|
43 //DP1("ParseFeedL BEGIN: %S", &feedFileName); |
|
44 |
|
45 _LIT8(KXmlMimeType, "text/xml"); |
|
46 // Contruct the parser object |
|
47 CParser* parser = CParser::NewLC(KXmlMimeType, *this); |
|
48 iActiveFeed = info; |
|
49 iFeedState = EStateRoot; |
|
50 iActiveShow = NULL; |
|
51 iItemsParsed = 0; |
|
52 iMaxItems = aMaxItems; |
|
53 iStoppedParsing = EFalse; |
|
54 iEncoding = ELatin1; |
|
55 |
|
56 ParseL(*parser, iRfs, feedFileName); |
|
57 |
|
58 CleanupStack::PopAndDestroy(parser); |
|
59 |
|
60 //DP("ParseFeedL END"); |
|
61 } |
|
62 |
|
63 // from MContentHandler |
|
64 void CFeedParser::OnStartDocumentL(const RDocumentParameters& aDocParam, TInt /*aErrorCode*/) |
|
65 { |
|
66 DP("OnStartDocumentL()"); |
|
67 HBufC* charset = HBufC::NewLC(KMaxParseBuffer); |
|
68 charset->Des().Copy(aDocParam.CharacterSetName().DesC()); |
|
69 iEncoding = EUtf8; |
|
70 if (charset->CompareF(_L("utf-8")) == 0) { |
|
71 DP("setting UTF8"); |
|
72 iEncoding = EUtf8; |
|
73 } else if (charset->CompareF(_L("ISO-8859-1")) == 0) { |
|
74 iEncoding = EUtf8; //Latin1; |
|
75 } else { |
|
76 DP1("unknown charset: %S", &charset); |
|
77 } |
|
78 CleanupStack::PopAndDestroy(charset);//buffer |
|
79 } |
|
80 |
|
81 void CFeedParser::OnEndDocumentL(TInt /*aErrorCode*/) |
|
82 { |
|
83 //DP("OnEndDocumentL()"); |
|
84 iCallbacks.ParsingCompleteL(iActiveFeed); |
|
85 } |
|
86 |
|
87 void CFeedParser::OnStartElementL(const RTagInfo& aElement, const RAttributeArray& aAttributes, TInt /*aErrorCode*/) |
|
88 { |
|
89 if (iStoppedParsing) { |
|
90 iActiveShow = NULL; |
|
91 return; |
|
92 } |
|
93 |
|
94 TBuf<KMaxStringBuffer> str; |
|
95 str.Copy(aElement.LocalName().DesC()); |
|
96 //DP2("OnStartElementL START state=%d, element=%S", iFeedState, &str); |
|
97 iBuffer.Zero(); |
|
98 switch (iFeedState) { |
|
99 case EStateRoot: |
|
100 // <channel> |
|
101 if (str.CompareF(KTagChannel) == 0) { |
|
102 iFeedState = EStateChannel; |
|
103 } |
|
104 break; |
|
105 case EStateChannel: |
|
106 // <channel> <item> |
|
107 if(str.CompareF(KTagItem) == 0) { |
|
108 //DP("New item"); |
|
109 iFeedState=EStateItem; |
|
110 |
|
111 iActiveShow = NULL; |
|
112 iActiveShow = CShowInfo::NewL(); |
|
113 if (iActiveShow == NULL) { |
|
114 DP("Out of memory!"); |
|
115 iStoppedParsing = ETrue; |
|
116 return; |
|
117 } |
|
118 iActiveShow->SetFeedUid(iActiveFeed->Uid()); |
|
119 |
|
120 // <channel> <lastBuildDate> |
|
121 } else if (str.CompareF(KTagLastBuildDate) == 0) { |
|
122 DP("LastBuildDate BEGIN"); |
|
123 iFeedState=EStateChannelLastBuildDate; |
|
124 // <channel> <link> |
|
125 }else if (str.CompareF(KTagTitle) == 0) { |
|
126 iFeedState=EStateChannelTitle; |
|
127 // <channel> <link> |
|
128 } else if (str.CompareF(KTagLink) == 0) { |
|
129 iFeedState = EStateChannelLink; |
|
130 // <channel> <description> |
|
131 } else if (str.CompareF(KTagDescription) == 0) { |
|
132 iFeedState=EStateChannelDescription; |
|
133 // <channel> <image> |
|
134 } else if (str.CompareF(KTagImage) == 0) { |
|
135 for (int i=0;i<aAttributes.Count();i++) { |
|
136 RAttribute attr = aAttributes[i]; |
|
137 TBuf<KMaxStringBuffer> attr16; |
|
138 attr16.Copy(attr.Attribute().LocalName().DesC().Left(KMaxStringBuffer)); |
|
139 HBufC* val16 = CnvUtfConverter::ConvertToUnicodeFromUtf8L(attr.Value().DesC().Left(KMaxParseBuffer)); |
|
140 CleanupStack::PushL(val16); |
|
141 |
|
142 // href=... |
|
143 if (attr16.Compare(KTagHref) == 0) { |
|
144 iActiveFeed->SetImageUrlL(*val16); |
|
145 } |
|
146 CleanupStack::PopAndDestroy(val16); |
|
147 } |
|
148 |
|
149 iFeedState=EStateChannelImage; |
|
150 } |
|
151 break; |
|
152 case EStateChannelImage: |
|
153 // <channel> <image> <url> |
|
154 if (str.CompareF(KTagUrl) == 0) { |
|
155 iFeedState=EStateChannelImageUrl; |
|
156 } else { |
|
157 iFeedState=EStateChannelImage; |
|
158 } |
|
159 break; |
|
160 case EStateItem: |
|
161 // <channel> <item> <title> |
|
162 if (str.CompareF(KTagTitle) == 0) { |
|
163 iFeedState=EStateItemTitle; |
|
164 // <channel> <item> <link> |
|
165 } else if (str.CompareF(KTagLink) == 0) { |
|
166 iFeedState=EStateItemLink; |
|
167 // <channel> <item> <enclosure ...> |
|
168 } else if (str.CompareF(KTagEnclosure) == 0) { |
|
169 //DP("Enclosure START"); |
|
170 for (int i=0;i<aAttributes.Count();i++) { |
|
171 RAttribute attr = aAttributes[i]; |
|
172 TBuf<KMaxStringBuffer> attr16; |
|
173 attr16.Copy(attr.Attribute().LocalName().DesC()); |
|
174 // url=... |
|
175 if (attr16.Compare(KTagUrl) == 0) { |
|
176 HBufC* val16 = HBufC::NewLC(KMaxParseBuffer); |
|
177 val16->Des().Copy(attr.Value().DesC()); |
|
178 iActiveShow->SetUrlL(*val16); |
|
179 CleanupStack::PopAndDestroy(val16); |
|
180 // length=... |
|
181 } else if (attr16.Compare(KTagLength) == 0) { |
|
182 TLex8 lex(attr.Value().DesC()); |
|
183 TUint size = 0; |
|
184 lex.Val(size, EDecimal); |
|
185 iActiveShow->SetShowSize(size); |
|
186 } |
|
187 } |
|
188 // <channel> <item> <description> |
|
189 } else if (str.CompareF(KTagDescription) == 0) { |
|
190 iFeedState=EStateItemDescription; |
|
191 // <channel> <item> <pubdate> |
|
192 } else if (str.CompareF(KTagPubDate) == 0) { |
|
193 //DP("LastBuildDate BEGIN"); |
|
194 iFeedState = EStateItemPubDate; |
|
195 } |
|
196 break; |
|
197 default: |
|
198 //DP2("Ignoring tag %S when in state %d", &str, iFeedState); |
|
199 break; |
|
200 } |
|
201 // DP1("OnStartElementL END state=%d", iFeedState); |
|
202 } |
|
203 |
|
204 void CFeedParser::OnEndElementL(const RTagInfo& aElement, TInt /*aErrorCode*/) |
|
205 { |
|
206 |
|
207 if (iStoppedParsing) { |
|
208 return; |
|
209 } |
|
210 |
|
211 iBuffer.Trim(); |
|
212 |
|
213 TDesC8 lName = aElement.LocalName().DesC(); |
|
214 TBuf<KMaxStringBuffer> str; |
|
215 str.Copy(aElement.LocalName().DesC()); |
|
216 |
|
217 //DP2("OnEndElementL START state=%d, element=%S", iFeedState, &str); |
|
218 |
|
219 switch (iFeedState) { |
|
220 case EStateChannelTitle: |
|
221 if(str.CompareF(KTagTitle) == 0) { |
|
222 if (iActiveFeed->CustomTitle() == EFalse) { |
|
223 iActiveFeed->SetTitleL(iBuffer); |
|
224 } |
|
225 iFeedState = EStateChannel; |
|
226 } |
|
227 break; |
|
228 case EStateChannelLink: |
|
229 iActiveFeed->SetLinkL(iBuffer); |
|
230 iFeedState = EStateChannel; |
|
231 break; |
|
232 case EStateChannelDescription: |
|
233 iActiveFeed->SetDescriptionL(iBuffer); |
|
234 iFeedState = EStateChannel; |
|
235 break; |
|
236 case EStateChannelLastBuildDate: |
|
237 { |
|
238 //DP("LastBuildDate END"); |
|
239 TInternetDate internetDate; |
|
240 TBuf8<128> temp; |
|
241 temp.Copy(iBuffer); |
|
242 |
|
243 TRAPD(parseError, internetDate.SetDateL(temp)); |
|
244 if(parseError == KErrNone) { |
|
245 if (TTime(internetDate.DateTime()) > iActiveFeed->BuildDate()) { |
|
246 DP("Successfully parsed build date"); |
|
247 iActiveFeed->SetBuildDate(TTime(internetDate.DateTime())); |
|
248 } else { |
|
249 DP("*** Nothing new, aborting parsing"); |
|
250 iStoppedParsing = ETrue; |
|
251 } |
|
252 } else { |
|
253 DP("Failed to parse last build date"); |
|
254 } |
|
255 iFeedState = EStateChannel; |
|
256 } |
|
257 break; |
|
258 case EStateChannelImageUrl: |
|
259 //DP1("Image url: %S", &iBuffer); |
|
260 iActiveFeed->SetImageUrlL(iBuffer); |
|
261 iFeedState = EStateChannelImage; |
|
262 break; |
|
263 case EStateChannelImage: |
|
264 if(str.CompareF(KTagImage) == 0) { |
|
265 iFeedState = EStateChannel; |
|
266 } |
|
267 break; |
|
268 case EStateItem: |
|
269 if (str.CompareF(KTagItem) == 0) |
|
270 { |
|
271 iCallbacks.NewShowL(*iActiveShow); |
|
272 |
|
273 delete iActiveShow; |
|
274 |
|
275 // We should now be finished with the show. |
|
276 iActiveShow = NULL; |
|
277 |
|
278 iItemsParsed++; |
|
279 //DP2("iItemsParsed: %d, iMaxItems: %d", iItemsParsed, iMaxItems); |
|
280 if (iItemsParsed > iMaxItems) |
|
281 { |
|
282 iStoppedParsing = ETrue; |
|
283 DP("*** Too many items, aborting parsing"); |
|
284 } |
|
285 |
|
286 iFeedState=EStateChannel; |
|
287 } |
|
288 break; |
|
289 case EStateItemPubDate: |
|
290 DP1("PubDate END: iBuffer='%S'", &iBuffer); |
|
291 if (str.CompareF(KTagPubDate) == 0) { |
|
292 // hack for feeds that don't always write day as two digits |
|
293 TChar five(iBuffer[5]); |
|
294 TChar six(iBuffer[6]); |
|
295 |
|
296 if (five.IsDigit() && !six.IsDigit()) { |
|
297 TBuf<KMaxStringBuffer> fix; |
|
298 fix.Copy(iBuffer.Left(4)); |
|
299 fix.Append(_L(" 0")); |
|
300 fix.Append(iBuffer.Mid(5)); |
|
301 iBuffer.Copy(fix); |
|
302 } |
|
303 // end hack |
|
304 |
|
305 // hack for feeds that write out months in full |
|
306 |
|
307 if (iBuffer[11] != ' ') { |
|
308 TPtrC midPtr = iBuffer.Mid(8); |
|
309 |
|
310 int spacePos = midPtr.Find(_L(" ")); |
|
311 |
|
312 if (spacePos != KErrNotFound) { |
|
313 //DP1("Month: %S", &midPtr.Left(spacePos)); |
|
314 |
|
315 TBuf16<KBufferLength> newBuffer; |
|
316 newBuffer.Copy(iBuffer.Left(11)); |
|
317 newBuffer.Append(_L(" ")); |
|
318 newBuffer.Append(iBuffer.Mid(11+spacePos)); |
|
319 //DP1("newBuffer: %S", &newBuffer); |
|
320 iBuffer.Copy(newBuffer); |
|
321 } |
|
322 } |
|
323 |
|
324 // hack for feeds that write days and months as UPPERCASE |
|
325 TChar one(iBuffer[1]); |
|
326 TChar two(iBuffer[2]); |
|
327 TChar nine(iBuffer[9]); |
|
328 TChar ten(iBuffer[10]); |
|
329 |
|
330 one.LowerCase(); |
|
331 two.LowerCase(); |
|
332 nine.LowerCase(); |
|
333 ten.LowerCase(); |
|
334 |
|
335 iBuffer[1] = one; |
|
336 iBuffer[2] = two; |
|
337 iBuffer[9] = nine; |
|
338 iBuffer[10] = ten; |
|
339 |
|
340 TBuf8<128> temp; |
|
341 temp.Copy(iBuffer); |
|
342 |
|
343 TInternetDate internetDate; |
|
344 TRAPD(parseError, internetDate.SetDateL(temp)); |
|
345 if(parseError == KErrNone) { |
|
346 //DP1("PubDate parse success: '%S'", &iBuffer); |
|
347 iActiveShow->SetPubDate(TTime(internetDate.DateTime())); |
|
348 |
|
349 |
|
350 DP6("Successfully parsed pubdate %d/%d/%d %d:%d:%d", |
|
351 iActiveShow->PubDate().DateTime().Year(), |
|
352 iActiveShow->PubDate().DateTime().Month(), |
|
353 iActiveShow->PubDate().DateTime().Day(), |
|
354 iActiveShow->PubDate().DateTime().Hour(), |
|
355 iActiveShow->PubDate().DateTime().Minute(), |
|
356 iActiveShow->PubDate().DateTime().Second()); |
|
357 |
|
358 } else { |
|
359 DP2("Pubdate parse error: '%S', error=%d", &iBuffer, parseError); |
|
360 } |
|
361 } |
|
362 iFeedState=EStateItem; |
|
363 break; |
|
364 case EStateItemTitle: |
|
365 //DP1("title: %S", &iBuffer); |
|
366 iActiveShow->SetTitleL(iBuffer); |
|
367 iFeedState = EStateItem; |
|
368 break; |
|
369 case EStateItemLink: |
|
370 if (iActiveShow->Url().Length() == 0) { |
|
371 iActiveShow->SetUrlL(iBuffer); |
|
372 } |
|
373 iFeedState = EStateItem; |
|
374 break; |
|
375 case EStateItemDescription: |
|
376 iActiveShow->SetDescriptionL(iBuffer); |
|
377 iFeedState = EStateItem; |
|
378 break; |
|
379 default: |
|
380 // fall back to channel level when in doubt |
|
381 iFeedState = EStateChannel; |
|
382 //DP2("Don't know how to handle end tag %S when in state %d", &str, iFeedState); |
|
383 break; |
|
384 } |
|
385 |
|
386 //DP1("OnEndElementL END state=%d", iFeedState); |
|
387 } |
|
388 |
|
389 void CFeedParser::OnContentL(const TDesC8& aBytes, TInt /*aErrorCode*/) |
|
390 { |
|
391 TBuf<KBufferLength> temp; |
|
392 if (iEncoding == EUtf8) { |
|
393 CnvUtfConverter::ConvertToUnicodeFromUtf8(temp, aBytes); |
|
394 } else { |
|
395 temp.Copy(aBytes); |
|
396 } |
|
397 |
|
398 if(temp.Length() + iBuffer.Length() < KBufferLength) { |
|
399 iBuffer.Append(temp); |
|
400 } |
|
401 } |
|
402 |
|
403 void CFeedParser::OnStartPrefixMappingL(const RString& /*aPrefix*/, const RString& /*aUri*/, TInt /*aErrorCode*/) |
|
404 { |
|
405 DP("OnStartPrefixMappingL()"); |
|
406 } |
|
407 |
|
408 void CFeedParser::OnEndPrefixMappingL(const RString& /*aPrefix*/, TInt /*aErrorCode*/) |
|
409 { |
|
410 DP("OnEndPrefixMappingL()"); |
|
411 } |
|
412 |
|
413 void CFeedParser::OnIgnorableWhiteSpaceL(const TDesC8& /*aBytes*/, TInt /*aErrorCode*/) |
|
414 { |
|
415 DP("OnIgnorableWhiteSpaceL()"); |
|
416 } |
|
417 |
|
418 void CFeedParser::OnSkippedEntityL(const RString& /*aName*/, TInt /*aErrorCode*/) |
|
419 { |
|
420 DP("OnSkippedEntityL()"); |
|
421 } |
|
422 |
|
423 void CFeedParser::OnProcessingInstructionL(const TDesC8& /*aTarget*/, const TDesC8& /*aData*/, TInt /*aErrorCode*/) |
|
424 { |
|
425 DP("OnProcessingInstructionL()"); |
|
426 } |
|
427 |
|
428 void CFeedParser::OnError(TInt aErrorCode) |
|
429 { |
|
430 DP1("CFeedParser::OnError %d", aErrorCode); |
|
431 } |
|
432 |
|
433 TAny* CFeedParser::GetExtendedInterface(const TInt32 /*aUid*/) |
|
434 { |
|
435 DP("GetExtendedInterface()"); |
|
436 return NULL; |
|
437 } |
|
438 |
|
439 CFeedInfo& CFeedParser::ActiveFeed() |
|
440 { |
|
441 return *iActiveFeed; |
|
442 } |