author | Simon Howkins <simonh@symbian.org> |
Mon, 15 Nov 2010 14:53:34 +0000 | |
branch | RCL_3 |
changeset 105 | 871af676edac |
parent 95 | d96eed154187 |
permissions | -rw-r--r-- |
94 | 1 |
/* |
2 |
* Copyright (c) 2000 - 2004 Nokia Corporation and/or its subsidiary(-ies). |
|
3 |
* All rights reserved. |
|
4 |
* This component and the accompanying materials are made available |
|
5 |
* under the terms of the License "Eclipse Public License v1.0" |
|
6 |
* which accompanies this distribution, and is available |
|
7 |
* at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 |
* |
|
9 |
* Initial Contributors: |
|
10 |
* Nokia Corporation - initial contribution. |
|
11 |
* |
|
12 |
* Contributors: |
|
13 |
* |
|
14 |
* Description: |
|
15 |
* |
|
16 |
*/ |
|
17 |
||
18 |
/* |
|
19 |
||
20 |
HTML Parser |
|
21 |
||
22 |
*/ |
|
23 |
#ifndef FEA_RME_NOHTMLPARSER |
|
24 |
||
25 |
#include "nwx_defs.h" |
|
26 |
#include "CHtmlpParser.h" |
|
27 |
#include <nw_string_char.h> |
|
28 |
#include "BrsrStatusCodes.h" |
|
29 |
#include "nwx_string.h" |
|
30 |
#include "nw_htmlp_to_wbxml.h" |
|
31 |
#include "nw_htmlp_html_dict.h" |
|
32 |
#include "nw_htmlp_wml_dict.h" |
|
33 |
#include <nw_wbxml_dictionary.h> |
|
34 |
#include <nw_encoder_stringtable.h> |
|
35 |
||
36 |
/* "<?" len 2 */ |
|
37 |
#define NW_HTMLP_String_PiFormStartLength 2 |
|
38 |
static |
|
39 |
const NW_Uint8 NW_HTMLP_String_PiFormStart[NW_HTMLP_String_PiFormStartLength] = |
|
40 |
{ |
|
41 |
'<', '?' |
|
42 |
}; |
|
43 |
||
44 |
/* "?>" len 2 */ |
|
45 |
#define NW_HTMLP_String_PiFormStopLength 2 |
|
46 |
static |
|
47 |
const NW_Uint8 NW_HTMLP_String_PiFormStop[NW_HTMLP_String_PiFormStopLength] = |
|
48 |
{ |
|
49 |
'?', '>' |
|
50 |
}; |
|
51 |
||
52 |
/* "<!DOCTYPE" len 9 */ |
|
53 |
#define NW_HTMLP_String_DoctypeStartLength 9 |
|
54 |
static |
|
55 |
const NW_Uint8 NW_HTMLP_String_DoctypeStart[NW_HTMLP_String_DoctypeStartLength] = |
|
56 |
{ |
|
57 |
'<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E' |
|
58 |
}; |
|
59 |
||
60 |
/* "<!--" len 4 comment start */ |
|
61 |
#define NW_HTMLP_String_CommentStartLength 4 |
|
62 |
static |
|
63 |
const NW_Uint8 NW_HTMLP_String_CommentStart[NW_HTMLP_String_CommentStartLength] = |
|
64 |
{ |
|
65 |
'<', '!', '-', '-' |
|
66 |
}; |
|
67 |
||
68 |
/* "-->" len 3 comment end */ |
|
69 |
#define NW_HTMLP_String_CommentStopLength 3 |
|
70 |
static |
|
71 |
const NW_Uint8 NW_HTMLP_String_CommentStop[NW_HTMLP_String_CommentStopLength] = |
|
72 |
{ |
|
73 |
'-', '-', '>' |
|
74 |
}; |
|
75 |
||
76 |
/* "<!--" len 3 comment start */ |
|
77 |
#define NW_HTMLP_String_ImodeCommentStartLength 3 |
|
78 |
static |
|
79 |
const NW_Uint8 NW_HTMLP_String_ImodeCommentStart[NW_HTMLP_String_ImodeCommentStartLength] = |
|
80 |
{ |
|
81 |
'<', '!', '-' |
|
82 |
}; |
|
83 |
||
84 |
/* "-->" len 3 comment end */ |
|
85 |
#define NW_HTMLP_String_ImodeCommentStopLength 2 |
|
86 |
static |
|
87 |
const NW_Uint8 NW_HTMLP_String_ImodeCommentStop[NW_HTMLP_String_ImodeCommentStopLength] = |
|
88 |
{ |
|
89 |
'-', '>' |
|
90 |
}; |
|
91 |
||
92 |
/* "-->" len 3 comment end */ |
|
93 |
#define NW_HTMLP_String_Comment2StopLength 4 |
|
94 |
static |
|
95 |
const NW_Uint8 NW_HTMLP_String_Comment2Stop[NW_HTMLP_String_Comment2StopLength] = |
|
96 |
{ |
|
97 |
'-', '-','!','>' |
|
98 |
}; |
|
99 |
||
100 |
/* "<!" len 2 comment start */ |
|
101 |
#define NW_HTMLP_String_LooseCommentStartLength 2 |
|
102 |
static |
|
103 |
const NW_Uint8 NW_HTMLP_String_LooseCommentStart[NW_HTMLP_String_LooseCommentStartLength] = |
|
104 |
{ |
|
105 |
'<', '!' |
|
106 |
}; |
|
107 |
||
108 |
/* "<![CDATA[" len 9 */ |
|
109 |
#define NW_HTMLP_String_CdataStartLength 9 |
|
110 |
static |
|
111 |
const NW_Uint8 NW_HTMLP_String_CdataStart[NW_HTMLP_String_CdataStartLength] = |
|
112 |
{ |
|
113 |
'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[' |
|
114 |
}; |
|
115 |
||
116 |
/* "]]>" len 3 */ |
|
117 |
#define NW_HTMLP_String_CdataStopLength 3 |
|
118 |
static |
|
119 |
const NW_Uint8 NW_HTMLP_String_CdataStop[NW_HTMLP_String_CdataStopLength] = |
|
120 |
{ |
|
121 |
']', ']', '>' |
|
122 |
}; |
|
123 |
||
124 |
/* "/>" len 2 */ |
|
125 |
#define NW_HTMLP_String_MiniEndLength 2 |
|
126 |
static |
|
127 |
const NW_Uint8 NW_HTMLP_String_MiniEnd[NW_HTMLP_String_MiniEndLength] = |
|
128 |
{ |
|
129 |
'/', '>' |
|
130 |
}; |
|
131 |
||
132 |
/* ">" len 1 */ |
|
133 |
#define NW_HTMLP_String_TagEndLength 1 |
|
134 |
static |
|
135 |
const NW_Uint8 NW_HTMLP_String_TagEnd[NW_HTMLP_String_TagEndLength] = |
|
136 |
{ |
|
137 |
'>' |
|
138 |
}; |
|
139 |
||
140 |
/* "</script>" len 9 */ |
|
141 |
||
142 |
#define NW_HTMLP_String_EndScriptLength 9 |
|
143 |
static |
|
144 |
const NW_Uint8 NW_HTMLP_String_EndScript[NW_HTMLP_String_EndScriptLength] = |
|
145 |
{ |
|
146 |
'<', '/','s','c','r','i','p','t','>' |
|
147 |
}; |
|
148 |
||
149 |
// =" or =' len 2 |
|
150 |
#define NW_HTMLP_String_EqualQuoteLength 2 |
|
151 |
static |
|
152 |
const NW_Uint8 NW_HTMLP_String_EqualDblQuote[NW_HTMLP_String_EqualQuoteLength] = |
|
153 |
{ |
|
154 |
'=', '\"' |
|
155 |
}; |
|
156 |
static |
|
157 |
const NW_Uint8 NW_HTMLP_String_EqualSngQuote[NW_HTMLP_String_EqualQuoteLength] = |
|
158 |
{ |
|
159 |
'=', '\'' |
|
160 |
}; |
|
161 |
||
162 |
// TODO: Throughout this class iCBs is checked against null. It appears that iCBs is |
|
163 |
// created in the constructor and not deallocated until the destructor. If |
|
164 |
// this is the case all of the iCBs null check should be removed. |
|
165 |
||
166 |
//lint -esym(794, CHtmlpParser::iCBs) Conceivable use of null pointer |
|
167 |
||
168 |
/* constructors and destructor of CHtmlpParser */ |
|
169 |
||
170 |
/*
 * Factory / re-initialiser.
 *
 * parser == NULL : allocates a new CHtmlpParser, runs ConstructL() on it and
 *                  returns it.  The object sits on the cleanup stack while
 *                  ConstructL() runs.
 * parser != NULL : treats it as an existing CHtmlpParser, re-initialises its
 *                  lexer from the given buffer and element dictionary, sets
 *                  the encoding, and returns the same object.  consumeSpaces
 *                  and isScript are not applied on this path.
 *
 * Leaves if allocation or construction fails, or if
 * NW_HTMLP_Lexer_InitFromBuffer() reports an error.
 */
CHtmlpParser* CHtmlpParser::NewL(
    NW_Uint32 inputByteCount,
    NW_Uint8* pInputBuf,
    NW_HTMLP_ElementTableIndex_t elementCount,
    NW_HTMLP_ElementDescriptionConst_t* pElementDictionary,
    NW_Uint32 encoding,
    NW_Bool consumeSpaces,
    void * parser,
    NW_Bool isScript)
{
  CHtmlpParser* existing = (CHtmlpParser *)parser;

  if (existing)
  {
    // Reuse: only the lexer is pointed at the new input.
    User::LeaveIfError(
      NW_HTMLP_Lexer_InitFromBuffer(existing->iLexer, inputByteCount, pInputBuf,
                                    elementCount, pElementDictionary));
    NW_HTMLP_Lexer_SetEncoding(existing->iLexer, encoding);
    return existing;
  }

  // Fresh instance: two-phase construction.
  CHtmlpParser* created = new (ELeave) CHtmlpParser(consumeSpaces);
  CleanupStack::PushL(created);
  created->ConstructL(inputByteCount, pInputBuf, elementCount, pElementDictionary,
                      encoding, isScript);
  CleanupStack::Pop(); // created
  return created;
}
|
197 |
||
198 |
/*
 * First-phase constructor: records the whitespace policy and resets the
 * bookkeeping members below.  Nothing is allocated here.
 * NOTE(review): members not assigned here (iCBs, iLexer, ...) presumably rely
 * on zero-filled allocation of the object - confirm against the class
 * declaration.
 */
CHtmlpParser::CHtmlpParser( NW_Bool consumeSpaces)
{
  iConsumeSpaces = consumeSpaces;

  // State flags start cleared.
  iRestarted = NW_FALSE;
  iWithinNoscript = NW_FALSE;

  // -1: no "last valid" mark has been recorded yet.
  iLastValid = -1;

  // No buffers are held yet.
  iLastTextBuf = NULL;
  iPreviousValidOutput = NULL;
}
|
207 |
||
208 |
/*
 * Second-phase constructor: allocates the helper structures and binds the
 * lexer to the input buffer.
 *
 * inputByteCount / pInputBuf       - document bytes handed to the lexer
 * elementCount / pElementDictionary - element table handed to the lexer
 * encoding                          - encoding set on the lexer
 * isScript                          - stored in iIsScript
 *
 * Leaves if any allocation fails or if NW_HTMLP_Lexer_InitFromBuffer()
 * returns an error.
 */
void CHtmlpParser::ConstructL(NW_Uint32 inputByteCount,
                              NW_Uint8* pInputBuf,
                              NW_HTMLP_ElementTableIndex_t elementCount,
                              NW_HTMLP_ElementDescriptionConst_t* pElementDictionary,
                              NW_Uint32 encoding,
                              NW_Bool isScript)
{
  iSPLElemHandling = (NW_HTMLP_SPL_Elem_Handling_t*)NW_Mem_Malloc(sizeof(NW_HTMLP_SPL_Elem_Handling_t));
  User::LeaveIfNull( iSPLElemHandling );

  NW_HTMLP_ElementParseState_New();
  User::LeaveIfNull( iElementParseState );

  iCBs = (NW_HTMLP_EventCallbacks_t*)NW_Mem_Malloc(sizeof(NW_HTMLP_EventCallbacks_t));
  User::LeaveIfNull( iCBs );
  /* Null out all WBXML generation callback addresses.  Done straight after
     the allocation so that iCBs->charsetContext reads as NULL in the
     destructor even if a later step in this function leaves. */
  NW_Mem_memset(iCBs, 0, sizeof(NW_HTMLP_EventCallbacks_t));

  iLexer = (NW_HTMLP_Lexer_t* )NW_Mem_Malloc(sizeof(NW_HTMLP_Lexer_t));
  /* The lexer must exist before it is initialised; leave on allocation
     failure exactly as for the other allocations above. */
  User::LeaveIfNull( iLexer );

  /* Reregister the document text with the Reader in preparation for parsing. */
  User::LeaveIfError(
    NW_HTMLP_Lexer_InitFromBuffer(iLexer, inputByteCount, pInputBuf, elementCount, pElementDictionary));

  NW_HTMLP_Lexer_SetEncoding(iLexer, encoding);

  iOrigEncoding = -1;
  iLeftBytes = NULL;
  iIsScript = isScript;
}
|
239 |
||
240 |
/*
 * Releases everything the parser owns: the special-element handling record,
 * both element parse-state stacks, the callback table (and its
 * charsetContext), the lexer and the three buffers.
 */
CHtmlpParser::~CHtmlpParser()
{
  NW_Mem_Free(iSPLElemHandling);

  if (iElementParseState)
  {
    NW_HTMLP_ElementParseState_Delete(&iElementParseState);
  }
  if (iLastValidStack)
  {
    NW_HTMLP_ElementParseState_Delete(&iLastValidStack);
  }

  // iCBs is allocated part-way through ConstructL().  If ConstructL() leaves
  // before that point the object is destroyed with iCBs still unset
  // (presumably NULL via zero-filled allocation - confirm), so it must not be
  // dereferenced unconditionally.
  if (iCBs)
  {
    if (iCBs->charsetContext)
    {
      NW_Mem_Free(iCBs->charsetContext);
    }
    NW_Mem_Free(iCBs);
  }

  NW_Mem_Free ((void *)iLexer);
  NW_Buffer_Free(iLastTextBuf);
  NW_Buffer_Free(iPreviousValidOutput);
  NW_Buffer_Free(iVisitedHeadText);
}
|
264 |
||
265 |
||
266 |
/* |
|
267 |
on entry: lexer read position is at the first character of keyword |
|
268 |
on return: If matched keyword, then *pMatch == NW_TRUE, interval marks |
|
269 |
.........: keyword in doc and lexer read position is just after keyword. |
|
270 |
.........: NOTE: Keyword match just means the string of keyword chars |
|
271 |
.........: exists at the read position so it does not mean that the keyword |
|
272 |
.........: is delimited at the end---it might be followed by more name chars. |
|
273 |
.........: If did not match keyword, then *pMatch == NW_FALSE and lexer |
|
274 |
.........: read position is unchanged from on entry. |
|
275 |
eof handling: if encounters EOF while attempting operation then returns |
|
276 |
............: *pMatch == NW_FALSE and KBrsrSuccess |
|
277 |
on error return: return value is not KBrsrSuccess, *pMatch == NW_FALSE |
|
278 |
...............: and lexer read position is unspecified |
|
279 |
*/ |
|
280 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_IfExistsConsumeKeywordCase(NW_HTMLP_Interval_t* pI, |
|
281 |
NW_Uint32 asciiCharCount, |
|
282 |
const NW_Uint8* pKeyword, |
|
283 |
NW_Bool CaseSensitive, |
|
284 |
NW_Bool* pMatch) |
|
285 |
{ |
|
286 |
NW_HTMLP_Lexer_Position_t position; |
|
287 |
TBrowserStatusCode e; |
|
288 |
NW_Bool match; |
|
289 |
||
290 |
*pMatch = NW_FALSE; |
|
291 |
NW_HTMLP_Lexer_GetPosition(iLexer, &position); |
|
292 |
NW_HTMLP_Interval_Init(pI); |
|
293 |
e = NW_HTMLP_Lexer_AsciiStringCompareCase(iLexer, asciiCharCount, pKeyword, CaseSensitive, &match); |
|
294 |
if (e == KBrsrSuccess) { |
|
295 |
if (match == NW_TRUE) { |
|
296 |
NW_HTMLP_Interval_Start(pI, iLexer); |
|
297 |
e = NW_HTMLP_Lexer_AdvanceOffset(iLexer, asciiCharCount); |
|
298 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
299 |
NW_HTMLP_Interval_Init(pI); |
|
300 |
return e; |
|
301 |
} |
|
302 |
NW_HTMLP_Interval_Stop(pI, iLexer); |
|
303 |
*pMatch = NW_TRUE; |
|
304 |
} else { |
|
305 |
NW_HTMLP_Lexer_SetPosition(iLexer, &position); |
|
306 |
} |
|
307 |
} |
|
308 |
return e; |
|
309 |
} |
|
310 |
||
311 |
/*
 * Convenience form of NW_HTMLP_IfExistsConsumeKeywordCase() that always
 * passes NW_TRUE for its CaseSensitive argument.  All other arguments and the
 * return value are forwarded unchanged.
 */
TBrowserStatusCode CHtmlpParser::NW_HTMLP_IfExistsConsumeKeyword(NW_HTMLP_Interval_t* pI,
                                                                 NW_Uint32 asciiCharCount,
                                                                 const NW_Uint8* pKeyword,
                                                                 NW_Bool* pMatch)
{
  const NW_Bool caseSensitive = NW_TRUE;

  return NW_HTMLP_IfExistsConsumeKeywordCase(pI, asciiCharCount, pKeyword,
                                             caseSensitive, pMatch);
}
|
323 |
||
324 |
/* |
|
325 |
on entry: no assumptions |
|
326 |
on return: Lexer read position moved ahead until character at current |
|
327 |
.........: read position is not whitespace. Return value is KBrsrSuccess. |
|
328 |
eof handling: if encounters EOF then stops at that point and returns |
|
329 |
............: KBrsrSuccess |
|
330 |
on error return: return value is not KBrsrSuccess and lexer read position |
|
331 |
...............: is unspecified |
|
332 |
*/ |
|
333 |
||
334 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_SkipSpace() |
|
335 |
{ |
|
336 |
TBrowserStatusCode e = KBrsrSuccess; |
|
337 |
NW_Bool match = NW_FALSE; |
|
338 |
||
339 |
while (!NW_HTMLP_Lexer_AtEnd(iLexer)) { |
|
340 |
e = NW_HTMLP_Lexer_IsSpace(iLexer, &match); |
|
341 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
342 |
return e; |
|
343 |
} |
|
344 |
if (match == NW_TRUE) { |
|
345 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
346 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
347 |
return e; |
|
348 |
} |
|
349 |
} else { |
|
350 |
break; |
|
351 |
} |
|
352 |
} |
|
353 |
return e; |
|
354 |
} |
|
355 |
||
356 |
/* |
|
357 |
on entry: no assumptions |
|
358 |
on return: Lexer read position moved ahead until character at current |
|
359 |
.........: read position is not whitespace or a 'junk' character. |
|
360 |
.........: The junk characters were found on live websites. |
|
361 |
.........: Return value is KBrsrSuccess. |
|
362 |
eof handling: if encounters EOF then stops at that point and returns |
|
363 |
............: KBrsrSuccess |
|
364 |
on error return: return value is not KBrsrSuccess and lexer read position |
|
365 |
...............: is unspecified |
|
366 |
*/ |
|
367 |
||
368 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_SkipJunk() |
|
369 |
{ |
|
370 |
TBrowserStatusCode e = KBrsrSuccess; |
|
371 |
NW_Bool match = NW_FALSE; |
|
372 |
||
373 |
while (!NW_HTMLP_Lexer_AtEnd(iLexer)) |
|
374 |
{ |
|
375 |
||
376 |
e = NW_HTMLP_Lexer_IsSpace(iLexer, &match); |
|
377 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
378 |
{ |
|
379 |
return e; |
|
380 |
} |
|
381 |
||
382 |
if (match == NW_FALSE) |
|
383 |
{ |
|
384 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, ';', &match); |
|
385 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
386 |
return e; |
|
387 |
} |
|
388 |
} |
|
389 |
||
390 |
if (match == NW_FALSE) |
|
391 |
{ |
|
392 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '+', &match); |
|
393 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
394 |
{ |
|
395 |
return e; |
|
396 |
} |
|
397 |
} |
|
398 |
||
399 |
if (match == NW_TRUE) |
|
400 |
{ |
|
401 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
402 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
403 |
{ |
|
404 |
return e; |
|
405 |
} |
|
406 |
} |
|
407 |
else |
|
408 |
{ |
|
409 |
break; |
|
410 |
} |
|
411 |
} |
|
412 |
return e; |
|
413 |
} |
|
414 |
||
415 |
/* |
|
416 |
on entry: no assumptions |
|
417 |
on return: Lexer read position moved ahead until character at current |
|
418 |
.........: read position is not CR or LF. Return value is KBrsrSuccess. |
|
419 |
eof handling: if encounters EOF then stops at that point and returns |
|
420 |
............: KBrsrSuccess |
|
421 |
on error return: return value is not KBrsrSuccess and lexer read position |
|
422 |
...............: is unspecified |
|
423 |
*/ |
|
424 |
||
425 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_SkipCRLF() |
|
426 |
{ |
|
427 |
TBrowserStatusCode e = KBrsrSuccess; |
|
428 |
NW_Bool match = NW_FALSE; |
|
429 |
||
430 |
while (!NW_HTMLP_Lexer_AtEnd(iLexer)) { |
|
431 |
e = NW_HTMLP_Lexer_IsCRLF(iLexer, &match); |
|
432 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
433 |
return e; |
|
434 |
} |
|
435 |
if (match == NW_TRUE) { |
|
436 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
437 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
438 |
return e; |
|
439 |
} |
|
440 |
} else { |
|
441 |
break; |
|
442 |
} |
|
443 |
} |
|
444 |
return e; |
|
445 |
} |
|
446 |
||
447 |
/* |
|
448 |
A name (e.g., tag name) matches the pattern "[a-zA-Z][a-zA-Z0-9.-_:]*\s" |
|
449 |
Assumes starting read position is at the first character of name. |
|
450 |
On return: *pMatch = NW_TRUE if found name, NW_FALSE if no name parsed |
|
451 |
On return: If *pMatch == NW_TRUE, then *pI marks the name. |
|
452 |
||
453 |
TBD: loosen this up to allow the pattern [a-zA-Z][^/?]*\s |
|
454 |
don't allow ? due to ?> on PI forms |
|
455 |
don't allow / due to /> on empty element |
|
456 |
*/ |
|
457 |
||
458 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_ParseName(NW_Bool* pMatch, |
|
459 |
NW_HTMLP_Interval_t* pI) |
|
460 |
{ |
|
461 |
NW_HTMLP_Lexer_Position_t position; |
|
462 |
TBrowserStatusCode sl = KBrsrSuccess; |
|
463 |
NW_Bool isLetter = NW_FALSE; |
|
464 |
TBrowserStatusCode sd = KBrsrSuccess; |
|
465 |
NW_Bool isDigit = NW_FALSE; |
|
466 |
TBrowserStatusCode sp = KBrsrSuccess; |
|
467 |
NW_Bool isPeriod = NW_FALSE; |
|
468 |
TBrowserStatusCode sh = KBrsrSuccess; |
|
469 |
NW_Bool isHyphen = NW_FALSE; |
|
470 |
TBrowserStatusCode su = KBrsrSuccess; |
|
471 |
NW_Bool isUnderscore = NW_FALSE; |
|
472 |
TBrowserStatusCode sc = KBrsrSuccess; |
|
473 |
NW_Bool isColon = NW_FALSE; |
|
474 |
TBrowserStatusCode s = KBrsrSuccess; |
|
475 |
||
476 |
*pMatch = NW_FALSE; |
|
477 |
NW_HTMLP_Lexer_GetPosition(iLexer, &position); |
|
478 |
NW_HTMLP_Interval_Init(pI); |
|
479 |
sl = NW_HTMLP_Lexer_IsAsciiLetter(iLexer, &isLetter); |
|
480 |
if (!BRSR_STAT_IS_FAILURE(sl)) { |
|
481 |
if (isLetter) { |
|
482 |
NW_HTMLP_Interval_Start(pI, iLexer); |
|
483 |
while ((!BRSR_STAT_IS_FAILURE(sl) && !BRSR_STAT_IS_FAILURE(su) |
|
484 |
&& !BRSR_STAT_IS_FAILURE(sc) && !BRSR_STAT_IS_FAILURE(sd) |
|
485 |
&& !BRSR_STAT_IS_FAILURE(sp) && !BRSR_STAT_IS_FAILURE(sh) |
|
486 |
&& !BRSR_STAT_IS_FAILURE(s)) |
|
487 |
&& (isLetter || isDigit || isPeriod || isHyphen |
|
488 |
|| isUnderscore || isColon)) { |
|
489 |
s = NW_HTMLP_Lexer_Advance(iLexer); |
|
490 |
sl = NW_HTMLP_Lexer_IsAsciiLetter(iLexer, &isLetter); |
|
491 |
sd = NW_HTMLP_Lexer_IsAsciiDigit(iLexer, &isDigit); |
|
492 |
sp = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '.', &isPeriod); |
|
493 |
sh = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '-', &isHyphen); |
|
494 |
su = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '_', &isUnderscore); |
|
495 |
sc = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, ':', &isColon); |
|
496 |
} |
|
497 |
NW_HTMLP_Interval_Stop(pI, iLexer); |
|
498 |
*pMatch = NW_TRUE; |
|
499 |
} |
|
500 |
} |
|
501 |
if (BRSR_STAT_IS_FAILURE(sl) || BRSR_STAT_IS_FAILURE(su) |
|
502 |
|| BRSR_STAT_IS_FAILURE(sc) || BRSR_STAT_IS_FAILURE(sd) |
|
503 |
|| BRSR_STAT_IS_FAILURE(sp) || BRSR_STAT_IS_FAILURE(sh) |
|
504 |
|| BRSR_STAT_IS_FAILURE(s)) { |
|
505 |
return KBrsrFailure; |
|
506 |
} |
|
507 |
if (*pMatch == NW_FALSE) { |
|
508 |
s = NW_HTMLP_Lexer_SetPosition(iLexer, &position); |
|
509 |
if (BRSR_STAT_IS_FAILURE(s)) { |
|
510 |
return s; |
|
511 |
} |
|
512 |
} |
|
513 |
return KBrsrSuccess; |
|
514 |
} |
|
515 |
||
516 |
/* |
|
517 |
on entry: lexer read position is just after '<!--' |
|
518 |
on return: If skipped comment, then *pMatch == NW_TRUE and |
|
519 |
.........: lexer read position is just after closing '>'. |
|
520 |
.........: If did not skip comment, then *pMatch == NW_FALSE and |
|
521 |
.........: lexer read position is unspecified. |
|
522 |
.........: In either case return value is KBrsrSuccess. |
|
523 |
eof handling: if encounters EOF while attempting operation then returns |
|
524 |
............: *pMatch == NW_FALSE and KBrsrSuccess with lexer read |
|
525 |
............: position at EOF |
|
526 |
on error return: return value is not KBrsrSuccess, *pMatch == NW_FALSE |
|
527 |
...............: and lexer read position is unspecified |
|
528 |
*/ |
|
529 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_FinishComment(NW_Bool* pMatch, |
|
530 |
NW_Bool imodeComment, |
|
531 |
NW_Bool looseComment) |
|
532 |
{ |
|
533 |
NW_HTMLP_Lexer_Position_t position; |
|
534 |
NW_HTMLP_Interval_t interval; |
|
535 |
TBrowserStatusCode e; |
|
536 |
NW_Bool match = NW_FALSE; |
|
537 |
NW_Uint8* pKeyword = NULL; |
|
538 |
NW_Uint8* pKeyword2 = NULL; |
|
539 |
NW_Uint32 count = 0; |
|
540 |
NW_Uint32 count2 = 0; |
|
541 |
NW_Bool foundFirstElement = NW_FALSE; |
|
542 |
NW_HTMLP_Lexer_Position_t fistElementPosition; |
|
543 |
TBrowserStatusCode status = KBrsrSuccess; |
|
544 |
||
545 |
/* Does three forms of comment: |
|
546 |
"strict" where comment must end with exactly '-->' |
|
547 |
(no whitespace allowed between '--' and '>' as in specification) |
|
548 |
"imode" where comments ends at "->" |
|
549 |
"loose" where comment ends at first '>' |
|
550 |
First apply "strict" and if that fails to match, then apply "loose". |
|
551 |
||
552 |
Note: This is slightly different than the specifications but conforms |
|
553 |
to common browser behavior. */ |
|
554 |
||
555 |
*pMatch = NW_FALSE; |
|
556 |
NW_HTMLP_Lexer_GetPosition(iLexer, &position); |
|
557 |
||
558 |
if(looseComment) |
|
559 |
{ |
|
560 |
pKeyword =(NW_Uint8*)&NW_HTMLP_String_TagEnd; |
|
561 |
count = NW_HTMLP_String_TagEndLength; |
|
562 |
} |
|
563 |
else if(imodeComment) |
|
564 |
{ |
|
565 |
pKeyword = (NW_Uint8*) &NW_HTMLP_String_ImodeCommentStop; |
|
566 |
count = NW_HTMLP_String_ImodeCommentStopLength; |
|
567 |
} |
|
568 |
else |
|
569 |
{ |
|
570 |
pKeyword = (NW_Uint8*) &NW_HTMLP_String_CommentStop; |
|
571 |
count = NW_HTMLP_String_CommentStopLength; |
|
572 |
pKeyword2 = (NW_Uint8*) &NW_HTMLP_String_Comment2Stop; |
|
573 |
count2 = NW_HTMLP_String_Comment2StopLength; |
|
574 |
||
575 |
} |
|
576 |
||
577 |
while (!NW_HTMLP_Lexer_AtEnd(iLexer)) |
|
578 |
{ |
|
579 |
// If the loose, imode, or normal end-comment is found comsume |
|
580 |
// it and set pMatch to true. |
|
581 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, count, |
|
582 |
pKeyword, &match); |
|
583 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
584 |
{ |
|
585 |
status = e; |
|
586 |
break; |
|
587 |
} |
|
588 |
||
589 |
if (match == NW_TRUE) |
|
590 |
{ |
|
591 |
*pMatch = NW_TRUE; |
|
592 |
break; |
|
593 |
} |
|
594 |
||
595 |
// If the "--!>" end-comment is found comsume it and set |
|
596 |
// pMatch to true. |
|
597 |
if (pKeyword2) |
|
598 |
{ |
|
599 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, count2, |
|
600 |
pKeyword2, &match); |
|
601 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
602 |
{ |
|
603 |
status = e; |
|
604 |
break; |
|
605 |
} |
|
606 |
} |
|
607 |
if (match == NW_TRUE) |
|
608 |
{ |
|
609 |
*pMatch = NW_TRUE; |
|
610 |
break; |
|
611 |
} |
|
612 |
||
613 |
// As it searches for the end-comment it also keeps track of |
|
614 |
// the first embeded tag. This tag is later used to force end |
|
615 |
// the comment if it reaches the end of the file before finding |
|
616 |
// a end-comment. |
|
617 |
if (!foundFirstElement) |
|
618 |
{ |
|
619 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '<', &match); |
|
620 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
621 |
{ |
|
622 |
status = e; |
|
623 |
break; |
|
624 |
} |
|
625 |
if (match == NW_TRUE) |
|
626 |
{ |
|
627 |
foundFirstElement = NW_TRUE; |
|
628 |
NW_HTMLP_Lexer_GetPosition(iLexer, &fistElementPosition); |
|
629 |
} |
|
630 |
} |
|
631 |
||
632 |
// Otherwise advance a char and test for end-comment again. |
|
633 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
634 |
||
635 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
636 |
{ |
|
637 |
status = e; |
|
638 |
break; |
|
639 |
} |
|
640 |
}/*end while*/ |
|
641 |
||
642 |
||
643 |
// If end-comment wasn't found in this block, it may be missing. |
|
644 |
if ((match == NW_FALSE) && NW_HTMLP_Lexer_AtEnd(iLexer) && foundFirstElement) |
|
645 |
{ |
|
646 |
// If the document is completed then terminate the comment just |
|
647 |
// before the first start tag after begin-comment. |
|
648 |
if (iDocComplete) |
|
649 |
{ |
|
650 |
NW_HTMLP_Lexer_SetPosition(iLexer, &fistElementPosition); |
|
651 |
*pMatch = NW_TRUE; |
|
652 |
} |
|
653 |
||
654 |
// Otherwise note that we may be tracking a unterminated comment. |
|
655 |
else |
|
656 |
{ |
|
657 |
iTrackingUnTerminatedComment = NW_TRUE; |
|
658 |
} |
|
659 |
} |
|
660 |
||
661 |
||
662 |
return status; |
|
663 |
} |
|
664 |
||
665 |
||
666 |
/* |
|
667 |
on entry: assumes lexer read position is character following '<![CDATA[?' |
|
668 |
on return: If consumed a well-formed CDATA, then *pMatch == NW_TRUE and |
|
669 |
.........: lexer read position is just after closing ']]>'. |
|
670 |
.........: If did not consume CDATA, then *pMatch == NW_FALSE and |
|
671 |
.........: lexer read position is unspecified. |
|
672 |
.........: In either case return value is KBrsrSuccess. |
|
673 |
eof handling: if encounters EOF while attempting operation then returns |
|
674 |
............: *pMatch == NW_FALSE and KBrsrSuccess with lexer read |
|
675 |
............: position at EOF |
|
676 |
on error return: return value is not KBrsrSuccess, *pMatch == NW_FALSE |
|
677 |
...............: and lexer read position is unspecified |
|
678 |
*/ |
|
679 |
||
680 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_FinishCdata(NW_HTMLP_Interval_t* pInterval, |
|
681 |
NW_Bool* pMatch) |
|
682 |
{ |
|
683 |
TBrowserStatusCode e; |
|
684 |
NW_HTMLP_Interval_t interval; |
|
685 |
NW_Bool match; |
|
686 |
||
687 |
*pMatch = NW_FALSE; |
|
688 |
NW_HTMLP_Interval_Start(pInterval, iLexer); |
|
689 |
||
690 |
while (!NW_HTMLP_Lexer_AtEnd(iLexer)) { |
|
691 |
NW_HTMLP_Interval_Stop(pInterval, iLexer); |
|
692 |
/* end with match if see ']]>' */ |
|
693 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, |
|
694 |
NW_HTMLP_String_CdataStopLength, |
|
695 |
NW_HTMLP_String_CdataStop, |
|
696 |
&match); |
|
697 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
698 |
return e; |
|
699 |
} |
|
700 |
if (match == NW_TRUE) { |
|
701 |
*pMatch = NW_TRUE; |
|
702 |
break; |
|
703 |
} |
|
704 |
/* advance */ |
|
705 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
706 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
707 |
return e; |
|
708 |
} |
|
709 |
} |
|
710 |
return KBrsrSuccess; |
|
711 |
} |
|
712 |
||
713 |
/* |
|
714 |
on entry: assumes lexer read position is character following '<!DOCTYPE' |
|
715 |
on return: If consumed a well-formed DOCTYPE, then *pMatch == NW_TRUE and |
|
716 |
.........: lexer read position is just after closing '>'. |
|
717 |
.........: If did not consume a DOCTYPE, then *pMatch == NW_FALSE and |
|
718 |
.........: lexer read position is unspecified. |
|
719 |
.........: In either case return value is KBrsrSuccess. |
|
720 |
eof handling: if encounters EOF while attempting operation then returns |
|
721 |
............: *pMatch == NW_FALSE and KBrsrSuccess with lexer read |
|
722 |
............: position at EOF |
|
723 |
on error return: return value is not KBrsrSuccess, *pMatch == NW_FALSE |
|
724 |
...............: and lexer read position is unspecified |
|
725 |
*/ |
|
726 |
||
727 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_FinishDoctype(NW_Bool* pMatch, |
|
728 |
NW_HTMLP_Interval_t* doctypeInterval) |
|
729 |
{ |
|
730 |
/* Because DOCTYPE attributes may include strings and inside these strings |
|
731 |
the characters '>' and '<' may be used, strings must be parsed. Strings are |
|
732 |
quoted either with '\'' or '\"'. */ |
|
733 |
TBrowserStatusCode e; |
|
734 |
NW_HTMLP_Interval_t interval; |
|
735 |
NW_HTMLP_Interval_t arguments; |
|
736 |
NW_HTMLP_Lexer_Position_t position; |
|
737 |
||
738 |
*pMatch = NW_FALSE; |
|
739 |
e = NW_HTMLP_SkipSpace(); |
|
740 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
741 |
return e; |
|
742 |
} |
|
743 |
||
744 |
NW_HTMLP_Interval_Start(&arguments, iLexer); |
|
745 |
*pMatch = NW_FALSE; |
|
746 |
||
747 |
while (!NW_HTMLP_Lexer_AtEnd(iLexer)) { |
|
748 |
/* end with match if see '>' */ |
|
749 |
NW_HTMLP_Interval_Stop(&arguments, iLexer); /* keep moving along end */ |
|
750 |
||
751 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, |
|
752 |
NW_HTMLP_String_TagEndLength, |
|
753 |
NW_HTMLP_String_TagEnd, |
|
754 |
pMatch); |
|
755 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
756 |
return e; |
|
757 |
} |
|
758 |
if (*pMatch == NW_TRUE){ |
|
759 |
if ((iCBs != NULL) && (iCBs->piFormCB != NULL)) { |
|
760 |
NW_Uint32 cp_cnt = 0; |
|
761 |
e = (*(iCBs->piFormCB))(iLexer, doctypeInterval, &arguments, iCBs->pClientPointer, &cp_cnt); |
|
762 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
763 |
return e; |
|
764 |
} |
|
765 |
updateCurrentCP(cp_cnt); |
|
766 |
} |
|
767 |
break; |
|
768 |
} |
|
769 |
/* advance */ |
|
770 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
771 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
772 |
return e; |
|
773 |
} |
|
774 |
} |
|
775 |
if (*pMatch == NW_FALSE) { |
|
776 |
e = NW_HTMLP_Lexer_SetPosition(iLexer, &position); |
|
777 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
778 |
return e; |
|
779 |
} |
|
780 |
} |
|
781 |
return KBrsrSuccess; |
|
782 |
} |
|
783 |
||
784 |
/* |
|
785 |
on entry: lexer read position is at '<' |
|
786 |
on return: If consumed a comment, then *pMatch == NW_TRUE and |
|
787 |
.........: lexer read position is just after closing '>'. |
|
788 |
.........: If did not consume a comment, then *pMatch == NW_FALSE and |
|
789 |
.........: lexer read position is unchanged. |
|
790 |
.........: In either case return value is KBrsrSuccess. |
|
791 |
.........: There are three types of comments are handled here: |
|
792 |
.........: (1) Exact syntax comment (<!-- Some Text --> ) |
|
793 |
.........: (2) Imode Comments (<!- Some Text -> ) |
|
794 |
.........: (3) Loose Comments (<! Some Text > ) |
|
795 |
.........: Note: Loose Comments must avoid consuming <!DOCTYPE ...> and <![CDATA ...]]>. |
|
796 |
eof handling: if encounters EOF while attempting operation then returns |
|
797 |
............: *pMatch == NW_FALSE and KBrsrSuccess with lexer read |
|
798 |
............: position unchanged |
|
799 |
on error return: return value is not KBrsrSuccess, *pMatch == NW_FALSE |
|
800 |
...............: and lexer read position is unspecified |
|
801 |
*/ |
|
802 |
||
803 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_IfLegalConsumeComment(NW_Bool* pMatch) |
|
804 |
{ |
|
805 |
NW_HTMLP_Lexer_Position_t position; |
|
806 |
NW_HTMLP_Interval_t interval; |
|
807 |
TBrowserStatusCode e; |
|
808 |
NW_Bool match; |
|
809 |
NW_Bool imodeComment = NW_FALSE; |
|
810 |
NW_Bool looseComment = NW_FALSE; |
|
811 |
NW_Bool isNotComment = NW_FALSE; |
|
812 |
||
813 |
iTrackingUnTerminatedComment = NW_FALSE; |
|
814 |
*pMatch = NW_FALSE; |
|
815 |
NW_HTMLP_Lexer_GetPosition(iLexer, &position); |
|
816 |
||
817 |
// Check first for correct syntax comments. |
|
818 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, |
|
819 |
NW_HTMLP_String_CommentStartLength, |
|
820 |
NW_HTMLP_String_CommentStart, |
|
821 |
&match); |
|
822 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
823 |
{ |
|
824 |
return e; |
|
825 |
} |
|
826 |
||
827 |
// Check for IMode type of comments. |
|
828 |
if (match == NW_FALSE) |
|
829 |
{ |
|
830 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, |
|
831 |
NW_HTMLP_String_ImodeCommentStartLength, |
|
832 |
NW_HTMLP_String_ImodeCommentStart, |
|
833 |
&match); |
|
834 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
835 |
{ |
|
836 |
return e; |
|
837 |
} |
|
838 |
if (match == NW_TRUE) |
|
839 |
imodeComment = NW_TRUE; |
|
840 |
} |
|
841 |
||
842 |
||
843 |
// Check for <!DOCTYPE before loose comment |
|
844 |
if (match == NW_FALSE) |
|
845 |
{ |
|
846 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, |
|
847 |
NW_HTMLP_String_DoctypeStartLength, |
|
848 |
NW_HTMLP_String_DoctypeStart, |
|
849 |
&match); |
|
850 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
851 |
{ |
|
852 |
return e; |
|
853 |
} |
|
854 |
if (match == NW_TRUE) |
|
855 |
{ |
|
856 |
isNotComment = NW_TRUE; |
|
857 |
e = NW_HTMLP_Lexer_SetPosition(iLexer, &position); |
|
858 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
859 |
{ |
|
860 |
return e; |
|
861 |
} |
|
862 |
} |
|
863 |
} |
|
864 |
||
865 |
// Check for <![CDATA before loose comment |
|
866 |
if (match == NW_FALSE && isNotComment == NW_FALSE) |
|
867 |
{ |
|
868 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, |
|
869 |
NW_HTMLP_String_CdataStartLength, |
|
870 |
NW_HTMLP_String_CdataStart, |
|
871 |
&match); |
|
872 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
873 |
{ |
|
874 |
return e; |
|
875 |
} |
|
876 |
if (match == NW_TRUE) |
|
877 |
{ |
|
878 |
isNotComment = NW_TRUE; |
|
879 |
e = NW_HTMLP_Lexer_SetPosition(iLexer, &position); |
|
880 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
881 |
{ |
|
882 |
return e; |
|
883 |
} |
|
884 |
} |
|
885 |
} |
|
886 |
||
887 |
// Check for loose comments |
|
888 |
if (match == NW_FALSE && isNotComment == NW_FALSE) |
|
889 |
{ |
|
890 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, |
|
891 |
NW_HTMLP_String_LooseCommentStartLength, |
|
892 |
NW_HTMLP_String_LooseCommentStart, |
|
893 |
&match); |
|
894 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
895 |
{ |
|
896 |
return e; |
|
897 |
} |
|
898 |
if (match == NW_TRUE) |
|
899 |
looseComment = NW_TRUE; |
|
900 |
} |
|
901 |
||
902 |
if (match == NW_TRUE && isNotComment == NW_FALSE) |
|
903 |
{ |
|
904 |
e = NW_HTMLP_FinishComment(&match, imodeComment, |
|
905 |
looseComment); |
|
906 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
907 |
{ |
|
908 |
return e; |
|
909 |
} |
|
910 |
if (match == NW_FALSE) |
|
911 |
{ |
|
912 |
if (NW_HTMLP_Lexer_AtEnd(iLexer)) |
|
913 |
{ |
|
914 |
*pMatch = NW_FALSE; |
|
915 |
iLastTextBegin = position.readPosition; |
|
916 |
if (iLastScriptStart == -1) |
|
917 |
{ |
|
918 |
setValidMarks(); |
|
919 |
} |
|
920 |
} |
|
921 |
} |
|
922 |
else |
|
923 |
{ |
|
924 |
*pMatch = NW_TRUE; |
|
925 |
} |
|
926 |
} |
|
927 |
return KBrsrSuccess; |
|
928 |
} |
|
929 |
||
930 |
/* |
|
931 |
on entry: assumes lexer read position is at "<" |
|
932 |
on return: If consumed a well-formed PI, then *pMatch == NW_TRUE and |
|
933 |
.........: lexer read position is just after closing '?>'. |
|
934 |
.........: If did not consume a PI, then *pMatch == NW_FALSE and |
|
935 |
.........: lexer read position is unchanged |
|
936 |
eof handling: if encounters EOF while attempting operation then returns |
|
937 |
............: *pMatch == NW_FALSE and KBrsrSuccess with lexer read |
|
938 |
............: position unchanged |
|
939 |
on error return: return value is not KBrsrSuccess, *pMatch == NW_FALSE |
|
940 |
...............: and lexer read position is unspecified |
|
941 |
*/ |
|
942 |
||
943 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_IfLegalConsumePi(NW_Bool* pMatch) |
|
944 |
{ |
|
945 |
TBrowserStatusCode e; |
|
946 |
NW_HTMLP_Interval_t interval; |
|
947 |
NW_HTMLP_Interval_t name; |
|
948 |
NW_HTMLP_Interval_t arguments; |
|
949 |
NW_HTMLP_Lexer_Position_t position; |
|
950 |
NW_Bool isXml = NW_FALSE; |
|
951 |
||
952 |
/* The XML spec says that PI forms may not use '?>' anywhere except to end |
|
953 |
the PI form. This means that quoted strings should not be recognized, but |
|
954 |
this may be something that should be relaxed. */ |
|
955 |
NW_HTMLP_Lexer_GetPosition(iLexer, &position); |
|
956 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, |
|
957 |
NW_HTMLP_String_PiFormStartLength, |
|
958 |
NW_HTMLP_String_PiFormStart, |
|
959 |
pMatch); |
|
960 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
961 |
return e; |
|
962 |
} |
|
963 |
if (*pMatch == NW_TRUE) { |
|
964 |
/* parse PITarget */ |
|
965 |
e = NW_HTMLP_ParseName(pMatch, &name); |
|
966 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
967 |
return e; |
|
968 |
} |
|
969 |
if (*pMatch == NW_TRUE) { |
|
970 |
/* Find xml charset definition */ |
|
971 |
if (iNeedCharsetDetect && (name.charStop - name.charStart == 3)) |
|
972 |
{ |
|
973 |
static const NW_Ucs2 xmlStr[] = {'x','m','l','\0'}; |
|
974 |
||
975 |
if (NW_Byte_Strnicmp((const NW_Byte*)(iLexer->pBuf + name.start), (const NW_Byte*)xmlStr, NW_Str_Strlen(xmlStr)*sizeof(NW_Ucs2)) == 0) |
|
976 |
{ |
|
977 |
isXml = NW_TRUE; |
|
978 |
} |
|
979 |
} |
|
980 |
/* skip any whitespace */ |
|
981 |
e = NW_HTMLP_SkipSpace(); |
|
982 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
983 |
return e; |
|
984 |
} |
|
985 |
NW_HTMLP_Interval_Start(&arguments, iLexer); |
|
986 |
*pMatch = NW_FALSE; |
|
987 |
while (!NW_HTMLP_Lexer_AtEnd(iLexer)) { |
|
988 |
/* End with match if see '?>' */ |
|
989 |
NW_HTMLP_Interval_Stop(&arguments, iLexer); /* keep moving along end */ |
|
990 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, |
|
991 |
NW_HTMLP_String_PiFormStopLength, |
|
992 |
NW_HTMLP_String_PiFormStop, |
|
993 |
pMatch); |
|
994 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
995 |
return e; |
|
996 |
} |
|
997 |
if (*pMatch == NW_TRUE) { |
|
998 |
if ((iCBs != NULL) && (iCBs->piFormCB != NULL)) { |
|
999 |
NW_Uint32 cp_cnt = 0; |
|
1000 |
e = (*(iCBs->piFormCB))(iLexer, &name, &arguments, iCBs->pClientPointer, &cp_cnt); |
|
1001 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1002 |
return e; |
|
1003 |
} |
|
1004 |
updateCurrentCP(cp_cnt); |
|
1005 |
} |
|
1006 |
break; |
|
1007 |
} |
|
1008 |
/* advance */ |
|
1009 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
1010 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1011 |
return e; |
|
1012 |
} |
|
1013 |
||
1014 |
||
1015 |
if (isXml) |
|
1016 |
{ |
|
1017 |
e = NW_HTMLP_SPL_Elem_Handle_xml_charset(); |
|
1018 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1019 |
return e; |
|
1020 |
} |
|
1021 |
} |
|
1022 |
||
1023 |
} |
|
1024 |
} |
|
1025 |
} |
|
1026 |
if (*pMatch == NW_FALSE) { |
|
1027 |
e = NW_HTMLP_Lexer_SetPosition(iLexer, &position); |
|
1028 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1029 |
return e; |
|
1030 |
} |
|
1031 |
} |
|
1032 |
return KBrsrSuccess; |
|
1033 |
} |
|
1034 |
||
1035 |
/* |
|
1036 |
assumes read position is at "<" |
|
1037 |
on return: *pMatch == NW_TRUE if consumed a well-formed CDATA |
|
1038 |
if *pMatch == NW_FALSE then read position is unchanged |
|
1039 |
*/ |
|
1040 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_IfLegalConsumeCdata(NW_Bool* pMatch) |
|
1041 |
{ |
|
1042 |
TBrowserStatusCode e; |
|
1043 |
NW_HTMLP_Interval_t interval; |
|
1044 |
NW_HTMLP_Lexer_Position_t position; |
|
1045 |
||
1046 |
/* If element is ill-formed: |
|
1047 |
1. save location before opening markup |
|
1048 |
2. if processing reaches end of doc before ending element, |
|
1049 |
then reset to starting markup and return with *pMatch == NW_FALSE */ |
|
1050 |
||
1051 |
NW_HTMLP_Lexer_GetPosition(iLexer, &position); |
|
1052 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, |
|
1053 |
NW_HTMLP_String_CdataStartLength, |
|
1054 |
NW_HTMLP_String_CdataStart, |
|
1055 |
pMatch); |
|
1056 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1057 |
return e; |
|
1058 |
} |
|
1059 |
if (*pMatch == NW_TRUE) { |
|
1060 |
e = NW_HTMLP_FinishCdata(&interval, pMatch); |
|
1061 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1062 |
return e; |
|
1063 |
} |
|
1064 |
if (*pMatch == NW_FALSE) { |
|
1065 |
e = NW_HTMLP_Lexer_SetPosition(iLexer, &position); |
|
1066 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1067 |
return e; |
|
1068 |
} |
|
1069 |
} else { |
|
1070 |
if (iCBs->cdataCB != NULL) { |
|
1071 |
e = (*(iCBs->cdataCB))(iLexer, &interval, iCBs->pClientPointer); |
|
1072 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1073 |
return e; |
|
1074 |
} |
|
1075 |
} |
|
1076 |
} |
|
1077 |
} |
|
1078 |
return KBrsrSuccess; |
|
1079 |
} |
|
1080 |
||
1081 |
/* |
|
1082 |
assumes read position is at "<". This is used to handle the |
|
1083 |
special case such as when comments appears inside the script |
|
1084 |
element. |
|
1085 |
||
1086 |
on return: *pMatch == NW_TRUE if consumed comment type |
|
1087 |
if *pMatch == NW_FALSE then read position is unchanged |
|
1088 |
*/ |
|
1089 |
||
1090 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_HandleSpecialCases(NW_Bool* pMatch) |
|
1091 |
{ |
|
1092 |
TBrowserStatusCode e; |
|
1093 |
||
1094 |
*pMatch = NW_FALSE; |
|
1095 |
||
1096 |
if(iSPLElemHandling->splHandle == NW_TRUE) |
|
1097 |
{ |
|
1098 |
switch(iSPLElemHandling->type) |
|
1099 |
{ |
|
1100 |
case NW_HTMLP_SPL_SCRIPT: |
|
1101 |
||
1102 |
//Don't loose the contents inside the script as ECMA script |
|
1103 |
//requires these. Also, at the same time consume all of these |
|
1104 |
//so that funny characters are not appreared on the screen. |
|
1105 |
//A counter is used for this purpose that will keep track |
|
1106 |
//of all open/close comments ("<--" and "-->" inside the script). |
|
1107 |
// It has been found that there can be nested comments inside the |
|
1108 |
// <script> or starting <script> can end with </SCRIPT> element. |
|
1109 |
||
1110 |
if(!NW_HTMLP_Lexer_AtEnd(iLexer) ) |
|
1111 |
{ |
|
1112 |
NW_Int32 iCommentCnt = 0; |
|
1113 |
NW_HTMLP_Interval_t interval; |
|
1114 |
NW_HTMLP_Interval_t intervalTotal; |
|
1115 |
TBrowserStatusCode s_hyphen = KBrsrSuccess; //Hypen |
|
1116 |
NW_Bool isHyphen = NW_FALSE; |
|
1117 |
TBrowserStatusCode s_lt = KBrsrSuccess; // '<' for starting tag |
|
1118 |
NW_Bool isLT = NW_FALSE; |
|
1119 |
NW_Bool isStartComment = NW_FALSE; |
|
1120 |
NW_Bool isEndComment = NW_FALSE; |
|
1121 |
NW_Bool isEndScript = NW_FALSE; |
|
1122 |
NW_Bool isTrueEndScript = NW_FALSE; |
|
1123 |
NW_Uint32 scriptContentLen = 0; |
|
1124 |
||
1125 |
NW_HTMLP_Interval_Start(&intervalTotal, iLexer); |
|
1126 |
||
1127 |
do |
|
1128 |
{ |
|
1129 |
||
1130 |
||
1131 |
//Check comment starting ("<!--") is there in the script |
|
1132 |
||
1133 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, |
|
1134 |
NW_HTMLP_String_CommentStartLength, |
|
1135 |
NW_HTMLP_String_CommentStart, |
|
1136 |
&isStartComment); |
|
1137 |
||
1138 |
||
1139 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1140 |
{ |
|
1141 |
return e; |
|
1142 |
} |
|
1143 |
||
1144 |
if(isStartComment == NW_TRUE) //It is comment starting ("<" |
|
1145 |
{ |
|
1146 |
// to handle bug WLIU-5Y4LP2: washingtonpost.com where the first comment is not appropriately closed |
|
1147 |
// once we see a new comment start, we assume the previous comment ends, and therefore |
|
1148 |
// iCommentCnt can only be 0 or 1 |
|
1149 |
iCommentCnt = 1; |
|
1150 |
isStartComment = NW_FALSE; |
|
1151 |
} |
|
1152 |
||
1153 |
/*Check for end of </script> here */ |
|
1154 |
||
1155 |
e = NW_HTMLP_Lexer_AsciiStringCompareCase(iLexer, |
|
1156 |
NW_HTMLP_String_EndScriptLength, |
|
1157 |
NW_HTMLP_String_EndScript,NW_FALSE, |
|
1158 |
&isEndScript); |
|
1159 |
||
1160 |
||
1161 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1162 |
{ |
|
1163 |
return e; |
|
1164 |
} |
|
1165 |
||
1166 |
if(isEndScript == NW_TRUE && (iCommentCnt == 0)) |
|
1167 |
{ |
|
1168 |
//Actual end of the </script>, so we return |
|
1169 |
isTrueEndScript = NW_TRUE; |
|
1170 |
NW_HTMLP_Interval_Stop(&intervalTotal, iLexer); |
|
1171 |
*pMatch = NW_TRUE; |
|
1172 |
break; |
|
1173 |
} |
|
1174 |
||
1175 |
//Look for following case |
|
1176 |
// (1) Starting comment ("<--") |
|
1177 |
// (2) Closing comment ("-->") |
|
1178 |
// (3) End tag ("/script") outside comment. If it is |
|
1179 |
// inside then we don't do anything. Keep reading the |
|
1180 |
// data. |
|
1181 |
||
1182 |
||
1183 |
||
1184 |
while ( (!NW_HTMLP_Lexer_AtEnd(iLexer)) && |
|
1185 |
!BRSR_STAT_IS_FAILURE(s_lt) && |
|
1186 |
!BRSR_STAT_IS_FAILURE(s_hyphen) && |
|
1187 |
(!isHyphen && !isLT) |
|
1188 |
) |
|
1189 |
{ |
|
1190 |
||
1191 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
1192 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1193 |
{ |
|
1194 |
return e; |
|
1195 |
} |
|
1196 |
||
1197 |
s_hyphen = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '-', &isHyphen); |
|
1198 |
s_lt = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '<', &isLT); |
|
1199 |
}//end while |
|
1200 |
||
1201 |
||
1202 |
if( BRSR_STAT_IS_FAILURE(s_hyphen) || BRSR_STAT_IS_FAILURE(s_lt) ) |
|
1203 |
{ |
|
1204 |
return KBrsrFailure; |
|
1205 |
} |
|
1206 |
||
1207 |
||
1208 |
if(isLT == NW_TRUE) //Starting '<' |
|
1209 |
{ |
|
1210 |
isLT = NW_FALSE; //For next cycle |
|
1211 |
//Check for another starting comment will be taken care in |
|
1212 |
//beginning of loop. |
|
1213 |
||
1214 |
//Check for end script tag |
|
1215 |
||
1216 |
e = NW_HTMLP_Lexer_AsciiStringCompareCase(iLexer, |
|
1217 |
NW_HTMLP_String_EndScriptLength, |
|
1218 |
NW_HTMLP_String_EndScript,NW_FALSE, |
|
1219 |
&isEndScript); |
|
1220 |
||
1221 |
||
1222 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1223 |
{ |
|
1224 |
return e; |
|
1225 |
} |
|
1226 |
||
1227 |
||
1228 |
/* Not actual </script> */ |
|
1229 |
||
1230 |
if(isEndScript == NW_TRUE && (iCommentCnt >= 1) ) |
|
1231 |
{ |
|
1232 |
//The end tag might be in comment, skip |
|
1233 |
e = NW_HTMLP_Lexer_AdvanceOffset(iLexer, |
|
1234 |
NW_HTMLP_String_EndScriptLength); |
|
1235 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1236 |
{ |
|
1237 |
return e; |
|
1238 |
} |
|
1239 |
continue; |
|
1240 |
} |
|
1241 |
else if(isEndScript == NW_TRUE && (iCommentCnt == 0)) |
|
1242 |
{ |
|
1243 |
//Actual end of the </script>, so we return |
|
1244 |
isTrueEndScript = NW_TRUE; |
|
1245 |
NW_HTMLP_Interval_Stop(&intervalTotal, iLexer); |
|
1246 |
*pMatch = NW_TRUE; |
|
1247 |
break; |
|
1248 |
} |
|
1249 |
}//end if(s_lt == NW_TRUE) //Starting '<' |
|
1250 |
||
1251 |
if(isHyphen == NW_TRUE) |
|
1252 |
{ |
|
1253 |
isHyphen = NW_FALSE; //For next cycle |
|
1254 |
//Check for end of comments "-->" |
|
1255 |
e = NW_HTMLP_IfExistsConsumeKeyword(&interval, |
|
1256 |
NW_HTMLP_String_CommentStopLength, |
|
1257 |
NW_HTMLP_String_CommentStop, |
|
1258 |
&isEndComment); |
|
1259 |
||
1260 |
||
1261 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1262 |
{ |
|
1263 |
return e; |
|
1264 |
} |
|
1265 |
||
1266 |
if( (isEndComment == NW_TRUE) && (iCommentCnt > 0) ) |
|
1267 |
{ |
|
1268 |
iCommentCnt--; |
|
1269 |
/*For next cycle */ |
|
1270 |
s_hyphen = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '-', &isHyphen); |
|
1271 |
s_lt = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '<', &isLT); |
|
1272 |
}//end if(isEndComment == NW_TRUE) |
|
1273 |
}//end if(isHyphen == NW_TRUE) |
|
1274 |
||
1275 |
} while (!NW_HTMLP_Lexer_AtEnd(iLexer)); |
|
1276 |
||
1277 |
//Now make call back to write these contents as whole. |
|
1278 |
//The end of tag will be handled separately in main routine. |
|
1279 |
||
1280 |
||
1281 |
scriptContentLen = NW_HTMLP_Interval_ByteCount(&intervalTotal); |
|
1282 |
||
1283 |
if( (isTrueEndScript == NW_TRUE) && (scriptContentLen > 0) ) |
|
1284 |
{ |
|
1285 |
if (iCBs->contentCB != NULL) |
|
1286 |
{ |
|
1287 |
||
1288 |
e = (*(iCBs->contentCB))(iLexer, &intervalTotal, iCBs->pClientPointer); |
|
1289 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1290 |
{ |
|
1291 |
return e; |
|
1292 |
} |
|
1293 |
} |
|
1294 |
iSPLElemHandling->type = NW_HTMLP_SPL_NONE; |
|
1295 |
iElementParseState->readPosition = iLexer->readPosition; |
|
1296 |
}//end if(isTrueEndScript == NW_TRUE) |
|
1297 |
else |
|
1298 |
{ |
|
1299 |
iSPLElemHandling->type = NW_HTMLP_SPL_NONE; |
|
1300 |
} |
|
1301 |
} //if(!NW_HTMLP_Lexer_AtEnd(iLexer) ) |
|
1302 |
||
1303 |
break; |
|
1304 |
default: |
|
1305 |
break; |
|
1306 |
}//end swicth() |
|
1307 |
}//end if(iSPLElemHandling->splHandle == NW_TRUE) |
|
1308 |
||
1309 |
return KBrsrSuccess; |
|
1310 |
} |
|
1311 |
||
1312 |
/* |
|
1313 |
assumes read position is at "<" |
|
1314 |
on return: *pMatch == NW_TRUE if consumed a well-formed DOCTYPE |
|
1315 |
if *pMatch == NW_FALSE then read position is unchanged |
|
1316 |
*/ |
|
1317 |
||
1318 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_IfLegalConsumeDoctype(NW_Bool* pMatch) |
|
1319 |
{ |
|
1320 |
TBrowserStatusCode e; |
|
1321 |
NW_HTMLP_Interval_t interval; |
|
1322 |
NW_HTMLP_Lexer_Position_t position; |
|
1323 |
NW_HTMLP_Interval_t doctypeInterval; |
|
1324 |
||
1325 |
/* If element is ill-formed: |
|
1326 |
1. save location before opening markup |
|
1327 |
2. if processing reaches end of doc before ending element, |
|
1328 |
then reset to starting markup and return with *pMatch == NW_FALSE */ |
|
1329 |
||
1330 |
NW_HTMLP_Lexer_GetPosition(iLexer, &position); |
|
1331 |
NW_HTMLP_Interval_Start(&doctypeInterval, iLexer); |
|
1332 |
||
1333 |
e = NW_HTMLP_IfExistsConsumeKeywordCase(&interval, |
|
1334 |
NW_HTMLP_String_DoctypeStartLength, |
|
1335 |
NW_HTMLP_String_DoctypeStart, |
|
1336 |
NW_FALSE, |
|
1337 |
pMatch); |
|
1338 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1339 |
return e; |
|
1340 |
} |
|
1341 |
if (*pMatch == NW_TRUE) { |
|
1342 |
NW_HTMLP_Interval_Stop(&doctypeInterval, iLexer); |
|
1343 |
e = NW_HTMLP_FinishDoctype(pMatch, &doctypeInterval); |
|
1344 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1345 |
return e; |
|
1346 |
} |
|
1347 |
if (*pMatch == NW_FALSE) { |
|
1348 |
e = NW_HTMLP_Lexer_SetPosition(iLexer, &position); |
|
1349 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1350 |
return e; |
|
1351 |
} |
|
1352 |
} |
|
1353 |
} |
|
1354 |
return KBrsrSuccess; |
|
1355 |
} |
|
1356 |
||
1357 |
||
1358 |
/*
  Reports whether the content interval consists solely of bytes whose value
  is <= 0x20.

  pL          : lexer whose buffer holds the content
  pI_content  : interval to inspect
  onlySplChar : out; NW_TRUE when every byte of the interval is <= 0x20
                (including the case of an empty interval), NW_FALSE otherwise
                and on every failure return

  returns KBrsrFailure if the interval is not well formed or if the buffer
  could not supply the full byte length; a failure status from
  NW_HTMLP_Lexer_DataAddressFromBuffer() is returned unchanged; otherwise
  KBrsrSuccess.
*/
TBrowserStatusCode CHtmlpParser::NW_HTMLP_SPL_Elem_Before_HTML_tag(NW_HTMLP_Lexer_t* pL, const NW_HTMLP_Interval_t* pI_content,
                                                                   NW_Bool* onlySplChar)
{
  NW_Uint32 expectedLen;
  NW_Uint32 availableLen;
  NW_Uint8* bytes;
  TBrowserStatusCode status;
  NW_Uint32 pos = 0;

  *onlySplChar = NW_FALSE;

  if (!NW_HTMLP_Interval_IsWellFormed(pI_content))
  {
    return KBrsrFailure;
  }

  expectedLen = pI_content->stop - pI_content->start;
  availableLen = expectedLen;

  status = NW_HTMLP_Lexer_DataAddressFromBuffer(pL, pI_content->start,
                                                &availableLen,
                                                &bytes);
  if (BRSR_STAT_IS_FAILURE(status))
  {
    return status;
  }

  /* the helper may shrink the length; anything but the full span is an error */
  if (availableLen != expectedLen)
  {
    return KBrsrFailure;
  }

  /* skip the leading run of bytes <= 0x20; if the run covers the whole
     interval there is nothing else in it */
  while ((pos < availableLen) && (bytes[pos] <= 0x20))
  {
    pos++;
  }

  if (pos == availableLen)
  {
    *onlySplChar = NW_TRUE;
  }

  return KBrsrSuccess;

}//end CHtmlpParser::NW_HTMLP_SPL_Elem_Before_HTML_tag
|
1411 |
||
1412 |
||
1413 |
/*
  Consumes character content from the current read position up to the next
  '<' or the end of the buffer and reports it through the content callback.

  on return: *pMatch == NW_TRUE when a content interval was delimited
  (terminated by '<' or by the end of the buffer); *pMatch == NW_FALSE when
  the lexer was already at the end, when only a leading line break directly
  after an element start was skipped, or when a 0 byte was encountered.

  Additional behaviour visible in this function:
  - directly after an element start, one line break (0xd, 0xa or 0xd+0xa)
    is skipped before content is collected;
  - for HTML documents, when the open element stack contains a TABLE without
    a CAPTION and the text is not all white space (or a PRE is open), TR
    and/or TD start/end tag callbacks are issued around the text if those
    elements are not already open;
  - when the buffer ends before the document is complete, the start of the
    text is recorded in iLastTextBegin and no content callback is made;
  - white-space-only content seen before any <HTML> tag in the first segment
    is dropped.

  Any failure status from the lexer or from a callback is returned as is.
*/
TBrowserStatusCode CHtmlpParser::NW_HTMLP_ConsumeContent(NW_Bool* pMatch)
{
  TBrowserStatusCode e;
  NW_HTMLP_Interval_t interval;
  NW_Bool match;
  NW_Bool matchspace;
  NW_Bool hasTable = NW_FALSE;
  NW_Bool hasTr = NW_FALSE;
  NW_Bool hasTd = NW_FALSE;
  NW_Bool hasPre = NW_FALSE;
  NW_Bool hasCaption = NW_FALSE;
  NW_Bool allSpaces = NW_TRUE; // true while the content seen is only white space
  NW_Int32 i;

  *pMatch = NW_FALSE;

  if (NW_HTMLP_Lexer_AtEnd(iLexer))
    return KBrsrSuccess;

  /* if just after element start then skip one line break if found */
  if (iElementParseState->readPosition == iLexer->readPosition) {
    /* skip over one instance of line break: 0xd, 0xa, or 0xd+0xa */
    e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, 0xd, &match);
    if (BRSR_STAT_IS_FAILURE(e)) {
      return e;
    }
    if (match == NW_TRUE) {
      e = NW_HTMLP_Lexer_Advance(iLexer);
      if (BRSR_STAT_IS_FAILURE(e)) {
        return e;
      }
    }
    if (NW_HTMLP_Lexer_AtEnd(iLexer)) {
      return KBrsrSuccess;
    }
    e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, 0xa, &match);
    if (BRSR_STAT_IS_FAILURE(e)) {
      return e;
    }
    if (match == NW_TRUE) {
      e = NW_HTMLP_Lexer_Advance(iLexer);
      if (BRSR_STAT_IS_FAILURE(e)) {
        return e;
      }
    }
    if (NW_HTMLP_Lexer_AtEnd(iLexer)) {
      return KBrsrSuccess;
    }
    /* Did we advance over anything? */
    if (iElementParseState->readPosition != iLexer->readPosition) {
      /* check at new beginning for possible start of markup */
      e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '<', &match);
      if (BRSR_STAT_IS_FAILURE(e)) {
        return e;
      }
      if (match == NW_TRUE) {
        return KBrsrSuccess;
      }
    }
  }

  /* Walk the open element stack from the bottom to learn the table context.
     A TABLE resets the row/cell flags and a TR resets the cell flag, so the
     flags describe the innermost table. */
  for (i = 0; iIsHtml && (i <= iElementParseState->openElementStackPointer); i++)
  {
    switch ((iElementParseState->pOpenElementStack)[i])
    {
      case HTMLP_HTML_TAG_INDEX_TABLE:
        hasTable = NW_TRUE;
        hasTr = hasTd = NW_FALSE;
        break;
      case HTMLP_HTML_TAG_INDEX_TR:
        hasTr = NW_TRUE;
        hasTd = NW_FALSE;
        break;
      case HTMLP_HTML_TAG_INDEX_TD:
      case HTMLP_HTML_TAG_INDEX_TH:
        hasTd = NW_TRUE;
        break;
      case HTMLP_HTML_TAG_INDEX_PRE:
        hasPre = NW_TRUE;
        break;
      case HTMLP_HTML_TAG_INDEX_CAPTION:
        hasCaption = NW_TRUE;
        break;
      default:
        break;
    }
  }

  NW_HTMLP_Interval_Start(&interval, iLexer);
  do
  {
    /* NOTE(review): the lexer is advanced before anything is tested, so the
       first character of the content is never examined for white space (or
       for 0 / '<'); allSpaces can therefore stay NW_TRUE for text whose only
       non-space character is its first one - confirm whether intended. */
    e = NW_HTMLP_Lexer_Advance(iLexer);
    if (BRSR_STAT_IS_FAILURE(e)) {
      return e;
    }

    match = NW_FALSE;
    if (!NW_HTMLP_Lexer_AtEnd(iLexer)) {
      // temporary hack for "my yahoo" page containing 0 byte in text:
      // step over the 0 byte and return without reporting the content
      e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, 0x0, &match);
      if (BRSR_STAT_IS_FAILURE(e)) {
        return e;
      }
      if (match)
      {
        e = NW_HTMLP_Lexer_Advance(iLexer);
        if (BRSR_STAT_IS_FAILURE(e)) {
          return e;
        }
        return KBrsrSuccess;
      }
      /* look for start of markup */
      e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '<', &match);
      if (BRSR_STAT_IS_FAILURE(e)) {
        return e;
      }
      if (!match)
      {
        e = NW_HTMLP_Lexer_IsSpace(iLexer, &matchspace);
        if (BRSR_STAT_IS_FAILURE(e)) {
          return e;
        }
        if (!matchspace)
          allSpaces = NW_FALSE;
      }
    }

    if (NW_HTMLP_Lexer_AtEnd(iLexer) || (match == NW_TRUE)) {
      NW_HTMLP_Interval_Stop(&interval, iLexer);
      *pMatch = NW_TRUE;
      /* If the buffer ends before the document is complete, remember where
         this text begins (iLastTextBegin) so that it can be inserted at the
         beginning of the next segment, and set the valid marks in the output
         unless a script start is pending. */
      if (NW_HTMLP_Lexer_AtEnd(iLexer) && !iDocComplete)
      {
        iLastTextBegin = interval.start;
        if (iLastScriptStart == -1)
          setValidMarks();
      }
      else
      {
        if (hasTable && !hasCaption && (hasPre || !allSpaces))
        {
          /* text directly inside a table: open the missing row / cell.
             The callback table is null-checked as in the PI handler. */
          if ((!hasTr) && (iCBs != NULL) && (iCBs->startTagCB))
          {
            e = (*(iCBs->startTagCB))((iLexer->pElementDictionary)[HTMLP_HTML_TAG_INDEX_TR].tag[0],
                                      &((iLexer->pElementDictionary)[HTMLP_HTML_TAG_INDEX_TR].tag[1]),
                                      iCBs->pClientPointer, NW_FALSE);
            updateCurrentCP();
            if (BRSR_STAT_IS_FAILURE(e)) {
              return e;
            }
          }
          if ((!hasTd) && (iCBs != NULL) && (iCBs->startTagCB))
          {
            e = (*(iCBs->startTagCB))((iLexer->pElementDictionary)[HTMLP_HTML_TAG_INDEX_TD].tag[0],
                                      &((iLexer->pElementDictionary)[HTMLP_HTML_TAG_INDEX_TD].tag[1]),
                                      iCBs->pClientPointer, NW_FALSE);
            updateCurrentCP();
            if (BRSR_STAT_IS_FAILURE(e)) {
              return e;
            }
          }
        }
        /* content callback */
        if ((iCBs != NULL) && (iCBs->contentCB != NULL) && (iLastTextBegin == -1))
        {
          /* Handle special case for any content before <HTML> tag. This causes
           * the false page switch so be careful.
           */
          if ((firstSegment == NW_TRUE) && (htmlTagFound == NW_FALSE))
          {
            NW_Bool onlySplChar;
            e = NW_HTMLP_SPL_Elem_Before_HTML_tag(iLexer, &interval, &onlySplChar);
            if ((e == KBrsrSuccess) && (onlySplChar == NW_TRUE))
            {
              return KBrsrSuccess;
            }
          }
          e = (*(iCBs->contentCB))(iLexer, &interval, iCBs->pClientPointer);
          if (BRSR_STAT_IS_FAILURE(e)) {
            return e;
          }
        } /* end content callback */
      }

      break;
    }
  } while (!NW_HTMLP_Lexer_AtEnd(iLexer));

  /* Close the implicit cell / row again, innermost first.
     NOTE(review): this runs even when the start tags were not issued above
     (buffer ended before the document was complete) - confirm whether the
     end-tag callback is expected to tolerate that. */
  if (hasTable && !hasCaption && (hasPre || !allSpaces))
  {
    if ((!hasTd) && (iCBs != NULL) && (iCBs->endTagCB))
    {
      e = (*(iCBs->endTagCB))((iLexer->pElementDictionary)[HTMLP_HTML_TAG_INDEX_TD].tag[0],
                              &((iLexer->pElementDictionary)[HTMLP_HTML_TAG_INDEX_TD].tag[1]),
                              NW_FALSE, /* not empty */
                              iCBs->pClientPointer);
      if (BRSR_STAT_IS_FAILURE(e)) {
        return e;
      }
    }
    if ((!hasTr) && (iCBs != NULL) && (iCBs->endTagCB))
    {
      e = (*(iCBs->endTagCB))((iLexer->pElementDictionary)[HTMLP_HTML_TAG_INDEX_TR].tag[0],
                              &((iLexer->pElementDictionary)[HTMLP_HTML_TAG_INDEX_TR].tag[1]),
                              NW_FALSE, /* not empty */
                              iCBs->pClientPointer);
      if (BRSR_STAT_IS_FAILURE(e)) {
        return e;
      }
    }
  }

  return KBrsrSuccess;
}
|
1620 |
||
1621 |
/*
  Tests, without consuming anything in this function, whether the lexer is
  positioned at white space or at the end of a tag.

  aMatch: out; set by the last check performed.  White space is tested first;
  if it does not match, the remaining tag-end tests are those of
  NW_HTMLP_CheckTagEnd() (the NW_HTMLP_String_MiniEnd keyword, then '>').

  returns the status of the last lexer check performed; a failure status
  stops the sequence immediately.
*/
TBrowserStatusCode CHtmlpParser::NW_HTMLP_CheckTagEndOrSpace (NW_Bool* aMatch)
{
  const TBrowserStatusCode spaceStatus = NW_HTMLP_Lexer_IsSpace(iLexer, aMatch);

  /* done on error, or as soon as white space is recognised */
  if (BRSR_STAT_IS_FAILURE(spaceStatus) || *aMatch)
  {
    return spaceStatus;
  }

  /* otherwise the answer is exactly the tag-end test */
  return NW_HTMLP_CheckTagEnd(aMatch);
}
|
1651 |
||
1652 |
/*
  Tests whether the lexer is positioned at the end of a tag: first the
  NW_HTMLP_String_MiniEnd keyword, then, if that does not match, a single '>'.

  aMatch: out; set by the last check performed.

  returns the status of the last lexer check performed; if the keyword
  comparison fails, the '>' comparison is not attempted.
*/
TBrowserStatusCode CHtmlpParser::NW_HTMLP_CheckTagEnd (NW_Bool* aMatch)
{
  TBrowserStatusCode status =
      NW_HTMLP_Lexer_AsciiStringCompare (iLexer,
                                         NW_HTMLP_String_MiniEndLength,
                                         NW_HTMLP_String_MiniEnd,
                                         aMatch);

  /* fall back to a plain '>' only when the first check succeeded
     without finding a match */
  if (!BRSR_STAT_IS_FAILURE(status) && !(*aMatch))
  {
    status = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '>', aMatch);
  }

  return status;
}
|
1672 |
||
1673 |
/* |
|
1674 |
on entry: assumes lexer position is just after '=' |
|
1675 |
on return: If consumed a value, then *pMatch == NW_TRUE and |
|
1676 |
.........: lexer read position is just after last value |
|
1677 |
.........: related char: if quotes are used then it |
|
1678 |
.........: is the character just after the closing quote. |
|
1679 |
.........: If no value is consumed then *pMatch == NW_FALSE |
|
1680 |
.........: and lexer read position is unchanged. |
|
1681 |
.........: In either case return value is KBrsrSuccess. |
|
1682 |
.........: When *pMatch == NW_TRUE then the interval bounds the |
|
1683 |
.........: value and does not include enclosing quote chars, if used. |
|
1684 |
.........: NOTE: Interval may be zero length if value is the empty |
|
1685 |
.........: string or value is missing. To differentiate the two the |
|
1686 |
.........: out param *pMissingValue, if true indicates a missing value. |
|
1687 |
.........: *pMissingValue is only valid if *pMatch == NW_TRUE. |
|
1688 |
eof handling: If encounters EOF while attempting operation, then returns |
|
1689 |
............: *pMatch == NW_FALSE if value is not well-formed with lexer read |
|
1690 |
............: position unchanged, or *pMatch == NW_TRUE if value is well-formed |
|
1691 |
............: with lexer read position at EOF. |
|
1692 |
............: In either case, return value is KBrsrSuccess. |
|
1693 |
on error return: return value is not KBrsrSuccess, *pMatch == NW_FALSE, |
|
1694 |
...............: *pMissingValue and lexer read position is unspecified |
|
1695 |
*/ |
|
1696 |
||
1697 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_AttributeValueConsume(NW_Bool* pMissingValue, |
|
1698 |
NW_HTMLP_Interval_t* pI, |
|
1699 |
NW_Bool* pMatch,NW_Bool splAttrFound) |
|
1700 |
{ |
|
1701 |
/* |
|
1702 |
If no quotes are used around the value, then value is delimited by |
|
1703 |
whitespace (not including CR/LF) or on encountering either "/>" or '>'. |
|
1704 |
This differs from the HTML 4.01 specification which states: |
|
1705 |
||
1706 |
"The attribute value may only contain letters (a-z and A-Z), |
|
1707 |
digits (0-9), hyphens (ASCII decimal 45), periods (ASCII decimal 46), |
|
1708 |
underscores (ASCII decimal 95), and colons (ASCII decimal 58)." |
|
1709 |
||
1710 |
However, most browsers follow a looser syntax such as the one implemented |
|
1711 |
here. To see this, try browsing this weird doc: |
|
1712 |
<a href=foo"" title=a~!@#$%^&*()+={}[];:?<"`'|\/>the link</a> |
|
1713 |
||
1714 |
Otherwise, the value may be any quoted string not containing the |
|
1715 |
quotation mark used as a delimiter for that string. |
|
1716 |
*/ |
|
1717 |
||
1718 |
NW_HTMLP_Lexer_Position_t position; |
|
1719 |
TBrowserStatusCode e; |
|
1720 |
NW_Bool match; |
|
1721 |
NW_Bool skippingSpace; |
|
1722 |
// added for bug DCAN-5Q9PK8 |
|
1723 |
// quotedValue is True in the case: attrName='attrVal' |
|
1724 |
// quotedValue is False in the case: attrName=attr'xxx'Val |
|
1725 |
NW_Bool quotedValue = NW_FALSE; |
|
1726 |
// used only after a Quote is find |
|
1727 |
NW_HTMLP_Lexer_Position_t spaceBfEqualQuo; |
|
1728 |
// set to TRUE only when NW_HTMLP_Lexer_GetPosition(iLexer, &spaceBfEqualQuo) is called |
|
1729 |
NW_Bool spaceBfEqualQuoSet = NW_FALSE; |
|
1730 |
// match for double quote |
|
1731 |
NW_Bool matchD; |
|
1732 |
// match for single quote |
|
1733 |
NW_Bool matchS; |
|
1734 |
// quote could be double " or single ' |
|
1735 |
NW_Uint8 quote = 0; |
|
1736 |
// set to NW_TRUE when value consumed |
|
1737 |
NW_Bool valConsumed = NW_FALSE; |
|
1738 |
||
1739 |
*pMatch = NW_FALSE; |
|
1740 |
*pMissingValue = NW_TRUE; |
|
1741 |
NW_HTMLP_Lexer_GetPosition(iLexer, &position); |
|
1742 |
||
1743 |
/* optional whitespace */ |
|
1744 |
e = NW_HTMLP_SkipSpace(); |
|
1745 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1746 |
return e; |
|
1747 |
} |
|
1748 |
||
1749 |
NW_HTMLP_Interval_Start(pI, iLexer); |
|
1750 |
while (!NW_HTMLP_Lexer_AtEnd(iLexer)) { |
|
1751 |
/* optional CRs or LFs */ |
|
1752 |
e = NW_HTMLP_SkipCRLF(); |
|
1753 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1754 |
return e; |
|
1755 |
} |
|
1756 |
/* end with match if see '>' */ |
|
1757 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '>', &match); |
|
1758 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1759 |
return e; |
|
1760 |
} |
|
1761 |
if (match == NW_TRUE) { |
|
1762 |
NW_HTMLP_Interval_Stop(pI, iLexer); |
|
1763 |
*pMatch = NW_TRUE; |
|
1764 |
break; |
|
1765 |
} |
|
1766 |
/* end with match if see "/>" */ |
|
1767 |
if(splAttrFound != NW_TRUE) |
|
1768 |
{ |
|
1769 |
e = NW_HTMLP_Lexer_AsciiStringCompare(iLexer, NW_HTMLP_String_MiniEndLength, |
|
1770 |
NW_HTMLP_String_MiniEnd, |
|
1771 |
&match); |
|
1772 |
} |
|
1773 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1774 |
return e; |
|
1775 |
} |
|
1776 |
if (match == NW_TRUE) { |
|
1777 |
NW_HTMLP_Interval_Stop(pI, iLexer); |
|
1778 |
*pMatch = NW_TRUE; |
|
1779 |
break; |
|
1780 |
} |
|
1781 |
/* end with match if see whitespace */ |
|
1782 |
e = NW_HTMLP_Lexer_IsSpace(iLexer, &match); |
|
1783 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1784 |
return e; |
|
1785 |
} |
|
1786 |
if (match == NW_TRUE) { |
|
1787 |
NW_HTMLP_Interval_Stop(pI, iLexer); |
|
1788 |
*pMatch = NW_TRUE; |
|
1789 |
break; |
|
1790 |
} |
|
1791 |
matchD = NW_FALSE; |
|
1792 |
matchS = NW_FALSE; |
|
1793 |
// check for string beginning with double quote '\"' |
|
1794 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '\"', &matchD); |
|
1795 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1796 |
{ |
|
1797 |
return e; |
|
1798 |
} |
|
1799 |
// check for string beginning with single quote '\'' |
|
1800 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '\'', &matchS); |
|
1801 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1802 |
{ |
|
1803 |
return e; |
|
1804 |
} |
|
1805 |
if (matchD) |
|
1806 |
{ |
|
1807 |
match = NW_TRUE; |
|
1808 |
quote = '\"'; |
|
1809 |
} |
|
1810 |
else if (matchS) |
|
1811 |
{ |
|
1812 |
match = NW_TRUE; |
|
1813 |
quote = '\''; |
|
1814 |
} |
|
1815 |
if (match) |
|
1816 |
{ |
|
1817 |
*pMissingValue = NW_FALSE; |
|
1818 |
if (iLexer->readPosition == pI->start) |
|
1819 |
{ |
|
1820 |
quotedValue = NW_TRUE; |
|
1821 |
} |
|
1822 |
e= NW_HTMLP_Lexer_Advance(iLexer); |
|
1823 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1824 |
{ |
|
1825 |
return e; |
|
1826 |
} |
|
1827 |
// skip leading white space in quoted attribute value |
|
1828 |
e = NW_HTMLP_SkipSpace(); |
|
1829 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1830 |
{ |
|
1831 |
return e; |
|
1832 |
} |
|
1833 |
NW_HTMLP_Interval_Start(pI, iLexer); |
|
1834 |
// begin with zero length interval |
|
1835 |
NW_HTMLP_Interval_Stop(pI, iLexer); |
|
1836 |
skippingSpace = NW_FALSE; |
|
1837 |
do |
|
1838 |
{ |
|
1839 |
/* optional CRs or LFs */ |
|
1840 |
e = NW_HTMLP_SkipCRLF(); |
|
1841 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1842 |
return e; |
|
1843 |
} |
|
1844 |
if (NW_HTMLP_Lexer_AtEnd(iLexer)) |
|
1845 |
{ |
|
1846 |
// not well-formed at EOF |
|
1847 |
return KBrsrSuccess; |
|
1848 |
} |
|
1849 |
if (!skippingSpace) |
|
1850 |
{ |
|
1851 |
NW_HTMLP_Interval_Stop(pI, iLexer); |
|
1852 |
} |
|
1853 |
e = NW_HTMLP_Lexer_IsSpace(iLexer, &skippingSpace); |
|
1854 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1855 |
{ |
|
1856 |
return e; |
|
1857 |
} |
|
1858 |
////////////////////////////////////////////////////////////////////////////////// |
|
1859 |
// update spaceBfEqualQuo once new space is found |
|
1860 |
if (skippingSpace) |
|
1861 |
{ |
|
1862 |
NW_HTMLP_Lexer_GetPosition(iLexer, &spaceBfEqualQuo); |
|
1863 |
spaceBfEqualQuoSet = NW_TRUE; |
|
1864 |
} |
|
1865 |
////////////////////////////////////////////////////////////////////////////////// |
|
1866 |
// for the case <a href=/url-trx.jsp?title=driver's>Renew driver's license</a> |
|
1867 |
// quote is part of real content |
|
1868 |
// stop parsing value if seeing > or /> |
|
1869 |
// however, this is checked only when it's unquoted value |
|
1870 |
// in the case <a href='bracket > in quote'>, which is quoted value |
|
1871 |
// NW_HTMLP_CheckTagEnd shouldn't be called |
|
1872 |
if (!quotedValue) |
|
1873 |
{ |
|
1874 |
e = NW_HTMLP_CheckTagEnd (&match); |
|
1875 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1876 |
{ |
|
1877 |
return e; |
|
1878 |
} |
|
1879 |
if (match) |
|
1880 |
{ |
|
1881 |
// value consumed |
|
1882 |
valConsumed = NW_TRUE; |
|
1883 |
break; |
|
1884 |
} |
|
1885 |
} |
|
1886 |
////////////////////////////////////////////////////////////////////////////////// |
|
1887 |
// special handling the case: name1="val1 name2="val2" |
|
1888 |
// if seeing =", we can assume that the ending quote for the value is missing |
|
1889 |
if (matchD) |
|
1890 |
{ |
|
1891 |
e = NW_HTMLP_Lexer_AsciiStringCompare (iLexer, |
|
1892 |
NW_HTMLP_String_EqualQuoteLength, |
|
1893 |
NW_HTMLP_String_EqualDblQuote, |
|
1894 |
&match); |
|
1895 |
} |
|
1896 |
else if (matchS) |
|
1897 |
{ |
|
1898 |
e = NW_HTMLP_Lexer_AsciiStringCompare (iLexer, |
|
1899 |
NW_HTMLP_String_EqualQuoteLength, |
|
1900 |
NW_HTMLP_String_EqualSngQuote, |
|
1901 |
&match); |
|
1902 |
} |
|
1903 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1904 |
{ |
|
1905 |
return e; |
|
1906 |
} |
|
1907 |
if (match) |
|
1908 |
{ |
|
1909 |
if (spaceBfEqualQuoSet) |
|
1910 |
{ |
|
1911 |
// set the lexer to the position of the right most space before =" |
|
1912 |
NW_HTMLP_Lexer_SetPosition (iLexer, &spaceBfEqualQuo); |
|
1913 |
} |
|
1914 |
else |
|
1915 |
{ |
|
1916 |
////////////////////////////////////////////////////////////////////////////////// |
|
1917 |
// special handling the case: <a href='b='>b</a> |
|
1918 |
// if seeing =' and spaceBfEqualQuo is not set, |
|
1919 |
// we can assume that consumption of value is over |
|
1920 |
// important to advance two steps so iLexer will point to '>' |
|
1921 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
1922 |
NW_HTMLP_Interval_Stop(pI, iLexer); |
|
1923 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
1924 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1925 |
{ |
|
1926 |
return e; |
|
1927 |
} |
|
1928 |
} |
|
1929 |
// value consumed |
|
1930 |
valConsumed = NW_TRUE; |
|
1931 |
break; |
|
1932 |
} |
|
1933 |
////////////////////////////////////////////////////////////////////////////////// |
|
1934 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, quote, &match); |
|
1935 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1936 |
{ |
|
1937 |
return e; |
|
1938 |
} |
|
1939 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
1940 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1941 |
{ |
|
1942 |
return e; |
|
1943 |
} |
|
1944 |
if (match) // to handle the attrbite="value"" case |
|
1945 |
{ |
|
1946 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, quote, &match); |
|
1947 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1948 |
{ |
|
1949 |
return e; |
|
1950 |
} |
|
1951 |
if (match) |
|
1952 |
{ |
|
1953 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
1954 |
} |
|
1955 |
valConsumed = NW_TRUE; |
|
1956 |
} |
|
1957 |
////////////////////////////////////////////////////////////////////////////////// |
|
1958 |
} while (!valConsumed); |
|
1959 |
||
1960 |
*pMatch = NW_TRUE; |
|
1961 |
// two cases would break out the above do-while loop: either quotedValue is true or false |
|
1962 |
// true quotedValue means value consumed finished, we should break the outter loop as well |
|
1963 |
// speical case: <a href='a.com/name=driver's license'> where iLexer is pointing to s after ' |
|
1964 |
// this is true quotedValue, we consume value right after driver as IE does so |
|
1965 |
// ignore the part of: s license' |
|
1966 |
if (quotedValue) |
|
1967 |
{ |
|
1968 |
break; |
|
1969 |
} |
|
1970 |
||
1971 |
////////////////////////////////////////////////////////////////////////////////// |
|
1972 |
// false quotedValue needs more check before deciding breaking the outter loop or not |
|
1973 |
// case: <a href=preQuo'val'aftrQuo> |
|
1974 |
// continue to parse the attribute value if check fails |
|
1975 |
e = NW_HTMLP_CheckTagEndOrSpace (&match); |
|
1976 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
1977 |
{ |
|
1978 |
return e; |
|
1979 |
} |
|
1980 |
if (match) |
|
1981 |
{ |
|
1982 |
break; |
|
1983 |
} |
|
1984 |
else |
|
1985 |
{ |
|
1986 |
// avoid calling NW_HTMLP_Lexer_Advance below, since we have called it |
|
1987 |
continue; |
|
1988 |
} |
|
1989 |
////////////////////////////////////////////////////////////////////////////////// |
|
1990 |
} // end of match==NW_TRUE of either ' or " |
|
1991 |
||
1992 |
/* advance in unquoted value */ |
|
1993 |
*pMissingValue = NW_FALSE; |
|
1994 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
1995 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
1996 |
return e; |
|
1997 |
} |
|
1998 |
} /* end of loop */ |
|
1999 |
/* catch EOF for values without quotes */ |
|
2000 |
if (NW_HTMLP_Lexer_AtEnd(iLexer)) { |
|
2001 |
*pMatch = NW_TRUE; |
|
2002 |
NW_HTMLP_Interval_Stop(pI, iLexer); |
|
2003 |
} |
|
2004 |
return KBrsrSuccess; |
|
2005 |
} |
|
2006 |
||
2007 |
/* |
|
2008 |
on entry: Function parses the name and comapre for "href" tag. If found then |
|
2009 |
........: *splAttrFound=TRUE otherwise false. |
|
2010 |
on return: void |
|
2011 |
||
2012 |
*/ |
|
2013 |
||
2014 |
void CHtmlpParser::NW_HTMLP_SPL_Elem_Handle_Attr(NW_HTMLP_Interval_t* pI_name, |
|
2015 |
NW_Bool* splAttrFound) |
|
2016 |
{ |
|
2017 |
NW_Uint32 length; |
|
2018 |
NW_Uint32 nameLength; |
|
2019 |
NW_Uint8* pName; |
|
2020 |
NW_Ucs2 c_ucs2; |
|
2021 |
NW_Uint32 i; |
|
2022 |
NW_Int32 bytesRead; |
|
2023 |
TBrowserStatusCode s; |
|
2024 |
static const NW_Ucs2 hrefStr[] = {'h','r','e','f','\0'}; |
|
2025 |
||
2026 |
*splAttrFound = NW_FALSE; |
|
2027 |
||
2028 |
if (!NW_HTMLP_Interval_IsWellFormed(pI_name)) { |
|
2029 |
return; |
|
2030 |
} |
|
2031 |
||
2032 |
/* var name setup */ |
|
2033 |
||
2034 |
length = NW_HTMLP_Interval_ByteCount(pI_name); |
|
2035 |
nameLength = length; /* byte count */ |
|
2036 |
s = NW_HTMLP_Lexer_DataAddressFromBuffer(iLexer, pI_name->start, |
|
2037 |
&nameLength, &pName); |
|
2038 |
if (BRSR_STAT_IS_FAILURE(s)) { |
|
2039 |
return; |
|
2040 |
} |
|
2041 |
if (nameLength != length) { |
|
2042 |
return; |
|
2043 |
} |
|
2044 |
||
2045 |
nameLength = NW_HTMLP_Interval_CharCount(pI_name); /* char count */ |
|
2046 |
||
2047 |
/* force attribute name to lower case for A-Z only, |
|
2048 |
this alters the doc in-place */ |
|
2049 |
||
2050 |
/* FUTURE: unfortunately, there is no writeChar to go with the readChar |
|
2051 |
so the work around until there is better encoding support is to only |
|
2052 |
work with ASCII, 8859-1, UTF-8 and UCS-2. In these encodings we can |
|
2053 |
handle writing because only UCS-2 uses two bytes for an ASCII char. */ |
|
2054 |
NW_ASSERT((iLexer->encoding == HTTP_us_ascii) |
|
2055 |
|| (iLexer->encoding == HTTP_iso_8859_1) |
|
2056 |
|| (iLexer->encoding == HTTP_utf_8) |
|
2057 |
|| (iLexer->encoding == HTTP_iso_10646_ucs_2)); |
|
2058 |
if (!((iLexer->encoding == HTTP_us_ascii) |
|
2059 |
|| (iLexer->encoding == HTTP_iso_8859_1) |
|
2060 |
|| (iLexer->encoding == HTTP_utf_8) |
|
2061 |
|| (iLexer->encoding == HTTP_iso_10646_ucs_2))) { |
|
2062 |
return; |
|
2063 |
} |
|
2064 |
for (i = 0; i < length;) { |
|
2065 |
bytesRead = NW_String_readChar(&(pName[i]), &c_ucs2, iLexer->encoding); |
|
2066 |
if (bytesRead == -1) { |
|
2067 |
return; |
|
2068 |
} |
|
2069 |
/* force doc ascii uppercase to lowercase */ |
|
2070 |
if ((c_ucs2 >= (NW_Ucs2)'A') && (c_ucs2 <= (NW_Ucs2)'Z')) { |
|
2071 |
c_ucs2 += 0x20; /* offset in ascii from upper to lower */ |
|
2072 |
} |
|
2073 |
if (iLexer->encoding == HTTP_iso_10646_ucs_2) { |
|
2074 |
NW_ASSERT(bytesRead == 2); |
|
2075 |
/* accomodate either endianness */ |
|
2076 |
if (pName[i] == 0) { |
|
2077 |
pName[i+1] = (NW_Uint8)c_ucs2; |
|
2078 |
} else { |
|
2079 |
pName[i] = (NW_Uint8)c_ucs2; |
|
2080 |
} |
|
2081 |
} else { |
|
2082 |
NW_ASSERT(bytesRead == 1); |
|
2083 |
NW_ASSERT((iLexer->encoding == HTTP_us_ascii) |
|
2084 |
|| (iLexer->encoding == HTTP_iso_8859_1) |
|
2085 |
|| (iLexer->encoding == HTTP_utf_8)); |
|
2086 |
pName[i] = (NW_Uint8)c_ucs2; |
|
2087 |
} |
|
2088 |
i += bytesRead; |
|
2089 |
}//end for(..) |
|
2090 |
||
2091 |
if (NW_Byte_Strnicmp((const NW_Byte*)pName, (const NW_Byte*)hrefStr, NW_Str_Strlen(hrefStr)*sizeof(NW_Ucs2)) == 0) |
|
2092 |
{ |
|
2093 |
*splAttrFound = NW_TRUE; |
|
2094 |
} |
|
2095 |
||
2096 |
return; |
|
2097 |
}//end NW_HTMLP_SPL_Elem_Handle_Attr(..) |
|
2098 |
||
2099 |
/*
on entry: pI_name and pI_attvalue bound an attribute name and its value;
........: splElem describes the special element being handled.
on return: KBrsrSuccess when there are no parser callbacks (iCBs == NULL),
.........: when splElem->type is not NW_HTMLP_SPL_META, when no charset
.........: declaration is recognised, or when the declared charset is empty.
.........: KBrsrRestartParsing after charsetConvertCallback succeeded; the
.........: lexer encoding and iOrigEncoding are then set to the charset the
.........: callback selected.
on error return: the non-success status returned by charsetConvertCallback.
*/
TBrowserStatusCode CHtmlpParser::NW_HTMLP_SPL_Elem_Handle_Meta(NW_HTMLP_Interval_t* pI_name,
                                                               NW_HTMLP_Interval_t* pI_attvalue,
                                                               NW_HTMLP_SPL_Elem_Handling_t* splElem)
{
  /* After detecting meta, try to detect content=... */
  static const NW_Ucs2 attrNameStr[] = {'c','o','n','t','e','n','t','\0'};
  static const NW_Ucs2 attrValStr[] = {'c','h','a','r','s','e','t','=','\0'};

  TBrowserStatusCode status = KBrsrSuccess;
  NW_Int32 numUnconvertible, indexFirstUnconvertible;
  /* NOTE(review): ownership of the buffer returned through outBuf is not
     visible here; it is initialised so it never carries stack garbage. */
  NW_Buffer_t* outBuf = NULL;
  NW_Bool charsetSpecified = NW_FALSE;
  NW_Buffer_t body;
  NW_Uint32 selectedCharset;

  NW_ASSERT(iLexer != NULL);
  NW_ASSERT(pI_name != NULL);
  NW_ASSERT(pI_attvalue != NULL);
  NW_ASSERT(splElem != NULL);

  /* byte offset into the lexer buffer; ends up at the first byte of the
     charset name */
  NW_Uint32 i = pI_attvalue->start;

  if (iCBs == NULL) {
    return status;
  }
  if (splElem->type != NW_HTMLP_SPL_META) {
    return status;
  }

  /* byte length of "charset=" as stored in attrValStr */
  const NW_Uint32 charsetEqBytes =
      (NW_Uint32)(NW_Str_Strlen(attrValStr) * sizeof(NW_Ucs2));

  /* The comparisons below are byte-wise against NW_Ucs2 literals.
     NOTE(review): this presumes the buffer holds 2-byte characters at this
     point - confirm against the caller. */
  if (pI_name->charStop - pI_name->charStart == NW_Str_Strlen(attrNameStr) && /* check that the attribute name is content */
      NW_Byte_Strnicmp((const NW_Byte*)(iLexer->pBuf + pI_name->start),
                       (const NW_Byte*)attrNameStr,
                       NW_Str_Strlen(attrNameStr)*sizeof(NW_Ucs2)) == 0)
  {
    /* Find "charset=" in the attribute value. The bound is written as an
       addition so it cannot wrap around when the value ends fewer than
       charsetEqBytes bytes into the buffer (the subtraction form
       "stop - length" underflows as unsigned and lets the scan run away). */
    while (i + charsetEqBytes <= pI_attvalue->stop)
    {
      if (NW_Byte_Strnicmp((const NW_Byte*)(iLexer->pBuf + i),
                           (const NW_Byte*)attrValStr, charsetEqBytes) == 0)
      {
        i += charsetEqBytes; // Move after charset=
        charsetSpecified = NW_TRUE;
        break;
      }
      i += sizeof(NW_Ucs2);
    }
  }
  // street html support:
  // handle <meta content="text/html" charset="xxxx"> case
  // in this case, attrName = charset and attrVal = xxxx
  else if (pI_name->charStop - pI_name->charStart == NW_Str_Strlen(attrValStr) - 1 && /* check that the attribute name is charset */
           NW_Byte_Strnicmp((const NW_Byte*)(iLexer->pBuf + pI_name->start),
                            (const NW_Byte*)attrValStr,
                            (NW_Str_Strlen(attrValStr) - 1)*sizeof(NW_Ucs2)) == 0)
  {
    charsetSpecified = NW_TRUE;
  }

  if (!charsetSpecified) {
    return status;
  }

  // we found the structure in meta that indicates a charset specification,
  // call charsetConvertCallback to perform the charset lookup.
  NW_ASSERT(iCBs->charsetConvertCallback != NULL);
  NW_ASSERT(iCBs->charsetContext != NULL);

  /* Set the buffer that needs to be converted */
  body.data = iLexer->pBuf;
  body.allocatedLength = iLexer->byteCount;
  body.length = iLexer->byteCount;

  /* In case of empty charset declaration */
  if (pI_attvalue->stop == i)
  {
    return KBrsrSuccess;
  }

  /* arguments: byte length of the charset name, then its byte offset */
  status = iCBs->charsetConvertCallback(iCBs->charsetContext, pI_attvalue->stop - i, i, &body,
                                        &numUnconvertible, &indexFirstUnconvertible, &outBuf,
                                        &selectedCharset);
  if (status != KBrsrSuccess)
  {
    return status;
  }

  /* Switch the lexer to the selected charset and ask for a restart */
  NW_HTMLP_Lexer_SetEncoding(iLexer, selectedCharset);
  iOrigEncoding = selectedCharset;
  return KBrsrRestartParsing;
}
|
2189 |
||
2190 |
/*
on entry: the lexer read position is where an "encoding" pseudo attribute
........: may start. Accepted shapes are encoding="x", encoding='x' and the
........: same with blanks around the '='.
........: The buffer is walked in steps of sizeof(NW_Ucs2) and single bytes
........: are compared with ASCII characters.
........: NOTE(review): this presumes 2-byte characters whose first byte
........: carries the ASCII value - confirm against the caller.
on return: KBrsrSuccess when no usable encoding declaration is found or the
.........: declared name is empty. KBrsrRestartParsing after
.........: charsetConvertCallback succeeded; the lexer encoding and
.........: iOrigEncoding are then set to the charset the callback selected.
on error return: the non-success status returned by charsetConvertCallback.
*/
TBrowserStatusCode CHtmlpParser::NW_HTMLP_SPL_Elem_Handle_xml_charset()
{
  TBrowserStatusCode status = KBrsrSuccess;
  NW_Int32 numUnconvertible, indexFirstUnconvertible;
  NW_Buffer_t* outBuf = NULL;
  NW_Uint32 i = 0, offset = 0;
  static const NW_Ucs2 encodingStr[] = {'e','n','c','o','d','i','n','g','=','\0'};
  NW_Uint32 encodingLen = NW_Str_Strlen(encodingStr)*sizeof(NW_Ucs2);

  /* Everything below reads pBuf at readPosition + encodingLen; without that
     byte in the buffer there is no room for a declaration. */
  if (iLexer->readPosition + encodingLen >= iLexer->byteCount)
  {
    return status;
  }

  /* Starting at the position where '=' is expected, walk to the first quote
     or '?', accumulating in offset the bytes taken by blanks. The bounds
     check comes first so pBuf is never read past byteCount. */
  for (i = iLexer->readPosition + encodingLen - sizeof(NW_Ucs2);
       i < iLexer->byteCount && iLexer->pBuf[i] != '"' && iLexer->pBuf[i] != '\'' && iLexer->pBuf[i] != '?';
       i += sizeof(NW_Ucs2))
  {
    if (iLexer->pBuf[i] == ' ')
      offset += sizeof(NW_Ucs2);
  }

  /* The name must be "encoding" (compared without the '='), followed either
     directly by a quote or by at least one blank. */
  if (NW_Byte_Strnicmp((const NW_Byte*)(iLexer->pBuf + iLexer->readPosition),
                       (const NW_Byte*)encodingStr,
                       encodingLen - sizeof(NW_Ucs2)) == 0 &&
      (iLexer->pBuf[iLexer->readPosition + encodingLen] == '"' ||
       iLexer->pBuf[iLexer->readPosition + encodingLen] == '\'' || offset))
  {
    NW_Buffer_t body;
    NW_Uint32 selectedCharset;
    NW_ASSERT(iCBs->charsetConvertCallback != NULL);
    NW_ASSERT(iCBs->charsetContext != NULL);

    /* Walk from the first byte after the opening quote to the closing quote
       (or '?', or the end of the buffer). */
    for (i = iLexer->readPosition + encodingLen + sizeof(NW_Ucs2) + offset;
         i < iLexer->byteCount && iLexer->pBuf[i] != '"' && iLexer->pBuf[i] != '\'' && iLexer->pBuf[i] != '?';
         i += sizeof(NW_Ucs2))
      ;

    /* i may equal byteCount here, so it is range-checked before the read */
    if (i < iLexer->byteCount &&
        ((iLexer->pBuf[i] == '"') || (iLexer->pBuf[i] == '\'')) &&
        i > iLexer->readPosition + encodingLen)
    {
      encodingLen += sizeof(NW_Ucs2); //Skip the quotes

      /* first byte and byte length of the charset name */
      const NW_Uint32 valueStart = iLexer->readPosition + encodingLen + offset;
      const NW_Uint32 valueLength = i - valueStart;

      /* Set the buffer that needs to be converted */
      body.data = iLexer->pBuf;
      body.allocatedLength = iLexer->byteCount;
      body.length = iLexer->byteCount;

      /* In case of empty XML declaration, ignore it. The blanks counted in
         offset are part of valueStart, so an empty name is detected whether
         or not blanks surround the '='. */
      if (valueLength == 0)
      {
        return KBrsrSuccess;
      }

      status = iCBs->charsetConvertCallback(iCBs->charsetContext, valueLength, valueStart, &body,
                                            &numUnconvertible, &indexFirstUnconvertible, &outBuf, &selectedCharset);
      if (status != KBrsrSuccess)
      {
        return status;
      }

      NW_HTMLP_Lexer_SetEncoding(iLexer, selectedCharset);
      iOrigEncoding = selectedCharset;
      return KBrsrRestartParsing;
    }
  }

  return status;
}
|
2245 |
||
2246 |
/*
on entry: pElement is the element description supplied by the caller; it is
........: only asserted non-NULL and not otherwise used.
........: charsetConvertCallback is invoked over the whole lexer buffer with
........: a zero length and zero offset in place of a charset name.
on return: KBrsrRestartParsing after the callback succeeded; the lexer
.........: encoding and iOrigEncoding are then set to the selected charset.
on error return: the non-success status returned by charsetConvertCallback.
*/
TBrowserStatusCode CHtmlpParser::NW_HTMLP_SPL_Elem_Handle_BodyStart(NW_HTMLP_ElementDescriptionConst_t* pElement)
{
  NW_Int32 unconvertibleCount;
  NW_Int32 firstUnconvertibleIndex;
  NW_Buffer_t* convertedBuf = NULL;

  NW_REQUIRED_PARAM( pElement );

  NW_ASSERT(iLexer != NULL);
  NW_ASSERT(pElement != NULL);

  NW_Uint32 chosenCharset = 0;
  NW_ASSERT(iCBs->charsetConvertCallback != NULL);
  NW_ASSERT(iCBs->charsetContext != NULL);

  /* Describe the complete document buffer for the converter */
  NW_Buffer_t document;
  document.data = iLexer->pBuf;
  document.allocatedLength = iLexer->byteCount;
  document.length = iLexer->byteCount;

  const TBrowserStatusCode result =
      iCBs->charsetConvertCallback(iCBs->charsetContext, 0, 0, &document,
                                   &unconvertibleCount, &firstUnconvertibleIndex,
                                   &convertedBuf, &chosenCharset);
  if (result != KBrsrSuccess)
  {
    return result;
  }

  /* Adopt the selected charset and ask the caller to restart parsing */
  NW_HTMLP_Lexer_SetEncoding(iLexer, chosenCharset);
  iOrigEncoding = chosenCharset;
  return KBrsrRestartParsing;
}
|
2280 |
||
2281 |
/* |
|
2282 |
on entry: assumes lexer read position is in whitespace or at first character |
|
2283 |
........: of an attribute name |
|
2284 |
on return: If consumes an attribute with (optional) value, then *pMatch == NW_TRUE |
|
2285 |
.........: and lexer read position is just after the attribute value pair |
|
2286 |
.........: which could be whitespace, a '>', or EOF. |
|
2287 |
.........: If no attribute with (optional) value consumed, then |
|
2288 |
.........: *pMatch == NW_FALSE and lexer read position unchanged. |
|
2289 |
.........: In either case return value is KBrsrSuccess. |
|
2290 |
.........: When *pMatch == NW_TRUE then the *pI_name bounds the attribute name |
|
2291 |
.........: and pI_value bounds the value which does not include enclosing quote |
|
2292 |
.........: chars, if used. |
|
2293 |
.........: NOTE: *pI_value may be zero length if value is the empty |
|
2294 |
.........: string or value is missing. To differentiate the two the |
|
2295 |
.........: out param *pMissingValue, if true indicates a missing value. |
|
2296 |
.........: *pMissingValue is only valid if *pMatch == NW_TRUE. |
|
2297 |
eof handling: If encounters EOF while attempting operation, then returns |
|
2298 |
............: *pMatch == NW_FALSE only if result up to EOF could not form either |
|
2299 |
............: a name without a value or a value that is not well-formed. |
|
2300 |
............: If *pMatch == NW_FALSE on EOF then lexer read position is unchanged, |
|
2301 |
............: If *pMatch == NW_TRUE on EOF then with lexer read position at EOF. |
|
2302 |
............: In either case, return value is KBrsrSuccess. |
|
2303 |
on error return: return value is not KBrsrSuccess, *pMatch == NW_FALSE, |
|
2304 |
...............: *pMissingValue and lexer read position is unspecified |
|
2305 |
*/ |
|
2306 |
||
2307 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_AttributeNameAndValueConsume( |
|
2308 |
NW_HTMLP_Interval_t* pI_name, |
|
2309 |
NW_Bool* pMissingValue, |
|
2310 |
NW_HTMLP_Interval_t* pI_attvalue, |
|
2311 |
NW_Bool* pMatch, |
|
2312 |
NW_HTMLP_SPL_Elem_Handling_t* splElem) |
|
2313 |
{ |
|
2314 |
/* |
|
2315 |
Looks for the following patterns: |
|
2316 |
1. name (form with no value) |
|
2317 |
2. name= (form with missing value, degenerate but accepted) |
|
2318 |
3. name=value (unquoted value) |
|
2319 |
4. name='value' (single quoted value) |
|
2320 |
5. name="value" (double quoted value) |
|
2321 |
||
2322 |
The forms are terminated by: matching quotes, whitespace, "/>", '>' or EOF. |
|
2323 |
*/ |
|
2324 |
NW_HTMLP_Lexer_Position_t position; |
|
2325 |
TBrowserStatusCode e; |
|
2326 |
NW_Bool match; |
|
2327 |
NW_Bool splAttrFound = NW_FALSE; |
|
2328 |
||
2329 |
*pMatch = NW_FALSE; |
|
2330 |
*pMissingValue = NW_TRUE; |
|
2331 |
NW_HTMLP_Lexer_GetPosition(iLexer, &position); |
|
2332 |
||
2333 |
NW_HTMLP_Interval_Init(pI_name); |
|
2334 |
NW_HTMLP_Interval_Init(pI_attvalue); |
|
2335 |
||
2336 |
/* optional whitespace */ |
|
2337 |
e = NW_HTMLP_SkipSpace(); |
|
2338 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2339 |
return e; |
|
2340 |
} |
|
2341 |
||
2342 |
/* Name */ |
|
2343 |
e = NW_HTMLP_ParseName(&match, pI_name); |
|
2344 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2345 |
return KBrsrFailure; |
|
2346 |
} |
|
2347 |
if (match == NW_FALSE) { |
|
2348 |
// added check for quote if attr name is not present, if |
|
2349 |
// quote is next, skip to after next quote |
|
2350 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '\"', &match); |
|
2351 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2352 |
return e; |
|
2353 |
} |
|
2354 |
// skip to next quote |
|
2355 |
if (match== NW_TRUE) |
|
2356 |
{ |
|
2357 |
match = NW_FALSE; |
|
2358 |
do { |
|
2359 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
2360 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2361 |
return e; |
|
2362 |
} |
|
2363 |
if (NW_HTMLP_Lexer_AtEnd(iLexer)) { |
|
2364 |
return KBrsrSuccess; |
|
2365 |
} |
|
2366 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '\"', &match); |
|
2367 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2368 |
return e; |
|
2369 |
} |
|
2370 |
} while (match == NW_FALSE); |
|
2371 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
2372 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2373 |
return e; |
|
2374 |
} |
|
2375 |
*pMatch = NW_TRUE; |
|
2376 |
return KBrsrSuccess; |
|
2377 |
} |
|
2378 |
else |
|
2379 |
{ |
|
2380 |
return KBrsrSuccess; |
|
2381 |
} |
|
2382 |
} |
|
2383 |
||
2384 |
if(splElem && (splElem->splHandle == NW_TRUE)) |
|
2385 |
{ |
|
2386 |
if( (splElem->type == NW_HTMLP_SPL_ANCHOR_ATTR ) || |
|
2387 |
(splElem->type == NW_HTMLP_SPL_BASE_ATTR) ) |
|
2388 |
{ |
|
2389 |
NW_HTMLP_SPL_Elem_Handle_Attr(pI_name,&splAttrFound); |
|
2390 |
} |
|
2391 |
}//endif if(splElem && (splElem->splHandle == NW_TRUE)) |
|
2392 |
||
2393 |
||
2394 |
/* optional whitespace */ |
|
2395 |
e = NW_HTMLP_SkipSpace(); |
|
2396 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2397 |
return e; |
|
2398 |
} |
|
2399 |
/* '=' */ |
|
2400 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '=', &match); |
|
2401 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2402 |
return e; |
|
2403 |
} |
|
2404 |
if (match == NW_TRUE) { |
|
2405 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
2406 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2407 |
return e; |
|
2408 |
} |
|
2409 |
e = NW_HTMLP_AttributeValueConsume(pMissingValue, pI_attvalue, &match,splAttrFound); |
|
2410 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2411 |
return e; |
|
2412 |
} |
|
2413 |
if(splElem && (splElem->splHandle == NW_TRUE)) |
|
2414 |
{ |
|
2415 |
if( splElem->type == NW_HTMLP_SPL_META ) |
|
2416 |
{ |
|
2417 |
e = NW_HTMLP_SPL_Elem_Handle_Meta(pI_name,pI_attvalue, splElem); |
|
2418 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2419 |
return e; |
|
2420 |
} |
|
2421 |
} |
|
2422 |
}//endif if(splElem && (splElem->splHandle == NW_TRUE)) |
|
2423 |
||
2424 |
if (match == NW_FALSE) { |
|
2425 |
return KBrsrSuccess; |
|
2426 |
} |
|
2427 |
} |
|
2428 |
*pMatch = NW_TRUE; |
|
2429 |
return KBrsrSuccess; |
|
2430 |
} |
|
2431 |
||
2432 |
/* |
|
2433 |
on entry: no assumptions about lexer read position |
|
2434 |
on return: If finds a match for regular expression "/\s*>", |
|
2435 |
.........: then *pMatch == NW_TRUE and the lexer read position |
|
2436 |
.........: is just after the '>' and return value is KBrsrSuccess. |
|
2437 |
eof handling: If encounters EOF while attempting operation, then |
|
2438 |
............: *pMatch == NW_FALSE and return value is KBrsrSuccess. |
|
2439 |
on error return: return value is not KBrsrSuccess, *pMatch == NW_FALSE |
|
2440 |
...............: and lexer read position is unspecified |
|
2441 |
*/ |
|
2442 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_IfExistsConsumeMinEmptyElementSyntax(NW_Bool* pMatch) |
|
2443 |
{ |
|
2444 |
NW_HTMLP_Lexer_Position_t position; |
|
2445 |
NW_Bool match; |
|
2446 |
TBrowserStatusCode e; |
|
2447 |
||
2448 |
*pMatch = NW_FALSE; |
|
2449 |
NW_HTMLP_Lexer_GetPosition(iLexer, &position); |
|
2450 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '/', &match); |
|
2451 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2452 |
return e; |
|
2453 |
} |
|
2454 |
if (match == NW_TRUE) { |
|
2455 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
2456 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2457 |
return e; |
|
2458 |
} |
|
2459 |
e = NW_HTMLP_SkipSpace(); |
|
2460 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2461 |
return e; |
|
2462 |
} |
|
2463 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '>', pMatch); |
|
2464 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2465 |
return e; |
|
2466 |
} |
|
2467 |
if (*pMatch == NW_TRUE) { |
|
2468 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
2469 |
} else { |
|
2470 |
e = NW_HTMLP_Lexer_SetPosition(iLexer, &position); |
|
2471 |
} |
|
2472 |
} |
|
2473 |
return e; |
|
2474 |
} |
|
2475 |
||
2476 |
/* |
|
2477 |
on entry: assumes lexer read position is in whitespace or at first character |
|
2478 |
........: of an attribute name |
|
2479 |
........: This function special cases the callback pointer. It *iCBs == NULL, |
|
2480 |
........: then no callbacks are done. This is so that attribute lists on |
|
2481 |
........: unknown tags and on end tags can be consumed but ignored. |
|
2482 |
on return: If either the attribute list is well-formed, including an empty |
|
2483 |
.........: attribute list, then *pMatch == NW_TRUE and the lexer read position |
|
2484 |
.........: is just after the closing '>' or '/>' (for xhtml empty tag processing). |
|
2485 |
.........: If the end is '/>', then *pIsEmptyTagEnd == NW_TRUE. |
|
2486 |
.........: If no attribute list is consumed, due a malformed attribute |
|
2487 |
.........: name or value, then *pMatch == NW_FALSE, *pIsEmptyTagEnd == NW_FALSE |
|
2488 |
.........: and the lexer read position is unchanged. |
|
2489 |
.........: In either case return value is KBrsrSuccess. |
|
2490 |
eof handling: If encounters EOF while attempting operation, then |
|
2491 |
............: *pMatch == NW_FALSE and return value is KBrsrSuccess. |
|
2492 |
on error return: return value is not KBrsrSuccess, *pMatch == NW_FALSE |
|
2493 |
...............: and lexer read position is unspecified |
|
2494 |
*/ |
|
2495 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_IfLegalConsumeAttributes( |
|
2496 |
NW_Bool* pMatch, |
|
2497 |
NW_Bool* pIsEmptyTagEnd, |
|
2498 |
const struct NW_HTMLP_EventCallbacks_s* pCBs, |
|
2499 |
NW_HTMLP_SPL_Elem_Handling_t* splElem) |
|
2500 |
{ |
|
2501 |
NW_HTMLP_Interval_t I_attName; |
|
2502 |
NW_HTMLP_Interval_t I_attValue; |
|
2503 |
NW_HTMLP_Lexer_Position_t position; |
|
2504 |
NW_Uint32 attributeCount; |
|
2505 |
TBrowserStatusCode e; |
|
2506 |
NW_Bool missingValue; |
|
2507 |
NW_Bool match; |
|
2508 |
NW_Uint32 cp_cnt; |
|
2509 |
NW_Bool malformated = NW_FALSE; |
|
2510 |
||
2511 |
*pMatch = NW_FALSE; |
|
2512 |
*pIsEmptyTagEnd = NW_FALSE; |
|
2513 |
NW_HTMLP_Lexer_GetPosition(iLexer, &position); |
|
2514 |
||
2515 |
for (attributeCount = 0;;) { |
|
2516 |
||
2517 |
/* optional space and junk. */ |
|
2518 |
e = NW_HTMLP_SkipJunk(); |
|
2519 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2520 |
return e; |
|
2521 |
} |
|
2522 |
||
2523 |
/* try '>' end */ |
|
2524 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '>', &match); |
|
2525 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2526 |
return e; |
|
2527 |
} |
|
2528 |
if (match == NW_TRUE) { |
|
2529 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
2530 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2531 |
return e; |
|
2532 |
} |
|
2533 |
*pMatch = NW_TRUE; |
|
2534 |
break; |
|
2535 |
} |
|
2536 |
else if (NW_HTMLP_Lexer_AtEnd(iLexer)) |
|
2537 |
{ |
|
2538 |
return KBrsrSuccess; |
|
2539 |
} |
|
2540 |
/* try /\s*> end */ |
|
2541 |
e = NW_HTMLP_IfExistsConsumeMinEmptyElementSyntax(&match); |
|
2542 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2543 |
return e; |
|
2544 |
} |
|
2545 |
if (match == NW_TRUE) { |
|
2546 |
*pMatch = NW_TRUE; |
|
2547 |
*pIsEmptyTagEnd = NW_TRUE; |
|
2548 |
break; |
|
2549 |
} |
|
2550 |
/* look for attributes */ |
|
2551 |
e = NW_HTMLP_AttributeNameAndValueConsume(&I_attName, &missingValue, |
|
2552 |
&I_attValue, &match, splElem); |
|
2553 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2554 |
return e; |
|
2555 |
} |
|
2556 |
if (match == NW_FALSE) { |
|
2557 |
// street html support, if parsing attr nv pair fails and |
|
2558 |
// it's not end of buffer yet, that indicates that it's likely |
|
2559 |
// to be a mal-formated attr list. In this case, we want to |
|
2560 |
// advance to where the end tag is. |
|
2561 |
if (!NW_HTMLP_Lexer_AtEnd(iLexer)) { |
|
2562 |
do { |
|
2563 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
2564 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2565 |
return e; |
|
2566 |
} |
|
2567 |
if (NW_HTMLP_Lexer_AtEnd(iLexer)) { |
|
2568 |
return KBrsrSuccess; |
|
2569 |
} |
|
2570 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '>', &match); |
|
2571 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2572 |
return e; |
|
2573 |
} |
|
2574 |
} while (match == NW_FALSE); |
|
2575 |
if (match) |
|
2576 |
malformated = NW_TRUE; |
|
2577 |
} |
|
2578 |
else |
|
2579 |
return KBrsrSuccess; |
|
2580 |
} |
|
2581 |
// if I_AttName byte count is 0, that means we don't |
|
2582 |
// have attr name but an attr value within quote. |
|
2583 |
if (NW_HTMLP_Interval_ByteCount(&I_attName) !=0 && !malformated) |
|
2584 |
{ |
|
2585 |
if (!iIsHtml && pCBs != NULL && (pCBs->attributeStartCB != NULL)) { // the content is WML |
|
2586 |
e = NW_HTMLP_ValidateWMLAttribute(iLexer, &I_attName, |
|
2587 |
pCBs->pClientPointer); |
|
2588 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2589 |
continue; // attempt to get the NEXT attribute |
|
2590 |
} |
|
2591 |
} |
|
2592 |
attributeCount++; |
|
2593 |
/* note: attribute value may be zero length interval */ |
|
2594 |
if ((pCBs != NULL) && (pCBs->attributeStartCB != NULL)) { |
|
2595 |
e = (*(pCBs->attributeStartCB))(pCBs->pClientPointer); |
|
2596 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2597 |
return e; |
|
2598 |
} |
|
2599 |
if (pCBs->attributeNameAndValueCB != NULL) { |
|
2600 |
e = (*(pCBs->attributeNameAndValueCB))(iLexer, &I_attName, |
|
2601 |
missingValue, &I_attValue, |
|
2602 |
pCBs->pClientPointer, &cp_cnt); |
|
2603 |
updateCurrentCP(cp_cnt); |
|
2604 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2605 |
return e; |
|
2606 |
} |
|
2607 |
} |
|
2608 |
} |
|
2609 |
} |
|
2610 |
} /* end of loop */ |
|
2611 |
if ((attributeCount > 0) && (*pMatch == NW_TRUE)) { |
|
2612 |
if ((pCBs != NULL) && (pCBs->attributesEndCB != NULL)) { |
|
2613 |
e = (*(pCBs->attributesEndCB))(attributeCount, pCBs->pClientPointer); |
|
2614 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2615 |
return e; |
|
2616 |
} |
|
2617 |
} |
|
2618 |
} |
|
2619 |
return KBrsrSuccess; |
|
2620 |
} |
|
2621 |
||
2622 |
/*
 * Looks up the tag name covered by interval pI in the lexer's element
 * dictionary.
 *
 * Each dictionary entry stores its tag as a length-prefixed array: tag[0] is
 * the character count and tag[1..] are the characters. Document characters
 * are decoded with the lexer's encoding and ASCII 'A'..'Z' is folded to lower
 * case before comparing, so the match is case-insensitive for ASCII letters
 * only.
 *
 * pI     - interval of the tag name inside the lexer buffer
 * pIndex - out: index of the matching entry, or iLexer->elementCount when the
 *          whole dictionary was scanned without a match. It is NOT written
 *          when the function bails out early (buffer lookup failure, byte
 *          count mismatch or a character decode error).
 *
 * Returns the matching dictionary entry, or NULL if there is none or an
 * error occurred.
 */
NW_HTMLP_ElementDescriptionConst_t* CHtmlpParser::NW_HTMLP_ElementFindTagFromDoc(
  NW_HTMLP_Interval_t* pI,
  NW_HTMLP_ElementTableIndex_t* pIndex)
{
  NW_Uint8* pDocTag;
  NW_Uint32 tagByteCount = NW_HTMLP_Interval_ByteCount(pI);
  NW_Uint32 tagCharCount;
  NW_Uint32 charPos;
  NW_Int32 readOffset;
  NW_Int32 consumed;
  NW_Ucs2 docChar;
  NW_HTMLP_ElementTableIndex_t entry;
  NW_Bool found = NW_FALSE;
  TBrowserStatusCode status;

  /* get a direct pointer to the tag bytes inside the lexer buffer */
  status = NW_HTMLP_Lexer_DataAddressFromBuffer(iLexer, pI->start,
                                                &tagByteCount,
                                                &pDocTag);
  if (BRSR_STAT_IS_FAILURE(status)) {
    return NULL;
  }

  /* the buffer must be able to supply the whole interval */
  NW_ASSERT(tagByteCount == NW_HTMLP_Interval_ByteCount(pI));
  if (tagByteCount != NW_HTMLP_Interval_ByteCount(pI)) {
    return NULL;
  }

  tagCharCount = NW_HTMLP_Interval_CharCount(pI);

  for (entry = 0; entry < iLexer->elementCount; entry++) {
    NW_HTMLP_ElementDescriptionConst_t* pCandidate =
      &((iLexer->pElementDictionary)[entry]);

    found = NW_FALSE;

    /* cheap reject: lengths must agree before comparing characters */
    if (tagCharCount != pCandidate->tag[0]) {
      continue;
    }

    found = NW_TRUE;
    readOffset = 0;
    for (charPos = 0; charPos < tagCharCount; charPos++) {
      consumed = NW_String_readChar(pDocTag + readOffset,
                                    &docChar, iLexer->encoding);
      if (consumed == -1) {
        return NULL;
      }
      readOffset += consumed;

      /* fold ASCII upper case from the document to lower case */
      if ((docChar >= (NW_Ucs2)'A') && (docChar <= (NW_Ucs2)'Z')) {
        docChar = (NW_Ucs2)(docChar + 0x20);
      }

      if (docChar != (NW_Ucs2)(pCandidate->tag[charPos + 1])) {
        found = NW_FALSE;
        break;
      }
    }

    if (found == NW_TRUE) {
      break;
    }
  }

  *pIndex = entry;
  if (found != NW_TRUE) {
    return NULL;
  }
  return &((iLexer->pElementDictionary)[entry]);
}
|
2682 |
||
2683 |
||
2684 |
/*
 * Allocates a fresh element parse state into iElementParseState: an empty
 * open-element stack (stack pointer -1) with the initial capacity, inPCDATA
 * cleared and readPosition 0.
 *
 * Returns KBrsrSuccess, or KBrsrOutOfMemory if either allocation fails; in
 * the failure case iElementParseState is left NULL.
 */
TBrowserStatusCode CHtmlpParser::NW_HTMLP_ElementParseState_New()
{
  NW_HTMLP_ElementParseState_t* pState =
    (NW_HTMLP_ElementParseState_t*)NW_Mem_Malloc(sizeof(NW_HTMLP_ElementParseState_t));

  iElementParseState = pState;
  if (pState == NULL) {
    return KBrsrOutOfMemory;
  }

  /* scalar fields first; -1 marks an empty stack */
  pState->openElementStackPointer = -1;
  pState->openElementStackCapacity = NW_HTMLP_OPEN_ELEMENT_INITIAL_STACK_CAPACITY;
  pState->inPCDATA = NW_FALSE;
  pState->readPosition = 0;

  pState->pOpenElementStack =
    (NW_HTMLP_ElementTableIndex_t*)NW_Mem_Malloc(sizeof(NW_HTMLP_ElementTableIndex_t)
                                                 * pState->openElementStackCapacity);
  if (pState->pOpenElementStack == NULL) {
    /* do not leave a half-built state behind */
    NW_Mem_Free(pState);
    iElementParseState = NULL;
    return KBrsrOutOfMemory;
  }

  return KBrsrSuccess;
}
|
2705 |
||
2706 |
/*
 * Replaces *ppS with a deep copy of iElementParseState.
 *
 * Whatever *ppS pointed to before is released first. The copy gets its own
 * open-element stack with the same capacity; only the live entries
 * (indices 0..openElementStackPointer) are copied into it.
 *
 * Returns KBrsrSuccess, or KBrsrOutOfMemory with *ppS set to NULL when an
 * allocation fails.
 */
TBrowserStatusCode CHtmlpParser::NW_HTMLP_ElementParseState_Clone(NW_HTMLP_ElementParseState_t** ppS)
{
  NW_HTMLP_ElementParseState_t* pCopy;
  NW_Int32 slot;
  NW_Int32 liveCount;

  /* drop the previous clone, if any */
  NW_HTMLP_ElementParseState_Delete(ppS);

  pCopy = (NW_HTMLP_ElementParseState_t*)NW_Mem_Malloc(sizeof(NW_HTMLP_ElementParseState_t));
  *ppS = pCopy;
  if (pCopy == NULL) {
    return KBrsrOutOfMemory;
  }

  /* shallow copy of every field; the stack pointer member is replaced below */
  *pCopy = *iElementParseState;

  pCopy->pOpenElementStack =
    (NW_HTMLP_ElementTableIndex_t*)NW_Mem_Malloc(sizeof(NW_HTMLP_ElementTableIndex_t)
                                                 * pCopy->openElementStackCapacity);
  if (pCopy->pOpenElementStack == NULL) {
    NW_Mem_Free(pCopy);
    *ppS = NULL;
    return KBrsrOutOfMemory;
  }

  /* copy the entries currently on the stack */
  liveCount = iElementParseState->openElementStackPointer + 1;
  for (slot = 0; slot < liveCount; slot++) {
    pCopy->pOpenElementStack[slot] = iElementParseState->pOpenElementStack[slot];
  }

  return KBrsrSuccess;
}
|
2731 |
||
2732 |
/*
 * Releases an element parse state and its open-element stack, then sets the
 * caller's pointer to NULL.
 *
 * ppS - address of the state pointer to destroy. A NULL ppS, or a *ppS that
 *       is already NULL, is a no-op, so the call is safe to repeat.
 */
void CHtmlpParser::NW_HTMLP_ElementParseState_Delete(NW_HTMLP_ElementParseState_t** ppS)
{
  if (ppS == NULL) {
    return;
  }

  if (*ppS != NULL) {
    /* free the owned stack before the structure that points at it */
    NW_Mem_Free((*ppS)->pOpenElementStack);
    NW_Mem_Free(*ppS);
    *ppS = NULL;
  }
}
|
2739 |
||
2740 |
/*
 * Pushes an element table index onto the open-element stack, growing the
 * stack by NW_HTMLP_OPEN_ELEMENT_STACK_GROW_BY entries when it is full.
 *
 * index - element table index to push
 *
 * Returns KBrsrSuccess, or KBrsrOutOfMemory if the stack had to grow and the
 * allocation failed. On failure the stack (contents, stack pointer and
 * capacity) is left exactly as it was before the call; the stack pointer is
 * only advanced once the new slot is known to exist.
 */
TBrowserStatusCode CHtmlpParser::NW_HTMLP_ElementParseState_Push(NW_HTMLP_ElementTableIndex_t index)
{
  NW_ASSERT(iElementParseState->openElementStackPointer >= -1);
  NW_ASSERT(iElementParseState->openElementStackPointer < iElementParseState->openElementStackCapacity);

  /* slot the new entry will occupy; not committed until storage is ensured */
  const NW_Int32 newTop = iElementParseState->openElementStackPointer + 1;

  if (newTop == iElementParseState->openElementStackCapacity) {
    /* stack is full: allocate a larger one and move the entries over */
    NW_Int32 newStackCapacity = (iElementParseState->openElementStackCapacity
                                 + NW_HTMLP_OPEN_ELEMENT_STACK_GROW_BY);
    NW_HTMLP_ElementTableIndex_t* pNewStack
      = ((NW_HTMLP_ElementTableIndex_t*)
         NW_Mem_Malloc(sizeof(NW_HTMLP_ElementTableIndex_t) * newStackCapacity));
    if (pNewStack == NULL) {
      /* nothing has been modified yet, so the state stays consistent */
      return KBrsrOutOfMemory;
    }
    NW_Mem_memcpy(pNewStack, iElementParseState->pOpenElementStack,
                  (sizeof(NW_HTMLP_ElementTableIndex_t)
                   * iElementParseState->openElementStackCapacity));
    NW_Mem_Free(iElementParseState->pOpenElementStack);
    iElementParseState->pOpenElementStack = pNewStack;
    iElementParseState->openElementStackCapacity = newStackCapacity;
  }

  (iElementParseState->pOpenElementStack)[newTop] = index;
  iElementParseState->openElementStackPointer = newTop;

  return KBrsrSuccess;
}
|
2768 |
||
2769 |
/*
 * Removes and returns the top entry of the open-element stack.
 *
 * Callers are expected to pop only from a non-empty stack (asserted in debug
 * builds). If the stack is empty anyway, 0 is returned and the stack is left
 * untouched; note that 0 is only a placeholder in that case, not a real
 * popped entry.
 */
NW_HTMLP_ElementTableIndex_t CHtmlpParser::NW_HTMLP_ElementParseState_Pop()
{
  NW_ASSERT(iElementParseState->openElementStackPointer >= 0);
  NW_ASSERT(iElementParseState->openElementStackPointer < iElementParseState->openElementStackCapacity);

  if (iElementParseState->openElementStackPointer < 0) {
    /* BUG: this is a bogus value but something needs to be returned */
    return 0;
  }

  const NW_HTMLP_ElementTableIndex_t topElement =
    (iElementParseState->pOpenElementStack)[iElementParseState->openElementStackPointer];
  iElementParseState->openElementStackPointer -= 1;
  return topElement;
}
|
2780 |
||
2781 |
/*
 * Performs the implied closing of open elements caused by the start of
 * pElement.
 *
 * pElement->closes and pElement->blocks are length-prefixed lists of element
 * table indices (entry [0] is the count). The open-element stack is walked
 * from the top down:
 *   - if the entry is in the blocks list, processing stops immediately;
 *   - if the entry is in the closes list, every element from the top of the
 *     stack down to and including that entry is popped, and the end-tag
 *     callback is invoked for each popped element that is a dictionary
 *     element (index below NW_HTMLP_DTD_ElementTableMask).
 * The walk then continues with the entries below, so several groups can be
 * closed by one call.
 *
 * pElement - description of the element being opened; NULL or an element
 *            without a closes list closes nothing.
 *
 * Returns KBrsrSuccess, or the failure status of the end-tag callback.
 */
TBrowserStatusCode CHtmlpParser::NW_HTMLP_ElementParseState_ImpliedClosings(NW_HTMLP_ElementDescriptionConst_t* pElement)
{
  const NW_Uint8* pBuf;
  NW_Int32 sp;
  NW_Uint32 i;
  NW_HTMLP_ElementTableIndex_t stackTopElement;
  TBrowserStatusCode e;
  NW_Uint8 l;

  if ((pElement == NULL) || (pElement->closes == NULL)) {
    return KBrsrSuccess;
  }

  for (sp = iElementParseState->openElementStackPointer; sp >= 0; sp--) {
    /* check blocks, if any: a blocking element ends implied closing */
    if (pElement->blocks != NULL) {
      for (i = 0; i < pElement->blocks[0]; i++) {
        if (pElement->blocks[i+1] == iElementParseState->pOpenElementStack[sp]) {
          return KBrsrSuccess;
        }
      }
    }

    /* check closes */
    for (i = 0; i < pElement->closes[0]; i++) {
      if (pElement->closes[i+1] != iElementParseState->pOpenElementStack[sp]) {
        continue;
      }

      /* close everything from the stack top through the matched entry */
      while (sp <= iElementParseState->openElementStackPointer) {
        stackTopElement = NW_HTMLP_ElementParseState_Pop();
        if (stackTopElement < NW_HTMLP_DTD_ElementTableMask) {
          /* end tag callback (close element) */
          l = (iLexer->pElementDictionary)[stackTopElement].tag[0];
          pBuf = &((iLexer->pElementDictionary)[stackTopElement].tag[1]);
          if (iCBs->endTagCB != NULL) {
            e = (*(iCBs->endTagCB))(l, pBuf, NW_FALSE, /* not empty */
                                    iCBs->pClientPointer);
            if (BRSR_STAT_IS_FAILURE(e)) {
              return e;
            }
          }
        }
      }

      /* The slot at sp has just been popped; do not keep comparing the
         remaining closes entries against that stale slot. */
      break;
    }
  }
  return KBrsrSuccess;
}
|
2833 |
||
2834 |
//In this function if(index >= NW_HTMLP_DTD_ElementTableMask) that implies that |
|
2835 |
//do closing of the Non DTD tag and use parameter "tagInterval" to find the |
|
2836 |
//name. So, parameter "index" is used to distinguish between the dictionary and |
|
2837 |
//non-dictionary elements. |
|
2838 |
||
2839 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_ElementParseState_ExplicitClosing( |
|
2840 |
NW_HTMLP_ElementTableIndex_t index, |
|
2841 |
NW_HTMLP_ElementDescriptionConst_t* pElement) |
|
2842 |
{ |
|
2843 |
const NW_Uint8* pBuf; |
|
2844 |
NW_Int32 sp; |
|
2845 |
NW_HTMLP_ElementTableIndex_t stackTopElement; |
|
2846 |
TBrowserStatusCode e = KBrsrSuccess; |
|
2847 |
NW_Uint8 l; |
|
2848 |
||
2849 |
// pElement is NULL case of Non DTD Elements. |
|
2850 |
||
2851 |
if(index < NW_HTMLP_DTD_ElementTableMask) |
|
2852 |
{ |
|
2853 |
if( pElement == NULL) |
|
2854 |
{ |
|
2855 |
return KBrsrSuccess; |
|
2856 |
} |
|
2857 |
} |
|
2858 |
||
2859 |
/* ignore the html and body close tags since there may be more than one of them */ |
|
2860 |
if (iIsHtml && (index == HTMLP_HTML_TAG_INDEX_HTML || index == HTMLP_HTML_TAG_INDEX_BODY)) |
|
2861 |
return KBrsrSuccess; |
|
2862 |
||
2863 |
/* If matching oe on stack at any position, then close all open |
|
2864 |
through and including match. If no matching oe on stack then |
|
2865 |
ignore end tag. */ |
|
2866 |
for (sp = iElementParseState->openElementStackPointer; sp >= 0; sp--) { |
|
2867 |
if((iIsHtml && index == HTMLP_HTML_TAG_INDEX_TD) && |
|
2868 |
(iElementParseState->pOpenElementStack[sp] == HTMLP_HTML_TAG_INDEX_TR || |
|
2869 |
iElementParseState->pOpenElementStack[sp] == HTMLP_HTML_TAG_INDEX_TABLE)) |
|
2870 |
{ |
|
2871 |
break; |
|
2872 |
} |
|
2873 |
if((iIsHtml && index == HTMLP_HTML_TAG_INDEX_TR) && |
|
2874 |
(iElementParseState->pOpenElementStack[sp] == HTMLP_HTML_TAG_INDEX_TD || |
|
2875 |
iElementParseState->pOpenElementStack[sp] == HTMLP_HTML_TAG_INDEX_TABLE)) |
|
2876 |
{ |
|
2877 |
break; |
|
2878 |
} |
|
2879 |
if (index == iElementParseState->pOpenElementStack[sp]) { |
|
2880 |
while (sp <= iElementParseState->openElementStackPointer) |
|
2881 |
{ |
|
2882 |
stackTopElement = NW_HTMLP_ElementParseState_Pop(); |
|
2883 |
||
2884 |
if(stackTopElement < NW_HTMLP_DTD_ElementTableMask) |
|
2885 |
{ |
|
2886 |
/* end tag callback (close element) */ |
|
2887 |
l = (iLexer->pElementDictionary)[stackTopElement].tag[0]; |
|
2888 |
pBuf = &((iLexer->pElementDictionary)[stackTopElement].tag[1]); |
|
2889 |
if (iCBs->endTagCB != NULL) { |
|
2890 |
e = (*(iCBs->endTagCB))(l, pBuf, NW_FALSE, /* not empty */ |
|
2891 |
iCBs->pClientPointer); |
|
2892 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
2893 |
return e; |
|
2894 |
} |
|
2895 |
} |
|
2896 |
}//end if(stackTopElement < NW_HTMLP_DTD_ElementTableMask) |
|
2897 |
}//end while |
|
2898 |
/* stop when first match found */ |
|
2899 |
break; |
|
2900 |
} |
|
2901 |
} |
|
2902 |
return KBrsrSuccess; |
|
2903 |
} |
|
2904 |
||
2905 |
/*
 * Closes every element that is still open: pops the whole open-element stack
 * from the top down and invokes the end-tag callback for each popped
 * dictionary element (index below NW_HTMLP_DTD_ElementTableMask).
 *
 * Returns KBrsrSuccess, or the failure status of the end-tag callback, in
 * which case the elements below the failing one remain on the stack.
 */
TBrowserStatusCode CHtmlpParser::NW_HTMLP_ElementParseState_CloseAll()
{
  /* number of entries on the stack when the call started */
  NW_Int32 remaining = iElementParseState->openElementStackPointer + 1;

  while (remaining > 0) {
    remaining--;

    const NW_HTMLP_ElementTableIndex_t closedElement = NW_HTMLP_ElementParseState_Pop();

    /* non-dictionary elements are popped silently */
    if (!(closedElement < NW_HTMLP_DTD_ElementTableMask)) {
      continue;
    }

    /* end tag callback (close element) */
    const NW_Uint8 tagLength = (iLexer->pElementDictionary)[closedElement].tag[0];
    const NW_Uint8* pTagName = &((iLexer->pElementDictionary)[closedElement].tag[1]);
    if (iCBs->endTagCB != NULL) {
      TBrowserStatusCode status =
        (*(iCBs->endTagCB))(tagLength, pTagName, NW_FALSE, /* not empty */
                            iCBs->pClientPointer);
      if (BRSR_STAT_IS_FAILURE(status)) {
        return status;
      }
    }
  }

  return KBrsrSuccess;
}
|
2935 |
||
2936 |
/* |
|
2937 |
reset all the special element handling */ |
|
2938 |
void CHtmlpParser::NW_HTMLP_SPL_Elem_Initialize() |
|
2939 |
{ |
|
2940 |
NW_Mem_memset(iSPLElemHandling->tagName,0,128); |
|
2941 |
iSPLElemHandling->type = NW_HTMLP_SPL_NONE; |
|
2942 |
iSPLElemHandling->splHandle = NW_FALSE; |
|
2943 |
} |
|
2944 |
||
2945 |
/* |
|
2946 |
on entry: This function checks if any spcial handling is required for the |
|
2947 |
........: the elements if so then set the flag for the this and copy the |
|
2948 |
........: tag name for the latter use. |
|
2949 |
........: |
|
2950 |
........: |
|
2951 |
on return: void |
|
2952 |
||
2953 |
*/ |
|
2954 |
void CHtmlpParser::NW_HTMLP_SPL_Elem_Setup(NW_HTMLP_ElementTableIndex_t elementIndex, |
|
2955 |
NW_Bool findCharset) |
|
2956 |
{ |
|
2957 |
NW_ASSERT(iSPLElemHandling->type == NW_HTMLP_SPL_NONE); |
|
2958 |
NW_ASSERT(iSPLElemHandling->splHandle == NW_FALSE); |
|
2959 |
||
2960 |
iSPLElemHandling->splHandle = NW_TRUE; |
|
2961 |
if (iIsHtml) |
|
2962 |
{ |
|
2963 |
switch (elementIndex) |
|
2964 |
{ |
|
2965 |
case HTMLP_HTML_TAG_INDEX_A: |
|
2966 |
iSPLElemHandling->type = NW_HTMLP_SPL_ANCHOR_ATTR; |
|
2967 |
break; |
|
2968 |
case HTMLP_HTML_TAG_INDEX_META: |
|
2969 |
if (findCharset) |
|
2970 |
iSPLElemHandling->type = NW_HTMLP_SPL_META; |
|
2971 |
break; |
|
2972 |
case HTMLP_HTML_TAG_INDEX_SCRIPT: |
|
2973 |
iSPLElemHandling->type = NW_HTMLP_SPL_SCRIPT; |
|
2974 |
break; |
|
2975 |
case HTMLP_HTML_TAG_INDEX_NOSCRIPT: |
|
2976 |
iSPLElemHandling->type = NW_HTMLP_SPL_NOSCRIPT; |
|
2977 |
break; |
|
2978 |
case HTMLP_HTML_TAG_INDEX_BODY: |
|
2979 |
iSPLElemHandling->type = NW_HTMLP_SPL_BODY; |
|
2980 |
break; |
|
2981 |
case HTMLP_HTML_TAG_INDEX_TITLE: |
|
2982 |
iSPLElemHandling->type = NW_HTMLP_SPL_TITLE; |
|
2983 |
break; |
|
2984 |
case HTMLP_HTML_TAG_INDEX_BASE: |
|
2985 |
iSPLElemHandling->type = NW_HTMLP_SPL_BASE_ATTR; |
|
2986 |
break; |
|
2987 |
default: |
|
2988 |
iSPLElemHandling->type = NW_HTMLP_SPL_NONE; |
|
2989 |
} |
|
2990 |
} |
|
2991 |
else |
|
2992 |
{ |
|
2993 |
switch (elementIndex) |
|
2994 |
{ |
|
2995 |
case HTMLP_WML_TAG_INDEX_A: |
|
2996 |
iSPLElemHandling->type = NW_HTMLP_SPL_ANCHOR_ATTR; |
|
2997 |
break; |
|
2998 |
case HTMLP_WML_TAG_INDEX_META: |
|
2999 |
if (findCharset) |
|
3000 |
iSPLElemHandling->type = NW_HTMLP_SPL_META; |
|
3001 |
break; |
|
3002 |
default: |
|
3003 |
iSPLElemHandling->type = NW_HTMLP_SPL_NONE; |
|
3004 |
} |
|
3005 |
} |
|
3006 |
}//end NW_HTMLP_SPL_Elem_Setup() |
|
3007 |
||
3008 |
||
3009 |
/* |
|
3010 |
* Function to handle the special case after <br> element. If the lot of |
|
3011 |
* spaces are there in the document after the <br> then this gives |
|
3012 |
* the false code page switch. |
|
3013 |
*/ |
|
3014 |
||
3015 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_BR_SPL_Handle() |
|
3016 |
{ |
|
3017 |
TBrowserStatusCode e = KBrsrSuccess; |
|
3018 |
NW_Bool match = NW_TRUE; |
|
3019 |
NW_Bool matchSpl = NW_TRUE; |
|
3020 |
||
3021 |
while(match) |
|
3022 |
{ |
|
3023 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, 0x0, &matchSpl); |
|
3024 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3025 |
{ |
|
3026 |
return e; |
|
3027 |
} |
|
3028 |
if (matchSpl == NW_FALSE) |
|
3029 |
{ |
|
3030 |
// check for '0xa' for "nl" |
|
3031 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, 0xa, &matchSpl); |
|
3032 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3033 |
{ |
|
3034 |
return e; |
|
3035 |
} |
|
3036 |
} |
|
3037 |
||
3038 |
if( (matchSpl == NW_TRUE) && !NW_HTMLP_Lexer_AtEnd(iLexer) ) |
|
3039 |
{ |
|
3040 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
3041 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3042 |
{ |
|
3043 |
return e; |
|
3044 |
} |
|
3045 |
} |
|
3046 |
else |
|
3047 |
{ |
|
3048 |
match = NW_FALSE; |
|
3049 |
} |
|
3050 |
}//end while |
|
3051 |
||
3052 |
return e; |
|
3053 |
||
3054 |
}//end CHtmlpParser::NW_HTMLP_BR_SPL_Handle() |
|
3055 |
||
3056 |
||
3057 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_IfLegalConsumeElementStart(NW_Bool* pMatch) |
|
3058 |
{ |
|
3059 |
TBrowserStatusCode e = KBrsrSuccess; |
|
3060 |
NW_HTMLP_Interval_t interval; |
|
3061 |
NW_Bool match; |
|
3062 |
NW_Bool emptyElement; |
|
3063 |
NW_HTMLP_Lexer_Position_t position; |
|
3064 |
NW_HTMLP_Lexer_Position_t startPosition; |
|
3065 |
NW_HTMLP_Lexer_Position_t positionAttributeList; |
|
3066 |
NW_HTMLP_ElementDescriptionConst_t* pElement; |
|
3067 |
NW_HTMLP_ElementTableIndex_t elementIndex; |
|
3068 |
NW_HTMLP_ElementTableIndex_t insertedIndex = 0; |
|
3069 |
NW_Bool inserted = NW_FALSE; |
|
3070 |
NW_Bool hasTable, hasTr, hasTd, needTable, needTr, needTd; |
|
3071 |
NW_Int32 i; |
|
3072 |
||
3073 |
*pMatch = NW_FALSE; |
|
3074 |
hasTable = hasTr = hasTd = needTr = needTd = needTable =NW_FALSE; |
|
3075 |
||
3076 |
NW_HTMLP_SPL_Elem_Initialize(); |
|
3077 |
NW_HTMLP_Interval_Init(&interval); |
|
3078 |
NW_HTMLP_Lexer_GetPosition(iLexer, &position); |
|
3079 |
NW_HTMLP_Lexer_GetPosition(iLexer, &startPosition); |
|
3080 |
/* advance past '<' */ |
|
3081 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
3082 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3083 |
return e; |
|
3084 |
} |
|
3085 |
/* get tag */ |
|
3086 |
e = NW_HTMLP_ParseName(&match, &interval); |
|
3087 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3088 |
return e; |
|
3089 |
} |
|
3090 |
if (match == NW_FALSE) { |
|
3091 |
// This isn't a start tag of an element. |
|
3092 |
e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '%', &match); |
|
3093 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3094 |
{ |
|
3095 |
return e; |
|
3096 |
} |
|
3097 |
if (match == NW_TRUE) |
|
3098 |
{ |
|
3099 |
// if page contains <html><%response.cachecontrol=public%><head>... |
|
3100 |
// ignore <%response.cachecontrol=public%> |
|
3101 |
// for more detail, see JHAN-5XSR7Y |
|
3102 |
// if tag starts with <%, then ignore it by forcing match to NW_TRUE, |
|
3103 |
// and setting iLexer position to where <% ends |
|
3104 |
do |
|
3105 |
{ |
|
3106 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
3107 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3108 |
{ |
|
3109 |
return e; |
|
3110 |
} |
|
3111 |
if (NW_HTMLP_Lexer_AtEnd(iLexer)) |
|
3112 |
{ |
|
3113 |
/* not well-formed at EOF */ |
|
3114 |
match = NW_TRUE; |
|
3115 |
return KBrsrSuccess; |
|
3116 |
} |
|
3117 |
e = NW_HTMLP_Lexer_AsciiCharCompare (iLexer, '>', &match); |
|
3118 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3119 |
{ |
|
3120 |
return e; |
|
3121 |
} |
|
3122 |
} while (!match); |
|
3123 |
NW_HTMLP_Lexer_Advance(iLexer); |
|
3124 |
} |
|
3125 |
else |
|
3126 |
{ |
|
3127 |
// no <% found |
|
3128 |
e = NW_HTMLP_Lexer_SetPosition(iLexer, &position); |
|
3129 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3130 |
return e; |
|
3131 |
} |
|
3132 |
} |
|
3133 |
return KBrsrSuccess; |
|
3134 |
} |
|
3135 |
||
3136 |
// look up the tag in our table and convert from using an interval to |
|
3137 |
// a table entry |
|
3138 |
pElement = NW_HTMLP_ElementFindTagFromDoc(&interval, &elementIndex); |
|
3139 |
||
3140 |
// If tag is unknown, consume it, ignoring it as though it wasn't |
|
3141 |
// there. Only the tag is consumed. Everything between it and its |
|
3142 |
// closing tag are kept and processed as if the tag weren't there |
|
3143 |
// in the first place. |
|
3144 |
if (!pElement) |
|
3145 |
{ |
|
3146 |
match = NW_FALSE; |
|
3147 |
do |
|
3148 |
{ |
|
3149 |
if (NW_HTMLP_Lexer_AtEnd(iLexer)) |
|
3150 |
{ |
|
3151 |
// This could be illegal tag or end of chunk in the middle of a tag |
|
3152 |
if (!iDocComplete) |
|
3153 |
{ |
|
3154 |
TInt lastTextLen = iLexer->byteCount - startPosition.readPosition; |
|
3155 |
||
3156 |
iLastTextBuf = (NW_Buffer_t *)NW_Buffer_New(lastTextLen); |
|
3157 |
if (iLastTextBuf == NULL) { |
|
3158 |
return KBrsrOutOfMemory; |
|
3159 |
} |
|
3160 |
else |
|
3161 |
{ |
|
3162 |
(void)NW_Mem_memcpy(iLastTextBuf->data, iLexer->pBuf + startPosition.readPosition, lastTextLen); |
|
3163 |
iLastTextBuf->length = lastTextLen; |
|
3164 |
} |
|
3165 |
} |
|
3166 |
match = NW_TRUE; |
|
3167 |
return KBrsrSuccess; |
|
3168 |
} |
|
3169 |
e = NW_HTMLP_Lexer_AsciiCharCompare (iLexer, '>', &match); |
|
3170 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3171 |
{ |
|
3172 |
return e; |
|
3173 |
} |
|
3174 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
3175 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3176 |
{ |
|
3177 |
return e; |
|
3178 |
} |
|
3179 |
} while (!match); |
|
3180 |
*pMatch = NW_TRUE; |
|
3181 |
return KBrsrSuccess; |
|
3182 |
} |
|
3183 |
||
3184 |
||
3185 |
if (iIsHtml && elementIndex == HTMLP_HTML_TAG_INDEX_NOSCRIPT) |
|
3186 |
{ |
|
3187 |
if (iWithinNoscript) |
|
3188 |
{ |
|
3189 |
// already within the noscript, treat this noscript as </noscript> |
|
3190 |
e = NW_HTMLP_IfLegalConsumeAttributes(&match, &emptyElement, NULL, NULL); |
|
3191 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3192 |
return e; |
|
3193 |
} |
|
3194 |
if (match == NW_FALSE) |
|
3195 |
{ |
|
3196 |
/* This is a malformed attribute list. */ |
|
3197 |
e = NW_HTMLP_Lexer_SetPosition(iLexer, &position); |
|
3198 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3199 |
{ |
|
3200 |
return e; |
|
3201 |
} |
|
3202 |
*pMatch = NW_FALSE; |
|
3203 |
return KBrsrSuccess; |
|
3204 |
} |
|
3205 |
||
3206 |
e = NW_HTMLP_ElementParseState_ExplicitClosing(elementIndex, pElement); |
|
3207 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3208 |
{ |
|
3209 |
return e; |
|
3210 |
} |
|
3211 |
iWithinNoscript = NW_FALSE; |
|
3212 |
*pMatch = NW_TRUE; |
|
3213 |
iElementParseState->readPosition = iLexer->readPosition; |
|
3214 |
return KBrsrSuccess; |
|
3215 |
} |
|
3216 |
else |
|
3217 |
{ |
|
3218 |
// set within noscript to be true |
|
3219 |
iWithinNoscript = NW_TRUE; |
|
3220 |
} |
|
3221 |
} |
|
3222 |
||
3223 |
/* Some contents are there before the <html> tags. Flag to handle these contents. Special case |
|
3224 |
* for handling the page switch. This is checked in the first segment only. |
|
3225 |
*/ |
|
3226 |
||
3227 |
if( iIsHtml && firstSegment && iCBs->startTagCB && (elementIndex == HTMLP_HTML_TAG_INDEX_HTML) ) |
|
3228 |
{ |
|
3229 |
htmlTagFound = NW_TRUE; |
|
3230 |
} |
|
3231 |
||
3232 |
//If some special handling of the element is required. |
|
3233 |
if(pElement && (pElement->splHandling == NW_TRUE) ) |
|
3234 |
{ |
|
3235 |
NW_HTMLP_SPL_Elem_Setup(elementIndex, iNeedCharsetDetect); |
|
3236 |
||
3237 |
if (iSPLElemHandling->type == NW_HTMLP_SPL_SCRIPT) |
|
3238 |
{ |
|
3239 |
iLastScriptStart = position.readPosition; |
|
3240 |
setValidMarks(); |
|
3241 |
} |
|
3242 |
else if(iSPLElemHandling->type == NW_HTMLP_SPL_NOSCRIPT ) |
|
3243 |
{ |
|
3244 |
iLastScriptStart = position.readPosition; |
|
3245 |
setValidMarks(); |
|
3246 |
} |
|
3247 |
else if (iSPLElemHandling->type == NW_HTMLP_SPL_BODY) |
|
3248 |
{ |
|
3249 |
if (iNeedCharsetDetect) |
|
3250 |
{ |
|
3251 |
e = NW_HTMLP_SPL_Elem_Handle_BodyStart(pElement); |
|
3252 |
return e; |
|
3253 |
} |
|
3254 |
} |
|
3255 |
} |
|
3256 |
||
3257 |
/* |
|
3258 |
now have a tag but must check attribute list before executing callbacks |
|
3259 |
*/ |
|
3260 |
||
3261 |
/* first pass over attribute list just checks syntax and is |
|
3262 |
done without callbacks */ |
|
3263 |
NW_HTMLP_Lexer_GetPosition(iLexer, &positionAttributeList); |
|
3264 |
e = NW_HTMLP_IfLegalConsumeAttributes(&match, &emptyElement, NULL, iSPLElemHandling); |
|
3265 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3266 |
return e; |
|
3267 |
} |
|
3268 |
if (match == NW_FALSE) { |
|
3269 |
/* This is a malformed attribute list. */ |
|
3270 |
e = NW_HTMLP_Lexer_SetPosition(iLexer, &position); |
|
3271 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3272 |
return e; |
|
3273 |
} |
|
3274 |
return KBrsrSuccess; |
|
3275 |
} |
|
3276 |
||
3277 |
/* do implied closing of open elements */ |
|
3278 |
e = NW_HTMLP_ElementParseState_ImpliedClosings(pElement); |
|
3279 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3280 |
return e; |
|
3281 |
} |
|
3282 |
/* tag CB */ |
|
3283 |
if (iCBs->startTagCB != NULL) |
|
3284 |
{ |
|
3285 |
// first handle the case of insert necessary tr or td |
|
3286 |
if (iIsHtml) |
|
3287 |
{ |
|
3288 |
for (i = 0; i <= iElementParseState->openElementStackPointer; i++) |
|
3289 |
{ |
|
3290 |
switch ((iElementParseState->pOpenElementStack)[i]) |
|
3291 |
{ |
|
3292 |
case HTMLP_HTML_TAG_INDEX_TABLE: |
|
3293 |
hasTable = NW_TRUE; |
|
3294 |
hasTr = hasTd = NW_FALSE; |
|
3295 |
break; |
|
3296 |
case HTMLP_HTML_TAG_INDEX_TR: |
|
3297 |
hasTr = NW_TRUE; |
|
3298 |
hasTd = NW_FALSE; |
|
3299 |
break; |
|
3300 |
case HTMLP_HTML_TAG_INDEX_TD: |
|
3301 |
case HTMLP_HTML_TAG_INDEX_TH: |
|
3302 |
hasTd = NW_TRUE; |
|
3303 |
break; |
|
3304 |
} |
|
3305 |
} |
|
3306 |
} |
|
3307 |
else |
|
3308 |
{ // !iIsHtml == WML content |
|
3309 |
// if content is WML make sure <table>, <tr> and <td> are present or needed. |
|
3310 |
// if one or more is needed, they will be added to stack (openElementStack), below. |
|
3311 |
for (i = 0; i <= iElementParseState->openElementStackPointer; i++) |
|
3312 |
{ |
|
3313 |
switch ((iElementParseState->pOpenElementStack)[i]) |
|
3314 |
{ |
|
3315 |
case HTMLP_WML_TAG_INDEX_TABLE: |
|
3316 |
hasTable = NW_TRUE; |
|
3317 |
hasTr = hasTd = NW_FALSE; |
|
3318 |
break; |
|
3319 |
case HTMLP_WML_TAG_INDEX_TR: |
|
3320 |
hasTr = NW_TRUE; |
|
3321 |
hasTd = NW_FALSE; |
|
3322 |
break; |
|
3323 |
case HTMLP_WML_TAG_INDEX_TD: |
|
3324 |
hasTd = NW_TRUE; |
|
3325 |
break; |
|
3326 |
} |
|
3327 |
} |
|
3328 |
} |
|
3329 |
if (hasTable) |
|
3330 |
{ |
|
3331 |
if ( (iIsHtml && (elementIndex == HTMLP_HTML_TAG_INDEX_TD || elementIndex == HTMLP_HTML_TAG_INDEX_TH)) |
|
3332 |
|| (!iIsHtml && (elementIndex == HTMLP_WML_TAG_INDEX_TD)) ) |
|
3333 |
{ |
|
3334 |
if (!hasTr) |
|
3335 |
needTr = NW_TRUE; |
|
3336 |
} |
|
3337 |
/* try to use the form direct under the table, because it's very comman and table |
|
3338 |
do handles it */ |
|
3339 |
else if ( iIsHtml && (elementIndex != HTMLP_HTML_TAG_INDEX_TR |
|
3340 |
&& elementIndex != HTMLP_HTML_TAG_INDEX_FORM |
|
3341 |
&& elementIndex != HTMLP_HTML_TAG_INDEX_CAPTION) ) |
|
3342 |
{ |
|
3343 |
if (!hasTr) |
|
3344 |
needTr = NW_TRUE; |
|
3345 |
if (!hasTd) |
|
3346 |
needTd = NW_TRUE; |
|
3347 |
} |
|
3348 |
} |
|
3349 |
else |
|
3350 |
{ |
|
3351 |
if ( (iIsHtml && elementIndex == HTMLP_HTML_TAG_INDEX_TD) |
|
3352 |
|| (!iIsHtml && elementIndex == HTMLP_WML_TAG_INDEX_TD) ) |
|
3353 |
{ |
|
3354 |
needTable = NW_TRUE; |
|
3355 |
||
3356 |
if (!hasTr) |
|
3357 |
needTr = NW_TRUE; |
|
3358 |
} |
|
3359 |
else if ( (iIsHtml && elementIndex == HTMLP_HTML_TAG_INDEX_TR) |
|
3360 |
|| (!iIsHtml && elementIndex == HTMLP_WML_TAG_INDEX_TR) ) |
|
3361 |
needTable = NW_TRUE; |
|
3362 |
} |
|
3363 |
if (needTable) |
|
3364 |
{ |
|
3365 |
TUint16 temp = HTMLP_HTML_TAG_INDEX_TABLE; |
|
3366 |
if (!iIsHtml) { |
|
3367 |
temp = HTMLP_WML_TAG_INDEX_TABLE; |
|
3368 |
} |
|
3369 |
e = (*(iCBs->startTagCB))((iLexer->pElementDictionary)[temp].tag[0], |
|
3370 |
&((iLexer->pElementDictionary)[temp].tag[1]), |
|
3371 |
iCBs->pClientPointer, NW_FALSE); |
|
3372 |
updateCurrentCP(); |
|
3373 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3374 |
return e; |
|
3375 |
} |
|
3376 |
e = NW_HTMLP_ElementParseState_Push(temp); |
|
3377 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3378 |
return e; |
|
3379 |
} |
|
3380 |
} |
|
3381 |
if (needTr) |
|
3382 |
{ |
|
3383 |
TUint16 temp = HTMLP_HTML_TAG_INDEX_TR; |
|
3384 |
if (!iIsHtml) { |
|
3385 |
temp = HTMLP_WML_TAG_INDEX_TR; |
|
3386 |
} |
|
3387 |
e = (*(iCBs->startTagCB))((iLexer->pElementDictionary)[temp].tag[0], |
|
3388 |
&((iLexer->pElementDictionary)[temp].tag[1]), |
|
3389 |
iCBs->pClientPointer, NW_FALSE); |
|
3390 |
updateCurrentCP(); |
|
3391 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3392 |
return e; |
|
3393 |
} |
|
3394 |
e = NW_HTMLP_ElementParseState_Push(temp); |
|
3395 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3396 |
return e; |
|
3397 |
} |
|
3398 |
} |
|
3399 |
if (needTd) |
|
3400 |
{ |
|
3401 |
TUint16 temp = HTMLP_HTML_TAG_INDEX_TD; |
|
3402 |
if (!iIsHtml) { |
|
3403 |
temp = HTMLP_WML_TAG_INDEX_TD; |
|
3404 |
} |
|
3405 |
e = (*(iCBs->startTagCB))((iLexer->pElementDictionary)[temp].tag[0], |
|
3406 |
&((iLexer->pElementDictionary)[temp].tag[1]), |
|
3407 |
iCBs->pClientPointer, NW_FALSE); |
|
3408 |
updateCurrentCP(); |
|
3409 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3410 |
return e; |
|
3411 |
||
3412 |
} |
|
3413 |
e = NW_HTMLP_ElementParseState_Push(temp); |
|
3414 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3415 |
return e; |
|
3416 |
} |
|
3417 |
} |
|
3418 |
||
3419 |
if (iIsHtml) |
|
3420 |
{ |
|
3421 |
switch (elementIndex) |
|
3422 |
{ |
|
3423 |
case HTMLP_HTML_TAG_INDEX_FORM: |
|
3424 |
if (!iHasForm) |
|
3425 |
{ |
|
3426 |
iHasForm = NW_TRUE; |
|
3427 |
} |
|
3428 |
break; |
|
3429 |
case HTMLP_HTML_TAG_INDEX_INPUT: |
|
3430 |
case HTMLP_HTML_TAG_INDEX_SELECT: |
|
3431 |
case HTMLP_HTML_TAG_INDEX_TEXTAREA: |
|
3432 |
case HTMLP_HTML_TAG_INDEX_LABEL: |
|
3433 |
case HTMLP_HTML_TAG_INDEX_FIELDSET: |
|
3434 |
// if the content is from script, no need to create fake form element |
|
3435 |
// this is due to some live web site page (such as mlb.com) where the |
|
3436 |
// input controls are in script, but form is outside the script. |
|
3437 |
if (!iHasForm && !iIsScript) |
|
3438 |
{ |
|
3439 |
inserted = NW_TRUE; |
|
3440 |
insertedIndex = HTMLP_HTML_TAG_INDEX_FORM; |
|
3441 |
e = (*(iCBs->startTagCB))((iLexer->pElementDictionary)[insertedIndex].tag[0], |
|
3442 |
&((iLexer->pElementDictionary)[insertedIndex].tag[1]), |
|
3443 |
iCBs->pClientPointer, NW_FALSE); |
|
3444 |
updateCurrentCP(); |
|
3445 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3446 |
return e; |
|
3447 |
} |
|
3448 |
iHasForm = NW_TRUE; |
|
3449 |
} |
|
3450 |
} |
|
3451 |
} |
|
3452 |
||
3453 |
if (pElement) |
|
3454 |
{ |
|
3455 |
e = (*(iCBs->startTagCB))((iLexer->pElementDictionary)[elementIndex].tag[0], |
|
3456 |
&((iLexer->pElementDictionary)[elementIndex].tag[1]), |
|
3457 |
iCBs->pClientPointer, NW_FALSE); |
|
3458 |
updateCurrentCP(); |
|
3459 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3460 |
return e; |
|
3461 |
} |
|
3462 |
} |
|
3463 |
} |
|
3464 |
||
3465 |
/* reposition for a second pass over attribute list */ |
|
3466 |
e = NW_HTMLP_Lexer_SetPosition(iLexer, &positionAttributeList); |
|
3467 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3468 |
return e; |
|
3469 |
} |
|
3470 |
/* second pass over attribute list with callbacks */ |
|
3471 |
e = NW_HTMLP_IfLegalConsumeAttributes(&match, &emptyElement, iCBs, iSPLElemHandling); |
|
3472 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3473 |
return e; |
|
3474 |
} |
|
3475 |
if (match == NW_FALSE) { |
|
3476 |
/* Something is unexpectedly wrong. We just did a second |
|
3477 |
pass over the attribute list (this time with callbacks enabled) and |
|
3478 |
it should always match because the first pass results check should |
|
3479 |
have caught a syntax error */ |
|
3480 |
return KBrsrFailure; |
|
3481 |
} |
|
3482 |
||
3483 |
/* accept this opening markup */ |
|
3484 |
||
3485 |
if (inserted) |
|
3486 |
{ |
|
3487 |
e = NW_HTMLP_ElementParseState_Push( insertedIndex ); |
|
3488 |
if (BRSR_STAT_IS_FAILURE(e)) { |
|
3489 |
return e; |
|
3490 |
} |
|
3491 |
} |
|
3492 |
||
3493 |
/* Sometimes spaces (0x00) after <br> tag gives false switch |
|
3494 |
* code page in the wbxml buffer. |
|
3495 |
*/ |
|
3496 |
||
3497 |
if(pElement && (elementIndex == HTMLP_HTML_TAG_INDEX_BR) && (iIsHtml == NW_TRUE) ) |
|
3498 |
{ |
|
3499 |
e = NW_HTMLP_BR_SPL_Handle(); |
|
3500 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3501 |
{ |
|
3502 |
return e; |
|
3503 |
} |
|
3504 |
} |
|
3505 |
||
3506 |
if (pElement && (pElement->contentType != EMPTY) && (emptyElement != NW_TRUE)) |
|
3507 |
{ |
|
3508 |
e = NW_HTMLP_ElementParseState_Push(elementIndex); |
|
3509 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3510 |
{ |
|
3511 |
return e; |
|
3512 |
} |
|
3513 |
if (pElement->contentType == PCDATA) |
|
3514 |
{ |
|
3515 |
iElementParseState->inPCDATA = NW_TRUE; |
|
3516 |
} |
|
3517 |
} |
|
3518 |
||
3519 |
if (pElement && (pElement->contentType == EMPTY) || (emptyElement == NW_TRUE) ) |
|
3520 |
{ |
|
3521 |
/* This takes care of Empty DTD element Tags closing */ |
|
3522 |
if (iCBs->endTagCB != NULL) |
|
3523 |
{ |
|
3524 |
e = (*(iCBs->endTagCB))(0, NULL, NW_TRUE, /* empty */ |
|
3525 |
iCBs->pClientPointer); |
|
3526 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3527 |
{ |
|
3528 |
return e; |
|
3529 |
} |
|
3530 |
} |
|
3531 |
} |
|
3532 |
/* <plaintext> handler */ |
|
3533 |
if (pElement && (pElement->contentType == PLAINTEXT ) ) |
|
3534 |
{ |
|
3535 |
NW_HTMLP_Interval_Start(&interval, iLexer); |
|
3536 |
while (!NW_HTMLP_Lexer_AtEnd(iLexer)) |
|
3537 |
{ |
|
3538 |
e = NW_HTMLP_Lexer_Advance(iLexer); |
|
3539 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3540 |
{ |
|
3541 |
return e; |
|
3542 |
} |
|
3543 |
} |
|
3544 |
||
3545 |
NW_HTMLP_Interval_Stop(&interval, iLexer); |
|
3546 |
if (iCBs->contentCB != NULL) |
|
3547 |
{ |
|
3548 |
e = (*(iCBs->contentCB))(iLexer, &interval, iCBs->pClientPointer); |
|
3549 |
if (BRSR_STAT_IS_FAILURE(e)) |
|
3550 |
{ |
|
3551 |
return e; |
|
3552 |
} |
|
3553 |
} |
|
3554 |
} |
|
3555 |
*pMatch = NW_TRUE; |
|
3556 |
iElementParseState->readPosition = iLexer->readPosition; |
|
3557 |
return KBrsrSuccess; |
|
3558 |
}//end CHtmlpParser::NW_HTMLP_IfLegalConsumeElementStart(NW_Bool* pMatch) |
|
3559 |
||
3560 |
/*
 * Try to consume an element end tag ("</name ...>") starting at the current
 * lexer position.
 *
 * pMatch - set to NW_TRUE when the end tag was accepted and consumed, and to
 *          NW_FALSE otherwise. In the NW_FALSE case the lexer is moved back to
 *          the position it had on entry (unless a helper reported a failure).
 *
 * Returns the failing status of the first lexer/parser helper that fails,
 * otherwise KBrsrSuccess.
 */
TBrowserStatusCode CHtmlpParser::NW_HTMLP_IfLegalConsumeElementEnd(NW_Bool* pMatch)
{
  NW_HTMLP_Interval_t tagName;
  NW_HTMLP_Lexer_Position_t entryPosition;
  NW_HTMLP_ElementDescriptionConst_t* pDescription;
  NW_HTMLP_ElementTableIndex_t tagIndex;
  TBrowserStatusCode status = KBrsrSuccess;
  NW_Bool ok;
  NW_Bool isEmpty;

  *pMatch = NW_FALSE;
  NW_HTMLP_Interval_Init(&tagName);
  NW_HTMLP_Lexer_GetPosition(iLexer, &entryPosition);

  /* step over the '<' */
  status = NW_HTMLP_Lexer_Advance(iLexer);
  if (BRSR_STAT_IS_FAILURE(status))
  {
    return status;
  }

  /* an end tag must continue with '/' */
  status = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '/', &ok);
  if (BRSR_STAT_IS_FAILURE(status))
  {
    return status;
  }
  if (ok == NW_FALSE)
  {
    /* not an end tag at all */
    goto rewind_no_match;
  }

  /* step over the '/' */
  status = NW_HTMLP_Lexer_Advance(iLexer);
  if (BRSR_STAT_IS_FAILURE(status))
  {
    return status;
  }

  /* read the tag name */
  status = NW_HTMLP_ParseName(&ok, &tagName);
  if (BRSR_STAT_IS_FAILURE(status))
  {
    return status;
  }
  if (ok == NW_FALSE)
  {
    /* "</" is not followed by a name, so this is not an end tag */
    goto rewind_no_match;
  }

  /* map the name interval onto an element table entry (NULL if unknown) */
  pDescription = NW_HTMLP_ElementFindTagFromDoc(&tagName, &tagIndex);

  /* Closing </script> or </noscript> in HTML clears the remembered script
     start; </noscript> additionally leaves noscript mode. */
  if (iIsHtml && pDescription && (pDescription->splHandling == NW_TRUE))
  {
    switch (tagIndex)
    {
      case HTMLP_HTML_TAG_INDEX_SCRIPT:
        iLastScriptStart = -1;
        break;

      case HTMLP_HTML_TAG_INDEX_NOSCRIPT:
        iLastScriptStart = -1;
        iWithinNoscript = NW_FALSE;
        break;

      default:
        break;
    }
  }

  /* parse the rest of the tag like an attribute list, without callbacks */
  status = NW_HTMLP_IfLegalConsumeAttributes(&ok, &isEmpty, NULL, NULL);
  if (BRSR_STAT_IS_FAILURE(status))
  {
    return status;
  }
  if (ok == NW_FALSE)
  {
    /* malformed attribute list */
    goto rewind_no_match;
  }

  /* While in PCDATA mode only the end tag of the element on top of the open
     element stack is honoured; an open <title> is also ended by any other
     end tag (handles a mis-spelled </title>). */
  if (iElementParseState->inPCDATA == NW_TRUE)
  {
    NW_ASSERT(iElementParseState->openElementStackPointer >= 0);
    NW_ASSERT(iElementParseState->openElementStackPointer < iElementParseState->openElementStackCapacity);

    const NW_Bool closesTopElement =
        (pDescription != NULL)
        && (tagIndex == (iElementParseState->pOpenElementStack)[iElementParseState->openElementStackPointer]);
    const NW_Bool titleOpen =
        (iSPLElemHandling->splHandle == NW_TRUE)
        && (iSPLElemHandling->type == NW_HTMLP_SPL_TITLE);

    if (!closesTopElement && !titleOpen)
    {
      /* end tag does not match the open element: leave it for content */
      goto rewind_no_match;
    }

    iElementParseState->inPCDATA = NW_FALSE;
    if (titleOpen)
    {
      iSPLElemHandling->type = NW_HTMLP_SPL_NONE;
    }
  }

  /* unknown tags are accepted but have nothing to close */
  if (pDescription != NULL)
  {
    status = NW_HTMLP_ElementParseState_ExplicitClosing(tagIndex, pDescription);
    if (BRSR_STAT_IS_FAILURE(status))
    {
      return status;
    }
  }

  *pMatch = NW_TRUE;
  return KBrsrSuccess;

rewind_no_match:
  /* put the lexer back where it was on entry; *pMatch stays NW_FALSE */
  status = NW_HTMLP_Lexer_SetPosition(iLexer, &entryPosition);
  if (BRSR_STAT_IS_FAILURE(status))
  {
    return status;
  }
  return KBrsrSuccess;
}
|
3693 |
||
3694 |
/*
 * Record the current state as the "last valid" restore point.
 *
 * Calls NW_HTMLP_ElementParseState_Clone on iLastValidStack (presumably a copy
 * of the active element parse state - confirm against its definition) and,
 * when a WBXML writer is attached, remembers the encoder's write index plus
 * the current tag/attribute code pages and code-page switch count.
 *
 * Always returns KBrsrSuccess.
 */
TBrowserStatusCode CHtmlpParser::setValidMarks()
{
  NW_HTMLP_ElementParseState_Clone(&iLastValidStack);

  if (!iWbxmlWriter)
  {
    /* no encoder output to mark */
    return KBrsrSuccess;
  }

  NW_HTMLP_WbxmlEncoder_t* pEncoder = (NW_HTMLP_WbxmlEncoder_t*)iCBs->pClientPointer;
  iLastValid      = pEncoder->pE->index;
  iValidTagCP     = iCurrentTagCP;
  iValidAttrCP    = iCurrentAttrCP;
  iValidCPCount   = iCurrentCPCount;

  return KBrsrSuccess;
}
|
3706 |
||
3707 |
/*
 * Roll the parser and the WBXML output back to the last valid marks and
 * publish the result.
 *
 * Steps:
 *  - trim the writer index to iLastValid and restore the open-element stack
 *    pointer and read position from iLastValidStack;
 *  - unless charset detection is still pending, rebuild iPreviousValidOutput
 *    as "previous valid output + the valid part of the writer buffer";
 *  - if a script or text run was left open, copy the lexer bytes from that
 *    point to the end of the buffer into iLastTextBuf.
 *
 * Returns KBrsrOutOfMemory if either buffer cannot be allocated, otherwise
 * KBrsrSuccess.
 */
TBrowserStatusCode CHtmlpParser::commitValidMarks()
{
  NW_Buffer_t* tmp_buf = NULL;
  NW_Uint32 lastTextLen = 0;
  NW_Uint32 previousBufLen = 0;

  /* trim the output if necessary */
  if (iWbxmlWriter && iWbxmlWriter->index != (NW_Uint32)iLastValid)
    iWbxmlWriter->index = iLastValid;
  /* NOTE(review): iLastValidStack is dereferenced unconditionally - confirm
     setValidMarks() has always run before this is called. */
  iElementParseState->openElementStackPointer = iLastValidStack->openElementStackPointer;
  iElementParseState->readPosition = iLastValidStack->readPosition;

  /* reform the valid output: iLastValid becomes the total committed length */
  if (iPreviousValidOutput)
    iLastValid += iPreviousValidOutput->length;

  // only generate output wbxml buffer when real parsing starts
  if (!iNeedCharsetDetect)
  {
    tmp_buf = iPreviousValidOutput;
    iPreviousValidOutput = NW_Buffer_New(iLastValid);
    if (!iPreviousValidOutput)
    {
      /* Release the old buffer here: it is no longer reachable through
         iPreviousValidOutput and would otherwise be leaked. */
      if (tmp_buf)
        NW_Buffer_Free(tmp_buf);
      return KBrsrOutOfMemory;
    }
    if (tmp_buf)
    {
      previousBufLen = tmp_buf->length;
      (void)NW_Mem_memcpy(iPreviousValidOutput->data, tmp_buf->data, previousBufLen);
    }
    /* NOTE(review): iWbxmlWriter is used here without the NULL check applied
       at the top of this function - confirm it cannot be NULL on this path. */
    (void)NW_Mem_memcpy(iPreviousValidOutput->data + previousBufLen,
                        iWbxmlWriter->pBuf,
                        iLastValid - previousBufLen);
    iPreviousValidOutput->length = iLastValid;

    if (tmp_buf)
      NW_Buffer_Free(tmp_buf);
  }

  /* record the last text kept from the chunk; an open script takes
     precedence over an open text run */
  if (iLastScriptStart != -1)
  {
    iLastTextBegin = iLastScriptStart;
  }

  if (iLastTextBegin != -1)
  {
    lastTextLen = iLexer->byteCount - iLastTextBegin;

    iLastTextBuf = (NW_Buffer_t *)NW_Buffer_New(lastTextLen);
    if (iLastTextBuf == NULL)
    {
      return KBrsrOutOfMemory;
    }
    (void)NW_Mem_memcpy(iLastTextBuf->data, iLexer->pBuf + iLastTextBegin, lastTextLen);
    iLastTextBuf->length = lastTextLen;
  }
  return KBrsrSuccess;
}
|
3768 |
||
3769 |
/* Accessor: returns the iLastTextBuf member (may be NULL). */
NW_Buffer_t* CHtmlpParser::getLastTextBuf()
{
  NW_Buffer_t* const pLastText = iLastTextBuf;
  return pLastText;
}
|
3773 |
||
3774 |
/* Accessor: returns the iLastValid member. */
NW_Int32 CHtmlpParser::getLastValid()
{
  const NW_Int32 lastValid = iLastValid;
  return lastValid;
}
|
3778 |
||
3779 |
/* Accessor: returns the code-page switch count recorded at the last valid
   mark (iValidCPCount). */
NW_Uint32 CHtmlpParser::getCodePageSwitchCount()
{
  const NW_Uint32 switchCount = iValidCPCount;
  return switchCount;
}
|
3783 |
||
3784 |
/* Accessor: returns the iPreviousValidOutput member (may be NULL). */
NW_Buffer_t* CHtmlpParser::getPreviousValidOutput()
{
  NW_Buffer_t* const pOutput = iPreviousValidOutput;
  return pOutput;
}
|
3788 |
||
3789 |
/* Accessor: returns the event callback table this parser drives (iCBs). */
NW_HTMLP_EventCallbacks_t * CHtmlpParser::getEventCallbacks()
{
  NW_HTMLP_EventCallbacks_t* const pCallbacks = iCBs;
  return pCallbacks;
}
|
3793 |
||
3794 |
/* Accessor: returns the lexer this parser reads from (iLexer). */
NW_HTMLP_Lexer_t* CHtmlpParser::getLexer()
{
  NW_HTMLP_Lexer_t* const pLexer = iLexer;
  return pLexer;
}
|
3798 |
||
3799 |
/*
 * Parse the segment currently loaded in iLexer, driving the callbacks in iCBs.
 *
 * isFirstSegment    - true for the first segment of a document: resets the
 *                     HTML/form flags, checks whether the lexer's element
 *                     dictionary is the HTML description table, and fires
 *                     beginDocumentCB.
 * docComplete       - stored in iDocComplete.
 * needCharsetDetect - stored in iNeedCharsetDetect; when set, the charset
 *                     conversion callback and its context are asserted.
 *
 * Returns KBrsrFailure when iLexer or iCBs is NULL; otherwise the first
 * failing status reported by a helper or callback, or the status left by
 * NW_HTMLP_ElementParseState_CloseAll / endDocumentCB.
 */
TBrowserStatusCode CHtmlpParser::NW_HTMLP_Parse(NW_Bool isFirstSegment,
                                                NW_Bool docComplete, NW_Bool needCharsetDetect)
{
  TBrowserStatusCode e = KBrsrSuccess;
  NW_Bool match = NW_FALSE;

  iDocComplete = docComplete;
  iNeedCharsetDetect = needCharsetDetect;

  /* Validate the collaborators before anything below dereferences them
     (the asserts read iCBs, the first-segment setup reads iLexer). */
  if ((iLexer == NULL) || (iCBs == NULL)) {
    return KBrsrFailure;
  }

  if (iNeedCharsetDetect)
  {
    NW_ASSERT(iCBs->charsetConvertCallback);
    NW_ASSERT(iCBs->charsetContext);
  }
  if (isFirstSegment)
  {
    // initialize iIsHtml
    iIsHtml = NW_FALSE;
    iHasForm = NW_FALSE;
    firstSegment = NW_TRUE;
    htmlTagFound = NW_FALSE;
    if (iLexer->pElementDictionary == NW_HTMLP_Get_ElementDescriptionTable())
    {
      iIsHtml = NW_TRUE;
    }
    /* discard any restore point left over from a previous document */
    if (iLastValidStack)
    {
      NW_HTMLP_ElementParseState_Delete(&iLastValidStack);
      NW_HTMLP_ElementParseState_New();
    }
  }
  else
  {
    firstSegment = NW_FALSE;
    htmlTagFound = NW_FALSE;
  }

  iSPLElemHandling->splHandle = NW_FALSE;
  iSPLElemHandling->type = NW_HTMLP_SPL_NONE;

  NW_Buffer_Free(iLastTextBuf);
  iLastTextBuf = NULL;

  /* resume the WBXML writer from the code pages of the last valid mark */
  if (iCBs->pClientPointer != NULL)
  {
    iWbxmlWriter = ((NW_HTMLP_WbxmlEncoder_t*)iCBs->pClientPointer)->pE;
    iWbxmlWriter->tagCodePage = iValidTagCP;
    iWbxmlWriter->attributeCodePage = iValidAttrCP;
    // WLIU_DEBUG: iWbxmlWriter->cp_count = iValidCPCount;
    iCurrentTagCP = iValidTagCP;
    iCurrentAttrCP = iValidAttrCP;
    iCurrentCPCount = iValidCPCount;
  }

  /* continue from the element stack saved at the last valid mark */
  if (iLastValidStack)
  {
    iElementParseState = iLastValidStack;
    iLastValidStack = NULL;
  }

  iLastTextBegin = -1;
  iLastScriptStart = -1;

  /* start of document callback */
  if (isFirstSegment && iCBs->beginDocumentCB != NULL) {
    e = (*(iCBs->beginDocumentCB))(iLexer, iCBs->pClientPointer);
    if (BRSR_STAT_IS_FAILURE(e)) {
      goto htmlp_parse_error;
    }
  }
  /* ignore any leading whitespace */
  e = NW_HTMLP_SkipSpace();
  if (BRSR_STAT_IS_FAILURE(e)) {
    goto htmlp_parse_error;
  }

  while (!NW_HTMLP_Lexer_AtEnd(iLexer)) {

    if (iConsumeSpaces)
    {
      /* Consume spaces between tags only for WML text */
      e = NW_HTMLP_SkipSpace();
      /* handle a failure the same way as the leading-whitespace skip above
         instead of letting the next call overwrite the status */
      if (BRSR_STAT_IS_FAILURE(e)) {
        goto htmlp_parse_error;
      }
    }

    /* Take care of special cases */
    e = NW_HTMLP_HandleSpecialCases(&match);
    if (BRSR_STAT_IS_FAILURE(e)) {
      goto htmlp_parse_error;
    }
    if (match == NW_TRUE) {
      continue;
    }

    /* look for start of markup */
    e = NW_HTMLP_Lexer_AsciiCharCompare(iLexer, '<', &match);
    if (BRSR_STAT_IS_FAILURE(e)) {
      goto htmlp_parse_error;
    }
    if (match == NW_TRUE) {

      /* end tags are tried first, and are the only markup recognised
         while inside PCDATA */
      e = NW_HTMLP_IfLegalConsumeElementEnd(&match);
      if (BRSR_STAT_IS_FAILURE(e)) {
        goto htmlp_parse_error;
      }
      if (match == NW_TRUE) {
        continue;
      }

      if (iElementParseState->inPCDATA == NW_FALSE)
      {
        e = NW_HTMLP_IfLegalConsumeComment(&match);
        if (BRSR_STAT_IS_FAILURE(e)) {
          goto htmlp_parse_error;
        }
        if (match == NW_TRUE) {
          continue;
        }

        e = NW_HTMLP_IfLegalConsumeDoctype(&match);
        if (BRSR_STAT_IS_FAILURE(e)) {
          goto htmlp_parse_error;
        }
        if (match == NW_TRUE) {
          continue;
        }

        e = NW_HTMLP_IfLegalConsumePi(&match);
        if (BRSR_STAT_IS_FAILURE(e)) {
          goto htmlp_parse_error;
        }
        if (match == NW_TRUE) {
          continue;
        }

        e = NW_HTMLP_IfLegalConsumeCdata(&match);
        if (BRSR_STAT_IS_FAILURE(e)) {
          goto htmlp_parse_error;
        }
        if (match == NW_TRUE) {
          continue;
        }

        e = NW_HTMLP_IfLegalConsumeElementStart(&match);
        if (BRSR_STAT_IS_FAILURE(e)) {
          goto htmlp_parse_error;
        }
        if (match == NW_TRUE) {
          continue;
        }
      }
    }

    /* process character data up to start of possible markup '<' */
    e = NW_HTMLP_ConsumeContent(&match);
    if (BRSR_STAT_IS_FAILURE(e)) {
      goto htmlp_parse_error;
    }
  }

  if (iLastValid == -1 && iWbxmlWriter)
  {
    iLastValid = iWbxmlWriter->index;
  }

  /* remember the valid stack before close all is called; skipped while a
     script or text run is still open at the end of the segment */
  if ((iLastScriptStart == -1) && (iLastTextBegin == -1))
  {
    (void)setValidMarks();
  }
  /* commitValidMarks() allocates buffers and can report KBrsrOutOfMemory;
     propagate that rather than carrying on with incomplete output */
  e = commitValidMarks();
  if (BRSR_STAT_IS_FAILURE(e)) {
    goto htmlp_parse_error;
  }

  e = NW_HTMLP_ElementParseState_CloseAll();
  if (BRSR_STAT_IS_FAILURE(e)) {
    goto htmlp_parse_error;
  }

  /* end of document callback */
  if (iCBs->endDocumentCB != NULL /*&& e != KBrsrRestartParsing*/)
  {
    e = (*(iCBs->endDocumentCB))(iLexer, e, iCBs->pClientPointer);
  }

htmlp_parse_error:

  NW_HTMLP_ElementParseState_Delete(&iElementParseState);
  if (e == KBrsrRestartParsing)
  {
    /* a restart begins again with a fresh parse state and no restore point */
    NW_HTMLP_ElementParseState_Delete(&iLastValidStack);
    iLastValidStack = NULL;
    NW_HTMLP_ElementParseState_New();
  }

  // iVisitedHeadText is no longer needed unless it is tracking an
  // unterminated comment. In that case iVisitedHeadText is still needed
  // because NW_HTMLP_NotifyDocComplete calls NW_HTMLP_RealParse,
  // which uses it.
  if (isFirstSegment && iRestarted && !iTrackingUnTerminatedComment)
  {
    NW_Buffer_Free(iVisitedHeadText);
    iVisitedHeadText = NULL;
  }

  return e;
}
|
4008 |
||
4009 |
/*
 * Append new_len bytes from new_data to iVisitedHeadText.
 *
 * A new buffer sized for the old contents plus the new bytes is allocated,
 * filled, and installed as iVisitedHeadText; the old buffer is always freed.
 *
 * Returns KBrsrOutOfMemory if the new buffer cannot be allocated (in which
 * case iVisitedHeadText is left NULL), otherwise KBrsrSuccess.
 */
TBrowserStatusCode CHtmlpParser::appendVisitedHeadText(NW_Byte* new_data, NW_Uint32 new_len)
{
  NW_Buffer_t* const pPrevious = iVisitedHeadText;
  NW_Uint32 previousLen = 0;
  TBrowserStatusCode result = KBrsrOutOfMemory;

  if (pPrevious)
  {
    previousLen = pPrevious->length;
  }

  iVisitedHeadText = (NW_Buffer_t *)NW_Buffer_New(previousLen + new_len);
  if (iVisitedHeadText)
  {
    /* old contents first, then the newly visited bytes */
    if (pPrevious)
    {
      (void)NW_Mem_memcpy(iVisitedHeadText->data, pPrevious->data, previousLen);
    }
    (void)NW_Mem_memcpy(iVisitedHeadText->data + previousLen, new_data, new_len);
    iVisitedHeadText->length = previousLen + new_len;
    result = KBrsrSuccess;
  }

  /* the superseded buffer is released on success and on failure alike */
  NW_Buffer_Free(pPrevious);
  return result;
}
|
4037 |
||
4038 |
/* Accessor: returns the iVisitedHeadText member (may be NULL). */
NW_Buffer_t* CHtmlpParser::getVisitedHeadText()
{
  NW_Buffer_t* const pHeadText = iVisitedHeadText;
  return pHeadText;
}
|
4042 |
||
4043 |
void CHtmlpParser::updateCurrentCP() |
|
4044 |
{ |
|
4045 |
if (iCurrentTagCP != iWbxmlWriter->tagCodePage) |
|
4046 |
{ |
|
4047 |
// WLIU_DEBUG: NW_ASSERT(iCurrentCPCount == iWbxmlWriter->cp_count -1); |
|
4048 |
iCurrentCPCount++; |
|
4049 |
iCurrentTagCP = iWbxmlWriter->tagCodePage; |
|
4050 |
} |
|
4051 |
else if(iCurrentAttrCP != iWbxmlWriter->attributeCodePage) |
|
4052 |
{ |
|
4053 |
// WLIU_DEBUG: NW_ASSERT(iCurrentCPCount == iWbxmlWriter->cp_count -1); |
|
4054 |
iCurrentCPCount++; |
|
4055 |
iCurrentAttrCP = iWbxmlWriter->attributeCodePage; |
|
4056 |
} |
|
4057 |
} |
|
4058 |
||
4059 |
/*
 * Account for a known number of code-page switches made by the WBXML writer.
 *
 * switches - number of switches to add to iCurrentCPCount. When non-zero, both
 *            cached code pages are refreshed from the writer. A value of 0, or
 *            a missing writer, leaves everything untouched.
 */
void CHtmlpParser::updateCurrentCP(NW_Uint32 switches)
{
  /* guard the writer like the other users of iWbxmlWriter in this class */
  if ((switches != 0) && iWbxmlWriter)
  {
    iCurrentCPCount += switches;
    iCurrentTagCP = iWbxmlWriter->tagCodePage;
    iCurrentAttrCP = iWbxmlWriter->attributeCodePage;
  }
  // WLIU_DEBUG: NW_ASSERT(iCurrentCPCount == iWbxmlWriter->cp_count);
}
|
4070 |
||
4071 |
#else |
|
4072 |
||
4073 |
/* Placeholder compiled when FEA_RME_NOHTMLPARSER is defined; it only touches
   a local counter and has no observable effect. */
void FeaRmeNoHTMLParser_htmlp_parser()
{
  int placeholder = 0;
  placeholder = placeholder + 1;
}
|
4077 |
#endif /* FEA_RME_NOHTMLPARSER */ |
|
4078 |
||
4079 |
||
4080 |
||
4081 |
// For WML purposes, which requires stricter checking of DTD, than HTML, |
|
4082 |
// return the status of checking the attribute name |
|
4083 |
// against the appropriate attribute dictionary. |
|
4084 |
TBrowserStatusCode CHtmlpParser::NW_HTMLP_ValidateWMLAttribute( |
|
4085 |
NW_HTMLP_Lexer_t* apL, |
|
4086 |
const NW_HTMLP_Interval_t* apI_name, |
|
4087 |
void* apV) |
|
4088 |
{ |
|
4089 |
NW_HTMLP_WbxmlEncoder_t* pTE = (NW_HTMLP_WbxmlEncoder_t*)apV; |
|
4090 |
NW_WBXML_Writer_t* pW = pTE->pE; |
|
4091 |
NW_Uint32 length; |
|
4092 |
NW_Uint32 nameLength; |
|
4093 |
NW_Uint32 sizeChar = 1; |
|
4094 |
NW_Uint8* pName; |
|
4095 |
TBrowserStatusCode s; |
|
4096 |
||
4097 |
NW_ASSERT(!iIsHtml); |
|
4098 |
||
4099 |
// make sure can handle the encoding used |
|
4100 |
if (!((pTE->encoding == HTTP_us_ascii) |
|
4101 |
|| (pTE->encoding == HTTP_iso_8859_1) |
|
4102 |
|| (pTE->encoding == HTTP_utf_8) |
|
4103 |
|| (pTE->encoding == HTTP_iso_10646_ucs_2))) { |
|
4104 |
return KBrsrFailure; |
|
4105 |
} |
|
4106 |
||
4107 |
// establish the size of a character |
|
4108 |
if (apL->encoding == HTTP_iso_10646_ucs_2) { |
|
4109 |
sizeChar = 2; |
|
4110 |
} |
|
4111 |
||
4112 |
// name well-formed? |
|
4113 |
if (!NW_HTMLP_Interval_IsWellFormed(apI_name)) { |
|
4114 |
return KBrsrFailure; |
|
4115 |
} |
|
4116 |
||
4117 |
// var name setup |
|
4118 |
length = NW_HTMLP_Interval_ByteCount(apI_name); |
|
4119 |
nameLength = length; // byte count |
|
4120 |
s = NW_HTMLP_Lexer_DataAddressFromBuffer(apL, apI_name->start, |
|
4121 |
&nameLength, &pName); |
|
4122 |
if (BRSR_STAT_IS_FAILURE(s)) { |
|
4123 |
return s; |
|
4124 |
} |
|
4125 |
if (nameLength != length) { |
|
4126 |
return KBrsrFailure; |
|
4127 |
} |
|
4128 |
nameLength = NW_HTMLP_Interval_CharCount(apI_name); // char count |
|
4129 |
||
4130 |
// copy the name into a zero terminated string for dictionary checking |
|
4131 |
NW_Uint8* pByteName = NULL; |
|
4132 |
NW_String_t nameString; |
|
4133 |
pByteName = (NW_Uint8*)NW_Mem_Malloc( length + sizeChar ); // allocating bytes |
|
4134 |
if (!pByteName) { |
|
4135 |
return KBrsrOutOfMemory; |
|
4136 |
} |
|
4137 |
NW_Mem_memset(pByteName, NULL, length + sizeChar); // enough extra space for terminating null characters |
|
4138 |
NW_Mem_memcpy(pByteName, pName, length); |
|
4139 |
NW_String_initialize( &nameString, pByteName, pTE->encoding); |
|
4140 |
NW_Int16 retVal = -1; // initialize return to be -1 (meaning: attribute not found) |
|
4141 |
retVal = NW_WBXML_Dictionary_getAttributeToken( pW->pAttributeDictionary, //NW_WBXML_Dictionary_t* dictionary, |
|
4142 |
&nameString, //const NW_String_t* pNameString, |
|
4143 |
pTE->encoding, //NW_Uint32 encoding, |
|
4144 |
NW_FALSE ); //NW_Bool matchCase |
|
4145 |
NW_Mem_Free( pByteName); |
|
4146 |
pByteName = NULL; |
|
4147 |
||
4148 |
// the attribute was not found in the dictionary - as this is for WML - send back bad content message |
|
4149 |
if (retVal == -1) { |
|
4150 |
return KBrsrWmlbrowserBadContent; |
|
4151 |
} |
|
4152 |
||
4153 |
return KBrsrSuccess; |
|
4154 |
} |