1 /* |
|
2 * Copyright (c) 2000 - 2001 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the License "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 #include "cxml_internal.h" |
|
20 #include <xml/cxml/nw_xmlp_xmlreader.h> |
|
21 #include <xml/cxml/nw_xmlp_xmlparser.h> |
|
22 #include <xml/cxml/nw_string_string.h> |
|
23 #include "cxml_xmlp_entity.h" |
|
24 #include "cxml_xmlp_int_entity.h" |
|
25 |
|
26 /*IMPORTANT NOTE: This array must be sorted by entity name. The lookup function |
|
27 * does a binary search. |
|
28 * |
|
29 * When you update this table make sure the constant |
|
30 * CXML_Num_CaseInsensitive_Entries which is the count of |
|
31 * caseinsensitive entries is updated correctly |
|
32 */ |
|
33 |
|
34 #define CXML_Num_CaseInsensitive_Entries 126 |
|
35 |
|
36 |
|
37 |
|
38 static |
|
39 const CXML_EntitySet_Entry_t CXML_EntitySet_Array[] = |
|
40 { |
|
41 // CaseSensitive entries |
|
42 {(CXML_Uint8*) "AElig", 198 }, |
|
43 {(CXML_Uint8*) "Aacute", 193 }, |
|
44 {(CXML_Uint8*) "Acirc", 194 }, |
|
45 {(CXML_Uint8*) "Agrave", 192 }, |
|
46 {(CXML_Uint8*) "Alpha", 913 }, |
|
47 {(CXML_Uint8*) "Aring", 197 }, |
|
48 {(CXML_Uint8*) "Atilde", 195 }, |
|
49 {(CXML_Uint8*) "Auml", 196 }, |
|
50 {(CXML_Uint8*) "Beta", 914 }, |
|
51 {(CXML_Uint8*) "Ccedil", 199 }, |
|
52 {(CXML_Uint8*) "Chi", 935 }, |
|
53 {(CXML_Uint8*) "Dagger", 8225 }, |
|
54 {(CXML_Uint8*) "Delta", 916 }, |
|
55 {(CXML_Uint8*) "ETH", 208 }, |
|
56 {(CXML_Uint8*) "Eacute", 201 }, |
|
57 {(CXML_Uint8*) "Ecirc", 202 }, |
|
58 {(CXML_Uint8*) "Egrave", 200 }, |
|
59 {(CXML_Uint8*) "Epsilon", 917 }, |
|
60 {(CXML_Uint8*) "Eta", 919 }, |
|
61 {(CXML_Uint8*) "Euml", 203 }, |
|
62 {(CXML_Uint8*) "Gamma", 915 }, |
|
63 {(CXML_Uint8*) "Iacute", 205 }, |
|
64 {(CXML_Uint8*) "Icirc", 206 }, |
|
65 {(CXML_Uint8*) "Igrave", 204 }, |
|
66 {(CXML_Uint8*) "Iota", 921 }, |
|
67 {(CXML_Uint8*) "Iuml", 207 }, |
|
68 {(CXML_Uint8*) "Kappa", 922 }, |
|
69 {(CXML_Uint8*) "Lambda", 923 }, |
|
70 {(CXML_Uint8*) "Mu", 924 }, |
|
71 {(CXML_Uint8*) "Ntilde", 209 }, |
|
72 {(CXML_Uint8*) "Nu", 925 }, |
|
73 {(CXML_Uint8*) "OElig", 338 }, |
|
74 {(CXML_Uint8*) "Oacute", 211 }, |
|
75 {(CXML_Uint8*) "Ocirc", 212 }, |
|
76 {(CXML_Uint8*) "Ograve", 210 }, |
|
77 {(CXML_Uint8*) "Omega", 937 }, |
|
78 {(CXML_Uint8*) "Omicron", 927 }, |
|
79 {(CXML_Uint8*) "Oslash", 216 }, |
|
80 {(CXML_Uint8*) "Otilde", 213 }, |
|
81 {(CXML_Uint8*) "Ouml", 214 }, |
|
82 {(CXML_Uint8*) "Phi", 934 }, |
|
83 {(CXML_Uint8*) "Pi", 928 }, |
|
84 {(CXML_Uint8*) "Prime", 8243 }, |
|
85 {(CXML_Uint8*) "Psi", 936 }, |
|
86 {(CXML_Uint8*) "Rho", 929 }, |
|
87 {(CXML_Uint8*) "Scaron", 352 }, |
|
88 {(CXML_Uint8*) "Sigma", 931 }, |
|
89 {(CXML_Uint8*) "THORN", 222 }, |
|
90 {(CXML_Uint8*) "Tau", 932 }, |
|
91 {(CXML_Uint8*) "Theta", 920 }, |
|
92 {(CXML_Uint8*) "Uacute", 218 }, |
|
93 {(CXML_Uint8*) "Ucirc", 219 }, |
|
94 {(CXML_Uint8*) "Ugrave", 217 }, |
|
95 {(CXML_Uint8*) "Upsilon", 933 }, |
|
96 {(CXML_Uint8*) "Uuml", 220 }, |
|
97 {(CXML_Uint8*) "Xi", 926 }, |
|
98 {(CXML_Uint8*) "Yacute", 221 }, |
|
99 {(CXML_Uint8*) "Yuml", 376 }, |
|
100 {(CXML_Uint8*) "Zeta", 918 }, |
|
101 {(CXML_Uint8*) "aacute", 225 }, |
|
102 {(CXML_Uint8*) "acirc", 226 }, |
|
103 {(CXML_Uint8*) "acute", 180 }, |
|
104 {(CXML_Uint8*) "aelig", 230 }, |
|
105 {(CXML_Uint8*) "agrave", 224 }, |
|
106 {(CXML_Uint8*) "alpha", 945 }, |
|
107 {(CXML_Uint8*) "atilde", 227 }, |
|
108 {(CXML_Uint8*) "auml", 228 }, |
|
109 {(CXML_Uint8*) "beta", 946 }, |
|
110 {(CXML_Uint8*) "ccedil", 231 }, |
|
111 {(CXML_Uint8*) "chi", 967 }, |
|
112 {(CXML_Uint8*) "dArr", 8659 }, |
|
113 {(CXML_Uint8*) "dagger", 8224 }, |
|
114 {(CXML_Uint8*) "darr", 8595 }, |
|
115 {(CXML_Uint8*) "delta", 948 }, |
|
116 {(CXML_Uint8*) "eacute", 233 }, |
|
117 {(CXML_Uint8*) "ecirc", 234 }, |
|
118 {(CXML_Uint8*) "egrave", 232 }, |
|
119 {(CXML_Uint8*) "epsilon", 949 }, |
|
120 {(CXML_Uint8*) "eta", 951 }, |
|
121 {(CXML_Uint8*) "euml", 235 }, |
|
122 {(CXML_Uint8*) "gamma", 947 }, |
|
123 {(CXML_Uint8*) "hArr", 8660 }, |
|
124 {(CXML_Uint8*) "harr", 8596 }, |
|
125 {(CXML_Uint8*) "iacute", 237 }, |
|
126 {(CXML_Uint8*) "icirc", 238 }, |
|
127 {(CXML_Uint8*) "igrave", 236 }, |
|
128 {(CXML_Uint8*) "iota", 953 }, |
|
129 {(CXML_Uint8*) "iuml", 239 }, |
|
130 {(CXML_Uint8*) "kappa", 954 }, |
|
131 {(CXML_Uint8*) "lArr", 8656 }, |
|
132 {(CXML_Uint8*) "lambda", 955 }, |
|
133 {(CXML_Uint8*) "larr", 8592 }, |
|
134 {(CXML_Uint8*) "mu", 956 }, |
|
135 {(CXML_Uint8*) "ntilde", 241 }, |
|
136 {(CXML_Uint8*) "nu", 957 }, |
|
137 {(CXML_Uint8*) "oacute", 243 }, |
|
138 {(CXML_Uint8*) "ocirc", 244 }, |
|
139 {(CXML_Uint8*) "oelig", 339 }, |
|
140 {(CXML_Uint8*) "ograve", 242 }, |
|
141 {(CXML_Uint8*) "omega", 969 }, |
|
142 {(CXML_Uint8*) "omicron", 959 }, |
|
143 {(CXML_Uint8*) "oslash", 248 }, |
|
144 {(CXML_Uint8*) "otilde", 245 }, |
|
145 {(CXML_Uint8*) "otimes", 8855 }, |
|
146 {(CXML_Uint8*) "ouml", 246 }, |
|
147 {(CXML_Uint8*) "phi", 966 }, |
|
148 {(CXML_Uint8*) "pi", 960 }, |
|
149 {(CXML_Uint8*) "psi", 968 }, |
|
150 {(CXML_Uint8*) "rArr", 8658 }, |
|
151 {(CXML_Uint8*) "rarr", 8594 }, |
|
152 {(CXML_Uint8*) "rho", 961 }, |
|
153 {(CXML_Uint8*) "scaron", 353 }, |
|
154 {(CXML_Uint8*) "sigma", 963 }, |
|
155 {(CXML_Uint8*) "tau", 964 }, |
|
156 {(CXML_Uint8*) "theta", 952 }, |
|
157 {(CXML_Uint8*) "thorn", 254 }, |
|
158 {(CXML_Uint8*) "uArr", 8657 }, |
|
159 {(CXML_Uint8*) "uacute", 250 }, |
|
160 {(CXML_Uint8*) "uarr", 8593 }, |
|
161 {(CXML_Uint8*) "ucirc", 251 }, |
|
162 {(CXML_Uint8*) "ugrave", 249 }, |
|
163 {(CXML_Uint8*) "upsilon", 965 }, |
|
164 {(CXML_Uint8*) "uuml", 252 }, |
|
165 {(CXML_Uint8*) "xi", 958 }, |
|
166 {(CXML_Uint8*) "yacute", 253 }, |
|
167 {(CXML_Uint8*) "yuml", 255 }, |
|
168 {(CXML_Uint8*) "zeta", 950 }, |
|
169 {(CXML_Uint8*) "zwj", 8205 }, |
|
170 {(CXML_Uint8*) "zwnj", 8204 }, |
|
171 // Case Insensitive entries |
|
172 {(CXML_Uint8*) "alefsym", 8501 }, |
|
173 {(CXML_Uint8*) "amp", 38 }, |
|
174 {(CXML_Uint8*) "and", 8743 }, |
|
175 {(CXML_Uint8*) "ang", 8736 }, |
|
176 {(CXML_Uint8*) "apos", 39 }, |
|
177 {(CXML_Uint8*) "aring", 229 }, |
|
178 {(CXML_Uint8*) "asymp", 8776 }, |
|
179 {(CXML_Uint8*) "bdquo", 8222 }, |
|
180 {(CXML_Uint8*) "brvbar", 166 }, |
|
181 {(CXML_Uint8*) "bull", 8226 }, |
|
182 {(CXML_Uint8*) "cap", 8745 }, |
|
183 {(CXML_Uint8*) "cedil", 184 }, |
|
184 {(CXML_Uint8*) "cent", 162 }, |
|
185 {(CXML_Uint8*) "circ", 710 }, |
|
186 {(CXML_Uint8*) "clubs", 9827 }, |
|
187 {(CXML_Uint8*) "cong", 8773 }, |
|
188 {(CXML_Uint8*) "copy", 169 }, |
|
189 {(CXML_Uint8*) "crarr", 8629 }, |
|
190 {(CXML_Uint8*) "cup", 8746 }, |
|
191 {(CXML_Uint8*) "curren", 164 }, |
|
192 {(CXML_Uint8*) "deg", 176 }, |
|
193 {(CXML_Uint8*) "diams", 9830 }, |
|
194 {(CXML_Uint8*) "divide", 247 }, |
|
195 {(CXML_Uint8*) "empty", 8709 }, |
|
196 {(CXML_Uint8*) "emsp", 8195 }, |
|
197 {(CXML_Uint8*) "ensp", 8194 }, |
|
198 {(CXML_Uint8*) "equiv", 8801 }, |
|
199 {(CXML_Uint8*) "eth", 240 }, |
|
200 {(CXML_Uint8*) "euro", 8364 }, |
|
201 {(CXML_Uint8*) "exist", 8707 }, |
|
202 {(CXML_Uint8*) "fnof", 402 }, |
|
203 {(CXML_Uint8*) "forall", 8704 }, |
|
204 {(CXML_Uint8*) "frac12", 189 }, |
|
205 {(CXML_Uint8*) "frac14", 188 }, |
|
206 {(CXML_Uint8*) "frac34", 190 }, |
|
207 {(CXML_Uint8*) "frasl", 8260 }, |
|
208 {(CXML_Uint8*) "ge", 8805 }, |
|
209 {(CXML_Uint8*) "gt", 62 }, |
|
210 {(CXML_Uint8*) "hearts", 9829 }, |
|
211 {(CXML_Uint8*) "hellip", 8230 }, |
|
212 {(CXML_Uint8*) "iexcl", 161 }, |
|
213 {(CXML_Uint8*) "image", 8465 }, |
|
214 {(CXML_Uint8*) "infin", 8734 }, |
|
215 {(CXML_Uint8*) "int", 8747 }, |
|
216 {(CXML_Uint8*) "iquest", 191 }, |
|
217 {(CXML_Uint8*) "isin", 8712 }, |
|
218 {(CXML_Uint8*) "lang", 9001 }, |
|
219 {(CXML_Uint8*) "laquo", 171 }, |
|
220 {(CXML_Uint8*) "lceil", 8968 }, |
|
221 {(CXML_Uint8*) "ldquo", 8220 }, |
|
222 {(CXML_Uint8*) "le", 8804 }, |
|
223 {(CXML_Uint8*) "lfloor", 8970 }, |
|
224 {(CXML_Uint8*) "lowast", 8727 }, |
|
225 {(CXML_Uint8*) "loz", 9674 }, |
|
226 {(CXML_Uint8*) "lrm", 8206 }, |
|
227 {(CXML_Uint8*) "lsaquo", 8249 }, |
|
228 {(CXML_Uint8*) "lsquo", 8216 }, |
|
229 {(CXML_Uint8*) "lt", 60 }, |
|
230 {(CXML_Uint8*) "macr", 175 }, |
|
231 {(CXML_Uint8*) "mdash", 8212 }, |
|
232 {(CXML_Uint8*) "micro", 181 }, |
|
233 {(CXML_Uint8*) "middot", 183 }, |
|
234 {(CXML_Uint8*) "minus", 8722 }, |
|
235 {(CXML_Uint8*) "nabla", 8711 }, |
|
236 {(CXML_Uint8*) "nbsp", 160 }, |
|
237 {(CXML_Uint8*) "ndash", 8211 }, |
|
238 {(CXML_Uint8*) "ne", 8800 }, |
|
239 {(CXML_Uint8*) "ni", 8715 }, |
|
240 {(CXML_Uint8*) "not", 172 }, |
|
241 {(CXML_Uint8*) "notin", 8713 }, |
|
242 {(CXML_Uint8*) "nsub", 8836 }, |
|
243 {(CXML_Uint8*) "oline", 8254 }, |
|
244 {(CXML_Uint8*) "oplus", 8853 }, |
|
245 {(CXML_Uint8*) "or", 8744 }, |
|
246 {(CXML_Uint8*) "ordf", 170 }, |
|
247 {(CXML_Uint8*) "ordm", 186 }, |
|
248 {(CXML_Uint8*) "para", 182 }, |
|
249 {(CXML_Uint8*) "part", 8706 }, |
|
250 {(CXML_Uint8*) "permil", 8240 }, |
|
251 {(CXML_Uint8*) "perp", 8869 }, |
|
252 {(CXML_Uint8*) "piv", 982 }, |
|
253 {(CXML_Uint8*) "plusmn", 177 }, |
|
254 {(CXML_Uint8*) "pound", 163 }, |
|
255 {(CXML_Uint8*) "prime", 8242 }, |
|
256 {(CXML_Uint8*) "prod", 8719 }, |
|
257 {(CXML_Uint8*) "prop", 8733 }, |
|
258 {(CXML_Uint8*) "quot", 34 }, |
|
259 {(CXML_Uint8*) "radic", 8730 }, |
|
260 {(CXML_Uint8*) "rang", 9002 }, |
|
261 {(CXML_Uint8*) "raquo", 187 }, |
|
262 {(CXML_Uint8*) "rceil", 8969 }, |
|
263 {(CXML_Uint8*) "rdquo", 8221 }, |
|
264 {(CXML_Uint8*) "real", 8476 }, |
|
265 {(CXML_Uint8*) "reg", 174 }, |
|
266 {(CXML_Uint8*) "rfloor", 8971 }, |
|
267 {(CXML_Uint8*) "rlm", 8207 }, |
|
268 {(CXML_Uint8*) "rsaquo", 8250 }, |
|
269 {(CXML_Uint8*) "rsquo", 8217 }, |
|
270 {(CXML_Uint8*) "sbquo", 8218 }, |
|
271 {(CXML_Uint8*) "sdot", 8901 }, |
|
272 {(CXML_Uint8*) "sect", 167 }, |
|
273 {(CXML_Uint8*) "shy", 173 }, |
|
274 {(CXML_Uint8*) "sigmaf", 962 }, |
|
275 {(CXML_Uint8*) "sim", 8764 }, |
|
276 {(CXML_Uint8*) "spades", 9824 }, |
|
277 {(CXML_Uint8*) "sub", 8834 }, |
|
278 {(CXML_Uint8*) "sube", 8838 }, |
|
279 {(CXML_Uint8*) "sum", 8721 }, |
|
280 {(CXML_Uint8*) "sup", 8835 }, |
|
281 {(CXML_Uint8*) "sup1", 185 }, |
|
282 {(CXML_Uint8*) "sup2", 178 }, |
|
283 {(CXML_Uint8*) "sup3", 179 }, |
|
284 {(CXML_Uint8*) "supe", 8839 }, |
|
285 {(CXML_Uint8*) "szlig", 223 }, |
|
286 {(CXML_Uint8*) "there4", 8756 }, |
|
287 {(CXML_Uint8*) "thetasym", 977 }, |
|
288 {(CXML_Uint8*) "thinsp", 8201 }, |
|
289 {(CXML_Uint8*) "tilde", 732 }, |
|
290 {(CXML_Uint8*) "times", 215 }, |
|
291 {(CXML_Uint8*) "trade", 8482 }, |
|
292 {(CXML_Uint8*)"uml", 168 }, |
|
293 {(CXML_Uint8*) "upsih", 978 }, |
|
294 {(CXML_Uint8*) "weierp", 8472 }, |
|
295 {(CXML_Uint8*) "yen", 165 }, |
|
296 {(CXML_Uint8*) "zwj", 8205 }, |
|
297 {(CXML_Uint8*) "zwnj", 8204 }, |
|
298 }; |
|
299 |
|
300 const CXML_Uint32 CXML_ALL_PREDEFINE_ENTITIES = (sizeof(CXML_EntitySet_Array) |
|
301 / sizeof(CXML_EntitySet_Entry_t)); |
|
302 |
|
303 /* It assumes the parser at the begginning i.e '&' symbol. |
|
304 * The entity name stops at (;) or at '>'. If it is stopped |
|
305 * at (;) then it is a valid entity. |
|
306 * If it stops at '>' then it is not a valid entity. |
|
307 */ |
|
308 |
|
309 |
|
310 NW_Status_t CXML_XML_Parser_Entity(NW_XML_Reader_t* pT, |
|
311 NW_XML_Reader_Interval_t* I_entityData, |
|
312 NW_Bool* entityFound) |
|
313 { |
|
314 NW_Status_t s; |
|
315 NW_Bool endFound = NW_FALSE; |
|
316 //NW_Uint32 isSpace; |
|
317 NW_Uint32 cnt=0; |
|
318 NW_Uint32 match; |
|
319 |
|
320 s = NW_XML_Reader_Advance(pT); //Pass over the '&' symbol |
|
321 |
|
322 if (NW_STAT_IS_FAILURE(s)) |
|
323 { |
|
324 return NW_STAT_FAILURE; |
|
325 } |
|
326 |
|
327 NW_XML_Reader_Interval_Start(I_entityData, pT); |
|
328 |
|
329 // Just check for false entity or error in entity e.g. If entity is not |
|
330 // terminated by (;) |
|
331 |
|
332 |
|
333 for(cnt=0; ; ) |
|
334 { |
|
335 |
|
336 cnt++; |
|
337 |
|
338 //check for terminating entity character |
|
339 |
|
340 s = NW_XML_Reader_AsciiCharMatch(pT, ';', &match); |
|
341 |
|
342 if (NW_STAT_IS_FAILURE(s)) |
|
343 { |
|
344 return NW_STAT_FAILURE; |
|
345 } |
|
346 |
|
347 if(match) |
|
348 { |
|
349 endFound = NW_TRUE; |
|
350 break; |
|
351 } |
|
352 |
|
353 //Check condition if it is malformed entity. Exit at least |
|
354 //at the end of attribute or contents. |
|
355 |
|
356 s = NW_XML_Reader_AsciiCharMatch(pT, '>', &match); |
|
357 |
|
358 if (NW_STAT_IS_FAILURE(s)) |
|
359 { |
|
360 return NW_STAT_FAILURE; |
|
361 } |
|
362 |
|
363 if(match) |
|
364 { |
|
365 endFound = NW_FALSE; |
|
366 break; |
|
367 } |
|
368 |
|
369 //Move to next character |
|
370 |
|
371 s = NW_XML_Reader_Advance(pT); |
|
372 |
|
373 if (NW_STAT_IS_FAILURE(s)) |
|
374 { |
|
375 return NW_STAT_FAILURE; |
|
376 } |
|
377 }//end for |
|
378 |
|
379 if(endFound == NW_TRUE) |
|
380 { |
|
381 NW_XML_Reader_Interval_Stop(I_entityData, pT); |
|
382 *entityFound = NW_TRUE; |
|
383 |
|
384 //Move over end of entity i.e. (;) character. |
|
385 |
|
386 s = NW_XML_Reader_Advance(pT); |
|
387 |
|
388 if (NW_STAT_IS_FAILURE(s)) |
|
389 { |
|
390 return NW_STAT_FAILURE; |
|
391 } |
|
392 } |
|
393 else |
|
394 { |
|
395 *entityFound = NW_FALSE; |
|
396 s = NW_STAT_SUCCESS; |
|
397 } |
|
398 |
|
399 |
|
400 return s; |
|
401 }//end CXML_Parser_Entity(...) |
|
402 |
|
403 |
|
404 /* Function to verify the digit depending on its base */ |
|
405 |
|
406 static |
|
407 CXML_Int32 CXML_XML_IsValidDigit (const CXML_Ucs2 ch, |
|
408 NW_Int32 base) |
|
409 { |
|
410 switch (base) { |
|
411 case 10: |
|
412 return CXML_Str_Isdigit (ch); |
|
413 |
|
414 case 16: |
|
415 return CXML_Str_Isxdigit (ch); |
|
416 |
|
417 default: |
|
418 return 0; |
|
419 } |
|
420 } |
|
421 |
|
422 |
|
423 /* |
|
424 * Function converts the entity string to the entity value. |
|
425 */ |
|
426 |
|
427 static |
|
428 NW_Bool CXML_XML_GetNumEntityChar (const CXML_Ucs2* instring, |
|
429 CXML_Ucs2 *retchar, |
|
430 CXML_Int32 base) |
|
431 { |
|
432 CXML_Int32 result = 0; |
|
433 CXML_Int32 prevResult; |
|
434 CXML_Uint32 currDigit = 0; |
|
435 |
|
436 if (*instring == 0) { |
|
437 return NW_FALSE; |
|
438 } |
|
439 |
|
440 while (*instring != 0) { |
|
441 if (!(CXML_XML_IsValidDigit (*instring, base))) |
|
442 return NW_FALSE; |
|
443 |
|
444 if (CXML_Str_Isdigit (*instring)) { |
|
445 currDigit = *instring - CXML_ASCII_0; |
|
446 instring++; |
|
447 } |
|
448 else { |
|
449 currDigit = CXML_Str_ToUpper (*instring) - CXML_ASCII_UPPER_A + 10; |
|
450 instring++; |
|
451 } |
|
452 |
|
453 prevResult = result; |
|
454 result = result * base + currDigit; |
|
455 if (result < prevResult) { |
|
456 return NW_FALSE; |
|
457 } |
|
458 } |
|
459 if (result > 0xffff || result < 0x0) |
|
460 return NW_FALSE; |
|
461 |
|
462 *retchar = (NW_Ucs2) result; |
|
463 return NW_TRUE; |
|
464 }//end NW_Bool CXML_XML_GetNumEntityChar(..) |
|
465 |
|
466 |
|
467 /* This function uses the binary search to find the predefined |
|
468 * entity names and corresponding value if found in the array. |
|
469 */ |
|
470 |
|
471 static NW_Ucs2 |
|
472 CXML_Get_Entity_Val (NW_Ucs2 *name) |
|
473 { |
|
474 CXML_Int32 index; |
|
475 NW_Status_t s = NW_STAT_SUCCESS; |
|
476 const CXML_EntitySet_Entry_t* entry; |
|
477 NW_Ucs2* ucs2TempStr = NULL; |
|
478 CXML_Uint32 entityLength = 0; |
|
479 |
|
480 /* First do a binary search search in the case sensitive part of the array */ |
|
481 CXML_Int32 low = 0; |
|
482 CXML_Int32 high = CXML_ALL_PREDEFINE_ENTITIES - CXML_Num_CaseInsensitive_Entries - 1; |
|
483 CXML_Int32 res = 0; |
|
484 |
|
485 while (low <= high ) { |
|
486 index = (high + low) / 2; |
|
487 entry = & (CXML_EntitySet_Array[index]); |
|
488 entityLength = CXML_Asc_strlen((CXML_Int8 *)entry->name); |
|
489 |
|
490 s = NW_String_byteToUCS2Char(entry->name,entityLength,&ucs2TempStr); |
|
491 |
|
492 if (NW_STAT_IS_FAILURE(s)) |
|
493 { |
|
494 return 0; |
|
495 } |
|
496 |
|
497 // do a case insensitive string comparison |
|
498 |
|
499 res = CXML_Str_StrcmpConst( name, ucs2TempStr ); |
|
500 |
|
501 if(ucs2TempStr != NULL) |
|
502 { |
|
503 NW_Mem_Free(ucs2TempStr); |
|
504 ucs2TempStr = NULL; |
|
505 } |
|
506 |
|
507 if ( res > 0 ) { |
|
508 /* name is ahead of this slot. Increase low bound. */ |
|
509 low = index + 1; |
|
510 } else if ( res < 0 ) { |
|
511 /* name is behind this slot. Decrease high bound. */ |
|
512 high = index - 1; |
|
513 } else { |
|
514 /* Found the entity name. Return its value. */ |
|
515 return entry->value; |
|
516 } |
|
517 } |
|
518 |
|
519 /* if no match was found search in the case insensitive part of the table. */ |
|
520 |
|
521 low = CXML_ALL_PREDEFINE_ENTITIES - CXML_Num_CaseInsensitive_Entries; |
|
522 high = CXML_ALL_PREDEFINE_ENTITIES - 1; |
|
523 res = 0; |
|
524 while (low <= high ) |
|
525 { |
|
526 index = (high + low) / 2; |
|
527 entry = &(CXML_EntitySet_Array[index]); |
|
528 entityLength = CXML_Asc_strlen((CXML_Int8 *)entry->name); |
|
529 |
|
530 s = NW_String_byteToUCS2Char(entry->name,entityLength,&ucs2TempStr); |
|
531 |
|
532 if (NW_STAT_IS_FAILURE(s)) |
|
533 { |
|
534 return 0; |
|
535 } |
|
536 |
|
537 // do a case insensitive string comparison |
|
538 |
|
539 res = CXML_Str_Stricmp( name, ucs2TempStr ); |
|
540 |
|
541 if(ucs2TempStr != NULL) |
|
542 { |
|
543 NW_Mem_Free(ucs2TempStr); |
|
544 ucs2TempStr = NULL; |
|
545 } |
|
546 |
|
547 |
|
548 if ( res > 0 ) { |
|
549 /* name is ahead of this slot. Increase low bound. */ |
|
550 low = index + 1; |
|
551 } else if ( res < 0 ) { |
|
552 /* name is behind this slot. Decrease high bound. */ |
|
553 high = index - 1; |
|
554 } else { |
|
555 /* Found the entity name. Return its value. */ |
|
556 return entry->value; |
|
557 } |
|
558 }/*end while */ |
|
559 |
|
560 /* if no match were found we return 0 */ |
|
561 return 0; |
|
562 } |
|
563 |
|
564 /* This function is called when entity of valid syntax is found. The |
|
565 * entity validity is checked here. |
|
566 * |
|
567 * E.g. ⋙ is valid syntax entity but not a valid entity. In this |
|
568 * case continue parsing the entity as it is. Create this |
|
569 * as normal contents not as the entity. |
|
570 * |
|
571 * entityContent ==> Entity Name (e.g. "amp") |
|
572 * length ==> Length of Entity Name. |
|
573 * entityVal ==> This parameter will contain the entity value. |
|
574 * if it is character entites (decimal, hex or predefined) |
|
575 * |
|
576 * entityFound ==> TRUE if it is valid entity of any kind. |
|
577 * encoding ==> Encoding of input string. |
|
578 * |
|
579 * Resolve the following type of entities |
|
580 * 1) Character hex entry |
|
581 * 2) Character decimal entry |
|
582 * 3) Prefined entry |
|
583 * 4) Internal Entities defined in the DTD. |
|
584 * |
|
585 */ |
|
586 |
|
587 |
|
588 static |
|
589 NW_Status_t CXML_XML_Resolve_Entity( CXML_Uint8* entityContent, |
|
590 CXML_Uint32 length, |
|
591 NW_Uint32* entityVal, |
|
592 CXML_Uint8** entityValStr, |
|
593 NW_Bool* entityFound, |
|
594 NW_Uint32 encoding, |
|
595 void* internalEntityList) |
|
596 { |
|
597 NW_Status_t s = NW_STAT_SUCCESS; |
|
598 NW_Ucs2 entityChar = 0; |
|
599 NW_Ucs2* ucs2Str = NULL; |
|
600 NW_Bool entityGot = NW_FALSE; |
|
601 |
|
602 |
|
603 |
|
604 if( encoding == HTTP_iso_10646_ucs_2 ) |
|
605 { |
|
606 /* Extra two bytes for NULL termination as UCS should be always |
|
607 * even bytes. |
|
608 */ |
|
609 ucs2Str = (NW_Ucs2*) NW_Mem_Malloc(length+2); |
|
610 if(ucs2Str != NULL) |
|
611 { |
|
612 (void)NW_Mem_memcpy(ucs2Str, entityContent, length ); |
|
613 /* Null-terminate the string */ |
|
614 ucs2Str[length/2] = '\000'; |
|
615 } |
|
616 else |
|
617 { |
|
618 s = NW_STAT_OUT_OF_MEMORY; |
|
619 } |
|
620 } |
|
621 else |
|
622 { |
|
623 s = NW_String_byteToUCS2Char(entityContent,length,&ucs2Str); |
|
624 } |
|
625 |
|
626 |
|
627 if (NW_STAT_IS_FAILURE(s)) |
|
628 { |
|
629 if(ucs2Str != NULL) |
|
630 { |
|
631 NW_Mem_Free(ucs2Str); |
|
632 } |
|
633 return s; |
|
634 } |
|
635 |
|
636 /*Check and validate for Hex/Decimal numeric character entry */ |
|
637 |
|
638 if(ucs2Str[0] == '#') |
|
639 { |
|
640 entityGot = CXML_TRUE; |
|
641 if( (ucs2Str[1] == 'x') || (ucs2Str[1] == 'X') ) //Hex entity |
|
642 { |
|
643 |
|
644 if(CXML_XML_GetNumEntityChar(ucs2Str + 2,&entityChar,16) != NW_TRUE) |
|
645 { |
|
646 /* Entity syntax is valid, but entity value is wrong so continue parsing it as |
|
647 * normal syntax. |
|
648 */ |
|
649 *entityFound = CXML_FALSE; |
|
650 if(ucs2Str != NULL) |
|
651 { |
|
652 NW_Mem_Free(ucs2Str); |
|
653 } |
|
654 return NW_STAT_SUCCESS; |
|
655 } |
|
656 *entityVal = (entityChar); |
|
657 *entityValStr = NULL; |
|
658 } |
|
659 else |
|
660 { |
|
661 /*Check and validate for Decimal numeric character entry */ |
|
662 if(CXML_XML_GetNumEntityChar(ucs2Str + 1,&entityChar,10) != NW_TRUE) |
|
663 { |
|
664 /* Entity syntax is valid, but entity value is wrong so continue parsing it as |
|
665 * normal syntax. |
|
666 */ |
|
667 *entityFound = CXML_FALSE; |
|
668 if(ucs2Str != NULL) |
|
669 { |
|
670 NW_Mem_Free(ucs2Str); |
|
671 } |
|
672 return NW_STAT_SUCCESS; |
|
673 } |
|
674 *entityVal = (entityChar); |
|
675 *entityValStr = NULL; |
|
676 } |
|
677 *entityFound = CXML_TRUE; |
|
678 } //end if(ucs2Str[0] == '#') |
|
679 |
|
680 /* Check for only predefined entities */ |
|
681 |
|
682 if(entityGot != CXML_TRUE) |
|
683 { |
|
684 if( ( entityChar = CXML_Get_Entity_Val(ucs2Str) ) != 0) |
|
685 { |
|
686 entityGot = NW_TRUE; |
|
687 } |
|
688 |
|
689 |
|
690 if(entityGot == NW_TRUE) |
|
691 { |
|
692 *entityVal = entityChar; |
|
693 *entityValStr = NULL; |
|
694 *entityFound = CXML_TRUE; |
|
695 } |
|
696 } //end if(..) |
|
697 |
|
698 /*Check for the Internal Entity */ |
|
699 |
|
700 |
|
701 if(entityGot != CXML_TRUE) |
|
702 { |
|
703 s = CXML_XML_Resolve_Internal_Entity(ucs2Str, entityValStr, entityFound, encoding, internalEntityList); |
|
704 *entityVal = 0; |
|
705 } |
|
706 |
|
707 |
|
708 if(ucs2Str != NULL) |
|
709 { |
|
710 NW_Mem_Free(ucs2Str); |
|
711 } |
|
712 |
|
713 return NW_STAT_SUCCESS; |
|
714 }//end CXML_XML_Resolve_Entity() |
|
715 |
|
716 /* Reads the entity data. Decide about the entity. */ |
|
717 |
|
718 |
|
719 NW_Status_t CXML_XML_Handle_entity(NW_XML_Reader_t* pT, |
|
720 NW_XML_Reader_Interval_t* pI_entityData, |
|
721 NW_Uint32* entityVal, |
|
722 NW_Uint8** entityValStr, |
|
723 NW_Bool* entityFound, |
|
724 void* internalEntityList) |
|
725 { |
|
726 NW_Status_t s; |
|
727 NW_Uint32 length; |
|
728 NW_Uint32 byteLength; |
|
729 NW_Uint8* pContent; |
|
730 |
|
731 if (!NW_XML_Reader_Interval_IsWellFormed(pI_entityData)) { |
|
732 return NW_STAT_FAILURE; |
|
733 } |
|
734 |
|
735 length = pI_entityData->stop - pI_entityData->start; |
|
736 byteLength = length; |
|
737 |
|
738 s = NW_XML_Reader_DataAddressFromBuffer(pT, pI_entityData->start, |
|
739 &byteLength, |
|
740 &pContent); |
|
741 if (NW_STAT_IS_FAILURE(s)) { |
|
742 return s; |
|
743 } |
|
744 |
|
745 if (byteLength != length) { |
|
746 return NW_STAT_FAILURE; |
|
747 } |
|
748 |
|
749 s = CXML_XML_Resolve_Entity(pContent,length,entityVal,entityValStr,entityFound, |
|
750 pT->encoding, internalEntityList); |
|
751 |
|
752 return s; |
|
753 }//end CXML_XML_Handle_entity(..) |
|
754 |
|
755 /* The following function converts the numeric entities to the |
|
756 * predefined entity. |
|
757 */ |
|
758 |
|
759 static |
|
760 NW_Status_t CXML_XML_Entity_to_Ascii(CXML_Uint32 entityVal, |
|
761 CXML_Byte* entityStr, |
|
762 CXML_Uint32* strLen) |
|
763 { |
|
764 NW_Uint32 i = 0, j=0; |
|
765 CXML_Uint32 entityLength = 0; |
|
766 NW_Status_t s = NW_STAT_FAILURE; |
|
767 |
|
768 for(i=0; i < CXML_ALL_PREDEFINE_ENTITIES; i++) |
|
769 { |
|
770 if(CXML_EntitySet_Array[i].value == entityVal) |
|
771 { |
|
772 entityStr[0] = '&'; //Starting of the entity |
|
773 |
|
774 entityLength = CXML_Asc_strlen((CXML_Int8 *)CXML_EntitySet_Array[i].name); |
|
775 |
|
776 for(j=0; j < entityLength; j++) |
|
777 { |
|
778 entityStr[j+1] = CXML_EntitySet_Array[i].name[j]; |
|
779 } |
|
780 |
|
781 entityStr[j + 1] = ';' ; //end of entity |
|
782 *strLen = entityLength + 2; |
|
783 entityStr[*strLen] = '\0'; |
|
784 s = NW_STAT_SUCCESS; |
|
785 break; |
|
786 }//endif |
|
787 }//end for(..) |
|
788 |
|
789 return s; |
|
790 }//end CXML_XML_Ascii_to_Entity(..) |
|
791 |
|
792 /* This function assumes that predefined entities. The predefined |
|
793 * entities are defined in the CXML_EntitySet_Array[]. |
|
794 * |
|
795 */ |
|
796 |
|
797 EXPORT_C |
|
798 NW_Status_t CXML_XML_Get_Entity(CXML_Uint32 entityVal, |
|
799 CXML_Byte* entityStr, |
|
800 CXML_Uint32* strLen) |
|
801 { |
|
802 NW_Status_t s = NW_STAT_SUCCESS; |
|
803 *strLen = 0; |
|
804 |
|
805 |
|
806 s = CXML_XML_Entity_to_Ascii( entityVal,entityStr,strLen); |
|
807 |
|
808 if(s == NW_STAT_FAILURE) |
|
809 { |
|
810 *entityStr = NULL; |
|
811 *strLen = NULL; |
|
812 } |
|
813 |
|
814 return s; |
|
815 }//end CXML_XML_Get_Entity() |
|
816 |
|