|
1 /** |
|
2 * libxml2_uri.c: set of generic URI related routines |
|
3 * |
|
4 * Reference: RFC 2396 |
|
5 * |
|
6 * See Copyright for the status of this software. |
|
7 * |
|
8 * daniel@veillard.com |
|
9 * Portion Copyright © 2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. |
|
10 */ |
|
11 |
|
12 #define IN_LIBXML |
|
13 #include "xmlenglibxml.h" |
|
14 |
|
15 #include <string.h> |
|
16 |
|
17 #include <stdapis/libxml2/libxml2_uri.h> |
|
18 #include <stdapis/libxml2/libxml2_globals.h> |
|
19 |
|
20 /************************************************************************ |
|
21 * * |
|
22 * Macros to differentiate various character type * |
|
23 * directly extracted from RFC 2396 * |
|
24 * * |
|
25 ************************************************************************/ |
|
26 |
|
27 /* |
|
28 * alpha = lowalpha | upalpha |
|
29 */ |
|
#define IS_ALPHA(x) ((IS_LOWALPHA(x)) || (IS_UPALPHA(x)))

/*
 * lowalpha = "a" .. "z"
 *
 * NOTE: like every macro in this group, the argument is evaluated more
 * than once, so it must not have side effects.
 */
#define IS_LOWALPHA(x) ('a' <= (x) && (x) <= 'z')

/*
 * upalpha = "A" .. "Z"
 */
#define IS_UPALPHA(x) ('A' <= (x) && (x) <= 'Z')

/*
 * digit = "0" .. "9"
 */
#define IS_DIGIT(x) ('0' <= (x) && (x) <= '9')

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)))

/*
 * hex = digit | "A" .. "F" | "a" .. "f"
 */
#define IS_HEX(x)                       \
    ((IS_DIGIT(x)) ||                   \
     ('a' <= (x) && (x) <= 'f') ||      \
     ('A' <= (x) && (x) <= 'F'))
|
67 |
|
68 /* |
|
69 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" |
|
70 */ |
|
71 |
|
#define IS_MARK(x)                                      \
    ((x) == '-' || (x) == '_' || (x) == '.' ||          \
     (x) == '!' || (x) == '~' || (x) == '*' ||          \
     (x) == '\'' || (x) == '(' || (x) == ')')
|
75 |
|
76 |
|
77 /* |
|
78 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
|
79 */ |
|
80 |
|
#define IS_RESERVED(x)                                          \
    ((x) == ';' || (x) == '/' || (x) == '?' || (x) == ':' ||    \
     (x) == '@' || (x) == '&' || (x) == '=' || (x) == '+' ||    \
     (x) == '$' || (x) == ',')

/*
 * Function form of IS_RESERVED.
 * Returns 1 when ch is one of the RFC 2396 "reserved" characters,
 * 0 otherwise.
 */
static int isReserved(char ch)
{
    const int reserved = IS_RESERVED(ch);

    return reserved;
}
|
89 |
|
90 /* |
|
91 * unreserved = alphanum | mark |
|
92 */ |
|
#define IS_UNRESERVED(x) ((IS_ALPHANUM(x)) || (IS_MARK(x)))

/*
 * Function form of IS_UNRESERVED.
 * Returns 1 when ch is alphanumeric or a "mark" character, 0 otherwise.
 */
static int isUnreserved(char ch)
{
    if (IS_UNRESERVED(ch))
        return 1;
    return 0;
}
|
99 /* |
|
100 * escaped = "%" hex hex |
|
101 */ |
|
102 |
|
#define IS_ESCAPED(p) \
    ((p)[0] == '%' && (IS_HEX((p)[1])) && (IS_HEX((p)[2])))

/*
 * Function form of IS_ESCAPED.
 * Returns 1 when pch points at a "%XX" triplet (X being a hex digit),
 * 0 otherwise.  Bytes after the first are only inspected while the
 * preceding ones match, so a string ending right after '%' is not
 * read past its terminator.
 */
static int isEscaped(const char* pch)
{
    if (IS_ESCAPED(pch))
        return 1;
    return 0;
}
|
110 /* |
|
111 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | |
|
112 * "&" | "=" | "+" | "$" | "," |
|
113 */ |
|
114 /* |
|
115 #define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\ |
|
116 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\ |
|
117 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\ |
|
118 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ','))) |
|
119 |
|
120 Optimized into: |
|
121 |
|
122 #define IS_URIC_NO_SLASH(p) \ |
|
123 ((IS_UNRESERVED(*(p))) || \ |
|
124 (IS_ESCAPED(p)) || \ |
|
125 ((*(p)) != '/' && IS_RESERVED(p))) |
|
126 */ |
|
/*
 * Returns 1 when pch points at a uric_no_slash item: an unreserved
 * character, a "%XX" escape, or any reserved character except '/'.
 * Returns 0 otherwise.
 */
static int isUriCNoSlash(const char* pch)
{
    const char first = pch[0];

    if (isUnreserved(first) || isEscaped(pch))
        return 1;
    if (first == '/')
        return 0;
    return isReserved(first) ? 1 : 0;
}
|
134 |
|
135 |
|
136 /* |
|
137 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," |
|
138 */ |
|
139 |
|
140 /* |
|
141 #define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ |
|
142 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\ |
|
143 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\ |
|
144 ((*(p) == ','))) |
|
145 */ |
|
/*
 * Returns 1 when pch points at a pchar item: an unreserved character,
 * a "%XX" escape, or one of  : @ & = + $ ,   Returns 0 otherwise.
 */
static int isPChar(const char* pch)
{
    switch (pch[0]) {
    case ':': case '@': case '&': case '=':
    case '+': case '$': case ',':
        return 1;
    default:
        break;
    }
    return (isUnreserved(pch[0]) || isEscaped(pch)) ? 1 : 0;
}
|
155 |
|
156 |
|
157 /* |
|
158 * rel_segment = 1*( unreserved | escaped | |
|
159 * ";" | "@" | "&" | "=" | "+" | "$" | "," ) |
|
160 */ |
|
161 |
|
162 /* |
|
163 #define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ |
|
164 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || \ |
|
165 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \ |
|
166 ((*(p) == ','))) |
|
167 */ |
|
/*
 * Returns 1 when pch points at a rel_segment item: an unreserved
 * character, a "%XX" escape, or one of  ; @ & = + $ ,
 * Returns 0 otherwise.
 */
static int isSegment(const char* pch)
{
    switch (pch[0]) {
    case ';': case '@': case '&': case '=':
    case '+': case '$': case ',':
        return 1;
    default:
        break;
    }
    return (isUnreserved(pch[0]) || isEscaped(pch)) ? 1 : 0;
}
|
176 /* |
|
177 * scheme = alpha *( alpha | digit | "+" | "-" | "." ) |
|
178 */ |
|
179 |
|
#define IS_SCHEME(x) \
    ((IS_ALPHANUM(x)) || (x) == '+' || (x) == '-' || (x) == '.')
|
182 |
|
183 /* the macro is used only once |
|
184 static int isScheme(char x) |
|
185 { |
|
186 return ((IS_ALPHA(x)) || (IS_DIGIT(x)) || |
|
187 (x) == '+') || ((x) == '-') || ((x) == '.')) |
|
188 } |
|
189 */ |
|
190 |
|
191 /* |
|
192 * reg_name = 1*( unreserved | escaped | "$" | "," | |
|
193 * ";" | ":" | "@" | "&" | "=" | "+" ) |
|
194 */ |
|
195 |
|
196 /* |
|
197 #define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ |
|
198 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || \ |
|
199 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || \ |
|
200 ((*(p) == '=')) || ((*(p) == '+'))) |
|
201 */ |
|
202 |
|
/*
 * Returns 1 when pch points at a reg_name item: an unreserved
 * character, a "%XX" escape, or one of  $ , ; : @ & = +
 * Returns 0 otherwise.
 */
static int isRegName(const char* pch)
{
    switch (pch[0]) {
    case '$': case ',': case ';': case ':':
    case '@': case '&': case '=': case '+':
        return 1;
    default:
        break;
    }
    return (isUnreserved(pch[0]) || isEscaped(pch)) ? 1 : 0;
}
|
211 /* |
|
212 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" | |
|
213 * "+" | "$" | "," ) |
|
214 */ |
|
215 |
|
216 /* |
|
217 #define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ |
|
218 ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) || \ |
|
219 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \ |
|
220 ((*(p) == ','))) |
|
221 */ |
|
222 |
|
223 |
|
/*
 * Returns 1 when pch points at a userinfo item: an unreserved
 * character, a "%XX" escape, or one of  ; : & = + $ ,
 * Returns 0 otherwise.
 */
static int isUserInfo(const char* pch)
{
    switch (pch[0]) {
    case ';': case ':': case '&': case '=':
    case '+': case '$': case ',':
        return 1;
    default:
        break;
    }
    return (isUnreserved(pch[0]) || isEscaped(pch)) ? 1 : 0;
}
|
234 |
|
235 /* |
|
236 * uric = reserved | unreserved | escaped |
|
237 */ |
|
238 |
|
239 //#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || (IS_RESERVED(*(p)))) |
|
240 |
|
/*
 * Returns 1 when pch points at a uric item: a reserved character, an
 * unreserved character or a "%XX" escape.  Returns 0 otherwise.
 */
static int isUriC(const char* pch)
{
    const char first = pch[0];

    if (isUnreserved(first))
        return 1;
    if (isEscaped(pch))
        return 1;
    return isReserved(first) ? 1 : 0;
}
|
246 |
|
247 /* |
|
248 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" |
|
249 */ |
|
250 |
|
251 /* |
|
252 #define IS_UNWISE(p) \ |
|
253 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ |
|
254 ((*(p) == '\\'))|| ((*(p) == '^')) || ((*(p) == '[')) || \ |
|
255 ((*(p) == ']')) || ((*(p) == '`'))) |
|
256 */ |
|
257 |
|
/*
 * Returns 1 when ch is one of the RFC 2396 "unwise" characters
 *   { } | \ ^ [ ] `
 * and 0 otherwise.
 */
static int isUnwise(char ch)
{
    switch (ch) {
    case '{': case '}': case '|': case '\\':
    case '^': case '[': case ']': case '`':
        return 1;
    default:
        return 0;
    }
}
|
265 /* |
|
266 * Skip to next pointer char, handle escaped sequences |
|
267 */ |
|
268 |
|
/*
 * Advances the pointer lvalue p by 3 when it points at '%', by 1
 * otherwise.  The argument is fully parenthesized so that any lvalue
 * expression (e.g. *pp) can be passed safely; it is still evaluated
 * more than once, so it must not have side effects.
 * The macro itself does not check that two more bytes follow the '%'.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
|
270 |
|
271 /* |
|
272 * Productions from the spec. |
|
273 * |
|
274 * authority = server | reg_name |
|
275 * reg_name = 1*( unreserved | escaped | "$" | "," | |
|
276 * ";" | ":" | "@" | "&" | "=" | "+" ) |
|
277 * |
|
278 * path = [ abs_path | opaque_part ] |
|
279 */ |
|
280 |
|
281 /************************************************************************ |
|
282 * * |
|
283 * Generic URI structure functions * |
|
284 * * |
|
285 ************************************************************************/ |
|
286 |
|
287 /** |
|
288 * xmlCreateURI: |
|
289 * |
|
290 * Simply creates an empty xmlURI |
|
291 * |
|
292 * Returns the new structure or NULL in case of error |
|
293 * |
|
294 * OOM: possible --> returns NULL , sets OOM flag |
|
295 */ |
|
296 XMLPUBFUNEXPORT xmlURIPtr |
|
297 xmlCreateURI(void) { |
|
298 xmlURIPtr ret; |
|
299 |
|
300 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); |
|
301 if (ret == NULL) { |
|
302 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlCreateURI: out of memory\n")); |
|
303 return(NULL); |
|
304 } |
|
305 memset(ret, 0, sizeof(xmlURI)); |
|
306 return(ret); |
|
307 } |
|
308 |
|
309 /** |
|
310 * xmlSaveUri: |
|
311 * @param uri pointer to an xmlURI |
|
312 * |
|
313 * Save the URI as an escaped string |
|
314 * |
|
315 * Returns a new string (to be deallocated by caller) |
|
316 * |
|
317 * OOM: |
|
318 */ |
|
319 XMLPUBFUNEXPORT xmlChar* |
|
320 xmlSaveUri(xmlURIPtr uri) |
|
321 { |
|
322 xmlChar* ret;// = NULL; |
|
323 const char* p; |
|
324 int len; |
|
325 int max; |
|
326 char ch; |
|
327 |
|
328 if (!uri) |
|
329 return(NULL); |
|
330 |
|
331 max = 80; |
|
332 ret = (xmlChar*) xmlMallocAtomic((max + 1) * sizeof(xmlChar)); |
|
333 if (!ret) |
|
334 goto OOM; |
|
335 |
|
336 len = 0; |
|
337 |
|
338 if (uri->scheme != NULL) { |
|
339 p = uri->scheme; |
|
340 |
|
341 while (*p != 0) { |
|
342 if (len >= max) { |
|
343 xmlChar* tmp; |
|
344 max *= 2; |
|
345 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
346 if (!tmp) |
|
347 goto OOM; |
|
348 ret = tmp; |
|
349 } |
|
350 ret[len++] = *p++; |
|
351 } |
|
352 if (len >= max) { |
|
353 xmlChar* tmp; |
|
354 max *= 2; |
|
355 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
356 if (!tmp) |
|
357 goto OOM; |
|
358 ret = tmp; |
|
359 } |
|
360 ret[len++] = ':'; |
|
361 } |
|
362 if (uri->opaque != NULL) { |
|
363 p = uri->opaque; |
|
364 while (*p != 0) { |
|
365 if (len + 3 >= max) { |
|
366 xmlChar* tmp; |
|
367 max *= 2; |
|
368 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
369 if (!tmp) |
|
370 goto OOM; |
|
371 ret = tmp; |
|
372 } |
|
373 if (isReserved(*p) || isUnreserved(*p)) |
|
374 { |
|
375 ret[len++] = *p++; |
|
376 } |
|
377 else |
|
378 { |
|
379 int val = *(unsigned char*)p++; |
|
380 int hi = val / 0x10; |
|
381 int lo = val % 0x10; |
|
382 ret[len++] = '%'; |
|
383 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); |
|
384 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); |
|
385 } |
|
386 } |
|
387 } |
|
388 else |
|
389 { |
|
390 if (uri->server != NULL) { |
|
391 if (len + 3 >= max) { |
|
392 xmlChar* tmp; |
|
393 max *= 2; |
|
394 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
395 if (!tmp) |
|
396 goto OOM; |
|
397 ret = tmp; |
|
398 } |
|
399 ret[len++] = '/'; |
|
400 ret[len++] = '/'; |
|
401 if (uri->user != NULL) { |
|
402 p = uri->user; |
|
403 |
|
404 while (*p != 0) { |
|
405 if (len + 3 >= max) { |
|
406 xmlChar* tmp; |
|
407 max *= 2; |
|
408 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
409 if (!tmp) |
|
410 goto OOM; |
|
411 ret = tmp; |
|
412 } |
|
413 ch = *p; |
|
414 if (isUnreserved(ch) || |
|
415 ch == ';' || ch == ':' || |
|
416 ch == '&' || ch == '=' || |
|
417 ch == '+' || ch == '$' || |
|
418 ch == ',') |
|
419 { |
|
420 ret[len++] = *p++; |
|
421 } |
|
422 else |
|
423 { |
|
424 int val = *(unsigned char *)p++; |
|
425 int hi = val / 0x10, lo = val % 0x10; |
|
426 ret[len++] = '%'; |
|
427 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); |
|
428 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); |
|
429 } |
|
430 } |
|
431 if (len + 3 >= max) { |
|
432 xmlChar* tmp; |
|
433 max *= 2; |
|
434 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
435 if (!tmp) |
|
436 goto OOM; |
|
437 ret = tmp; |
|
438 } |
|
439 ret[len++] = '@'; |
|
440 } |
|
441 p = uri->server; |
|
442 while (*p != 0) { |
|
443 if (len >= max) { |
|
444 xmlChar* tmp; |
|
445 max *= 2; |
|
446 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
447 if (!tmp) |
|
448 goto OOM; |
|
449 ret = tmp; |
|
450 } |
|
451 ret[len++] = *p++; |
|
452 } |
|
453 if (uri->port > 0) { |
|
454 if (len + 10 >= max) { |
|
455 xmlChar* tmp; |
|
456 max *= 2; |
|
457 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
458 if (!tmp) |
|
459 goto OOM; |
|
460 ret = tmp; |
|
461 } |
|
462 len += snprintf((char*) &ret[len], max - len, ":%d", uri->port); |
|
463 } |
|
464 } |
|
465 else if (uri->authority != NULL) { |
|
466 if (len + 3 >= max) { |
|
467 xmlChar* tmp; |
|
468 max *= 2; |
|
469 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
470 if (!tmp) |
|
471 goto OOM; |
|
472 ret = tmp; |
|
473 } |
|
474 ret[len++] = '/'; |
|
475 ret[len++] = '/'; |
|
476 p = uri->authority; |
|
477 while (*p != 0) { |
|
478 if (len + 3 >= max) { |
|
479 xmlChar* tmp; |
|
480 max *= 2; |
|
481 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
482 if (!tmp) |
|
483 goto OOM; |
|
484 ret = tmp; |
|
485 } |
|
486 ch = *p; |
|
487 if (isUnreserved(ch) || |
|
488 ch == '$' || ch == ',' || ch == ';' || |
|
489 ch == ':' || ch == '@' || ch == '&' || |
|
490 ch == '=' || ch == '+') |
|
491 { |
|
492 ret[len++] = *p++; |
|
493 } |
|
494 else |
|
495 { |
|
496 int val = *(unsigned char *)p++; |
|
497 int hi = val / 0x10; |
|
498 int lo = val % 0x10; |
|
499 ret[len++] = '%'; |
|
500 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); |
|
501 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); |
|
502 } |
|
503 } |
|
504 } |
|
505 else if (uri->scheme != NULL) { |
|
506 if (len + 3 >= max) { |
|
507 xmlChar* tmp; |
|
508 max *= 2; |
|
509 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
510 if (!tmp) |
|
511 goto OOM; |
|
512 ret = tmp; |
|
513 } |
|
514 ret[len++] = '/'; |
|
515 ret[len++] = '/'; |
|
516 } |
|
517 if (uri->path != NULL) { |
|
518 p = uri->path; |
|
519 while (*p != 0) { |
|
520 if (len + 3 >= max) { |
|
521 xmlChar* tmp; |
|
522 max *= 2; |
|
523 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
524 if (!tmp) |
|
525 goto OOM; |
|
526 ret = tmp; |
|
527 } |
|
528 ch = *p; |
|
529 if (isUnreserved(ch) || ch == '/' || |
|
530 ch == ';' || ch == '@' || ch == '&' || |
|
531 ch == '=' || ch == '+' || ch == '$' || |
|
532 ch == ',') |
|
533 { |
|
534 ret[len++] = *p++; |
|
535 } |
|
536 else |
|
537 { |
|
538 int val = *(unsigned char *)p++; |
|
539 int hi = val / 0x10; |
|
540 int lo = val % 0x10; |
|
541 ret[len++] = '%'; |
|
542 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); |
|
543 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); |
|
544 } |
|
545 } |
|
546 } |
|
547 if (uri->query != NULL) { |
|
548 |
|
549 if (len + 3 >= max) { |
|
550 xmlChar* tmp; |
|
551 max *= 2; |
|
552 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
553 if (!tmp) |
|
554 goto OOM; |
|
555 ret = tmp; |
|
556 } |
|
557 ret[len++] = '?'; |
|
558 p = uri->query; |
|
559 while (*p != 0) { |
|
560 if (len + 3 >= max) { |
|
561 xmlChar* tmp; |
|
562 max *= 2; |
|
563 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
564 if (!tmp) |
|
565 goto OOM; |
|
566 ret = tmp; |
|
567 } |
|
568 if (isUnreserved(*p) || isReserved(*p)) |
|
569 { |
|
570 ret[len++] = *p++; |
|
571 } |
|
572 else |
|
573 { |
|
574 int val = *(unsigned char *)p++; |
|
575 int hi = val / 0x10; |
|
576 int lo = val % 0x10; |
|
577 ret[len++] = '%'; |
|
578 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); |
|
579 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); |
|
580 } |
|
581 } |
|
582 } |
|
583 } |
|
584 if (uri->fragment != NULL) { |
|
585 if (len + 3 >= max) { |
|
586 xmlChar* tmp; |
|
587 max *= 2; |
|
588 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
589 if (!tmp) |
|
590 goto OOM; |
|
591 ret = tmp; |
|
592 } |
|
593 ret[len++] = '#'; |
|
594 p = uri->fragment; |
|
595 |
|
596 while (*p != 0) { |
|
597 if (len + 3 >= max) { |
|
598 xmlChar* tmp; |
|
599 max *= 2; |
|
600 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
601 if (!tmp) |
|
602 goto OOM; |
|
603 ret = tmp; |
|
604 } |
|
605 if (isUnreserved(*p) || isReserved(*p)) |
|
606 { |
|
607 ret[len++] = *p++; |
|
608 } |
|
609 else |
|
610 { |
|
611 int val = *(unsigned char *)p++; |
|
612 int hi = val / 0x10; |
|
613 int lo = val % 0x10; |
|
614 ret[len++] = '%'; |
|
615 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); |
|
616 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); |
|
617 } |
|
618 } |
|
619 } |
|
620 if (len >= max) { |
|
621 xmlChar* tmp; |
|
622 max *= 2; |
|
623 tmp = (xmlChar*) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); // DONE: Fix xmlRealloc |
|
624 if (!tmp) |
|
625 goto OOM; |
|
626 ret = tmp; |
|
627 } |
|
628 ret[len++] = 0; |
|
629 return(ret); |
|
630 //------------------------------ |
|
631 OOM: |
|
632 if(ret) |
|
633 xmlFree(ret); |
|
634 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlSaveUri: out of memory\n")); |
|
635 return(NULL); |
|
636 } |
|
637 |
|
638 #ifndef XMLENGINE_EXCLUDE_FILE_FUNC |
|
639 /** |
|
640 * xmlPrintURI: |
|
641 * @param stream a FILE* for the output |
|
642 * @param uri pointer to an xmlURI |
|
643 * |
|
644 * Prints the URI in the stream stream. |
|
645 */ |
|
646 void |
|
647 xmlPrintURI(FILE *stream, xmlURIPtr uri) { |
|
648 xmlChar *out; |
|
649 |
|
650 out = xmlSaveUri(uri); |
|
651 if (out != NULL) { |
|
652 fprintf(stream, "%s", (char *) out); |
|
653 xmlFree(out); |
|
654 } |
|
655 } |
|
656 #endif |
|
657 |
|
658 /** |
|
659 * xmlCleanURI: |
|
660 * @param uri pointer to an xmlURI |
|
661 * |
|
662 * Make sure the xmlURI struct is free of content |
|
663 * |
|
664 * OOM: never |
|
665 */ |
|
666 static void |
|
667 xmlCleanURI(xmlURIPtr uri) { |
|
668 if (uri == NULL) return; |
|
669 |
|
670 if (uri->scheme){ |
|
671 xmlFree(uri->scheme); |
|
672 uri->scheme = NULL; |
|
673 } |
|
674 if (uri->server){ |
|
675 xmlFree(uri->server); |
|
676 uri->server = NULL; |
|
677 } |
|
678 if (uri->user){ |
|
679 xmlFree(uri->user); |
|
680 uri->user = NULL; |
|
681 } |
|
682 if (uri->path) { |
|
683 xmlFree(uri->path); |
|
684 uri->path = NULL; |
|
685 } |
|
686 if (uri->fragment) { |
|
687 xmlFree(uri->fragment); |
|
688 uri->fragment = NULL; |
|
689 } |
|
690 if (uri->opaque) { |
|
691 xmlFree(uri->opaque); |
|
692 uri->opaque = NULL; |
|
693 } |
|
694 if (uri->authority) { |
|
695 xmlFree(uri->authority); |
|
696 uri->authority = NULL; |
|
697 } |
|
698 if (uri->query) { |
|
699 xmlFree(uri->query); |
|
700 uri->query = NULL; |
|
701 } |
|
702 } |
|
703 |
|
704 /** |
|
705 * xmlFreeURI: |
|
706 * @param uri pointer to an xmlURI |
|
707 * |
|
708 * Free up the xmlURI struct |
|
709 */ |
|
710 XMLPUBFUNEXPORT void |
|
711 xmlFreeURI(xmlURIPtr uri) { |
|
712 if (uri == NULL) return; |
|
713 |
|
714 if (uri->scheme) xmlFree(uri->scheme); |
|
715 if (uri->server) xmlFree(uri->server); |
|
716 if (uri->user ) xmlFree(uri->user); |
|
717 if (uri->path ) xmlFree(uri->path); |
|
718 if (uri->fragment) xmlFree(uri->fragment); |
|
719 if (uri->opaque) xmlFree(uri->opaque); |
|
720 if (uri->authority) xmlFree(uri->authority); |
|
721 if (uri->query) xmlFree(uri->query); |
|
722 xmlFree(uri); |
|
723 } |
|
724 |
|
725 /************************************************************************ |
|
726 * * |
|
727 * Helper functions * |
|
728 * * |
|
729 ************************************************************************/ |
|
730 |
|
/**
 * xmlNormalizeURIPath:
 * @param path  pointer to the path string (modified in place)
 *
 * Applies the normalization steps of RFC 2396 Section 5.2, 6.c through
 * 6.g, to the path:
 *   c) "./" complete segments are removed;
 *   d) a trailing "." segment is removed;
 *   e) "<segment>/../" is removed iteratively, leftmost match first;
 *   f) a trailing "<segment>/.." is removed;
 *   g) for absolute paths, leading "/.." segments that remain are
 *      discarded.
 * Runs of '/' following a segment are collapsed to a single '/'.
 * Leading '/' characters are left untouched by steps c) to f).
 *
 * The string only ever shrinks, so no allocation is performed.
 *
 * Returns 0 on success, -1 when path is NULL.
 */
XMLPUBFUNEXPORT int
xmlNormalizeURIPath(char *path) {
    char *src;      /* read position  */
    char *dst;      /* write position */

    if (path == NULL)
        return(-1);

    /* Move to the first non-empty segment; nothing to do if there is none. */
    for (src = path; *src == '/'; src++)
        ;
    if (*src == '\0')
        return(0);

    /* Everything before the first segment is kept as it is. */
    dst = src;

    /*
     * Pass 1: cases (c) and (d).  "src" is at the start of a segment at
     * the top of every iteration.
     */
    while (*src != '\0') {
        if (src[0] == '.') {
            if (src[1] == '/') {
                /* (c) drop "./" together with any extra slashes after it */
                src += 2;
                while (*src == '/')
                    src++;
                continue;
            }
            if (src[1] == '\0') {
                /* (d) trailing "." is dropped */
                break;
            }
        }

        /* Keep the segment itself. */
        while ((*src != '/') && (*src != '\0'))
            *dst++ = *src++;
        if (*src == '\0')
            break;

        /* Collapse a run of slashes, then keep a single one. */
        while ((src[0] == '/') && (src[1] == '/'))
            src++;
        *dst++ = *src++;
    }
    *dst = '\0';

    /* Pass 2 starts again from the first non-empty segment. */
    for (src = path; *src == '/'; src++)
        ;
    if (*src == '\0')
        return(0);

    /*
     * Pass 2: cases (e) and (f).  The buffer is compacted each time a
     * "<segment>/.." pair is removed, so a single cursor is enough; after
     * a removal the cursor backs up one segment so that chains such as
     * "foo/bar/../.." are fully resolved.
     */
    for (;;) {
        char *next;     /* start of the segment following "src" */
        char *prev;     /* used to back up after a removal      */
        char *to;
        const char *from;

        /* Locate the end of the current segment. */
        next = src;
        while ((*next != '/') && (*next != '\0'))
            next++;

        /* Last segment: at least two are needed for (e)/(f). */
        if (*next == '\0')
            break;
        next++;

        /*
         * Keep the current segment when it is ".." itself, or when the
         * following segment is not exactly "..".
         */
        if (((src[0] == '.') && (src[1] == '.') && (next == src + 3)) ||
            (next[0] != '.') ||
            (next[1] != '.') ||
            ((next[2] != '/') && (next[2] != '\0'))) {
            src = next;
            continue;
        }

        /* (f) "<segment>/.." ends the buffer: cut here and stop. */
        if (next[2] == '\0') {
            src[0] = '\0';
            break;
        }

        /* (e) remove "<segment>/../"; regions overlap, so copy by hand. */
        to = src;
        from = next + 3;
        while ((*to++ = *from++) != 0)
            ;

        /* Step back over the slash(es) preceding the removed part. */
        prev = src;
        while ((prev > path) && ((--prev)[0] == '/'))
            ;
        if (prev == path)
            continue;   /* no earlier segment: go on from here */

        /* "prev" is on the last char of the previous segment; find its start. */
        src = prev;
        while ((src > path) && (src[-1] != '/'))
            --src;
    }
    *dst = '\0';

    /*
     * (g) Leading ".." segments of an absolute path are an error per the
     * RFC; they are discarded here.
     */
    if (path[0] == '/') {
        src = path;
        while ((src[0] == '/') && (src[1] == '.') && (src[2] == '.') &&
               ((src[3] == '/') || (src[3] == '\0')))
            src += 3;

        if (src != path) {
            dst = path;
            while (*src != '\0')
                *dst++ = *src++;
            *dst = 0;
        }
    }

    return(0);
}
|
918 |
|
/* Returns 1 when c is a hexadecimal digit (0-9, a-f, A-F), 0 otherwise. */
static int is_hex(char c) {
    if (('0' <= c) && (c <= '9'))
        return(1);
    if (('a' <= c) && (c <= 'f'))
        return(1);
    if (('A' <= c) && (c <= 'F'))
        return(1);
    return(0);
}
|
926 |
|
927 /** |
|
928 * xmlURIUnescapeString: |
|
929 * @param str the string to unescape |
|
930 * @param len the length in bytes to unescape (or <= 0 to indicate full string) |
|
931 * @param target optional destination buffer |
|
932 * |
|
933 * Unescaping routine, does not do validity checks ! |
|
934 * Output is direct unsigned char translation of %XX values (no encoding) |
|
935 * |
|
936 * Returns an copy of the string, but unescaped |
|
937 * |
|
938 * OOM: possible --> sets OOM when returns NULL for target==NULL |
|
939 */ |
|
940 XMLPUBFUNEXPORT char* |
|
941 xmlURIUnescapeString(const char* str, int len, char* target) { |
|
942 char* ret; |
|
943 char* out; |
|
944 const char *in; |
|
945 |
|
946 if (!str) |
|
947 return(NULL); |
|
948 if (len <= 0) |
|
949 len = strlen(str); |
|
950 if (len < 0) |
|
951 return(NULL); |
|
952 |
|
953 if (!target) { |
|
954 ret = (char*) xmlMallocAtomic(len + 1); // may set OOM |
|
955 if (!ret) { |
|
956 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlURIUnescapeString: out of memory\n")); |
|
957 return(NULL); |
|
958 } |
|
959 } else |
|
960 ret = target; |
|
961 |
|
962 in = str; |
|
963 out = ret; |
|
964 while(len > 0) { |
|
965 if ((*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) |
|
966 { |
|
967 in++; |
|
968 if ((*in >= '0') && (*in <= '9')) |
|
969 *out = (*in - '0'); |
|
970 else if ((*in >= 'a') && (*in <= 'f')) |
|
971 *out = (*in - 'a') + 10; |
|
972 else if ((*in >= 'A') && (*in <= 'F')) |
|
973 *out = (*in - 'A') + 10; |
|
974 in++; |
|
975 if ((*in >= '0') && (*in <= '9')) |
|
976 *out = *out * 16 + (*in - '0'); |
|
977 else if ((*in >= 'a') && (*in <= 'f')) |
|
978 *out = *out * 16 + (*in - 'a') + 10; |
|
979 else if ((*in >= 'A') && (*in <= 'F')) |
|
980 *out = *out * 16 + (*in - 'A') + 10; |
|
981 in++; |
|
982 len -= 3; |
|
983 out++; |
|
984 } else { |
|
985 *out++ = *in++; |
|
986 len--; |
|
987 } |
|
988 } |
|
989 *out = 0; |
|
990 return(ret); |
|
991 } |
|
992 |
|
993 /** |
|
994 * xmlURIEscapeStr: |
|
995 * @param str string to escape |
|
996 * @param list exception list string of chars not to escape |
|
997 * |
|
998 * This routine escapes a string to hex, ignoring reserved characters (a-z) |
|
999 * and the characters in the exception list. |
|
1000 * |
|
1001 * Returns a new escaped string or NULL in case of error. |
|
1002 */ |
|
1003 XMLPUBFUNEXPORT xmlChar* |
|
1004 xmlURIEscapeStr(const xmlChar* str, const xmlChar* list) |
|
1005 { |
|
1006 xmlChar* ret; |
|
1007 xmlChar ch; |
|
1008 const xmlChar *in; |
|
1009 |
|
1010 unsigned int len, out; |
|
1011 |
|
1012 if (!str) |
|
1013 return(NULL); |
|
1014 len = xmlStrlen(str); |
|
1015 if (!(len > 0)) return(NULL); |
|
1016 |
|
1017 len += 20; |
|
1018 ret = (xmlChar*) xmlMallocAtomic(len); |
|
1019 if (!ret) |
|
1020 goto OOM; |
|
1021 |
|
1022 in = (const xmlChar *) str; |
|
1023 out = 0; |
|
1024 while(*in != 0) { |
|
1025 if (len - out <= 3) { |
|
1026 xmlChar* tmp; |
|
1027 len += 20; |
|
1028 tmp = (xmlChar*) xmlRealloc(ret, len); // DONE: Fix xmlRealloc |
|
1029 if (!tmp) |
|
1030 goto OOM; |
|
1031 ret = tmp; |
|
1032 } |
|
1033 |
|
1034 ch = *in; |
|
1035 |
|
1036 if ((ch != '@') && (!isUnreserved(ch)) && (!xmlStrchr(list, ch))) { |
|
1037 unsigned char val; |
|
1038 ret[out++] = '%'; |
|
1039 val = ch >> 4; |
|
1040 if (val <= 9) |
|
1041 ret[out++] = '0' + val; |
|
1042 else |
|
1043 ret[out++] = 'A' + val - 0xA; |
|
1044 val = ch & 0xF; |
|
1045 if (val <= 9) |
|
1046 ret[out++] = '0' + val; |
|
1047 else |
|
1048 ret[out++] = 'A' + val - 0xA; |
|
1049 in++; |
|
1050 } else { |
|
1051 ret[out++] = *in++; |
|
1052 } |
|
1053 } // while(*in != 0) |
|
1054 ret[out] = 0; |
|
1055 return(ret); |
|
1056 //--------------------- |
|
1057 OOM: |
|
1058 if(ret) |
|
1059 xmlFree(ret); |
|
1060 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlURIEscapeStr: out of memory\n")); |
|
1061 return(NULL); |
|
1062 } |
|
1063 |
|
1064 /** |
|
1065 * xmlURIEscape: |
|
1066 * @param str the string of the URI to escape |
|
1067 * |
|
1068 * Escaping routine, does not do validity checks ! |
|
1069 * It will try to escape the chars needing this, but this is heuristic |
|
1070 * based it's impossible to be sure. |
|
1071 * |
|
1072 * Returns a copy of the string, but escaped |
|
1073 * |
|
1074 * 25 May 2001 |
|
1075 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly |
|
1076 * according to RFC2396. |
|
1077 * - Carl Douglas |
|
1078 */ |
|
1079 XMLPUBFUNEXPORT xmlChar* |
|
1080 xmlURIEscape(const xmlChar * str) |
|
1081 { |
|
1082 xmlChar* ret; |
|
1083 xmlChar* segment; |
|
1084 xmlURIPtr uri; |
|
1085 int ret2; |
|
1086 |
|
1087 #define NULLCHK(p) if(!p) goto OOM |
|
1088 |
|
1089 |
|
1090 if (str == NULL) |
|
1091 return (NULL); |
|
1092 |
|
1093 uri = xmlCreateURI(); |
|
1094 if (uri != NULL) { |
|
1095 /* |
|
1096 * Allow escaping errors in the unescaped form |
|
1097 */ |
|
1098 uri->cleanup = 1; |
|
1099 ret2 = xmlParseURIReference(uri, (const char *)str); |
|
1100 if (ret2) { |
|
1101 xmlFreeURI(uri); |
|
1102 return (NULL); |
|
1103 } |
|
1104 } |
|
1105 |
|
1106 if (!uri) |
|
1107 return NULL; |
|
1108 |
|
1109 ret = NULL; |
|
1110 |
|
1111 if (uri->scheme) { |
|
1112 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); |
|
1113 NULLCHK(segment); |
|
1114 ret = xmlStrcat(ret, segment); |
|
1115 ret = xmlStrcat(ret, BAD_CAST ":"); |
|
1116 xmlFree(segment); |
|
1117 } |
|
1118 |
|
1119 if (uri->authority) { |
|
1120 segment = xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@"); |
|
1121 NULLCHK(segment); |
|
1122 ret = xmlStrcat(ret, BAD_CAST "//"); |
|
1123 ret = xmlStrcat(ret, segment); |
|
1124 xmlFree(segment); |
|
1125 } |
|
1126 |
|
1127 if (uri->user) { |
|
1128 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); |
|
1129 NULLCHK(segment); |
|
1130 ret = xmlStrcat(ret,BAD_CAST "//"); |
|
1131 ret = xmlStrcat(ret, segment); |
|
1132 ret = xmlStrcat(ret, BAD_CAST "@"); |
|
1133 xmlFree(segment); |
|
1134 } |
|
1135 |
|
1136 if (uri->server) { |
|
1137 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); |
|
1138 NULLCHK(segment); |
|
1139 if (uri->user == NULL) |
|
1140 ret = xmlStrcat(ret, BAD_CAST "//"); |
|
1141 ret = xmlStrcat(ret, segment); |
|
1142 xmlFree(segment); |
|
1143 } |
|
1144 |
|
1145 if (uri->port) { |
|
1146 xmlChar port[10]; |
|
1147 |
|
1148 snprintf((char *) port, 10, "%d", uri->port); |
|
1149 ret = xmlStrcat(ret, BAD_CAST ":"); |
|
1150 ret = xmlStrcat(ret, port); |
|
1151 } |
|
1152 |
|
1153 if (uri->path) { |
|
1154 segment = xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;"); |
|
1155 NULLCHK(segment); |
|
1156 ret = xmlStrcat(ret, segment); |
|
1157 xmlFree(segment); |
|
1158 } |
|
1159 |
|
1160 if (uri->query) { |
|
1161 segment = xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$"); |
|
1162 NULLCHK(segment); |
|
1163 ret = xmlStrcat(ret, BAD_CAST "?"); |
|
1164 ret = xmlStrcat(ret, segment); |
|
1165 xmlFree(segment); |
|
1166 } |
|
1167 |
|
1168 if (uri->opaque) { |
|
1169 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST ""); |
|
1170 NULLCHK(segment); |
|
1171 ret = xmlStrcat(ret, segment); |
|
1172 xmlFree(segment); |
|
1173 } |
|
1174 |
|
1175 if (uri->fragment) { |
|
1176 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#"); |
|
1177 NULLCHK(segment); |
|
1178 ret = xmlStrcat(ret, BAD_CAST "#"); |
|
1179 ret = xmlStrcat(ret, segment); |
|
1180 xmlFree(segment); |
|
1181 } |
|
1182 |
|
1183 xmlFreeURI(uri); |
|
1184 #undef NULLCHK |
|
1185 |
|
1186 return (ret); |
|
1187 //-------------------- |
|
1188 OOM: |
|
1189 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlURIEscape: out of memory\n")); |
|
1190 xmlFreeURI(uri); |
|
1191 if(ret) xmlFree(ret); |
|
1192 return NULL; |
|
1193 } |
|
1194 |
|
1195 /************************************************************************ |
|
1196 * * |
|
1197 * Escaped URI parsing * |
|
1198 * * |
|
1199 ************************************************************************/ |
|
1200 |
|
1201 /** |
|
1202 * xmlParseURIFragment: |
|
1203 * @param uri pointer to an URI structure |
|
1204 * @param str pointer to the string to analyze |
|
1205 * |
|
1206 * Parse an URI fragment string and fills in the appropriate fields |
|
1207 * of the uri structure. |
|
1208 * |
|
1209 * fragment = *uric |
|
1210 * |
|
1211 * Returns 0 or the error code |
|
1212 * |
|
1213 * OOM: possible --> returns XML_ERR_NO_MEMORY and sets OOM flag |
|
1214 */ |
|
1215 static int |
|
1216 xmlParseURIFragment(xmlURIPtr uri, const char **str) |
|
1217 { |
|
1218 const char *cur = *str; |
|
1219 |
|
1220 if (str == NULL) |
|
1221 return (-1); |
|
1222 |
|
1223 while (isUriC(cur) || isUnwise(*cur)) |
|
1224 NEXT(cur); |
|
1225 |
|
1226 if (uri != NULL) { |
|
1227 if (uri->fragment != NULL) |
|
1228 xmlFree(uri->fragment); |
|
1229 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag |
|
1230 if(!uri->fragment) |
|
1231 return XML_ERR_NO_MEMORY; |
|
1232 } |
|
1233 *str = cur; |
|
1234 return (0); |
|
1235 } |
|
1236 |
|
1237 /** |
|
1238 * xmlParseURIQuery: |
|
1239 * @param uri pointer to an URI structure |
|
1240 * @param str pointer to the string to analyze |
|
1241 * |
|
1242 * Parse the query part of an URI |
|
1243 * |
|
1244 * query = *uric |
|
1245 * |
|
1246 * Returns 0 or the error code |
|
1247 * |
|
1248 * OOM: possible for uri!=NULL --> returns XML_ERR_NO_MEMORY and sets OOM flag |
|
1249 */ |
|
1250 static int |
|
1251 xmlParseURIQuery(xmlURIPtr uri, const char **str) |
|
1252 { |
|
1253 const char *cur = *str; |
|
1254 |
|
1255 if (str == NULL) |
|
1256 return (-1); |
|
1257 |
|
1258 while (isUriC(cur) || (uri && uri->cleanup && isUnwise(*cur))) |
|
1259 NEXT(cur); |
|
1260 if (uri != NULL) { |
|
1261 if (uri->query != NULL) |
|
1262 xmlFree(uri->query); |
|
1263 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag |
|
1264 if(!uri->query) |
|
1265 return XML_ERR_NO_MEMORY; |
|
1266 } |
|
1267 *str = cur; |
|
1268 return (0); |
|
1269 } |
|
1270 |
|
1271 /** |
|
1272 * xmlParseURIScheme: |
|
1273 * @param uri pointer to an URI structure |
|
1274 * @param str pointer to the string to analyze |
|
1275 * |
|
1276 * Parse an URI scheme |
|
1277 * |
|
1278 * scheme = alpha *( alpha | digit | "+" | "-" | "." ) |
|
1279 * |
|
1280 * Returns 0 or the error code |
|
1281 * |
|
1282 * OOM: possible --> sets OOM flag returns XML_ERR_NO_MEMORY |
|
1283 */ |
|
1284 static int |
|
1285 xmlParseURIScheme(xmlURIPtr uri, const char **str) { |
|
1286 const char *cur; |
|
1287 |
|
1288 if (str == NULL) |
|
1289 return(-1); |
|
1290 |
|
1291 cur = *str; |
|
1292 if (!IS_ALPHA(*cur)) |
|
1293 return(1); // was 2 which is XML_ERR_NO_MEMORY |
|
1294 cur++; |
|
1295 while (IS_SCHEME(*cur)) |
|
1296 cur++; |
|
1297 if (uri != NULL) { |
|
1298 if (uri->scheme) |
|
1299 xmlFree(uri->scheme); |
|
1300 /* !!! strndup */ |
|
1301 uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM, when returns NULL |
|
1302 if(!uri->scheme) |
|
1303 return XML_ERR_NO_MEMORY; |
|
1304 } |
|
1305 *str = cur; |
|
1306 return(0); |
|
1307 } |
|
1308 |
|
1309 /** |
|
1310 * xmlParseURIOpaquePart: |
|
1311 * @param uri pointer to an URI structure |
|
1312 * @param str pointer to the string to analyze |
|
1313 * |
|
1314 * Parse an URI opaque part |
|
1315 * |
|
1316 * opaque_part = uric_no_slash *uric |
|
1317 * |
|
1318 * Returns 0 or the error code |
|
1319 * |
|
1320 * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY |
|
1321 */ |
|
1322 static int |
|
1323 xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) |
|
1324 { |
|
1325 const char* cur; |
|
1326 |
|
1327 if (str == NULL) |
|
1328 return (-1); |
|
1329 |
|
1330 cur = *str; |
|
1331 if (!(isUriCNoSlash(cur) || |
|
1332 ( (uri != NULL) && (uri->cleanup) && (isUnwise(*cur)) ) |
|
1333 )) |
|
1334 { |
|
1335 return (3); |
|
1336 } |
|
1337 NEXT(cur); |
|
1338 while (isUriC(cur) || ((uri != NULL) && (uri->cleanup) && (isUnwise(*cur)))) |
|
1339 { |
|
1340 NEXT(cur); |
|
1341 } |
|
1342 if (uri != NULL) { |
|
1343 if (uri->opaque != NULL) |
|
1344 xmlFree(uri->opaque); |
|
1345 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag |
|
1346 if(!uri->opaque) |
|
1347 return XML_ERR_NO_MEMORY; |
|
1348 } |
|
1349 *str = cur; |
|
1350 return (0); |
|
1351 } |
|
1352 |
|
1353 /** |
|
1354 * xmlParseURIServer: |
|
1355 * @param uri pointer to an URI structure |
|
1356 * @param str pointer to the string to analyze |
|
1357 * |
|
1358 * Parse a server subpart of an URI, it's a finer grain analysis |
|
1359 * of the authority part. |
|
1360 * |
|
1361 * server = [ [ userinfo "@" ] hostport ] |
|
1362 * userinfo = *( unreserved | escaped | |
|
1363 * ";" | ":" | "&" | "=" | "+" | "$" | "," ) |
|
1364 * hostport = host [ ":" port ] |
|
1365 * host = hostname | IPv4address |
|
1366 * hostname = *( domainlabel "." ) toplabel [ "." ] |
|
1367 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum |
|
1368 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum |
|
1369 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit |
|
1370 * port = *digit |
|
1371 * |
|
1372 * Returns 0 or the error code |
|
1373 * |
|
1374 * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY |
|
1375 */ |
|
1376 static int |
|
1377 xmlParseURIServer(xmlURIPtr uri, const char **str) { |
|
1378 const char* cur; |
|
1379 const char* host; |
|
1380 const char* tmp; |
|
1381 const int IPmax = 4; |
|
1382 int oct; |
|
1383 |
|
1384 if (str == NULL) |
|
1385 return(-1); |
|
1386 |
|
1387 cur = *str; |
|
1388 |
|
1389 /* |
|
1390 * is there an userinfo ? |
|
1391 */ |
|
1392 while (isUserInfo(cur)) |
|
1393 { |
|
1394 NEXT(cur); |
|
1395 } |
|
1396 |
|
1397 if (*cur == '@') |
|
1398 { |
|
1399 if (uri != NULL) { |
|
1400 if (uri->user != NULL) |
|
1401 xmlFree(uri->user); |
|
1402 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag |
|
1403 if(!uri->user) |
|
1404 return XML_ERR_NO_MEMORY; |
|
1405 } |
|
1406 cur++; |
|
1407 } else { |
|
1408 if (uri != NULL) { |
|
1409 if (uri->user != NULL) |
|
1410 xmlFree(uri->user); |
|
1411 uri->user = NULL; |
|
1412 } |
|
1413 cur = *str; |
|
1414 } |
|
1415 /* |
|
1416 * This can be empty in the case where there is no server |
|
1417 */ |
|
1418 host = cur; |
|
1419 if (*cur == '/') { |
|
1420 if (uri != NULL) { |
|
1421 if (uri->authority != NULL) |
|
1422 xmlFree(uri->authority); |
|
1423 uri->authority = NULL; |
|
1424 if (uri->server != NULL) |
|
1425 xmlFree(uri->server); |
|
1426 uri->server = NULL; |
|
1427 uri->port = 0; |
|
1428 } |
|
1429 return(0); |
|
1430 } |
|
1431 /* |
|
1432 * host part of hostport can derive either an IPV4 address |
|
1433 * or an unresolved name. Check the IP first, it easier to detect |
|
1434 * errors if wrong one |
|
1435 */ |
|
1436 for (oct = 0; oct < IPmax; ++oct) { |
|
1437 if (*cur == '.') |
|
1438 return(3); /* e.g. http://.xml/ or http://18.29..30/ */ |
|
1439 while(IS_DIGIT(*cur)) |
|
1440 cur++; |
|
1441 if (oct == (IPmax-1)) |
|
1442 continue; |
|
1443 if (*cur != '.') |
|
1444 break; |
|
1445 cur++; |
|
1446 } |
|
1447 if (oct < IPmax || (*cur == '.' && cur++) || IS_ALPHA(*cur)) { |
|
1448 /* maybe host_name */ |
|
1449 if (!IS_ALPHANUM(*cur)) |
|
1450 return(4); /* e.g. http://xml.$oft */ |
|
1451 do { |
|
1452 do ++cur; while (IS_ALPHANUM(*cur)); |
|
1453 if (*cur == '-') { |
|
1454 --cur; |
|
1455 if (*cur == '.') |
|
1456 return(5); /* e.g. http://xml.-soft */ |
|
1457 ++cur; |
|
1458 continue; |
|
1459 } |
|
1460 if (*cur == '.') { |
|
1461 --cur; |
|
1462 if (*cur == '-') |
|
1463 return(6); /* e.g. http://xml-.soft */ |
|
1464 if (*cur == '.') |
|
1465 return(7); /* e.g. http://xml..soft */ |
|
1466 ++cur; |
|
1467 continue; |
|
1468 } |
|
1469 break; |
|
1470 } while (1); |
|
1471 |
|
1472 tmp = cur; |
|
1473 if (tmp[-1] == '.') |
|
1474 --tmp; /* e.g. http://xml.$Oft/ */ |
|
1475 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp)); |
|
1476 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp)) |
|
1477 return(8); /* e.g. http://xmlsOft.0rg/ */ |
|
1478 } |
|
1479 if (uri != NULL) { |
|
1480 if (uri->authority != NULL) |
|
1481 xmlFree(uri->authority); |
|
1482 uri->authority = NULL; |
|
1483 if (uri->server != NULL) |
|
1484 xmlFree(uri->server); |
|
1485 uri->server = xmlURIUnescapeString(host, cur - host, NULL); // may set OOM flag |
|
1486 if(!uri->server) |
|
1487 return XML_ERR_NO_MEMORY; |
|
1488 } |
|
1489 /* |
|
1490 * finish by checking for a port presence. |
|
1491 */ |
|
1492 if (*cur == ':') { |
|
1493 cur++; |
|
1494 if (IS_DIGIT(*cur)) { |
|
1495 if (uri != NULL) |
|
1496 uri->port = 0; |
|
1497 while (IS_DIGIT(*cur)) { |
|
1498 if (uri != NULL) |
|
1499 uri->port = uri->port * 10 + (*cur - '0'); |
|
1500 cur++; |
|
1501 } |
|
1502 } |
|
1503 } |
|
1504 *str = cur; |
|
1505 return(0); |
|
1506 } |
|
1507 |
|
1508 /** |
|
1509 * xmlParseURIRelSegment: |
|
1510 * @param uri pointer to an URI structure |
|
1511 * @param str pointer to the string to analyze |
|
1512 * |
|
1513 * Parse an URI relative segment |
|
1514 * |
|
1515 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" | |
|
1516 * "+" | "$" | "," ) |
|
1517 * |
|
1518 * Returns 0 or the error code |
|
1519 * |
|
1520 * OOM: possible --> sets OOM and returns XML_ERR_NO_MEMORY |
|
1521 */ |
|
1522 static int |
|
1523 xmlParseURIRelSegment(xmlURIPtr uri, const char **str) |
|
1524 { |
|
1525 const char *cur; |
|
1526 |
|
1527 if (str == NULL) |
|
1528 return (-1); |
|
1529 |
|
1530 cur = *str; |
|
1531 if (!(isSegment(cur) || ((uri) && (uri->cleanup) && (isUnwise(*cur))))) { |
|
1532 return (3); |
|
1533 } |
|
1534 NEXT(cur); |
|
1535 while (isSegment(cur) || ((uri) && (uri->cleanup) && (isUnwise(*cur)))) |
|
1536 NEXT(cur); |
|
1537 if (uri != NULL) { |
|
1538 if (uri->path != NULL) |
|
1539 xmlFree(uri->path); |
|
1540 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM flag |
|
1541 if(!uri->path) |
|
1542 return XML_ERR_NO_MEMORY; |
|
1543 } |
|
1544 *str = cur; |
|
1545 return (0); |
|
1546 } |
|
1547 |
|
1548 /** |
|
1549 * xmlParseURIPathSegments: |
|
1550 * @param uri pointer to an URI structure |
|
1551 * @param str pointer to the string to analyze |
|
1552 * @param slash should we add a leading slash |
|
1553 * |
|
1554 * Parse an URI set of path segments |
|
1555 * |
|
1556 * path_segments = segment *( "/" segment ) |
|
1557 * segment = *pchar *( ";" param ) |
|
1558 * param = *pchar |
|
1559 * |
|
1560 * Returns 0 or the error code |
|
1561 * |
|
1562 * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY |
|
1563 */ |
|
1564 static int |
|
1565 xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) |
|
1566 { |
|
1567 const char *cur; |
|
1568 |
|
1569 if (str == NULL) |
|
1570 return (-1); |
|
1571 |
|
1572 cur = *str; |
|
1573 // XMLENGINE: BEGIN REPLACE |
|
1574 for(;;) { |
|
1575 // Replace while(1) for infinite loop |
|
1576 // do { |
|
1577 // XMLENGINE: END REPLACE |
|
1578 |
|
1579 while (isPChar(cur) || (uri && uri->cleanup && (isUnwise(*cur)))) |
|
1580 { |
|
1581 NEXT(cur); |
|
1582 } |
|
1583 while (*cur == ';') { |
|
1584 cur++; |
|
1585 while (isPChar(cur) || ((uri != NULL) && (uri->cleanup) && (isUnwise(*cur)))) |
|
1586 NEXT(cur); |
|
1587 } |
|
1588 if (*cur != '/') |
|
1589 break; |
|
1590 cur++; |
|
1591 // XMLENGINE: BEGIN REPLACE |
|
1592 } |
|
1593 //} while (1); |
|
1594 // XMLENGINE: END REPLACE |
|
1595 if (uri != NULL) { |
|
1596 int len, len2 = 0; |
|
1597 char *path; |
|
1598 |
|
1599 /* |
|
1600 * Concat the set of path segments to the current path |
|
1601 */ |
|
1602 len = cur - *str; |
|
1603 if (slash) |
|
1604 len++; |
|
1605 |
|
1606 if (uri->path != NULL) { |
|
1607 len2 = strlen(uri->path); |
|
1608 len += len2; |
|
1609 } |
|
1610 path = (char *) xmlMallocAtomic(len + 1); // may set OOM flag |
|
1611 if (path == NULL) { |
|
1612 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlParseURIPathSegments: out of memory\n")); |
|
1613 *str = cur; |
|
1614 //return (-1); |
|
1615 return XML_ERR_NO_MEMORY; |
|
1616 } |
|
1617 if (uri->path != NULL) |
|
1618 memcpy(path, uri->path, len2); |
|
1619 if (slash) { |
|
1620 path[len2] = '/'; |
|
1621 len2++; |
|
1622 } |
|
1623 path[len2] = 0; |
|
1624 if (cur - *str > 0) |
|
1625 xmlURIUnescapeString(*str, cur - *str, &path[len2]); // MAY NOT set OOM flag !!! |
|
1626 if (uri->path != NULL) |
|
1627 xmlFree(uri->path); |
|
1628 uri->path = path; |
|
1629 } |
|
1630 *str = cur; |
|
1631 return (0); |
|
1632 } |
|
1633 |
|
1634 /** |
|
1635 * xmlParseURIAuthority: |
|
1636 * @param uri pointer to an URI structure |
|
1637 * @param str pointer to the string to analyze |
|
1638 * |
|
1639 * Parse the authority part of an URI. |
|
1640 * |
|
1641 * authority = server | reg_name |
|
1642 * server = [ [ userinfo "@" ] hostport ] |
|
1643 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" | |
|
1644 * "@" | "&" | "=" | "+" ) |
|
1645 * |
|
1646 * Note : this is completely ambiguous since reg_name is allowed to |
|
1647 * use the full set of chars in use by server: |
|
1648 * |
|
1649 * 3.2.1. Registry-based Naming Authority |
|
1650 * |
|
1651 * The structure of a registry-based naming authority is specific |
|
1652 * to the URI scheme, but constrained to the allowed characters |
|
1653 * for an authority component. |
|
1654 * |
|
1655 * Returns 0 or the error code |
|
1656 * |
|
1657 * OOM: possible --> OOM flag is set, returns XML_ERR_NO_MEMORY |
|
1658 */ |
|
1659 static int |
|
1660 xmlParseURIAuthority(xmlURIPtr uri, const char** str) { |
|
1661 const char *cur; |
|
1662 int ret; |
|
1663 |
|
1664 if (str == NULL) |
|
1665 return(-1); |
|
1666 |
|
1667 cur = *str; |
|
1668 |
|
1669 /* |
|
1670 * try first to parse it as a server string. |
|
1671 */ |
|
1672 ret = xmlParseURIServer(uri, str); // may set OOM flag -- returns XML_ERR_NO_MEMORY |
|
1673 if ((ret == 0) && (*str != NULL) && |
|
1674 ((**str == 0) || (**str == '/') || (**str == '?'))) |
|
1675 return(0); |
|
1676 if(ret!=0) |
|
1677 return ret; // error happened, maybe OOM |
|
1678 *str = cur; |
|
1679 |
|
1680 /* |
|
1681 * failed, fallback to reg_name |
|
1682 */ |
|
1683 if (!isRegName(cur)) { |
|
1684 return(5); |
|
1685 } |
|
1686 NEXT(cur); |
|
1687 while (isRegName(cur)) |
|
1688 NEXT(cur); |
|
1689 |
|
1690 if (uri != NULL) { |
|
1691 if (uri->server != NULL) |
|
1692 xmlFree(uri->server); |
|
1693 uri->server = NULL; |
|
1694 if (uri->user != NULL) |
|
1695 xmlFree(uri->user); |
|
1696 uri->user = NULL; |
|
1697 if (uri->authority != NULL) |
|
1698 xmlFree(uri->authority); |
|
1699 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL); // may set OOM |
|
1700 if(!uri->authority) |
|
1701 return XML_ERR_NO_MEMORY; |
|
1702 } |
|
1703 *str = cur; |
|
1704 return(0); |
|
1705 } |
|
1706 |
|
1707 /** |
|
1708 * xmlParseURIHierPart: |
|
1709 * @param uri pointer to an URI structure |
|
1710 * @param str pointer to the string to analyze |
|
1711 * |
|
1712 * Parse an URI hierarchical part |
|
1713 * |
|
1714 * hier_part = ( net_path | abs_path ) [ "?" query ] |
|
1715 * abs_path = "/" path_segments |
|
1716 * net_path = "//" authority [ abs_path ] |
|
1717 * |
|
1718 * Returns 0 or the error code |
|
1719 * |
|
1720 * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY |
|
1721 */ |
|
1722 static int |
|
1723 xmlParseURIHierPart(xmlURIPtr uri, const char** str) { |
|
1724 int ret; |
|
1725 const char* cur; |
|
1726 |
|
1727 if (!str) |
|
1728 return(-1); |
|
1729 |
|
1730 cur = *str; |
|
1731 |
|
1732 if ((cur[0] == '/') && (cur[1] == '/')) |
|
1733 { |
|
1734 cur += 2; |
|
1735 ret = xmlParseURIAuthority(uri, &cur); // may set OOM flag and return XML_ERR_NO_MEMORY |
|
1736 if (ret != 0) |
|
1737 return(ret); |
|
1738 |
|
1739 if (cur[0] == '/') { |
|
1740 cur++; |
|
1741 ret = xmlParseURIPathSegments(uri, &cur, 1); |
|
1742 } |
|
1743 } else if (cur[0] == '/') { |
|
1744 cur++; |
|
1745 ret = xmlParseURIPathSegments(uri, &cur, 1); |
|
1746 } else { |
|
1747 return(4); |
|
1748 } |
|
1749 if (ret != 0) |
|
1750 return(ret); // there was error, maybe OOM |
|
1751 |
|
1752 if (*cur == '?') { |
|
1753 cur++; |
|
1754 ret = xmlParseURIQuery(uri, &cur); // may set OOM flag |
|
1755 if (ret != 0) |
|
1756 return(ret); |
|
1757 } |
|
1758 *str = cur; |
|
1759 return(0); |
|
1760 } |
|
1761 |
|
1762 /** |
|
1763 * xmlParseAbsoluteURI: |
|
1764 * @param uri pointer to an URI structure |
|
1765 * @param str pointer to the string to analyze |
|
1766 * |
|
1767 * Parse an URI reference string and fills in the appropriate fields |
|
1768 * of the uri structure |
|
1769 * |
|
1770 * absoluteURI = scheme ":" ( hier_part | opaque_part ) |
|
1771 * |
|
1772 * Returns 0 or the error code |
|
1773 * |
|
1774 * OOM: possible --> sets OOM and returns XML_ERR_NO_MEMORY |
|
1775 */ |
|
1776 static int |
|
1777 xmlParseAbsoluteURI(xmlURIPtr uri, const char** str) { |
|
1778 int ret; |
|
1779 const char* cur; |
|
1780 |
|
1781 if (str == NULL) |
|
1782 return(-1); |
|
1783 |
|
1784 cur = *str; |
|
1785 |
|
1786 ret = xmlParseURIScheme(uri, str); // may set OOM and return XML_ERR_NO_MEMORY |
|
1787 if (ret != 0) return(ret); |
|
1788 |
|
1789 if (**str != ':') { |
|
1790 *str = cur; |
|
1791 return(1); |
|
1792 } |
|
1793 (*str)++; |
|
1794 if (**str == '/') |
|
1795 return(xmlParseURIHierPart(uri, str)); // may set OOM flag |
|
1796 return(xmlParseURIOpaquePart(uri, str)); //may set OOM flag |
|
1797 } |
|
1798 |
|
1799 /** |
|
1800 * xmlParseRelativeURI: |
|
1801 * @param uri pointer to an URI structure |
|
1802 * @param str pointer to the string to analyze |
|
1803 * |
|
1804 * Parse an relative URI string and fills in the appropriate fields |
|
1805 * of the uri structure |
|
1806 * |
|
1807 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] |
|
1808 * abs_path = "/" path_segments |
|
1809 * net_path = "//" authority [ abs_path ] |
|
1810 * rel_path = rel_segment [ abs_path ] |
|
1811 * |
|
1812 * Returns 0 or the error code |
|
1813 * |
|
1814 * OOM: possible --> sets OOM flag and return XML_ERR_NO_MEMORY |
|
1815 */ |
|
1816 static int |
|
1817 xmlParseRelativeURI(xmlURIPtr uri, const char **str) { |
|
1818 int ret = 0; |
|
1819 const char *cur; |
|
1820 |
|
1821 if (str == NULL) |
|
1822 return(-1); |
|
1823 |
|
1824 cur = *str; |
|
1825 if ((cur[0] == '/') && (cur[1] == '/')) { |
|
1826 cur += 2; |
|
1827 |
|
1828 ret = xmlParseURIAuthority(uri, &cur); // may set OOM flag |
|
1829 if (ret != 0) |
|
1830 return(ret); |
|
1831 if (cur[0] == '/') { |
|
1832 cur++; |
|
1833 ret = xmlParseURIPathSegments(uri, &cur, 1); // may set OOM flag |
|
1834 } |
|
1835 } else if (cur[0] == '/') { |
|
1836 cur++; |
|
1837 ret = xmlParseURIPathSegments(uri, &cur, 1); // may set OOM flag |
|
1838 } else if (cur[0] != '#' && cur[0] != '?') { |
|
1839 ret = xmlParseURIRelSegment(uri, &cur); // may set OOM flag |
|
1840 if (ret != 0) |
|
1841 return(ret); |
|
1842 if (cur[0] == '/') { |
|
1843 cur++; |
|
1844 ret = xmlParseURIPathSegments(uri, &cur, 1); // may set OOM flag |
|
1845 } |
|
1846 } |
|
1847 if (ret != 0) |
|
1848 return(ret); |
|
1849 if (*cur == '?') { |
|
1850 cur++; |
|
1851 ret = xmlParseURIQuery(uri, &cur); // may set OOM flag |
|
1852 if (ret != 0) |
|
1853 return(ret); |
|
1854 } |
|
1855 *str = cur; |
|
1856 return(ret); |
|
1857 } |
|
1858 |
|
1859 /** |
|
1860 * xmlParseURIReference: |
|
1861 * @param uri pointer to an URI structure |
|
1862 * @param str the string to analyze |
|
1863 * |
|
1864 * Parse an URI reference string and fills in the appropriate fields |
|
1865 * of the uri structure |
|
1866 * |
|
1867 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
|
1868 * |
|
1869 * Returns 0 or the error code |
|
1870 * |
|
1871 * OOM: possible --> sets OOM flag and returns XML_ERR_NO_MEMORY |
|
1872 */ |
|
1873 XMLPUBFUNEXPORT int |
|
1874 xmlParseURIReference(xmlURIPtr uri, const char *str) { |
|
1875 int ret; |
|
1876 const char *tmp = str; |
|
1877 |
|
1878 if (!str) |
|
1879 return(-1); |
|
1880 xmlCleanURI(uri); |
|
1881 |
|
1882 /* |
|
1883 * Try first to parse absolute refs, then fallback to relative if |
|
1884 * it fails. |
|
1885 */ |
|
1886 ret = xmlParseAbsoluteURI(uri, &str); // may set OOM flag |
|
1887 if (ret == XML_ERR_NO_MEMORY) |
|
1888 return ret; |
|
1889 |
|
1890 if (ret != 0) { // No, it is not an absolute URI, try it as a relative one... |
|
1891 xmlCleanURI(uri); |
|
1892 str = tmp; |
|
1893 ret = xmlParseRelativeURI(uri, &str); // may set OOM flag |
|
1894 if (ret == XML_ERR_NO_MEMORY) |
|
1895 return ret; |
|
1896 } |
|
1897 |
|
1898 if (ret != 0) { |
|
1899 xmlCleanURI(uri); |
|
1900 return(ret); |
|
1901 } |
|
1902 |
|
1903 if (*str == '#') { |
|
1904 str++; |
|
1905 ret = xmlParseURIFragment(uri, &str); // may set OOM flag |
|
1906 if (ret != 0) return(ret); |
|
1907 } |
|
1908 if (*str != 0) { |
|
1909 xmlCleanURI(uri); |
|
1910 return(1); |
|
1911 } |
|
1912 return(0); |
|
1913 } |
|
1914 |
|
1915 /** |
|
1916 * xmlParseURI: |
|
1917 * @param str the URI string to analyze |
|
1918 * |
|
1919 * Parse an URI |
|
1920 * |
|
1921 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
|
1922 * |
|
1923 * Returns a newly build xmlURIPtr or NULL in case of error |
|
1924 * |
|
1925 * OOM: possible --> returns NULL for uri!=NULL |
|
1926 */ |
|
1927 XMLPUBFUNEXPORT xmlURIPtr |
|
1928 xmlParseURI(const char *str) |
|
1929 { |
|
1930 xmlURIPtr uri; |
|
1931 int ret; |
|
1932 |
|
1933 if (!str) |
|
1934 return(NULL); |
|
1935 uri = xmlCreateURI(); |
|
1936 if (uri) { |
|
1937 ret = xmlParseURIReference(uri, str); |
|
1938 if (ret) { |
|
1939 xmlCleanURI(uri); |
|
1940 xmlFreeURI(uri); |
|
1941 return(NULL); |
|
1942 } |
|
1943 } |
|
1944 return(uri); |
|
1945 } |
|
1946 |
|
1947 /************************************************************************ |
|
1948 * * |
|
1949 * Public functions * |
|
1950 * * |
|
1951 ************************************************************************/ |
|
1952 |
|
1953 /** |
|
1954 * xmlBuildURI: |
|
1955 * @param URI the URI instance found in the document |
|
1956 * @param base the base value |
|
1957 * |
|
1958 * Computes he final URI of the reference done by checking that |
|
1959 * the given URI is valid, and building the final URI using the |
|
1960 * base URI. This is processed according to section 5.2 of the |
|
1961 * RFC 2396 |
|
1962 * |
|
1963 * 5.2. Resolving Relative References to Absolute Form |
|
1964 * |
|
1965 * Returns a new URI string (to be freed by the caller) or NULL in case |
|
1966 * of error. |
|
1967 * |
|
1968 * OOM: |
|
1969 */ |
|
1970 XMLPUBFUNEXPORT xmlChar* |
|
1971 xmlBuildURI(const xmlChar *URI, const xmlChar *base) { |
|
1972 xmlChar *val = NULL; |
|
1973 int ret, len, indx, cur, out; |
|
1974 xmlURIPtr ref = NULL; |
|
1975 xmlURIPtr bas = NULL; |
|
1976 xmlURIPtr res = NULL; |
|
1977 |
|
1978 /* |
|
1979 * 1) The URI reference is parsed into the potential four components and |
|
1980 * fragment identifier, as described in Section 4.3. |
|
1981 * |
|
1982 * NOTE that a completely empty URI is treated by modern browsers |
|
1983 * as a reference to "." rather than as a synonym for the current |
|
1984 * URI. Should we do that here? |
|
1985 */ |
|
1986 if (!URI) |
|
1987 ret = -1; |
|
1988 else { |
|
1989 if (*URI) { |
|
1990 ref = xmlCreateURI(); |
|
1991 if (ref == NULL) |
|
1992 goto done; |
|
1993 ret = xmlParseURIReference(ref, (const char*) URI); |
|
1994 } |
|
1995 else |
|
1996 ret = 0; |
|
1997 } |
|
1998 if (ret != 0) |
|
1999 goto done; |
|
2000 if ((ref != NULL) && (ref->scheme != NULL)) { |
|
2001 /* |
|
2002 * The URI is absolute don't modify. |
|
2003 */ |
|
2004 val = xmlStrdup(URI); |
|
2005 goto done; |
|
2006 } |
|
2007 if (base == NULL) |
|
2008 ret = -1; |
|
2009 else { |
|
2010 bas = xmlCreateURI(); |
|
2011 if (bas == NULL) |
|
2012 goto done; |
|
2013 ret = xmlParseURIReference(bas, (const char *) base); |
|
2014 } |
|
2015 if (ret != 0) { |
|
2016 if (ref) |
|
2017 val = xmlSaveUri(ref); |
|
2018 goto done; |
|
2019 } |
|
2020 if (ref == NULL) { |
|
2021 /* |
|
2022 * the base fragment must be ignored |
|
2023 */ |
|
2024 if (bas->fragment != NULL) { |
|
2025 xmlFree(bas->fragment); |
|
2026 bas->fragment = NULL; |
|
2027 } |
|
2028 val = xmlSaveUri(bas); |
|
2029 goto done; |
|
2030 } |
|
2031 |
|
2032 /* |
|
2033 * 2) If the path component is empty and the scheme, authority, and |
|
2034 * query components are undefined, then it is a reference to the |
|
2035 * current document and we are done. Otherwise, the reference URI's |
|
2036 * query and fragment components are defined as found (or not found) |
|
2037 * within the URI reference and not inherited from the base URI. |
|
2038 * |
|
2039 * NOTE that in modern browsers, the parsing differs from the above |
|
2040 * in the following aspect: the query component is allowed to be |
|
2041 * defined while still treating this as a reference to the current |
|
2042 * document. |
|
2043 */ |
|
2044 res = xmlCreateURI(); |
|
2045 if (res == NULL) |
|
2046 goto done; |
|
2047 if ((ref->scheme == NULL) && |
|
2048 (ref->path == NULL) && |
|
2049 (ref->authority == NULL)&& |
|
2050 (ref->server == NULL)) |
|
2051 { |
|
2052 if (bas->scheme != NULL) |
|
2053 res->scheme = xmlMemStrdup(bas->scheme); |
|
2054 if (bas->authority != NULL) |
|
2055 res->authority = xmlMemStrdup(bas->authority); |
|
2056 else if (bas->server != NULL) { |
|
2057 res->server = xmlMemStrdup(bas->server); |
|
2058 if (bas->user != NULL) |
|
2059 res->user = xmlMemStrdup(bas->user); |
|
2060 res->port = bas->port; |
|
2061 } |
|
2062 if (bas->path != NULL) |
|
2063 res->path = xmlMemStrdup(bas->path); |
|
2064 if (ref->query != NULL) |
|
2065 res->query = xmlMemStrdup(ref->query); |
|
2066 else if (bas->query != NULL) |
|
2067 res->query = xmlMemStrdup(bas->query); |
|
2068 if (ref->fragment != NULL) |
|
2069 res->fragment = xmlMemStrdup(ref->fragment); |
|
2070 goto step_7; |
|
2071 } |
|
2072 |
|
2073 /* |
|
2074 * 3) If the scheme component is defined, indicating that the reference |
|
2075 * starts with a scheme name, then the reference is interpreted as an |
|
2076 * absolute URI and we are done. Otherwise, the reference URI's |
|
2077 * scheme is inherited from the base URI's scheme component. |
|
2078 */ |
|
2079 if (ref->scheme != NULL) { |
|
2080 val = xmlSaveUri(ref); |
|
2081 goto done; |
|
2082 } |
|
2083 if (bas->scheme != NULL) |
|
2084 res->scheme = xmlMemStrdup(bas->scheme); |
|
2085 |
|
2086 if (ref->query != NULL) |
|
2087 res->query = xmlMemStrdup(ref->query); |
|
2088 if (ref->fragment != NULL) |
|
2089 res->fragment = xmlMemStrdup(ref->fragment); |
|
2090 |
|
2091 /* |
|
2092 * 4) If the authority component is defined, then the reference is a |
|
2093 * network-path and we skip to step 7. Otherwise, the reference |
|
2094 * URI's authority is inherited from the base URI's authority |
|
2095 * component, which will also be undefined if the URI scheme does not |
|
2096 * use an authority component. |
|
2097 */ |
|
2098 if ((ref->authority != NULL) || (ref->server != NULL)) { |
|
2099 if (ref->authority != NULL) |
|
2100 res->authority = xmlMemStrdup(ref->authority); |
|
2101 else { |
|
2102 res->server = xmlMemStrdup(ref->server); |
|
2103 if (ref->user != NULL) |
|
2104 res->user = xmlMemStrdup(ref->user); |
|
2105 res->port = ref->port; |
|
2106 } |
|
2107 if (ref->path != NULL) |
|
2108 res->path = xmlMemStrdup(ref->path); |
|
2109 goto step_7; |
|
2110 } |
|
2111 if (bas->authority != NULL) |
|
2112 res->authority = xmlMemStrdup(bas->authority); |
|
2113 else if (bas->server != NULL) { |
|
2114 res->server = xmlMemStrdup(bas->server); |
|
2115 if (bas->user != NULL) |
|
2116 res->user = xmlMemStrdup(bas->user); |
|
2117 res->port = bas->port; |
|
2118 } |
|
2119 |
|
2120 /* |
|
2121 * 5) If the path component begins with a slash character ("/"), then |
|
2122 * the reference is an absolute-path and we skip to step 7. |
|
2123 */ |
|
2124 if ((ref->path != NULL) && (ref->path[0] == '/')) { |
|
2125 res->path = xmlMemStrdup(ref->path); |
|
2126 goto step_7; |
|
2127 } |
|
2128 |
|
2129 |
|
2130 /* |
|
2131 * 6) If this step is reached, then we are resolving a relative-path |
|
2132 * reference. The relative path needs to be merged with the base |
|
2133 * URI's path. Although there are many ways to do this, we will |
|
2134 * describe a simple method using a separate string buffer. |
|
2135 * |
|
2136 * Allocate a buffer large enough for the result string. |
|
2137 */ |
|
2138 len = 2; /* extra / and 0 */ |
|
2139 if (ref->path != NULL) |
|
2140 len += strlen(ref->path); |
|
2141 if (bas->path != NULL) |
|
2142 len += strlen(bas->path); |
|
2143 res->path = (char *) xmlMallocAtomic(len); |
|
2144 if (res->path == NULL) { |
|
2145 xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlBuildURI: out of memory\n")); |
|
2146 goto done; |
|
2147 } |
|
2148 res->path[0] = 0; |
|
2149 |
|
2150 /* |
|
2151 * a) All but the last segment of the base URI's path component is |
|
2152 * copied to the buffer. In other words, any characters after the |
|
2153 * last (right-most) slash character, if any, are excluded. |
|
2154 */ |
|
2155 cur = 0; |
|
2156 out = 0; |
|
2157 if (bas->path != NULL) { |
|
2158 while (bas->path[cur] != 0) { |
|
2159 while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) |
|
2160 cur++; |
|
2161 if (bas->path[cur] == 0) |
|
2162 break; |
|
2163 |
|
2164 cur++; |
|
2165 while (out < cur) { |
|
2166 res->path[out] = bas->path[out]; |
|
2167 out++; |
|
2168 } |
|
2169 } |
|
2170 } |
|
2171 res->path[out] = 0; |
|
2172 |
|
2173 /* |
|
2174 * b) The reference's path component is appended to the buffer |
|
2175 * string. |
|
2176 */ |
|
2177 if (ref->path != NULL && ref->path[0] != 0) { |
|
2178 indx = 0; |
|
2179 /* |
|
2180 * Ensure the path includes a '/' |
|
2181 */ |
|
2182 if ((out == 0) && (bas->server != NULL)) |
|
2183 res->path[out++] = '/'; |
|
2184 while (ref->path[indx] != 0) { |
|
2185 res->path[out++] = ref->path[indx++]; |
|
2186 } |
|
2187 } |
|
2188 res->path[out] = 0; |
|
2189 |
|
2190 /* |
|
2191 * Steps c) to h) are really path normalization steps |
|
2192 */ |
|
2193 xmlNormalizeURIPath(res->path); |
|
2194 |
|
2195 step_7: |
|
2196 |
|
2197 /* |
|
2198 * 7) The resulting URI components, including any inherited from the |
|
2199 * base URI, are recombined to give the absolute form of the URI |
|
2200 * reference. |
|
2201 */ |
|
2202 val = xmlSaveUri(res); |
|
2203 |
|
2204 done: |
|
2205 if (ref) |
|
2206 xmlFreeURI(ref); |
|
2207 if (bas) |
|
2208 xmlFreeURI(bas); |
|
2209 if (res) |
|
2210 xmlFreeURI(res); |
|
2211 return(val); |
|
2212 } |
|
2213 |
|
2214 /** |
|
2215 * xmlCanonicPath: |
|
2216 * @param path the resource locator in a filesystem notation |
|
2217 * |
|
2218 * Constructs a canonic path from the specified path. |
|
2219 * |
|
2220 * Returns a new canonic path, or a duplicate of the path parameter if the |
|
2221 * construction fails. The caller is responsible for freeing the memory occupied |
|
2222 * by the returned string. If there is insufficient memory available, or the |
|
2223 * argument is NULL, the function returns NULL. |
|
2224 * |
|
2225 * OOM: |
|
2226 */ |
|
/*
 * IS_WINDOWS_PATH:
 * @param p  pointer to a NUL-terminated string, may be NULL
 *
 * Evaluates to non-zero when p is non-NULL and begins with an ASCII drive
 * letter, a colon and then a slash or a backslash ("C:\..." or "c:/...").
 *
 * The && chain short-circuits on the first mismatch, so for a
 * NUL-terminated string no character beyond the terminator is read.
 * The argument is evaluated several times: do not pass an expression
 * that has side effects.
 */
#define IS_WINDOWS_PATH(p) \
    (((p) != NULL) && \
     ((((p)[0] >= 'a') && ((p)[0] <= 'z')) || \
      (((p)[0] >= 'A') && ((p)[0] <= 'Z'))) && \
     ((p)[1] == ':') && (((p)[2] == '/') || ((p)[2] == '\\')))
|
2232 //ISSUE: Not finished reviewing for OOM handling / O.K.: 16.05.05 |
|
2233 XMLPUBFUNEXPORT xmlChar* |
|
2234 xmlCanonicPath(const xmlChar *path) |
|
2235 { |
|
2236 #if (defined(_WIN32)||defined(__SYMBIAN32__)) && !defined(__CYGWIN__) |
|
2237 int len = 0; |
|
2238 //int i = 0; |
|
2239 xmlChar *p = NULL; |
|
2240 #endif |
|
2241 xmlChar *ret; |
|
2242 xmlURIPtr uri; |
|
2243 |
|
2244 // DO NOT REMOVE this check |
|
2245 if (path == NULL) |
|
2246 return(NULL); |
|
2247 |
|
2248 if ((uri = xmlParseURI((const char *) path)) != NULL) |
|
2249 { |
|
2250 xmlFreeURI(uri); |
|
2251 return xmlStrdup(path); |
|
2252 } |
|
2253 // It's should be OOM already!!! // ISSUE: Not finished work / OK: 16.05.05 |
|
2254 uri = xmlCreateURI(); |
|
2255 if (uri == NULL) { |
|
2256 return(NULL); |
|
2257 } |
|
2258 |
|
2259 #if (defined(_WIN32)||defined(__SYMBIAN32__)) && !defined(__CYGWIN__) |
|
2260 len = xmlStrlen(path); |
|
2261 if ((len > 2) && IS_WINDOWS_PATH(path)) { |
|
2262 uri->scheme = (char*) xmlStrdup(BAD_CAST "file"); |
|
2263 uri->path = (char*) xmlMallocAtomic(len + 2); |
|
2264 uri->path[0] = '/'; |
|
2265 p = (xmlChar*) uri->path + 1; |
|
2266 strncpy((char*)p, (char*)path, len + 1); |
|
2267 } else { |
|
2268 uri->path = (char*) xmlStrdup(path); |
|
2269 p = (xmlChar*) uri->path; |
|
2270 } |
|
2271 while (*p != '\0') { |
|
2272 if (*p == '\\') |
|
2273 *p = '/'; |
|
2274 p++; |
|
2275 } |
|
2276 #else |
|
2277 uri->path = (char *) xmlStrdup((const xmlChar *) path); |
|
2278 #endif |
|
2279 |
|
2280 ret = xmlSaveUri(uri); |
|
2281 xmlFreeURI(uri); |
|
2282 return(ret); |
|
2283 } |