|
1 /* gmarkup.c - Simple XML-like parser |
|
2 * |
|
3 * Copyright 2000, 2003 Red Hat, Inc. |
|
4 * Portions copyright (c) 2006 Nokia Corporation. All rights reserved. |
|
5 * |
|
6 * GLib is free software; you can redistribute it and/or modify it |
|
7 * under the terms of the GNU Lesser General Public License as |
|
8 * published by the Free Software Foundation; either version 2 of the |
|
9 * License, or (at your option) any later version. |
|
10 * |
|
11 * GLib is distributed in the hope that it will be useful, |
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 * Lesser General Public License for more details. |
|
15 * |
|
16 * You should have received a copy of the GNU Lesser General Public |
|
17 * License along with GLib; see the file COPYING.LIB. If not, |
|
18 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
|
19 * Boston, MA 02111-1307, USA. |
|
20 */ |
|
21 |
|
22 #include "config.h" |
|
23 |
|
24 #include <stdarg.h> |
|
25 #include <string.h> |
|
26 #include <stdio.h> |
|
27 #include <stdlib.h> |
|
28 #include <errno.h> |
|
29 |
|
30 #include "glib.h" |
|
31 #include "galias.h" |
|
32 |
|
33 #include "glibintl.h" |
|
34 |
|
35 #ifdef __SYMBIAN32__ |
|
36 #include <glib_wsd.h> |
|
37 #endif |
|
38 |
|
39 |
|
40 #if EMULATOR |
|
41 |
|
42 PLS(error_quark ,g_markup_error_quark,GQuark) |
|
43 #define error_quark (*FUNCTION_NAME(error_quark ,g_markup_error_quark)()) |
|
44 |
|
45 #endif /* EMULATOR */ |
|
46 |
|
47 |
|
48 EXPORT_C GQuark |
|
49 g_markup_error_quark (void) |
|
50 { |
|
51 #if !(EMULATOR) |
|
52 static GQuark error_quark = 0; |
|
53 #endif /*EMULATOR */ |
|
54 |
|
55 if (error_quark == 0) |
|
56 error_quark = g_quark_from_static_string ("g-markup-error-quark"); |
|
57 |
|
58 return error_quark; |
|
59 } |
|
60 |
|
61 #if EMULATOR |
|
62 #undef error_quark |
|
63 #endif /* EMULATOR */ |
|
64 |
|
/* States of the main parser state machine; GMarkupParseContext.state
 * always holds exactly one of these.
 *
 * STATE_START is the initial state and STATE_ERROR is entered through
 * mark_error(); g_markup_parse_context_parse() refuses to run on a
 * context that is in STATE_ERROR.
 */
typedef enum
{
  STATE_START,
  STATE_AFTER_OPEN_ANGLE,
  STATE_AFTER_CLOSE_ANGLE,
  STATE_AFTER_ELISION_SLASH,          /* the slash that obviates need for end element */
  STATE_INSIDE_OPEN_TAG_NAME,
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_AFTER_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,    /* attribute values get whitespace-normalized when unescaped */
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,    /* same treatment as the _SQ state */
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH,
  STATE_INSIDE_CLOSE_TAG_NAME,
  STATE_AFTER_CLOSE_TAG_NAME,
  STATE_INSIDE_PASSTHROUGH,           /* entered after "<?" or "<!" */
  STATE_ERROR
} GMarkupParseState;
|
85 |
|
86 struct _GMarkupParseContext |
|
87 { |
|
88 const GMarkupParser *parser; |
|
89 |
|
90 GMarkupParseFlags flags; |
|
91 |
|
92 gint line_number; |
|
93 gint char_number; |
|
94 |
|
95 gpointer user_data; |
|
96 GDestroyNotify dnotify; |
|
97 |
|
98 /* A piece of character data or an element that |
|
99 * hasn't "ended" yet so we haven't yet called |
|
100 * the callback for it. |
|
101 */ |
|
102 GString *partial_chunk; |
|
103 |
|
104 GMarkupParseState state; |
|
105 GSList *tag_stack; |
|
106 gchar **attr_names; |
|
107 gchar **attr_values; |
|
108 gint cur_attr; |
|
109 gint alloc_attrs; |
|
110 |
|
111 const gchar *current_text; |
|
112 gssize current_text_len; |
|
113 const gchar *current_text_end; |
|
114 |
|
115 GString *leftover_char_portion; |
|
116 |
|
117 /* used to save the start of the last interesting thingy */ |
|
118 const gchar *start; |
|
119 |
|
120 const gchar *iter; |
|
121 |
|
122 guint document_empty : 1; |
|
123 guint parsing : 1; |
|
124 gint balance; |
|
125 }; |
|
126 |
|
127 /** |
|
128 * g_markup_parse_context_new: |
|
129 * @parser: a #GMarkupParser |
|
130 * @flags: one or more #GMarkupParseFlags |
|
131 * @user_data: user data to pass to #GMarkupParser functions |
|
132 * @user_data_dnotify: user data destroy notifier called when the parse context is freed |
|
133 * |
|
134 * Creates a new parse context. A parse context is used to parse |
|
135 * marked-up documents. You can feed any number of documents into |
|
136 * a context, as long as no errors occur; once an error occurs, |
|
137 * the parse context can't continue to parse text (you have to free it |
|
138 * and create a new parse context). |
|
139 * |
|
140 * Return value: a new #GMarkupParseContext |
|
141 **/ |
|
142 EXPORT_C GMarkupParseContext * |
|
143 g_markup_parse_context_new (const GMarkupParser *parser, |
|
144 GMarkupParseFlags flags, |
|
145 gpointer user_data, |
|
146 GDestroyNotify user_data_dnotify) |
|
147 { |
|
148 GMarkupParseContext *context; |
|
149 |
|
150 g_return_val_if_fail (parser != NULL, NULL); |
|
151 context = g_new (GMarkupParseContext, 1); |
|
152 |
|
153 context->parser = parser; |
|
154 context->flags = flags; |
|
155 context->user_data = user_data; |
|
156 context->dnotify = user_data_dnotify; |
|
157 |
|
158 context->line_number = 1; |
|
159 context->char_number = 1; |
|
160 |
|
161 context->partial_chunk = NULL; |
|
162 |
|
163 context->state = STATE_START; |
|
164 context->tag_stack = NULL; |
|
165 context->attr_names = NULL; |
|
166 context->attr_values = NULL; |
|
167 context->cur_attr = -1; |
|
168 context->alloc_attrs = 0; |
|
169 |
|
170 context->current_text = NULL; |
|
171 context->current_text_len = -1; |
|
172 context->current_text_end = NULL; |
|
173 context->leftover_char_portion = NULL; |
|
174 |
|
175 context->start = NULL; |
|
176 context->iter = NULL; |
|
177 |
|
178 context->document_empty = TRUE; |
|
179 context->parsing = FALSE; |
|
180 |
|
181 context->balance = 0; |
|
182 |
|
183 return context; |
|
184 } |
|
185 |
|
186 /** |
|
187 * g_markup_parse_context_free: |
|
188 * @context: a #GMarkupParseContext |
|
189 * |
|
190 * Frees a #GMarkupParseContext. Can't be called from inside |
|
191 * one of the #GMarkupParser functions. |
|
192 * |
|
193 **/ |
|
194 EXPORT_C void |
|
195 g_markup_parse_context_free (GMarkupParseContext *context) |
|
196 { |
|
197 g_return_if_fail (context != NULL); |
|
198 g_return_if_fail (!context->parsing); |
|
199 |
|
200 if (context->dnotify) |
|
201 (* context->dnotify) (context->user_data); |
|
202 |
|
203 g_strfreev (context->attr_names); |
|
204 g_strfreev (context->attr_values); |
|
205 |
|
206 g_slist_foreach (context->tag_stack, (GFunc)g_free, NULL); |
|
207 g_slist_free (context->tag_stack); |
|
208 |
|
209 if (context->partial_chunk) |
|
210 g_string_free (context->partial_chunk, TRUE); |
|
211 |
|
212 if (context->leftover_char_portion) |
|
213 g_string_free (context->leftover_char_portion, TRUE); |
|
214 |
|
215 g_free (context); |
|
216 } |
|
217 |
|
218 static void |
|
219 mark_error (GMarkupParseContext *context, |
|
220 GError *error) |
|
221 { |
|
222 context->state = STATE_ERROR; |
|
223 |
|
224 if (context->parser->error) |
|
225 (*context->parser->error) (context, error, context->user_data); |
|
226 } |
|
227 |
|
228 static void set_error (GMarkupParseContext *context, |
|
229 GError **error, |
|
230 GMarkupError code, |
|
231 const gchar *format, |
|
232 ...) G_GNUC_PRINTF (4, 5); |
|
233 |
|
234 static void |
|
235 set_error (GMarkupParseContext *context, |
|
236 GError **error, |
|
237 GMarkupError code, |
|
238 const gchar *format, |
|
239 ...) |
|
240 { |
|
241 GError *tmp_error; |
|
242 gchar *s; |
|
243 va_list args; |
|
244 |
|
245 va_start (args, format); |
|
246 s = g_strdup_vprintf (format, args); |
|
247 va_end (args); |
|
248 |
|
249 tmp_error = g_error_new (G_MARKUP_ERROR, |
|
250 code, |
|
251 _("Error on line %d char %d: %s"), |
|
252 context->line_number, |
|
253 context->char_number, |
|
254 s); |
|
255 |
|
256 g_free (s); |
|
257 |
|
258 mark_error (context, tmp_error); |
|
259 |
|
260 g_propagate_error (error, tmp_error); |
|
261 } |
|
262 |
|
263 |
|
/* The name-character predicates below are ordered cheapest first:
 * an ASCII-only test, then a check for the characters that usually
 * terminate a name, and only as a last resort the expensive Unicode
 * property lookup.  Non-ASCII tag and attribute names are rare in
 * practice, so the guniprop call is virtually never reached.
 *
 * Note that the argument is evaluated more than once.
 */
#define IS_COMMON_NAME_END_CHAR(c) \
  ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
|
272 |
|
273 static gboolean |
|
274 is_name_start_char (const gchar *p) |
|
275 { |
|
276 if (g_ascii_isalpha (*p) || |
|
277 (!IS_COMMON_NAME_END_CHAR (*p) && |
|
278 (*p == '_' || |
|
279 *p == ':' || |
|
280 g_unichar_isalpha (g_utf8_get_char (p))))) |
|
281 return TRUE; |
|
282 else |
|
283 return FALSE; |
|
284 } |
|
285 |
|
286 static gboolean |
|
287 is_name_char (const gchar *p) |
|
288 { |
|
289 if (g_ascii_isalnum (*p) || |
|
290 (!IS_COMMON_NAME_END_CHAR (*p) && |
|
291 (*p == '.' || |
|
292 *p == '-' || |
|
293 *p == '_' || |
|
294 *p == ':' || |
|
295 g_unichar_isalpha (g_utf8_get_char (p))))) |
|
296 return TRUE; |
|
297 else |
|
298 return FALSE; |
|
299 } |
|
300 |
|
301 |
|
302 static gchar* |
|
303 char_str (gunichar c, |
|
304 gchar *buf) |
|
305 { |
|
306 memset (buf, 0, 8); |
|
307 g_unichar_to_utf8 (c, buf); |
|
308 return buf; |
|
309 } |
|
310 |
|
311 static gchar* |
|
312 utf8_str (const gchar *utf8, |
|
313 gchar *buf) |
|
314 { |
|
315 char_str (g_utf8_get_char (utf8), buf); |
|
316 return buf; |
|
317 } |
|
318 |
|
319 static void |
|
320 set_unescape_error (GMarkupParseContext *context, |
|
321 GError **error, |
|
322 const gchar *remaining_text, |
|
323 const gchar *remaining_text_end, |
|
324 GMarkupError code, |
|
325 const gchar *format, |
|
326 ...) |
|
327 { |
|
328 GError *tmp_error; |
|
329 gchar *s; |
|
330 va_list args; |
|
331 gint remaining_newlines; |
|
332 const gchar *p; |
|
333 |
|
334 remaining_newlines = 0; |
|
335 p = remaining_text; |
|
336 while (p != remaining_text_end) |
|
337 { |
|
338 if (*p == '\n') |
|
339 ++remaining_newlines; |
|
340 ++p; |
|
341 } |
|
342 |
|
343 va_start (args, format); |
|
344 s = g_strdup_vprintf (format, args); |
|
345 va_end (args); |
|
346 |
|
347 tmp_error = g_error_new (G_MARKUP_ERROR, |
|
348 code, |
|
349 _("Error on line %d: %s"), |
|
350 context->line_number - remaining_newlines, |
|
351 s); |
|
352 |
|
353 g_free (s); |
|
354 |
|
355 mark_error (context, tmp_error); |
|
356 |
|
357 g_propagate_error (error, tmp_error); |
|
358 } |
|
359 |
|
/* States of the small state machine that unescape_text() drives. */
typedef enum
{
  USTATE_INSIDE_TEXT,         /* ordinary text; the initial state */
  USTATE_AFTER_AMPERSAND,     /* just consumed '&' */
  USTATE_INSIDE_ENTITY_NAME,  /* reading the name of a named entity */
  USTATE_AFTER_CHARREF_HASH   /* just consumed "&#" */
} UnescapeState;
|
367 |
|
368 typedef struct |
|
369 { |
|
370 GMarkupParseContext *context; |
|
371 GString *str; |
|
372 UnescapeState state; |
|
373 const gchar *text; |
|
374 const gchar *text_end; |
|
375 const gchar *entity_start; |
|
376 } UnescapeContext; |
|
377 |
|
378 static const gchar* |
|
379 unescape_text_state_inside_text (UnescapeContext *ucontext, |
|
380 const gchar *p, |
|
381 GError **error) |
|
382 { |
|
383 const gchar *start; |
|
384 gboolean normalize_attribute; |
|
385 |
|
386 if (ucontext->context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ || |
|
387 ucontext->context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ) |
|
388 normalize_attribute = TRUE; |
|
389 else |
|
390 normalize_attribute = FALSE; |
|
391 |
|
392 start = p; |
|
393 |
|
394 while (p != ucontext->text_end) |
|
395 { |
|
396 if (*p == '&') |
|
397 { |
|
398 break; |
|
399 } |
|
400 else if (normalize_attribute && (*p == '\t' || *p == '\n')) |
|
401 { |
|
402 g_string_append_len (ucontext->str, start, p - start); |
|
403 g_string_append_c (ucontext->str, ' '); |
|
404 p = g_utf8_next_char (p); |
|
405 start = p; |
|
406 } |
|
407 else if (*p == '\r') |
|
408 { |
|
409 g_string_append_len (ucontext->str, start, p - start); |
|
410 g_string_append_c (ucontext->str, normalize_attribute ? ' ' : '\n'); |
|
411 p = g_utf8_next_char (p); |
|
412 if (p != ucontext->text_end && *p == '\n') |
|
413 p = g_utf8_next_char (p); |
|
414 start = p; |
|
415 } |
|
416 else |
|
417 p = g_utf8_next_char (p); |
|
418 } |
|
419 |
|
420 if (p != start) |
|
421 g_string_append_len (ucontext->str, start, p - start); |
|
422 |
|
423 if (p != ucontext->text_end && *p == '&') |
|
424 { |
|
425 p = g_utf8_next_char (p); |
|
426 ucontext->state = USTATE_AFTER_AMPERSAND; |
|
427 } |
|
428 |
|
429 return p; |
|
430 } |
|
431 |
|
432 static const gchar* |
|
433 unescape_text_state_after_ampersand (UnescapeContext *ucontext, |
|
434 const gchar *p, |
|
435 GError **error) |
|
436 { |
|
437 ucontext->entity_start = NULL; |
|
438 |
|
439 if (*p == '#') |
|
440 { |
|
441 p = g_utf8_next_char (p); |
|
442 |
|
443 ucontext->entity_start = p; |
|
444 ucontext->state = USTATE_AFTER_CHARREF_HASH; |
|
445 } |
|
446 else if (!is_name_start_char (p)) |
|
447 { |
|
448 if (*p == ';') |
|
449 { |
|
450 set_unescape_error (ucontext->context, error, |
|
451 p, ucontext->text_end, |
|
452 G_MARKUP_ERROR_PARSE, |
|
453 _("Empty entity '&;' seen; valid " |
|
454 "entities are: & " < > '")); |
|
455 } |
|
456 else |
|
457 { |
|
458 gchar buf[8]; |
|
459 |
|
460 set_unescape_error (ucontext->context, error, |
|
461 p, ucontext->text_end, |
|
462 G_MARKUP_ERROR_PARSE, |
|
463 _("Character '%s' is not valid at " |
|
464 "the start of an entity name; " |
|
465 "the & character begins an entity; " |
|
466 "if this ampersand isn't supposed " |
|
467 "to be an entity, escape it as " |
|
468 "&"), |
|
469 utf8_str (p, buf)); |
|
470 } |
|
471 } |
|
472 else |
|
473 { |
|
474 ucontext->entity_start = p; |
|
475 ucontext->state = USTATE_INSIDE_ENTITY_NAME; |
|
476 } |
|
477 |
|
478 return p; |
|
479 } |
|
480 |
|
481 static const gchar* |
|
482 unescape_text_state_inside_entity_name (UnescapeContext *ucontext, |
|
483 const gchar *p, |
|
484 GError **error) |
|
485 { |
|
486 while (p != ucontext->text_end) |
|
487 { |
|
488 if (*p == ';') |
|
489 break; |
|
490 else if (!is_name_char (p)) |
|
491 { |
|
492 gchar ubuf[8]; |
|
493 |
|
494 set_unescape_error (ucontext->context, error, |
|
495 p, ucontext->text_end, |
|
496 G_MARKUP_ERROR_PARSE, |
|
497 _("Character '%s' is not valid " |
|
498 "inside an entity name"), |
|
499 utf8_str (p, ubuf)); |
|
500 break; |
|
501 } |
|
502 |
|
503 p = g_utf8_next_char (p); |
|
504 } |
|
505 |
|
506 if (ucontext->context->state != STATE_ERROR) |
|
507 { |
|
508 if (p != ucontext->text_end) |
|
509 { |
|
510 gint len = p - ucontext->entity_start; |
|
511 |
|
512 /* move to after semicolon */ |
|
513 p = g_utf8_next_char (p); |
|
514 ucontext->state = USTATE_INSIDE_TEXT; |
|
515 |
|
516 if (strncmp (ucontext->entity_start, "lt", len) == 0) |
|
517 g_string_append_c (ucontext->str, '<'); |
|
518 else if (strncmp (ucontext->entity_start, "gt", len) == 0) |
|
519 g_string_append_c (ucontext->str, '>'); |
|
520 else if (strncmp (ucontext->entity_start, "amp", len) == 0) |
|
521 g_string_append_c (ucontext->str, '&'); |
|
522 else if (strncmp (ucontext->entity_start, "quot", len) == 0) |
|
523 g_string_append_c (ucontext->str, '"'); |
|
524 else if (strncmp (ucontext->entity_start, "apos", len) == 0) |
|
525 g_string_append_c (ucontext->str, '\''); |
|
526 else |
|
527 { |
|
528 gchar *name; |
|
529 |
|
530 name = g_strndup (ucontext->entity_start, len); |
|
531 set_unescape_error (ucontext->context, error, |
|
532 p, ucontext->text_end, |
|
533 G_MARKUP_ERROR_PARSE, |
|
534 _("Entity name '%s' is not known"), |
|
535 name); |
|
536 g_free (name); |
|
537 } |
|
538 } |
|
539 else |
|
540 { |
|
541 set_unescape_error (ucontext->context, error, |
|
542 /* give line number of the & */ |
|
543 ucontext->entity_start, ucontext->text_end, |
|
544 G_MARKUP_ERROR_PARSE, |
|
545 _("Entity did not end with a semicolon; " |
|
546 "most likely you used an ampersand " |
|
547 "character without intending to start " |
|
548 "an entity - escape ampersand as &")); |
|
549 } |
|
550 } |
|
551 #undef MAX_ENT_LEN |
|
552 |
|
553 return p; |
|
554 } |
|
555 |
|
556 static const gchar* |
|
557 unescape_text_state_after_charref_hash (UnescapeContext *ucontext, |
|
558 const gchar *p, |
|
559 GError **error) |
|
560 { |
|
561 gboolean is_hex = FALSE; |
|
562 const char *start; |
|
563 |
|
564 start = ucontext->entity_start; |
|
565 |
|
566 if (*p == 'x') |
|
567 { |
|
568 is_hex = TRUE; |
|
569 p = g_utf8_next_char (p); |
|
570 start = p; |
|
571 } |
|
572 |
|
573 while (p != ucontext->text_end && *p != ';') |
|
574 p = g_utf8_next_char (p); |
|
575 |
|
576 if (p != ucontext->text_end) |
|
577 { |
|
578 g_assert (*p == ';'); |
|
579 |
|
580 /* digit is between start and p */ |
|
581 |
|
582 if (start != p) |
|
583 { |
|
584 gulong l; |
|
585 gchar *end = NULL; |
|
586 |
|
587 errno = 0; |
|
588 if (is_hex) |
|
589 l = strtoul (start, &end, 16); |
|
590 else |
|
591 l = strtoul (start, &end, 10); |
|
592 |
|
593 if (end != p || errno != 0) |
|
594 { |
|
595 set_unescape_error (ucontext->context, error, |
|
596 start, ucontext->text_end, |
|
597 G_MARKUP_ERROR_PARSE, |
|
598 _("Failed to parse '%-.*s', which " |
|
599 "should have been a digit " |
|
600 "inside a character reference " |
|
601 "(ê for example) - perhaps " |
|
602 "the digit is too large"), |
|
603 p - start, start); |
|
604 } |
|
605 else |
|
606 { |
|
607 /* characters XML permits */ |
|
608 if (l == 0x9 || |
|
609 l == 0xA || |
|
610 l == 0xD || |
|
611 (l >= 0x20 && l <= 0xD7FF) || |
|
612 (l >= 0xE000 && l <= 0xFFFD) || |
|
613 (l >= 0x10000 && l <= 0x10FFFF)) |
|
614 { |
|
615 gchar buf[8]; |
|
616 g_string_append (ucontext->str, char_str (l, buf)); |
|
617 } |
|
618 else |
|
619 { |
|
620 set_unescape_error (ucontext->context, error, |
|
621 start, ucontext->text_end, |
|
622 G_MARKUP_ERROR_PARSE, |
|
623 _("Character reference '%-.*s' does not " |
|
624 "encode a permitted character"), |
|
625 p - start, start); |
|
626 } |
|
627 } |
|
628 |
|
629 /* Move to next state */ |
|
630 p = g_utf8_next_char (p); /* past semicolon */ |
|
631 ucontext->state = USTATE_INSIDE_TEXT; |
|
632 } |
|
633 else |
|
634 { |
|
635 set_unescape_error (ucontext->context, error, |
|
636 start, ucontext->text_end, |
|
637 G_MARKUP_ERROR_PARSE, |
|
638 _("Empty character reference; " |
|
639 "should include a digit such as " |
|
640 "dž")); |
|
641 } |
|
642 } |
|
643 else |
|
644 { |
|
645 set_unescape_error (ucontext->context, error, |
|
646 start, ucontext->text_end, |
|
647 G_MARKUP_ERROR_PARSE, |
|
648 _("Character reference did not end with a " |
|
649 "semicolon; " |
|
650 "most likely you used an ampersand " |
|
651 "character without intending to start " |
|
652 "an entity - escape ampersand as &")); |
|
653 } |
|
654 |
|
655 return p; |
|
656 } |
|
657 |
|
658 static gboolean |
|
659 unescape_text (GMarkupParseContext *context, |
|
660 const gchar *text, |
|
661 const gchar *text_end, |
|
662 GString **unescaped, |
|
663 GError **error) |
|
664 { |
|
665 UnescapeContext ucontext; |
|
666 const gchar *p; |
|
667 |
|
668 ucontext.context = context; |
|
669 ucontext.text = text; |
|
670 ucontext.text_end = text_end; |
|
671 ucontext.entity_start = NULL; |
|
672 |
|
673 ucontext.str = g_string_sized_new (text_end - text); |
|
674 |
|
675 ucontext.state = USTATE_INSIDE_TEXT; |
|
676 p = text; |
|
677 |
|
678 while (p != text_end && context->state != STATE_ERROR) |
|
679 { |
|
680 g_assert (p < text_end); |
|
681 |
|
682 switch (ucontext.state) |
|
683 { |
|
684 case USTATE_INSIDE_TEXT: |
|
685 { |
|
686 p = unescape_text_state_inside_text (&ucontext, |
|
687 p, |
|
688 error); |
|
689 } |
|
690 break; |
|
691 |
|
692 case USTATE_AFTER_AMPERSAND: |
|
693 { |
|
694 p = unescape_text_state_after_ampersand (&ucontext, |
|
695 p, |
|
696 error); |
|
697 } |
|
698 break; |
|
699 |
|
700 |
|
701 case USTATE_INSIDE_ENTITY_NAME: |
|
702 { |
|
703 p = unescape_text_state_inside_entity_name (&ucontext, |
|
704 p, |
|
705 error); |
|
706 } |
|
707 break; |
|
708 |
|
709 case USTATE_AFTER_CHARREF_HASH: |
|
710 { |
|
711 p = unescape_text_state_after_charref_hash (&ucontext, |
|
712 p, |
|
713 error); |
|
714 } |
|
715 break; |
|
716 |
|
717 default: |
|
718 g_assert_not_reached (); |
|
719 break; |
|
720 } |
|
721 } |
|
722 |
|
723 if (context->state != STATE_ERROR) |
|
724 { |
|
725 switch (ucontext.state) |
|
726 { |
|
727 case USTATE_INSIDE_TEXT: |
|
728 break; |
|
729 case USTATE_AFTER_AMPERSAND: |
|
730 case USTATE_INSIDE_ENTITY_NAME: |
|
731 set_unescape_error (context, error, |
|
732 NULL, NULL, |
|
733 G_MARKUP_ERROR_PARSE, |
|
734 _("Unfinished entity reference")); |
|
735 break; |
|
736 case USTATE_AFTER_CHARREF_HASH: |
|
737 set_unescape_error (context, error, |
|
738 NULL, NULL, |
|
739 G_MARKUP_ERROR_PARSE, |
|
740 _("Unfinished character reference")); |
|
741 break; |
|
742 } |
|
743 } |
|
744 |
|
745 if (context->state == STATE_ERROR) |
|
746 { |
|
747 g_string_free (ucontext.str, TRUE); |
|
748 *unescaped = NULL; |
|
749 return FALSE; |
|
750 } |
|
751 else |
|
752 { |
|
753 *unescaped = ucontext.str; |
|
754 return TRUE; |
|
755 } |
|
756 } |
|
757 |
|
758 static inline gboolean |
|
759 advance_char (GMarkupParseContext *context) |
|
760 { |
|
761 context->iter = g_utf8_next_char (context->iter); |
|
762 context->char_number += 1; |
|
763 |
|
764 if (context->iter == context->current_text_end) |
|
765 { |
|
766 return FALSE; |
|
767 } |
|
768 else if (*context->iter == '\n') |
|
769 { |
|
770 context->line_number += 1; |
|
771 context->char_number = 1; |
|
772 } |
|
773 |
|
774 return TRUE; |
|
775 } |
|
776 |
|
777 static inline gboolean |
|
778 xml_isspace (char c) |
|
779 { |
|
780 return c == ' ' || c == '\t' || c == '\n' || c == '\r'; |
|
781 } |
|
782 |
|
783 static void |
|
784 skip_spaces (GMarkupParseContext *context) |
|
785 { |
|
786 do |
|
787 { |
|
788 if (!xml_isspace (*context->iter)) |
|
789 return; |
|
790 } |
|
791 while (advance_char (context)); |
|
792 } |
|
793 |
|
794 static void |
|
795 advance_to_name_end (GMarkupParseContext *context) |
|
796 { |
|
797 do |
|
798 { |
|
799 if (!is_name_char (context->iter)) |
|
800 return; |
|
801 } |
|
802 while (advance_char (context)); |
|
803 } |
|
804 |
|
805 static void |
|
806 add_to_partial (GMarkupParseContext *context, |
|
807 const gchar *text_start, |
|
808 const gchar *text_end) |
|
809 { |
|
810 if (context->partial_chunk == NULL) |
|
811 context->partial_chunk = g_string_sized_new (text_end - text_start); |
|
812 |
|
813 if (text_start != text_end) |
|
814 g_string_append_len (context->partial_chunk, text_start, |
|
815 text_end - text_start); |
|
816 |
|
817 /* Invariant here that partial_chunk exists */ |
|
818 } |
|
819 |
|
820 static void |
|
821 truncate_partial (GMarkupParseContext *context) |
|
822 { |
|
823 if (context->partial_chunk != NULL) |
|
824 { |
|
825 context->partial_chunk = g_string_truncate (context->partial_chunk, 0); |
|
826 } |
|
827 } |
|
828 |
|
829 static const gchar* |
|
830 current_element (GMarkupParseContext *context) |
|
831 { |
|
832 return context->tag_stack->data; |
|
833 } |
|
834 |
|
835 static const gchar* |
|
836 current_attribute (GMarkupParseContext *context) |
|
837 { |
|
838 g_assert (context->cur_attr >= 0); |
|
839 return context->attr_names[context->cur_attr]; |
|
840 } |
|
841 |
|
842 static void |
|
843 find_current_text_end (GMarkupParseContext *context) |
|
844 { |
|
845 /* This function must be safe (non-segfaulting) on invalid UTF8. |
|
846 * It assumes the string starts with a character start |
|
847 */ |
|
848 const gchar *end = context->current_text + context->current_text_len; |
|
849 const gchar *p; |
|
850 const gchar *next; |
|
851 |
|
852 g_assert (context->current_text_len > 0); |
|
853 |
|
854 p = g_utf8_find_prev_char (context->current_text, end); |
|
855 |
|
856 g_assert (p != NULL); /* since current_text was a char start */ |
|
857 |
|
858 /* p is now the start of the last character or character portion. */ |
|
859 g_assert (p != end); |
|
860 next = g_utf8_next_char (p); /* this only touches *p, nothing beyond */ |
|
861 |
|
862 if (next == end) |
|
863 { |
|
864 /* whole character */ |
|
865 context->current_text_end = end; |
|
866 } |
|
867 else |
|
868 { |
|
869 /* portion */ |
|
870 context->leftover_char_portion = g_string_new_len (p, end - p); |
|
871 context->current_text_len -= (end - p); |
|
872 context->current_text_end = p; |
|
873 } |
|
874 } |
|
875 |
|
876 |
|
877 static void |
|
878 add_attribute (GMarkupParseContext *context, char *name) |
|
879 { |
|
880 if (context->cur_attr + 2 >= context->alloc_attrs) |
|
881 { |
|
882 context->alloc_attrs += 5; /* silly magic number */ |
|
883 context->attr_names = g_realloc (context->attr_names, sizeof(char*)*context->alloc_attrs); |
|
884 context->attr_values = g_realloc (context->attr_values, sizeof(char*)*context->alloc_attrs); |
|
885 } |
|
886 context->cur_attr++; |
|
887 context->attr_names[context->cur_attr] = name; |
|
888 context->attr_values[context->cur_attr] = NULL; |
|
889 context->attr_names[context->cur_attr+1] = NULL; |
|
890 context->attr_values[context->cur_attr+1] = NULL; |
|
891 } |
|
892 |
|
893 /** |
|
894 * g_markup_parse_context_parse: |
|
895 * @context: a #GMarkupParseContext |
|
896 * @text: chunk of text to parse |
|
897 * @text_len: length of @text in bytes |
|
898 * @error: return location for a #GError |
|
899 * |
|
900 * Feed some data to the #GMarkupParseContext. The data need not |
|
901 * be valid UTF-8; an error will be signaled if it's invalid. |
|
902 * The data need not be an entire document; you can feed a document |
|
903 * into the parser incrementally, via multiple calls to this function. |
|
904 * Typically, as you receive data from a network connection or file, |
|
905 * you feed each received chunk of data into this function, aborting |
|
906 * the process if an error occurs. Once an error is reported, no further |
|
907 * data may be fed to the #GMarkupParseContext; all errors are fatal. |
|
908 * |
|
909 * Return value: %FALSE if an error occurred, %TRUE on success |
|
910 **/ |
|
911 EXPORT_C gboolean |
|
912 g_markup_parse_context_parse (GMarkupParseContext *context, |
|
913 const gchar *text, |
|
914 gssize text_len, |
|
915 GError **error) |
|
916 { |
|
917 const gchar *first_invalid; |
|
918 |
|
919 g_return_val_if_fail (context != NULL, FALSE); |
|
920 g_return_val_if_fail (text != NULL, FALSE); |
|
921 g_return_val_if_fail (context->state != STATE_ERROR, FALSE); |
|
922 g_return_val_if_fail (!context->parsing, FALSE); |
|
923 |
|
924 if (text_len < 0) |
|
925 text_len = strlen (text); |
|
926 |
|
927 if (text_len == 0) |
|
928 return TRUE; |
|
929 |
|
930 context->parsing = TRUE; |
|
931 |
|
932 if (context->leftover_char_portion) |
|
933 { |
|
934 const gchar *first_char; |
|
935 |
|
936 if ((*text & 0xc0) != 0x80) |
|
937 first_char = text; |
|
938 else |
|
939 first_char = g_utf8_find_next_char (text, text + text_len); |
|
940 |
|
941 if (first_char) |
|
942 { |
|
943 /* leftover_char_portion was completed. Parse it. */ |
|
944 GString *portion = context->leftover_char_portion; |
|
945 |
|
946 g_string_append_len (context->leftover_char_portion, |
|
947 text, first_char - text); |
|
948 |
|
949 /* hacks to allow recursion */ |
|
950 context->parsing = FALSE; |
|
951 context->leftover_char_portion = NULL; |
|
952 |
|
953 if (!g_markup_parse_context_parse (context, |
|
954 portion->str, portion->len, |
|
955 error)) |
|
956 { |
|
957 g_assert (context->state == STATE_ERROR); |
|
958 } |
|
959 |
|
960 g_string_free (portion, TRUE); |
|
961 context->parsing = TRUE; |
|
962 |
|
963 /* Skip the fraction of char that was in this text */ |
|
964 text_len -= (first_char - text); |
|
965 text = first_char; |
|
966 } |
|
967 else |
|
968 { |
|
969 /* another little chunk of the leftover char; geez |
|
970 * someone is inefficient. |
|
971 */ |
|
972 g_string_append_len (context->leftover_char_portion, |
|
973 text, text_len); |
|
974 |
|
975 if (context->leftover_char_portion->len > 7) |
|
976 { |
|
977 /* The leftover char portion is too big to be |
|
978 * a UTF-8 character |
|
979 */ |
|
980 set_error (context, |
|
981 error, |
|
982 G_MARKUP_ERROR_BAD_UTF8, |
|
983 _("Invalid UTF-8 encoded text")); |
|
984 } |
|
985 |
|
986 goto finished; |
|
987 } |
|
988 } |
|
989 |
|
990 context->current_text = text; |
|
991 context->current_text_len = text_len; |
|
992 context->iter = context->current_text; |
|
993 context->start = context->iter; |
|
994 |
|
995 /* Nothing left after finishing the leftover char, or nothing |
|
996 * passed in to begin with. |
|
997 */ |
|
998 if (context->current_text_len == 0) |
|
999 goto finished; |
|
1000 |
|
1001 /* find_current_text_end () assumes the string starts at |
|
1002 * a character start, so we need to validate at least |
|
1003 * that much. It doesn't assume any following bytes |
|
1004 * are valid. |
|
1005 */ |
|
1006 if ((*context->current_text & 0xc0) == 0x80) /* not a char start */ |
|
1007 { |
|
1008 set_error (context, |
|
1009 error, |
|
1010 G_MARKUP_ERROR_BAD_UTF8, |
|
1011 _("Invalid UTF-8 encoded text")); |
|
1012 goto finished; |
|
1013 } |
|
1014 |
|
1015 /* Initialize context->current_text_end, possibly adjusting |
|
1016 * current_text_len, and add any leftover char portion |
|
1017 */ |
|
1018 find_current_text_end (context); |
|
1019 |
|
1020 /* Validate UTF8 (must be done after we find the end, since |
|
1021 * we could have a trailing incomplete char) |
|
1022 */ |
|
1023 if (!g_utf8_validate (context->current_text, |
|
1024 context->current_text_len, |
|
1025 &first_invalid)) |
|
1026 { |
|
1027 gint newlines = 0; |
|
1028 const gchar *p; |
|
1029 p = context->current_text; |
|
1030 while (p != context->current_text_end) |
|
1031 { |
|
1032 if (*p == '\n') |
|
1033 ++newlines; |
|
1034 ++p; |
|
1035 } |
|
1036 |
|
1037 context->line_number += newlines; |
|
1038 |
|
1039 set_error (context, |
|
1040 error, |
|
1041 G_MARKUP_ERROR_BAD_UTF8, |
|
1042 _("Invalid UTF-8 encoded text")); |
|
1043 goto finished; |
|
1044 } |
|
1045 |
|
1046 while (context->iter != context->current_text_end) |
|
1047 { |
|
1048 switch (context->state) |
|
1049 { |
|
1050 case STATE_START: |
|
1051 /* Possible next state: AFTER_OPEN_ANGLE */ |
|
1052 |
|
1053 g_assert (context->tag_stack == NULL); |
|
1054 |
|
1055 /* whitespace is ignored outside of any elements */ |
|
1056 skip_spaces (context); |
|
1057 |
|
1058 if (context->iter != context->current_text_end) |
|
1059 { |
|
1060 if (*context->iter == '<') |
|
1061 { |
|
1062 /* Move after the open angle */ |
|
1063 advance_char (context); |
|
1064 |
|
1065 context->state = STATE_AFTER_OPEN_ANGLE; |
|
1066 |
|
1067 /* this could start a passthrough */ |
|
1068 context->start = context->iter; |
|
1069 |
|
1070 /* document is now non-empty */ |
|
1071 context->document_empty = FALSE; |
|
1072 } |
|
1073 else |
|
1074 { |
|
1075 set_error (context, |
|
1076 error, |
|
1077 G_MARKUP_ERROR_PARSE, |
|
1078 _("Document must begin with an element (e.g. <book>)")); |
|
1079 } |
|
1080 } |
|
1081 break; |
|
1082 |
|
1083 case STATE_AFTER_OPEN_ANGLE: |
|
1084 /* Possible next states: INSIDE_OPEN_TAG_NAME, |
|
1085 * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH |
|
1086 */ |
|
1087 if (*context->iter == '?' || |
|
1088 *context->iter == '!') |
|
1089 { |
|
1090 /* include < in the passthrough */ |
|
1091 const gchar *openangle = "<"; |
|
1092 add_to_partial (context, openangle, openangle + 1); |
|
1093 context->start = context->iter; |
|
1094 context->balance = 1; |
|
1095 context->state = STATE_INSIDE_PASSTHROUGH; |
|
1096 } |
|
1097 else if (*context->iter == '/') |
|
1098 { |
|
1099 /* move after it */ |
|
1100 advance_char (context); |
|
1101 |
|
1102 context->state = STATE_AFTER_CLOSE_TAG_SLASH; |
|
1103 } |
|
1104 else if (is_name_start_char (context->iter)) |
|
1105 { |
|
1106 context->state = STATE_INSIDE_OPEN_TAG_NAME; |
|
1107 |
|
1108 /* start of tag name */ |
|
1109 context->start = context->iter; |
|
1110 } |
|
1111 else |
|
1112 { |
|
1113 gchar buf[8]; |
|
1114 |
|
1115 set_error (context, |
|
1116 error, |
|
1117 G_MARKUP_ERROR_PARSE, |
|
1118 _("'%s' is not a valid character following " |
|
1119 "a '<' character; it may not begin an " |
|
1120 "element name"), |
|
1121 utf8_str (context->iter, buf)); |
|
1122 } |
|
1123 break; |
|
1124 |
|
1125 /* The AFTER_CLOSE_ANGLE state is actually sort of |
|
1126 * broken, because it doesn't correspond to a range |
|
1127 * of characters in the input stream as the others do, |
|
1128 * and thus makes things harder to conceptualize |
|
1129 */ |
|
1130 case STATE_AFTER_CLOSE_ANGLE: |
|
1131 /* Possible next states: INSIDE_TEXT, STATE_START */ |
|
1132 if (context->tag_stack == NULL) |
|
1133 { |
|
1134 context->start = NULL; |
|
1135 context->state = STATE_START; |
|
1136 } |
|
1137 else |
|
1138 { |
|
1139 context->start = context->iter; |
|
1140 context->state = STATE_INSIDE_TEXT; |
|
1141 } |
|
1142 break; |
|
1143 |
|
1144 case STATE_AFTER_ELISION_SLASH: |
|
1145 /* Possible next state: AFTER_CLOSE_ANGLE */ |
|
1146 |
|
1147 { |
|
1148 /* We need to pop the tag stack and call the end_element |
|
1149 * function, since this is the close tag |
|
1150 */ |
|
1151 GError *tmp_error = NULL; |
|
1152 |
|
1153 g_assert (context->tag_stack != NULL); |
|
1154 |
|
1155 tmp_error = NULL; |
|
1156 if (context->parser->end_element) |
|
1157 (* context->parser->end_element) (context, |
|
1158 context->tag_stack->data, |
|
1159 context->user_data, |
|
1160 &tmp_error); |
|
1161 |
|
1162 if (tmp_error) |
|
1163 { |
|
1164 mark_error (context, tmp_error); |
|
1165 g_propagate_error (error, tmp_error); |
|
1166 } |
|
1167 else |
|
1168 { |
|
1169 if (*context->iter == '>') |
|
1170 { |
|
1171 /* move after the close angle */ |
|
1172 advance_char (context); |
|
1173 context->state = STATE_AFTER_CLOSE_ANGLE; |
|
1174 } |
|
1175 else |
|
1176 { |
|
1177 gchar buf[8]; |
|
1178 |
|
1179 set_error (context, |
|
1180 error, |
|
1181 G_MARKUP_ERROR_PARSE, |
|
1182 _("Odd character '%s', expected a '>' character " |
|
1183 "to end the start tag of element '%s'"), |
|
1184 utf8_str (context->iter, buf), |
|
1185 current_element (context)); |
|
1186 } |
|
1187 } |
|
1188 |
|
1189 g_free (context->tag_stack->data); |
|
1190 context->tag_stack = g_slist_delete_link (context->tag_stack, |
|
1191 context->tag_stack); |
|
1192 } |
|
1193 break; |
|
1194 |
|
1195 case STATE_INSIDE_OPEN_TAG_NAME: |
|
1196 /* Possible next states: BETWEEN_ATTRIBUTES */ |
|
1197 |
|
1198 /* if there's a partial chunk then it's the first part of the |
|
1199 * tag name. If there's a context->start then it's the start |
|
1200 * of the tag name in current_text, the partial chunk goes |
|
1201 * before that start though. |
|
1202 */ |
|
1203 advance_to_name_end (context); |
|
1204 |
|
1205 if (context->iter == context->current_text_end) |
|
1206 { |
|
1207 /* The name hasn't necessarily ended. Merge with |
|
1208 * partial chunk, leave state unchanged. |
|
1209 */ |
|
1210 add_to_partial (context, context->start, context->iter); |
|
1211 } |
|
1212 else |
|
1213 { |
|
1214 /* The name has ended. Combine it with the partial chunk |
|
1215 * if any; push it on the stack; enter next state. |
|
1216 */ |
|
1217 add_to_partial (context, context->start, context->iter); |
|
1218 context->tag_stack = |
|
1219 g_slist_prepend (context->tag_stack, |
|
1220 g_string_free (context->partial_chunk, |
|
1221 FALSE)); |
|
1222 |
|
1223 context->partial_chunk = NULL; |
|
1224 |
|
1225 context->state = STATE_BETWEEN_ATTRIBUTES; |
|
1226 context->start = NULL; |
|
1227 } |
|
1228 break; |
|
1229 |
|
1230 case STATE_INSIDE_ATTRIBUTE_NAME: |
|
1231 /* Possible next states: AFTER_ATTRIBUTE_NAME */ |
|
1232 |
|
1233 advance_to_name_end (context); |
|
1234 add_to_partial (context, context->start, context->iter); |
|
1235 |
|
1236 /* read the full name, if we enter the equals sign state |
|
1237 * then add the attribute to the list (without the value), |
|
1238 * otherwise store a partial chunk to be prepended later. |
|
1239 */ |
|
1240 if (context->iter != context->current_text_end) |
|
1241 context->state = STATE_AFTER_ATTRIBUTE_NAME; |
|
1242 break; |
|
1243 |
|
1244 case STATE_AFTER_ATTRIBUTE_NAME: |
|
1245 /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */ |
|
1246 |
|
1247 skip_spaces (context); |
|
1248 |
|
1249 if (context->iter != context->current_text_end) |
|
1250 { |
|
1251 /* The name has ended. Combine it with the partial chunk |
|
1252 * if any; push it on the stack; enter next state. |
|
1253 */ |
|
1254 add_attribute (context, g_string_free (context->partial_chunk, FALSE)); |
|
1255 |
|
1256 context->partial_chunk = NULL; |
|
1257 context->start = NULL; |
|
1258 |
|
1259 if (*context->iter == '=') |
|
1260 { |
|
1261 advance_char (context); |
|
1262 context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN; |
|
1263 } |
|
1264 else |
|
1265 { |
|
1266 gchar buf[8]; |
|
1267 |
|
1268 set_error (context, |
|
1269 error, |
|
1270 G_MARKUP_ERROR_PARSE, |
|
1271 _("Odd character '%s', expected a '=' after " |
|
1272 "attribute name '%s' of element '%s'"), |
|
1273 utf8_str (context->iter, buf), |
|
1274 current_attribute (context), |
|
1275 current_element (context)); |
|
1276 |
|
1277 } |
|
1278 } |
|
1279 break; |
|
1280 |
|
1281 case STATE_BETWEEN_ATTRIBUTES: |
|
1282 /* Possible next states: AFTER_CLOSE_ANGLE, |
|
1283 * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME |
|
1284 */ |
|
1285 skip_spaces (context); |
|
1286 |
|
1287 if (context->iter != context->current_text_end) |
|
1288 { |
|
1289 if (*context->iter == '/') |
|
1290 { |
|
1291 advance_char (context); |
|
1292 context->state = STATE_AFTER_ELISION_SLASH; |
|
1293 } |
|
1294 else if (*context->iter == '>') |
|
1295 { |
|
1296 |
|
1297 advance_char (context); |
|
1298 context->state = STATE_AFTER_CLOSE_ANGLE; |
|
1299 } |
|
1300 else if (is_name_start_char (context->iter)) |
|
1301 { |
|
1302 context->state = STATE_INSIDE_ATTRIBUTE_NAME; |
|
1303 /* start of attribute name */ |
|
1304 context->start = context->iter; |
|
1305 } |
|
1306 else |
|
1307 { |
|
1308 gchar buf[8]; |
|
1309 |
|
1310 set_error (context, |
|
1311 error, |
|
1312 G_MARKUP_ERROR_PARSE, |
|
1313 _("Odd character '%s', expected a '>' or '/' " |
|
1314 "character to end the start tag of " |
|
1315 "element '%s', or optionally an attribute; " |
|
1316 "perhaps you used an invalid character in " |
|
1317 "an attribute name"), |
|
1318 utf8_str (context->iter, buf), |
|
1319 current_element (context)); |
|
1320 } |
|
1321 |
|
1322 /* If we're done with attributes, invoke |
|
1323 * the start_element callback |
|
1324 */ |
|
1325 if (context->state == STATE_AFTER_ELISION_SLASH || |
|
1326 context->state == STATE_AFTER_CLOSE_ANGLE) |
|
1327 { |
|
1328 const gchar *start_name; |
|
1329 /* Ugly, but the current code expects an empty array instead of NULL */ |
|
1330 const gchar *empty = NULL; |
|
1331 const gchar **attr_names = ∅ |
|
1332 const gchar **attr_values = ∅ |
|
1333 GError *tmp_error; |
|
1334 |
|
1335 /* Call user callback for element start */ |
|
1336 start_name = current_element (context); |
|
1337 |
|
1338 if (context->cur_attr >= 0) |
|
1339 { |
|
1340 attr_names = (const gchar**)context->attr_names; |
|
1341 attr_values = (const gchar**)context->attr_values; |
|
1342 } |
|
1343 |
|
1344 tmp_error = NULL; |
|
1345 if (context->parser->start_element) |
|
1346 (* context->parser->start_element) (context, |
|
1347 start_name, |
|
1348 (const gchar **)attr_names, |
|
1349 (const gchar **)attr_values, |
|
1350 context->user_data, |
|
1351 &tmp_error); |
|
1352 |
|
1353 /* Go ahead and free the attributes. */ |
|
1354 for (; context->cur_attr >= 0; context->cur_attr--) |
|
1355 { |
|
1356 int pos = context->cur_attr; |
|
1357 g_free (context->attr_names[pos]); |
|
1358 g_free (context->attr_values[pos]); |
|
1359 context->attr_names[pos] = context->attr_values[pos] = NULL; |
|
1360 } |
|
1361 g_assert (context->cur_attr == -1); |
|
1362 g_assert (context->attr_names == NULL || |
|
1363 context->attr_names[0] == NULL); |
|
1364 g_assert (context->attr_values == NULL || |
|
1365 context->attr_values[0] == NULL); |
|
1366 |
|
1367 if (tmp_error != NULL) |
|
1368 { |
|
1369 mark_error (context, tmp_error); |
|
1370 g_propagate_error (error, tmp_error); |
|
1371 } |
|
1372 } |
|
1373 } |
|
1374 break; |
|
1375 |
|
1376 case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN: |
|
1377 /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */ |
|
1378 |
|
1379 skip_spaces (context); |
|
1380 |
|
1381 if (context->iter != context->current_text_end) |
|
1382 { |
|
1383 if (*context->iter == '"') |
|
1384 { |
|
1385 advance_char (context); |
|
1386 context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ; |
|
1387 context->start = context->iter; |
|
1388 } |
|
1389 else if (*context->iter == '\'') |
|
1390 { |
|
1391 advance_char (context); |
|
1392 context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ; |
|
1393 context->start = context->iter; |
|
1394 } |
|
1395 else |
|
1396 { |
|
1397 gchar buf[8]; |
|
1398 |
|
1399 set_error (context, |
|
1400 error, |
|
1401 G_MARKUP_ERROR_PARSE, |
|
1402 _("Odd character '%s', expected an open quote mark " |
|
1403 "after the equals sign when giving value for " |
|
1404 "attribute '%s' of element '%s'"), |
|
1405 utf8_str (context->iter, buf), |
|
1406 current_attribute (context), |
|
1407 current_element (context)); |
|
1408 } |
|
1409 } |
|
1410 break; |
|
1411 |
|
1412 case STATE_INSIDE_ATTRIBUTE_VALUE_SQ: |
|
1413 case STATE_INSIDE_ATTRIBUTE_VALUE_DQ: |
|
1414 /* Possible next states: BETWEEN_ATTRIBUTES */ |
|
1415 { |
|
1416 gchar delim; |
|
1417 |
|
1418 if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ) |
|
1419 { |
|
1420 delim = '\''; |
|
1421 } |
|
1422 else |
|
1423 { |
|
1424 delim = '"'; |
|
1425 } |
|
1426 |
|
1427 do |
|
1428 { |
|
1429 if (*context->iter == delim) |
|
1430 break; |
|
1431 } |
|
1432 while (advance_char (context)); |
|
1433 } |
|
1434 if (context->iter == context->current_text_end) |
|
1435 { |
|
1436 /* The value hasn't necessarily ended. Merge with |
|
1437 * partial chunk, leave state unchanged. |
|
1438 */ |
|
1439 add_to_partial (context, context->start, context->iter); |
|
1440 } |
|
1441 else |
|
1442 { |
|
1443 /* The value has ended at the quote mark. Combine it |
|
1444 * with the partial chunk if any; set it for the current |
|
1445 * attribute. |
|
1446 */ |
|
1447 GString *unescaped; |
|
1448 |
|
1449 add_to_partial (context, context->start, context->iter); |
|
1450 |
|
1451 g_assert (context->cur_attr >= 0); |
|
1452 |
|
1453 if (unescape_text (context, |
|
1454 context->partial_chunk->str, |
|
1455 context->partial_chunk->str + |
|
1456 context->partial_chunk->len, |
|
1457 &unescaped, |
|
1458 error)) |
|
1459 { |
|
1460 /* success, advance past quote and set state. */ |
|
1461 context->attr_values[context->cur_attr] = g_string_free (unescaped, FALSE); |
|
1462 advance_char (context); |
|
1463 context->state = STATE_BETWEEN_ATTRIBUTES; |
|
1464 context->start = NULL; |
|
1465 } |
|
1466 |
|
1467 truncate_partial (context); |
|
1468 } |
|
1469 break; |
|
1470 |
|
1471 case STATE_INSIDE_TEXT: |
|
1472 /* Possible next states: AFTER_OPEN_ANGLE */ |
|
1473 do |
|
1474 { |
|
1475 if (*context->iter == '<') |
|
1476 break; |
|
1477 } |
|
1478 while (advance_char (context)); |
|
1479 |
|
1480 /* The text hasn't necessarily ended. Merge with |
|
1481 * partial chunk, leave state unchanged. |
|
1482 */ |
|
1483 |
|
1484 add_to_partial (context, context->start, context->iter); |
|
1485 |
|
1486 if (context->iter != context->current_text_end) |
|
1487 { |
|
1488 GString *unescaped = NULL; |
|
1489 |
|
1490 /* The text has ended at the open angle. Call the text |
|
1491 * callback. |
|
1492 */ |
|
1493 |
|
1494 if (unescape_text (context, |
|
1495 context->partial_chunk->str, |
|
1496 context->partial_chunk->str + |
|
1497 context->partial_chunk->len, |
|
1498 &unescaped, |
|
1499 error)) |
|
1500 { |
|
1501 GError *tmp_error = NULL; |
|
1502 |
|
1503 if (context->parser->text) |
|
1504 (*context->parser->text) (context, |
|
1505 unescaped->str, |
|
1506 unescaped->len, |
|
1507 context->user_data, |
|
1508 &tmp_error); |
|
1509 |
|
1510 g_string_free (unescaped, TRUE); |
|
1511 |
|
1512 if (tmp_error == NULL) |
|
1513 { |
|
1514 /* advance past open angle and set state. */ |
|
1515 advance_char (context); |
|
1516 context->state = STATE_AFTER_OPEN_ANGLE; |
|
1517 /* could begin a passthrough */ |
|
1518 context->start = context->iter; |
|
1519 } |
|
1520 else |
|
1521 { |
|
1522 mark_error (context, tmp_error); |
|
1523 g_propagate_error (error, tmp_error); |
|
1524 } |
|
1525 } |
|
1526 |
|
1527 truncate_partial (context); |
|
1528 } |
|
1529 break; |
|
1530 |
|
1531 case STATE_AFTER_CLOSE_TAG_SLASH: |
|
1532 /* Possible next state: INSIDE_CLOSE_TAG_NAME */ |
|
1533 if (is_name_start_char (context->iter)) |
|
1534 { |
|
1535 context->state = STATE_INSIDE_CLOSE_TAG_NAME; |
|
1536 |
|
1537 /* start of tag name */ |
|
1538 context->start = context->iter; |
|
1539 } |
|
1540 else |
|
1541 { |
|
1542 gchar buf[8]; |
|
1543 |
|
1544 set_error (context, |
|
1545 error, |
|
1546 G_MARKUP_ERROR_PARSE, |
|
1547 _("'%s' is not a valid character following " |
|
1548 "the characters '</'; '%s' may not begin an " |
|
1549 "element name"), |
|
1550 utf8_str (context->iter, buf), |
|
1551 utf8_str (context->iter, buf)); |
|
1552 } |
|
1553 break; |
|
1554 |
|
1555 case STATE_INSIDE_CLOSE_TAG_NAME: |
|
1556 /* Possible next state: AFTER_CLOSE_TAG_NAME */ |
|
1557 advance_to_name_end (context); |
|
1558 add_to_partial (context, context->start, context->iter); |
|
1559 |
|
1560 if (context->iter != context->current_text_end) |
|
1561 context->state = STATE_AFTER_CLOSE_TAG_NAME; |
|
1562 break; |
|
1563 |
|
1564 case STATE_AFTER_CLOSE_TAG_NAME: |
|
1565 /* Possible next state: AFTER_CLOSE_TAG_SLASH */ |
|
1566 |
|
1567 skip_spaces (context); |
|
1568 |
|
1569 if (context->iter != context->current_text_end) |
|
1570 { |
|
1571 gchar *close_name; |
|
1572 |
|
1573 /* The name has ended. Combine it with the partial chunk |
|
1574 * if any; check that it matches stack top and pop |
|
1575 * stack; invoke proper callback; enter next state. |
|
1576 */ |
|
1577 close_name = g_string_free (context->partial_chunk, FALSE); |
|
1578 context->partial_chunk = NULL; |
|
1579 |
|
1580 if (*context->iter != '>') |
|
1581 { |
|
1582 gchar buf[8]; |
|
1583 |
|
1584 set_error (context, |
|
1585 error, |
|
1586 G_MARKUP_ERROR_PARSE, |
|
1587 _("'%s' is not a valid character following " |
|
1588 "the close element name '%s'; the allowed " |
|
1589 "character is '>'"), |
|
1590 utf8_str (context->iter, buf), |
|
1591 close_name); |
|
1592 } |
|
1593 else if (context->tag_stack == NULL) |
|
1594 { |
|
1595 set_error (context, |
|
1596 error, |
|
1597 G_MARKUP_ERROR_PARSE, |
|
1598 _("Element '%s' was closed, no element " |
|
1599 "is currently open"), |
|
1600 close_name); |
|
1601 } |
|
1602 else if (strcmp (close_name, current_element (context)) != 0) |
|
1603 { |
|
1604 set_error (context, |
|
1605 error, |
|
1606 G_MARKUP_ERROR_PARSE, |
|
1607 _("Element '%s' was closed, but the currently " |
|
1608 "open element is '%s'"), |
|
1609 close_name, |
|
1610 current_element (context)); |
|
1611 } |
|
1612 else |
|
1613 { |
|
1614 GError *tmp_error; |
|
1615 advance_char (context); |
|
1616 context->state = STATE_AFTER_CLOSE_ANGLE; |
|
1617 context->start = NULL; |
|
1618 |
|
1619 /* call the end_element callback */ |
|
1620 tmp_error = NULL; |
|
1621 if (context->parser->end_element) |
|
1622 (* context->parser->end_element) (context, |
|
1623 close_name, |
|
1624 context->user_data, |
|
1625 &tmp_error); |
|
1626 |
|
1627 |
|
1628 /* Pop the tag stack */ |
|
1629 g_free (context->tag_stack->data); |
|
1630 context->tag_stack = g_slist_delete_link (context->tag_stack, |
|
1631 context->tag_stack); |
|
1632 |
|
1633 if (tmp_error) |
|
1634 { |
|
1635 mark_error (context, tmp_error); |
|
1636 g_propagate_error (error, tmp_error); |
|
1637 } |
|
1638 } |
|
1639 |
|
1640 g_free (close_name); |
|
1641 } |
|
1642 break; |
|
1643 |
|
1644 case STATE_INSIDE_PASSTHROUGH: |
|
1645 /* Possible next state: AFTER_CLOSE_ANGLE */ |
|
1646 do |
|
1647 { |
|
1648 if (*context->iter == '<') |
|
1649 context->balance++; |
|
1650 if (*context->iter == '>') |
|
1651 { |
|
1652 context->balance--; |
|
1653 add_to_partial (context, context->start, context->iter); |
|
1654 context->start = context->iter; |
|
1655 if ((g_str_has_prefix (context->partial_chunk->str, "<?") |
|
1656 && g_str_has_suffix (context->partial_chunk->str, "?")) || |
|
1657 (g_str_has_prefix (context->partial_chunk->str, "<!--") |
|
1658 && g_str_has_suffix (context->partial_chunk->str, "--")) || |
|
1659 (g_str_has_prefix (context->partial_chunk->str, "<![CDATA[") |
|
1660 && g_str_has_suffix (context->partial_chunk->str, "]]")) || |
|
1661 (g_str_has_prefix (context->partial_chunk->str, "<!DOCTYPE") |
|
1662 && context->balance == 0)) |
|
1663 break; |
|
1664 } |
|
1665 } |
|
1666 while (advance_char (context)); |
|
1667 |
|
1668 if (context->iter == context->current_text_end) |
|
1669 { |
|
1670 /* The passthrough hasn't necessarily ended. Merge with |
|
1671 * partial chunk, leave state unchanged. |
|
1672 */ |
|
1673 add_to_partial (context, context->start, context->iter); |
|
1674 } |
|
1675 else |
|
1676 { |
|
1677 /* The passthrough has ended at the close angle. Combine |
|
1678 * it with the partial chunk if any. Call the passthrough |
|
1679 * callback. Note that the open/close angles are |
|
1680 * included in the text of the passthrough. |
|
1681 */ |
|
1682 GError *tmp_error = NULL; |
|
1683 |
|
1684 advance_char (context); /* advance past close angle */ |
|
1685 add_to_partial (context, context->start, context->iter); |
|
1686 |
|
1687 if (context->parser->passthrough) |
|
1688 (*context->parser->passthrough) (context, |
|
1689 context->partial_chunk->str, |
|
1690 context->partial_chunk->len, |
|
1691 context->user_data, |
|
1692 &tmp_error); |
|
1693 |
|
1694 truncate_partial (context); |
|
1695 |
|
1696 if (tmp_error == NULL) |
|
1697 { |
|
1698 context->state = STATE_AFTER_CLOSE_ANGLE; |
|
1699 context->start = context->iter; /* could begin text */ |
|
1700 } |
|
1701 else |
|
1702 { |
|
1703 mark_error (context, tmp_error); |
|
1704 g_propagate_error (error, tmp_error); |
|
1705 } |
|
1706 } |
|
1707 break; |
|
1708 |
|
1709 case STATE_ERROR: |
|
1710 goto finished; |
|
1711 break; |
|
1712 |
|
1713 default: |
|
1714 g_assert_not_reached (); |
|
1715 break; |
|
1716 } |
|
1717 } |
|
1718 |
|
1719 finished: |
|
1720 context->parsing = FALSE; |
|
1721 |
|
1722 return context->state != STATE_ERROR; |
|
1723 } |
|
1724 |
|
1725 /** |
|
1726 * g_markup_parse_context_end_parse: |
|
1727 * @context: a #GMarkupParseContext |
|
1728 * @error: return location for a #GError |
|
1729 * |
|
1730 * Signals to the #GMarkupParseContext that all data has been |
|
1731 * fed into the parse context with g_markup_parse_context_parse(). |
|
1732 * This function reports an error if the document isn't complete, |
|
1733 * for example if elements are still open. |
|
1734 * |
|
1735 * Return value: %TRUE on success, %FALSE if an error was set |
|
1736 **/ |
|
1737 EXPORT_C gboolean |
|
1738 g_markup_parse_context_end_parse (GMarkupParseContext *context, |
|
1739 GError **error) |
|
1740 { |
|
1741 g_return_val_if_fail (context != NULL, FALSE); |
|
1742 g_return_val_if_fail (!context->parsing, FALSE); |
|
1743 g_return_val_if_fail (context->state != STATE_ERROR, FALSE); |
|
1744 |
|
1745 if (context->partial_chunk != NULL) |
|
1746 { |
|
1747 g_string_free (context->partial_chunk, TRUE); |
|
1748 context->partial_chunk = NULL; |
|
1749 } |
|
1750 |
|
1751 if (context->document_empty) |
|
1752 { |
|
1753 set_error (context, error, G_MARKUP_ERROR_EMPTY, |
|
1754 _("Document was empty or contained only whitespace")); |
|
1755 return FALSE; |
|
1756 } |
|
1757 |
|
1758 context->parsing = TRUE; |
|
1759 |
|
1760 switch (context->state) |
|
1761 { |
|
1762 case STATE_START: |
|
1763 /* Nothing to do */ |
|
1764 break; |
|
1765 |
|
1766 case STATE_AFTER_OPEN_ANGLE: |
|
1767 set_error (context, error, G_MARKUP_ERROR_PARSE, |
|
1768 _("Document ended unexpectedly just after an open angle bracket '<'")); |
|
1769 break; |
|
1770 |
|
1771 case STATE_AFTER_CLOSE_ANGLE: |
|
1772 if (context->tag_stack != NULL) |
|
1773 { |
|
1774 /* Error message the same as for INSIDE_TEXT */ |
|
1775 set_error (context, error, G_MARKUP_ERROR_PARSE, |
|
1776 _("Document ended unexpectedly with elements still open - " |
|
1777 "'%s' was the last element opened"), |
|
1778 current_element (context)); |
|
1779 } |
|
1780 break; |
|
1781 |
|
1782 case STATE_AFTER_ELISION_SLASH: |
|
1783 set_error (context, error, G_MARKUP_ERROR_PARSE, |
|
1784 _("Document ended unexpectedly, expected to see a close angle " |
|
1785 "bracket ending the tag <%s/>"), current_element (context)); |
|
1786 break; |
|
1787 |
|
1788 case STATE_INSIDE_OPEN_TAG_NAME: |
|
1789 set_error (context, error, G_MARKUP_ERROR_PARSE, |
|
1790 _("Document ended unexpectedly inside an element name")); |
|
1791 break; |
|
1792 |
|
1793 case STATE_INSIDE_ATTRIBUTE_NAME: |
|
1794 set_error (context, error, G_MARKUP_ERROR_PARSE, |
|
1795 _("Document ended unexpectedly inside an attribute name")); |
|
1796 break; |
|
1797 |
|
1798 case STATE_BETWEEN_ATTRIBUTES: |
|
1799 set_error (context, error, G_MARKUP_ERROR_PARSE, |
|
1800 _("Document ended unexpectedly inside an element-opening " |
|
1801 "tag.")); |
|
1802 break; |
|
1803 |
|
1804 case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN: |
|
1805 set_error (context, error, G_MARKUP_ERROR_PARSE, |
|
1806 _("Document ended unexpectedly after the equals sign " |
|
1807 "following an attribute name; no attribute value")); |
|
1808 break; |
|
1809 |
|
1810 case STATE_INSIDE_ATTRIBUTE_VALUE_SQ: |
|
1811 case STATE_INSIDE_ATTRIBUTE_VALUE_DQ: |
|
1812 set_error (context, error, G_MARKUP_ERROR_PARSE, |
|
1813 _("Document ended unexpectedly while inside an attribute " |
|
1814 "value")); |
|
1815 break; |
|
1816 |
|
1817 case STATE_INSIDE_TEXT: |
|
1818 g_assert (context->tag_stack != NULL); |
|
1819 set_error (context, error, G_MARKUP_ERROR_PARSE, |
|
1820 _("Document ended unexpectedly with elements still open - " |
|
1821 "'%s' was the last element opened"), |
|
1822 current_element (context)); |
|
1823 break; |
|
1824 |
|
1825 case STATE_AFTER_CLOSE_TAG_SLASH: |
|
1826 case STATE_INSIDE_CLOSE_TAG_NAME: |
|
1827 set_error (context, error, G_MARKUP_ERROR_PARSE, |
|
1828 _("Document ended unexpectedly inside the close tag for " |
|
1829 "element '%s'"), current_element (context)); |
|
1830 break; |
|
1831 |
|
1832 case STATE_INSIDE_PASSTHROUGH: |
|
1833 set_error (context, error, G_MARKUP_ERROR_PARSE, |
|
1834 _("Document ended unexpectedly inside a comment or " |
|
1835 "processing instruction")); |
|
1836 break; |
|
1837 |
|
1838 case STATE_ERROR: |
|
1839 default: |
|
1840 g_assert_not_reached (); |
|
1841 break; |
|
1842 } |
|
1843 |
|
1844 context->parsing = FALSE; |
|
1845 |
|
1846 return context->state != STATE_ERROR; |
|
1847 } |
|
1848 |
|
1849 /** |
|
1850 * g_markup_parse_context_get_element: |
|
1851 * @context: a #GMarkupParseContext |
|
1852 * @returns: the name of the currently open element, or %NULL |
|
1853 * |
|
1854 * Retrieves the name of the currently open element. |
|
1855 * |
|
1856 * Since: 2.2 |
|
1857 **/ |
|
1858 EXPORT_C G_CONST_RETURN gchar * |
|
1859 g_markup_parse_context_get_element (GMarkupParseContext *context) |
|
1860 { |
|
1861 g_return_val_if_fail (context != NULL, NULL); |
|
1862 |
|
1863 if (context->tag_stack == NULL) |
|
1864 return NULL; |
|
1865 else |
|
1866 return current_element (context); |
|
1867 } |
|
1868 |
|
1869 /** |
|
1870 * g_markup_parse_context_get_position: |
|
1871 * @context: a #GMarkupParseContext |
|
1872 * @line_number: return location for a line number, or %NULL |
|
1873 * @char_number: return location for a char-on-line number, or %NULL |
|
1874 * |
|
1875 * Retrieves the current line number and the number of the character on |
|
1876 * that line. Intended for use in error messages; there are no strict |
|
1877 * semantics for what constitutes the "current" line number other than |
|
1878 * "the best number we could come up with for error messages." |
|
1879 * |
|
1880 **/ |
|
1881 EXPORT_C void |
|
1882 g_markup_parse_context_get_position (GMarkupParseContext *context, |
|
1883 gint *line_number, |
|
1884 gint *char_number) |
|
1885 { |
|
1886 g_return_if_fail (context != NULL); |
|
1887 |
|
1888 if (line_number) |
|
1889 *line_number = context->line_number; |
|
1890 |
|
1891 if (char_number) |
|
1892 *char_number = context->char_number; |
|
1893 } |
|
1894 |
|
1895 static void |
|
1896 append_escaped_text (GString *str, |
|
1897 const gchar *text, |
|
1898 gssize length) |
|
1899 { |
|
1900 const gchar *p; |
|
1901 const gchar *end; |
|
1902 |
|
1903 p = text; |
|
1904 end = text + length; |
|
1905 |
|
1906 while (p != end) |
|
1907 { |
|
1908 const gchar *next; |
|
1909 next = g_utf8_next_char (p); |
|
1910 |
|
1911 switch (*p) |
|
1912 { |
|
1913 case '&': |
|
1914 g_string_append (str, "&"); |
|
1915 break; |
|
1916 |
|
1917 case '<': |
|
1918 g_string_append (str, "<"); |
|
1919 break; |
|
1920 |
|
1921 case '>': |
|
1922 g_string_append (str, ">"); |
|
1923 break; |
|
1924 |
|
1925 case '\'': |
|
1926 g_string_append (str, "'"); |
|
1927 break; |
|
1928 |
|
1929 case '"': |
|
1930 g_string_append (str, """); |
|
1931 break; |
|
1932 |
|
1933 default: |
|
1934 g_string_append_len (str, p, next - p); |
|
1935 break; |
|
1936 } |
|
1937 |
|
1938 p = next; |
|
1939 } |
|
1940 } |
|
1941 |
|
1942 /** |
|
1943 * g_markup_escape_text: |
|
1944 * @text: some valid UTF-8 text |
|
1945 * @length: length of @text in bytes, or -1 if the text is nul-terminated |
|
1946 * |
|
1947 * Escapes text so that the markup parser will parse it verbatim. |
|
1948 * Less than, greater than, ampersand, etc. are replaced with the |
|
1949 * corresponding entities. This function would typically be used |
|
1950 * when writing out a file to be parsed with the markup parser. |
|
1951 * |
|
1952 * Note that this function doesn't protect whitespace and line endings |
|
1953 * from being processed according to the XML rules for normalization |
|
1954 * of line endings and attribute values. |
|
1955 * |
|
1956 * Return value: a newly allocated string with the escaped text |
|
1957 **/ |
|
1958 EXPORT_C gchar* |
|
1959 g_markup_escape_text (const gchar *text, |
|
1960 gssize length) |
|
1961 { |
|
1962 GString *str; |
|
1963 |
|
1964 g_return_val_if_fail (text != NULL, NULL); |
|
1965 |
|
1966 if (length < 0) |
|
1967 length = strlen (text); |
|
1968 |
|
1969 /* prealloc at least as long as original text */ |
|
1970 str = g_string_sized_new (length); |
|
1971 append_escaped_text (str, text, length); |
|
1972 |
|
1973 return g_string_free (str, FALSE); |
|
1974 } |
|
1975 |
|
/* If @cp points at a positional-argument marker of the form
 * "<digits>$", returns a pointer just past the '$'; otherwise
 * returns @cp unchanged.
 */
static const char *
skip_position_marker (const char *cp)
{
  const char *np = cp;

  while (*np >= '0' && *np <= '9')
    np++;

  return (np != cp && *np == '$') ? np + 1 : cp;
}

/**
 * find_conversion:
 * @format: a printf-style format string
 * @after: location to store a pointer to the character after
 *         the returned conversion. On a %NULL return, returns the
 *         pointer to the trailing NUL in the string
 *
 * Find the next conversion in a printf-style format string.
 * A conversion is scanned as: '%', optional positional marker,
 * flags, field width, precision, size modifiers, and finally one
 * conversion character. "%%" is reported like any other conversion.
 * A conversion cut short by the end of the string is still returned,
 * with @after pointing at the trailing NUL (never beyond it).
 *
 * Partially based on code from printf-parser.c,
 * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
 *
 * Return value: pointer to the next conversion in @format,
 *               or %NULL, if none.
 **/
static const char *
find_conversion (const char  *format,
                 const char **after)
{
  const char *start = format;
  const char *cp;

  while (*start != '\0' && *start != '%')
    start++;

  if (*start == '\0')
    {
      *after = start;
      return NULL;
    }

  cp = start + 1;

  /* A lone '%' at the very end is not a conversion */
  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Test for positional argument. */
  cp = skip_position_marker (cp);

  /* Skip the flags. */
  while (*cp == '\'' ||
         *cp == '-' ||
         *cp == '+' ||
         *cp == ' ' ||
         *cp == '#' ||
         *cp == '0')
    cp++;

  /* Skip the field width. */
  if (*cp == '*')
    {
      cp++;
      cp = skip_position_marker (cp);
    }
  else
    {
      while (*cp >= '0' && *cp <= '9')
        cp++;
    }

  /* Skip the precision. */
  if (*cp == '.')
    {
      cp++;
      if (*cp == '*')
        {
          /* Step over the '*' itself, exactly as for the field width;
           * without this "%.*s" is cut off before its conversion
           * character.
           */
          cp++;
          cp = skip_position_marker (cp);
        }
      else
        {
          while (*cp >= '0' && *cp <= '9')
            cp++;
        }
    }

  /* Skip argument type/size specifiers. */
  while (*cp == 'h' ||
         *cp == 'L' ||
         *cp == 'l' ||
         *cp == 'j' ||
         *cp == 'z' ||
         *cp == 'Z' ||
         *cp == 't')
    cp++;

  /* Skip the conversion character, but never the terminating NUL:
   * the caller resumes scanning at @after.
   */
  if (*cp != '\0')
    cp++;

  *after = cp;
  return start;
}
|
2101 |
|
2102 /** |
|
2103 * g_markup_vprintf_escaped: |
|
2104 * @format: printf() style format string |
|
2105 * @args: variable argument list, similar to vprintf() |
|
2106 * |
|
2107 * Formats the data in @args according to @format, escaping |
|
2108 * all string and character arguments in the fashion |
|
2109 * of g_markup_escape_text(). See g_markup_printf_escaped(). |
|
2110 * |
|
2111 * Return value: newly allocated result from formatting |
|
2112 * operation. Free with g_free(). |
|
2113 * |
|
2114 * Since: 2.4 |
|
2115 **/ |
|
2116 EXPORT_C char * |
|
2117 g_markup_vprintf_escaped (const char *format, |
|
2118 va_list args) |
|
2119 { |
|
2120 GString *format1; |
|
2121 GString *format2; |
|
2122 GString *result = NULL; |
|
2123 gchar *output1 = NULL; |
|
2124 gchar *output2 = NULL; |
|
2125 const char *p, *op1, *op2; |
|
2126 va_list args2; |
|
2127 |
|
2128 /* The technique here, is that we make two format strings that |
|
2129 * have the identical conversions in the identical order to the |
|
2130 * original strings, but differ in the text in-between. We |
|
2131 * then use the normal g_strdup_vprintf() to format the arguments |
|
2132 * with the two new format strings. By comparing the results, |
|
2133 * we can figure out what segments of the output come from |
|
2134 * the original format string, and what from the arguments, |
|
2135 * and thus know what portions of the string to escape. |
|
2136 * |
|
2137 * For instance, for: |
|
2138 * |
|
2139 * g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5); |
|
2140 * |
|
2141 * We form the two format strings "%sX%dX" and "%sY%dY". The results |
|
2142 * of formatting with those two strings are |
|
2143 * |
|
2144 * "%sX%dX" => "Susan & FredX5X" |
|
2145 * "%sY%dY" => "Susan & FredY5Y" |
|
2146 * |
|
2147 * To find the span of the first argument, we find the first position |
|
2148 * where the two arguments differ, which tells us that the first |
|
2149 * argument formatted to "Susan & Fred". We then escape that |
|
2150 * to "Susan &amp; Fred" and join up with the intermediate portions |
|
2151 * of the format string and the second argument to get |
|
2152 * "Susan &amp; Fred ate 5 apples". |
|
2153 */ |
|
2154 |
|
2155 /* Create the two modified format strings |
|
2156 */ |
|
2157 format1 = g_string_new (NULL); |
|
2158 format2 = g_string_new (NULL); |
|
2159 p = format; |
|
2160 while (TRUE) |
|
2161 { |
|
2162 const char *after; |
|
2163 const char *conv = find_conversion (p, &after); |
|
2164 if (!conv) |
|
2165 break; |
|
2166 |
|
2167 g_string_append_len (format1, conv, after - conv); |
|
2168 g_string_append_c (format1, 'X'); |
|
2169 g_string_append_len (format2, conv, after - conv); |
|
2170 g_string_append_c (format2, 'Y'); |
|
2171 |
|
2172 p = after; |
|
2173 } |
|
2174 |
|
2175 /* Use them to format the arguments |
|
2176 */ |
|
2177 G_VA_COPY (args2, args); |
|
2178 |
|
2179 output1 = g_strdup_vprintf (format1->str, args); |
|
2180 va_end (args); |
|
2181 if (!output1) |
|
2182 goto cleanup; |
|
2183 |
|
2184 output2 = g_strdup_vprintf (format2->str, args2); |
|
2185 va_end (args2); |
|
2186 if (!output2) |
|
2187 goto cleanup; |
|
2188 |
|
2189 result = g_string_new (NULL); |
|
2190 |
|
2191 /* Iterate through the original format string again, |
|
2192 * copying the non-conversion portions and the escaped |
|
2193 * converted arguments to the output string. |
|
2194 */ |
|
2195 op1 = output1; |
|
2196 op2 = output2; |
|
2197 p = format; |
|
2198 while (TRUE) |
|
2199 { |
|
2200 const char *after; |
|
2201 const char *output_start; |
|
2202 const char *conv = find_conversion (p, &after); |
|
2203 char *escaped; |
|
2204 |
|
2205 if (!conv) /* The end, after points to the trailing \0 */ |
|
2206 { |
|
2207 g_string_append_len (result, p, after - p); |
|
2208 break; |
|
2209 } |
|
2210 |
|
2211 g_string_append_len (result, p, conv - p); |
|
2212 output_start = op1; |
|
2213 while (*op1 == *op2) |
|
2214 { |
|
2215 op1++; |
|
2216 op2++; |
|
2217 } |
|
2218 |
|
2219 escaped = g_markup_escape_text (output_start, op1 - output_start); |
|
2220 g_string_append (result, escaped); |
|
2221 g_free (escaped); |
|
2222 |
|
2223 p = after; |
|
2224 op1++; |
|
2225 op2++; |
|
2226 } |
|
2227 |
|
2228 cleanup: |
|
2229 g_string_free (format1, TRUE); |
|
2230 g_string_free (format2, TRUE); |
|
2231 g_free (output1); |
|
2232 g_free (output2); |
|
2233 |
|
2234 if (result) |
|
2235 return g_string_free (result, FALSE); |
|
2236 else |
|
2237 return NULL; |
|
2238 } |
|
2239 |
|
2240 /** |
|
2241 * g_markup_printf_escaped: |
|
2242 * @format: printf() style format string |
|
2243 * @Varargs: the arguments to insert in the format string |
|
2244 * |
|
2245 * Formats arguments according to @format, escaping |
|
2246 * all string and character arguments in the fashion |
|
2247 * of g_markup_escape_text(). This is useful when you |
|
2248 * want to insert literal strings into XML-style markup |
|
2249 * output, without having to worry that the strings |
|
2250 * might themselves contain markup. |
|
2251 * |
|
2252 * <informalexample><programlisting> |
|
2253 * const char *store = "Fortnum & Mason"; |
|
2254 * const char *item = "Tea"; |
|
2255 * char *output; |
|
2256 * |
|
2257 * output = g_markup_printf_escaped ("<purchase>" |
|
2258 * "<store>%s</store>" |
|
2259 * "<item>%s</item>" |
|
2260 * "</purchase>", |
|
2261 * store, item); |
|
2262 * </programlisting></informalexample> |
|
2263 * |
|
2264 * Return value: newly allocated result from formatting |
|
2265 * operation. Free with g_free(). |
|
2266 * |
|
2267 * Since: 2.4 |
|
2268 **/ |
|
2269 EXPORT_C char * |
|
2270 g_markup_printf_escaped (const char *format, ...) |
|
2271 { |
|
2272 char *result; |
|
2273 va_list args; |
|
2274 |
|
2275 va_start (args, format); |
|
2276 result = g_markup_vprintf_escaped (format, args); |
|
2277 va_end (args); |
|
2278 |
|
2279 return result; |
|
2280 } |
|
2281 |
|
2282 #define __G_MARKUP_C__ |
|
2283 #include "galiasdef.c" |