glib/libglib/src/gmarkup.c
branch RCL_3
changeset 57 2efc27d87e1c
parent 0 e4d67989cc36
equal deleted inserted replaced
56:acd3cd4aaceb 57:2efc27d87e1c
       
     1 /* gmarkup.c - Simple XML-like parser
       
     2  *
       
     3  *  Copyright 2000, 2003 Red Hat, Inc.
       
     4  * Portions copyright (c) 2006 Nokia Corporation.  All rights reserved.
       
     5  *
       
     6  * GLib is free software; you can redistribute it and/or modify it
       
     7  * under the terms of the GNU Lesser General Public License as
       
     8  * published by the Free Software Foundation; either version 2 of the
       
     9  * License, or (at your option) any later version.
       
    10  *
       
    11  * GLib is distributed in the hope that it will be useful,
       
    12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    14  * Lesser General Public License for more details.
       
    15  *
       
    16  * You should have received a copy of the GNU Lesser General Public
       
    17  * License along with GLib; see the file COPYING.LIB.  If not,
       
    18  * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
       
    19  *   Boston, MA 02111-1307, USA.
       
    20  */
       
    21 
       
    22 #include "config.h"
       
    23 
       
    24 #include <stdarg.h>
       
    25 #include <string.h>
       
    26 #include <stdio.h>
       
    27 #include <stdlib.h>
       
    28 #include <errno.h>
       
    29 
       
    30 #include "glib.h"
       
    31 #include "galias.h"
       
    32 
       
    33 #include "glibintl.h"
       
    34 
       
    35 #ifdef __SYMBIAN32__
       
    36 #include <glib_wsd.h>
       
    37 #endif
       
    38 
       
    39 
       
    40 #if EMULATOR
       
    41 
       
    42 PLS(error_quark ,g_markup_error_quark,GQuark)
       
    43 #define error_quark  (*FUNCTION_NAME(error_quark ,g_markup_error_quark)())
       
    44 
       
    45 #endif /* EMULATOR */
       
    46 
       
    47 
       
    48 EXPORT_C GQuark
       
    49 g_markup_error_quark (void)
       
    50 {
       
    51   #if !(EMULATOR)
       
    52   static GQuark error_quark = 0;
       
    53   #endif /*EMULATOR */
       
    54 
       
    55   if (error_quark == 0)
       
    56     error_quark = g_quark_from_static_string ("g-markup-error-quark");
       
    57 
       
    58   return error_quark;
       
    59 }
       
    60 
       
    61 #if EMULATOR
       
    62 #undef error_quark
       
    63 #endif /* EMULATOR */
       
    64 
       
    65 typedef enum
       
    66 {
       
    67   STATE_START,
       
    68   STATE_AFTER_OPEN_ANGLE,
       
    69   STATE_AFTER_CLOSE_ANGLE,
       
    70   STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
       
    71   STATE_INSIDE_OPEN_TAG_NAME,
       
    72   STATE_INSIDE_ATTRIBUTE_NAME,
       
    73   STATE_AFTER_ATTRIBUTE_NAME,
       
    74   STATE_BETWEEN_ATTRIBUTES,
       
    75   STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
       
    76   STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
       
    77   STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
       
    78   STATE_INSIDE_TEXT,
       
    79   STATE_AFTER_CLOSE_TAG_SLASH,
       
    80   STATE_INSIDE_CLOSE_TAG_NAME,
       
    81   STATE_AFTER_CLOSE_TAG_NAME,
       
    82   STATE_INSIDE_PASSTHROUGH,
       
    83   STATE_ERROR
       
    84 } GMarkupParseState;
       
    85 
       
    86 struct _GMarkupParseContext
       
    87 {
       
    88   const GMarkupParser *parser;
       
    89 
       
    90   GMarkupParseFlags flags;
       
    91 
       
    92   gint line_number;
       
    93   gint char_number;
       
    94 
       
    95   gpointer user_data;
       
    96   GDestroyNotify dnotify;
       
    97 
       
    98   /* A piece of character data or an element that
       
    99    * hasn't "ended" yet so we haven't yet called
       
   100    * the callback for it.
       
   101    */
       
   102   GString *partial_chunk;
       
   103 
       
   104   GMarkupParseState state;
       
   105   GSList *tag_stack;
       
   106   gchar **attr_names;
       
   107   gchar **attr_values;
       
   108   gint cur_attr;
       
   109   gint alloc_attrs;
       
   110 
       
   111   const gchar *current_text;
       
   112   gssize       current_text_len;      
       
   113   const gchar *current_text_end;
       
   114 
       
   115   GString *leftover_char_portion;
       
   116 
       
   117   /* used to save the start of the last interesting thingy */
       
   118   const gchar *start;
       
   119 
       
   120   const gchar *iter;
       
   121 
       
   122   guint document_empty : 1;
       
   123   guint parsing : 1;
       
   124   gint balance;
       
   125 };
       
   126 
       
   127 /**
       
   128  * g_markup_parse_context_new:
       
   129  * @parser: a #GMarkupParser
       
   130  * @flags: one or more #GMarkupParseFlags
       
   131  * @user_data: user data to pass to #GMarkupParser functions
       
   132  * @user_data_dnotify: user data destroy notifier called when the parse context is freed
       
   133  * 
       
   134  * Creates a new parse context. A parse context is used to parse
       
   135  * marked-up documents. You can feed any number of documents into
       
   136  * a context, as long as no errors occur; once an error occurs,
       
   137  * the parse context can't continue to parse text (you have to free it
       
   138  * and create a new parse context).
       
   139  * 
       
   140  * Return value: a new #GMarkupParseContext
       
   141  **/
       
   142 EXPORT_C GMarkupParseContext *
       
   143 g_markup_parse_context_new (const GMarkupParser *parser,
       
   144                             GMarkupParseFlags    flags,
       
   145                             gpointer             user_data,
       
   146                             GDestroyNotify       user_data_dnotify)
       
   147 {
       
   148   GMarkupParseContext *context;
       
   149 
       
   150   g_return_val_if_fail (parser != NULL, NULL);
       
   151   context = g_new (GMarkupParseContext, 1);
       
   152 
       
   153   context->parser = parser;
       
   154   context->flags = flags;
       
   155   context->user_data = user_data;
       
   156   context->dnotify = user_data_dnotify;
       
   157 
       
   158   context->line_number = 1;
       
   159   context->char_number = 1;
       
   160 
       
   161   context->partial_chunk = NULL;
       
   162 
       
   163   context->state = STATE_START;
       
   164   context->tag_stack = NULL;
       
   165   context->attr_names = NULL;
       
   166   context->attr_values = NULL;
       
   167   context->cur_attr = -1;
       
   168   context->alloc_attrs = 0;
       
   169 
       
   170   context->current_text = NULL;
       
   171   context->current_text_len = -1;
       
   172   context->current_text_end = NULL;
       
   173   context->leftover_char_portion = NULL;
       
   174 
       
   175   context->start = NULL;
       
   176   context->iter = NULL;
       
   177 
       
   178   context->document_empty = TRUE;
       
   179   context->parsing = FALSE;
       
   180 
       
   181   context->balance = 0;
       
   182 
       
   183   return context;
       
   184 }
       
   185 
       
   186 /**
       
   187  * g_markup_parse_context_free:
       
   188  * @context: a #GMarkupParseContext
       
   189  * 
       
   190  * Frees a #GMarkupParseContext. Can't be called from inside
       
   191  * one of the #GMarkupParser functions.
       
   192  * 
       
   193  **/
       
   194 EXPORT_C void
       
   195 g_markup_parse_context_free (GMarkupParseContext *context)
       
   196 {
       
   197   g_return_if_fail (context != NULL);
       
   198   g_return_if_fail (!context->parsing);
       
   199 
       
   200   if (context->dnotify)
       
   201     (* context->dnotify) (context->user_data);
       
   202 
       
   203   g_strfreev (context->attr_names);
       
   204   g_strfreev (context->attr_values);
       
   205 
       
   206   g_slist_foreach (context->tag_stack, (GFunc)g_free, NULL);
       
   207   g_slist_free (context->tag_stack);
       
   208 
       
   209   if (context->partial_chunk)
       
   210     g_string_free (context->partial_chunk, TRUE);
       
   211 
       
   212   if (context->leftover_char_portion)
       
   213     g_string_free (context->leftover_char_portion, TRUE);
       
   214 
       
   215   g_free (context);
       
   216 }
       
   217 
       
   218 static void
       
   219 mark_error (GMarkupParseContext *context,
       
   220             GError              *error)
       
   221 {
       
   222   context->state = STATE_ERROR;
       
   223 
       
   224   if (context->parser->error)
       
   225     (*context->parser->error) (context, error, context->user_data);
       
   226 }
       
   227 
       
   228 static void set_error (GMarkupParseContext *context,
       
   229            	       GError             **error,
       
   230            	       GMarkupError         code,
       
   231            	       const gchar         *format,
       
   232            	       ...) G_GNUC_PRINTF (4, 5);
       
   233 
       
   234 static void
       
   235 set_error (GMarkupParseContext *context,
       
   236            GError             **error,
       
   237            GMarkupError         code,
       
   238            const gchar         *format,
       
   239            ...)
       
   240 {
       
   241   GError *tmp_error;
       
   242   gchar *s;
       
   243   va_list args;
       
   244 
       
   245   va_start (args, format);
       
   246   s = g_strdup_vprintf (format, args);
       
   247   va_end (args);
       
   248 
       
   249   tmp_error = g_error_new (G_MARKUP_ERROR,
       
   250                            code,
       
   251                            _("Error on line %d char %d: %s"),
       
   252                            context->line_number,
       
   253                            context->char_number,
       
   254                            s);
       
   255 
       
   256   g_free (s);
       
   257 
       
   258   mark_error (context, tmp_error);
       
   259 
       
   260   g_propagate_error (error, tmp_error);
       
   261 }
       
   262 
       
   263 
       
   264 /* To make these faster, we first use the ascii-only tests, then check
       
   265  * for the usual non-alnum name-end chars, and only then call the
       
   266  * expensive unicode stuff. Nobody uses non-ascii in XML tag/attribute
       
   267  * names, so this is a reasonable hack that virtually always avoids
       
   268  * the guniprop call.
       
   269  */
       
   270 #define IS_COMMON_NAME_END_CHAR(c) \
       
   271   ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
       
   272 
       
   273 static gboolean
       
   274 is_name_start_char (const gchar *p)
       
   275 {
       
   276   if (g_ascii_isalpha (*p) ||
       
   277       (!IS_COMMON_NAME_END_CHAR (*p) &&
       
   278        (*p == '_' || 
       
   279 	*p == ':' ||
       
   280 	g_unichar_isalpha (g_utf8_get_char (p)))))
       
   281     return TRUE;
       
   282   else
       
   283     return FALSE;
       
   284 }
       
   285 
       
   286 static gboolean
       
   287 is_name_char (const gchar *p)
       
   288 {
       
   289   if (g_ascii_isalnum (*p) ||
       
   290       (!IS_COMMON_NAME_END_CHAR (*p) &&
       
   291        (*p == '.' || 
       
   292 	*p == '-' ||
       
   293 	*p == '_' ||
       
   294 	*p == ':' ||
       
   295 	g_unichar_isalpha (g_utf8_get_char (p)))))
       
   296     return TRUE;
       
   297   else
       
   298     return FALSE;
       
   299 }
       
   300 
       
   301 
       
   302 static gchar*
       
   303 char_str (gunichar c,
       
   304           gchar   *buf)
       
   305 {
       
   306   memset (buf, 0, 8);
       
   307   g_unichar_to_utf8 (c, buf);
       
   308   return buf;
       
   309 }
       
   310 
       
   311 static gchar*
       
   312 utf8_str (const gchar *utf8,
       
   313           gchar       *buf)
       
   314 {
       
   315   char_str (g_utf8_get_char (utf8), buf);
       
   316   return buf;
       
   317 }
       
   318 
       
   319 static void
       
   320 set_unescape_error (GMarkupParseContext *context,
       
   321                     GError             **error,
       
   322                     const gchar         *remaining_text,
       
   323                     const gchar         *remaining_text_end,
       
   324                     GMarkupError         code,
       
   325                     const gchar         *format,
       
   326                     ...)
       
   327 {
       
   328   GError *tmp_error;
       
   329   gchar *s;
       
   330   va_list args;
       
   331   gint remaining_newlines;
       
   332   const gchar *p;
       
   333 
       
   334   remaining_newlines = 0;
       
   335   p = remaining_text;
       
   336   while (p != remaining_text_end)
       
   337     {
       
   338       if (*p == '\n')
       
   339         ++remaining_newlines;
       
   340       ++p;
       
   341     }
       
   342 
       
   343   va_start (args, format);
       
   344   s = g_strdup_vprintf (format, args);
       
   345   va_end (args);
       
   346 
       
   347   tmp_error = g_error_new (G_MARKUP_ERROR,
       
   348                            code,
       
   349                            _("Error on line %d: %s"),
       
   350                            context->line_number - remaining_newlines,
       
   351                            s);
       
   352 
       
   353   g_free (s);
       
   354 
       
   355   mark_error (context, tmp_error);
       
   356 
       
   357   g_propagate_error (error, tmp_error);
       
   358 }
       
   359 
       
   360 typedef enum
       
   361 {
       
   362   USTATE_INSIDE_TEXT,
       
   363   USTATE_AFTER_AMPERSAND,
       
   364   USTATE_INSIDE_ENTITY_NAME,
       
   365   USTATE_AFTER_CHARREF_HASH
       
   366 } UnescapeState;
       
   367 
       
   368 typedef struct
       
   369 {
       
   370   GMarkupParseContext *context;
       
   371   GString *str;
       
   372   UnescapeState state;
       
   373   const gchar *text;
       
   374   const gchar *text_end;
       
   375   const gchar *entity_start;
       
   376 } UnescapeContext;
       
   377 
       
   378 static const gchar*
       
   379 unescape_text_state_inside_text (UnescapeContext *ucontext,
       
   380                                  const gchar     *p,
       
   381                                  GError         **error)
       
   382 {
       
   383   const gchar *start;
       
   384   gboolean normalize_attribute;
       
   385 
       
   386   if (ucontext->context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ ||
       
   387       ucontext->context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
       
   388     normalize_attribute = TRUE;
       
   389   else
       
   390     normalize_attribute = FALSE;
       
   391 
       
   392   start = p;
       
   393   
       
   394   while (p != ucontext->text_end)
       
   395     {
       
   396       if (*p == '&')
       
   397         {
       
   398           break;
       
   399         }
       
   400       else if (normalize_attribute && (*p == '\t' || *p == '\n'))
       
   401         {
       
   402           g_string_append_len (ucontext->str, start, p - start);
       
   403           g_string_append_c (ucontext->str, ' ');
       
   404           p = g_utf8_next_char (p);
       
   405           start = p;
       
   406         }
       
   407       else if (*p == '\r')
       
   408         {
       
   409           g_string_append_len (ucontext->str, start, p - start);
       
   410           g_string_append_c (ucontext->str, normalize_attribute ? ' ' : '\n');
       
   411           p = g_utf8_next_char (p);
       
   412           if (p != ucontext->text_end && *p == '\n')
       
   413             p = g_utf8_next_char (p);
       
   414           start = p;
       
   415         }
       
   416       else
       
   417         p = g_utf8_next_char (p);
       
   418     }
       
   419   
       
   420   if (p != start)
       
   421     g_string_append_len (ucontext->str, start, p - start);
       
   422   
       
   423   if (p != ucontext->text_end && *p == '&')
       
   424     {
       
   425       p = g_utf8_next_char (p);
       
   426       ucontext->state = USTATE_AFTER_AMPERSAND;
       
   427     }
       
   428 
       
   429   return p;
       
   430 }
       
   431 
       
   432 static const gchar*
       
   433 unescape_text_state_after_ampersand (UnescapeContext *ucontext,
       
   434                                      const gchar     *p,
       
   435                                      GError         **error)
       
   436 {
       
   437   ucontext->entity_start = NULL;
       
   438   
       
   439   if (*p == '#')
       
   440     {
       
   441       p = g_utf8_next_char (p);
       
   442 
       
   443       ucontext->entity_start = p;
       
   444       ucontext->state = USTATE_AFTER_CHARREF_HASH;
       
   445     }
       
   446   else if (!is_name_start_char (p))
       
   447     {
       
   448       if (*p == ';')
       
   449         {
       
   450           set_unescape_error (ucontext->context, error,
       
   451                               p, ucontext->text_end,
       
   452                               G_MARKUP_ERROR_PARSE,
       
   453                               _("Empty entity '&;' seen; valid "
       
   454                                 "entities are: &amp; &quot; &lt; &gt; &apos;"));
       
   455         }
       
   456       else
       
   457         {
       
   458           gchar buf[8];
       
   459 
       
   460           set_unescape_error (ucontext->context, error,
       
   461                               p, ucontext->text_end,
       
   462                               G_MARKUP_ERROR_PARSE,
       
   463                               _("Character '%s' is not valid at "
       
   464                                 "the start of an entity name; "
       
   465                                 "the & character begins an entity; "
       
   466                                 "if this ampersand isn't supposed "
       
   467                                 "to be an entity, escape it as "
       
   468                                 "&amp;"),
       
   469                               utf8_str (p, buf));
       
   470         }
       
   471     }
       
   472   else
       
   473     {
       
   474       ucontext->entity_start = p;
       
   475       ucontext->state = USTATE_INSIDE_ENTITY_NAME;
       
   476     }
       
   477 
       
   478   return p;
       
   479 }
       
   480 
       
   481 static const gchar*
       
   482 unescape_text_state_inside_entity_name (UnescapeContext *ucontext,
       
   483                                         const gchar     *p,
       
   484                                         GError         **error)
       
   485 {
       
   486   while (p != ucontext->text_end)
       
   487     {
       
   488       if (*p == ';')
       
   489         break;
       
   490       else if (!is_name_char (p))
       
   491         {
       
   492           gchar ubuf[8];
       
   493 
       
   494           set_unescape_error (ucontext->context, error,
       
   495                               p, ucontext->text_end,
       
   496                               G_MARKUP_ERROR_PARSE,
       
   497                               _("Character '%s' is not valid "
       
   498                                 "inside an entity name"),
       
   499                               utf8_str (p, ubuf));
       
   500           break;
       
   501         }
       
   502 
       
   503       p = g_utf8_next_char (p);
       
   504     }
       
   505 
       
   506   if (ucontext->context->state != STATE_ERROR)
       
   507     {
       
   508       if (p != ucontext->text_end)
       
   509         {
       
   510 	  gint len = p - ucontext->entity_start;
       
   511 
       
   512           /* move to after semicolon */
       
   513           p = g_utf8_next_char (p);
       
   514           ucontext->state = USTATE_INSIDE_TEXT;
       
   515 
       
   516           if (strncmp (ucontext->entity_start, "lt", len) == 0)
       
   517             g_string_append_c (ucontext->str, '<');
       
   518           else if (strncmp (ucontext->entity_start, "gt", len) == 0)
       
   519             g_string_append_c (ucontext->str, '>');
       
   520           else if (strncmp (ucontext->entity_start, "amp", len) == 0)
       
   521             g_string_append_c (ucontext->str, '&');
       
   522           else if (strncmp (ucontext->entity_start, "quot", len) == 0)
       
   523             g_string_append_c (ucontext->str, '"');
       
   524           else if (strncmp (ucontext->entity_start, "apos", len) == 0)
       
   525             g_string_append_c (ucontext->str, '\'');
       
   526           else
       
   527             {
       
   528 	      gchar *name;
       
   529 
       
   530 	      name = g_strndup (ucontext->entity_start, len);
       
   531               set_unescape_error (ucontext->context, error,
       
   532                                   p, ucontext->text_end,
       
   533                                   G_MARKUP_ERROR_PARSE,
       
   534                                   _("Entity name '%s' is not known"),
       
   535                                   name);
       
   536 	      g_free (name);
       
   537             }
       
   538         }
       
   539       else
       
   540         {
       
   541           set_unescape_error (ucontext->context, error,
       
   542                               /* give line number of the & */
       
   543                               ucontext->entity_start, ucontext->text_end,
       
   544                               G_MARKUP_ERROR_PARSE,
       
   545                               _("Entity did not end with a semicolon; "
       
   546                                 "most likely you used an ampersand "
       
   547                                 "character without intending to start "
       
   548                                 "an entity - escape ampersand as &amp;"));
       
   549         }
       
   550     }
       
   551 #undef MAX_ENT_LEN
       
   552 
       
   553   return p;
       
   554 }
       
   555 
       
   556 static const gchar*
       
   557 unescape_text_state_after_charref_hash (UnescapeContext *ucontext,
       
   558                                         const gchar     *p,
       
   559                                         GError         **error)
       
   560 {
       
   561   gboolean is_hex = FALSE;
       
   562   const char *start;
       
   563 
       
   564   start = ucontext->entity_start;
       
   565 
       
   566   if (*p == 'x')
       
   567     {
       
   568       is_hex = TRUE;
       
   569       p = g_utf8_next_char (p);
       
   570       start = p;
       
   571     }
       
   572 
       
   573   while (p != ucontext->text_end && *p != ';')
       
   574     p = g_utf8_next_char (p);
       
   575 
       
   576   if (p != ucontext->text_end)
       
   577     {
       
   578       g_assert (*p == ';');
       
   579 
       
   580       /* digit is between start and p */
       
   581 
       
   582       if (start != p)
       
   583         {
       
   584           gulong l;
       
   585           gchar *end = NULL;
       
   586                     
       
   587           errno = 0;
       
   588           if (is_hex)
       
   589             l = strtoul (start, &end, 16);
       
   590           else
       
   591             l = strtoul (start, &end, 10);
       
   592 
       
   593           if (end != p || errno != 0)
       
   594             {
       
   595               set_unescape_error (ucontext->context, error,
       
   596                                   start, ucontext->text_end,
       
   597                                   G_MARKUP_ERROR_PARSE,
       
   598                                   _("Failed to parse '%-.*s', which "
       
   599                                     "should have been a digit "
       
   600                                     "inside a character reference "
       
   601                                     "(&#234; for example) - perhaps "
       
   602                                     "the digit is too large"),
       
   603                                   p - start, start);
       
   604             }
       
   605           else
       
   606             {
       
   607               /* characters XML permits */
       
   608               if (l == 0x9 ||
       
   609                   l == 0xA ||
       
   610                   l == 0xD ||
       
   611                   (l >= 0x20 && l <= 0xD7FF) ||
       
   612                   (l >= 0xE000 && l <= 0xFFFD) ||
       
   613                   (l >= 0x10000 && l <= 0x10FFFF))
       
   614                 {
       
   615                   gchar buf[8];
       
   616                   g_string_append (ucontext->str, char_str (l, buf));
       
   617                 }
       
   618               else
       
   619                 {
       
   620                   set_unescape_error (ucontext->context, error,
       
   621                                       start, ucontext->text_end,
       
   622                                       G_MARKUP_ERROR_PARSE,
       
   623                                       _("Character reference '%-.*s' does not "
       
   624 					"encode a permitted character"),
       
   625                                       p - start, start);
       
   626                 }
       
   627             }
       
   628 
       
   629           /* Move to next state */
       
   630           p = g_utf8_next_char (p); /* past semicolon */
       
   631           ucontext->state = USTATE_INSIDE_TEXT;
       
   632         }
       
   633       else
       
   634         {
       
   635           set_unescape_error (ucontext->context, error,
       
   636                               start, ucontext->text_end,
       
   637                               G_MARKUP_ERROR_PARSE,
       
   638                               _("Empty character reference; "
       
   639                                 "should include a digit such as "
       
   640                                 "&#454;"));
       
   641         }
       
   642     }
       
   643   else
       
   644     {
       
   645       set_unescape_error (ucontext->context, error,
       
   646                           start, ucontext->text_end,
       
   647                           G_MARKUP_ERROR_PARSE,
       
   648                           _("Character reference did not end with a "
       
   649                             "semicolon; "
       
   650                             "most likely you used an ampersand "
       
   651                             "character without intending to start "
       
   652                             "an entity - escape ampersand as &amp;"));
       
   653     }
       
   654 
       
   655   return p;
       
   656 }
       
   657 
       
   658 static gboolean
       
   659 unescape_text (GMarkupParseContext *context,
       
   660                const gchar         *text,
       
   661                const gchar         *text_end,
       
   662                GString            **unescaped,
       
   663                GError             **error)
       
   664 {
       
   665   UnescapeContext ucontext;
       
   666   const gchar *p;
       
   667 
       
   668   ucontext.context = context;
       
   669   ucontext.text = text;
       
   670   ucontext.text_end = text_end;
       
   671   ucontext.entity_start = NULL;
       
   672   
       
   673   ucontext.str = g_string_sized_new (text_end - text);
       
   674 
       
   675   ucontext.state = USTATE_INSIDE_TEXT;
       
   676   p = text;
       
   677 
       
   678   while (p != text_end && context->state != STATE_ERROR)
       
   679     {
       
   680       g_assert (p < text_end);
       
   681       
       
   682       switch (ucontext.state)
       
   683         {
       
   684         case USTATE_INSIDE_TEXT:
       
   685           {
       
   686             p = unescape_text_state_inside_text (&ucontext,
       
   687                                                  p,
       
   688                                                  error);
       
   689           }
       
   690           break;
       
   691 
       
   692         case USTATE_AFTER_AMPERSAND:
       
   693           {
       
   694             p = unescape_text_state_after_ampersand (&ucontext,
       
   695                                                      p,
       
   696                                                      error);
       
   697           }
       
   698           break;
       
   699 
       
   700 
       
   701         case USTATE_INSIDE_ENTITY_NAME:
       
   702           {
       
   703             p = unescape_text_state_inside_entity_name (&ucontext,
       
   704                                                         p,
       
   705                                                         error);
       
   706           }
       
   707           break;
       
   708 
       
   709         case USTATE_AFTER_CHARREF_HASH:
       
   710           {
       
   711             p = unescape_text_state_after_charref_hash (&ucontext,
       
   712                                                         p,
       
   713                                                         error);
       
   714           }
       
   715           break;
       
   716 
       
   717         default:
       
   718           g_assert_not_reached ();
       
   719           break;
       
   720         }
       
   721     }
       
   722 
       
   723   if (context->state != STATE_ERROR) 
       
   724     {
       
   725       switch (ucontext.state) 
       
   726 	{
       
   727 	case USTATE_INSIDE_TEXT:
       
   728 	  break;
       
   729 	case USTATE_AFTER_AMPERSAND:
       
   730 	case USTATE_INSIDE_ENTITY_NAME:
       
   731 	  set_unescape_error (context, error,
       
   732 			      NULL, NULL,
       
   733 			      G_MARKUP_ERROR_PARSE,
       
   734 			      _("Unfinished entity reference"));
       
   735 	  break;
       
   736 	case USTATE_AFTER_CHARREF_HASH:
       
   737 	  set_unescape_error (context, error,
       
   738 			      NULL, NULL,
       
   739 			      G_MARKUP_ERROR_PARSE,
       
   740 			      _("Unfinished character reference"));
       
   741 	  break;
       
   742 	}
       
   743     }
       
   744 
       
   745   if (context->state == STATE_ERROR)
       
   746     {
       
   747       g_string_free (ucontext.str, TRUE);
       
   748       *unescaped = NULL;
       
   749       return FALSE;
       
   750     }
       
   751   else
       
   752     {
       
   753       *unescaped = ucontext.str;
       
   754       return TRUE;
       
   755     }
       
   756 }
       
   757 
       
   758 static inline gboolean
       
   759 advance_char (GMarkupParseContext *context)
       
   760 {  
       
   761   context->iter = g_utf8_next_char (context->iter);
       
   762   context->char_number += 1;
       
   763 
       
   764   if (context->iter == context->current_text_end)
       
   765     {
       
   766       return FALSE;
       
   767     }
       
   768   else if (*context->iter == '\n')
       
   769     {
       
   770       context->line_number += 1;
       
   771       context->char_number = 1;
       
   772     }
       
   773   
       
   774   return TRUE;
       
   775 }
       
   776 
       
   777 static inline gboolean
       
   778 xml_isspace (char c)
       
   779 {
       
   780   return c == ' ' || c == '\t' || c == '\n' || c == '\r';
       
   781 }
       
   782 
       
   783 static void
       
   784 skip_spaces (GMarkupParseContext *context)
       
   785 {
       
   786   do
       
   787     {
       
   788       if (!xml_isspace (*context->iter))
       
   789         return;
       
   790     }
       
   791   while (advance_char (context));
       
   792 }
       
   793 
       
   794 static void
       
   795 advance_to_name_end (GMarkupParseContext *context)
       
   796 {
       
   797   do
       
   798     {
       
   799       if (!is_name_char (context->iter))
       
   800         return;
       
   801     }
       
   802   while (advance_char (context));
       
   803 }
       
   804 
       
   805 static void
       
   806 add_to_partial (GMarkupParseContext *context,
       
   807                 const gchar         *text_start,
       
   808                 const gchar         *text_end)
       
   809 {
       
   810   if (context->partial_chunk == NULL)
       
   811     context->partial_chunk = g_string_sized_new (text_end - text_start);
       
   812 
       
   813   if (text_start != text_end)
       
   814     g_string_append_len (context->partial_chunk, text_start,
       
   815                          text_end - text_start);
       
   816 
       
   817   /* Invariant here that partial_chunk exists */
       
   818 }
       
   819 
       
   820 static void
       
   821 truncate_partial (GMarkupParseContext *context)
       
   822 {
       
   823   if (context->partial_chunk != NULL)
       
   824     {
       
   825       context->partial_chunk = g_string_truncate (context->partial_chunk, 0);
       
   826     }
       
   827 }
       
   828 
       
   829 static const gchar*
       
   830 current_element (GMarkupParseContext *context)
       
   831 {
       
   832   return context->tag_stack->data;
       
   833 }
       
   834 
       
   835 static const gchar*
       
   836 current_attribute (GMarkupParseContext *context)
       
   837 {
       
   838   g_assert (context->cur_attr >= 0);
       
   839   return context->attr_names[context->cur_attr];
       
   840 }
       
   841 
       
   842 static void
       
   843 find_current_text_end (GMarkupParseContext *context)
       
   844 {
       
   845   /* This function must be safe (non-segfaulting) on invalid UTF8.
       
   846    * It assumes the string starts with a character start
       
   847    */
       
   848   const gchar *end = context->current_text + context->current_text_len;
       
   849   const gchar *p;
       
   850   const gchar *next;
       
   851 
       
   852   g_assert (context->current_text_len > 0);
       
   853 
       
   854   p = g_utf8_find_prev_char (context->current_text, end);
       
   855 
       
   856   g_assert (p != NULL); /* since current_text was a char start */
       
   857 
       
   858   /* p is now the start of the last character or character portion. */
       
   859   g_assert (p != end);
       
   860   next = g_utf8_next_char (p); /* this only touches *p, nothing beyond */
       
   861 
       
   862   if (next == end)
       
   863     {
       
   864       /* whole character */
       
   865       context->current_text_end = end;
       
   866     }
       
   867   else
       
   868     {
       
   869       /* portion */
       
   870       context->leftover_char_portion = g_string_new_len (p, end - p);
       
   871       context->current_text_len -= (end - p);
       
   872       context->current_text_end = p;
       
   873     }
       
   874 }
       
   875 
       
   876 
       
   877 static void
       
   878 add_attribute (GMarkupParseContext *context, char *name)
       
   879 {
       
   880   if (context->cur_attr + 2 >= context->alloc_attrs)
       
   881     {
       
   882       context->alloc_attrs += 5; /* silly magic number */
       
   883       context->attr_names = g_realloc (context->attr_names, sizeof(char*)*context->alloc_attrs);
       
   884       context->attr_values = g_realloc (context->attr_values, sizeof(char*)*context->alloc_attrs);         
       
   885     }
       
   886   context->cur_attr++;
       
   887   context->attr_names[context->cur_attr] = name;
       
   888   context->attr_values[context->cur_attr] = NULL;
       
   889   context->attr_names[context->cur_attr+1] = NULL;
       
   890   context->attr_values[context->cur_attr+1] = NULL;
       
   891 }
       
   892 
       
   893 /**
       
   894  * g_markup_parse_context_parse:
       
   895  * @context: a #GMarkupParseContext
       
   896  * @text: chunk of text to parse
       
   897  * @text_len: length of @text in bytes
       
   898  * @error: return location for a #GError
       
   899  * 
       
   900  * Feed some data to the #GMarkupParseContext. The data need not
       
   901  * be valid UTF-8; an error will be signaled if it's invalid.
       
   902  * The data need not be an entire document; you can feed a document
       
   903  * into the parser incrementally, via multiple calls to this function.
       
   904  * Typically, as you receive data from a network connection or file,
       
   905  * you feed each received chunk of data into this function, aborting
       
   906  * the process if an error occurs. Once an error is reported, no further
       
   907  * data may be fed to the #GMarkupParseContext; all errors are fatal.
       
   908  * 
       
   909  * Return value: %FALSE if an error occurred, %TRUE on success
       
   910  **/
       
   911 EXPORT_C gboolean
       
   912 g_markup_parse_context_parse (GMarkupParseContext *context,
       
   913                               const gchar         *text,
       
   914                               gssize               text_len,
       
   915                               GError             **error)
       
   916 {
       
   917   const gchar *first_invalid;
       
   918   
       
   919   g_return_val_if_fail (context != NULL, FALSE);
       
   920   g_return_val_if_fail (text != NULL, FALSE);
       
   921   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
       
   922   g_return_val_if_fail (!context->parsing, FALSE);
       
   923   
       
   924   if (text_len < 0)
       
   925     text_len = strlen (text);
       
   926 
       
   927   if (text_len == 0)
       
   928     return TRUE;
       
   929   
       
   930   context->parsing = TRUE;
       
   931   
       
   932   if (context->leftover_char_portion)
       
   933     {
       
   934       const gchar *first_char;
       
   935 
       
   936       if ((*text & 0xc0) != 0x80)
       
   937         first_char = text;
       
   938       else
       
   939         first_char = g_utf8_find_next_char (text, text + text_len);
       
   940 
       
   941       if (first_char)
       
   942         {
       
   943           /* leftover_char_portion was completed. Parse it. */
       
   944           GString *portion = context->leftover_char_portion;
       
   945           
       
   946           g_string_append_len (context->leftover_char_portion,
       
   947                                text, first_char - text);
       
   948 
       
   949           /* hacks to allow recursion */
       
   950           context->parsing = FALSE;
       
   951           context->leftover_char_portion = NULL;
       
   952           
       
   953           if (!g_markup_parse_context_parse (context,
       
   954                                              portion->str, portion->len,
       
   955                                              error))
       
   956             {
       
   957               g_assert (context->state == STATE_ERROR);
       
   958             }
       
   959           
       
   960           g_string_free (portion, TRUE);
       
   961           context->parsing = TRUE;
       
   962 
       
   963           /* Skip the fraction of char that was in this text */
       
   964           text_len -= (first_char - text);
       
   965           text = first_char;
       
   966         }
       
   967       else
       
   968         {
       
   969           /* another little chunk of the leftover char; geez
       
   970            * someone is inefficient.
       
   971            */
       
   972           g_string_append_len (context->leftover_char_portion,
       
   973                                text, text_len);
       
   974 
       
   975           if (context->leftover_char_portion->len > 7)
       
   976             {
       
   977               /* The leftover char portion is too big to be
       
   978                * a UTF-8 character
       
   979                */
       
   980               set_error (context,
       
   981                          error,
       
   982                          G_MARKUP_ERROR_BAD_UTF8,
       
   983                          _("Invalid UTF-8 encoded text"));
       
   984             }
       
   985           
       
   986           goto finished;
       
   987         }
       
   988     }
       
   989 
       
   990   context->current_text = text;
       
   991   context->current_text_len = text_len;
       
   992   context->iter = context->current_text;
       
   993   context->start = context->iter;
       
   994 
       
   995   /* Nothing left after finishing the leftover char, or nothing
       
   996    * passed in to begin with.
       
   997    */
       
   998   if (context->current_text_len == 0)
       
   999     goto finished;
       
  1000 
       
  1001   /* find_current_text_end () assumes the string starts at
       
  1002    * a character start, so we need to validate at least
       
  1003    * that much. It doesn't assume any following bytes
       
  1004    * are valid.
       
  1005    */
       
  1006   if ((*context->current_text & 0xc0) == 0x80) /* not a char start */
       
  1007     {
       
  1008       set_error (context,
       
  1009                  error,
       
  1010                  G_MARKUP_ERROR_BAD_UTF8,
       
  1011                  _("Invalid UTF-8 encoded text"));
       
  1012       goto finished;
       
  1013     }
       
  1014 
       
  1015   /* Initialize context->current_text_end, possibly adjusting
       
  1016    * current_text_len, and add any leftover char portion
       
  1017    */
       
  1018   find_current_text_end (context);
       
  1019 
       
  1020   /* Validate UTF8 (must be done after we find the end, since
       
  1021    * we could have a trailing incomplete char)
       
  1022    */
       
  1023   if (!g_utf8_validate (context->current_text,
       
  1024 			context->current_text_len,
       
  1025 			&first_invalid))
       
  1026     {
       
  1027       gint newlines = 0;
       
  1028       const gchar *p;
       
  1029       p = context->current_text;
       
  1030       while (p != context->current_text_end)
       
  1031         {
       
  1032           if (*p == '\n')
       
  1033             ++newlines;
       
  1034           ++p;
       
  1035         }
       
  1036 
       
  1037       context->line_number += newlines;
       
  1038 
       
  1039       set_error (context,
       
  1040                  error,
       
  1041                  G_MARKUP_ERROR_BAD_UTF8,
       
  1042                  _("Invalid UTF-8 encoded text"));
       
  1043       goto finished;
       
  1044     }
       
  1045 
       
  1046   while (context->iter != context->current_text_end)
       
  1047     {
       
  1048       switch (context->state)
       
  1049         {
       
  1050         case STATE_START:
       
  1051           /* Possible next state: AFTER_OPEN_ANGLE */
       
  1052 
       
  1053           g_assert (context->tag_stack == NULL);
       
  1054 
       
  1055           /* whitespace is ignored outside of any elements */
       
  1056           skip_spaces (context);
       
  1057 
       
  1058           if (context->iter != context->current_text_end)
       
  1059             {
       
  1060               if (*context->iter == '<')
       
  1061                 {
       
  1062                   /* Move after the open angle */
       
  1063                   advance_char (context);
       
  1064 
       
  1065                   context->state = STATE_AFTER_OPEN_ANGLE;
       
  1066 
       
  1067                   /* this could start a passthrough */
       
  1068                   context->start = context->iter;
       
  1069 
       
  1070                   /* document is now non-empty */
       
  1071                   context->document_empty = FALSE;
       
  1072                 }
       
  1073               else
       
  1074                 {
       
  1075                   set_error (context,
       
  1076                              error,
       
  1077                              G_MARKUP_ERROR_PARSE,
       
  1078                              _("Document must begin with an element (e.g. <book>)"));
       
  1079                 }
       
  1080             }
       
  1081           break;
       
  1082 
       
  1083         case STATE_AFTER_OPEN_ANGLE:
       
  1084           /* Possible next states: INSIDE_OPEN_TAG_NAME,
       
  1085            *  AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
       
  1086            */
       
  1087           if (*context->iter == '?' ||
       
  1088               *context->iter == '!')
       
  1089             {
       
  1090               /* include < in the passthrough */
       
  1091               const gchar *openangle = "<";
       
  1092               add_to_partial (context, openangle, openangle + 1);
       
  1093               context->start = context->iter;
       
  1094 	      context->balance = 1;
       
  1095               context->state = STATE_INSIDE_PASSTHROUGH;
       
  1096             }
       
  1097           else if (*context->iter == '/')
       
  1098             {
       
  1099               /* move after it */
       
  1100               advance_char (context);
       
  1101 
       
  1102               context->state = STATE_AFTER_CLOSE_TAG_SLASH;
       
  1103             }
       
  1104           else if (is_name_start_char (context->iter))
       
  1105             {
       
  1106               context->state = STATE_INSIDE_OPEN_TAG_NAME;
       
  1107 
       
  1108               /* start of tag name */
       
  1109               context->start = context->iter;
       
  1110             }
       
  1111           else
       
  1112             {
       
  1113               gchar buf[8];
       
  1114 
       
  1115               set_error (context,
       
  1116                          error,
       
  1117                          G_MARKUP_ERROR_PARSE,
       
  1118                          _("'%s' is not a valid character following "
       
  1119                            "a '<' character; it may not begin an "
       
  1120                            "element name"),
       
  1121                          utf8_str (context->iter, buf));
       
  1122             }
       
  1123           break;
       
  1124 
       
  1125           /* The AFTER_CLOSE_ANGLE state is actually sort of
       
  1126            * broken, because it doesn't correspond to a range
       
  1127            * of characters in the input stream as the others do,
       
  1128            * and thus makes things harder to conceptualize
       
  1129            */
       
  1130         case STATE_AFTER_CLOSE_ANGLE:
       
  1131           /* Possible next states: INSIDE_TEXT, STATE_START */
       
  1132           if (context->tag_stack == NULL)
       
  1133             {
       
  1134               context->start = NULL;
       
  1135               context->state = STATE_START;
       
  1136             }
       
  1137           else
       
  1138             {
       
  1139               context->start = context->iter;
       
  1140               context->state = STATE_INSIDE_TEXT;
       
  1141             }
       
  1142           break;
       
  1143 
       
  1144         case STATE_AFTER_ELISION_SLASH:
       
  1145           /* Possible next state: AFTER_CLOSE_ANGLE */
       
  1146 
       
  1147           {
       
  1148             /* We need to pop the tag stack and call the end_element
       
  1149              * function, since this is the close tag
       
  1150              */
       
  1151             GError *tmp_error = NULL;
       
  1152           
       
  1153             g_assert (context->tag_stack != NULL);
       
  1154 
       
  1155             tmp_error = NULL;
       
  1156             if (context->parser->end_element)
       
  1157               (* context->parser->end_element) (context,
       
  1158                                                 context->tag_stack->data,
       
  1159                                                 context->user_data,
       
  1160                                                 &tmp_error);
       
  1161           
       
  1162             if (tmp_error)
       
  1163               {
       
  1164                 mark_error (context, tmp_error);
       
  1165                 g_propagate_error (error, tmp_error);
       
  1166               }          
       
  1167             else
       
  1168               {
       
  1169                 if (*context->iter == '>')
       
  1170                   {
       
  1171                     /* move after the close angle */
       
  1172                     advance_char (context);
       
  1173                     context->state = STATE_AFTER_CLOSE_ANGLE;
       
  1174                   }
       
  1175                 else
       
  1176                   {
       
  1177                     gchar buf[8];
       
  1178 
       
  1179                     set_error (context,
       
  1180                                error,
       
  1181                                G_MARKUP_ERROR_PARSE,
       
  1182                                _("Odd character '%s', expected a '>' character "
       
  1183                                  "to end the start tag of element '%s'"),
       
  1184                                utf8_str (context->iter, buf),
       
  1185                                current_element (context));
       
  1186                   }
       
  1187               }
       
  1188 
       
  1189             g_free (context->tag_stack->data);
       
  1190             context->tag_stack = g_slist_delete_link (context->tag_stack,
       
  1191                                                       context->tag_stack);
       
  1192           }
       
  1193           break;
       
  1194 
       
  1195         case STATE_INSIDE_OPEN_TAG_NAME:
       
  1196           /* Possible next states: BETWEEN_ATTRIBUTES */
       
  1197 
       
  1198           /* if there's a partial chunk then it's the first part of the
       
  1199            * tag name. If there's a context->start then it's the start
       
  1200            * of the tag name in current_text, the partial chunk goes
       
  1201            * before that start though.
       
  1202            */
       
  1203           advance_to_name_end (context);
       
  1204 
       
  1205           if (context->iter == context->current_text_end)
       
  1206             {
       
  1207               /* The name hasn't necessarily ended. Merge with
       
  1208                * partial chunk, leave state unchanged.
       
  1209                */
       
  1210               add_to_partial (context, context->start, context->iter);
       
  1211             }
       
  1212           else
       
  1213             {
       
  1214               /* The name has ended. Combine it with the partial chunk
       
  1215                * if any; push it on the stack; enter next state.
       
  1216                */
       
  1217               add_to_partial (context, context->start, context->iter);
       
  1218               context->tag_stack =
       
  1219                 g_slist_prepend (context->tag_stack,
       
  1220                                  g_string_free (context->partial_chunk,
       
  1221                                                 FALSE));
       
  1222 
       
  1223               context->partial_chunk = NULL;
       
  1224 
       
  1225               context->state = STATE_BETWEEN_ATTRIBUTES;
       
  1226               context->start = NULL;
       
  1227             }
       
  1228           break;
       
  1229 
       
  1230         case STATE_INSIDE_ATTRIBUTE_NAME:
       
  1231           /* Possible next states: AFTER_ATTRIBUTE_NAME */
       
  1232 
       
  1233           advance_to_name_end (context);
       
  1234 	  add_to_partial (context, context->start, context->iter);
       
  1235 
       
  1236           /* read the full name, if we enter the equals sign state
       
  1237            * then add the attribute to the list (without the value),
       
  1238            * otherwise store a partial chunk to be prepended later.
       
  1239            */
       
  1240           if (context->iter != context->current_text_end)
       
  1241 	    context->state = STATE_AFTER_ATTRIBUTE_NAME;
       
  1242 	  break;
       
  1243 
       
  1244 	case STATE_AFTER_ATTRIBUTE_NAME:
       
  1245           /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
       
  1246 
       
  1247 	  skip_spaces (context);
       
  1248 
       
  1249 	  if (context->iter != context->current_text_end)
       
  1250 	    {
       
  1251 	      /* The name has ended. Combine it with the partial chunk
       
  1252 	       * if any; push it on the stack; enter next state.
       
  1253 	       */
       
  1254               add_attribute (context, g_string_free (context->partial_chunk, FALSE));
       
  1255 	      
       
  1256               context->partial_chunk = NULL;
       
  1257               context->start = NULL;
       
  1258 	      
       
  1259               if (*context->iter == '=')
       
  1260                 {
       
  1261                   advance_char (context);
       
  1262                   context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
       
  1263                 }
       
  1264               else
       
  1265                 {
       
  1266                   gchar buf[8];
       
  1267 
       
  1268                   set_error (context,
       
  1269                              error,
       
  1270                              G_MARKUP_ERROR_PARSE,
       
  1271                              _("Odd character '%s', expected a '=' after "
       
  1272                                "attribute name '%s' of element '%s'"),
       
  1273                              utf8_str (context->iter, buf),
       
  1274                              current_attribute (context),
       
  1275                              current_element (context));
       
  1276 		  
       
  1277                 }
       
  1278             }
       
  1279           break;
       
  1280 
       
  1281         case STATE_BETWEEN_ATTRIBUTES:
       
  1282           /* Possible next states: AFTER_CLOSE_ANGLE,
       
  1283            * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
       
  1284            */
       
  1285           skip_spaces (context);
       
  1286 
       
  1287           if (context->iter != context->current_text_end)
       
  1288             {
       
  1289               if (*context->iter == '/')
       
  1290                 {
       
  1291                   advance_char (context);
       
  1292                   context->state = STATE_AFTER_ELISION_SLASH;
       
  1293                 }
       
  1294               else if (*context->iter == '>')
       
  1295                 {
       
  1296 
       
  1297                   advance_char (context);
       
  1298                   context->state = STATE_AFTER_CLOSE_ANGLE;
       
  1299                 }
       
  1300               else if (is_name_start_char (context->iter))
       
  1301                 {
       
  1302                   context->state = STATE_INSIDE_ATTRIBUTE_NAME;
       
  1303                   /* start of attribute name */
       
  1304                   context->start = context->iter;
       
  1305                 }
       
  1306               else
       
  1307                 {
       
  1308                   gchar buf[8];
       
  1309 
       
  1310                   set_error (context,
       
  1311                              error,
       
  1312                              G_MARKUP_ERROR_PARSE,
       
  1313                              _("Odd character '%s', expected a '>' or '/' "
       
  1314                                "character to end the start tag of "
       
  1315                                "element '%s', or optionally an attribute; "
       
  1316                                "perhaps you used an invalid character in "
       
  1317                                "an attribute name"),
       
  1318                              utf8_str (context->iter, buf),
       
  1319                              current_element (context));
       
  1320                 }
       
  1321 
       
  1322               /* If we're done with attributes, invoke
       
  1323                * the start_element callback
       
  1324                */
       
  1325               if (context->state == STATE_AFTER_ELISION_SLASH ||
       
  1326                   context->state == STATE_AFTER_CLOSE_ANGLE)
       
  1327                 {
       
  1328                   const gchar *start_name;
       
  1329 		  /* Ugly, but the current code expects an empty array instead of NULL */
       
  1330 		  const gchar *empty = NULL;
       
  1331                   const gchar **attr_names =  &empty;
       
  1332                   const gchar **attr_values = &empty;
       
  1333                   GError *tmp_error;
       
  1334 
       
  1335                   /* Call user callback for element start */
       
  1336                   start_name = current_element (context);
       
  1337 
       
  1338 		  if (context->cur_attr >= 0)
       
  1339 		    {
       
  1340 		      attr_names = (const gchar**)context->attr_names;
       
  1341 		      attr_values = (const gchar**)context->attr_values;
       
  1342 		    }
       
  1343 
       
  1344                   tmp_error = NULL;
       
  1345                   if (context->parser->start_element)
       
  1346                     (* context->parser->start_element) (context,
       
  1347                                                         start_name,
       
  1348                                                         (const gchar **)attr_names,
       
  1349                                                         (const gchar **)attr_values,
       
  1350                                                         context->user_data,
       
  1351                                                         &tmp_error);
       
  1352 
       
  1353                   /* Go ahead and free the attributes. */
       
  1354 		  for (; context->cur_attr >= 0; context->cur_attr--)
       
  1355 		    {
       
  1356 		      int pos = context->cur_attr;
       
  1357 		      g_free (context->attr_names[pos]);
       
  1358 		      g_free (context->attr_values[pos]);
       
  1359 		      context->attr_names[pos] = context->attr_values[pos] = NULL;
       
  1360 		    }
       
  1361                   g_assert (context->cur_attr == -1);
       
  1362                   g_assert (context->attr_names == NULL ||
       
  1363                             context->attr_names[0] == NULL);
       
  1364                   g_assert (context->attr_values == NULL ||
       
  1365                             context->attr_values[0] == NULL);
       
  1366                   
       
  1367                   if (tmp_error != NULL)
       
  1368                     {
       
  1369                       mark_error (context, tmp_error);
       
  1370                       g_propagate_error (error, tmp_error);
       
  1371                     }
       
  1372                 }
       
  1373             }
       
  1374           break;
       
  1375 
       
  1376         case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
       
  1377           /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
       
  1378 
       
  1379 	  skip_spaces (context);
       
  1380 
       
  1381 	  if (context->iter != context->current_text_end)
       
  1382 	    {
       
  1383 	      if (*context->iter == '"')
       
  1384 		{
       
  1385 		  advance_char (context);
       
  1386 		  context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
       
  1387 		  context->start = context->iter;
       
  1388 		}
       
  1389 	      else if (*context->iter == '\'')
       
  1390 		{
       
  1391 		  advance_char (context);
       
  1392 		  context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
       
  1393 		  context->start = context->iter;
       
  1394 		}
       
  1395 	      else
       
  1396 		{
       
  1397 		  gchar buf[8];
       
  1398 		  
       
  1399 		  set_error (context,
       
  1400 			     error,
       
  1401 			     G_MARKUP_ERROR_PARSE,
       
  1402 			     _("Odd character '%s', expected an open quote mark "
       
  1403 			       "after the equals sign when giving value for "
       
  1404 			       "attribute '%s' of element '%s'"),
       
  1405 			     utf8_str (context->iter, buf),
       
  1406 			     current_attribute (context),
       
  1407 			     current_element (context));
       
  1408 		}
       
  1409 	    }
       
  1410           break;
       
  1411 
       
  1412         case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
       
  1413         case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
       
  1414           /* Possible next states: BETWEEN_ATTRIBUTES */
       
  1415 	  {
       
  1416 	    gchar delim;
       
  1417 
       
  1418 	    if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ) 
       
  1419 	      {
       
  1420 		delim = '\'';
       
  1421 	      }
       
  1422 	    else 
       
  1423 	      {
       
  1424 		delim = '"';
       
  1425 	      }
       
  1426 
       
  1427 	    do
       
  1428 	      {
       
  1429 		if (*context->iter == delim)
       
  1430 		  break;
       
  1431 	      }
       
  1432 	    while (advance_char (context));
       
  1433 	  }
       
  1434           if (context->iter == context->current_text_end)
       
  1435             {
       
  1436               /* The value hasn't necessarily ended. Merge with
       
  1437                * partial chunk, leave state unchanged.
       
  1438                */
       
  1439               add_to_partial (context, context->start, context->iter);
       
  1440             }
       
  1441           else
       
  1442             {
       
  1443               /* The value has ended at the quote mark. Combine it
       
  1444                * with the partial chunk if any; set it for the current
       
  1445                * attribute.
       
  1446                */
       
  1447               GString *unescaped;
       
  1448               
       
  1449               add_to_partial (context, context->start, context->iter);
       
  1450 
       
  1451               g_assert (context->cur_attr >= 0);
       
  1452               
       
  1453               if (unescape_text (context,
       
  1454                                  context->partial_chunk->str,
       
  1455                                  context->partial_chunk->str +
       
  1456                                  context->partial_chunk->len,
       
  1457                                  &unescaped,
       
  1458                                  error))
       
  1459                 {
       
  1460                   /* success, advance past quote and set state. */
       
  1461                   context->attr_values[context->cur_attr] = g_string_free (unescaped, FALSE);
       
  1462                   advance_char (context);
       
  1463                   context->state = STATE_BETWEEN_ATTRIBUTES;
       
  1464                   context->start = NULL;
       
  1465                 }
       
  1466               
       
  1467               truncate_partial (context);
       
  1468             }
       
  1469           break;
       
  1470 
       
  1471         case STATE_INSIDE_TEXT:
       
  1472           /* Possible next states: AFTER_OPEN_ANGLE */
       
  1473           do
       
  1474             {
       
  1475               if (*context->iter == '<')
       
  1476                 break;
       
  1477             }
       
  1478           while (advance_char (context));
       
  1479 
       
  1480           /* The text hasn't necessarily ended. Merge with
       
  1481            * partial chunk, leave state unchanged.
       
  1482            */
       
  1483 
       
  1484           add_to_partial (context, context->start, context->iter);
       
  1485 
       
  1486           if (context->iter != context->current_text_end)
       
  1487             {
       
  1488               GString *unescaped = NULL;
       
  1489 
       
  1490               /* The text has ended at the open angle. Call the text
       
  1491                * callback.
       
  1492                */
       
  1493               
       
  1494               if (unescape_text (context,
       
  1495                                  context->partial_chunk->str,
       
  1496                                  context->partial_chunk->str +
       
  1497                                  context->partial_chunk->len,
       
  1498                                  &unescaped,
       
  1499                                  error))
       
  1500                 {
       
  1501                   GError *tmp_error = NULL;
       
  1502 
       
  1503                   if (context->parser->text)
       
  1504                     (*context->parser->text) (context,
       
  1505                                               unescaped->str,
       
  1506                                               unescaped->len,
       
  1507                                               context->user_data,
       
  1508                                               &tmp_error);
       
  1509                   
       
  1510                   g_string_free (unescaped, TRUE);
       
  1511 
       
  1512                   if (tmp_error == NULL)
       
  1513                     {
       
  1514                       /* advance past open angle and set state. */
       
  1515                       advance_char (context);
       
  1516                       context->state = STATE_AFTER_OPEN_ANGLE;
       
  1517                       /* could begin a passthrough */
       
  1518                       context->start = context->iter;
       
  1519                     }
       
  1520                   else
       
  1521                     {
       
  1522                       mark_error (context, tmp_error);
       
  1523                       g_propagate_error (error, tmp_error);
       
  1524                     }
       
  1525                 }
       
  1526 
       
  1527               truncate_partial (context);
       
  1528             }
       
  1529           break;
       
  1530 
       
  1531         case STATE_AFTER_CLOSE_TAG_SLASH:
       
  1532           /* Possible next state: INSIDE_CLOSE_TAG_NAME */
       
  1533           if (is_name_start_char (context->iter))
       
  1534             {
       
  1535               context->state = STATE_INSIDE_CLOSE_TAG_NAME;
       
  1536 
       
  1537               /* start of tag name */
       
  1538               context->start = context->iter;
       
  1539             }
       
  1540           else
       
  1541             {
       
  1542               gchar buf[8];
       
  1543 
       
  1544               set_error (context,
       
  1545                          error,
       
  1546                          G_MARKUP_ERROR_PARSE,
       
  1547                          _("'%s' is not a valid character following "
       
  1548                            "the characters '</'; '%s' may not begin an "
       
  1549                            "element name"),
       
  1550                          utf8_str (context->iter, buf),
       
  1551                          utf8_str (context->iter, buf));
       
  1552             }
       
  1553           break;
       
  1554 
       
  1555         case STATE_INSIDE_CLOSE_TAG_NAME:
       
  1556           /* Possible next state: AFTER_CLOSE_TAG_NAME */
       
  1557           advance_to_name_end (context);
       
  1558 	  add_to_partial (context, context->start, context->iter);
       
  1559 
       
  1560           if (context->iter != context->current_text_end)
       
  1561 	    context->state = STATE_AFTER_CLOSE_TAG_NAME;
       
  1562 	  break;
       
  1563 
       
  1564 	case STATE_AFTER_CLOSE_TAG_NAME:
       
  1565           /* Possible next state: AFTER_CLOSE_TAG_SLASH */
       
  1566 
       
  1567 	  skip_spaces (context);
       
  1568 	  
       
  1569 	  if (context->iter != context->current_text_end)
       
  1570 	    {
       
  1571 	      gchar *close_name;
       
  1572 
       
  1573 	      /* The name has ended. Combine it with the partial chunk
       
  1574 	       * if any; check that it matches stack top and pop
       
  1575 	       * stack; invoke proper callback; enter next state.
       
  1576 	       */
       
  1577 	      close_name = g_string_free (context->partial_chunk, FALSE);
       
  1578 	      context->partial_chunk = NULL;
       
  1579               
       
  1580 	      if (*context->iter != '>')
       
  1581 		{
       
  1582 		  gchar buf[8];
       
  1583 
       
  1584 		  set_error (context,
       
  1585 			     error,
       
  1586 			     G_MARKUP_ERROR_PARSE,
       
  1587 			     _("'%s' is not a valid character following "
       
  1588 			       "the close element name '%s'; the allowed "
       
  1589 			       "character is '>'"),
       
  1590 			     utf8_str (context->iter, buf),
       
  1591 			     close_name);
       
  1592 		}
       
  1593 	      else if (context->tag_stack == NULL)
       
  1594 		{
       
  1595 		  set_error (context,
       
  1596 			     error,
       
  1597 			     G_MARKUP_ERROR_PARSE,
       
  1598 			     _("Element '%s' was closed, no element "
       
  1599 			       "is currently open"),
       
  1600 			     close_name);
       
  1601 		}
       
  1602 	      else if (strcmp (close_name, current_element (context)) != 0)
       
  1603 		{
       
  1604 		  set_error (context,
       
  1605 			     error,
       
  1606 			     G_MARKUP_ERROR_PARSE,
       
  1607 			     _("Element '%s' was closed, but the currently "
       
  1608 			       "open element is '%s'"),
       
  1609 			     close_name,
       
  1610 			     current_element (context));
       
  1611 		}
       
  1612 	      else
       
  1613 		{
       
  1614 		  GError *tmp_error;
       
  1615 		  advance_char (context);
       
  1616 		  context->state = STATE_AFTER_CLOSE_ANGLE;
       
  1617 		  context->start = NULL;
       
  1618 		  
       
  1619 		  /* call the end_element callback */
       
  1620 		  tmp_error = NULL;
       
  1621 		  if (context->parser->end_element)
       
  1622 		    (* context->parser->end_element) (context,
       
  1623 						      close_name,
       
  1624 						      context->user_data,
       
  1625 						      &tmp_error);
       
  1626 		  
       
  1627 		  
       
  1628 		  /* Pop the tag stack */
       
  1629 		  g_free (context->tag_stack->data);
       
  1630 		  context->tag_stack = g_slist_delete_link (context->tag_stack,
       
  1631 							    context->tag_stack);
       
  1632 		  
       
  1633 		  if (tmp_error)
       
  1634                     {
       
  1635                       mark_error (context, tmp_error);
       
  1636                       g_propagate_error (error, tmp_error);
       
  1637                     }
       
  1638                 }
       
  1639 	      
       
  1640               g_free (close_name);
       
  1641             }
       
  1642           break;
       
  1643 	  
       
  1644         case STATE_INSIDE_PASSTHROUGH:
       
  1645           /* Possible next state: AFTER_CLOSE_ANGLE */
       
  1646           do
       
  1647             {
       
  1648 	      if (*context->iter == '<') 
       
  1649 		context->balance++;
       
  1650               if (*context->iter == '>') 
       
  1651 		{
       
  1652 		  context->balance--;
       
  1653 		  add_to_partial (context, context->start, context->iter);
       
  1654 		  context->start = context->iter;
       
  1655 		  if ((g_str_has_prefix (context->partial_chunk->str, "<?")
       
  1656 		       && g_str_has_suffix (context->partial_chunk->str, "?")) ||
       
  1657 		      (g_str_has_prefix (context->partial_chunk->str, "<!--")
       
  1658 		       && g_str_has_suffix (context->partial_chunk->str, "--")) ||
       
  1659 		      (g_str_has_prefix (context->partial_chunk->str, "<![CDATA[") 
       
  1660 		       && g_str_has_suffix (context->partial_chunk->str, "]]")) ||
       
  1661 		      (g_str_has_prefix (context->partial_chunk->str, "<!DOCTYPE")
       
  1662 		       && context->balance == 0)) 
       
  1663 		    break;
       
  1664 		}
       
  1665             }
       
  1666           while (advance_char (context));
       
  1667 
       
  1668           if (context->iter == context->current_text_end)
       
  1669             {
       
  1670               /* The passthrough hasn't necessarily ended. Merge with
       
  1671                * partial chunk, leave state unchanged.
       
  1672                */
       
  1673               add_to_partial (context, context->start, context->iter);
       
  1674             }
       
  1675           else
       
  1676             {
       
  1677               /* The passthrough has ended at the close angle. Combine
       
  1678                * it with the partial chunk if any. Call the passthrough
       
  1679                * callback. Note that the open/close angles are
       
  1680                * included in the text of the passthrough.
       
  1681                */
       
  1682               GError *tmp_error = NULL;
       
  1683 
       
  1684               advance_char (context); /* advance past close angle */
       
  1685               add_to_partial (context, context->start, context->iter);
       
  1686 
       
  1687               if (context->parser->passthrough)
       
  1688                 (*context->parser->passthrough) (context,
       
  1689                                                  context->partial_chunk->str,
       
  1690                                                  context->partial_chunk->len,
       
  1691                                                  context->user_data,
       
  1692                                                  &tmp_error);
       
  1693                   
       
  1694               truncate_partial (context);
       
  1695 
       
  1696               if (tmp_error == NULL)
       
  1697                 {
       
  1698                   context->state = STATE_AFTER_CLOSE_ANGLE;
       
  1699                   context->start = context->iter; /* could begin text */
       
  1700                 }
       
  1701               else
       
  1702                 {
       
  1703                   mark_error (context, tmp_error);
       
  1704                   g_propagate_error (error, tmp_error);
       
  1705                 }
       
  1706             }
       
  1707           break;
       
  1708 
       
  1709         case STATE_ERROR:
       
  1710           goto finished;
       
  1711           break;
       
  1712 
       
  1713         default:
       
  1714           g_assert_not_reached ();
       
  1715           break;
       
  1716         }
       
  1717     }
       
  1718 
       
  1719  finished:
       
  1720   context->parsing = FALSE;
       
  1721 
       
  1722   return context->state != STATE_ERROR;
       
  1723 }
       
  1724 
       
  1725 /**
       
  1726  * g_markup_parse_context_end_parse:
       
  1727  * @context: a #GMarkupParseContext
       
  1728  * @error: return location for a #GError
       
  1729  * 
       
  1730  * Signals to the #GMarkupParseContext that all data has been
       
  1731  * fed into the parse context with g_markup_parse_context_parse().
       
  1732  * This function reports an error if the document isn't complete,
       
  1733  * for example if elements are still open.
       
  1734  * 
       
  1735  * Return value: %TRUE on success, %FALSE if an error was set
       
  1736  **/
       
  1737 EXPORT_C gboolean
       
  1738 g_markup_parse_context_end_parse (GMarkupParseContext *context,
       
  1739                                   GError             **error)
       
  1740 {
       
  1741   g_return_val_if_fail (context != NULL, FALSE);
       
  1742   g_return_val_if_fail (!context->parsing, FALSE);
       
  1743   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
       
  1744 
       
  1745   if (context->partial_chunk != NULL)
       
  1746     {
       
  1747       g_string_free (context->partial_chunk, TRUE);
       
  1748       context->partial_chunk = NULL;
       
  1749     }
       
  1750 
       
  1751   if (context->document_empty)
       
  1752     {
       
  1753       set_error (context, error, G_MARKUP_ERROR_EMPTY,
       
  1754                  _("Document was empty or contained only whitespace"));
       
  1755       return FALSE;
       
  1756     }
       
  1757   
       
  1758   context->parsing = TRUE;
       
  1759   
       
  1760   switch (context->state)
       
  1761     {
       
  1762     case STATE_START:
       
  1763       /* Nothing to do */
       
  1764       break;
       
  1765 
       
  1766     case STATE_AFTER_OPEN_ANGLE:
       
  1767       set_error (context, error, G_MARKUP_ERROR_PARSE,
       
  1768                  _("Document ended unexpectedly just after an open angle bracket '<'"));
       
  1769       break;
       
  1770 
       
  1771     case STATE_AFTER_CLOSE_ANGLE:
       
  1772       if (context->tag_stack != NULL)
       
  1773         {
       
  1774           /* Error message the same as for INSIDE_TEXT */
       
  1775           set_error (context, error, G_MARKUP_ERROR_PARSE,
       
  1776                      _("Document ended unexpectedly with elements still open - "
       
  1777                        "'%s' was the last element opened"),
       
  1778                      current_element (context));
       
  1779         }
       
  1780       break;
       
  1781       
       
  1782     case STATE_AFTER_ELISION_SLASH:
       
  1783       set_error (context, error, G_MARKUP_ERROR_PARSE,
       
  1784                  _("Document ended unexpectedly, expected to see a close angle "
       
  1785                    "bracket ending the tag <%s/>"), current_element (context));
       
  1786       break;
       
  1787 
       
  1788     case STATE_INSIDE_OPEN_TAG_NAME:
       
  1789       set_error (context, error, G_MARKUP_ERROR_PARSE,
       
  1790                  _("Document ended unexpectedly inside an element name"));
       
  1791       break;
       
  1792 
       
  1793     case STATE_INSIDE_ATTRIBUTE_NAME:
       
  1794       set_error (context, error, G_MARKUP_ERROR_PARSE,
       
  1795                  _("Document ended unexpectedly inside an attribute name"));
       
  1796       break;
       
  1797 
       
  1798     case STATE_BETWEEN_ATTRIBUTES:
       
  1799       set_error (context, error, G_MARKUP_ERROR_PARSE,
       
  1800                  _("Document ended unexpectedly inside an element-opening "
       
  1801                    "tag."));
       
  1802       break;
       
  1803 
       
  1804     case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
       
  1805       set_error (context, error, G_MARKUP_ERROR_PARSE,
       
  1806                  _("Document ended unexpectedly after the equals sign "
       
  1807                    "following an attribute name; no attribute value"));
       
  1808       break;
       
  1809 
       
  1810     case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
       
  1811     case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
       
  1812       set_error (context, error, G_MARKUP_ERROR_PARSE,
       
  1813                  _("Document ended unexpectedly while inside an attribute "
       
  1814                    "value"));
       
  1815       break;
       
  1816 
       
  1817     case STATE_INSIDE_TEXT:
       
  1818       g_assert (context->tag_stack != NULL);
       
  1819       set_error (context, error, G_MARKUP_ERROR_PARSE,
       
  1820                  _("Document ended unexpectedly with elements still open - "
       
  1821                    "'%s' was the last element opened"),
       
  1822                  current_element (context));
       
  1823       break;
       
  1824 
       
  1825     case STATE_AFTER_CLOSE_TAG_SLASH:
       
  1826     case STATE_INSIDE_CLOSE_TAG_NAME:
       
  1827       set_error (context, error, G_MARKUP_ERROR_PARSE,
       
  1828                  _("Document ended unexpectedly inside the close tag for "
       
  1829                    "element '%s'"), current_element (context));
       
  1830       break;
       
  1831 
       
  1832     case STATE_INSIDE_PASSTHROUGH:
       
  1833       set_error (context, error, G_MARKUP_ERROR_PARSE,
       
  1834                  _("Document ended unexpectedly inside a comment or "
       
  1835                    "processing instruction"));
       
  1836       break;
       
  1837 
       
  1838     case STATE_ERROR:
       
  1839     default:
       
  1840       g_assert_not_reached ();
       
  1841       break;
       
  1842     }
       
  1843 
       
  1844   context->parsing = FALSE;
       
  1845 
       
  1846   return context->state != STATE_ERROR;
       
  1847 }
       
  1848 
       
  1849 /**
       
  1850  * g_markup_parse_context_get_element:
       
  1851  * @context: a #GMarkupParseContext
       
  1852  * @returns: the name of the currently open element, or %NULL
       
  1853  *
       
  1854  * Retrieves the name of the currently open element.
       
  1855  *
       
  1856  * Since: 2.2
       
  1857  **/
       
  1858 EXPORT_C G_CONST_RETURN gchar *
       
  1859 g_markup_parse_context_get_element (GMarkupParseContext *context)
       
  1860 {
       
  1861   g_return_val_if_fail (context != NULL, NULL);
       
  1862 
       
  1863   if (context->tag_stack == NULL) 
       
  1864     return NULL;
       
  1865   else
       
  1866     return current_element (context);
       
  1867 } 
       
  1868 
       
  1869 /**
       
  1870  * g_markup_parse_context_get_position:
       
  1871  * @context: a #GMarkupParseContext
       
  1872  * @line_number: return location for a line number, or %NULL
       
  1873  * @char_number: return location for a char-on-line number, or %NULL
       
  1874  *
       
  1875  * Retrieves the current line number and the number of the character on
       
  1876  * that line. Intended for use in error messages; there are no strict
       
  1877  * semantics for what constitutes the "current" line number other than
       
  1878  * "the best number we could come up with for error messages."
       
  1879  * 
       
  1880  **/
       
  1881 EXPORT_C void
       
  1882 g_markup_parse_context_get_position (GMarkupParseContext *context,
       
  1883                                      gint                *line_number,
       
  1884                                      gint                *char_number)
       
  1885 {
       
  1886   g_return_if_fail (context != NULL);
       
  1887 
       
  1888   if (line_number)
       
  1889     *line_number = context->line_number;
       
  1890 
       
  1891   if (char_number)
       
  1892     *char_number = context->char_number;
       
  1893 }
       
  1894 
       
  1895 static void
       
  1896 append_escaped_text (GString     *str,
       
  1897                      const gchar *text,
       
  1898                      gssize       length)    
       
  1899 {
       
  1900   const gchar *p;
       
  1901   const gchar *end;
       
  1902 
       
  1903   p = text;
       
  1904   end = text + length;
       
  1905 
       
  1906   while (p != end)
       
  1907     {
       
  1908       const gchar *next;
       
  1909       next = g_utf8_next_char (p);
       
  1910 
       
  1911       switch (*p)
       
  1912         {
       
  1913         case '&':
       
  1914           g_string_append (str, "&amp;");
       
  1915           break;
       
  1916 
       
  1917         case '<':
       
  1918           g_string_append (str, "&lt;");
       
  1919           break;
       
  1920 
       
  1921         case '>':
       
  1922           g_string_append (str, "&gt;");
       
  1923           break;
       
  1924 
       
  1925         case '\'':
       
  1926           g_string_append (str, "&apos;");
       
  1927           break;
       
  1928 
       
  1929         case '"':
       
  1930           g_string_append (str, "&quot;");
       
  1931           break;
       
  1932 
       
  1933         default:
       
  1934           g_string_append_len (str, p, next - p);
       
  1935           break;
       
  1936         }
       
  1937 
       
  1938       p = next;
       
  1939     }
       
  1940 }
       
  1941 
       
  1942 /**
       
  1943  * g_markup_escape_text:
       
  1944  * @text: some valid UTF-8 text
       
  1945  * @length: length of @text in bytes, or -1 if the text is nul-terminated
       
  1946  * 
       
  1947  * Escapes text so that the markup parser will parse it verbatim.
       
  1948  * Less than, greater than, ampersand, etc. are replaced with the
       
  1949  * corresponding entities. This function would typically be used
       
  1950  * when writing out a file to be parsed with the markup parser.
       
  1951  * 
       
  1952  * Note that this function doesn't protect whitespace and line endings
       
  1953  * from being processed according to the XML rules for normalization
       
  1954  * of line endings and attribute values.
       
  1955  * 
       
  1956  * Return value: a newly allocated string with the escaped text
       
  1957  **/
       
  1958 EXPORT_C gchar*
       
  1959 g_markup_escape_text (const gchar *text,
       
  1960                       gssize       length)  
       
  1961 {
       
  1962   GString *str;
       
  1963 
       
  1964   g_return_val_if_fail (text != NULL, NULL);
       
  1965 
       
  1966   if (length < 0)
       
  1967     length = strlen (text);
       
  1968 
       
  1969   /* prealloc at least as long as original text */
       
  1970   str = g_string_sized_new (length);
       
  1971   append_escaped_text (str, text, length);
       
  1972 
       
  1973   return g_string_free (str, FALSE);
       
  1974 }
       
  1975 
       
  1976 /**
       
  1977  * find_conversion:
       
  1978  * @format: a printf-style format string
       
  1979  * @after: location to store a pointer to the character after
       
  1980  *   the returned conversion. On a %NULL return, returns the
       
  1981  *   pointer to the trailing NUL in the string
       
  1982  * 
       
  1983  * Find the next conversion in a printf-style format string.
       
  1984  * Partially based on code from printf-parser.c,
       
  1985  * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
       
  1986  * 
       
  1987  * Return value: pointer to the next conversion in @format,
       
  1988  *  or %NULL, if none.
       
  1989  **/
       
  1990 static const char *
       
  1991 find_conversion (const char  *format,
       
  1992 		 const char **after)
       
  1993 {
       
  1994   const char *start = format;
       
  1995   const char *cp;
       
  1996   
       
  1997   while (*start != '\0' && *start != '%')
       
  1998     start++;
       
  1999 
       
  2000   if (*start == '\0')
       
  2001     {
       
  2002       *after = start;
       
  2003       return NULL;
       
  2004     }
       
  2005 
       
  2006   cp = start + 1;
       
  2007 
       
  2008   if (*cp == '\0')
       
  2009     {
       
  2010       *after = cp;
       
  2011       return NULL;
       
  2012     }
       
  2013   
       
  2014   /* Test for positional argument.  */
       
  2015   if (*cp >= '0' && *cp <= '9')
       
  2016     {
       
  2017       const char *np;
       
  2018       
       
  2019       for (np = cp; *np >= '0' && *np <= '9'; np++)
       
  2020 	;
       
  2021       if (*np == '$')
       
  2022 	cp = np + 1;
       
  2023     }
       
  2024 
       
  2025   /* Skip the flags.  */
       
  2026   for (;;)
       
  2027     {
       
  2028       if (*cp == '\'' ||
       
  2029 	  *cp == '-' ||
       
  2030 	  *cp == '+' ||
       
  2031 	  *cp == ' ' ||
       
  2032 	  *cp == '#' ||
       
  2033 	  *cp == '0')
       
  2034 	cp++;
       
  2035       else
       
  2036 	break;
       
  2037     }
       
  2038 
       
  2039   /* Skip the field width.  */
       
  2040   if (*cp == '*')
       
  2041     {
       
  2042       cp++;
       
  2043 
       
  2044       /* Test for positional argument.  */
       
  2045       if (*cp >= '0' && *cp <= '9')
       
  2046 	{
       
  2047 	  const char *np;
       
  2048 
       
  2049 	  for (np = cp; *np >= '0' && *np <= '9'; np++)
       
  2050 	    ;
       
  2051 	  if (*np == '$')
       
  2052 	    cp = np + 1;
       
  2053 	}
       
  2054     }
       
  2055   else
       
  2056     {
       
  2057       for (; *cp >= '0' && *cp <= '9'; cp++)
       
  2058 	;
       
  2059     }
       
  2060 
       
  2061   /* Skip the precision.  */
       
  2062   if (*cp == '.')
       
  2063     {
       
  2064       cp++;
       
  2065       if (*cp == '*')
       
  2066 	{
       
  2067 	  /* Test for positional argument.  */
       
  2068 	  if (*cp >= '0' && *cp <= '9')
       
  2069 	    {
       
  2070 	      const char *np;
       
  2071 
       
  2072 	      for (np = cp; *np >= '0' && *np <= '9'; np++)
       
  2073 		;
       
  2074 	      if (*np == '$')
       
  2075 		cp = np + 1;
       
  2076 	    }
       
  2077 	}
       
  2078       else
       
  2079 	{
       
  2080 	  for (; *cp >= '0' && *cp <= '9'; cp++)
       
  2081 	    ;
       
  2082 	}
       
  2083     }
       
  2084 
       
  2085   /* Skip argument type/size specifiers.  */
       
  2086   while (*cp == 'h' ||
       
  2087 	 *cp == 'L' ||
       
  2088 	 *cp == 'l' ||
       
  2089 	 *cp == 'j' ||
       
  2090 	 *cp == 'z' ||
       
  2091 	 *cp == 'Z' ||
       
  2092 	 *cp == 't')
       
  2093     cp++;
       
  2094 	  
       
  2095   /* Skip the conversion character.  */
       
  2096   cp++;
       
  2097 
       
  2098   *after = cp;
       
  2099   return start;
       
  2100 }
       
  2101 
       
  2102 /**
       
  2103  * g_markup_vprintf_escaped:
       
  2104  * @format: printf() style format string
       
  2105  * @args: variable argument list, similar to vprintf()
       
  2106  * 
       
  2107  * Formats the data in @args according to @format, escaping
       
  2108  * all string and character arguments in the fashion
       
  2109  * of g_markup_escape_text(). See g_markup_printf_escaped().
       
  2110  * 
       
  2111  * Return value: newly allocated result from formatting
       
  2112  *  operation. Free with g_free().
       
  2113  *
       
  2114  * Since: 2.4
       
  2115  **/
       
  2116 EXPORT_C char *
       
  2117 g_markup_vprintf_escaped (const char *format,
       
  2118 			  va_list     args)
       
  2119 {
       
  2120   GString *format1;
       
  2121   GString *format2;
       
  2122   GString *result = NULL;
       
  2123   gchar *output1 = NULL;
       
  2124   gchar *output2 = NULL;
       
  2125   const char *p, *op1, *op2;
       
  2126   va_list args2;
       
  2127 
       
  2128   /* The technique here, is that we make two format strings that
       
  2129    * have the identical conversions in the identical order to the
       
  2130    * original strings, but differ in the text in-between. We
       
  2131    * then use the normal g_strdup_vprintf() to format the arguments
       
  2132    * with the two new format strings. By comparing the results,
       
  2133    * we can figure out what segments of the output come from
       
  2134    * the original format string, and what from the arguments,
       
  2135    * and thus know what portions of the string to escape.
       
  2136    *
       
  2137    * For instance, for:
       
  2138    *
       
  2139    *  g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5);
       
  2140    *
       
  2141    * We form the two format strings "%sX%dX" and "%sY%dY". The results
       
  2142    * of formatting with those two strings are
       
  2143    *
       
  2144    * "%sX%dX" => "Susan & FredX5X"
       
  2145    * "%sY%dY" => "Susan & FredY5Y"
       
  2146    *
       
  2147    * To find the span of the first argument, we find the first position
       
  2148    * where the two arguments differ, which tells us that the first
       
  2149    * argument formatted to "Susan & Fred". We then escape that
       
  2150    * to "Susan &amp; Fred" and join up with the intermediate portions
       
  2151    * of the format string and the second argument to get
       
  2152    * "Susan &amp; Fred ate 5 apples".
       
  2153    */
       
  2154 
       
  2155   /* Create the two modified format strings
       
  2156    */
       
  2157   format1 = g_string_new (NULL);
       
  2158   format2 = g_string_new (NULL);
       
  2159   p = format;
       
  2160   while (TRUE)
       
  2161     {
       
  2162       const char *after;
       
  2163       const char *conv = find_conversion (p, &after);
       
  2164       if (!conv)
       
  2165 	break;
       
  2166 
       
  2167       g_string_append_len (format1, conv, after - conv);
       
  2168       g_string_append_c (format1, 'X');
       
  2169       g_string_append_len (format2, conv, after - conv);
       
  2170       g_string_append_c (format2, 'Y');
       
  2171 
       
  2172       p = after;
       
  2173     }
       
  2174 
       
  2175   /* Use them to format the arguments
       
  2176    */
       
  2177   G_VA_COPY (args2, args);
       
  2178   
       
  2179   output1 = g_strdup_vprintf (format1->str, args);
       
  2180   va_end (args);
       
  2181   if (!output1)
       
  2182     goto cleanup;
       
  2183   
       
  2184   output2 = g_strdup_vprintf (format2->str, args2);
       
  2185   va_end (args2);
       
  2186   if (!output2)
       
  2187     goto cleanup;
       
  2188 
       
  2189   result = g_string_new (NULL);
       
  2190 
       
  2191   /* Iterate through the original format string again,
       
  2192    * copying the non-conversion portions and the escaped
       
  2193    * converted arguments to the output string.
       
  2194    */
       
  2195   op1 = output1;
       
  2196   op2 = output2;
       
  2197   p = format;
       
  2198   while (TRUE)
       
  2199     {
       
  2200       const char *after;
       
  2201       const char *output_start;
       
  2202       const char *conv = find_conversion (p, &after);
       
  2203       char *escaped;
       
  2204       
       
  2205       if (!conv)	/* The end, after points to the trailing \0 */
       
  2206 	{
       
  2207 	  g_string_append_len (result, p, after - p);
       
  2208 	  break;
       
  2209 	}
       
  2210 
       
  2211       g_string_append_len (result, p, conv - p);
       
  2212       output_start = op1;
       
  2213       while (*op1 == *op2)
       
  2214 	{
       
  2215 	  op1++;
       
  2216 	  op2++;
       
  2217 	}
       
  2218       
       
  2219       escaped = g_markup_escape_text (output_start, op1 - output_start);
       
  2220       g_string_append (result, escaped);
       
  2221       g_free (escaped);
       
  2222       
       
  2223       p = after;
       
  2224       op1++;
       
  2225       op2++;
       
  2226     }
       
  2227 
       
  2228  cleanup:
       
  2229   g_string_free (format1, TRUE);
       
  2230   g_string_free (format2, TRUE);
       
  2231   g_free (output1);
       
  2232   g_free (output2);
       
  2233 
       
  2234   if (result)
       
  2235     return g_string_free (result, FALSE);
       
  2236   else
       
  2237     return NULL;
       
  2238 }
       
  2239 
       
  2240 /**
       
  2241  * g_markup_printf_escaped:
       
  2242  * @format: printf() style format string
       
  2243  * @Varargs: the arguments to insert in the format string
       
  2244  * 
       
  2245  * Formats arguments according to @format, escaping
       
  2246  * all string and character arguments in the fashion
       
  2247  * of g_markup_escape_text(). This is useful when you
       
  2248  * want to insert literal strings into XML-style markup
       
  2249  * output, without having to worry that the strings
       
  2250  * might themselves contain markup.
       
  2251  *
       
  2252  * <informalexample><programlisting>
       
  2253  * const char *store = "Fortnum &amp; Mason";
       
  2254  * const char *item = "Tea";
       
  2255  * char *output;
       
  2256  * &nbsp;
       
  2257  * output = g_markup_printf_escaped ("&lt;purchase&gt;"
       
  2258  *                                   "&lt;store&gt;&percnt;s&lt;/store&gt;"
       
  2259  *                                   "&lt;item&gt;&percnt;s&lt;/item&gt;"
       
  2260  *                                   "&lt;/purchase&gt;",
       
  2261  *                                   store, item);
       
  2262  * </programlisting></informalexample>
       
  2263  * 
       
  2264  * Return value: newly allocated result from formatting
       
  2265  *  operation. Free with g_free().
       
  2266  *
       
  2267  * Since: 2.4
       
  2268  **/
       
  2269 EXPORT_C char *
       
  2270 g_markup_printf_escaped (const char *format, ...)
       
  2271 {
       
  2272   char *result;
       
  2273   va_list args;
       
  2274   
       
  2275   va_start (args, format);
       
  2276   result = g_markup_vprintf_escaped (format, args);
       
  2277   va_end (args);
       
  2278 
       
  2279   return result;
       
  2280 }
       
  2281 
       
  2282 #define __G_MARKUP_C__
       
  2283 #include "galiasdef.c"