glib/libglib/src/gconvert.c
changeset 0 e4d67989cc36
equal deleted inserted replaced
-1:000000000000 0:e4d67989cc36
       
     1 /* GLIB - Library of useful routines for C programming
       
     2  * 
       
     3  * gconvert.c: Convert between character sets using iconv
       
     4  * Copyright Red Hat Inc., 2000
       
     5  * Authors: Havoc Pennington <hp@redhat.com>, Owen Taylor <otaylor@redhat.com>
       
     6  * Portions copyright (c) 2006 Nokia Corporation.  All rights reserved.
       
     7  *
       
     8  * This library is free software; you can redistribute it and/or
       
     9  * modify it under the terms of the GNU Lesser General Public
       
    10  * License as published by the Free Software Foundation; either
       
    11  * version 2 of the License, or (at your option) any later version.
       
    12  *
       
    13  * This library is distributed in the hope that it will be useful,
       
    14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    16  * Lesser General Public License for more details.
       
    17  *
       
    18  * You should have received a copy of the GNU Lesser General Public
       
    19  * License along with this library; if not, write to the
       
    20  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
       
    21  * Boston, MA 02111-1307, USA.
       
    22  */
       
    23 
       
    24 #include "config.h"
       
    25 
       
    26 #include <iconv.h>
       
    27 #include <errno.h>
       
    28 #include <stdio.h>
       
    29 #include <string.h>
       
    30 #include <stdlib.h>
       
    31 
       
    32 #include "glib.h"
       
    33 #include "gprintfint.h"
       
    34 #include "gthreadinit.h"
       
    35 #ifdef __SYMBIAN32__
       
    36 #include <glib_wsd.h>
       
    37 #endif
       
    38 
       
    39 #ifdef G_PLATFORM_WIN32
       
    40 #define STRICT
       
    41 #include <windows.h>
       
    42 #undef STRICT
       
    43 #endif
       
    44 
       
    45 #include "glibintl.h"
       
    46 
       
    47 #if defined(USE_LIBICONV_GNU) && !defined (_LIBICONV_H)
       
    48 #error GNU libiconv in use but included iconv.h not from libiconv
       
    49 #endif
       
    50 #if !defined(USE_LIBICONV_GNU) && defined (_LIBICONV_H)
       
    51 #error GNU libiconv not in use but included iconv.h is from libiconv
       
    52 #endif
       
    53 
       
    54 #include "galias.h"
       
    55 
       
    56 #if EMULATOR
       
    57 
       
    58 PLS(quark,g_convert_error_quark,GQuark)
       
    59 #define quark (*FUNCTION_NAME(quark,g_convert_error_quark)())
       
    60 
       
    61 #endif /* EMULATOR */
       
    62 
       
    63 EXPORT_C GQuark 
       
    64 g_convert_error_quark (void)
       
    65 {
       
    66   #if!EMULATOR
       
    67   
       
    68   static GQuark quark;
       
    69   
       
    70   #endif /* EMULATOR */
       
    71   
       
    72   if (!quark)
       
    73     quark = g_quark_from_static_string ("g_convert_error");
       
    74 
       
    75   return quark;
       
    76 }
       
    77 #if EMULATOR
       
    78 #undef quark
       
    79 #endif /* EMULATOR */
       
    80 
       
    81 static gboolean
       
    82 try_conversion (const char *to_codeset,
       
    83 		const char *from_codeset,
       
    84 		iconv_t    *cd)
       
    85 {
       
    86   *cd = iconv_open (to_codeset, from_codeset);
       
    87 
       
    88   if (*cd == (iconv_t)-1 && errno == EINVAL)
       
    89     return FALSE;
       
    90   else
       
    91     return TRUE;
       
    92 }
       
    93 
       
    94 static gboolean
       
    95 try_to_aliases (const char **to_aliases,
       
    96 		const char  *from_codeset,
       
    97 		iconv_t     *cd)
       
    98 {
       
    99   if (to_aliases)
       
   100     {
       
   101       const char **p = to_aliases;
       
   102       while (*p)
       
   103 	{
       
   104 	  if (try_conversion (*p, from_codeset, cd))
       
   105 	    return TRUE;
       
   106 
       
   107 	  p++;
       
   108 	}
       
   109     }
       
   110 
       
   111   return FALSE;
       
   112 }
       
   113 
       
   114 extern const char **_g_charset_get_aliases (const char *canonical_name) G_GNUC_INTERNAL;
       
   115 
       
   116 /**
       
   117  * g_iconv_open:
       
   118  * @to_codeset: destination codeset
       
   119  * @from_codeset: source codeset
       
   120  * 
       
   121  * Same as the standard UNIX routine iconv_open(), but
       
   122  * may be implemented via libiconv on UNIX flavors that lack
       
   123  * a native implementation.
       
   124  * 
       
   125  * GLib provides g_convert() and g_locale_to_utf8() which are likely
       
   126  * more convenient than the raw iconv wrappers.
       
   127  * 
       
   128  * Return value: a "conversion descriptor", or (GIConv)-1 if
       
   129  *  opening the converter failed.
       
   130  **/
       
   131 EXPORT_C GIConv
       
   132 g_iconv_open (const gchar  *to_codeset,
       
   133 	      const gchar  *from_codeset)
       
   134 {
       
   135   iconv_t cd;
       
   136   
       
   137   if (!try_conversion (to_codeset, from_codeset, &cd))
       
   138     {
       
   139       const char **to_aliases = _g_charset_get_aliases (to_codeset);
       
   140       const char **from_aliases = _g_charset_get_aliases (from_codeset);
       
   141 
       
   142       if (from_aliases)
       
   143 	{
       
   144 	  const char **p = from_aliases;
       
   145 	  while (*p)
       
   146 	    {
       
   147 	      if (try_conversion (to_codeset, *p, &cd))
       
   148 		goto out;
       
   149 
       
   150 	      if (try_to_aliases (to_aliases, *p, &cd))
       
   151 		goto out;
       
   152 
       
   153 	      p++;
       
   154 	    }
       
   155 	}
       
   156 
       
   157       if (try_to_aliases (to_aliases, from_codeset, &cd))
       
   158 	goto out;
       
   159     }
       
   160 
       
   161  out:
       
   162   return (cd == (iconv_t)-1) ? (GIConv)-1 : (GIConv)cd;
       
   163 }
       
   164 
       
   165 /**
       
   166  * g_iconv:
       
   167  * @converter: conversion descriptor from g_iconv_open()
       
   168  * @inbuf: bytes to convert
       
   169  * @inbytes_left: inout parameter, bytes remaining to convert in @inbuf
       
   170  * @outbuf: converted output bytes
       
   171  * @outbytes_left: inout parameter, bytes available to fill in @outbuf
       
   172  * 
       
   173  * Same as the standard UNIX routine iconv(), but
       
   174  * may be implemented via libiconv on UNIX flavors that lack
       
   175  * a native implementation.
       
   176  *
       
   177  * GLib provides g_convert() and g_locale_to_utf8() which are likely
       
   178  * more convenient than the raw iconv wrappers.
       
   179  * 
       
   180  * Return value: count of non-reversible conversions, or -1 on error
       
   181  **/
       
   182 EXPORT_C size_t 
       
   183 g_iconv (GIConv   converter,
       
   184 	 gchar  **inbuf,
       
   185 	 gsize   *inbytes_left,
       
   186 	 gchar  **outbuf,
       
   187 	 gsize   *outbytes_left)
       
   188 {
       
   189   iconv_t cd = (iconv_t)converter;
       
   190 
       
   191   return iconv (cd, FIX_CASTING(const char **)inbuf, inbytes_left, outbuf, outbytes_left);
       
   192 }
       
   193 
       
   194 /**
       
   195  * g_iconv_close:
       
   196  * @converter: a conversion descriptor from g_iconv_open()
       
   197  *
       
   198  * Same as the standard UNIX routine iconv_close(), but
       
   199  * may be implemented via libiconv on UNIX flavors that lack
       
   200  * a native implementation. Should be called to clean up
       
   201  * the conversion descriptor from g_iconv_open() when
       
   202  * you are done converting things.
       
   203  *
       
   204  * GLib provides g_convert() and g_locale_to_utf8() which are likely
       
   205  * more convenient than the raw iconv wrappers.
       
   206  * 
       
   207  * Return value: -1 on error, 0 on success
       
   208  **/
       
   209 EXPORT_C gint
       
   210 g_iconv_close (GIConv converter)
       
   211 {
       
   212   iconv_t cd = (iconv_t)converter;
       
   213 
       
   214   return iconv_close (cd);
       
   215 }
       
   216 
       
   217 
       
   218 #ifdef NEED_ICONV_CACHE
       
   219 
       
   220 #define ICONV_CACHE_SIZE   (16)
       
   221 
       
   222 struct _iconv_cache_bucket {
       
   223   gchar *key;
       
   224   guint32 refcount;
       
   225   gboolean used;
       
   226   GIConv cd;
       
   227 };
       
   228 
       
   229 static GList *iconv_cache_list;
       
   230 static GHashTable *iconv_cache;
       
   231 static GHashTable *iconv_open_hash;
       
   232 static guint iconv_cache_size = 0;
       
   233 G_LOCK_DEFINE_STATIC (iconv_cache_lock);
       
   234 
       
   235 /* caller *must* hold the iconv_cache_lock */
       
   236 static void
       
   237 iconv_cache_init (void)
       
   238 {
       
   239   static gboolean initialized = FALSE;
       
   240   
       
   241   if (initialized)
       
   242     return;
       
   243   
       
   244   iconv_cache_list = NULL;
       
   245   iconv_cache = g_hash_table_new (g_str_hash, g_str_equal);
       
   246   iconv_open_hash = g_hash_table_new (g_direct_hash, g_direct_equal);
       
   247   
       
   248   initialized = TRUE;
       
   249 }
       
   250 
       
   251 
       
   252 /**
       
   253  * iconv_cache_bucket_new:
       
   254  * @key: cache key
       
   255  * @cd: iconv descriptor
       
   256  *
       
   257  * Creates a new cache bucket, inserts it into the cache and
       
   258  * increments the cache size.
       
   259  *
       
   260  * Returns a pointer to the newly allocated cache bucket.
       
   261  **/
       
   262 static struct _iconv_cache_bucket *
       
   263 iconv_cache_bucket_new (const gchar *key, GIConv cd)
       
   264 {
       
   265   struct _iconv_cache_bucket *bucket;
       
   266   bucket = g_new (struct _iconv_cache_bucket, 1);
       
   267   bucket->key = g_strdup (key);
       
   268   bucket->refcount = 1;
       
   269   bucket->used = TRUE;
       
   270   bucket->cd = cd;
       
   271   
       
   272   g_hash_table_insert (iconv_cache, bucket->key, bucket);
       
   273   
       
   274   /* FIXME: if we sorted the list so items with few refcounts were
       
   275      first, then we could expire them faster in iconv_cache_expire_unused () */
       
   276   iconv_cache_list = g_list_prepend (iconv_cache_list, bucket);
       
   277   
       
   278   iconv_cache_size++;
       
   279   
       
   280   return bucket;
       
   281 }
       
   282 
       
   283 
       
   284 /**
       
   285  * iconv_cache_bucket_expire:
       
   286  * @node: cache bucket's node
       
   287  * @bucket: cache bucket
       
   288  *
       
   289  * Expires a single cache bucket @bucket. This should only ever be
       
   290  * called on a bucket that currently has no used iconv descriptors
       
   291  * open.
       
   292  *
       
   293  * @node is not a required argument. If @node is not supplied, we
       
   294  * search for it ourselves.
       
   295  **/
       
   296 static void
       
   297 iconv_cache_bucket_expire (GList *node, struct _iconv_cache_bucket *bucket)
       
   298 {
       
   299   g_hash_table_remove (iconv_cache, bucket->key);
       
   300   
       
   301   if (node == NULL)
       
   302     node = g_list_find (iconv_cache_list, bucket);
       
   303   
       
   304   g_assert (node != NULL);
       
   305   
       
   306   if (node->prev)
       
   307     {
       
   308       node->prev->next = node->next;
       
   309       if (node->next)
       
   310         node->next->prev = node->prev;
       
   311     }
       
   312   else
       
   313     {
       
   314       iconv_cache_list = node->next;
       
   315       if (node->next)
       
   316         node->next->prev = NULL;
       
   317     }
       
   318   
       
   319   g_list_free_1 (node);
       
   320   
       
   321   g_free (bucket->key);
       
   322   g_iconv_close (bucket->cd);
       
   323   g_free (bucket);
       
   324   
       
   325   iconv_cache_size--;
       
   326 }
       
   327 
       
   328 
       
   329 /**
       
   330  * iconv_cache_expire_unused:
       
   331  *
       
   332  * Expires as many unused cache buckets as it needs to in order to get
       
   333  * the total number of buckets < ICONV_CACHE_SIZE.
       
   334  **/
       
   335 static void
       
   336 iconv_cache_expire_unused (void)
       
   337 {
       
   338   struct _iconv_cache_bucket *bucket;
       
   339   GList *node, *next;
       
   340   
       
   341   node = iconv_cache_list;
       
   342   while (node && iconv_cache_size >= ICONV_CACHE_SIZE)
       
   343     {
       
   344       next = node->next;
       
   345       
       
   346       bucket = node->data;
       
   347       if (bucket->refcount == 0)
       
   348         iconv_cache_bucket_expire (node, bucket);
       
   349       
       
   350       node = next;
       
   351     }
       
   352 }
       
   353 
       
   354 static GIConv
       
   355 open_converter (const gchar *to_codeset,
       
   356 		const gchar *from_codeset,
       
   357 		GError     **error)
       
   358 {
       
   359   struct _iconv_cache_bucket *bucket;
       
   360   gchar *key;
       
   361   GIConv cd;
       
   362   
       
   363   /* create our key */
       
   364   key = g_alloca (strlen (from_codeset) + strlen (to_codeset) + 2);
       
   365   _g_sprintf (key, "%s:%s", from_codeset, to_codeset);
       
   366   
       
   367   G_LOCK (iconv_cache_lock);
       
   368   
       
   369   /* make sure the cache has been initialized */
       
   370   iconv_cache_init ();
       
   371   
       
   372   bucket = g_hash_table_lookup (iconv_cache, key);
       
   373   if (bucket)
       
   374     {
       
   375       if (bucket->used)
       
   376         {
       
   377           cd = g_iconv_open (to_codeset, from_codeset);
       
   378           if (cd == (GIConv) -1)
       
   379             goto error;
       
   380         }
       
   381       else
       
   382         {
       
   383 	  /* Apparently iconv on Solaris <= 7 segfaults if you pass in
       
   384 	   * NULL for anything but inbuf; work around that. (NULL outbuf
       
   385 	   * or NULL *outbuf is allowed by Unix98.)
       
   386 	   */
       
   387 	  gsize inbytes_left = 0;
       
   388 	  gchar *outbuf = NULL;
       
   389 	  gsize outbytes_left = 0;
       
   390 		
       
   391           cd = bucket->cd;
       
   392           bucket->used = TRUE;
       
   393           
       
   394           /* reset the descriptor */
       
   395           g_iconv (cd, NULL, &inbytes_left, &outbuf, &outbytes_left);
       
   396         }
       
   397       
       
   398       bucket->refcount++;
       
   399     }
       
   400   else
       
   401     {
       
   402       cd = g_iconv_open (to_codeset, from_codeset);
       
   403       if (cd == (GIConv) -1)
       
   404         goto error;
       
   405       
       
   406       iconv_cache_expire_unused ();
       
   407       
       
   408       bucket = iconv_cache_bucket_new (key, cd);
       
   409     }
       
   410   
       
   411   g_hash_table_insert (iconv_open_hash, cd, bucket->key);
       
   412   
       
   413   G_UNLOCK (iconv_cache_lock);
       
   414   
       
   415   return cd;
       
   416   
       
   417  error:
       
   418   
       
   419   G_UNLOCK (iconv_cache_lock);
       
   420   
       
   421   /* Something went wrong.  */
       
   422   if (error)
       
   423     {
       
   424       if (errno == EINVAL)
       
   425 	g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
       
   426 		     _("Conversion from character set '%s' to '%s' is not supported"),
       
   427 		     from_codeset, to_codeset);
       
   428       else
       
   429 	g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
       
   430 		     _("Could not open converter from '%s' to '%s'"),
       
   431 		     from_codeset, to_codeset);
       
   432     }
       
   433   
       
   434   return cd;
       
   435 }
       
   436 
       
   437 static int
       
   438 close_converter (GIConv converter)
       
   439 {
       
   440   struct _iconv_cache_bucket *bucket;
       
   441   const gchar *key;
       
   442   GIConv cd;
       
   443   
       
   444   cd = converter;
       
   445   
       
   446   if (cd == (GIConv) -1)
       
   447     return 0;
       
   448   
       
   449   G_LOCK (iconv_cache_lock);
       
   450   
       
   451   key = g_hash_table_lookup (iconv_open_hash, cd);
       
   452   if (key)
       
   453     {
       
   454       g_hash_table_remove (iconv_open_hash, cd);
       
   455       
       
   456       bucket = g_hash_table_lookup (iconv_cache, key);
       
   457       g_assert (bucket);
       
   458       
       
   459       bucket->refcount--;
       
   460       
       
   461       if (cd == bucket->cd)
       
   462         bucket->used = FALSE;
       
   463       else
       
   464         g_iconv_close (cd);
       
   465       
       
   466       if (!bucket->refcount && iconv_cache_size > ICONV_CACHE_SIZE)
       
   467         {
       
   468           /* expire this cache bucket */
       
   469           iconv_cache_bucket_expire (NULL, bucket);
       
   470         }
       
   471     }
       
   472   else
       
   473     {
       
   474       G_UNLOCK (iconv_cache_lock);
       
   475       
       
   476       g_warning ("This iconv context wasn't opened using open_converter");
       
   477       
       
   478       return g_iconv_close (converter);
       
   479     }
       
   480   
       
   481   G_UNLOCK (iconv_cache_lock);
       
   482   
       
   483   return 0;
       
   484 }
       
   485 
       
   486 #else  /* !NEED_ICONV_CACHE */
       
   487 
       
   488 static GIConv
       
   489 open_converter (const gchar *to_codeset,
       
   490 		const gchar *from_codeset,
       
   491 		GError     **error)
       
   492 {
       
   493   GIConv cd;
       
   494 
       
   495   cd = g_iconv_open (to_codeset, from_codeset);
       
   496 
       
   497   if (cd == (GIConv) -1)
       
   498     {
       
   499       /* Something went wrong.  */
       
   500       if (error)
       
   501 	{
       
   502 	  if (errno == EINVAL)
       
   503 	    g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
       
   504 			 _("Conversion from character set '%s' to '%s' is not supported"),
       
   505 			 from_codeset, to_codeset);
       
   506 	  else
       
   507 	    g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
       
   508 			 _("Could not open converter from '%s' to '%s'"),
       
   509 			 from_codeset, to_codeset);
       
   510 	}
       
   511     }
       
   512   
       
   513   return cd;
       
   514 }
       
   515 
       
   516 static int
       
   517 close_converter (GIConv cd)
       
   518 {
       
   519   if (cd == (GIConv) -1)
       
   520     return 0;
       
   521   
       
   522   return g_iconv_close (cd);  
       
   523 }
       
   524 
       
   525 #endif /* NEED_ICONV_CACHE */
       
   526 
       
   527 /**
       
   528  * g_convert_with_iconv:
       
   529  * @str:           the string to convert
       
   530  * @len:           the length of the string, or -1 if the string is 
       
   531  *                 nul-terminated<footnoteref linkend="nul-unsafe"/>. 
       
   532  * @converter:     conversion descriptor from g_iconv_open()
       
   533  * @bytes_read:    location to store the number of bytes in the
       
   534  *                 input string that were successfully converted, or %NULL.
       
   535  *                 Even if the conversion was successful, this may be 
       
   536  *                 less than @len if there were partial characters
       
   537  *                 at the end of the input. If the error
       
   538  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
       
   539  *                 stored will the byte offset after the last valid
       
   540  *                 input sequence.
       
   541  * @bytes_written: the number of bytes stored in the output buffer (not 
       
   542  *                 including the terminating nul).
       
   543  * @error:         location to store the error occuring, or %NULL to ignore
       
   544  *                 errors. Any of the errors in #GConvertError may occur.
       
   545  *
       
   546  * Converts a string from one character set to another. 
       
   547  * 
       
   548  * Note that you should use g_iconv() for streaming 
       
   549  * conversions<footnote id="streaming-state">
       
   550  *  <para>
       
   551  * Despite the fact that @byes_read can return information about partial 
       
   552  * characters, the <literal>g_convert_...</literal> functions
       
   553  * are not generally suitable for streaming. If the underlying converter 
       
   554  * being used maintains internal state, then this won't be preserved 
       
   555  * across successive calls to g_convert(), g_convert_with_iconv() or 
       
   556  * g_convert_with_fallback(). (An example of this is the GNU C converter 
       
   557  * for CP1255 which does not emit a base character until it knows that 
       
   558  * the next character is not a mark that could combine with the base 
       
   559  * character.)
       
   560  *  </para>
       
   561  * </footnote>. 
       
   562  *
       
   563  * Return value: If the conversion was successful, a newly allocated
       
   564  *               nul-terminated string, which must be freed with
       
   565  *               g_free(). Otherwise %NULL and @error will be set.
       
   566  **/
       
   567 EXPORT_C gchar*
       
   568 g_convert_with_iconv (const gchar *str,
       
   569 		      gssize       len,
       
   570 		      GIConv       converter,
       
   571 		      gsize       *bytes_read, 
       
   572 		      gsize       *bytes_written, 
       
   573 		      GError     **error)
       
   574 {
       
   575   gchar *dest;
       
   576   gchar *outp;
       
   577   const gchar *p;
       
   578   const gchar *shift_p = NULL;
       
   579   gsize inbytes_remaining;
       
   580   gsize outbytes_remaining;
       
   581   gsize err;
       
   582   gsize outbuf_size;
       
   583   gboolean have_error = FALSE;
       
   584   gboolean done = FALSE;
       
   585   
       
   586   g_return_val_if_fail (converter != (GIConv) -1, NULL);
       
   587      
       
   588   if (len < 0)
       
   589     len = strlen (str);
       
   590 
       
   591   p = str;
       
   592   inbytes_remaining = len;
       
   593   outbuf_size = len + 1; /* + 1 for nul in case len == 1 */
       
   594   
       
   595   outbytes_remaining = outbuf_size - 1; /* -1 for nul */
       
   596 
       
   597   outp = dest = g_malloc (outbuf_size);
       
   598 
       
   599   while (!done && !have_error)
       
   600     {
       
   601       err = g_iconv (converter, (char **)&p, &inbytes_remaining, &outp, &outbytes_remaining);
       
   602 
       
   603       if (err == (size_t) -1)
       
   604 	{
       
   605 	  switch (errno)
       
   606 	    {
       
   607 	    case EINVAL:
       
   608 	      /* Incomplete text, do not report an error */
       
   609 	      done = TRUE;
       
   610 	      break;
       
   611 	    case E2BIG:
       
   612 	      {
       
   613 		size_t used = outp - dest;
       
   614 		
       
   615 		outbuf_size *= 2;
       
   616 		dest = g_realloc (dest, outbuf_size);
       
   617 		
       
   618 		outp = dest + used;
       
   619 		outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
       
   620 	      }
       
   621 	      break;
       
   622 	    case EILSEQ:
       
   623 	      if (error)
       
   624 		g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
       
   625 			     _("Invalid byte sequence in conversion input"));
       
   626 	      have_error = TRUE;
       
   627 	      break;
       
   628 	    default:
       
   629 	      if (error)
       
   630 		g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
       
   631 			     _("Error during conversion: %s"),
       
   632 			     g_strerror (errno));
       
   633 	      have_error = TRUE;
       
   634 	      break;
       
   635 	    }
       
   636 	}
       
   637       else 
       
   638 	{
       
   639 	  if (!shift_p)
       
   640 	    {
       
   641 	      /* call g_iconv with NULL inbuf to cleanup shift state */
       
   642 	      shift_p = p;
       
   643 	      p = NULL;
       
   644 	      inbytes_remaining = 0;
       
   645 	    }
       
   646 	  else
       
   647 	    done = TRUE;
       
   648 	}
       
   649     }
       
   650 
       
   651   if (shift_p)
       
   652     p = shift_p;
       
   653 
       
   654   *outp = '\0';
       
   655   
       
   656   if (bytes_read)
       
   657     *bytes_read = p - str;
       
   658   else
       
   659     {
       
   660       if ((p - str) != len) 
       
   661 	{
       
   662           if (!have_error)
       
   663             {
       
   664 	      if (error)
       
   665 		g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
       
   666 			     _("Partial character sequence at end of input"));
       
   667               have_error = TRUE;
       
   668             }
       
   669 	}
       
   670     }
       
   671 
       
   672   if (bytes_written)
       
   673     *bytes_written = outp - dest;	/* Doesn't include '\0' */
       
   674 
       
   675   if (have_error)
       
   676     {
       
   677       g_free (dest);
       
   678       return NULL;
       
   679     }
       
   680   else
       
   681     return dest;
       
   682 }
       
   683 
       
   684 /**
       
   685  * g_convert:
       
   686  * @str:           the string to convert
       
   687  * @len:           the length of the string, or -1 if the string is 
       
   688  *                 nul-terminated<footnote id="nul-unsafe">
       
   689                      <para>
       
   690                        Note that some encodings may allow nul bytes to 
       
   691                        occur inside strings. In that case, using -1 for 
       
   692                        the @len parameter is unsafe.
       
   693                      </para>
       
   694                    </footnote>. 
       
   695  * @to_codeset:    name of character set into which to convert @str
       
   696  * @from_codeset:  character set of @str.
       
   697  * @bytes_read:    location to store the number of bytes in the
       
   698  *                 input string that were successfully converted, or %NULL.
       
   699  *                 Even if the conversion was successful, this may be 
       
   700  *                 less than @len if there were partial characters
       
   701  *                 at the end of the input. If the error
       
   702  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
       
   703  *                 stored will the byte offset after the last valid
       
   704  *                 input sequence.
       
   705  * @bytes_written: the number of bytes stored in the output buffer (not 
       
   706  *                 including the terminating nul).
       
   707  * @error:         location to store the error occuring, or %NULL to ignore
       
   708  *                 errors. Any of the errors in #GConvertError may occur.
       
   709  *
       
   710  * Converts a string from one character set to another.
       
   711  *
       
   712  * Note that you should use g_iconv() for streaming 
       
   713  * conversions<footnoteref linkend="streaming-state"/>.
       
   714  *
       
   715  * Return value: If the conversion was successful, a newly allocated
       
   716  *               nul-terminated string, which must be freed with
       
   717  *               g_free(). Otherwise %NULL and @error will be set.
       
   718  **/
       
   719 EXPORT_C gchar*
       
   720 g_convert (const gchar *str,
       
   721            gssize       len,  
       
   722            const gchar *to_codeset,
       
   723            const gchar *from_codeset,
       
   724            gsize       *bytes_read, 
       
   725 	   gsize       *bytes_written, 
       
   726 	   GError     **error)
       
   727 {
       
   728   gchar *res;
       
   729   GIConv cd;
       
   730 
       
   731   g_return_val_if_fail (str != NULL, NULL);
       
   732   g_return_val_if_fail (to_codeset != NULL, NULL);
       
   733   g_return_val_if_fail (from_codeset != NULL, NULL);
       
   734   
       
   735   cd = open_converter (to_codeset, from_codeset, error);
       
   736 
       
   737   if (cd == (GIConv) -1)
       
   738     {
       
   739       if (bytes_read)
       
   740         *bytes_read = 0;
       
   741       
       
   742       if (bytes_written)
       
   743         *bytes_written = 0;
       
   744       
       
   745       return NULL;
       
   746     }
       
   747 
       
   748   res = g_convert_with_iconv (str, len, cd,
       
   749 			      bytes_read, bytes_written,
       
   750 			      error);
       
   751 
       
   752   close_converter (cd);
       
   753 
       
   754   return res;
       
   755 }
       
   756 
       
   757 /**
       
   758  * g_convert_with_fallback:
       
   759  * @str:          the string to convert
       
   760  * @len:          the length of the string, or -1 if the string is 
       
   761  *                nul-terminated<footnoteref linkend="nul-unsafe"/>. 
       
   762  * @to_codeset:   name of character set into which to convert @str
       
   763  * @from_codeset: character set of @str.
       
   764  * @fallback:     UTF-8 string to use in place of character not
       
   765  *                present in the target encoding. (The string must be
       
   766  *                representable in the target encoding). 
       
   767                   If %NULL, characters not in the target encoding will 
       
   768                   be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
       
   769  * @bytes_read:   location to store the number of bytes in the
       
   770  *                input string that were successfully converted, or %NULL.
       
   771  *                Even if the conversion was successful, this may be 
       
   772  *                less than @len if there were partial characters
       
   773  *                at the end of the input.
       
   774  * @bytes_written: the number of bytes stored in the output buffer (not 
       
   775  *                including the terminating nul).
       
   776  * @error:        location to store the error occuring, or %NULL to ignore
       
   777  *                errors. Any of the errors in #GConvertError may occur.
       
   778  *
       
   779  * Converts a string from one character set to another, possibly
       
   780  * including fallback sequences for characters not representable
       
   781  * in the output. Note that it is not guaranteed that the specification
       
   782  * for the fallback sequences in @fallback will be honored. Some
       
   783  * systems may do a approximate conversion from @from_codeset
       
   784  * to @to_codeset in their iconv() functions, 
       
   785  * in which case GLib will simply return that approximate conversion.
       
   786  *
       
   787  * Note that you should use g_iconv() for streaming 
       
   788  * conversions<footnoteref linkend="streaming-state"/>.
       
   789  *
       
   790  * Return value: If the conversion was successful, a newly allocated
       
   791  *               nul-terminated string, which must be freed with
       
   792  *               g_free(). Otherwise %NULL and @error will be set.
       
   793  **/
       
   794 EXPORT_C gchar*
       
   795 g_convert_with_fallback (const gchar *str,
       
   796 			 gssize       len,    
       
   797 			 const gchar *to_codeset,
       
   798 			 const gchar *from_codeset,
       
   799 			 gchar       *fallback,
       
   800 			 gsize       *bytes_read,
       
   801 			 gsize       *bytes_written,
       
   802 			 GError     **error)
       
   803 {
       
   804   gchar *utf8;
       
   805   gchar *dest;
       
   806   gchar *outp;
       
   807   const gchar *insert_str = NULL;
       
   808   const gchar *p;
       
   809   gsize inbytes_remaining;   
       
   810   const gchar *save_p = NULL;
       
   811   gsize save_inbytes = 0;
       
   812   gsize outbytes_remaining; 
       
   813   gsize err;
       
   814   GIConv cd;
       
   815   gsize outbuf_size;
       
   816   gboolean have_error = FALSE;
       
   817   gboolean done = FALSE;
       
   818 
       
   819   GError *local_error = NULL;
       
   820   
       
   821   g_return_val_if_fail (str != NULL, NULL);
       
   822   g_return_val_if_fail (to_codeset != NULL, NULL);
       
   823   g_return_val_if_fail (from_codeset != NULL, NULL);
       
   824      
       
   825   if (len < 0)
       
   826     len = strlen (str);
       
   827   
       
   828   /* Try an exact conversion; we only proceed if this fails
       
   829    * due to an illegal sequence in the input string.
       
   830    */
       
   831   dest = g_convert (str, len, to_codeset, from_codeset, 
       
   832 		    bytes_read, bytes_written, &local_error);
       
   833   if (!local_error)
       
   834     return dest;
       
   835 
       
   836   if (!g_error_matches (local_error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE))
       
   837     {
       
   838       g_propagate_error (error, local_error);
       
   839       return NULL;
       
   840     }
       
   841   else
       
   842     g_error_free (local_error);
       
   843 
       
   844   local_error = NULL;
       
   845   
       
   846   /* No go; to proceed, we need a converter from "UTF-8" to
       
   847    * to_codeset, and the string as UTF-8.
       
   848    */
       
   849   cd = open_converter (to_codeset, "UTF-8", error);
       
   850   if (cd == (GIConv) -1)
       
   851     {
       
   852       if (bytes_read)
       
   853         *bytes_read = 0;
       
   854       
       
   855       if (bytes_written)
       
   856         *bytes_written = 0;
       
   857       
       
   858       return NULL;
       
   859     }
       
   860 
       
   861   utf8 = g_convert (str, len, "UTF-8", from_codeset, 
       
   862 		    bytes_read, &inbytes_remaining, error);
       
   863   if (!utf8)
       
   864     {
       
   865       close_converter (cd);
       
   866       if (bytes_written)
       
   867         *bytes_written = 0;
       
   868       return NULL;
       
   869     }
       
   870 
       
   871   /* Now the heart of the code. We loop through the UTF-8 string, and
       
   872    * whenever we hit an offending character, we form fallback, convert
       
   873    * the fallback to the target codeset, and then go back to
       
   874    * converting the original string after finishing with the fallback.
       
   875    *
       
   876    * The variables save_p and save_inbytes store the input state
       
   877    * for the original string while we are converting the fallback
       
   878    */
       
   879   p = utf8;
       
   880 
       
   881   outbuf_size = len + 1; /* + 1 for nul in case len == 1 */
       
   882   outbytes_remaining = outbuf_size - 1; /* -1 for nul */
       
   883   outp = dest = g_malloc (outbuf_size);
       
   884 
       
   885   while (!done && !have_error)
       
   886     {
       
   887       size_t inbytes_tmp = inbytes_remaining;
       
   888       err = g_iconv (cd, (char **)&p, &inbytes_tmp, &outp, &outbytes_remaining);
       
   889       inbytes_remaining = inbytes_tmp;
       
   890 
       
   891       if (err == (size_t) -1)
       
   892 	{
       
   893 	  switch (errno)
       
   894 	    {
       
   895 	    case EINVAL:
       
   896 	      g_assert_not_reached();
       
   897 	      break;
       
   898 	    case E2BIG:
       
   899 	      {
       
   900 		size_t used = outp - dest;
       
   901 
       
   902 		outbuf_size *= 2;
       
   903 		dest = g_realloc (dest, outbuf_size);
       
   904 		
       
   905 		outp = dest + used;
       
   906 		outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
       
   907 		
       
   908 		break;
       
   909 	      }
       
   910 	    case EILSEQ:
       
   911 	      if (save_p)
       
   912 		{
       
   913 		  /* Error converting fallback string - fatal
       
   914 		   */
       
   915 		  g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
       
   916 			       _("Cannot convert fallback '%s' to codeset '%s'"),
       
   917 			       insert_str, to_codeset);
       
   918 		  have_error = TRUE;
       
   919 		  break;
       
   920 		}
       
   921 	      else if (p)
       
   922 		{
       
   923 		  if (!fallback)
       
   924 		    { 
       
   925 		      gunichar ch = g_utf8_get_char (p);
       
   926 		      insert_str = g_strdup_printf (ch < 0x10000 ? "\\u%04x" : "\\U%08x",
       
   927 						    ch);
       
   928 		    }
       
   929 		  else
       
   930 		    insert_str = fallback;
       
   931 		  
       
   932 		  save_p = g_utf8_next_char (p);
       
   933 		  save_inbytes = inbytes_remaining - (save_p - p);
       
   934 		  p = insert_str;
       
   935 		  inbytes_remaining = strlen (p);
       
   936 		  break;
       
   937 		}
       
   938 	      /* fall thru if p is NULL */
       
   939 	    default:
       
   940 	      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
       
   941 			   _("Error during conversion: %s"),
       
   942 			   g_strerror (errno));
       
   943 	      have_error = TRUE;
       
   944 	      break;
       
   945 	    }
       
   946 	}
       
   947       else
       
   948 	{
       
   949 	  if (save_p)
       
   950 	    {
       
   951 	      if (!fallback)
       
   952 		g_free ((gchar *)insert_str);
       
   953 	      p = save_p;
       
   954 	      inbytes_remaining = save_inbytes;
       
   955 	      save_p = NULL;
       
   956 	    }
       
   957 	  else if (p)
       
   958 	    {
       
   959 	      /* call g_iconv with NULL inbuf to cleanup shift state */
       
   960 	      p = NULL;
       
   961 	      inbytes_remaining = 0;
       
   962 	    }
       
   963 	  else
       
   964 	    done = TRUE;
       
   965 	}
       
   966     }
       
   967 
       
   968   /* Cleanup
       
   969    */
       
   970   *outp = '\0';
       
   971   
       
   972   close_converter (cd);
       
   973 
       
   974   if (bytes_written)
       
   975     *bytes_written = outp - dest;	/* Doesn't include '\0' */
       
   976 
       
   977   g_free (utf8);
       
   978 
       
   979   if (have_error)
       
   980     {
       
   981       if (save_p && !fallback)
       
   982 	g_free ((gchar *)insert_str);
       
   983       g_free (dest);
       
   984       return NULL;
       
   985     }
       
   986   else
       
   987     return dest;
       
   988 }
       
   989 
       
   990 /*
       
   991  * g_locale_to_utf8
       
   992  *
       
   993  * 
       
   994  */
       
   995 
       
   996 static gchar *
       
   997 strdup_len (const gchar *string,
       
   998 	    gssize       len,
       
   999 	    gsize       *bytes_written,
       
  1000 	    gsize       *bytes_read,
       
  1001 	    GError      **error)
       
  1002 	 
       
  1003 {
       
  1004   gsize real_len;
       
  1005 
       
  1006   if (!g_utf8_validate (string, len, NULL))
       
  1007     {
       
  1008       if (bytes_read)
       
  1009 	*bytes_read = 0;
       
  1010       if (bytes_written)
       
  1011 	*bytes_written = 0;
       
  1012 
       
  1013       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
       
  1014 		   _("Invalid byte sequence in conversion input"));
       
  1015       return NULL;
       
  1016     }
       
  1017   
       
  1018   if (len < 0)
       
  1019     real_len = strlen (string);
       
  1020   else
       
  1021     {
       
  1022       real_len = 0;
       
  1023       
       
  1024       while (real_len < len && string[real_len])
       
  1025 	real_len++;
       
  1026     }
       
  1027   
       
  1028   if (bytes_read)
       
  1029     *bytes_read = real_len;
       
  1030   if (bytes_written)
       
  1031     *bytes_written = real_len;
       
  1032 
       
  1033   return g_strndup (string, real_len);
       
  1034 }
       
  1035 
       
  1036 /**
       
  1037  * g_locale_to_utf8:
       
  1038  * @opsysstring:   a string in the encoding of the current locale. On Windows
       
  1039  *                 this means the system codepage.
       
  1040  * @len:           the length of the string, or -1 if the string is
       
  1041  *                 nul-terminated<footnoteref linkend="nul-unsafe"/>. 
       
  1042  * @bytes_read:    location to store the number of bytes in the
       
  1043  *                 input string that were successfully converted, or %NULL.
       
  1044  *                 Even if the conversion was successful, this may be 
       
  1045  *                 less than @len if there were partial characters
       
  1046  *                 at the end of the input. If the error
       
  1047  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
       
  1048  *                 stored will the byte offset after the last valid
       
  1049  *                 input sequence.
       
  1050  * @bytes_written: the number of bytes stored in the output buffer (not 
       
  1051  *                 including the terminating nul).
       
  1052  * @error:         location to store the error occuring, or %NULL to ignore
       
  1053  *                 errors. Any of the errors in #GConvertError may occur.
       
  1054  * 
       
  1055  * Converts a string which is in the encoding used for strings by
       
  1056  * the C runtime (usually the same as that used by the operating
       
  1057  * system) in the current locale into a UTF-8 string.
       
  1058  * 
       
  1059  * Return value: The converted string, or %NULL on an error.
       
  1060  **/
       
  1061 EXPORT_C gchar *
       
  1062 g_locale_to_utf8 (const gchar  *opsysstring,
       
  1063 		  gssize        len,            
       
  1064 		  gsize        *bytes_read,    
       
  1065 		  gsize        *bytes_written,
       
  1066 		  GError      **error)
       
  1067 {
       
  1068   const char *charset;
       
  1069 
       
  1070   if (g_get_charset (&charset))
       
  1071     return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
       
  1072   else
       
  1073     return g_convert (opsysstring, len, 
       
  1074 		      "UTF-8", charset, bytes_read, bytes_written, error);
       
  1075 }
       
  1076 
       
  1077 /**
       
  1078  * g_locale_from_utf8:
       
  1079  * @utf8string:    a UTF-8 encoded string 
       
  1080  * @len:           the length of the string, or -1 if the string is
       
  1081  *                 nul-terminated<footnoteref linkend="nul-unsafe"/>. 
       
  1082  * @bytes_read:    location to store the number of bytes in the
       
  1083  *                 input string that were successfully converted, or %NULL.
       
  1084  *                 Even if the conversion was successful, this may be 
       
  1085  *                 less than @len if there were partial characters
       
  1086  *                 at the end of the input. If the error
       
  1087  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
       
  1088  *                 stored will the byte offset after the last valid
       
  1089  *                 input sequence.
       
  1090  * @bytes_written: the number of bytes stored in the output buffer (not 
       
  1091  *                 including the terminating nul).
       
  1092  * @error:         location to store the error occuring, or %NULL to ignore
       
  1093  *                 errors. Any of the errors in #GConvertError may occur.
       
  1094  * 
       
  1095  * Converts a string from UTF-8 to the encoding used for strings by
       
  1096  * the C runtime (usually the same as that used by the operating
       
  1097  * system) in the current locale.
       
  1098  * 
       
  1099  * Return value: The converted string, or %NULL on an error.
       
  1100  **/
       
  1101 EXPORT_C gchar *
       
  1102 g_locale_from_utf8 (const gchar *utf8string,
       
  1103 		    gssize       len,            
       
  1104 		    gsize       *bytes_read,    
       
  1105 		    gsize       *bytes_written,
       
  1106 		    GError     **error)
       
  1107 {
       
  1108   const gchar *charset;
       
  1109 
       
  1110   if (g_get_charset (&charset))
       
  1111     return strdup_len (utf8string, len, bytes_read, bytes_written, error);
       
  1112   else
       
  1113     return g_convert (utf8string, len,
       
  1114 		      charset, "UTF-8", bytes_read, bytes_written, error);
       
  1115 }
       
  1116 
       
  1117 #ifndef G_PLATFORM_WIN32
       
  1118 
       
  1119 typedef struct _GFilenameCharsetCache GFilenameCharsetCache;
       
  1120 
       
  1121 struct _GFilenameCharsetCache {
       
  1122   gboolean is_utf8;
       
  1123   gchar *charset;
       
  1124   gchar **filename_charsets;
       
  1125 };
       
  1126 
       
  1127 static void
       
  1128 filename_charset_cache_free (gpointer data)
       
  1129 {
       
  1130   GFilenameCharsetCache *cache = data;
       
  1131   g_free (cache->charset);
       
  1132   g_strfreev (cache->filename_charsets);
       
  1133   g_free (cache);
       
  1134 }
       
  1135 
       
  1136 /**
       
  1137  * g_get_filename_charsets:
       
  1138  * @charsets: return location for the %NULL-terminated list of encoding names
       
  1139  *
       
  1140  * Determines the preferred character sets used for filenames.
       
  1141  * The first character set from the @charsets is the filename encoding, the
       
  1142  * subsequent character sets are used when trying to generate a displayable
       
  1143  * representation of a filename, see g_filename_display_name().
       
  1144  *
       
  1145  * On Unix, the character sets are determined by consulting the
       
  1146  * environment variables <envar>G_FILENAME_ENCODING</envar> and
       
  1147  * <envar>G_BROKEN_FILENAMES</envar>. On Windows, the character set
       
  1148  * used in the GLib API is always UTF-8 and said environment variables
       
  1149  * have no effect.
       
  1150  *
       
  1151  * <envar>G_FILENAME_ENCODING</envar> may be set to a comma-separated list 
       
  1152  * of character set names. The special token "@locale" is taken to mean the 
       
  1153  * character set for the current locale. If <envar>G_FILENAME_ENCODING</envar> 
       
  1154  * is not set, but <envar>G_BROKEN_FILENAMES</envar> is, the character set of 
       
  1155  * the current locale is taken as the filename encoding. If neither environment
       
  1156  * variable is set, UTF-8 is taken as the filename encoding, but the character
       
  1157  * set of the current locale is also put in the list of encodings.
       
  1158  *
       
  1159  * The returned @charsets belong to GLib and must not be freed.
       
  1160  *
       
  1161  * Note that on Unix, regardless of the locale character set or
       
  1162  * <envar>G_FILENAME_ENCODING</envar> value, the actual file names present on a
       
  1163  * system might be in any random encoding or just gibberish.
       
  1164  *
       
  1165  * Return value: %TRUE if the filename encoding is UTF-8.
       
  1166  * 
       
  1167  * Since: 2.6
       
  1168  */
       
  1169  
       
  1170 #if EMULATOR
       
  1171 
       
  1172 PLS(cache_private,g_get_filename_charsets,GStaticPrivate)
       
  1173 #define cache_private (*FUNCTION_NAME(cache_private,g_get_filename_charsets)())
       
  1174  
       
  1175 #endif /* EMULATOR */
       
  1176 
       
  1177  
       
  1178 EXPORT_C gboolean
       
  1179 g_get_filename_charsets (G_CONST_RETURN gchar ***filename_charsets)
       
  1180 {
       
  1181   #if !(EMULATOR)
       
  1182   
       
  1183   static GStaticPrivate cache_private = G_STATIC_PRIVATE_INIT;
       
  1184   
       
  1185   #endif /* EMULATOR */
       
  1186   
       
  1187   GFilenameCharsetCache *cache = g_static_private_get (&cache_private);
       
  1188   const gchar *charset;
       
  1189 
       
  1190   if (!cache)
       
  1191     {
       
  1192       cache = g_new0 (GFilenameCharsetCache, 1);
       
  1193       g_static_private_set (&cache_private, cache, filename_charset_cache_free);
       
  1194     }
       
  1195 
       
  1196   g_get_charset (&charset);
       
  1197 
       
  1198   if (!(cache->charset && strcmp (cache->charset, charset) == 0))
       
  1199     {
       
  1200       const gchar *new_charset;
       
  1201       gchar *p;
       
  1202       gint i;
       
  1203 
       
  1204       g_free (cache->charset);
       
  1205       g_strfreev (cache->filename_charsets);
       
  1206       cache->charset = g_strdup (charset);
       
  1207       
       
  1208       p = getenv ("G_FILENAME_ENCODING");
       
  1209       if (p != NULL && p[0] != '\0') 
       
  1210 	{
       
  1211 	  cache->filename_charsets = g_strsplit (p, ",", 0);
       
  1212 	  cache->is_utf8 = (strcmp (cache->filename_charsets[0], "UTF-8") == 0);
       
  1213 
       
  1214 	  for (i = 0; cache->filename_charsets[i]; i++)
       
  1215 	    {
       
  1216 	      if (strcmp ("@locale", cache->filename_charsets[i]) == 0)
       
  1217 		{
       
  1218 		  g_get_charset (&new_charset);
       
  1219 		  g_free (cache->filename_charsets[i]);
       
  1220 		  cache->filename_charsets[i] = g_strdup (new_charset);
       
  1221 		}
       
  1222 	    }
       
  1223 	}
       
  1224       else if (getenv ("G_BROKEN_FILENAMES") != NULL)
       
  1225 	{
       
  1226 	  cache->filename_charsets = g_new0 (gchar *, 2);
       
  1227 	  cache->is_utf8 = g_get_charset (&new_charset);
       
  1228 	  cache->filename_charsets[0] = g_strdup (new_charset);
       
  1229 	}
       
  1230       else 
       
  1231 	{
       
  1232 	  cache->filename_charsets = g_new0 (gchar *, 3);
       
  1233 	  cache->is_utf8 = TRUE;
       
  1234 	  cache->filename_charsets[0] = g_strdup ("UTF-8");
       
  1235 	  if (!g_get_charset (&new_charset))
       
  1236 	    cache->filename_charsets[1] = g_strdup (new_charset);
       
  1237 	}
       
  1238     }
       
  1239 
       
  1240   if (filename_charsets)
       
  1241     *filename_charsets = (const gchar **)cache->filename_charsets;
       
  1242 
       
  1243   return cache->is_utf8;
       
  1244 }
       
  1245 #if EMULATOR
       
  1246 #undef cache_private
       
  1247 #endif /* EMULATOR */
       
  1248 
       
  1249 #else /* G_PLATFORM_WIN32 */
       
  1250 
       
  1251 gboolean
       
  1252 g_get_filename_charsets (G_CONST_RETURN gchar ***filename_charsets) 
       
  1253 {
       
  1254   static const gchar *charsets[] = {
       
  1255     "UTF-8",
       
  1256     NULL
       
  1257   };
       
  1258 
       
  1259 #ifdef G_OS_WIN32
       
  1260   /* On Windows GLib pretends that the filename charset is UTF-8 */
       
  1261   if (filename_charsets)
       
  1262     *filename_charsets = charsets;
       
  1263 
       
  1264   return TRUE;
       
  1265 #else
       
  1266   gboolean result;
       
  1267 
       
  1268   /* Cygwin works like before */
       
  1269   result = g_get_charset (&(charsets[0]));
       
  1270 
       
  1271   if (filename_charsets)
       
  1272     *filename_charsets = charsets;
       
  1273 
       
  1274   return result;
       
  1275 #endif
       
  1276 }
       
  1277 
       
  1278 #endif /* G_PLATFORM_WIN32 */
       
  1279 
       
  1280 static gboolean
       
  1281 get_filename_charset (const gchar **filename_charset)
       
  1282 {
       
  1283   const gchar **charsets;
       
  1284   gboolean is_utf8;
       
  1285   
       
  1286   is_utf8 = g_get_filename_charsets (&charsets);
       
  1287 
       
  1288   if (filename_charset)
       
  1289     *filename_charset = charsets[0];
       
  1290   
       
  1291   return is_utf8;
       
  1292 }
       
  1293 
       
  1294 /* This is called from g_thread_init(). It's used to
       
  1295  * initialize some static data in a threadsafe way.
       
  1296  */
       
  1297 void 
       
  1298 _g_convert_thread_init (void)
       
  1299 {
       
  1300   const gchar **dummy;
       
  1301   (void) g_get_filename_charsets (&dummy);
       
  1302 }
       
  1303 
       
  1304 /**
       
  1305  * g_filename_to_utf8:
       
  1306  * @opsysstring:   a string in the encoding for filenames
       
  1307  * @len:           the length of the string, or -1 if the string is
       
  1308  *                 nul-terminated<footnoteref linkend="nul-unsafe"/>. 
       
  1309  * @bytes_read:    location to store the number of bytes in the
       
  1310  *                 input string that were successfully converted, or %NULL.
       
  1311  *                 Even if the conversion was successful, this may be 
       
  1312  *                 less than @len if there were partial characters
       
  1313  *                 at the end of the input. If the error
       
  1314  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
       
  1315  *                 stored will the byte offset after the last valid
       
  1316  *                 input sequence.
       
  1317  * @bytes_written: the number of bytes stored in the output buffer (not 
       
  1318  *                 including the terminating nul).
       
  1319  * @error:         location to store the error occuring, or %NULL to ignore
       
  1320  *                 errors. Any of the errors in #GConvertError may occur.
       
  1321  * 
       
  1322  * Converts a string which is in the encoding used by GLib for
       
  1323  * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
       
  1324  * for filenames.
       
  1325  * 
       
  1326  * Return value: The converted string, or %NULL on an error.
       
  1327  **/
       
  1328 EXPORT_C gchar*
       
  1329 g_filename_to_utf8 (const gchar *opsysstring, 
       
  1330 		    gssize       len,           
       
  1331 		    gsize       *bytes_read,   
       
  1332 		    gsize       *bytes_written,
       
  1333 		    GError     **error)
       
  1334 {
       
  1335   const gchar *charset;
       
  1336 
       
  1337   if (get_filename_charset (&charset))
       
  1338     return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
       
  1339   else
       
  1340     return g_convert (opsysstring, len, 
       
  1341 		      "UTF-8", charset, bytes_read, bytes_written, error);
       
  1342 }
       
  1343 
       
  1344 #ifdef G_OS_WIN32
       
  1345 
       
  1346 #undef g_filename_to_utf8
       
  1347 
       
  1348 /* Binary compatibility version. Not for newly compiled code. */
       
  1349 
       
  1350 gchar*
       
  1351 g_filename_to_utf8 (const gchar *opsysstring, 
       
  1352 		    gssize       len,           
       
  1353 		    gsize       *bytes_read,   
       
  1354 		    gsize       *bytes_written,
       
  1355 		    GError     **error)
       
  1356 {
       
  1357   const gchar *charset;
       
  1358 
       
  1359   if (g_get_charset (&charset))
       
  1360     return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
       
  1361   else
       
  1362     return g_convert (opsysstring, len, 
       
  1363 		      "UTF-8", charset, bytes_read, bytes_written, error);
       
  1364 }
       
  1365 
       
  1366 #endif
       
  1367 
       
  1368 /**
       
  1369  * g_filename_from_utf8:
       
  1370  * @utf8string:    a UTF-8 encoded string.
       
  1371  * @len:           the length of the string, or -1 if the string is
       
  1372  *                 nul-terminated.
       
  1373  * @bytes_read:    location to store the number of bytes in the
       
  1374  *                 input string that were successfully converted, or %NULL.
       
  1375  *                 Even if the conversion was successful, this may be 
       
  1376  *                 less than @len if there were partial characters
       
  1377  *                 at the end of the input. If the error
       
  1378  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
       
  1379  *                 stored will the byte offset after the last valid
       
  1380  *                 input sequence.
       
  1381  * @bytes_written: the number of bytes stored in the output buffer (not 
       
  1382  *                 including the terminating nul).
       
  1383  * @error:         location to store the error occuring, or %NULL to ignore
       
  1384  *                 errors. Any of the errors in #GConvertError may occur.
       
  1385  * 
       
  1386  * Converts a string from UTF-8 to the encoding GLib uses for
       
  1387  * filenames. Note that on Windows GLib uses UTF-8 for filenames.
       
  1388  * 
       
  1389  * Return value: The converted string, or %NULL on an error.
       
  1390  **/
       
  1391 EXPORT_C gchar*
       
  1392 g_filename_from_utf8 (const gchar *utf8string,
       
  1393 		      gssize       len,            
       
  1394 		      gsize       *bytes_read,    
       
  1395 		      gsize       *bytes_written,
       
  1396 		      GError     **error)
       
  1397 {
       
  1398   const gchar *charset;
       
  1399 
       
  1400   if (get_filename_charset (&charset))
       
  1401     return strdup_len (utf8string, len, bytes_read, bytes_written, error);
       
  1402   else
       
  1403     return g_convert (utf8string, len,
       
  1404 		      charset, "UTF-8", bytes_read, bytes_written, error);
       
  1405 }
       
  1406 
       
  1407 #ifdef G_OS_WIN32
       
  1408 
       
  1409 #undef g_filename_from_utf8
       
  1410 
       
  1411 /* Binary compatibility version. Not for newly compiled code. */
       
  1412 
       
  1413 gchar*
       
  1414 g_filename_from_utf8 (const gchar *utf8string,
       
  1415 		      gssize       len,            
       
  1416 		      gsize       *bytes_read,    
       
  1417 		      gsize       *bytes_written,
       
  1418 		      GError     **error)
       
  1419 {
       
  1420   const gchar *charset;
       
  1421 
       
  1422   if (g_get_charset (&charset))
       
  1423     return strdup_len (utf8string, len, bytes_read, bytes_written, error);
       
  1424   else
       
  1425     return g_convert (utf8string, len,
       
  1426 		      charset, "UTF-8", bytes_read, bytes_written, error);
       
  1427 }
       
  1428 
       
  1429 #endif
       
  1430 
       
  1431 /* Test of haystack has the needle prefix, comparing case
       
  1432  * insensitive. haystack may be UTF-8, but needle must
       
  1433  * contain only ascii. */
       
  1434 static gboolean
       
  1435 has_case_prefix (const gchar *haystack, const gchar *needle)
       
  1436 {
       
  1437   const gchar *h, *n;
       
  1438   
       
  1439   /* Eat one character at a time. */
       
  1440   h = haystack;
       
  1441   n = needle;
       
  1442 
       
  1443   while (*n && *h &&
       
  1444 	 g_ascii_tolower (*n) == g_ascii_tolower (*h))
       
  1445     {
       
  1446       n++;
       
  1447       h++;
       
  1448     }
       
  1449   
       
  1450   return *n == '\0';
       
  1451 }
       
  1452 
       
  1453 typedef enum {
       
  1454   UNSAFE_ALL        = 0x1,  /* Escape all unsafe characters   */
       
  1455   UNSAFE_ALLOW_PLUS = 0x2,  /* Allows '+'  */
       
  1456   UNSAFE_PATH       = 0x8,  /* Allows '/', '&', '=', ':', '@', '+', '$' and ',' */
       
  1457   UNSAFE_HOST       = 0x10, /* Allows '/' and ':' and '@' */
       
  1458   UNSAFE_SLASHES    = 0x20  /* Allows all characters except for '/' and '%' */
       
  1459 } UnsafeCharacterSet;
       
  1460 
       
  1461 static const guchar acceptable[96] = {
       
  1462   /* A table of the ASCII chars from space (32) to DEL (127) */
       
  1463   /*      !    "    #    $    %    &    '    (    )    *    +    ,    -    .    / */ 
       
  1464   0x00,0x3F,0x20,0x20,0x28,0x00,0x2C,0x3F,0x3F,0x3F,0x3F,0x2A,0x28,0x3F,0x3F,0x1C,
       
  1465   /* 0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ? */
       
  1466   0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x38,0x20,0x20,0x2C,0x20,0x20,
       
  1467   /* @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O */
       
  1468   0x38,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
       
  1469   /* P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _ */
       
  1470   0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x20,0x3F,
       
  1471   /* `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o */
       
  1472   0x20,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
       
  1473   /* p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~  DEL */
       
  1474   0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x3F,0x20
       
  1475 };
       
  1476 
       
  1477 static const gchar hex[16] = "0123456789ABCDEF";
       
  1478 
       
  1479 /* Note: This escape function works on file: URIs, but if you want to
       
  1480  * escape something else, please read RFC-2396 */
       
  1481 static gchar *
       
  1482 g_escape_uri_string (const gchar *string, 
       
  1483 		     UnsafeCharacterSet mask)
       
  1484 {
       
  1485 #define ACCEPTABLE(a) ((a)>=32 && (a)<128 && (acceptable[(a)-32] & use_mask))
       
  1486 
       
  1487   const gchar *p;
       
  1488   gchar *q;
       
  1489   gchar *result;
       
  1490   int c;
       
  1491   gint unacceptable;
       
  1492   UnsafeCharacterSet use_mask;
       
  1493   
       
  1494   g_return_val_if_fail (mask == UNSAFE_ALL
       
  1495 			|| mask == UNSAFE_ALLOW_PLUS
       
  1496 			|| mask == UNSAFE_PATH
       
  1497 			|| mask == UNSAFE_HOST
       
  1498 			|| mask == UNSAFE_SLASHES, NULL);
       
  1499   
       
  1500   unacceptable = 0;
       
  1501   use_mask = mask;
       
  1502   for (p = string; *p != '\0'; p++)
       
  1503     {
       
  1504       c = (guchar) *p;
       
  1505       if (!ACCEPTABLE (c)) 
       
  1506 	unacceptable++;
       
  1507     }
       
  1508 
       
  1509  result = g_malloc (p - string + unacceptable * 2 + 1);
       
  1510   
       
  1511   use_mask = mask;
       
  1512   for (q = result, p = string; *p != '\0'; p++)
       
  1513     {
       
  1514       c = (guchar) *p;
       
  1515       
       
  1516       if (!ACCEPTABLE (c))
       
  1517 	{
       
  1518 	  *q++ = '%'; /* means hex coming */
       
  1519 	  *q++ = hex[c >> 4];
       
  1520 	  *q++ = hex[c & 15];
       
  1521 	}
       
  1522       else
       
  1523 	*q++ = *p;
       
  1524     }
       
  1525   
       
  1526   *q = '\0';
       
  1527   
       
  1528   return result;
       
  1529 }
       
  1530 
       
  1531 
       
  1532 static gchar *
       
  1533 g_escape_file_uri (const gchar *hostname,
       
  1534 		   const gchar *pathname)
       
  1535 {
       
  1536   char *escaped_hostname = NULL;
       
  1537   char *escaped_path;
       
  1538   char *res;
       
  1539 
       
  1540 #if defined(G_OS_WIN32) || defined(__SYMBIAN32__)
       
  1541   char *p, *backslash;
       
  1542 
       
  1543   /* Turn backslashes into forward slashes. That's what Netscape
       
  1544    * does, and they are actually more or less equivalent in Windows.
       
  1545    */
       
  1546   
       
  1547   pathname = g_strdup (pathname);
       
  1548   p = (char *) pathname;
       
  1549   
       
  1550   while ((backslash = strchr (p, '\\')) != NULL)
       
  1551     {
       
  1552       *backslash = '/';
       
  1553       p = backslash + 1;
       
  1554     }
       
  1555 #endif
       
  1556 
       
  1557   if (hostname && *hostname != '\0')
       
  1558     {
       
  1559       escaped_hostname = g_escape_uri_string (hostname, UNSAFE_HOST);
       
  1560     }
       
  1561 
       
  1562   escaped_path = g_escape_uri_string (pathname, UNSAFE_PATH);
       
  1563 
       
  1564   res = g_strconcat ("file://",
       
  1565 		     (escaped_hostname) ? escaped_hostname : "",
       
  1566 		     (*escaped_path != '/') ? "/" : "",
       
  1567 		     escaped_path,
       
  1568 		     NULL);
       
  1569 
       
  1570 #ifdef G_OS_WIN32
       
  1571   g_free ((char *) pathname);
       
  1572 #endif
       
  1573 
       
  1574   g_free (escaped_hostname);
       
  1575   g_free (escaped_path);
       
  1576   
       
  1577   return res;
       
  1578 }
       
  1579 
       
  1580 static int
       
  1581 unescape_character (const char *scanner)
       
  1582 {
       
  1583   int first_digit;
       
  1584   int second_digit;
       
  1585 
       
  1586   first_digit = g_ascii_xdigit_value (scanner[0]);
       
  1587   if (first_digit < 0) 
       
  1588     return -1;
       
  1589   
       
  1590   second_digit = g_ascii_xdigit_value (scanner[1]);
       
  1591   if (second_digit < 0) 
       
  1592     return -1;
       
  1593   
       
  1594   return (first_digit << 4) | second_digit;
       
  1595 }
       
  1596 
       
  1597 static gchar *
       
  1598 g_unescape_uri_string (const char *escaped,
       
  1599 		       int         len,
       
  1600 		       const char *illegal_escaped_characters,
       
  1601 		       gboolean    ascii_must_not_be_escaped)
       
  1602 {
       
  1603   const gchar *in, *in_end;
       
  1604   gchar *out, *result;
       
  1605   int c;
       
  1606   
       
  1607   if (escaped == NULL)
       
  1608     return NULL;
       
  1609 
       
  1610   if (len < 0)
       
  1611     len = strlen (escaped);
       
  1612   result = g_malloc (len + 1);
       
  1613   
       
  1614   out = result;
       
  1615   for (in = escaped, in_end = escaped + len; in < in_end; in++)
       
  1616     {
       
  1617       c = *in;
       
  1618 
       
  1619       if (c == '%')
       
  1620 	{
       
  1621 	  /* catch partial escape sequences past the end of the substring */
       
  1622 	  if (in + 3 > in_end)
       
  1623 	    break;
       
  1624 
       
  1625 	  c = unescape_character (in + 1);
       
  1626 
       
  1627 	  /* catch bad escape sequences and NUL characters */
       
  1628 	  if (c <= 0)
       
  1629 	    break;
       
  1630 
       
  1631 	  /* catch escaped ASCII */
       
  1632 	  if (ascii_must_not_be_escaped && c <= 0x7F)
       
  1633 	    break;
       
  1634 
       
  1635 	  /* catch other illegal escaped characters */
       
  1636 	  if (strchr (illegal_escaped_characters, c) != NULL)
       
  1637 	    break;
       
  1638 
       
  1639 	  in += 2;
       
  1640 	}
       
  1641 
       
  1642       *out++ = c;
       
  1643     }
       
  1644   
       
  1645   g_assert (out - result <= len);
       
  1646   *out = '\0';
       
  1647 
       
  1648   if (in != in_end)
       
  1649     {
       
  1650       g_free (result);
       
  1651       return NULL;
       
  1652     }
       
  1653 
       
  1654   return result;
       
  1655 }
       
  1656 
       
  1657 static gboolean
       
  1658 is_asciialphanum (gunichar c)
       
  1659 {
       
  1660   return c <= 0x7F && g_ascii_isalnum (c);
       
  1661 }
       
  1662 
       
  1663 static gboolean
       
  1664 is_asciialpha (gunichar c)
       
  1665 {
       
  1666   return c <= 0x7F && g_ascii_isalpha (c);
       
  1667 }
       
  1668 
       
  1669 /* allows an empty string */
       
  1670 static gboolean
       
  1671 hostname_validate (const char *hostname)
       
  1672 {
       
  1673   const char *p;
       
  1674   gunichar c, first_char, last_char;
       
  1675 
       
  1676   p = hostname;
       
  1677   if (*p == '\0')
       
  1678     return TRUE;
       
  1679   do
       
  1680     {
       
  1681       /* read in a label */
       
  1682       c = g_utf8_get_char (p);
       
  1683       p = g_utf8_next_char (p);
       
  1684       if (!is_asciialphanum (c))
       
  1685 	return FALSE;
       
  1686       first_char = c;
       
  1687       do
       
  1688 	{
       
  1689 	  last_char = c;
       
  1690 	  c = g_utf8_get_char (p);
       
  1691 	  p = g_utf8_next_char (p);
       
  1692 	}
       
  1693       while (is_asciialphanum (c) || c == '-');
       
  1694       if (last_char == '-')
       
  1695 	return FALSE;
       
  1696       
       
  1697       /* if that was the last label, check that it was a toplabel */
       
  1698       if (c == '\0' || (c == '.' && *p == '\0'))
       
  1699 	return is_asciialpha (first_char);
       
  1700     }
       
  1701   while (c == '.');
       
  1702   return FALSE;
       
  1703 }
       
  1704 
       
  1705 /**
       
  1706  * g_filename_from_uri:
       
  1707  * @uri: a uri describing a filename (escaped, encoded in ASCII).
       
  1708  * @hostname: Location to store hostname for the URI, or %NULL.
       
  1709  *            If there is no hostname in the URI, %NULL will be
       
  1710  *            stored in this location.
       
  1711  * @error: location to store the error occuring, or %NULL to ignore
       
  1712  *         errors. Any of the errors in #GConvertError may occur.
       
  1713  * 
       
  1714  * Converts an escaped ASCII-encoded URI to a local filename in the
       
  1715  * encoding used for filenames. 
       
  1716  * 
       
  1717  * Return value: a newly-allocated string holding the resulting
       
  1718  *               filename, or %NULL on an error.
       
  1719  **/
       
  1720 EXPORT_C gchar *
       
  1721 g_filename_from_uri (const gchar *uri,
       
  1722 		     gchar      **hostname,
       
  1723 		     GError     **error)
       
  1724 {
       
  1725   const char *path_part;
       
  1726   const char *host_part;
       
  1727   char *unescaped_hostname;
       
  1728   char *result;
       
  1729   char *filename;
       
  1730   int offs;
       
  1731 #if defined(G_OS_WIN32) || defined(__SYMBIAN32__)
       
  1732   char *p, *slash;
       
  1733 #endif
       
  1734 
       
  1735   if (hostname)
       
  1736     *hostname = NULL;
       
  1737 
       
  1738   if (!has_case_prefix (uri, "file:/"))
       
  1739     {
       
  1740       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
       
  1741 		   _("The URI '%s' is not an absolute URI using the \"file\" scheme"),
       
  1742 		   uri);
       
  1743       return NULL;
       
  1744     }
       
  1745   
       
  1746   path_part = uri + strlen ("file:");
       
  1747   
       
  1748   if (strchr (path_part, '#') != NULL)
       
  1749     {
       
  1750       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
       
  1751 		   _("The local file URI '%s' may not include a '#'"),
       
  1752 		   uri);
       
  1753       return NULL;
       
  1754     }
       
  1755 	
       
  1756   if (has_case_prefix (path_part, "///")) 
       
  1757     path_part += 2;
       
  1758   else if (has_case_prefix (path_part, "//"))
       
  1759     {
       
  1760       path_part += 2;
       
  1761       host_part = path_part;
       
  1762 
       
  1763       path_part = strchr (path_part, '/');
       
  1764 
       
  1765       if (path_part == NULL)
       
  1766 	{
       
  1767 	  g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
       
  1768 		       _("The URI '%s' is invalid"),
       
  1769 		       uri);
       
  1770 	  return NULL;
       
  1771 	}
       
  1772 
       
  1773       unescaped_hostname = g_unescape_uri_string (host_part, path_part - host_part, "", TRUE);
       
  1774 
       
  1775       if (unescaped_hostname == NULL ||
       
  1776 	  !hostname_validate (unescaped_hostname))
       
  1777 	{
       
  1778 	  g_free (unescaped_hostname);
       
  1779 	  g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
       
  1780 		       _("The hostname of the URI '%s' is invalid"),
       
  1781 		       uri);
       
  1782 	  return NULL;
       
  1783 	}
       
  1784       
       
  1785       if (hostname)
       
  1786 	*hostname = unescaped_hostname;
       
  1787       else
       
  1788 	g_free (unescaped_hostname);
       
  1789     }
       
  1790 
       
  1791   filename = g_unescape_uri_string (path_part, -1, "/", FALSE);
       
  1792 
       
  1793   if (filename == NULL)
       
  1794     {
       
  1795       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
       
  1796 		   _("The URI '%s' contains invalidly escaped characters"),
       
  1797 		   uri);
       
  1798       return NULL;
       
  1799     }
       
  1800 
       
  1801   offs = 0;
       
  1802 #if defined(G_OS_WIN32) || defined(__SYMBIAN32__)
       
  1803   /* Drop localhost */
       
  1804   if (hostname && *hostname != NULL &&
       
  1805       g_ascii_strcasecmp (*hostname, "localhost") == 0)
       
  1806     {
       
  1807       g_free (*hostname);
       
  1808       *hostname = NULL;
       
  1809     }
       
  1810 
       
  1811   /* Turn slashes into backslashes, because that's the canonical spelling */
       
  1812   p = filename;
       
  1813   while ((slash = strchr (p, '/')) != NULL)
       
  1814     {
       
  1815       *slash = '\\';
       
  1816       p = slash + 1;
       
  1817     }
       
  1818 
       
  1819   /* Windows URIs with a drive letter can be like "file://host/c:/foo"
       
  1820    * or "file://host/c|/foo" (some Netscape versions). In those cases, start
       
  1821    * the filename from the drive letter.
       
  1822    */
       
  1823   if (g_ascii_isalpha (filename[1]))
       
  1824     {
       
  1825       if (filename[2] == ':')
       
  1826 	offs = 1;
       
  1827       else if (filename[2] == '|')
       
  1828 	{
       
  1829 	  filename[2] = ':';
       
  1830 	  offs = 1;
       
  1831 	}
       
  1832     }
       
  1833 #endif
       
  1834 
       
  1835   result = g_strdup (filename + offs);
       
  1836   g_free (filename);
       
  1837 
       
  1838   return result;
       
  1839 }
       
  1840 
       
  1841 #ifdef G_OS_WIN32
       
  1842 
       
  1843 #undef g_filename_from_uri
       
  1844 
       
  1845 gchar *
       
  1846 g_filename_from_uri (const gchar *uri,
       
  1847 		     gchar      **hostname,
       
  1848 		     GError     **error)
       
  1849 {
       
  1850   gchar *utf8_filename;
       
  1851   gchar *retval = NULL;
       
  1852 
       
  1853   utf8_filename = g_filename_from_uri_utf8 (uri, hostname, error);
       
  1854   if (utf8_filename)
       
  1855     {
       
  1856       retval = g_locale_from_utf8 (utf8_filename, -1, NULL, NULL, error);
       
  1857       g_free (utf8_filename);
       
  1858     }
       
  1859   return retval;
       
  1860 }
       
  1861 
       
  1862 #endif
       
  1863 
       
  1864 /**
       
  1865  * g_filename_to_uri:
       
  1866  * @filename: an absolute filename specified in the GLib file name encoding,
       
  1867  *            which is the on-disk file name bytes on Unix, and UTF-8 on 
       
  1868  *            Windows
       
  1869  * @hostname: A UTF-8 encoded hostname, or %NULL for none.
       
  1870  * @error: location to store the error occuring, or %NULL to ignore
       
  1871  *         errors. Any of the errors in #GConvertError may occur.
       
  1872  * 
       
  1873  * Converts an absolute filename to an escaped ASCII-encoded URI.
       
  1874  * 
       
  1875  * Return value: a newly-allocated string holding the resulting
       
  1876  *               URI, or %NULL on an error.
       
  1877  **/
       
  1878 EXPORT_C gchar *
       
  1879 g_filename_to_uri (const gchar *filename,
       
  1880 		   const gchar *hostname,
       
  1881 		   GError     **error)
       
  1882 {
       
  1883   char *escaped_uri;
       
  1884 
       
  1885   g_return_val_if_fail (filename != NULL, NULL);
       
  1886 
       
  1887   if (!g_path_is_absolute (filename))
       
  1888     {
       
  1889       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH,
       
  1890 		   _("The pathname '%s' is not an absolute path"),
       
  1891 		   filename);
       
  1892       return NULL;
       
  1893     }
       
  1894 
       
  1895   if (hostname &&
       
  1896       !(g_utf8_validate (hostname, -1, NULL)
       
  1897 	&& hostname_validate (hostname)))
       
  1898     {
       
  1899       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
       
  1900 		   _("Invalid hostname"));
       
  1901       return NULL;
       
  1902     }
       
  1903   
       
  1904 #if defined(G_OS_WIN32) || defined(__SYMBIAN32__)
       
  1905   /* Don't use localhost unnecessarily */
       
  1906   if (hostname && g_ascii_strcasecmp (hostname, "localhost") == 0)
       
  1907     hostname = NULL;
       
  1908 #endif
       
  1909 
       
  1910   escaped_uri = g_escape_file_uri (hostname, filename);
       
  1911 
       
  1912   return escaped_uri;
       
  1913 }
       
  1914 
       
  1915 #ifdef G_OS_WIN32
       
  1916 
       
  1917 #undef g_filename_to_uri
       
  1918 
       
  1919 gchar *
       
  1920 g_filename_to_uri (const gchar *filename,
       
  1921 		   const gchar *hostname,
       
  1922 		   GError     **error)
       
  1923 {
       
  1924   gchar *utf8_filename;
       
  1925   gchar *retval = NULL;
       
  1926 
       
  1927   utf8_filename = g_locale_to_utf8 (filename, -1, NULL, NULL, error);
       
  1928 
       
  1929   if (utf8_filename)
       
  1930     {
       
  1931       retval = g_filename_to_uri_utf8 (utf8_filename, hostname, error);
       
  1932       g_free (utf8_filename);
       
  1933     }
       
  1934 
       
  1935   return retval;
       
  1936 }
       
  1937 
       
  1938 #endif
       
  1939 
       
  1940 /**
       
  1941  * g_uri_list_extract_uris:
       
  1942  * @uri_list: an URI list 
       
  1943  *
       
  1944  * Splits an URI list conforming to the text/uri-list
       
  1945  * mime type defined in RFC 2483 into individual URIs,
       
  1946  * discarding any comments. The URIs are not validated.
       
  1947  *
       
  1948  * Returns: a newly allocated %NULL-terminated list of
       
  1949  *   strings holding the individual URIs. The array should
       
  1950  *   be freed with g_strfreev().
       
  1951  *
       
  1952  * Since: 2.6
       
  1953  */
       
  1954 EXPORT_C gchar **
       
  1955 g_uri_list_extract_uris (const gchar *uri_list)
       
  1956 {
       
  1957   GSList *uris, *u;
       
  1958   const gchar *p, *q;
       
  1959   gchar **result;
       
  1960   gint n_uris = 0;
       
  1961 
       
  1962   uris = NULL;
       
  1963 
       
  1964   p = uri_list;
       
  1965 
       
  1966   /* We don't actually try to validate the URI according to RFC
       
  1967    * 2396, or even check for allowed characters - we just ignore
       
  1968    * comments and trim whitespace off the ends.  We also
       
  1969    * allow LF delimination as well as the specified CRLF.
       
  1970    *
       
  1971    * We do allow comments like specified in RFC 2483.
       
  1972    */
       
  1973   while (p)
       
  1974     {
       
  1975       if (*p != '#')
       
  1976 	{
       
  1977 	  while (g_ascii_isspace (*p))
       
  1978 	    p++;
       
  1979 
       
  1980 	  q = p;
       
  1981 	  while (*q && (*q != '\n') && (*q != '\r'))
       
  1982 	    q++;
       
  1983 
       
  1984 	  if (q > p)
       
  1985 	    {
       
  1986 	      q--;
       
  1987 	      while (q > p && g_ascii_isspace (*q))
       
  1988 		q--;
       
  1989 
       
  1990 	      if (q > p)
       
  1991 		{
       
  1992 		  uris = g_slist_prepend (uris, g_strndup (p, q - p + 1));
       
  1993 		  n_uris++;
       
  1994 		}
       
  1995 	    }
       
  1996 	}
       
  1997       p = strchr (p, '\n');
       
  1998       if (p)
       
  1999 	p++;
       
  2000     }
       
  2001   result = g_new (gchar *, n_uris + 1);
       
  2002 
       
  2003   result[n_uris--] = NULL;
       
  2004   for (u = uris; u; u = u->next)
       
  2005     result[n_uris--] = u->data;
       
  2006 
       
  2007   g_slist_free (uris);
       
  2008 
       
  2009   return result;
       
  2010 }
       
  2011 
       
  2012 static gchar *
       
  2013 make_valid_utf8 (const gchar *name)
       
  2014 {
       
  2015   GString *string;
       
  2016   const gchar *remainder, *invalid;
       
  2017   gint remaining_bytes, valid_bytes;
       
  2018   
       
  2019   string = NULL;
       
  2020   remainder = name;
       
  2021   remaining_bytes = strlen (name);
       
  2022   
       
  2023   while (remaining_bytes != 0) 
       
  2024     {
       
  2025       if (g_utf8_validate (remainder, remaining_bytes, &invalid)) 
       
  2026 	break;
       
  2027       valid_bytes = invalid - remainder;
       
  2028     
       
  2029       if (string == NULL) 
       
  2030 	string = g_string_sized_new (remaining_bytes);
       
  2031 
       
  2032       g_string_append_len (string, remainder, valid_bytes);
       
  2033       /* append U+FFFD REPLACEMENT CHARACTER */
       
  2034       g_string_append (string, "\357\277\275");
       
  2035       
       
  2036       remaining_bytes -= valid_bytes + 1;
       
  2037       remainder = invalid + 1;
       
  2038     }
       
  2039   
       
  2040   if (string == NULL)
       
  2041     return g_strdup (name);
       
  2042   
       
  2043   g_string_append (string, remainder);
       
  2044 
       
  2045   g_assert (g_utf8_validate (string->str, -1, NULL));
       
  2046   
       
  2047   return g_string_free (string, FALSE);
       
  2048 }
       
  2049 
       
  2050 /**
       
  2051  * g_filename_display_basename:
       
  2052  * @filename: an absolute pathname in the GLib file name encoding
       
  2053  *
       
  2054  * Returns the display basename for the particular filename, guaranteed
       
  2055  * to be valid UTF-8. The display name might not be identical to the filename,
       
  2056  * for instance there might be problems converting it to UTF-8, and some files
       
  2057  * can be translated in the display.
       
  2058  *
       
  2059  * If GLib can not make sense of the encoding of @filename, as a last resort it 
       
  2060  * replaces unknown characters with U+FFFD, the Unicode replacement character.
       
  2061  * You can search the result for the UTF-8 encoding of this character (which is
       
  2062  * "\357\277\275" in octal notation) to find out if @filename was in an invalid
       
  2063  * encoding.
       
  2064  *
       
  2065  * You must pass the whole absolute pathname to this functions so that
       
  2066  * translation of well known locations can be done.
       
  2067  *
       
  2068  * This function is preferred over g_filename_display_name() if you know the
       
  2069  * whole path, as it allows translation.
       
  2070  *
       
  2071  * Return value: a newly allocated string containing
       
  2072  *   a rendition of the basename of the filename in valid UTF-8
       
  2073  *
       
  2074  * Since: 2.6
       
  2075  **/
       
  2076 EXPORT_C gchar *
       
  2077 g_filename_display_basename (const gchar *filename)
       
  2078 {
       
  2079   char *basename;
       
  2080   char *display_name;
       
  2081 
       
  2082   g_return_val_if_fail (filename != NULL, NULL);
       
  2083   
       
  2084   basename = g_path_get_basename (filename);
       
  2085   display_name = g_filename_display_name (basename);
       
  2086   g_free (basename);
       
  2087   return display_name;
       
  2088 }
       
  2089 
       
  2090 /**
       
  2091  * g_filename_display_name:
       
  2092  * @filename: a pathname hopefully in the GLib file name encoding
       
  2093  * 
       
  2094  * Converts a filename into a valid UTF-8 string. The conversion is 
       
  2095  * not necessarily reversible, so you should keep the original around 
       
  2096  * and use the return value of this function only for display purposes.
       
  2097  * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL 
       
  2098  * even if the filename actually isn't in the GLib file name encoding.
       
  2099  *
       
  2100  * If GLib can not make sense of the encoding of @filename, as a last resort it 
       
  2101  * replaces unknown characters with U+FFFD, the Unicode replacement character.
       
  2102  * You can search the result for the UTF-8 encoding of this character (which is
       
  2103  * "\357\277\275" in octal notation) to find out if @filename was in an invalid
       
  2104  * encoding.
       
  2105  *
       
  2106  * If you know the whole pathname of the file you should use
       
  2107  * g_filename_display_basename(), since that allows location-based
       
  2108  * translation of filenames.
       
  2109  *
       
  2110  * Return value: a newly allocated string containing
       
  2111  *   a rendition of the filename in valid UTF-8
       
  2112  *
       
  2113  * Since: 2.6
       
  2114  **/
       
  2115 EXPORT_C gchar *
       
  2116 g_filename_display_name (const gchar *filename)
       
  2117 {
       
  2118   gint i;
       
  2119   const gchar **charsets;
       
  2120   gchar *display_name = NULL;
       
  2121   gboolean is_utf8;
       
  2122  
       
  2123   is_utf8 = g_get_filename_charsets (&charsets);
       
  2124 
       
  2125   if (is_utf8)
       
  2126     {
       
  2127       if (g_utf8_validate (filename, -1, NULL))
       
  2128 	display_name = g_strdup (filename);
       
  2129     }
       
  2130   
       
  2131   if (!display_name)
       
  2132     {
       
  2133       /* Try to convert from the filename charsets to UTF-8.
       
  2134        * Skip the first charset if it is UTF-8.
       
  2135        */
       
  2136       for (i = is_utf8 ? 1 : 0; charsets[i]; i++)
       
  2137 	{
       
  2138 	  display_name = g_convert (filename, -1, "UTF-8", charsets[i], 
       
  2139 				    NULL, NULL, NULL);
       
  2140 
       
  2141 	  if (display_name)
       
  2142 	    break;
       
  2143 	}
       
  2144     }
       
  2145   
       
  2146   /* if all conversions failed, we replace invalid UTF-8
       
  2147    * by a question mark
       
  2148    */
       
  2149   if (!display_name) 
       
  2150     display_name = make_valid_utf8 (filename);
       
  2151 
       
  2152   return display_name;
       
  2153 }
       
  2154 
       
  2155 #define __G_CONVERT_C__
       
  2156 #include "galiasdef.c"