webengine/osswebengine/WebCore/platform/symbian/Libxml2/Libxml2_xmlstring.c
changeset 0 dd21522fd290
equal deleted inserted replaced
-1:000000000000 0:dd21522fd290
       
     1 /*
       
     2  * string.c : an XML string utilities module
       
     3  *
       
     4  * This module provides various utility functions for manipulating
       
     5  * the xmlChar* type. All functions named xmlStr* have been moved here
       
     6  * from the parser.c file (their original home).
       
     7  *
       
     8  * See Copyright for the status of this software.
       
     9  *
       
    10  * UTF8 string routines from:
       
    11  * William Brack <wbrack@mmm.com.hk>
       
    12  *
       
    13  * daniel@veillard.com
       
    14  */
       
    15 
       
    16 #define IN_LIBXML
       
    17 #include "XmlEnglibxml.h"
       
    18 
       
    19 #include <stdlib.h>
       
    20 #include <string.h>
       
    21 #include "Libxml2_xmlmemory.h"
       
    22 #include "Libxml2_parserInternals.h"
       
    23 #include "Libxml2_xmlstring.h"
       
    24 
       
    25 /************************************************************************
       
    26  *                                                                      *
       
    27  *                Commodity functions to handle xmlChars                *
       
    28  *                                                                      *
       
    29  ************************************************************************/
       
    30 
       
    31 /**
       
    32  * xmlStrndup:
       
    33  * @cur:  the input xmlChar *
       
    34  * @len:  the len of @cur
       
    35  *
       
    36  * a strndup for array of xmlChar's
       
    37  *
       
    38  * Returns a new xmlChar * or NULL
       
    39  *
       
    40  * OOM: possible --> returns NULL for (cup!=NULL && len>=0) and sets OOM flag
       
    41  */
       
    42 xmlChar *
       
    43 xmlStrndup(const xmlChar *cur, int len) {
       
    44     xmlChar *ret;
       
    45 
       
    46     if ((cur == NULL) || (len < 0)) return(NULL);
       
    47     ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
       
    48     if (ret == NULL) {
       
    49         //   TODO: Setup OOM flag here
       
    50         xmlErrMemory(NULL, NULL);
       
    51         return(NULL);
       
    52     }
       
    53     memcpy(ret, cur, len * sizeof(xmlChar));
       
    54     ret[len] = 0;
       
    55     return(ret);
       
    56 }
       
    57 
       
    58 /**
       
    59  * xmlStrdup:
       
    60  * @cur:  the input xmlChar *
       
    61  *
       
    62  * a strdup for array of xmlChar's. Since they are supposed to be
       
    63  * encoded in UTF-8 or an encoding with 8bit based chars, we assume
       
    64  * a termination mark of '0'.
       
    65  *
       
    66  * Returns a new xmlChar * or NULL
       
    67  *
       
    68  * OOM: possible --> returns NULL for cup!=NULL and sets OOM flag
       
    69  */
       
    70 xmlChar *
       
    71 xmlStrdup(const xmlChar *cur) {
       
    72     const xmlChar *p = cur;
       
    73 
       
    74     if (cur == NULL) return(NULL);
       
    75     while (*p != 0) p++; /* non input consuming */
       
    76     return(xmlStrndup(cur, p - cur));
       
    77 }
       
    78 
       
    79 /**
       
    80  * xmlCharStrndup:
       
    81  * @cur:  the input char *
       
    82  * @len:  the len of @cur
       
    83  *
       
    84  * a strndup for char's to xmlChar's
       
    85  *
       
    86  * Returns a new xmlChar * or NULL
       
    87  */
       
    88 
       
    89 xmlChar *
       
    90 xmlCharStrndup(const char *cur, int len) {
       
    91     int i;
       
    92     xmlChar *ret;
       
    93 
       
    94     if ((cur == NULL) || (len < 0)) return(NULL);
       
    95     ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
       
    96     if (ret == NULL) {
       
    97         xmlErrMemory(NULL, NULL);
       
    98         return(NULL);
       
    99     }
       
   100     for (i = 0;i < len;i++)
       
   101         ret[i] = (xmlChar) cur[i];
       
   102     ret[len] = 0;
       
   103     return(ret);
       
   104 }
       
   105 
       
   106 /**
       
   107  * xmlCharStrdup:
       
   108  * @cur:  the input char *
       
   109  *
       
   110  * a strdup for char's to xmlChar's
       
   111  *
       
   112  * Returns a new xmlChar * or NULL
       
   113  */
       
   114 
       
   115 xmlChar *
       
   116 xmlCharStrdup(const char *cur) {
       
   117     const char *p = cur;
       
   118 
       
   119     if (cur == NULL) return(NULL);
       
   120     while (*p != '\0') p++; /* non input consuming */
       
   121     return(xmlCharStrndup(cur, p - cur));
       
   122 }
       
   123 
       
   124 /**
       
   125  * xmlStrcmp:
       
   126  * @str1:  the first xmlChar *
       
   127  * @str2:  the second xmlChar *
       
   128  *
       
   129  * a strcmp for xmlChar's
       
   130  *
       
   131  * Returns the integer result of the comparison
       
   132  */
       
   133 
       
   134 int
       
   135 xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
       
   136     register int tmp;
       
   137 
       
   138     if (str1 == str2) return(0);
       
   139     if (str1 == NULL) return(-1);
       
   140     if (str2 == NULL) return(1);
       
   141     do {
       
   142         tmp = *str1++ - *str2;
       
   143         if (tmp != 0) return(tmp);
       
   144     } while (*str2++ != 0);
       
   145     return 0;
       
   146 }
       
   147 
       
   148 /**
       
   149  * xmlStrEqual:
       
   150  * @str1:  the first xmlChar *
       
   151  * @str2:  the second xmlChar *
       
   152  *
       
   153  * Check if both string are equal of have same content
       
   154  * Should be a bit more readable and faster than xmlStrEqual()
       
   155  *
       
   156  * Returns 1 if they are equal, 0 if they are different
       
   157  *
       
   158  * OOM: never
       
   159  */
       
   160 
       
   161 int
       
   162 xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
       
   163     if (str1 == str2) return(1);
       
   164     if (str1 == NULL) return(0);
       
   165     if (str2 == NULL) return(0);
       
   166     do {
       
   167         if (*str1++ != *str2)
       
   168             return(0);
       
   169     } while (*str2++);
       
   170     return(1);
       
   171 }
       
   172 
       
   173 /**
       
   174  * xmlStrQEqual:
       
   175  * @pref:  the prefix of the QName
       
   176  * @name:  the localname of the QName
       
   177  * @str:  the second xmlChar *
       
   178  *
       
   179  * Check if a QName is Equal to a given string
       
   180  *
       
   181  * Returns 1 if they are equal, 0 if they are different
       
   182  */
       
   183 
       
   184 int
       
   185 xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
       
   186     if (pref == NULL) return(xmlStrEqual(name, str));
       
   187     if (name == NULL) return(0);
       
   188     if (str == NULL) return(0);
       
   189 
       
   190     do {
       
   191         if (*pref++ != *str) return(0);
       
   192     } while ((*str++) && (*pref));
       
   193     if (*str++ != ':') return(0);
       
   194     do {
       
   195         if (*name++ != *str) return(0);
       
   196     } while (*str++);
       
   197     return(1);
       
   198 }
       
   199 
       
   200 /**
       
   201  * xmlStrncmp:
       
   202  * @str1:  the first xmlChar *
       
   203  * @str2:  the second xmlChar *
       
   204  * @len:  the max comparison length
       
   205  *
       
   206  * a strncmp for xmlChar's
       
   207  *
       
   208  * Returns the integer result of the comparison
       
   209  *
       
   210  * OOM: never
       
   211  */
       
   212 
       
   213 int
       
   214 xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
       
   215     register int tmp;
       
   216 
       
   217     if (len <= 0) return(0);
       
   218     if (str1 == str2) return(0);
       
   219     if (str1 == NULL) return(-1);
       
   220     if (str2 == NULL) return(1);
       
   221 #ifdef __GNUC__
       
   222     tmp = strncmp(str1, str2, len);
       
   223     return tmp;
       
   224 #else
       
   225     do {
       
   226         tmp = *str1++ - *str2;
       
   227         if (tmp != 0 || --len == 0) return(tmp);
       
   228     } while (*str2++ != 0);
       
   229     return 0;
       
   230 #endif
       
   231 }
       
   232 
       
   233 static const xmlChar casemap[256] = {
       
   234     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
       
   235     0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
       
   236     0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
       
   237     0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
       
   238     0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
       
   239     0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
       
   240     0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
       
   241     0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
       
   242     0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
       
   243     0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
       
   244     0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
       
   245     0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
       
   246     0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
       
   247     0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
       
   248     0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
       
   249     0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
       
   250     0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
       
   251     0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
       
   252     0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
       
   253     0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
       
   254     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
       
   255     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
       
   256     0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
       
   257     0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
       
   258     0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
       
   259     0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
       
   260     0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
       
   261     0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
       
   262     0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
       
   263     0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
       
   264     0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
       
   265     0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
       
   266 };
       
   267 
       
   268 
       
   269 /**
       
   270  * xmlStrcasecmp:
       
   271  * @str1:  the first xmlChar *
       
   272  * @str2:  the second xmlChar *
       
   273  *
       
   274  * a strcasecmp for xmlChar's
       
   275  *
       
   276  * Returns the integer result of the comparison
       
   277  */
       
   278 
       
   279 int
       
   280 xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
       
   281     register int tmp;
       
   282 
       
   283     if (str1 == str2) return(0);
       
   284     if (str1 == NULL) return(-1);
       
   285     if (str2 == NULL) return(1);
       
   286     do {
       
   287         tmp = casemap[*str1++] - casemap[*str2];
       
   288         if (tmp != 0) return(tmp);
       
   289     } while (*str2++ != 0);
       
   290     return 0;
       
   291 }
       
   292 
       
   293 /**
       
   294  * xmlStrncasecmp:
       
   295  * @str1:  the first xmlChar *
       
   296  * @str2:  the second xmlChar *
       
   297  * @len:  the max comparison length
       
   298  *
       
   299  * a strncasecmp for xmlChar's
       
   300  *
       
   301  * Returns the integer result of the comparison
       
   302  */
       
   303 
       
   304 int
       
   305 xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
       
   306     register int tmp;
       
   307 
       
   308     if (len <= 0) return(0);
       
   309     if (str1 == str2) return(0);
       
   310     if (str1 == NULL) return(-1);
       
   311     if (str2 == NULL) return(1);
       
   312     do {
       
   313         tmp = casemap[*str1++] - casemap[*str2];
       
   314         if (tmp != 0 || --len == 0) return(tmp);
       
   315     } while (*str2++ != 0);
       
   316     return 0;
       
   317 }
       
   318 
       
   319 /**
       
   320  * xmlStrchr:
       
   321  * @str:  the xmlChar * array
       
   322  * @val:  the xmlChar to search
       
   323  *
       
   324  * a strchr for xmlChar's
       
   325  *
       
   326  * Returns the xmlChar* for the first occurrence or NULL.
       
   327  */
       
   328 
       
   329 const xmlChar *
       
   330 xmlStrchr(const xmlChar *str, xmlChar val) {
       
   331     if (str == NULL) return(NULL);
       
   332     while (*str != 0) { /* non input consuming */
       
   333         if (*str == val) return((xmlChar *) str);
       
   334         str++;
       
   335     }
       
   336     return(NULL);
       
   337 }
       
   338 
       
   339 /**
       
   340  * xmlStrstr:
       
   341  * @str:  the xmlChar * array (haystack)
       
   342  * @val:  the xmlChar to search (needle)
       
   343  *
       
   344  * a strstr for xmlChar's
       
   345  *
       
   346  * Returns the xmlChar * for the first occurrence or NULL.
       
   347  *
       
   348  * OOM: never
       
   349  */
       
   350 
       
   351 const xmlChar *
       
   352 xmlStrstr(const xmlChar *str, const xmlChar *val) {
       
   353     int n;
       
   354 
       
   355     if (str == NULL) return(NULL);
       
   356     if (val == NULL) return(NULL);
       
   357     n = xmlStrlen(val);
       
   358 
       
   359     if (n == 0) return(str);
       
   360     while (*str != 0) { /* non input consuming */
       
   361         if (*str == *val) {
       
   362             if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
       
   363         }
       
   364         str++;
       
   365     }
       
   366     return(NULL);
       
   367 }
       
   368 
       
   369 /**
       
   370  * xmlStrcasestr:
       
   371  * @str:  the xmlChar * array (haystack)
       
   372  * @val:  the xmlChar to search (needle)
       
   373  *
       
   374  * a case-ignoring strstr for xmlChar's
       
   375  *
       
   376  * Returns the xmlChar * for the first occurrence or NULL.
       
   377  */
       
   378 
       
   379 const xmlChar *
       
   380 xmlStrcasestr(const xmlChar *str, xmlChar *val) {
       
   381     int n;
       
   382 
       
   383     if (str == NULL) return(NULL);
       
   384     if (val == NULL) return(NULL);
       
   385     n = xmlStrlen(val);
       
   386 
       
   387     if (n == 0) return(str);
       
   388     while (*str != 0) { /* non input consuming */
       
   389         if (casemap[*str] == casemap[*val])
       
   390             if (!xmlStrncasecmp(str, val, n)) return(str);
       
   391         str++;
       
   392     }
       
   393     return(NULL);
       
   394 }
       
   395 
       
   396 /**
       
   397  * xmlStrsub:
       
   398  * @str:  the xmlChar * array (haystack)
       
   399  * @start:  the index of the first char (zero based)
       
   400  * @len:  the length of the substring
       
   401  *
       
   402  * Extract a substring of a given string
       
   403  *
       
   404  * Returns the xmlChar * for the first occurrence or NULL.
       
   405  */
       
   406 
       
   407 xmlChar *
       
   408 xmlStrsub(const xmlChar *str, int start, int len) {
       
   409     int i;
       
   410 
       
   411     if (str == NULL) return(NULL);
       
   412     if (start < 0) return(NULL);
       
   413     if (len < 0) return(NULL);
       
   414 
       
   415     for (i = 0;i < start;i++) {
       
   416         if (*str == 0) return(NULL);
       
   417         str++;
       
   418     }
       
   419     if (*str == 0) return(NULL);
       
   420     return(xmlStrndup(str, len));
       
   421 }
       
   422 
       
   423 /**
       
   424  * xmlStrlen:
       
   425  * @str:  the xmlChar * array
       
   426  *
       
   427  * length of a xmlChar's string
       
   428  *
       
   429  * Returns the number of xmlChar contained in the ARRAY.
       
   430  */
       
   431 
       
   432 int
       
   433 xmlStrlen(const xmlChar *str) {
       
   434     int len = 0;
       
   435 
       
   436     if (str == NULL) return(0);
       
   437     while (*str != 0) { /* non input consuming */
       
   438         str++;
       
   439         len++;
       
   440     }
       
   441     return(len);
       
   442 }
       
   443 
       
   444 /**
       
   445  * xmlStrncat:
       
   446  * @cur:  the original xmlChar* array
       
   447  * @add:  the xmlChar* array added
       
   448  * @len:  the length of @add
       
   449  *
       
   450  * a strncat for array of xmlChar's, it will extend @cur with the len
       
   451  * first bytes of @add.
       
   452  *
       
   453  * Returns a new xmlChar*, the original @cur is reallocated if needed
       
   454  * and should not be freed
       
   455  *
       
   456  * OOM: possible --> OOM flag is set  // TODO: support NULL result in OOM
       
   457  */
       
   458 xmlChar*
       
   459 xmlStrncat(xmlChar* cur, const xmlChar* add, int len)
       
   460 {
       
   461     int size;
       
   462     xmlChar* ret;
       
   463 
       
   464     if ((add == NULL) || (len == 0))
       
   465         return(cur);
       
   466     if (cur == NULL)
       
   467         return(xmlStrndup(add, len));
       
   468 
       
   469     size = xmlStrlen(cur);
       
   470     // DONE: Fix xmlRealloc: Nothing to fix! Cur should not be freed...
       
   471     ret = (xmlChar*) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
       
   472     if (!ret) {
       
   473         xmlErrMemory(NULL, NULL);
       
   474         return(cur);
       
   475     }
       
   476     memcpy(&ret[size], add, len * sizeof(xmlChar));
       
   477     ret[size + len] = 0;
       
   478     return(ret);
       
   479 }
       
   480 
       
   481 /**
       
   482  * xmlStrncatNew:
       
   483  * @str1:  first xmlChar string
       
   484  * @str2:  second xmlChar string
       
   485  * @len:  the len of @str2
       
   486  *
       
   487  * same as xmlStrncat, but creates a new string.  The original
       
   488  * two strings are not freed.
       
   489  *
       
   490  * Returns a new xmlChar* or NULL
       
   491  *
       
   492  * OOM: possible --> returns NULL, sets OOM flag
       
   493  */
       
   494 xmlChar *
       
   495 xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
       
   496     int size;
       
   497     xmlChar *ret;
       
   498 
       
   499     if ((str2 == NULL) || (len == 0))
       
   500         return(xmlStrdup(str1));
       
   501     if (str1 == NULL)
       
   502         return(xmlStrndup(str2, len));
       
   503 
       
   504     size = xmlStrlen(str1);
       
   505     ret = (xmlChar *) xmlMalloc((size + len + 1) * sizeof(xmlChar));
       
   506     if (ret == NULL) {
       
   507         xmlErrMemory(NULL, NULL); // sets OOM flag
       
   508         //return(xmlStrndup(str1, size)); // TODO: Return NULL, because it is OOM?
       
   509         return NULL;
       
   510     }
       
   511     memcpy(ret, str1, size * sizeof(xmlChar));
       
   512     memcpy(&ret[size], str2, len * sizeof(xmlChar));
       
   513     ret[size + len] = 0;
       
   514     return(ret);
       
   515 }
       
   516 
       
   517 /**
       
   518  * xmlStrcat:
       
   519  * @cur:  the original xmlChar* array
       
   520  * @add:  the xmlChar* array added
       
   521  *
       
   522  * a strcat for array of xmlChar's. Since they are supposed to be
       
   523  * encoded in UTF-8 or an encoding with 8bit based chars, we assume
       
   524  * a termination mark of '0'.
       
   525  *
       
   526  * Returns a new xmlChar* containing the concatenated string.
       
   527  *
       
   528  * OOM: possible --> OOM flag is set // TODO: Support NULL result
       
   529  */
       
   530 xmlChar *
       
   531 xmlStrcat(xmlChar *cur, const xmlChar *add) {
       
   532     const xmlChar *p = add;
       
   533 
       
   534     if (add == NULL)
       
   535         return(cur);
       
   536     if (cur == NULL)
       
   537         return(xmlStrdup(add));
       
   538 
       
   539     while (*p != 0) p++; /* non input consuming */
       
   540     return(xmlStrncat(cur, add, p - add));
       
   541 }
       
   542 
       
   543 /**
       
   544  * xmlStrPrintf:
       
   545  * @buf:   the result buffer.
       
   546  * @len:   the result buffer length.
       
   547  * @msg:   the message with printf formatting.
       
   548  * @...:   extra parameters for the message.
       
   549  *
       
   550  * Formats @msg and places result into @buf.
       
   551  *
       
   552  * Returns the number of characters written to @buf or -1 if an error occurs.
       
   553  */
       
   554 int
       
   555 xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
       
   556     va_list args;
       
   557     int ret;
       
   558 
       
   559     if((buf == NULL) || (msg == NULL)) {
       
   560         return(-1);
       
   561     }
       
   562 
       
   563     va_start(args, msg);
       
   564     ret = vsnprintf((char *) buf, len, (const char *) msg, args);
       
   565     va_end(args);
       
   566     buf[len - 1] = 0; /* be safe ! */
       
   567 
       
   568     return(ret);
       
   569 }
       
   570 
       
   571 /**
       
   572  * xmlStrVPrintf:
       
   573  * @buf:   the result buffer.
       
   574  * @len:   the result buffer length.
       
   575  * @msg:   the message with printf formatting.
       
   576  * @ap:    extra parameters for the message.
       
   577  *
       
   578  * Formats @msg and places result into @buf.
       
   579  *
       
   580  * Returns the number of characters written to @buf or -1 if an error occurs.
       
   581  */
       
   582 int
       
   583 xmlStrVPrintf(xmlChar *buf, int len, const xmlChar *msg, va_list ap) {
       
   584     int ret;
       
   585 
       
   586     if((buf == NULL) || (msg == NULL)) {
       
   587         return(-1);
       
   588     }
       
   589 
       
   590     ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
       
   591     buf[len - 1] = 0; /* be safe ! */
       
   592 
       
   593     return(ret);
       
   594 }
       
   595 
       
   596 /************************************************************************
       
   597  *                                                                      *
       
   598  *              Generic UTF8 handling routines                          *
       
   599  *                                                                      *
       
   600  * From rfc2044: encoding of the Unicode values on UTF-8:               *
       
   601  *                                                                      *
       
   602  * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           *
       
   603  * 0000 0000-0000 007F   0xxxxxxx                                       *
       
   604  * 0000 0080-0000 07FF   110xxxxx 10xxxxxx                              *
       
   605  * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx                     *
       
   606  *                                                                      *
       
   607  * I hope we won't use values > 0xFFFF anytime soon !                   *
       
   608  *                                                                      *
       
   609  ************************************************************************/
       
   610 
       
   611 
       
   612 /**
       
   613  * xmlUTF8Size:
       
   614  * @utf: pointer to the UTF8 character
       
   615  *
       
   616  * calculates the internal size of a UTF8 character
       
   617  *
       
   618  * returns the numbers of bytes in the character, -1 on format error
       
   619  */
       
   620 int
       
   621 xmlUTF8Size(const xmlChar *utf) {
       
   622     xmlChar mask;
       
   623     int len;
       
   624 
       
   625     if (utf == NULL)
       
   626         return -1;
       
   627     if (*utf < 0x80)
       
   628         return 1;
       
   629     /* check valid UTF8 character */
       
   630     if (!(*utf & 0x40))
       
   631         return -1;
       
   632     /* determine number of bytes in char */
       
   633     len = 2;
       
   634     for (mask=0x20; mask != 0; mask>>=1) {
       
   635         if (!(*utf & mask))
       
   636             return len;
       
   637         len++;
       
   638     }
       
   639     return -1;
       
   640 }
       
   641 
       
   642 /**
       
   643  * xmlUTF8Charcmp:
       
   644  * @utf1: pointer to first UTF8 char
       
   645  * @utf2: pointer to second UTF8 char
       
   646  *
       
   647  * compares the two UCS4 values
       
   648  *
       
   649  * returns result of the compare as with xmlStrncmp
       
   650  */
       
   651 int
       
   652 xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
       
   653 
       
   654     if (utf1 == NULL ) {
       
   655         if (utf2 == NULL)
       
   656             return 0;
       
   657         return -1;
       
   658     }
       
   659     return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
       
   660 }
       
   661 
       
   662 /**
       
   663  * xmlUTF8Strlen:
       
   664  * @utf:  a sequence of UTF-8 encoded bytes
       
   665  *
       
   666  * compute the length of an UTF8 string, it doesn't do a full UTF8
       
   667  * checking of the content of the string.
       
   668  *
       
   669  * Returns the number of characters in the string or -1 in case of error
       
   670  *
       
   671  * OOM: never
       
   672  */
       
   673 int
       
   674 xmlUTF8Strlen(const xmlChar *utf) {
       
   675     int ret = 0;
       
   676 
       
   677     if (utf == NULL)
       
   678         return(-1);
       
   679 
       
   680     while (*utf != 0) {
       
   681         if (utf[0] & 0x80) {
       
   682             if ((utf[1] & 0xc0) != 0x80)
       
   683                 return(-1);
       
   684             if ((utf[0] & 0xe0) == 0xe0) {
       
   685                 if ((utf[2] & 0xc0) != 0x80)
       
   686                     return(-1);
       
   687                 if ((utf[0] & 0xf0) == 0xf0) {
       
   688                     if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
       
   689                         return(-1);
       
   690                     utf += 4;
       
   691                 } else {
       
   692                     utf += 3;
       
   693                 }
       
   694             } else {
       
   695                 utf += 2;
       
   696             }
       
   697         } else {
       
   698             utf++;
       
   699         }
       
   700         ret++;
       
   701     }
       
   702     return(ret);
       
   703 }
       
   704 
       
   705 /**
       
   706  * xmlGetUTF8Char:
       
   707  * @utf:  a sequence of UTF-8 encoded bytes
       
   708  * @len:  a pointer to @bytes len
       
   709  *
       
   710  * Read one UTF8 Char from @utf
       
   711  *
       
   712  * Returns the char value or -1 in case of error, and updates *len with the
       
   713  *        number of bytes consumed
       
   714  */
       
   715 int
       
   716 xmlGetUTF8Char(const unsigned char *utf, int *len) {
       
   717     unsigned int c;
       
   718 
       
   719     if (utf == NULL)
       
   720         goto error;
       
   721     if (len == NULL)
       
   722         goto error;
       
   723     if (*len < 1)
       
   724         goto error;
       
   725 
       
   726     c = utf[0];
       
   727     if (c & 0x80) {
       
   728         if (*len < 2)
       
   729             goto error;
       
   730         if ((utf[1] & 0xc0) != 0x80)
       
   731             goto error;
       
   732         if ((c & 0xe0) == 0xe0) {
       
   733             if (*len < 3)
       
   734                 goto error;
       
   735             if ((utf[2] & 0xc0) != 0x80)
       
   736                 goto error;
       
   737             if ((c & 0xf0) == 0xf0) {
       
   738                 if (*len < 4)
       
   739                     goto error;
       
   740                 if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
       
   741                     goto error;
       
   742                 *len = 4;
       
   743                 /* 4-byte code */
       
   744                 c = (utf[0] & 0x7) << 18;
       
   745                 c |= (utf[1] & 0x3f) << 12;
       
   746                 c |= (utf[2] & 0x3f) << 6;
       
   747                 c |= utf[3] & 0x3f;
       
   748             } else {
       
   749               /* 3-byte code */
       
   750                 *len = 3;
       
   751                 c = (utf[0] & 0xf) << 12;
       
   752                 c |= (utf[1] & 0x3f) << 6;
       
   753                 c |= utf[2] & 0x3f;
       
   754             }
       
   755         } else {
       
   756           /* 2-byte code */
       
   757             *len = 2;
       
   758             c = (utf[0] & 0x1f) << 6;
       
   759             c |= utf[1] & 0x3f;
       
   760         }
       
   761     } else {
       
   762         /* 1-byte code */
       
   763         *len = 1;
       
   764     }
       
   765     return(c);
       
   766 
       
   767 error:
       
   768     *len = 0;
       
   769     return(-1);
       
   770 }
       
   771 
       
   772 
       
   773 #ifndef XMLENGINE_EXCLUDE_UNUSED
       
   774 /**
       
   775  * xmlCheckUTF8:
       
   776  * @utf: Pointer to putative UTF-8 encoded string.
       
   777  *
       
   778  * Checks @utf for being valid UTF-8. @utf is assumed to be
       
   779  * null-terminated. This function is not super-strict, as it will
       
   780  * allow longer UTF-8 sequences than necessary. Note that Java is
       
   781  * capable of producing these sequences if provoked. Also note, this
       
   782  * routine checks for the 4-byte maximum size, but does not check for
       
   783  * 0x10ffff maximum value.
       
   784  *
       
   785  * Return value: true if @utf is valid.
       
   786  **/
       
   787 int
       
   788 xmlCheckUTF8(const unsigned char *utf)
       
   789 {
       
   790     int ix;
       
   791     unsigned char c;
       
   792 
       
   793     for (ix = 0; (c = utf[ix]);) {
       
   794         if (c & 0x80) {
       
   795             if ((utf[ix + 1] & 0xc0) != 0x80)
       
   796                 return(0);
       
   797             if ((c & 0xe0) == 0xe0) {
       
   798                 if ((utf[ix + 2] & 0xc0) != 0x80)
       
   799                     return(0);
       
   800                 if ((c & 0xf0) == 0xf0) {
       
   801                     if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80)
       
   802                         return(0);
       
   803                     ix += 4;
       
   804                     /* 4-byte code */
       
   805                 } else
       
   806                     /* 3-byte code */
       
   807                     ix += 3;
       
   808             } else
       
   809                 /* 2-byte code */
       
   810                 ix += 2;
       
   811         } else
       
   812             /* 1-byte code */
       
   813             ix++;
       
   814       }
       
   815       return(1);
       
   816 }
       
   817 
       
   818 #endif /* ifndef XMLENGINE_EXCLUDE_UNUSED */
       
   819 
       
   820 
       
   821 /**
       
   822  * xmlUTF8Strsize:
       
   823  * @utf:  a sequence of UTF-8 encoded bytes
       
   824  * @len:  the number of characters in the array
       
   825  *
       
   826  * storage size of an UTF8 string
       
   827  *
       
   828  * Returns the storage size of
       
   829  * the first 'len' characters of ARRAY
       
   830  *
       
   831  */
       
   832 
       
   833 int
       
   834 xmlUTF8Strsize(const xmlChar *utf, int len) {
       
   835     const xmlChar   *ptr=utf;
       
   836     xmlChar         ch;
       
   837 
       
   838     if (len <= 0)
       
   839         return(0);
       
   840 
       
   841     while ( len-- > 0) {
       
   842         if ( !*ptr )
       
   843             break;
       
   844         if ( (ch = *ptr++) & 0x80)
       
   845             while ( (ch<<=1) & 0x80 )
       
   846                 ptr++;
       
   847     }
       
   848     return (ptr - utf);
       
   849 }
       
   850 
       
   851 
       
   852 /**
       
   853  * xmlUTF8Strndup:
       
   854  * @utf:  the input UTF8 *
       
   855  * @len:  the len of @utf (in chars)
       
   856  *
       
   857  * a strndup for array of UTF8's
       
   858  *
       
   859  * Returns a new UTF8 * or NULL
       
   860  */
       
   861 xmlChar *
       
   862 xmlUTF8Strndup(const xmlChar *utf, int len) {
       
   863     xmlChar *ret;
       
   864     int i;
       
   865 
       
   866     if ((utf == NULL) || (len < 0)) return(NULL);
       
   867     i = xmlUTF8Strsize(utf, len);
       
   868     ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
       
   869     if (ret == NULL) {
       
   870 /*      // TODO: Set OOM flag and report (logging)
       
   871         xmlGenericError(xmlGenericErrorContext,
       
   872                 EMBED_ERRTXT("malloc of %ld byte failed\n"),
       
   873                 (len + 1) * (long)sizeof(xmlChar));
       
   874  */
       
   875         return(NULL);
       
   876     }
       
   877     memcpy(ret, utf, i * sizeof(xmlChar));
       
   878     ret[i] = 0;
       
   879     return(ret);
       
   880 }
       
   881 
       
   882 /**
       
   883  * xmlUTF8Strpos:
       
   884  * @utf:  the input UTF8 *
       
   885  * @pos:  the position of the desired UTF8 char (in chars)
       
   886  *
       
   887  * a function to provide the equivalent of fetching a
       
   888  * character from a string array
       
   889  *
       
   890  * Returns a pointer to the UTF8 character or NULL
       
   891  */
       
   892 xmlChar *
       
   893 xmlUTF8Strpos(const xmlChar *utf, int pos) {
       
   894     xmlChar ch;
       
   895 
       
   896     if (utf == NULL) return(NULL);
       
   897     if ( (pos < 0) || (pos >= xmlUTF8Strlen(utf)) )
       
   898         return(NULL);
       
   899     while (pos--) {
       
   900         if ((ch=*utf++) == 0) return(NULL);
       
   901         if ( ch & 0x80 ) {
       
   902             /* if not simple ascii, verify proper format */
       
   903             if ( (ch & 0xc0) != 0xc0 )
       
   904                 return(NULL);
       
   905             /* then skip over remaining bytes for this char */
       
   906             while ( (ch <<= 1) & 0x80 )
       
   907                 if ( (*utf++ & 0xc0) != 0x80 )
       
   908                     return(NULL);
       
   909         }
       
   910     }
       
   911     return((xmlChar *)utf);
       
   912 }
       
   913 
       
   914 /**
       
   915  * xmlUTF8Strloc:
       
   916  * @utf:  the input UTF8 *
       
   917  * @utfchar:  the UTF8 character to be found
       
   918  *
       
   919  * a function to provide the relative location of a UTF8 char
       
   920  *
       
   921  * Returns the relative character position of the desired char
       
   922  * or -1 if not found
       
   923  */
       
   924 int
       
   925 xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
       
   926     int i, size;
       
   927     xmlChar ch;
       
   928 
       
   929     if (utf==NULL || utfchar==NULL) return -1;
       
   930     size = xmlUTF8Strsize(utfchar, 1);
       
   931         for(i=0; (ch=*utf) != 0; i++) {
       
   932             if (xmlStrncmp(utf, utfchar, size)==0)
       
   933                 return(i);
       
   934             utf++;
       
   935             if ( ch & 0x80 ) {
       
   936                 /* if not simple ascii, verify proper format */
       
   937                 if ( (ch & 0xc0) != 0xc0 )
       
   938                     return(-1);
       
   939                 /* then skip over remaining bytes for this char */
       
   940                 while ( (ch <<= 1) & 0x80 )
       
   941                     if ( (*utf++ & 0xc0) != 0x80 )
       
   942                         return(-1);
       
   943             }
       
   944         }
       
   945 
       
   946     return(-1);
       
   947 }
       
   948 /**
       
   949  * xmlUTF8Strsub:
       
   950  * @utf:  a sequence of UTF-8 encoded bytes
       
   951  * @start: relative pos of first char
       
   952  * @len:   total number to copy
       
   953  *
       
   954  * Create a substring from a given UTF-8 string
       
   955  * Note:  positions are given in units of UTF-8 chars
       
   956  *
       
   957  * Returns a pointer to a newly created string
       
   958  * or NULL if any problem
       
   959  */
       
   960 
       
   961 xmlChar *
       
   962 xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
       
   963     int            i;
       
   964     xmlChar ch;
       
   965 
       
   966     if (utf == NULL) return(NULL);
       
   967     if (start < 0) return(NULL);
       
   968     if (len < 0) return(NULL);
       
   969 
       
   970     /*
       
   971      * Skip over any leading chars
       
   972      */
       
   973     for (i = 0;i < start;i++) {
       
   974         if ((ch=*utf++) == 0) return(NULL);
       
   975         if ( ch & 0x80 ) {
       
   976             /* if not simple ascii, verify proper format */
       
   977             if ( (ch & 0xc0) != 0xc0 )
       
   978                 return(NULL);
       
   979             /* then skip over remaining bytes for this char */
       
   980             while ( (ch <<= 1) & 0x80 )
       
   981                 if ( (*utf++ & 0xc0) != 0x80 )
       
   982                     return(NULL);
       
   983         }
       
   984     }
       
   985 
       
   986     return(xmlUTF8Strndup(utf, len));
       
   987 }
       
   988