xml/libxml2libs/src/libxml2/libxml2_xmlstring.c
changeset 0 e35f40988205
equal deleted inserted replaced
-1:000000000000 0:e35f40988205
       
     1 /*
       
     2  * libxml2_xmlstring.c : an XML string utilities module
       
     3  *
       
     4  * This module provides various utility functions for manipulating
       
     5  * the xmlChar* type. All functions named xmlStr* have been moved here
       
     6  * from the parser.c file (their original home).
       
     7  *
       
     8  * See Copyright for the status of this software.
       
     9  *
       
    10  * UTF8 string routines from:
       
    11  * William Brack <wbrack@mmm.com.hk>
       
    12  *
       
    13  * daniel@veillard.com
       
    14  * Portion Copyright © 2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
       
    15  */
       
    16 
       
    17 #define IN_LIBXML
       
#include "xmlenglibxml.h"

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <stdapis/libxml2/libxml2_globals.h>
#include <stdapis/libxml2/libxml2_parserinternals.h>
#include "libxml2_errencoding.h"
#include <stdapis/libxml2/libxml2_xmlstring.h>
       
    26 
       
    27 /************************************************************************
       
    28  *                                                                      *
       
    29  *                Commodity functions to handle xmlChars                *
       
    30  *                                                                      *
       
    31  ************************************************************************/
       
    32 
       
    33 /**
       
    34  * xmlStrndup:
       
    35  * @param cur the input xmlChar*
       
    36  * @param len the len of cur
       
    37  *
       
    38  * a strndup for array of xmlChar's
       
    39  *
       
    40  * Returns a new xmlChar* or NULL
       
    41  *
       
    42  * OOM: possible --> returns NULL for (cup!=NULL && len>=0) and sets OOM flag
       
    43  */
       
    44 XMLPUBFUNEXPORT xmlChar *
       
    45 xmlStrndup(const xmlChar *cur, int len) {
       
    46     xmlChar *ret;
       
    47 
       
    48     if (!cur || (len < 0))
       
    49         return(NULL);
       
    50     ret = (xmlChar*) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
       
    51     if (!ret) {
       
    52         // NOTE: error reporting is removed; it will be done from xmlMalloc	
       
    53         return ret; // ret is NULL here
       
    54     }
       
    55     memcpy(ret, cur, len * sizeof(xmlChar));
       
    56     ret[len] = 0;
       
    57     return(ret);
       
    58 }
       
    59 
       
    60 /**
       
    61  * xmlStrdup:
       
    62  * @param cur the input xmlChar *
       
    63  *
       
    64  * a strdup for array of xmlChar's. Since they are supposed to be
       
    65  * encoded in UTF-8 or an encoding with 8bit based chars, we assume
       
    66  * a termination mark of '0'.
       
    67  *
       
    68  * Returns a new xmlChar * or NULL
       
    69  *
       
    70  * OOM: possible --> returns NULL for cur!=NULL and sets OOM flag
       
    71  */
       
    72  
       
    73 XMLPUBFUNEXPORT xmlChar*
       
    74 xmlStrdup(const xmlChar *cur) {
       
    75     const xmlChar *p = cur;
       
    76 
       
    77     if (!cur)
       
    78         { // This should not normally happen -- check argument before calling!
       
    79         // NOTE: Avoiding usage of NULL here improves generated assembly!
       
    80         return(xmlChar*)cur /* NULL */;
       
    81         }
       
    82     while (*p != 0)
       
    83         p++; /* non input consuming */
       
    84     return(xmlStrndup(cur, p - cur));
       
    85 }
       
    86 
       
    87 /**
       
    88  * xmlCharStrndup:
       
    89  * @param cur the input char *
       
    90  * @param len the len of cur
       
    91  *
       
    92  * a strndup for char's to xmlChar's
       
    93  *
       
    94  * Returns a new xmlChar * or NULL
       
    95  */
       
    96 
       
    97 XMLPUBFUNEXPORT xmlChar *
       
    98 xmlCharStrndup(const char *cur, int len) {
       
    99     int i;
       
   100     xmlChar *ret;
       
   101 
       
   102     if ((cur == NULL) || (len < 0)) return(NULL);
       
   103     ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
       
   104     if (ret == NULL) {
       
   105         xmlErrMemory(NULL, NULL);
       
   106         return(NULL);
       
   107     }
       
   108     for (i = 0;i < len;i++)
       
   109         ret[i] = (xmlChar) cur[i];
       
   110     ret[len] = 0;
       
   111     return(ret);
       
   112 }
       
   113 
       
   114 /**
       
   115  * xmlCharStrdup:
       
   116  * @param cur the input char *
       
   117  *
       
   118  * a strdup for char's to xmlChar's
       
   119  *
       
   120  * Returns a new xmlChar * or NULL
       
   121  */
       
   122 
       
   123 XMLPUBFUNEXPORT xmlChar *
       
   124 xmlCharStrdup(const char *cur) {
       
   125     const char *p = cur;
       
   126 
       
   127     if (cur == NULL) return(NULL);
       
   128     while (*p != '\0') p++; /* non input consuming */
       
   129     return(xmlCharStrndup(cur, p - cur));
       
   130 }
       
   131 
       
   132 /**
       
   133  * xmlStrcmp:
       
   134  * @param str1 the first xmlChar *
       
   135  * @param str2 the second xmlChar *
       
   136  *
       
   137  * a strcmp for xmlChar's
       
   138  *
       
   139  * Returns the integer result of the comparison
       
   140  */
       
   141 
       
   142 XMLPUBFUNEXPORT int
       
   143 xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
       
   144     register int tmp;
       
   145 
       
   146     if (str1 == str2) return(0);
       
   147     if (str1 == NULL) return(-1);
       
   148     if (str2 == NULL) return(1);
       
   149     do {
       
   150         tmp = *str1++ - *str2;
       
   151         if (tmp != 0) return(tmp);
       
   152     } while (*str2++ != 0);
       
   153     return 0;
       
   154 }
       
   155 
       
   156 /**
       
   157  * xmlStrEqual:
       
   158  * @param str1 the first xmlChar *
       
   159  * @param str2 the second xmlChar *
       
   160  *
       
   161  * Check if both string are equal of have same content
       
   162  * Should be a bit more readable and faster than xmlStrEqual()
       
   163  *
       
   164  * Returns 1 if they are equal, 0 if they are different
       
   165  *
       
   166  * OOM: never
       
   167  */
       
   168 // OPTIMIZE:
       
   169 //           - consider inlining OR
       
   170 //           - define as macro that tests first bytes prior calling it
       
   171 //           - for many caller functions safty checks are redundant (consider creating version w/o them)
       
   172 XMLPUBFUNEXPORT int
       
   173 xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
       
   174     if (str1 == str2) return(1);
       
   175     if (str1 == NULL) return(0);
       
   176     if (str2 == NULL) return(0);
       
   177     do {
       
   178         if (*str1++ != *str2)
       
   179             return(0);
       
   180     } while (*str2++);
       
   181     return(1);
       
   182 }
       
   183 
       
   184 /**
       
   185  * xmlStrQEqual:
       
   186  * @param pref the prefix of the QName
       
   187  * @param name the localname of the QName
       
   188  * @param str the second xmlChar *
       
   189  *
       
   190  * Check if a QName is Equal to a given string
       
   191  *
       
   192  * Returns 1 if they are equal, 0 if they are different
       
   193  */
       
   194 
       
   195 XMLPUBFUNEXPORT int
       
   196 xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
       
   197     if (pref == NULL) return(xmlStrEqual(name, str));
       
   198     if (name == NULL) return(0);
       
   199     if (str == NULL) return(0);
       
   200 
       
   201     do {
       
   202         if (*pref++ != *str) return(0);
       
   203     } while ((*str++) && (*pref));
       
   204     if (*str++ != ':') return(0);
       
   205     do {
       
   206         if (*name++ != *str) return(0);
       
   207     } while (*str++);
       
   208     return(1);
       
   209 }
       
   210 
       
   211 /**
       
   212  * xmlStrncmp:
       
   213  * @param str1 the first xmlChar *
       
   214  * @param str2 the second xmlChar *
       
   215  * @param len the max comparison length
       
   216  *
       
   217  * a strncmp for xmlChar's
       
   218  *
       
   219  * Returns the integer result of the comparison
       
   220  *
       
   221  * OOM: never
       
   222  */
       
   223 
       
   224 XMLPUBFUNEXPORT int
       
   225 xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
       
   226     register int tmp;
       
   227 
       
   228     if (len <= 0) return(0);
       
   229     if (str1 == str2) return(0);
       
   230     if (str1 == NULL) return(-1);
       
   231     if (str2 == NULL) return(1);
       
   232 #ifdef __GNUC__
       
   233     tmp = strncmp(str1, str2, len);
       
   234     return tmp;
       
   235 #else
       
   236     do {
       
   237         tmp = *str1++ - *str2;
       
   238         if (tmp != 0 || --len == 0) return(tmp);
       
   239     } while (*str2++ != 0);
       
   240     return 0;
       
   241 #endif
       
   242 }
       
   243 
       
   244 static const xmlChar casemap[256] = {
       
   245     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
       
   246     0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
       
   247     0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
       
   248     0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
       
   249     0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
       
   250     0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
       
   251     0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
       
   252     0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
       
   253     0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
       
   254     0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
       
   255     0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
       
   256     0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
       
   257     0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
       
   258     0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
       
   259     0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
       
   260     0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
       
   261     0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
       
   262     0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
       
   263     0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
       
   264     0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
       
   265     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
       
   266     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
       
   267     0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
       
   268     0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
       
   269     0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
       
   270     0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
       
   271     0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
       
   272     0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
       
   273     0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
       
   274     0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
       
   275     0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
       
   276     0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
       
   277 };
       
   278 
       
   279 
       
   280 /**
       
   281  * xmlStrcasecmp:
       
   282  * @param str1 the first xmlChar *
       
   283  * @param str2 the second xmlChar *
       
   284  *
       
   285  * a strcasecmp for xmlChar's
       
   286  *
       
   287  * Returns the integer result of the comparison
       
   288  */
       
   289 
       
   290 XMLPUBFUNEXPORT int
       
   291 xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
       
   292     register int tmp;
       
   293 
       
   294     if (str1 == str2) return(0);
       
   295     if (str1 == NULL) return(-1);
       
   296     if (str2 == NULL) return(1);
       
   297     do {
       
   298         tmp = casemap[*str1++] - casemap[*str2];
       
   299         if (tmp != 0) return(tmp);
       
   300     } while (*str2++ != 0);
       
   301     return 0;
       
   302 }
       
   303 
       
   304 /**
       
   305  * xmlStrncasecmp:
       
   306  * @param str1 the first xmlChar *
       
   307  * @param str2 the second xmlChar *
       
   308  * @param len the max comparison length
       
   309  *
       
   310  * a strncasecmp for xmlChar's
       
   311  *
       
   312  * Returns the integer result of the comparison
       
   313  */
       
   314 
       
   315 XMLPUBFUNEXPORT int
       
   316 xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
       
   317     register int tmp;
       
   318 
       
   319     if (len <= 0) return(0);
       
   320     if (str1 == str2) return(0);
       
   321     if (str1 == NULL) return(-1);
       
   322     if (str2 == NULL) return(1);
       
   323     do {
       
   324         tmp = casemap[*str1++] - casemap[*str2];
       
   325         if (tmp != 0 || --len == 0) return(tmp);
       
   326     } while (*str2++ != 0);
       
   327     return 0;
       
   328 }
       
   329 
       
   330 /**
       
   331  * xmlStrchr:
       
   332  * @param str the xmlChar * array
       
   333  * @param val the xmlChar to search
       
   334  *
       
   335  * a strchr for xmlChar's
       
   336  *
       
   337  * Returns the xmlChar* for the first occurrence or NULL.
       
   338  */
       
   339 
       
   340 XMLPUBFUNEXPORT const xmlChar *
       
   341 xmlStrchr(const xmlChar *str, xmlChar val) {
       
   342     if (str == NULL) return(NULL);
       
   343     while (*str != 0) { /* non input consuming */
       
   344         if (*str == val) return((xmlChar *) str);
       
   345         str++;
       
   346     }
       
   347     return(NULL);
       
   348 }
       
   349 
       
   350 /**
       
   351  * xmlStrstr:
       
   352  * @param str the xmlChar * array (haystack)
       
   353  * @param val the xmlChar to search (needle)
       
   354  *
       
   355  * a strstr for xmlChar's
       
   356  *
       
   357  * Returns the xmlChar * for the first occurrence or NULL.
       
   358  *
       
   359  * OOM: never
       
   360  */
       
   361 
       
   362 XMLPUBFUNEXPORT const xmlChar *
       
   363 xmlStrstr(const xmlChar *str, const xmlChar *val) {
       
   364     int n;
       
   365 
       
   366     if (str == NULL) return(NULL);
       
   367     if (val == NULL) return(NULL);
       
   368     n = xmlStrlen(val);
       
   369 
       
   370     if (n == 0) return(str);
       
   371     while (*str != 0) { /* non input consuming */
       
   372         if (*str == *val) {
       
   373             if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
       
   374         }
       
   375         str++;
       
   376     }
       
   377     return(NULL);
       
   378 }
       
   379 
       
   380 /**
       
   381  * xmlStrcasestr:
       
   382  * @param str the xmlChar * array (haystack)
       
   383  * @param val the xmlChar to search (needle)
       
   384  *
       
   385  * a case-ignoring strstr for xmlChar's
       
   386  *
       
   387  * Returns the xmlChar * for the first occurrence or NULL.
       
   388  */
       
   389 
       
   390 XMLPUBFUNEXPORT const xmlChar *
       
   391 xmlStrcasestr(const xmlChar *str, xmlChar *val) {
       
   392     int n;
       
   393 
       
   394     if (str == NULL) return(NULL);
       
   395     if (val == NULL) return(NULL);
       
   396     n = xmlStrlen(val);
       
   397 
       
   398     if (n == 0) return(str);
       
   399     while (*str != 0) { /* non input consuming */
       
   400         if (casemap[*str] == casemap[*val])
       
   401             if (!xmlStrncasecmp(str, val, n)) return(str);
       
   402         str++;
       
   403     }
       
   404     return(NULL);
       
   405 }
       
   406 
       
   407 /**
       
   408  * xmlStrsub:
       
   409  * @param str the xmlChar * array (haystack)
       
   410  * @param start the index of the first char (zero based)
       
   411  * @param len the length of the substring
       
   412  *
       
   413  * Extract a substring of a given string
       
   414  *
       
   415  * Returns the xmlChar * for the first occurrence or NULL.
       
   416  */
       
   417 
       
   418 XMLPUBFUNEXPORT xmlChar *
       
   419 xmlStrsub(const xmlChar *str, int start, int len) {
       
   420     int i;
       
   421 
       
   422     if (str == NULL) return(NULL);
       
   423     if (start < 0) return(NULL);
       
   424     if (len < 0) return(NULL);
       
   425 
       
   426     for (i = 0;i < start;i++) {
       
   427         if (*str == 0) return(NULL);
       
   428         str++;
       
   429     }
       
   430     if (*str == 0) return(NULL);
       
   431     return(xmlStrndup(str, len));
       
   432 }
       
   433 
       
   434 /**
       
   435  * xmlStrlen:
       
   436  * @param str the xmlChar * array
       
   437  *
       
   438  * length of a xmlChar's string
       
   439  *
       
   440  * Returns the number of xmlChar contained in the ARRAY.
       
   441  */
       
   442 
       
   443 XMLPUBFUNEXPORT int
       
   444 xmlStrlen(const xmlChar *str) {
       
   445     int len = 0;
       
   446 
       
   447     if (str == NULL) return(0);
       
   448     while (*str != 0) { /* non input consuming */
       
   449         str++;
       
   450         len++;
       
   451     }
       
   452     return(len);
       
   453 }
       
   454 
       
   455 /**
       
   456  * xmlStrncat:
       
   457  * @param cur the original xmlChar* array
       
   458  * @param add the xmlChar* array added
       
   459  * @param len the length of add
       
   460  *
       
   461  * a strncat for array of xmlChar's, it will extend cur with the len
       
   462  * first bytes of add.
       
   463  *
       
   464  * Returns a new xmlChar*, the original cur is reallocated if needed
       
   465  * and should not be freed
       
   466  *
       
   467  * OOM: possible --> OOM flag is set  
       
   468  */
       
   469 XMLPUBFUNEXPORT xmlChar*
       
   470 xmlStrncat(xmlChar* cur, const xmlChar* add, int len)
       
   471 {
       
   472     int size;
       
   473     xmlChar* ret;
       
   474 
       
   475     if ((add == NULL) || (len == 0))
       
   476         return(cur);
       
   477     if (cur == NULL)
       
   478         return(xmlStrndup(add, len));
       
   479 
       
   480     size = xmlStrlen(cur);
       
   481     // DONE: Fix xmlRealloc: Nothing to fix!
       
   482     ret = (xmlChar*) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
       
   483     if (!ret) {
       
   484         xmlErrMemory(NULL, NULL);
       
   485         return(cur);
       
   486     }
       
   487     memcpy(&ret[size], add, len * sizeof(xmlChar));
       
   488     ret[size + len] = 0;
       
   489     return(ret);
       
   490 }
       
   491 
       
   492 /**
       
   493  * xmlStrncatNew:
       
   494  * @param str1 first xmlChar string
       
   495  * @param str2 second xmlChar string
       
   496  * @param len the len of str2
       
   497  *
       
   498  * same as xmlStrncat, but creates a new string.  The original
       
   499  * two strings are not freed.
       
   500  *
       
   501  * Returns a new xmlChar* or NULL
       
   502  *
       
   503  * OOM: possible --> returns NULL, sets OOM flag
       
   504  */
       
   505 XMLPUBFUNEXPORT xmlChar *
       
   506 xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
       
   507     int size;
       
   508     xmlChar *ret;
       
   509 
       
   510     if ((str2 == NULL) || (len == 0))
       
   511         return(xmlStrdup(str1));
       
   512     if (str1 == NULL)
       
   513         return(xmlStrndup(str2, len));
       
   514 
       
   515     size = xmlStrlen(str1);
       
   516     ret = (xmlChar *) xmlMalloc((size + len + 1) * sizeof(xmlChar));
       
   517     if (ret == NULL) {
       
   518         xmlErrMemory(NULL, NULL); // sets OOM flag
       
   519         //return(xmlStrndup(str1, size)); 
       
   520         return NULL;
       
   521     }
       
   522     memcpy(ret, str1, size * sizeof(xmlChar));
       
   523     memcpy(&ret[size], str2, len * sizeof(xmlChar));
       
   524     ret[size + len] = 0;
       
   525     return(ret);
       
   526 }
       
   527 
       
   528 /**
       
   529  * xmlStrcat:
       
   530  * @param cur the original xmlChar* array
       
   531  * @param add the xmlChar* array added
       
   532  *
       
   533  * a strcat for array of xmlChar's. Since they are supposed to be
       
   534  * encoded in UTF-8 or an encoding with 8bit based chars, we assume
       
   535  * a termination mark of '0'.
       
   536  *
       
   537  * Returns a new xmlChar* containing the concatenated string.
       
   538  *
       
   539  * OOM: possible --> OOM flag is set 
       
   540  */
       
   541 XMLPUBFUNEXPORT xmlChar*
       
   542 xmlStrcat(xmlChar* cur, const xmlChar *add) {
       
   543     const xmlChar* p = add;
       
   544 
       
   545     if (!add)
       
   546         return(cur);
       
   547     if (!cur)
       
   548         return(xmlStrdup(add));
       
   549 
       
   550     while (*p != 0)
       
   551         p++; /* non input consuming */
       
   552     return(xmlStrncat(cur, add, p - add));
       
   553 }
       
   554 
       
   555 /**
       
   556  * xmlStrPrintf:
       
   557  * @param buf the result buffer.
       
   558  * @param len the result buffer length.
       
   559  * @param msg the message with printf formatting.
       
   560  * @param # extra parameters for the message.
       
   561  *
       
   562  * Formats msg and places result into buf.
       
   563  *
       
   564  * Returns the number of characters written to buf or -1 if an error occurs.
       
   565  */
       
   566 XMLPUBFUNEXPORT int
       
   567 xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
       
   568     va_list args;
       
   569     int ret;
       
   570 
       
   571     if((buf == NULL) || (msg == NULL)) {
       
   572         return(-1);
       
   573     }
       
   574 
       
   575     va_start(args, msg);
       
   576     ret = vsnprintf((char *) buf, len, (const char *) msg, args);
       
   577     va_end(args);
       
   578     buf[len - 1] = 0; /* be safe ! */
       
   579 
       
   580     return(ret);
       
   581 }
       
   582 
       
   583 /**
       
   584  * xmlStrVPrintf:
       
   585  * @param buf the result buffer.
       
   586  * @param len the result buffer length.
       
   587  * @param msg the message with printf formatting.
       
   588  * @param ap extra parameters for the message.
       
   589  *
       
   590  * Formats msg and places result into buf.
       
   591  *
       
   592  * Returns the number of characters written to buf or -1 if an error occurs.
       
   593  */
       
   594 XMLPUBFUNEXPORT int
       
   595 xmlStrVPrintf(xmlChar *buf, int len, const xmlChar *msg, va_list ap) {
       
   596     int ret;
       
   597 
       
   598     if((buf == NULL) || (msg == NULL)) {
       
   599         return(-1);
       
   600     }
       
   601 
       
   602     ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
       
   603     buf[len - 1] = 0; /* be safe ! */
       
   604 
       
   605     return(ret);
       
   606 }
       
   607 
       
   608 /************************************************************************
       
   609  *                                                                      *
       
   610  *              Generic UTF8 handling routines                          *
       
   611  *                                                                      *
       
   612  * From rfc2044: encoding of the Unicode values on UTF-8:               *
       
   613  *                                                                      *
       
   614  * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           *
       
   615  * 0000 0000-0000 007F   0xxxxxxx                                       *
       
   616  * 0000 0080-0000 07FF   110xxxxx 10xxxxxx                              *
       
   617  * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx                     *
       
   618  *                                                                      *
       
   619  * I hope we won't use values > 0xFFFF anytime soon !                   *
       
   620  *                                                                      *
       
   621  ************************************************************************/
       
   622 
       
   623 
       
   624 /**
       
   625  * xmlUTF8Size:
       
   626  * @param utf pointer to the UTF8 character
       
   627  *
       
   628  * calculates the internal size of a UTF8 character
       
   629  *
       
   630  * returns the numbers of bytes in the character, -1 on format error
       
   631  */
       
   632 XMLPUBFUNEXPORT int
       
   633 xmlUTF8Size(const xmlChar *utf) {
       
   634     xmlChar mask;
       
   635     int len;
       
   636 
       
   637     if (utf == NULL)
       
   638         return -1;
       
   639     if (*utf < 0x80)
       
   640         return 1;
       
   641     /* check valid UTF8 character */
       
   642     if (!(*utf & 0x40))
       
   643         return -1;
       
   644     /* determine number of bytes in char */
       
   645     len = 2;
       
   646     for (mask=0x20; mask != 0; mask>>=1) {
       
   647         if (!(*utf & mask))
       
   648             return len;
       
   649         len++;
       
   650     }
       
   651     return -1;
       
   652 }
       
   653 
       
   654 /**
       
   655  * xmlUTF8Charcmp:
       
   656  * @param utf1 pointer to first UTF8 char
       
   657  * @param utf2 pointer to second UTF8 char
       
   658  *
       
   659  * compares the two UCS4 values
       
   660  *
       
   661  * returns result of the compare as with xmlStrncmp
       
   662  */
       
   663 XMLPUBFUNEXPORT int
       
   664 xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
       
   665 
       
   666     if (utf1 == NULL ) {
       
   667         if (utf2 == NULL)
       
   668             return 0;
       
   669         return -1;
       
   670     }
       
   671     return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
       
   672 }
       
   673 
       
   674 /**
       
   675  * xmlUTF8Strlen:
       
   676  * @param utf a sequence of UTF-8 encoded bytes
       
   677  *
       
   678  * compute the length of an UTF8 string, it doesn't do a full UTF8
       
   679  * checking of the content of the string.
       
   680  *
       
   681  * Returns the number of characters in the string or -1 in case of error
       
   682  *
       
   683  * OOM: never
       
   684  */
       
   685 XMLPUBFUNEXPORT int
       
   686 xmlUTF8Strlen(const xmlChar *utf) {
       
   687     int ret = 0;
       
   688 
       
   689     if (utf == NULL)
       
   690         return(-1);
       
   691 
       
   692     while (*utf != 0) {
       
   693         if (utf[0] & 0x80) {
       
   694             if ((utf[1] & 0xc0) != 0x80)
       
   695                 return(-1);
       
   696             if ((utf[0] & 0xe0) == 0xe0) {
       
   697                 if ((utf[2] & 0xc0) != 0x80)
       
   698                     return(-1);
       
   699                 if ((utf[0] & 0xf0) == 0xf0) {
       
   700                     if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
       
   701                         return(-1);
       
   702                     utf += 4;
       
   703                 } else {
       
   704                     utf += 3;
       
   705                 }
       
   706             } else {
       
   707                 utf += 2;
       
   708             }
       
   709         } else {
       
   710             utf++;
       
   711         }
       
   712         ret++;
       
   713     }
       
   714     return(ret);
       
   715 }
       
   716 
       
   717 /**
       
   718  * xmlGetUTF8Char:
       
   719  * @param utf a sequence of UTF-8 encoded bytes
       
   720  * @param len a pointer to bytes len
       
   721  *
       
   722  * Read one UTF8 Char from utf
       
   723  *
       
   724  * Returns the char value or -1 in case of error, and updates *len with the
       
   725  *        number of bytes consumed
       
   726  */
       
   727 XMLPUBFUNEXPORT int
       
   728 xmlGetUTF8Char(const unsigned char *utf, int *len) {
       
   729     unsigned int c;
       
   730 
       
   731     if (utf == NULL)
       
   732         goto error;
       
   733     if (len == NULL)
       
   734         goto error;
       
   735     if (*len < 1)
       
   736         goto error;
       
   737 
       
   738     c = utf[0];
       
   739     if (c & 0x80) {
       
   740         if (*len < 2)
       
   741             goto error;
       
   742         if ((utf[1] & 0xc0) != 0x80)
       
   743             goto error;
       
   744         if ((c & 0xe0) == 0xe0) {
       
   745             if (*len < 3)
       
   746                 goto error;
       
   747             if ((utf[2] & 0xc0) != 0x80)
       
   748                 goto error;
       
   749             if ((c & 0xf0) == 0xf0) {
       
   750                 if (*len < 4)
       
   751                     goto error;
       
   752                 if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
       
   753                     goto error;
       
   754                 *len = 4;
       
   755                 /* 4-byte code */
       
   756                 c = (utf[0] & 0x7) << 18;
       
   757                 c |= (utf[1] & 0x3f) << 12;
       
   758                 c |= (utf[2] & 0x3f) << 6;
       
   759                 c |= utf[3] & 0x3f;
       
   760             } else {
       
   761               /* 3-byte code */
       
   762                 *len = 3;
       
   763                 c = (utf[0] & 0xf) << 12;
       
   764                 c |= (utf[1] & 0x3f) << 6;
       
   765                 c |= utf[2] & 0x3f;
       
   766             }
       
   767         } else {
       
   768           /* 2-byte code */
       
   769             *len = 2;
       
   770             c = (utf[0] & 0x1f) << 6;
       
   771             c |= utf[1] & 0x3f;
       
   772         }
       
   773     } else {
       
   774         /* 1-byte code */
       
   775         *len = 1;
       
   776     }
       
   777     return(c);
       
   778 
       
   779 error:
       
   780     *len = 0;
       
   781     return(-1);
       
   782 }
       
   783 
       
   784 
       
   785 #ifndef XMLENGINE_EXCLUDE_UNUSED
       
   786 /**
       
   787  * xmlCheckUTF8:
       
   788  * @param utf Pointer to putative UTF-8 encoded string.
       
   789  *
       
   790  * Checks utf for being valid UTF-8. utf is assumed to be
       
   791  * null-terminated. This function is not super-strict, as it will
       
   792  * allow longer UTF-8 sequences than necessary. Note that Java is
       
   793  * capable of producing these sequences if provoked. Also note, this
       
   794  * routine checks for the 4-byte maximum size, but does not check for
       
   795  * 0x10ffff maximum value.
       
   796  *
       
   797  * Return value: true if utf is valid.
       
   798  **/
       
   799 int
       
   800 xmlCheckUTF8(const unsigned char *utf)
       
   801 {
       
   802     int ix;
       
   803     unsigned char c;
       
   804 
       
   805     for (ix = 0; (c = utf[ix]);) {
       
   806         if (c & 0x80) {
       
   807             if ((utf[ix + 1] & 0xc0) != 0x80)
       
   808                 return(0);
       
   809             if ((c & 0xe0) == 0xe0) {
       
   810                 if ((utf[ix + 2] & 0xc0) != 0x80)
       
   811                     return(0);
       
   812                 if ((c & 0xf0) == 0xf0) {
       
   813                     if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80)
       
   814                         return(0);
       
   815                     ix += 4;
       
   816                     /* 4-byte code */
       
   817                 } else
       
   818                     /* 3-byte code */
       
   819                     ix += 3;
       
   820             } else
       
   821                 /* 2-byte code */
       
   822                 ix += 2;
       
   823         } else
       
   824             /* 1-byte code */
       
   825             ix++;
       
   826       }
       
   827       return(1);
       
   828 }
       
   829 
       
   830 #endif /* ifndef XMLENGINE_EXCLUDE_UNUSED */
       
   831 
       
   832 
       
   833 /**
       
   834  * xmlUTF8Strsize:
       
   835  * @param utf a sequence of UTF-8 encoded bytes
       
   836  * @param len the number of characters in the array
       
   837  *
       
   838  * storage size of an UTF8 string
       
   839  *
       
   840  * Returns the storage size of
       
   841  * the first 'len' characters of ARRAY
       
   842  *
       
   843  */
       
   844 
       
   845 XMLPUBFUNEXPORT int
       
   846 xmlUTF8Strsize(const xmlChar *utf, int len) {
       
   847     const xmlChar   *ptr=utf;
       
   848     xmlChar         ch;
       
   849 
       
   850     if (len <= 0)
       
   851         return(0);
       
   852 
       
   853     while ( len-- > 0) {
       
   854         if ( !*ptr )
       
   855             break;
       
   856         if ( (ch = *ptr++) & 0x80)
       
   857             while ( (ch<<=1) & 0x80 )
       
   858                 ptr++;
       
   859     }
       
   860     return (ptr - utf);
       
   861 }
       
   862 
       
   863 
       
   864 /**
       
   865  * xmlUTF8Strndup:
       
   866  * @param utf the input UTF8 *
       
   867  * @param len the len of utf (in chars)
       
   868  *
       
   869  * a strndup for array of UTF8's
       
   870  *
       
   871  * Returns a new UTF8 * or NULL
       
   872  */
       
   873 XMLPUBFUNEXPORT xmlChar *
       
   874 xmlUTF8Strndup(const xmlChar *utf, int len) {
       
   875     xmlChar *ret;
       
   876     int i;
       
   877 
       
   878     if ((utf == NULL) || (len < 0)) return(NULL);
       
   879     i = xmlUTF8Strsize(utf, len);
       
   880     ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
       
   881     if (ret == NULL) {
       
   882 /*      
       
   883         xmlGenericError(xmlGenericErrorContext,
       
   884                 EMBED_ERRTXT("malloc of %ld byte failed\n"),
       
   885                 (len + 1) * (long)sizeof(xmlChar));
       
   886  */
       
   887         return(NULL);
       
   888     }
       
   889     memcpy(ret, utf, i * sizeof(xmlChar));
       
   890     ret[i] = 0;
       
   891     return(ret);
       
   892 }
       
   893 
       
   894 /**
       
   895  * xmlUTF8Strpos:
       
   896  * @param utf the input UTF8 *
       
   897  * @param pos the position of the desired UTF8 char (in chars)
       
   898  *
       
   899  * a function to provide the equivalent of fetching a
       
   900  * character from a string array
       
   901  *
       
   902  * Returns a pointer to the UTF8 character or NULL
       
   903  */
       
   904 XMLPUBFUNEXPORT xmlChar *
       
   905 xmlUTF8Strpos(const xmlChar *utf, int pos) {
       
   906     xmlChar ch;
       
   907 
       
   908     if (utf == NULL) return(NULL);
       
   909     if ( (pos < 0) || (pos >= xmlUTF8Strlen(utf)) )
       
   910         return(NULL);
       
   911     while (pos--) {
       
   912         if ((ch=*utf++) == 0) return(NULL);
       
   913         if ( ch & 0x80 ) {
       
   914             /* if not simple ascii, verify proper format */
       
   915             if ( (ch & 0xc0) != 0xc0 )
       
   916                 return(NULL);
       
   917             /* then skip over remaining bytes for this char */
       
   918             while ( (ch <<= 1) & 0x80 )
       
   919                 if ( (*utf++ & 0xc0) != 0x80 )
       
   920                     return(NULL);
       
   921         }
       
   922     }
       
   923     return((xmlChar *)utf);
       
   924 }
       
   925 
       
   926 /**
       
   927  * xmlUTF8Strloc:
       
   928  * @param utf the input UTF8 *
       
   929  * @param utfchar the UTF8 character to be found
       
   930  *
       
   931  * a function to provide the relative location of a UTF8 char
       
   932  *
       
   933  * Returns the relative character position of the desired char
       
   934  * or -1 if not found
       
   935  */
       
   936 XMLPUBFUNEXPORT int
       
   937 xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
       
   938     int i, size;
       
   939     xmlChar ch;
       
   940 
       
   941     if (utf==NULL || utfchar==NULL) return -1;
       
   942     size = xmlUTF8Strsize(utfchar, 1);
       
   943         for(i=0; (ch=*utf) != 0; i++) {
       
   944             if (xmlStrncmp(utf, utfchar, size)==0)
       
   945                 return(i);
       
   946             utf++;
       
   947             if ( ch & 0x80 ) {
       
   948                 /* if not simple ascii, verify proper format */
       
   949                 if ( (ch & 0xc0) != 0xc0 )
       
   950                     return(-1);
       
   951                 /* then skip over remaining bytes for this char */
       
   952                 while ( (ch <<= 1) & 0x80 )
       
   953                     if ( (*utf++ & 0xc0) != 0x80 )
       
   954                         return(-1);
       
   955             }
       
   956         }
       
   957 
       
   958     return(-1);
       
   959 }
       
   960 /**
       
   961  * xmlUTF8Strsub:
       
   962  * @param utf a sequence of UTF-8 encoded bytes
       
   963  * @param start relative pos of first char
       
   964  * @param len total number to copy
       
   965  *
       
   966  * Create a substring from a given UTF-8 string
       
   967  * Note:  positions are given in units of UTF-8 chars
       
   968  *
       
   969  * Returns a pointer to a newly created string
       
   970  * or NULL if any problem
       
   971  */
       
   972 
       
   973 XMLPUBFUNEXPORT xmlChar *
       
   974 xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
       
   975     int            i;
       
   976     xmlChar ch;
       
   977 
       
   978     if (utf == NULL) return(NULL);
       
   979     if (start < 0) return(NULL);
       
   980     if (len < 0) return(NULL);
       
   981 
       
   982     /*
       
   983      * Skip over any leading chars
       
   984      */
       
   985     for (i = 0;i < start;i++) {
       
   986         if ((ch=*utf++) == 0) return(NULL);
       
   987         if ( ch & 0x80 ) {
       
   988             /* if not simple ascii, verify proper format */
       
   989             if ( (ch & 0xc0) != 0xc0 )
       
   990                 return(NULL);
       
   991             /* then skip over remaining bytes for this char */
       
   992             while ( (ch <<= 1) & 0x80 )
       
   993                 if ( (*utf++ & 0xc0) != 0x80 )
       
   994                     return(NULL);
       
   995         }
       
   996     }
       
   997 
       
   998     return(xmlUTF8Strndup(utf, len));
       
   999 }
       
  1000