symbian-qemu-0.9.1-12/python-2.6.1/Modules/expat/xmltok_impl.c
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
       
     2    See the file COPYING for copying permission.
       
     3 */
       
     4 
       
     5 #ifndef IS_INVALID_CHAR
       
     6 #define IS_INVALID_CHAR(enc, ptr, n) (0)
       
     7 #endif
       
     8 
       
     9 #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
       
    10     case BT_LEAD ## n: \
       
    11       if (end - ptr < n) \
       
    12         return XML_TOK_PARTIAL_CHAR; \
       
    13       if (IS_INVALID_CHAR(enc, ptr, n)) { \
       
    14         *(nextTokPtr) = (ptr); \
       
    15         return XML_TOK_INVALID; \
       
    16       } \
       
    17       ptr += n; \
       
    18       break;
       
    19 
       
    20 #define INVALID_CASES(ptr, nextTokPtr) \
       
    21   INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
       
    22   INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
       
    23   INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
       
    24   case BT_NONXML: \
       
    25   case BT_MALFORM: \
       
    26   case BT_TRAIL: \
       
    27     *(nextTokPtr) = (ptr); \
       
    28     return XML_TOK_INVALID;
       
    29 
       
    30 #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
       
    31    case BT_LEAD ## n: \
       
    32      if (end - ptr < n) \
       
    33        return XML_TOK_PARTIAL_CHAR; \
       
    34      if (!IS_NAME_CHAR(enc, ptr, n)) { \
       
    35        *nextTokPtr = ptr; \
       
    36        return XML_TOK_INVALID; \
       
    37      } \
       
    38      ptr += n; \
       
    39      break;
       
    40 
       
    41 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
       
    42   case BT_NONASCII: \
       
    43     if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
       
    44       *nextTokPtr = ptr; \
       
    45       return XML_TOK_INVALID; \
       
    46     } \
       
    47   case BT_NMSTRT: \
       
    48   case BT_HEX: \
       
    49   case BT_DIGIT: \
       
    50   case BT_NAME: \
       
    51   case BT_MINUS: \
       
    52     ptr += MINBPC(enc); \
       
    53     break; \
       
    54   CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
       
    55   CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
       
    56   CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
       
    57 
       
    58 #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
       
    59    case BT_LEAD ## n: \
       
    60      if (end - ptr < n) \
       
    61        return XML_TOK_PARTIAL_CHAR; \
       
    62      if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
       
    63        *nextTokPtr = ptr; \
       
    64        return XML_TOK_INVALID; \
       
    65      } \
       
    66      ptr += n; \
       
    67      break;
       
    68 
       
    69 #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
       
    70   case BT_NONASCII: \
       
    71     if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
       
    72       *nextTokPtr = ptr; \
       
    73       return XML_TOK_INVALID; \
       
    74     } \
       
    75   case BT_NMSTRT: \
       
    76   case BT_HEX: \
       
    77     ptr += MINBPC(enc); \
       
    78     break; \
       
    79   CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
       
    80   CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
       
    81   CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
       
    82 
       
    83 #ifndef PREFIX
       
    84 #define PREFIX(ident) ident
       
    85 #endif
       
    86 
       
    87 /* ptr points to character following "<!-" */
       
    88 
       
    89 static int PTRCALL
       
    90 PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
       
    91                     const char *end, const char **nextTokPtr)
       
    92 {
       
    93   if (ptr != end) {
       
    94     if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
       
    95       *nextTokPtr = ptr;
       
    96       return XML_TOK_INVALID;
       
    97     }
       
    98     ptr += MINBPC(enc);
       
    99     while (ptr != end) {
       
   100       switch (BYTE_TYPE(enc, ptr)) {
       
   101       INVALID_CASES(ptr, nextTokPtr)
       
   102       case BT_MINUS:
       
   103         if ((ptr += MINBPC(enc)) == end)
       
   104           return XML_TOK_PARTIAL;
       
   105         if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
       
   106           if ((ptr += MINBPC(enc)) == end)
       
   107             return XML_TOK_PARTIAL;
       
   108           if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   109             *nextTokPtr = ptr;
       
   110             return XML_TOK_INVALID;
       
   111           }
       
   112           *nextTokPtr = ptr + MINBPC(enc);
       
   113           return XML_TOK_COMMENT;
       
   114         }
       
   115         break;
       
   116       default:
       
   117         ptr += MINBPC(enc);
       
   118         break;
       
   119       }
       
   120     }
       
   121   }
       
   122   return XML_TOK_PARTIAL;
       
   123 }
       
   124 
       
   125 /* ptr points to character following "<!" */
       
   126 
       
   127 static int PTRCALL
       
   128 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
       
   129                  const char *end, const char **nextTokPtr)
       
   130 {
       
   131   if (ptr == end)
       
   132     return XML_TOK_PARTIAL;
       
   133   switch (BYTE_TYPE(enc, ptr)) {
       
   134   case BT_MINUS:
       
   135     return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   136   case BT_LSQB:
       
   137     *nextTokPtr = ptr + MINBPC(enc);
       
   138     return XML_TOK_COND_SECT_OPEN;
       
   139   case BT_NMSTRT:
       
   140   case BT_HEX:
       
   141     ptr += MINBPC(enc);
       
   142     break;
       
   143   default:
       
   144     *nextTokPtr = ptr;
       
   145     return XML_TOK_INVALID;
       
   146   }
       
   147   while (ptr != end) {
       
   148     switch (BYTE_TYPE(enc, ptr)) {
       
   149     case BT_PERCNT:
       
   150       if (ptr + MINBPC(enc) == end)
       
   151         return XML_TOK_PARTIAL;
       
   152       /* don't allow <!ENTITY% foo "whatever"> */
       
   153       switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
       
   154       case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
       
   155         *nextTokPtr = ptr;
       
   156         return XML_TOK_INVALID;
       
   157       }
       
   158       /* fall through */
       
   159     case BT_S: case BT_CR: case BT_LF:
       
   160       *nextTokPtr = ptr;
       
   161       return XML_TOK_DECL_OPEN;
       
   162     case BT_NMSTRT:
       
   163     case BT_HEX:
       
   164       ptr += MINBPC(enc);
       
   165       break;
       
   166     default:
       
   167       *nextTokPtr = ptr;
       
   168       return XML_TOK_INVALID;
       
   169     }
       
   170   }
       
   171   return XML_TOK_PARTIAL;
       
   172 }
       
   173 
       
   174 static int PTRCALL
       
   175 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
       
   176                       const char *end, int *tokPtr)
       
   177 {
       
   178   int upper = 0;
       
   179   *tokPtr = XML_TOK_PI;
       
   180   if (end - ptr != MINBPC(enc)*3)
       
   181     return 1;
       
   182   switch (BYTE_TO_ASCII(enc, ptr)) {
       
   183   case ASCII_x:
       
   184     break;
       
   185   case ASCII_X:
       
   186     upper = 1;
       
   187     break;
       
   188   default:
       
   189     return 1;
       
   190   }
       
   191   ptr += MINBPC(enc);
       
   192   switch (BYTE_TO_ASCII(enc, ptr)) {
       
   193   case ASCII_m:
       
   194     break;
       
   195   case ASCII_M:
       
   196     upper = 1;
       
   197     break;
       
   198   default:
       
   199     return 1;
       
   200   }
       
   201   ptr += MINBPC(enc);
       
   202   switch (BYTE_TO_ASCII(enc, ptr)) {
       
   203   case ASCII_l:
       
   204     break;
       
   205   case ASCII_L:
       
   206     upper = 1;
       
   207     break;
       
   208   default:
       
   209     return 1;
       
   210   }
       
   211   if (upper)
       
   212     return 0;
       
   213   *tokPtr = XML_TOK_XML_DECL;
       
   214   return 1;
       
   215 }
       
   216 
       
   217 /* ptr points to character following "<?" */
       
   218 
       
   219 static int PTRCALL
       
   220 PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
       
   221                const char *end, const char **nextTokPtr)
       
   222 {
       
   223   int tok;
       
   224   const char *target = ptr;
       
   225   if (ptr == end)
       
   226     return XML_TOK_PARTIAL;
       
   227   switch (BYTE_TYPE(enc, ptr)) {
       
   228   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   229   default:
       
   230     *nextTokPtr = ptr;
       
   231     return XML_TOK_INVALID;
       
   232   }
       
   233   while (ptr != end) {
       
   234     switch (BYTE_TYPE(enc, ptr)) {
       
   235     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   236     case BT_S: case BT_CR: case BT_LF:
       
   237       if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
       
   238         *nextTokPtr = ptr;
       
   239         return XML_TOK_INVALID;
       
   240       }
       
   241       ptr += MINBPC(enc);
       
   242       while (ptr != end) {
       
   243         switch (BYTE_TYPE(enc, ptr)) {
       
   244         INVALID_CASES(ptr, nextTokPtr)
       
   245         case BT_QUEST:
       
   246           ptr += MINBPC(enc);
       
   247           if (ptr == end)
       
   248             return XML_TOK_PARTIAL;
       
   249           if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   250             *nextTokPtr = ptr + MINBPC(enc);
       
   251             return tok;
       
   252           }
       
   253           break;
       
   254         default:
       
   255           ptr += MINBPC(enc);
       
   256           break;
       
   257         }
       
   258       }
       
   259       return XML_TOK_PARTIAL;
       
   260     case BT_QUEST:
       
   261       if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
       
   262         *nextTokPtr = ptr;
       
   263         return XML_TOK_INVALID;
       
   264       }
       
   265       ptr += MINBPC(enc);
       
   266       if (ptr == end)
       
   267         return XML_TOK_PARTIAL;
       
   268       if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   269         *nextTokPtr = ptr + MINBPC(enc);
       
   270         return tok;
       
   271       }
       
   272       /* fall through */
       
   273     default:
       
   274       *nextTokPtr = ptr;
       
   275       return XML_TOK_INVALID;
       
   276     }
       
   277   }
       
   278   return XML_TOK_PARTIAL;
       
   279 }
       
   280 
       
   281 static int PTRCALL
       
   282 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
       
   283                          const char *end, const char **nextTokPtr)
       
   284 {
       
   285   static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
       
   286                                      ASCII_T, ASCII_A, ASCII_LSQB };
       
   287   int i;
       
   288   /* CDATA[ */
       
   289   if (end - ptr < 6 * MINBPC(enc))
       
   290     return XML_TOK_PARTIAL;
       
   291   for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
       
   292     if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
       
   293       *nextTokPtr = ptr;
       
   294       return XML_TOK_INVALID;
       
   295     }
       
   296   }
       
   297   *nextTokPtr = ptr;
       
   298   return XML_TOK_CDATA_SECT_OPEN;
       
   299 }
       
   300 
       
   301 static int PTRCALL
       
   302 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
       
   303                         const char *end, const char **nextTokPtr)
       
   304 {
       
   305   if (ptr == end)
       
   306     return XML_TOK_NONE;
       
   307   if (MINBPC(enc) > 1) {
       
   308     size_t n = end - ptr;
       
   309     if (n & (MINBPC(enc) - 1)) {
       
   310       n &= ~(MINBPC(enc) - 1);
       
   311       if (n == 0)
       
   312         return XML_TOK_PARTIAL;
       
   313       end = ptr + n;
       
   314     }
       
   315   }
       
   316   switch (BYTE_TYPE(enc, ptr)) {
       
   317   case BT_RSQB:
       
   318     ptr += MINBPC(enc);
       
   319     if (ptr == end)
       
   320       return XML_TOK_PARTIAL;
       
   321     if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
       
   322       break;
       
   323     ptr += MINBPC(enc);
       
   324     if (ptr == end)
       
   325       return XML_TOK_PARTIAL;
       
   326     if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   327       ptr -= MINBPC(enc);
       
   328       break;
       
   329     }
       
   330     *nextTokPtr = ptr + MINBPC(enc);
       
   331     return XML_TOK_CDATA_SECT_CLOSE;
       
   332   case BT_CR:
       
   333     ptr += MINBPC(enc);
       
   334     if (ptr == end)
       
   335       return XML_TOK_PARTIAL;
       
   336     if (BYTE_TYPE(enc, ptr) == BT_LF)
       
   337       ptr += MINBPC(enc);
       
   338     *nextTokPtr = ptr;
       
   339     return XML_TOK_DATA_NEWLINE;
       
   340   case BT_LF:
       
   341     *nextTokPtr = ptr + MINBPC(enc);
       
   342     return XML_TOK_DATA_NEWLINE;
       
   343   INVALID_CASES(ptr, nextTokPtr)
       
   344   default:
       
   345     ptr += MINBPC(enc);
       
   346     break;
       
   347   }
       
   348   while (ptr != end) {
       
   349     switch (BYTE_TYPE(enc, ptr)) {
       
   350 #define LEAD_CASE(n) \
       
   351     case BT_LEAD ## n: \
       
   352       if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
       
   353         *nextTokPtr = ptr; \
       
   354         return XML_TOK_DATA_CHARS; \
       
   355       } \
       
   356       ptr += n; \
       
   357       break;
       
   358     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
   359 #undef LEAD_CASE
       
   360     case BT_NONXML:
       
   361     case BT_MALFORM:
       
   362     case BT_TRAIL:
       
   363     case BT_CR:
       
   364     case BT_LF:
       
   365     case BT_RSQB:
       
   366       *nextTokPtr = ptr;
       
   367       return XML_TOK_DATA_CHARS;
       
   368     default:
       
   369       ptr += MINBPC(enc);
       
   370       break;
       
   371     }
       
   372   }
       
   373   *nextTokPtr = ptr;
       
   374   return XML_TOK_DATA_CHARS;
       
   375 }
       
   376 
       
   377 /* ptr points to character following "</" */
       
   378 
       
   379 static int PTRCALL
       
   380 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
       
   381                    const char *end, const char **nextTokPtr)
       
   382 {
       
   383   if (ptr == end)
       
   384     return XML_TOK_PARTIAL;
       
   385   switch (BYTE_TYPE(enc, ptr)) {
       
   386   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   387   default:
       
   388     *nextTokPtr = ptr;
       
   389     return XML_TOK_INVALID;
       
   390   }
       
   391   while (ptr != end) {
       
   392     switch (BYTE_TYPE(enc, ptr)) {
       
   393     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   394     case BT_S: case BT_CR: case BT_LF:
       
   395       for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
       
   396         switch (BYTE_TYPE(enc, ptr)) {
       
   397         case BT_S: case BT_CR: case BT_LF:
       
   398           break;
       
   399         case BT_GT:
       
   400           *nextTokPtr = ptr + MINBPC(enc);
       
   401           return XML_TOK_END_TAG;
       
   402         default:
       
   403           *nextTokPtr = ptr;
       
   404           return XML_TOK_INVALID;
       
   405         }
       
   406       }
       
   407       return XML_TOK_PARTIAL;
       
   408 #ifdef XML_NS
       
   409     case BT_COLON:
       
   410       /* no need to check qname syntax here,
       
   411          since end-tag must match exactly */
       
   412       ptr += MINBPC(enc);
       
   413       break;
       
   414 #endif
       
   415     case BT_GT:
       
   416       *nextTokPtr = ptr + MINBPC(enc);
       
   417       return XML_TOK_END_TAG;
       
   418     default:
       
   419       *nextTokPtr = ptr;
       
   420       return XML_TOK_INVALID;
       
   421     }
       
   422   }
       
   423   return XML_TOK_PARTIAL;
       
   424 }
       
   425 
       
   426 /* ptr points to character following "&#X" */
       
   427 
       
   428 static int PTRCALL
       
   429 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
       
   430                        const char *end, const char **nextTokPtr)
       
   431 {
       
   432   if (ptr != end) {
       
   433     switch (BYTE_TYPE(enc, ptr)) {
       
   434     case BT_DIGIT:
       
   435     case BT_HEX:
       
   436       break;
       
   437     default:
       
   438       *nextTokPtr = ptr;
       
   439       return XML_TOK_INVALID;
       
   440     }
       
   441     for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
       
   442       switch (BYTE_TYPE(enc, ptr)) {
       
   443       case BT_DIGIT:
       
   444       case BT_HEX:
       
   445         break;
       
   446       case BT_SEMI:
       
   447         *nextTokPtr = ptr + MINBPC(enc);
       
   448         return XML_TOK_CHAR_REF;
       
   449       default:
       
   450         *nextTokPtr = ptr;
       
   451         return XML_TOK_INVALID;
       
   452       }
       
   453     }
       
   454   }
       
   455   return XML_TOK_PARTIAL;
       
   456 }
       
   457 
       
   458 /* ptr points to character following "&#" */
       
   459 
       
   460 static int PTRCALL
       
   461 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
       
   462                     const char *end, const char **nextTokPtr)
       
   463 {
       
   464   if (ptr != end) {
       
   465     if (CHAR_MATCHES(enc, ptr, ASCII_x))
       
   466       return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   467     switch (BYTE_TYPE(enc, ptr)) {
       
   468     case BT_DIGIT:
       
   469       break;
       
   470     default:
       
   471       *nextTokPtr = ptr;
       
   472       return XML_TOK_INVALID;
       
   473     }
       
   474     for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
       
   475       switch (BYTE_TYPE(enc, ptr)) {
       
   476       case BT_DIGIT:
       
   477         break;
       
   478       case BT_SEMI:
       
   479         *nextTokPtr = ptr + MINBPC(enc);
       
   480         return XML_TOK_CHAR_REF;
       
   481       default:
       
   482         *nextTokPtr = ptr;
       
   483         return XML_TOK_INVALID;
       
   484       }
       
   485     }
       
   486   }
       
   487   return XML_TOK_PARTIAL;
       
   488 }
       
   489 
       
   490 /* ptr points to character following "&" */
       
   491 
       
   492 static int PTRCALL
       
   493 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
       
   494                 const char **nextTokPtr)
       
   495 {
       
   496   if (ptr == end)
       
   497     return XML_TOK_PARTIAL;
       
   498   switch (BYTE_TYPE(enc, ptr)) {
       
   499   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   500   case BT_NUM:
       
   501     return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   502   default:
       
   503     *nextTokPtr = ptr;
       
   504     return XML_TOK_INVALID;
       
   505   }
       
   506   while (ptr != end) {
       
   507     switch (BYTE_TYPE(enc, ptr)) {
       
   508     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   509     case BT_SEMI:
       
   510       *nextTokPtr = ptr + MINBPC(enc);
       
   511       return XML_TOK_ENTITY_REF;
       
   512     default:
       
   513       *nextTokPtr = ptr;
       
   514       return XML_TOK_INVALID;
       
   515     }
       
   516   }
       
   517   return XML_TOK_PARTIAL;
       
   518 }
       
   519 
       
   520 /* ptr points to character following first character of attribute name */
       
   521 
       
   522 static int PTRCALL
       
   523 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
       
   524                  const char **nextTokPtr)
       
   525 {
       
   526 #ifdef XML_NS
       
   527   int hadColon = 0;
       
   528 #endif
       
   529   while (ptr != end) {
       
   530     switch (BYTE_TYPE(enc, ptr)) {
       
   531     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   532 #ifdef XML_NS
       
   533     case BT_COLON:
       
   534       if (hadColon) {
       
   535         *nextTokPtr = ptr;
       
   536         return XML_TOK_INVALID;
       
   537       }
       
   538       hadColon = 1;
       
   539       ptr += MINBPC(enc);
       
   540       if (ptr == end)
       
   541         return XML_TOK_PARTIAL;
       
   542       switch (BYTE_TYPE(enc, ptr)) {
       
   543       CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   544       default:
       
   545         *nextTokPtr = ptr;
       
   546         return XML_TOK_INVALID;
       
   547       }
       
   548       break;
       
   549 #endif
       
   550     case BT_S: case BT_CR: case BT_LF:
       
   551       for (;;) {
       
   552         int t;
       
   553 
       
   554         ptr += MINBPC(enc);
       
   555         if (ptr == end)
       
   556           return XML_TOK_PARTIAL;
       
   557         t = BYTE_TYPE(enc, ptr);
       
   558         if (t == BT_EQUALS)
       
   559           break;
       
   560         switch (t) {
       
   561         case BT_S:
       
   562         case BT_LF:
       
   563         case BT_CR:
       
   564           break;
       
   565         default:
       
   566           *nextTokPtr = ptr;
       
   567           return XML_TOK_INVALID;
       
   568         }
       
   569       }
       
   570     /* fall through */
       
   571     case BT_EQUALS:
       
   572       {
       
   573         int open;
       
   574 #ifdef XML_NS
       
   575         hadColon = 0;
       
   576 #endif
       
   577         for (;;) {
       
   578           ptr += MINBPC(enc);
       
   579           if (ptr == end)
       
   580             return XML_TOK_PARTIAL;
       
   581           open = BYTE_TYPE(enc, ptr);
       
   582           if (open == BT_QUOT || open == BT_APOS)
       
   583             break;
       
   584           switch (open) {
       
   585           case BT_S:
       
   586           case BT_LF:
       
   587           case BT_CR:
       
   588             break;
       
   589           default:
       
   590             *nextTokPtr = ptr;
       
   591             return XML_TOK_INVALID;
       
   592           }
       
   593         }
       
   594         ptr += MINBPC(enc);
       
   595         /* in attribute value */
       
   596         for (;;) {
       
   597           int t;
       
   598           if (ptr == end)
       
   599             return XML_TOK_PARTIAL;
       
   600           t = BYTE_TYPE(enc, ptr);
       
   601           if (t == open)
       
   602             break;
       
   603           switch (t) {
       
   604           INVALID_CASES(ptr, nextTokPtr)
       
   605           case BT_AMP:
       
   606             {
       
   607               int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
       
   608               if (tok <= 0) {
       
   609                 if (tok == XML_TOK_INVALID)
       
   610                   *nextTokPtr = ptr;
       
   611                 return tok;
       
   612               }
       
   613               break;
       
   614             }
       
   615           case BT_LT:
       
   616             *nextTokPtr = ptr;
       
   617             return XML_TOK_INVALID;
       
   618           default:
       
   619             ptr += MINBPC(enc);
       
   620             break;
       
   621           }
       
   622         }
       
   623         ptr += MINBPC(enc);
       
   624         if (ptr == end)
       
   625           return XML_TOK_PARTIAL;
       
   626         switch (BYTE_TYPE(enc, ptr)) {
       
   627         case BT_S:
       
   628         case BT_CR:
       
   629         case BT_LF:
       
   630           break;
       
   631         case BT_SOL:
       
   632           goto sol;
       
   633         case BT_GT:
       
   634           goto gt;
       
   635         default:
       
   636           *nextTokPtr = ptr;
       
   637           return XML_TOK_INVALID;
       
   638         }
       
   639         /* ptr points to closing quote */
       
   640         for (;;) {
       
   641           ptr += MINBPC(enc);
       
   642           if (ptr == end)
       
   643             return XML_TOK_PARTIAL;
       
   644           switch (BYTE_TYPE(enc, ptr)) {
       
   645           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   646           case BT_S: case BT_CR: case BT_LF:
       
   647             continue;
       
   648           case BT_GT:
       
   649           gt:
       
   650             *nextTokPtr = ptr + MINBPC(enc);
       
   651             return XML_TOK_START_TAG_WITH_ATTS;
       
   652           case BT_SOL:
       
   653           sol:
       
   654             ptr += MINBPC(enc);
       
   655             if (ptr == end)
       
   656               return XML_TOK_PARTIAL;
       
   657             if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   658               *nextTokPtr = ptr;
       
   659               return XML_TOK_INVALID;
       
   660             }
       
   661             *nextTokPtr = ptr + MINBPC(enc);
       
   662             return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
       
   663           default:
       
   664             *nextTokPtr = ptr;
       
   665             return XML_TOK_INVALID;
       
   666           }
       
   667           break;
       
   668         }
       
   669         break;
       
   670       }
       
   671     default:
       
   672       *nextTokPtr = ptr;
       
   673       return XML_TOK_INVALID;
       
   674     }
       
   675   }
       
   676   return XML_TOK_PARTIAL;
       
   677 }
       
   678 
       
   679 /* ptr points to character following "<" */
       
   680 
       
   681 static int PTRCALL
       
   682 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
       
   683                const char **nextTokPtr)
       
   684 {
       
   685 #ifdef XML_NS
       
   686   int hadColon;
       
   687 #endif
       
   688   if (ptr == end)
       
   689     return XML_TOK_PARTIAL;
       
   690   switch (BYTE_TYPE(enc, ptr)) {
       
   691   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   692   case BT_EXCL:
       
   693     if ((ptr += MINBPC(enc)) == end)
       
   694       return XML_TOK_PARTIAL;
       
   695     switch (BYTE_TYPE(enc, ptr)) {
       
   696     case BT_MINUS:
       
   697       return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   698     case BT_LSQB:
       
   699       return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
       
   700                                       end, nextTokPtr);
       
   701     }
       
   702     *nextTokPtr = ptr;
       
   703     return XML_TOK_INVALID;
       
   704   case BT_QUEST:
       
   705     return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   706   case BT_SOL:
       
   707     return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   708   default:
       
   709     *nextTokPtr = ptr;
       
   710     return XML_TOK_INVALID;
       
   711   }
       
   712 #ifdef XML_NS
       
   713   hadColon = 0;
       
   714 #endif
       
   715   /* we have a start-tag */
       
   716   while (ptr != end) {
       
   717     switch (BYTE_TYPE(enc, ptr)) {
       
   718     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   719 #ifdef XML_NS
       
   720     case BT_COLON:
       
   721       if (hadColon) {
       
   722         *nextTokPtr = ptr;
       
   723         return XML_TOK_INVALID;
       
   724       }
       
   725       hadColon = 1;
       
   726       ptr += MINBPC(enc);
       
   727       if (ptr == end)
       
   728         return XML_TOK_PARTIAL;
       
   729       switch (BYTE_TYPE(enc, ptr)) {
       
   730       CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   731       default:
       
   732         *nextTokPtr = ptr;
       
   733         return XML_TOK_INVALID;
       
   734       }
       
   735       break;
       
   736 #endif
       
   737     case BT_S: case BT_CR: case BT_LF:
       
   738       {
       
   739         ptr += MINBPC(enc);
       
   740         while (ptr != end) {
       
   741           switch (BYTE_TYPE(enc, ptr)) {
       
   742           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   743           case BT_GT:
       
   744             goto gt;
       
   745           case BT_SOL:
       
   746             goto sol;
       
   747           case BT_S: case BT_CR: case BT_LF:
       
   748             ptr += MINBPC(enc);
       
   749             continue;
       
   750           default:
       
   751             *nextTokPtr = ptr;
       
   752             return XML_TOK_INVALID;
       
   753           }
       
   754           return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
       
   755         }
       
   756         return XML_TOK_PARTIAL;
       
   757       }
       
   758     case BT_GT:
       
   759     gt:
       
   760       *nextTokPtr = ptr + MINBPC(enc);
       
   761       return XML_TOK_START_TAG_NO_ATTS;
       
   762     case BT_SOL:
       
   763     sol:
       
   764       ptr += MINBPC(enc);
       
   765       if (ptr == end)
       
   766         return XML_TOK_PARTIAL;
       
   767       if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   768         *nextTokPtr = ptr;
       
   769         return XML_TOK_INVALID;
       
   770       }
       
   771       *nextTokPtr = ptr + MINBPC(enc);
       
   772       return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
       
   773     default:
       
   774       *nextTokPtr = ptr;
       
   775       return XML_TOK_INVALID;
       
   776     }
       
   777   }
       
   778   return XML_TOK_PARTIAL;
       
   779 }
       
   780 
       
   781 static int PTRCALL
       
   782 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
       
   783                    const char **nextTokPtr)
       
   784 {
       
   785   if (ptr == end)
       
   786     return XML_TOK_NONE;
       
   787   if (MINBPC(enc) > 1) {
       
   788     size_t n = end - ptr;
       
   789     if (n & (MINBPC(enc) - 1)) {
       
   790       n &= ~(MINBPC(enc) - 1);
       
   791       if (n == 0)
       
   792         return XML_TOK_PARTIAL;
       
   793       end = ptr + n;
       
   794     }
       
   795   }
       
   796   switch (BYTE_TYPE(enc, ptr)) {
       
   797   case BT_LT:
       
   798     return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   799   case BT_AMP:
       
   800     return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   801   case BT_CR:
       
   802     ptr += MINBPC(enc);
       
   803     if (ptr == end)
       
   804       return XML_TOK_TRAILING_CR;
       
   805     if (BYTE_TYPE(enc, ptr) == BT_LF)
       
   806       ptr += MINBPC(enc);
       
   807     *nextTokPtr = ptr;
       
   808     return XML_TOK_DATA_NEWLINE;
       
   809   case BT_LF:
       
   810     *nextTokPtr = ptr + MINBPC(enc);
       
   811     return XML_TOK_DATA_NEWLINE;
       
   812   case BT_RSQB:
       
   813     ptr += MINBPC(enc);
       
   814     if (ptr == end)
       
   815       return XML_TOK_TRAILING_RSQB;
       
   816     if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
       
   817       break;
       
   818     ptr += MINBPC(enc);
       
   819     if (ptr == end)
       
   820       return XML_TOK_TRAILING_RSQB;
       
   821     if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   822       ptr -= MINBPC(enc);
       
   823       break;
       
   824     }
       
   825     *nextTokPtr = ptr;
       
   826     return XML_TOK_INVALID;
       
   827   INVALID_CASES(ptr, nextTokPtr)
       
   828   default:
       
   829     ptr += MINBPC(enc);
       
   830     break;
       
   831   }
       
   832   while (ptr != end) {
       
   833     switch (BYTE_TYPE(enc, ptr)) {
       
   834 #define LEAD_CASE(n) \
       
   835     case BT_LEAD ## n: \
       
   836       if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
       
   837         *nextTokPtr = ptr; \
       
   838         return XML_TOK_DATA_CHARS; \
       
   839       } \
       
   840       ptr += n; \
       
   841       break;
       
   842     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
   843 #undef LEAD_CASE
       
   844     case BT_RSQB:
       
   845       if (ptr + MINBPC(enc) != end) {
       
   846          if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
       
   847            ptr += MINBPC(enc);
       
   848            break;
       
   849          }
       
   850          if (ptr + 2*MINBPC(enc) != end) {
       
   851            if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
       
   852              ptr += MINBPC(enc);
       
   853              break;
       
   854            }
       
   855            *nextTokPtr = ptr + 2*MINBPC(enc);
       
   856            return XML_TOK_INVALID;
       
   857          }
       
   858       }
       
   859       /* fall through */
       
   860     case BT_AMP:
       
   861     case BT_LT:
       
   862     case BT_NONXML:
       
   863     case BT_MALFORM:
       
   864     case BT_TRAIL:
       
   865     case BT_CR:
       
   866     case BT_LF:
       
   867       *nextTokPtr = ptr;
       
   868       return XML_TOK_DATA_CHARS;
       
   869     default:
       
   870       ptr += MINBPC(enc);
       
   871       break;
       
   872     }
       
   873   }
       
   874   *nextTokPtr = ptr;
       
   875   return XML_TOK_DATA_CHARS;
       
   876 }
       
   877 
       
   878 /* ptr points to character following "%" */
       
   879 
       
   880 static int PTRCALL
       
   881 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
       
   882                     const char **nextTokPtr)
       
   883 {
       
   884   if (ptr == end)
       
   885     return -XML_TOK_PERCENT;
       
   886   switch (BYTE_TYPE(enc, ptr)) {
       
   887   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   888   case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
       
   889     *nextTokPtr = ptr;
       
   890     return XML_TOK_PERCENT;
       
   891   default:
       
   892     *nextTokPtr = ptr;
       
   893     return XML_TOK_INVALID;
       
   894   }
       
   895   while (ptr != end) {
       
   896     switch (BYTE_TYPE(enc, ptr)) {
       
   897     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   898     case BT_SEMI:
       
   899       *nextTokPtr = ptr + MINBPC(enc);
       
   900       return XML_TOK_PARAM_ENTITY_REF;
       
   901     default:
       
   902       *nextTokPtr = ptr;
       
   903       return XML_TOK_INVALID;
       
   904     }
       
   905   }
       
   906   return XML_TOK_PARTIAL;
       
   907 }
       
   908 
       
   909 static int PTRCALL
       
   910 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
       
   911                       const char **nextTokPtr)
       
   912 {
       
   913   if (ptr == end)
       
   914     return XML_TOK_PARTIAL;
       
   915   switch (BYTE_TYPE(enc, ptr)) {
       
   916   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   917   default:
       
   918     *nextTokPtr = ptr;
       
   919     return XML_TOK_INVALID;
       
   920   }
       
   921   while (ptr != end) {
       
   922     switch (BYTE_TYPE(enc, ptr)) {
       
   923     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   924     case BT_CR: case BT_LF: case BT_S:
       
   925     case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
       
   926       *nextTokPtr = ptr;
       
   927       return XML_TOK_POUND_NAME;
       
   928     default:
       
   929       *nextTokPtr = ptr;
       
   930       return XML_TOK_INVALID;
       
   931     }
       
   932   }
       
   933   return -XML_TOK_POUND_NAME;
       
   934 }
       
   935 
       
   936 static int PTRCALL
       
   937 PREFIX(scanLit)(int open, const ENCODING *enc,
       
   938                 const char *ptr, const char *end,
       
   939                 const char **nextTokPtr)
       
   940 {
       
   941   while (ptr != end) {
       
   942     int t = BYTE_TYPE(enc, ptr);
       
   943     switch (t) {
       
   944     INVALID_CASES(ptr, nextTokPtr)
       
   945     case BT_QUOT:
       
   946     case BT_APOS:
       
   947       ptr += MINBPC(enc);
       
   948       if (t != open)
       
   949         break;
       
   950       if (ptr == end)
       
   951         return -XML_TOK_LITERAL;
       
   952       *nextTokPtr = ptr;
       
   953       switch (BYTE_TYPE(enc, ptr)) {
       
   954       case BT_S: case BT_CR: case BT_LF:
       
   955       case BT_GT: case BT_PERCNT: case BT_LSQB:
       
   956         return XML_TOK_LITERAL;
       
   957       default:
       
   958         return XML_TOK_INVALID;
       
   959       }
       
   960     default:
       
   961       ptr += MINBPC(enc);
       
   962       break;
       
   963     }
       
   964   }
       
   965   return XML_TOK_PARTIAL;
       
   966 }
       
   967 
       
   968 static int PTRCALL
       
   969 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
       
   970                   const char **nextTokPtr)
       
   971 {
       
   972   int tok;
       
   973   if (ptr == end)
       
   974     return XML_TOK_NONE;
       
   975   if (MINBPC(enc) > 1) {
       
   976     size_t n = end - ptr;
       
   977     if (n & (MINBPC(enc) - 1)) {
       
   978       n &= ~(MINBPC(enc) - 1);
       
   979       if (n == 0)
       
   980         return XML_TOK_PARTIAL;
       
   981       end = ptr + n;
       
   982     }
       
   983   }
       
   984   switch (BYTE_TYPE(enc, ptr)) {
       
   985   case BT_QUOT:
       
   986     return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   987   case BT_APOS:
       
   988     return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   989   case BT_LT:
       
   990     {
       
   991       ptr += MINBPC(enc);
       
   992       if (ptr == end)
       
   993         return XML_TOK_PARTIAL;
       
   994       switch (BYTE_TYPE(enc, ptr)) {
       
   995       case BT_EXCL:
       
   996         return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   997       case BT_QUEST:
       
   998         return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   999       case BT_NMSTRT:
       
  1000       case BT_HEX:
       
  1001       case BT_NONASCII:
       
  1002       case BT_LEAD2:
       
  1003       case BT_LEAD3:
       
  1004       case BT_LEAD4:
       
  1005         *nextTokPtr = ptr - MINBPC(enc);
       
  1006         return XML_TOK_INSTANCE_START;
       
  1007       }
       
  1008       *nextTokPtr = ptr;
       
  1009       return XML_TOK_INVALID;
       
  1010     }
       
  1011   case BT_CR:
       
  1012     if (ptr + MINBPC(enc) == end) {
       
  1013       *nextTokPtr = end;
       
  1014       /* indicate that this might be part of a CR/LF pair */
       
  1015       return -XML_TOK_PROLOG_S;
       
  1016     }
       
  1017     /* fall through */
       
  1018   case BT_S: case BT_LF:
       
  1019     for (;;) {
       
  1020       ptr += MINBPC(enc);
       
  1021       if (ptr == end)
       
  1022         break;
       
  1023       switch (BYTE_TYPE(enc, ptr)) {
       
  1024       case BT_S: case BT_LF:
       
  1025         break;
       
  1026       case BT_CR:
       
  1027         /* don't split CR/LF pair */
       
  1028         if (ptr + MINBPC(enc) != end)
       
  1029           break;
       
  1030         /* fall through */
       
  1031       default:
       
  1032         *nextTokPtr = ptr;
       
  1033         return XML_TOK_PROLOG_S;
       
  1034       }
       
  1035     }
       
  1036     *nextTokPtr = ptr;
       
  1037     return XML_TOK_PROLOG_S;
       
  1038   case BT_PERCNT:
       
  1039     return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
  1040   case BT_COMMA:
       
  1041     *nextTokPtr = ptr + MINBPC(enc);
       
  1042     return XML_TOK_COMMA;
       
  1043   case BT_LSQB:
       
  1044     *nextTokPtr = ptr + MINBPC(enc);
       
  1045     return XML_TOK_OPEN_BRACKET;
       
  1046   case BT_RSQB:
       
  1047     ptr += MINBPC(enc);
       
  1048     if (ptr == end)
       
  1049       return -XML_TOK_CLOSE_BRACKET;
       
  1050     if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
       
  1051       if (ptr + MINBPC(enc) == end)
       
  1052         return XML_TOK_PARTIAL;
       
  1053       if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
       
  1054         *nextTokPtr = ptr + 2*MINBPC(enc);
       
  1055         return XML_TOK_COND_SECT_CLOSE;
       
  1056       }
       
  1057     }
       
  1058     *nextTokPtr = ptr;
       
  1059     return XML_TOK_CLOSE_BRACKET;
       
  1060   case BT_LPAR:
       
  1061     *nextTokPtr = ptr + MINBPC(enc);
       
  1062     return XML_TOK_OPEN_PAREN;
       
  1063   case BT_RPAR:
       
  1064     ptr += MINBPC(enc);
       
  1065     if (ptr == end)
       
  1066       return -XML_TOK_CLOSE_PAREN;
       
  1067     switch (BYTE_TYPE(enc, ptr)) {
       
  1068     case BT_AST:
       
  1069       *nextTokPtr = ptr + MINBPC(enc);
       
  1070       return XML_TOK_CLOSE_PAREN_ASTERISK;
       
  1071     case BT_QUEST:
       
  1072       *nextTokPtr = ptr + MINBPC(enc);
       
  1073       return XML_TOK_CLOSE_PAREN_QUESTION;
       
  1074     case BT_PLUS:
       
  1075       *nextTokPtr = ptr + MINBPC(enc);
       
  1076       return XML_TOK_CLOSE_PAREN_PLUS;
       
  1077     case BT_CR: case BT_LF: case BT_S:
       
  1078     case BT_GT: case BT_COMMA: case BT_VERBAR:
       
  1079     case BT_RPAR:
       
  1080       *nextTokPtr = ptr;
       
  1081       return XML_TOK_CLOSE_PAREN;
       
  1082     }
       
  1083     *nextTokPtr = ptr;
       
  1084     return XML_TOK_INVALID;
       
  1085   case BT_VERBAR:
       
  1086     *nextTokPtr = ptr + MINBPC(enc);
       
  1087     return XML_TOK_OR;
       
  1088   case BT_GT:
       
  1089     *nextTokPtr = ptr + MINBPC(enc);
       
  1090     return XML_TOK_DECL_CLOSE;
       
  1091   case BT_NUM:
       
  1092     return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
  1093 #define LEAD_CASE(n) \
       
  1094   case BT_LEAD ## n: \
       
  1095     if (end - ptr < n) \
       
  1096       return XML_TOK_PARTIAL_CHAR; \
       
  1097     if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
       
  1098       ptr += n; \
       
  1099       tok = XML_TOK_NAME; \
       
  1100       break; \
       
  1101     } \
       
  1102     if (IS_NAME_CHAR(enc, ptr, n)) { \
       
  1103       ptr += n; \
       
  1104       tok = XML_TOK_NMTOKEN; \
       
  1105       break; \
       
  1106     } \
       
  1107     *nextTokPtr = ptr; \
       
  1108     return XML_TOK_INVALID;
       
  1109     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
  1110 #undef LEAD_CASE
       
  1111   case BT_NMSTRT:
       
  1112   case BT_HEX:
       
  1113     tok = XML_TOK_NAME;
       
  1114     ptr += MINBPC(enc);
       
  1115     break;
       
  1116   case BT_DIGIT:
       
  1117   case BT_NAME:
       
  1118   case BT_MINUS:
       
  1119 #ifdef XML_NS
       
  1120   case BT_COLON:
       
  1121 #endif
       
  1122     tok = XML_TOK_NMTOKEN;
       
  1123     ptr += MINBPC(enc);
       
  1124     break;
       
  1125   case BT_NONASCII:
       
  1126     if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
       
  1127       ptr += MINBPC(enc);
       
  1128       tok = XML_TOK_NAME;
       
  1129       break;
       
  1130     }
       
  1131     if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
       
  1132       ptr += MINBPC(enc);
       
  1133       tok = XML_TOK_NMTOKEN;
       
  1134       break;
       
  1135     }
       
  1136     /* fall through */
       
  1137   default:
       
  1138     *nextTokPtr = ptr;
       
  1139     return XML_TOK_INVALID;
       
  1140   }
       
  1141   while (ptr != end) {
       
  1142     switch (BYTE_TYPE(enc, ptr)) {
       
  1143     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
  1144     case BT_GT: case BT_RPAR: case BT_COMMA:
       
  1145     case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
       
  1146     case BT_S: case BT_CR: case BT_LF:
       
  1147       *nextTokPtr = ptr;
       
  1148       return tok;
       
  1149 #ifdef XML_NS
       
  1150     case BT_COLON:
       
  1151       ptr += MINBPC(enc);
       
  1152       switch (tok) {
       
  1153       case XML_TOK_NAME:
       
  1154         if (ptr == end)
       
  1155           return XML_TOK_PARTIAL;
       
  1156         tok = XML_TOK_PREFIXED_NAME;
       
  1157         switch (BYTE_TYPE(enc, ptr)) {
       
  1158         CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
  1159         default:
       
  1160           tok = XML_TOK_NMTOKEN;
       
  1161           break;
       
  1162         }
       
  1163         break;
       
  1164       case XML_TOK_PREFIXED_NAME:
       
  1165         tok = XML_TOK_NMTOKEN;
       
  1166         break;
       
  1167       }
       
  1168       break;
       
  1169 #endif
       
  1170     case BT_PLUS:
       
  1171       if (tok == XML_TOK_NMTOKEN)  {
       
  1172         *nextTokPtr = ptr;
       
  1173         return XML_TOK_INVALID;
       
  1174       }
       
  1175       *nextTokPtr = ptr + MINBPC(enc);
       
  1176       return XML_TOK_NAME_PLUS;
       
  1177     case BT_AST:
       
  1178       if (tok == XML_TOK_NMTOKEN)  {
       
  1179         *nextTokPtr = ptr;
       
  1180         return XML_TOK_INVALID;
       
  1181       }
       
  1182       *nextTokPtr = ptr + MINBPC(enc);
       
  1183       return XML_TOK_NAME_ASTERISK;
       
  1184     case BT_QUEST:
       
  1185       if (tok == XML_TOK_NMTOKEN)  {
       
  1186         *nextTokPtr = ptr;
       
  1187         return XML_TOK_INVALID;
       
  1188       }
       
  1189       *nextTokPtr = ptr + MINBPC(enc);
       
  1190       return XML_TOK_NAME_QUESTION;
       
  1191     default:
       
  1192       *nextTokPtr = ptr;
       
  1193       return XML_TOK_INVALID;
       
  1194     }
       
  1195   }
       
  1196   return -tok;
       
  1197 }
       
  1198 
       
  1199 static int PTRCALL
       
  1200 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
       
  1201                           const char *end, const char **nextTokPtr)
       
  1202 {
       
  1203   const char *start;
       
  1204   if (ptr == end)
       
  1205     return XML_TOK_NONE;
       
  1206   start = ptr;
       
  1207   while (ptr != end) {
       
  1208     switch (BYTE_TYPE(enc, ptr)) {
       
  1209 #define LEAD_CASE(n) \
       
  1210     case BT_LEAD ## n: ptr += n; break;
       
  1211     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
  1212 #undef LEAD_CASE
       
  1213     case BT_AMP:
       
  1214       if (ptr == start)
       
  1215         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
  1216       *nextTokPtr = ptr;
       
  1217       return XML_TOK_DATA_CHARS;
       
  1218     case BT_LT:
       
  1219       /* this is for inside entity references */
       
  1220       *nextTokPtr = ptr;
       
  1221       return XML_TOK_INVALID;
       
  1222     case BT_LF:
       
  1223       if (ptr == start) {
       
  1224         *nextTokPtr = ptr + MINBPC(enc);
       
  1225         return XML_TOK_DATA_NEWLINE;
       
  1226       }
       
  1227       *nextTokPtr = ptr;
       
  1228       return XML_TOK_DATA_CHARS;
       
  1229     case BT_CR:
       
  1230       if (ptr == start) {
       
  1231         ptr += MINBPC(enc);
       
  1232         if (ptr == end)
       
  1233           return XML_TOK_TRAILING_CR;
       
  1234         if (BYTE_TYPE(enc, ptr) == BT_LF)
       
  1235           ptr += MINBPC(enc);
       
  1236         *nextTokPtr = ptr;
       
  1237         return XML_TOK_DATA_NEWLINE;
       
  1238       }
       
  1239       *nextTokPtr = ptr;
       
  1240       return XML_TOK_DATA_CHARS;
       
  1241     case BT_S:
       
  1242       if (ptr == start) {
       
  1243         *nextTokPtr = ptr + MINBPC(enc);
       
  1244         return XML_TOK_ATTRIBUTE_VALUE_S;
       
  1245       }
       
  1246       *nextTokPtr = ptr;
       
  1247       return XML_TOK_DATA_CHARS;
       
  1248     default:
       
  1249       ptr += MINBPC(enc);
       
  1250       break;
       
  1251     }
       
  1252   }
       
  1253   *nextTokPtr = ptr;
       
  1254   return XML_TOK_DATA_CHARS;
       
  1255 }
       
  1256 
       
  1257 static int PTRCALL
       
  1258 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
       
  1259                        const char *end, const char **nextTokPtr)
       
  1260 {
       
  1261   const char *start;
       
  1262   if (ptr == end)
       
  1263     return XML_TOK_NONE;
       
  1264   start = ptr;
       
  1265   while (ptr != end) {
       
  1266     switch (BYTE_TYPE(enc, ptr)) {
       
  1267 #define LEAD_CASE(n) \
       
  1268     case BT_LEAD ## n: ptr += n; break;
       
  1269     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
  1270 #undef LEAD_CASE
       
  1271     case BT_AMP:
       
  1272       if (ptr == start)
       
  1273         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
  1274       *nextTokPtr = ptr;
       
  1275       return XML_TOK_DATA_CHARS;
       
  1276     case BT_PERCNT:
       
  1277       if (ptr == start) {
       
  1278         int tok =  PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
       
  1279                                        end, nextTokPtr);
       
  1280         return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
       
  1281       }
       
  1282       *nextTokPtr = ptr;
       
  1283       return XML_TOK_DATA_CHARS;
       
  1284     case BT_LF:
       
  1285       if (ptr == start) {
       
  1286         *nextTokPtr = ptr + MINBPC(enc);
       
  1287         return XML_TOK_DATA_NEWLINE;
       
  1288       }
       
  1289       *nextTokPtr = ptr;
       
  1290       return XML_TOK_DATA_CHARS;
       
  1291     case BT_CR:
       
  1292       if (ptr == start) {
       
  1293         ptr += MINBPC(enc);
       
  1294         if (ptr == end)
       
  1295           return XML_TOK_TRAILING_CR;
       
  1296         if (BYTE_TYPE(enc, ptr) == BT_LF)
       
  1297           ptr += MINBPC(enc);
       
  1298         *nextTokPtr = ptr;
       
  1299         return XML_TOK_DATA_NEWLINE;
       
  1300       }
       
  1301       *nextTokPtr = ptr;
       
  1302       return XML_TOK_DATA_CHARS;
       
  1303     default:
       
  1304       ptr += MINBPC(enc);
       
  1305       break;
       
  1306     }
       
  1307   }
       
  1308   *nextTokPtr = ptr;
       
  1309   return XML_TOK_DATA_CHARS;
       
  1310 }
       
  1311 
       
  1312 #ifdef XML_DTD
       
  1313 
       
  1314 static int PTRCALL
       
  1315 PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
       
  1316                          const char *end, const char **nextTokPtr)
       
  1317 {
       
  1318   int level = 0;
       
  1319   if (MINBPC(enc) > 1) {
       
  1320     size_t n = end - ptr;
       
  1321     if (n & (MINBPC(enc) - 1)) {
       
  1322       n &= ~(MINBPC(enc) - 1);
       
  1323       end = ptr + n;
       
  1324     }
       
  1325   }
       
  1326   while (ptr != end) {
       
  1327     switch (BYTE_TYPE(enc, ptr)) {
       
  1328     INVALID_CASES(ptr, nextTokPtr)
       
  1329     case BT_LT:
       
  1330       if ((ptr += MINBPC(enc)) == end)
       
  1331         return XML_TOK_PARTIAL;
       
  1332       if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
       
  1333         if ((ptr += MINBPC(enc)) == end)
       
  1334           return XML_TOK_PARTIAL;
       
  1335         if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
       
  1336           ++level;
       
  1337           ptr += MINBPC(enc);
       
  1338         }
       
  1339       }
       
  1340       break;
       
  1341     case BT_RSQB:
       
  1342       if ((ptr += MINBPC(enc)) == end)
       
  1343         return XML_TOK_PARTIAL;
       
  1344       if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
       
  1345         if ((ptr += MINBPC(enc)) == end)
       
  1346           return XML_TOK_PARTIAL;
       
  1347         if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
  1348           ptr += MINBPC(enc);
       
  1349           if (level == 0) {
       
  1350             *nextTokPtr = ptr;
       
  1351             return XML_TOK_IGNORE_SECT;
       
  1352           }
       
  1353           --level;
       
  1354         }
       
  1355       }
       
  1356       break;
       
  1357     default:
       
  1358       ptr += MINBPC(enc);
       
  1359       break;
       
  1360     }
       
  1361   }
       
  1362   return XML_TOK_PARTIAL;
       
  1363 }
       
  1364 
       
  1365 #endif /* XML_DTD */
       
  1366 
       
  1367 static int PTRCALL
       
  1368 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
       
  1369                    const char **badPtr)
       
  1370 {
       
  1371   ptr += MINBPC(enc);
       
  1372   end -= MINBPC(enc);
       
  1373   for (; ptr != end; ptr += MINBPC(enc)) {
       
  1374     switch (BYTE_TYPE(enc, ptr)) {
       
  1375     case BT_DIGIT:
       
  1376     case BT_HEX:
       
  1377     case BT_MINUS:
       
  1378     case BT_APOS:
       
  1379     case BT_LPAR:
       
  1380     case BT_RPAR:
       
  1381     case BT_PLUS:
       
  1382     case BT_COMMA:
       
  1383     case BT_SOL:
       
  1384     case BT_EQUALS:
       
  1385     case BT_QUEST:
       
  1386     case BT_CR:
       
  1387     case BT_LF:
       
  1388     case BT_SEMI:
       
  1389     case BT_EXCL:
       
  1390     case BT_AST:
       
  1391     case BT_PERCNT:
       
  1392     case BT_NUM:
       
  1393 #ifdef XML_NS
       
  1394     case BT_COLON:
       
  1395 #endif
       
  1396       break;
       
  1397     case BT_S:
       
  1398       if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
       
  1399         *badPtr = ptr;
       
  1400         return 0;
       
  1401       }
       
  1402       break;
       
  1403     case BT_NAME:
       
  1404     case BT_NMSTRT:
       
  1405       if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
       
  1406         break;
       
  1407     default:
       
  1408       switch (BYTE_TO_ASCII(enc, ptr)) {
       
  1409       case 0x24: /* $ */
       
  1410       case 0x40: /* @ */
       
  1411         break;
       
  1412       default:
       
  1413         *badPtr = ptr;
       
  1414         return 0;
       
  1415       }
       
  1416       break;
       
  1417     }
       
  1418   }
       
  1419   return 1;
       
  1420 }
       
  1421 
       
  1422 /* This must only be called for a well-formed start-tag or empty
       
  1423    element tag.  Returns the number of attributes.  Pointers to the
       
  1424    first attsMax attributes are stored in atts.
       
  1425 */
       
  1426 
       
  1427 static int PTRCALL
       
  1428 PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
       
  1429                 int attsMax, ATTRIBUTE *atts)
       
  1430 {
       
  1431   enum { other, inName, inValue } state = inName;
       
  1432   int nAtts = 0;
       
  1433   int open = 0; /* defined when state == inValue;
       
  1434                    initialization just to shut up compilers */
       
  1435 
       
  1436   for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
       
  1437     switch (BYTE_TYPE(enc, ptr)) {
       
  1438 #define START_NAME \
       
  1439       if (state == other) { \
       
  1440         if (nAtts < attsMax) { \
       
  1441           atts[nAtts].name = ptr; \
       
  1442           atts[nAtts].normalized = 1; \
       
  1443         } \
       
  1444         state = inName; \
       
  1445       }
       
  1446 #define LEAD_CASE(n) \
       
  1447     case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
       
  1448     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
  1449 #undef LEAD_CASE
       
  1450     case BT_NONASCII:
       
  1451     case BT_NMSTRT:
       
  1452     case BT_HEX:
       
  1453       START_NAME
       
  1454       break;
       
  1455 #undef START_NAME
       
  1456     case BT_QUOT:
       
  1457       if (state != inValue) {
       
  1458         if (nAtts < attsMax)
       
  1459           atts[nAtts].valuePtr = ptr + MINBPC(enc);
       
  1460         state = inValue;
       
  1461         open = BT_QUOT;
       
  1462       }
       
  1463       else if (open == BT_QUOT) {
       
  1464         state = other;
       
  1465         if (nAtts < attsMax)
       
  1466           atts[nAtts].valueEnd = ptr;
       
  1467         nAtts++;
       
  1468       }
       
  1469       break;
       
  1470     case BT_APOS:
       
  1471       if (state != inValue) {
       
  1472         if (nAtts < attsMax)
       
  1473           atts[nAtts].valuePtr = ptr + MINBPC(enc);
       
  1474         state = inValue;
       
  1475         open = BT_APOS;
       
  1476       }
       
  1477       else if (open == BT_APOS) {
       
  1478         state = other;
       
  1479         if (nAtts < attsMax)
       
  1480           atts[nAtts].valueEnd = ptr;
       
  1481         nAtts++;
       
  1482       }
       
  1483       break;
       
  1484     case BT_AMP:
       
  1485       if (nAtts < attsMax)
       
  1486         atts[nAtts].normalized = 0;
       
  1487       break;
       
  1488     case BT_S:
       
  1489       if (state == inName)
       
  1490         state = other;
       
  1491       else if (state == inValue
       
  1492                && nAtts < attsMax
       
  1493                && atts[nAtts].normalized
       
  1494                && (ptr == atts[nAtts].valuePtr
       
  1495                    || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
       
  1496                    || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
       
  1497                    || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
       
  1498         atts[nAtts].normalized = 0;
       
  1499       break;
       
  1500     case BT_CR: case BT_LF:
       
  1501       /* This case ensures that the first attribute name is counted
       
  1502          Apart from that we could just change state on the quote. */
       
  1503       if (state == inName)
       
  1504         state = other;
       
  1505       else if (state == inValue && nAtts < attsMax)
       
  1506         atts[nAtts].normalized = 0;
       
  1507       break;
       
  1508     case BT_GT:
       
  1509     case BT_SOL:
       
  1510       if (state != inValue)
       
  1511         return nAtts;
       
  1512       break;
       
  1513     default:
       
  1514       break;
       
  1515     }
       
  1516   }
       
  1517   /* not reached */
       
  1518 }
       
  1519 
       
  1520 static int PTRFASTCALL
       
  1521 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
       
  1522 {
       
  1523   int result = 0;
       
  1524   /* skip &# */
       
  1525   ptr += 2*MINBPC(enc);
       
  1526   if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
       
  1527     for (ptr += MINBPC(enc);
       
  1528          !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
       
  1529          ptr += MINBPC(enc)) {
       
  1530       int c = BYTE_TO_ASCII(enc, ptr);
       
  1531       switch (c) {
       
  1532       case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
       
  1533       case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
       
  1534         result <<= 4;
       
  1535         result |= (c - ASCII_0);
       
  1536         break;
       
  1537       case ASCII_A: case ASCII_B: case ASCII_C:
       
  1538       case ASCII_D: case ASCII_E: case ASCII_F:
       
  1539         result <<= 4;
       
  1540         result += 10 + (c - ASCII_A);
       
  1541         break;
       
  1542       case ASCII_a: case ASCII_b: case ASCII_c:
       
  1543       case ASCII_d: case ASCII_e: case ASCII_f:
       
  1544         result <<= 4;
       
  1545         result += 10 + (c - ASCII_a);
       
  1546         break;
       
  1547       }
       
  1548       if (result >= 0x110000)
       
  1549         return -1;
       
  1550     }
       
  1551   }
       
  1552   else {
       
  1553     for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
       
  1554       int c = BYTE_TO_ASCII(enc, ptr);
       
  1555       result *= 10;
       
  1556       result += (c - ASCII_0);
       
  1557       if (result >= 0x110000)
       
  1558         return -1;
       
  1559     }
       
  1560   }
       
  1561   return checkCharRefNumber(result);
       
  1562 }
       
  1563 
       
  1564 static int PTRCALL
       
  1565 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
       
  1566                              const char *end)
       
  1567 {
       
  1568   switch ((end - ptr)/MINBPC(enc)) {
       
  1569   case 2:
       
  1570     if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
       
  1571       switch (BYTE_TO_ASCII(enc, ptr)) {
       
  1572       case ASCII_l:
       
  1573         return ASCII_LT;
       
  1574       case ASCII_g:
       
  1575         return ASCII_GT;
       
  1576       }
       
  1577     }
       
  1578     break;
       
  1579   case 3:
       
  1580     if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
       
  1581       ptr += MINBPC(enc);
       
  1582       if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
       
  1583         ptr += MINBPC(enc);
       
  1584         if (CHAR_MATCHES(enc, ptr, ASCII_p))
       
  1585           return ASCII_AMP;
       
  1586       }
       
  1587     }
       
  1588     break;
       
  1589   case 4:
       
  1590     switch (BYTE_TO_ASCII(enc, ptr)) {
       
  1591     case ASCII_q:
       
  1592       ptr += MINBPC(enc);
       
  1593       if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
       
  1594         ptr += MINBPC(enc);
       
  1595         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
       
  1596           ptr += MINBPC(enc);
       
  1597           if (CHAR_MATCHES(enc, ptr, ASCII_t))
       
  1598             return ASCII_QUOT;
       
  1599         }
       
  1600       }
       
  1601       break;
       
  1602     case ASCII_a:
       
  1603       ptr += MINBPC(enc);
       
  1604       if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
       
  1605         ptr += MINBPC(enc);
       
  1606         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
       
  1607           ptr += MINBPC(enc);
       
  1608           if (CHAR_MATCHES(enc, ptr, ASCII_s))
       
  1609             return ASCII_APOS;
       
  1610         }
       
  1611       }
       
  1612       break;
       
  1613     }
       
  1614   }
       
  1615   return 0;
       
  1616 }
       
  1617 
       
  1618 static int PTRCALL
       
  1619 PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
       
  1620 {
       
  1621   for (;;) {
       
  1622     switch (BYTE_TYPE(enc, ptr1)) {
       
  1623 #define LEAD_CASE(n) \
       
  1624     case BT_LEAD ## n: \
       
  1625       if (*ptr1++ != *ptr2++) \
       
  1626         return 0;
       
  1627     LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
       
  1628 #undef LEAD_CASE
       
  1629       /* fall through */
       
  1630       if (*ptr1++ != *ptr2++)
       
  1631         return 0;
       
  1632       break;
       
  1633     case BT_NONASCII:
       
  1634     case BT_NMSTRT:
       
  1635 #ifdef XML_NS
       
  1636     case BT_COLON:
       
  1637 #endif
       
  1638     case BT_HEX:
       
  1639     case BT_DIGIT:
       
  1640     case BT_NAME:
       
  1641     case BT_MINUS:
       
  1642       if (*ptr2++ != *ptr1++)
       
  1643         return 0;
       
  1644       if (MINBPC(enc) > 1) {
       
  1645         if (*ptr2++ != *ptr1++)
       
  1646           return 0;
       
  1647         if (MINBPC(enc) > 2) {
       
  1648           if (*ptr2++ != *ptr1++)
       
  1649             return 0;
       
  1650           if (MINBPC(enc) > 3) {
       
  1651             if (*ptr2++ != *ptr1++)
       
  1652               return 0;
       
  1653           }
       
  1654         }
       
  1655       }
       
  1656       break;
       
  1657     default:
       
  1658       if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
       
  1659         return 1;
       
  1660       switch (BYTE_TYPE(enc, ptr2)) {
       
  1661       case BT_LEAD2:
       
  1662       case BT_LEAD3:
       
  1663       case BT_LEAD4:
       
  1664       case BT_NONASCII:
       
  1665       case BT_NMSTRT:
       
  1666 #ifdef XML_NS
       
  1667       case BT_COLON:
       
  1668 #endif
       
  1669       case BT_HEX:
       
  1670       case BT_DIGIT:
       
  1671       case BT_NAME:
       
  1672       case BT_MINUS:
       
  1673         return 0;
       
  1674       default:
       
  1675         return 1;
       
  1676       }
       
  1677     }
       
  1678   }
       
  1679   /* not reached */
       
  1680 }
       
  1681 
       
  1682 static int PTRCALL
       
  1683 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
       
  1684                          const char *end1, const char *ptr2)
       
  1685 {
       
  1686   for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
       
  1687     if (ptr1 == end1)
       
  1688       return 0;
       
  1689     if (!CHAR_MATCHES(enc, ptr1, *ptr2))
       
  1690       return 0;
       
  1691   }
       
  1692   return ptr1 == end1;
       
  1693 }
       
  1694 
       
  1695 static int PTRFASTCALL
       
  1696 PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
       
  1697 {
       
  1698   const char *start = ptr;
       
  1699   for (;;) {
       
  1700     switch (BYTE_TYPE(enc, ptr)) {
       
  1701 #define LEAD_CASE(n) \
       
  1702     case BT_LEAD ## n: ptr += n; break;
       
  1703     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
  1704 #undef LEAD_CASE
       
  1705     case BT_NONASCII:
       
  1706     case BT_NMSTRT:
       
  1707 #ifdef XML_NS
       
  1708     case BT_COLON:
       
  1709 #endif
       
  1710     case BT_HEX:
       
  1711     case BT_DIGIT:
       
  1712     case BT_NAME:
       
  1713     case BT_MINUS:
       
  1714       ptr += MINBPC(enc);
       
  1715       break;
       
  1716     default:
       
  1717       return (int)(ptr - start);
       
  1718     }
       
  1719   }
       
  1720 }
       
  1721 
       
  1722 static const char * PTRFASTCALL
       
  1723 PREFIX(skipS)(const ENCODING *enc, const char *ptr)
       
  1724 {
       
  1725   for (;;) {
       
  1726     switch (BYTE_TYPE(enc, ptr)) {
       
  1727     case BT_LF:
       
  1728     case BT_CR:
       
  1729     case BT_S:
       
  1730       ptr += MINBPC(enc);
       
  1731       break;
       
  1732     default:
       
  1733       return ptr;
       
  1734     }
       
  1735   }
       
  1736 }
       
  1737 
       
  1738 static void PTRCALL
       
  1739 PREFIX(updatePosition)(const ENCODING *enc,
       
  1740                        const char *ptr,
       
  1741                        const char *end,
       
  1742                        POSITION *pos)
       
  1743 {
       
  1744   while (ptr != end) {
       
  1745     switch (BYTE_TYPE(enc, ptr)) {
       
  1746 #define LEAD_CASE(n) \
       
  1747     case BT_LEAD ## n: \
       
  1748       ptr += n; \
       
  1749       break;
       
  1750     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
  1751 #undef LEAD_CASE
       
  1752     case BT_LF:
       
  1753       pos->columnNumber = (XML_Size)-1;
       
  1754       pos->lineNumber++;
       
  1755       ptr += MINBPC(enc);
       
  1756       break;
       
  1757     case BT_CR:
       
  1758       pos->lineNumber++;
       
  1759       ptr += MINBPC(enc);
       
  1760       if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
       
  1761         ptr += MINBPC(enc);
       
  1762       pos->columnNumber = (XML_Size)-1;
       
  1763       break;
       
  1764     default:
       
  1765       ptr += MINBPC(enc);
       
  1766       break;
       
  1767     }
       
  1768     pos->columnNumber++;
       
  1769   }
       
  1770 }
       
  1771 
       
  1772 #undef DO_LEAD_CASE
       
  1773 #undef MULTIBYTE_CASES
       
  1774 #undef INVALID_CASES
       
  1775 #undef CHECK_NAME_CASE
       
  1776 #undef CHECK_NAME_CASES
       
  1777 #undef CHECK_NMSTRT_CASE
       
  1778 #undef CHECK_NMSTRT_CASES
       
  1779