xml/cxmllibrary/src/xmlp/src/XmlpEntity.cpp
branchRCL_3
changeset 21 604ca70b6235
parent 20 889504eac4fb
equal deleted inserted replaced
20:889504eac4fb 21:604ca70b6235
     1 /*
       
     2 * Copyright (c) 2000 - 2001 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of the License "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #include "cxml_internal.h"
       
    20 #include <xml/cxml/nw_xmlp_xmlreader.h>
       
    21 #include <xml/cxml/nw_xmlp_xmlparser.h>
       
    22 #include <xml/cxml/nw_string_string.h>
       
    23 #include "cxml_xmlp_entity.h"
       
    24 #include "cxml_xmlp_int_entity.h"
       
    25 
       
    26 /*IMPORTANT NOTE: This array must be sorted by entity name.  The lookup function
       
    27  *				  does a binary search.
       
    28  *
       
    29  *                When you update this table make sure the constant 
       
    30  *				  CXML_Num_CaseInsensitive_Entries which is the count of 
       
    31  *				  caseinsensitive entries is updated correctly 
       
    32  */
       
    33 
       
    34 #define CXML_Num_CaseInsensitive_Entries 126
       
    35 
       
    36 
       
    37 
       
    38 static
       
    39 const CXML_EntitySet_Entry_t CXML_EntitySet_Array[] = 
       
    40 {
       
    41    // CaseSensitive entries
       
    42   {(CXML_Uint8*) "AElig", 198 },
       
    43   {(CXML_Uint8*) "Aacute", 193 },
       
    44   {(CXML_Uint8*) "Acirc", 194 },
       
    45   {(CXML_Uint8*) "Agrave", 192 },
       
    46   {(CXML_Uint8*) "Alpha", 913 },
       
    47   {(CXML_Uint8*) "Aring", 197 },
       
    48   {(CXML_Uint8*) "Atilde", 195 },
       
    49   {(CXML_Uint8*) "Auml", 196 },
       
    50   {(CXML_Uint8*) "Beta", 914 },
       
    51   {(CXML_Uint8*) "Ccedil", 199 },
       
    52   {(CXML_Uint8*) "Chi", 935 },
       
    53   {(CXML_Uint8*) "Dagger", 8225 },
       
    54   {(CXML_Uint8*) "Delta", 916 },
       
    55   {(CXML_Uint8*) "ETH", 208 },
       
    56   {(CXML_Uint8*) "Eacute", 201 },
       
    57   {(CXML_Uint8*) "Ecirc", 202 },
       
    58   {(CXML_Uint8*) "Egrave", 200 },
       
    59   {(CXML_Uint8*) "Epsilon", 917 },
       
    60   {(CXML_Uint8*) "Eta", 919 },
       
    61   {(CXML_Uint8*) "Euml", 203 },
       
    62   {(CXML_Uint8*) "Gamma", 915 },
       
    63   {(CXML_Uint8*) "Iacute", 205 },
       
    64   {(CXML_Uint8*) "Icirc", 206 },
       
    65   {(CXML_Uint8*) "Igrave", 204 },
       
    66   {(CXML_Uint8*) "Iota", 921 },
       
    67   {(CXML_Uint8*) "Iuml", 207 },
       
    68   {(CXML_Uint8*) "Kappa", 922 },
       
    69   {(CXML_Uint8*) "Lambda", 923 },
       
    70   {(CXML_Uint8*) "Mu", 924 },
       
    71   {(CXML_Uint8*) "Ntilde", 209 },
       
    72   {(CXML_Uint8*) "Nu", 925 },
       
    73   {(CXML_Uint8*) "OElig", 338 },
       
    74   {(CXML_Uint8*) "Oacute", 211 },
       
    75   {(CXML_Uint8*) "Ocirc", 212 },
       
    76   {(CXML_Uint8*) "Ograve", 210 },
       
    77   {(CXML_Uint8*) "Omega", 937 },
       
    78   {(CXML_Uint8*) "Omicron", 927 },
       
    79   {(CXML_Uint8*) "Oslash", 216 },
       
    80   {(CXML_Uint8*) "Otilde", 213 },
       
    81   {(CXML_Uint8*) "Ouml", 214 },
       
    82   {(CXML_Uint8*) "Phi", 934 },
       
    83   {(CXML_Uint8*) "Pi", 928 },
       
    84   {(CXML_Uint8*) "Prime", 8243 },
       
    85   {(CXML_Uint8*) "Psi", 936 },
       
    86   {(CXML_Uint8*) "Rho", 929 },
       
    87   {(CXML_Uint8*) "Scaron", 352 },
       
    88   {(CXML_Uint8*) "Sigma", 931 },
       
    89   {(CXML_Uint8*) "THORN", 222 },
       
    90   {(CXML_Uint8*) "Tau", 932 },
       
    91   {(CXML_Uint8*) "Theta", 920 },
       
    92   {(CXML_Uint8*) "Uacute", 218 },
       
    93   {(CXML_Uint8*) "Ucirc", 219 },
       
    94   {(CXML_Uint8*) "Ugrave", 217 },
       
    95   {(CXML_Uint8*) "Upsilon", 933 },
       
    96   {(CXML_Uint8*) "Uuml", 220 },
       
    97   {(CXML_Uint8*) "Xi", 926 },
       
    98   {(CXML_Uint8*) "Yacute", 221 },
       
    99   {(CXML_Uint8*) "Yuml", 376 },
       
   100   {(CXML_Uint8*) "Zeta", 918 },
       
   101   {(CXML_Uint8*) "aacute", 225 },
       
   102   {(CXML_Uint8*) "acirc", 226 },
       
   103   {(CXML_Uint8*) "acute", 180 },
       
   104   {(CXML_Uint8*) "aelig", 230 },
       
   105   {(CXML_Uint8*) "agrave", 224 },
       
   106   {(CXML_Uint8*) "alpha", 945 },
       
   107   {(CXML_Uint8*) "atilde", 227 },
       
   108   {(CXML_Uint8*) "auml", 228 },
       
   109   {(CXML_Uint8*) "beta", 946 },
       
   110   {(CXML_Uint8*) "ccedil", 231 },
       
   111   {(CXML_Uint8*) "chi", 967 },
       
   112   {(CXML_Uint8*) "dArr", 8659 },
       
   113   {(CXML_Uint8*) "dagger", 8224 },
       
   114   {(CXML_Uint8*) "darr", 8595 },
       
   115   {(CXML_Uint8*) "delta", 948 },
       
   116   {(CXML_Uint8*) "eacute", 233 },
       
   117   {(CXML_Uint8*) "ecirc", 234 },
       
   118   {(CXML_Uint8*) "egrave", 232 },
       
   119   {(CXML_Uint8*) "epsilon", 949 },
       
   120   {(CXML_Uint8*) "eta", 951 },
       
   121   {(CXML_Uint8*) "euml", 235 },
       
   122   {(CXML_Uint8*) "gamma", 947 },
       
   123   {(CXML_Uint8*) "hArr", 8660 },
       
   124   {(CXML_Uint8*) "harr", 8596 },
       
   125   {(CXML_Uint8*) "iacute", 237 },
       
   126   {(CXML_Uint8*) "icirc", 238 },
       
   127   {(CXML_Uint8*) "igrave", 236 },
       
   128   {(CXML_Uint8*) "iota", 953 },
       
   129   {(CXML_Uint8*) "iuml", 239 },
       
   130   {(CXML_Uint8*) "kappa", 954 },
       
   131   {(CXML_Uint8*) "lArr", 8656 },
       
   132   {(CXML_Uint8*) "lambda", 955 },
       
   133   {(CXML_Uint8*) "larr", 8592 },
       
   134   {(CXML_Uint8*) "mu", 956 },
       
   135   {(CXML_Uint8*) "ntilde", 241 },
       
   136   {(CXML_Uint8*) "nu", 957 },
       
   137   {(CXML_Uint8*) "oacute", 243 },
       
   138   {(CXML_Uint8*) "ocirc", 244 },
       
   139   {(CXML_Uint8*) "oelig", 339 },
       
   140   {(CXML_Uint8*) "ograve", 242 },
       
   141   {(CXML_Uint8*) "omega", 969 },
       
   142   {(CXML_Uint8*) "omicron", 959 },
       
   143   {(CXML_Uint8*) "oslash", 248 },
       
   144   {(CXML_Uint8*) "otilde", 245 },
       
   145   {(CXML_Uint8*) "otimes", 8855 },
       
   146   {(CXML_Uint8*) "ouml", 246 },
       
   147   {(CXML_Uint8*) "phi", 966 },
       
   148   {(CXML_Uint8*) "pi", 960 },
       
   149   {(CXML_Uint8*) "psi", 968 },
       
   150   {(CXML_Uint8*) "rArr", 8658 },
       
   151   {(CXML_Uint8*) "rarr", 8594 },
       
   152   {(CXML_Uint8*) "rho", 961 },
       
   153   {(CXML_Uint8*) "scaron", 353 },
       
   154   {(CXML_Uint8*) "sigma", 963 },
       
   155   {(CXML_Uint8*) "tau", 964 },
       
   156   {(CXML_Uint8*) "theta", 952 },
       
   157   {(CXML_Uint8*) "thorn", 254 },
       
   158   {(CXML_Uint8*) "uArr", 8657 },
       
   159   {(CXML_Uint8*) "uacute", 250 },
       
   160   {(CXML_Uint8*) "uarr", 8593 },
       
   161   {(CXML_Uint8*) "ucirc", 251 },
       
   162   {(CXML_Uint8*) "ugrave", 249 },
       
   163   {(CXML_Uint8*) "upsilon", 965 },
       
   164   {(CXML_Uint8*) "uuml", 252 },
       
   165   {(CXML_Uint8*) "xi", 958 },
       
   166   {(CXML_Uint8*) "yacute", 253 },
       
   167   {(CXML_Uint8*) "yuml", 255 },
       
   168   {(CXML_Uint8*) "zeta", 950 },
       
   169   {(CXML_Uint8*) "zwj", 8205 },
       
   170   {(CXML_Uint8*) "zwnj", 8204 },
       
   171 // Case Insensitive entries
       
   172   {(CXML_Uint8*) "alefsym", 8501 },
       
   173   {(CXML_Uint8*) "amp", 38 },
       
   174   {(CXML_Uint8*) "and", 8743 },
       
   175   {(CXML_Uint8*) "ang", 8736 },
       
   176   {(CXML_Uint8*) "apos", 39 },
       
   177   {(CXML_Uint8*) "aring", 229 },
       
   178   {(CXML_Uint8*) "asymp", 8776 },
       
   179   {(CXML_Uint8*) "bdquo", 8222 },
       
   180   {(CXML_Uint8*) "brvbar", 166 },
       
   181   {(CXML_Uint8*) "bull", 8226 },
       
   182   {(CXML_Uint8*) "cap", 8745 },
       
   183   {(CXML_Uint8*) "cedil", 184 },
       
   184   {(CXML_Uint8*) "cent", 162 },
       
   185   {(CXML_Uint8*) "circ", 710 },
       
   186   {(CXML_Uint8*) "clubs", 9827 },
       
   187   {(CXML_Uint8*) "cong", 8773 },
       
   188   {(CXML_Uint8*) "copy", 169 },
       
   189   {(CXML_Uint8*) "crarr", 8629 },
       
   190   {(CXML_Uint8*) "cup", 8746 },
       
   191   {(CXML_Uint8*) "curren", 164 },
       
   192   {(CXML_Uint8*) "deg", 176 },
       
   193   {(CXML_Uint8*) "diams", 9830 },
       
   194   {(CXML_Uint8*) "divide", 247 },
       
   195   {(CXML_Uint8*) "empty", 8709 },
       
   196   {(CXML_Uint8*) "emsp", 8195 },
       
   197   {(CXML_Uint8*) "ensp", 8194 },
       
   198   {(CXML_Uint8*) "equiv", 8801 },
       
   199   {(CXML_Uint8*) "eth", 240 },
       
   200   {(CXML_Uint8*) "euro", 8364 },
       
   201   {(CXML_Uint8*) "exist", 8707 },
       
   202   {(CXML_Uint8*) "fnof", 402 },
       
   203   {(CXML_Uint8*) "forall", 8704 },
       
   204   {(CXML_Uint8*) "frac12", 189 },
       
   205   {(CXML_Uint8*) "frac14", 188 },
       
   206   {(CXML_Uint8*) "frac34", 190 },
       
   207   {(CXML_Uint8*) "frasl", 8260 },
       
   208   {(CXML_Uint8*) "ge", 8805 },
       
   209   {(CXML_Uint8*) "gt", 62 },
       
   210   {(CXML_Uint8*) "hearts", 9829 },
       
   211   {(CXML_Uint8*) "hellip", 8230 },
       
   212   {(CXML_Uint8*) "iexcl", 161 },
       
   213   {(CXML_Uint8*) "image", 8465 },
       
   214   {(CXML_Uint8*) "infin", 8734 },
       
   215   {(CXML_Uint8*) "int", 8747 },
       
   216   {(CXML_Uint8*) "iquest", 191 },
       
   217   {(CXML_Uint8*) "isin", 8712 },
       
   218   {(CXML_Uint8*) "lang", 9001 },
       
   219   {(CXML_Uint8*) "laquo", 171 },
       
   220   {(CXML_Uint8*) "lceil", 8968 },
       
   221   {(CXML_Uint8*) "ldquo", 8220 },
       
   222   {(CXML_Uint8*) "le", 8804 },
       
   223   {(CXML_Uint8*) "lfloor", 8970 },
       
   224   {(CXML_Uint8*) "lowast", 8727 },
       
   225   {(CXML_Uint8*) "loz", 9674 },
       
   226   {(CXML_Uint8*) "lrm", 8206 },
       
   227   {(CXML_Uint8*) "lsaquo", 8249 },
       
   228   {(CXML_Uint8*) "lsquo", 8216 },
       
   229   {(CXML_Uint8*) "lt", 60 },
       
   230   {(CXML_Uint8*) "macr", 175 },
       
   231   {(CXML_Uint8*) "mdash", 8212 },
       
   232   {(CXML_Uint8*) "micro", 181 },
       
   233   {(CXML_Uint8*) "middot", 183 },
       
   234   {(CXML_Uint8*) "minus", 8722 },
       
   235   {(CXML_Uint8*) "nabla", 8711 },
       
   236   {(CXML_Uint8*) "nbsp", 160 },
       
   237   {(CXML_Uint8*) "ndash", 8211 },
       
   238   {(CXML_Uint8*) "ne", 8800 },
       
   239   {(CXML_Uint8*) "ni", 8715 },
       
   240   {(CXML_Uint8*) "not", 172 },
       
   241   {(CXML_Uint8*) "notin", 8713 },
       
   242   {(CXML_Uint8*) "nsub", 8836 },
       
   243   {(CXML_Uint8*) "oline", 8254 },
       
   244   {(CXML_Uint8*) "oplus", 8853 },
       
   245   {(CXML_Uint8*) "or", 8744 },
       
   246   {(CXML_Uint8*) "ordf", 170 },
       
   247   {(CXML_Uint8*) "ordm", 186 },
       
   248   {(CXML_Uint8*) "para", 182 },
       
   249   {(CXML_Uint8*) "part", 8706 },
       
   250   {(CXML_Uint8*) "permil", 8240 },
       
   251   {(CXML_Uint8*) "perp", 8869 },
       
   252   {(CXML_Uint8*) "piv", 982 },
       
   253   {(CXML_Uint8*) "plusmn", 177 },
       
   254   {(CXML_Uint8*) "pound", 163 },
       
   255   {(CXML_Uint8*) "prime", 8242 },
       
   256   {(CXML_Uint8*) "prod", 8719 },
       
   257   {(CXML_Uint8*) "prop", 8733 },
       
   258   {(CXML_Uint8*) "quot", 34 },
       
   259   {(CXML_Uint8*) "radic", 8730 },
       
   260   {(CXML_Uint8*) "rang", 9002 },
       
   261   {(CXML_Uint8*) "raquo", 187 },
       
   262   {(CXML_Uint8*) "rceil", 8969 },
       
   263   {(CXML_Uint8*) "rdquo", 8221 },
       
   264   {(CXML_Uint8*) "real", 8476 },
       
   265   {(CXML_Uint8*) "reg", 174 },
       
   266   {(CXML_Uint8*) "rfloor", 8971 },
       
   267   {(CXML_Uint8*) "rlm", 8207 },
       
   268   {(CXML_Uint8*) "rsaquo", 8250 },
       
   269   {(CXML_Uint8*) "rsquo", 8217 },
       
   270   {(CXML_Uint8*) "sbquo", 8218 },
       
   271   {(CXML_Uint8*) "sdot", 8901 },
       
   272   {(CXML_Uint8*) "sect", 167 },
       
   273   {(CXML_Uint8*) "shy", 173 },
       
   274   {(CXML_Uint8*) "sigmaf", 962 },
       
   275   {(CXML_Uint8*) "sim", 8764 },
       
   276   {(CXML_Uint8*) "spades", 9824 },
       
   277   {(CXML_Uint8*) "sub", 8834 },
       
   278   {(CXML_Uint8*) "sube", 8838 },
       
   279   {(CXML_Uint8*) "sum", 8721 },
       
   280   {(CXML_Uint8*) "sup", 8835 },
       
   281   {(CXML_Uint8*) "sup1", 185 },
       
   282   {(CXML_Uint8*) "sup2", 178 },
       
   283   {(CXML_Uint8*) "sup3", 179 },
       
   284   {(CXML_Uint8*) "supe", 8839 },
       
   285   {(CXML_Uint8*) "szlig", 223 },
       
   286   {(CXML_Uint8*) "there4", 8756 },
       
   287   {(CXML_Uint8*) "thetasym", 977 },
       
   288   {(CXML_Uint8*) "thinsp", 8201 },
       
   289   {(CXML_Uint8*) "tilde", 732 },
       
   290   {(CXML_Uint8*) "times", 215 },
       
   291   {(CXML_Uint8*) "trade", 8482 },
       
   292   {(CXML_Uint8*)"uml", 168 },
       
   293   {(CXML_Uint8*) "upsih", 978 },
       
   294   {(CXML_Uint8*) "weierp", 8472 },
       
   295   {(CXML_Uint8*) "yen", 165 },
       
   296   {(CXML_Uint8*) "zwj", 8205 },
       
   297   {(CXML_Uint8*) "zwnj", 8204 },
       
   298 };
       
   299 
       
   300 const CXML_Uint32 CXML_ALL_PREDEFINE_ENTITIES = (sizeof(CXML_EntitySet_Array)
       
   301    / sizeof(CXML_EntitySet_Entry_t));
       
   302 
       
   303 /* It assumes the parser at the begginning i.e '&' symbol. 
       
   304  * The entity name stops at (;) or at '>'. If it is stopped
       
   305  * at (;) then it is a valid entity.
       
   306  * If it stops at '>' then it is not a valid entity.
       
   307  */
       
   308 
       
   309 
       
   310 NW_Status_t CXML_XML_Parser_Entity(NW_XML_Reader_t* pT,
       
   311 							    NW_XML_Reader_Interval_t* I_entityData,
       
   312 							    NW_Bool* entityFound)
       
   313 {
       
   314  NW_Status_t s;
       
   315  NW_Bool endFound = NW_FALSE;
       
   316  //NW_Uint32 isSpace;
       
   317  NW_Uint32 cnt=0;
       
   318  NW_Uint32 match;
       
   319 
       
   320  s = NW_XML_Reader_Advance(pT); //Pass over the '&' symbol
       
   321 
       
   322  if (NW_STAT_IS_FAILURE(s))
       
   323  {
       
   324   return NW_STAT_FAILURE;
       
   325  }
       
   326 
       
   327  NW_XML_Reader_Interval_Start(I_entityData, pT);
       
   328 
       
   329  // Just check for false entity or error in entity e.g. If entity is not 
       
   330  // terminated by (;)
       
   331 
       
   332 
       
   333  for(cnt=0; ; )
       
   334  {
       
   335 
       
   336   cnt++;
       
   337 
       
   338   //check for terminating entity character
       
   339 
       
   340   s = NW_XML_Reader_AsciiCharMatch(pT, ';', &match); 
       
   341 
       
   342   if (NW_STAT_IS_FAILURE(s))
       
   343   {
       
   344     return NW_STAT_FAILURE;
       
   345   }
       
   346 
       
   347   if(match)
       
   348   {
       
   349    endFound = NW_TRUE;
       
   350    break;
       
   351   }
       
   352 
       
   353   //Check condition if it is malformed entity. Exit at least 
       
   354   //at the end of attribute or contents.
       
   355 
       
   356   s = NW_XML_Reader_AsciiCharMatch(pT, '>', &match); 
       
   357 
       
   358   if (NW_STAT_IS_FAILURE(s))
       
   359   {
       
   360     return NW_STAT_FAILURE;
       
   361   }
       
   362 
       
   363     if(match)
       
   364     {
       
   365      endFound = NW_FALSE;
       
   366      break;
       
   367     }
       
   368 
       
   369    //Move to next character
       
   370 
       
   371    s = NW_XML_Reader_Advance(pT); 
       
   372 
       
   373    if (NW_STAT_IS_FAILURE(s))
       
   374    {
       
   375     return NW_STAT_FAILURE;
       
   376    }
       
   377  }//end for
       
   378 
       
   379  if(endFound == NW_TRUE)
       
   380  {
       
   381 	 NW_XML_Reader_Interval_Stop(I_entityData, pT);
       
   382 	 *entityFound = NW_TRUE;
       
   383 
       
   384     //Move over end of entity i.e. (;) character.
       
   385 
       
   386     s = NW_XML_Reader_Advance(pT); 
       
   387 
       
   388     if (NW_STAT_IS_FAILURE(s))
       
   389     {
       
   390      return NW_STAT_FAILURE;
       
   391     }
       
   392  }
       
   393  else
       
   394  {
       
   395 	*entityFound = NW_FALSE;
       
   396 	s = NW_STAT_SUCCESS;
       
   397  }
       
   398 
       
   399 
       
   400  return s;
       
   401 }//end CXML_Parser_Entity(...)
       
   402 
       
   403 
       
   404 /* Function to verify the digit depending on its base */
       
   405 
       
   406 static
       
   407 CXML_Int32 CXML_XML_IsValidDigit (const CXML_Ucs2 ch, 
       
   408                        NW_Int32 base) 
       
   409 {
       
   410   switch (base) {
       
   411     case 10: 
       
   412       return CXML_Str_Isdigit (ch);
       
   413 
       
   414     case 16:
       
   415       return CXML_Str_Isxdigit (ch);
       
   416 
       
   417     default:
       
   418       return 0;
       
   419   }
       
   420 }
       
   421 
       
   422 
       
   423 /* 
       
   424  * Function converts the entity string to the entity value.
       
   425  */
       
   426 
       
   427 static
       
   428 NW_Bool CXML_XML_GetNumEntityChar (const CXML_Ucs2* instring, 
       
   429                            CXML_Ucs2 *retchar, 
       
   430                            CXML_Int32 base) 
       
   431 {
       
   432   CXML_Int32 result = 0;
       
   433   CXML_Int32 prevResult;
       
   434   CXML_Uint32 currDigit = 0;
       
   435   
       
   436   if (*instring == 0) {
       
   437     return NW_FALSE;
       
   438   }
       
   439 
       
   440   while (*instring != 0) {
       
   441     if (!(CXML_XML_IsValidDigit (*instring, base)))
       
   442       return NW_FALSE;
       
   443   
       
   444     if (CXML_Str_Isdigit (*instring)) {
       
   445       currDigit = *instring - CXML_ASCII_0;
       
   446       instring++;
       
   447     } 
       
   448     else {
       
   449       currDigit = CXML_Str_ToUpper (*instring) - CXML_ASCII_UPPER_A + 10;
       
   450       instring++;
       
   451     }
       
   452 
       
   453     prevResult = result;
       
   454     result = result * base + currDigit;
       
   455     if (result < prevResult) {
       
   456       return NW_FALSE;
       
   457     }
       
   458   }
       
   459   if (result > 0xffff || result < 0x0) 
       
   460     return NW_FALSE;
       
   461 
       
   462   *retchar = (NW_Ucs2) result;
       
   463   return NW_TRUE;
       
   464 }//end NW_Bool CXML_XML_GetNumEntityChar(..)
       
   465 
       
   466 
       
   467 /* This function uses the binary search to find the predefined
       
   468  * entity names and corresponding value if found in the array.
       
   469  */
       
   470 
       
   471 static NW_Ucs2
       
   472 CXML_Get_Entity_Val (NW_Ucs2 *name)
       
   473 {
       
   474   CXML_Int32 index;
       
   475   NW_Status_t s = NW_STAT_SUCCESS;
       
   476   const CXML_EntitySet_Entry_t* entry;
       
   477   NW_Ucs2* ucs2TempStr = NULL;
       
   478   CXML_Uint32 entityLength = 0;
       
   479 
       
   480   /* First do a binary search search in the case sensitive part of the array */
       
   481   CXML_Int32 low = 0;
       
   482   CXML_Int32 high = CXML_ALL_PREDEFINE_ENTITIES - CXML_Num_CaseInsensitive_Entries - 1;
       
   483   CXML_Int32 res = 0;
       
   484 
       
   485   while (low <= high ) {
       
   486     index = (high + low) / 2;
       
   487     entry = & (CXML_EntitySet_Array[index]);
       
   488     entityLength = CXML_Asc_strlen((CXML_Int8 *)entry->name);
       
   489 
       
   490     s = NW_String_byteToUCS2Char(entry->name,entityLength,&ucs2TempStr);
       
   491 
       
   492 	if (NW_STAT_IS_FAILURE(s)) 
       
   493 	 {
       
   494 	  return 0;
       
   495 	 }
       
   496 
       
   497     // do a case insensitive string comparison
       
   498     
       
   499     res = CXML_Str_StrcmpConst( name, ucs2TempStr );
       
   500 
       
   501     if(ucs2TempStr != NULL)
       
   502 	 {
       
   503 	  NW_Mem_Free(ucs2TempStr);
       
   504       ucs2TempStr = NULL;
       
   505 	 }
       
   506 
       
   507     if ( res > 0 ) {
       
   508       /* name is ahead of this slot.  Increase low bound. */
       
   509       low = index + 1;
       
   510     } else if ( res < 0 ) {
       
   511       /* name is behind this slot.  Decrease high bound. */
       
   512       high = index - 1;
       
   513     } else {
       
   514       /* Found the entity name.  Return its value. */
       
   515       return entry->value;
       
   516     }
       
   517   }
       
   518 
       
   519   /* if no match was found search in the case insensitive part of the table. */
       
   520 
       
   521   low = CXML_ALL_PREDEFINE_ENTITIES - CXML_Num_CaseInsensitive_Entries;
       
   522   high = CXML_ALL_PREDEFINE_ENTITIES - 1;
       
   523   res = 0;
       
   524   while (low <= high )
       
   525   {
       
   526     index = (high + low) / 2;
       
   527     entry = &(CXML_EntitySet_Array[index]);
       
   528     entityLength = CXML_Asc_strlen((CXML_Int8 *)entry->name);
       
   529 
       
   530     s = NW_String_byteToUCS2Char(entry->name,entityLength,&ucs2TempStr);
       
   531 
       
   532 	if (NW_STAT_IS_FAILURE(s)) 
       
   533 	 {
       
   534 	  return 0;
       
   535 	 }
       
   536 
       
   537     // do a case insensitive string comparison
       
   538     
       
   539     res = CXML_Str_Stricmp( name, ucs2TempStr );
       
   540 
       
   541     if(ucs2TempStr != NULL)
       
   542 	 {
       
   543 	  NW_Mem_Free(ucs2TempStr);
       
   544       ucs2TempStr = NULL;
       
   545 	 }
       
   546 	  
       
   547 
       
   548     if ( res > 0 ) {
       
   549       /* name is ahead of this slot.  Increase low bound. */
       
   550       low = index + 1;
       
   551     } else if ( res < 0 ) {
       
   552       /* name is behind this slot.  Decrease high bound. */
       
   553       high = index - 1;
       
   554     } else {
       
   555       /* Found the entity name.  Return its value. */
       
   556       return entry->value;
       
   557     }
       
   558   }/*end while */
       
   559 
       
   560   /* if no match were found we return 0 */
       
   561   return 0;
       
   562 }
       
   563 
       
   564 /* This function is called when entity of valid syntax is found. The 
       
   565  * entity validity is checked here.
       
   566  *
       
   567  * E.g. &ggg; is valid syntax entity but not a valid entity. In this 
       
   568  *             case continue parsing the entity as it is. Create this
       
   569  *             as normal contents not as the entity.
       
   570  *
       
   571  * entityContent ==> Entity Name (e.g. "amp")
       
   572  * length        ==> Length of Entity Name.
       
   573  * entityVal     ==> This parameter will contain the entity value.
       
   574  *                   if it is character entites (decimal, hex or predefined)
       
   575  *
       
   576  * entityFound  ==> TRUE if it is valid entity of any kind.
       
   577  * encoding     ==> Encoding of input string.
       
   578  *
       
   579  * Resolve the following type of entities 
       
   580  * 1) Character hex entry
       
   581  * 2) Character decimal entry
       
   582  * 3) Prefined entry
       
   583  * 4) Internal Entities defined in the DTD.
       
   584  *
       
   585  */
       
   586 
       
   587 
       
   588 static
       
   589 NW_Status_t CXML_XML_Resolve_Entity( CXML_Uint8* entityContent,
       
   590 									CXML_Uint32 length,
       
   591 									NW_Uint32* entityVal,
       
   592                                     CXML_Uint8** entityValStr,
       
   593 									NW_Bool* entityFound,
       
   594                                     NW_Uint32 encoding,
       
   595                                     void* internalEntityList)
       
   596 {
       
   597  NW_Status_t s = NW_STAT_SUCCESS;
       
   598  NW_Ucs2 entityChar = 0;
       
   599  NW_Ucs2* ucs2Str = NULL;
       
   600  NW_Bool entityGot = NW_FALSE;
       
   601 
       
   602 
       
   603     
       
   604     if( encoding == HTTP_iso_10646_ucs_2 )
       
   605     {
       
   606      /* Extra two bytes for NULL termination as UCS should be always 
       
   607       * even bytes.
       
   608       */
       
   609      ucs2Str = (NW_Ucs2*) NW_Mem_Malloc(length+2);
       
   610      if(ucs2Str != NULL)
       
   611      {
       
   612       (void)NW_Mem_memcpy(ucs2Str, entityContent, length );
       
   613       /* Null-terminate the string */
       
   614        ucs2Str[length/2] = '\000';
       
   615      }
       
   616      else
       
   617      {
       
   618       s = NW_STAT_OUT_OF_MEMORY;
       
   619      }
       
   620     }
       
   621     else
       
   622     {
       
   623      s = NW_String_byteToUCS2Char(entityContent,length,&ucs2Str);
       
   624     }
       
   625 
       
   626     
       
   627 	 if (NW_STAT_IS_FAILURE(s)) 
       
   628 	 {
       
   629           if(ucs2Str != NULL)
       
   630           {
       
   631            NW_Mem_Free(ucs2Str);
       
   632           }
       
   633 	  return s;
       
   634 	 }
       
   635 
       
   636  /*Check and validate for Hex/Decimal numeric character entry */
       
   637 
       
   638  if(ucs2Str[0] == '#')
       
   639  {
       
   640 	entityGot = CXML_TRUE;
       
   641 	if( (ucs2Str[1] == 'x') || (ucs2Str[1] == 'X') )  //Hex entity
       
   642 	{
       
   643       
       
   644 	  if(CXML_XML_GetNumEntityChar(ucs2Str + 2,&entityChar,16) != NW_TRUE)
       
   645 	  {
       
   646        /* Entity syntax is valid, but entity value is wrong so continue parsing it as
       
   647         * normal syntax.
       
   648         */
       
   649        *entityFound = CXML_FALSE;
       
   650          if(ucs2Str != NULL)
       
   651           {
       
   652            NW_Mem_Free(ucs2Str);
       
   653           }
       
   654 	   return NW_STAT_SUCCESS;
       
   655 	  }
       
   656       *entityVal = (entityChar);
       
   657       *entityValStr = NULL;
       
   658 	}
       
   659   else
       
   660 	{
       
   661       /*Check and validate for Decimal numeric character entry */ 
       
   662 	  if(CXML_XML_GetNumEntityChar(ucs2Str + 1,&entityChar,10) != NW_TRUE)
       
   663 	  {
       
   664        /* Entity syntax is valid, but entity value is wrong so continue parsing it as
       
   665         * normal syntax.
       
   666         */
       
   667        *entityFound = CXML_FALSE;
       
   668          if(ucs2Str != NULL)
       
   669           {
       
   670            NW_Mem_Free(ucs2Str);
       
   671           }
       
   672 	   return NW_STAT_SUCCESS;
       
   673 	  }
       
   674       *entityVal = (entityChar);
       
   675       *entityValStr = NULL;
       
   676 	}
       
   677    *entityFound = CXML_TRUE;
       
   678  } //end if(ucs2Str[0] == '#')
       
   679 
       
   680  /* Check for only predefined entities */
       
   681 
       
   682  if(entityGot != CXML_TRUE)
       
   683  {	
       
   684 	 if( ( entityChar = CXML_Get_Entity_Val(ucs2Str) ) != 0)
       
   685 	 {
       
   686 	  entityGot = NW_TRUE;
       
   687 	 }
       
   688 
       
   689 	  
       
   690 	 if(entityGot == NW_TRUE)
       
   691 	 {
       
   692 	  *entityVal =  entityChar;
       
   693       *entityValStr = NULL;
       
   694 	  *entityFound = CXML_TRUE;
       
   695 	 }
       
   696  } //end if(..)
       
   697 
       
   698  /*Check for the Internal Entity */
       
   699 
       
   700  
       
   701  if(entityGot != CXML_TRUE)
       
   702  {	
       
   703   s = CXML_XML_Resolve_Internal_Entity(ucs2Str, entityValStr, entityFound, encoding, internalEntityList);  
       
   704   *entityVal =  0;
       
   705  }
       
   706  
       
   707 
       
   708  if(ucs2Str != NULL)
       
   709  {
       
   710   NW_Mem_Free(ucs2Str);
       
   711  }
       
   712 
       
   713  return NW_STAT_SUCCESS;
       
   714 }//end CXML_XML_Resolve_Entity()
       
   715 
       
   716 /* Reads the entity data. Decide about the entity. */
       
   717 
       
   718 
       
   719 NW_Status_t CXML_XML_Handle_entity(NW_XML_Reader_t* pT,
       
   720 								   NW_XML_Reader_Interval_t* pI_entityData,
       
   721 								   NW_Uint32* entityVal,
       
   722                                    NW_Uint8** entityValStr,
       
   723 								   NW_Bool* entityFound,
       
   724                                    void* internalEntityList)
       
   725 {
       
   726    NW_Status_t s;
       
   727    NW_Uint32 length;
       
   728    NW_Uint32 byteLength;
       
   729    NW_Uint8* pContent;
       
   730 
       
   731 	if (!NW_XML_Reader_Interval_IsWellFormed(pI_entityData)) {
       
   732         return NW_STAT_FAILURE;
       
   733     }
       
   734 
       
   735     length = pI_entityData->stop - pI_entityData->start;
       
   736     byteLength = length;
       
   737 
       
   738     s = NW_XML_Reader_DataAddressFromBuffer(pT, pI_entityData->start,
       
   739                                             &byteLength,
       
   740                                             &pContent);
       
   741     if (NW_STAT_IS_FAILURE(s)) {
       
   742         return s;
       
   743     }
       
   744 
       
   745     if (byteLength != length) {
       
   746         return NW_STAT_FAILURE;
       
   747     }
       
   748 
       
   749 	s = CXML_XML_Resolve_Entity(pContent,length,entityVal,entityValStr,entityFound,
       
   750                                 pT->encoding, internalEntityList);
       
   751 
       
   752 return s;
       
   753 }//end CXML_XML_Handle_entity(..)
       
   754 
       
   755  /* The following function converts the numeric entities to the 
       
   756   * predefined entity.
       
   757   */
       
   758 
       
   759 static
       
   760 NW_Status_t CXML_XML_Entity_to_Ascii(CXML_Uint32 entityVal,
       
   761 									CXML_Byte* entityStr,
       
   762 									CXML_Uint32* strLen)
       
   763 {
       
   764  NW_Uint32 i = 0, j=0;
       
   765  CXML_Uint32 entityLength = 0;
       
   766  NW_Status_t s = NW_STAT_FAILURE;
       
   767 
       
   768  for(i=0; i < CXML_ALL_PREDEFINE_ENTITIES; i++)
       
   769  {
       
   770   if(CXML_EntitySet_Array[i].value == entityVal)
       
   771   {
       
   772    entityStr[0] = '&'; //Starting of the entity
       
   773 
       
   774    entityLength = CXML_Asc_strlen((CXML_Int8 *)CXML_EntitySet_Array[i].name);
       
   775 
       
   776    for(j=0; j < entityLength; j++)
       
   777    {
       
   778     entityStr[j+1] = CXML_EntitySet_Array[i].name[j];
       
   779    }
       
   780    
       
   781     entityStr[j + 1] = ';' ; //end of entity
       
   782     *strLen = entityLength + 2;
       
   783     entityStr[*strLen] = '\0';
       
   784 	s = NW_STAT_SUCCESS;
       
   785 	break;
       
   786   }//endif
       
   787  }//end for(..)
       
   788 
       
   789  return s;
       
   790 }//end CXML_XML_Ascii_to_Entity(..)
       
   791 
       
   792 /* This function assumes that predefined entities. The predefined
       
   793  * entities are defined in the CXML_EntitySet_Array[].
       
   794  * 
       
   795  */
       
   796 
       
   797 EXPORT_C
       
   798 NW_Status_t CXML_XML_Get_Entity(CXML_Uint32 entityVal,
       
   799 								CXML_Byte* entityStr,
       
   800 								CXML_Uint32* strLen)
       
   801 {
       
   802  NW_Status_t s = NW_STAT_SUCCESS;
       
   803  *strLen = 0;
       
   804 
       
   805  
       
   806   s = CXML_XML_Entity_to_Ascii( entityVal,entityStr,strLen);
       
   807 
       
   808  if(s == NW_STAT_FAILURE)
       
   809  {
       
   810   *entityStr = NULL;
       
   811   *strLen    = NULL;
       
   812  }
       
   813 
       
   814  return s;
       
   815 }//end CXML_XML_Get_Entity()
       
   816