--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/browserutilities/feedsengine/FeedsServer/XmlUtils/src/XmlEntity.cpp Mon Mar 30 12:54:55 2009 +0300
@@ -0,0 +1,639 @@
+/*
+* Copyright (c) 2005 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of the License "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description: Resolves entities.
+*
+*/
+
+
+
+#include <EscapeUtils.h>
+#include <libxml2_xmlmemory.h>
+#include <libxml2_globals.h>
+
+#include "CleanupLibXml2.h"
+#include "LeakTracker.h"
+#include "XmlEntity.h"
+
+
+// Private consts.
+// -------------------------------------------------------------------------
+// Note: This array must be sorted by entity name. The lookup function
+// does a binary search. */
+
+// IMPORTANT: When you update this table make sure the constant
+// NW_XHTML_Num_CaseInsensitive_Entries which is the count of
+// caseinsensitive entries is updated correctly.
+
+// Note: See http://kellyjones.netfirms.com/webtools/ascii_utf8_table.html
+// for utf8 entity mappings.
+
+
+#define KNumCaseInsensitiveEntries 126
+
+static const CXmlEntity::EntityEntry sSpaceEntity = { " ", 32 };
+
+static const CXmlEntity::EntityEntry sEntityMappings[] =
+ {
+// CaseSensitive entries
+ { "AElig", 198 },
+ { "Aacute", 193 },
+ { "Acirc", 194 },
+ { "Agrave", 192 },
+ { "Alpha", 913 },
+ { "Aring", 197 },
+ { "Atilde", 195 },
+ { "Auml", 196 },
+ { "Beta", 914 },
+ { "Ccedil", 199 },
+ { "Chi", 935 },
+ { "Dagger", 8225 },
+ { "Delta", 916 },
+ { "ETH", 208 },
+ { "Eacute", 201 },
+ { "Ecirc", 202 },
+ { "Egrave", 200 },
+ { "Epsilon", 917 },
+ { "Eta", 919 },
+ { "Euml", 203 },
+ { "Gamma", 915 },
+ { "Iacute", 205 },
+ { "Icirc", 206 },
+ { "Igrave", 204 },
+ { "Iota", 921 },
+ { "Iuml", 207 },
+ { "Kappa", 922 },
+ { "Lambda", 923 },
+ { "Mu", 924 },
+ { "Ntilde", 209 },
+ { "Nu", 925 },
+ { "OElig", 338 },
+ { "Oacute", 211 },
+ { "Ocirc", 212 },
+ { "Ograve", 210 },
+ { "Omega", 937 },
+ { "Omicron", 927 },
+ { "Oslash", 216 },
+ { "Otilde", 213 },
+ { "Ouml", 214 },
+ { "Phi", 934 },
+ { "Pi", 928 },
+ { "Prime", 8243 },
+ { "Psi", 936 },
+ { "Rho", 929 },
+ { "Scaron", 352 },
+ { "Sigma", 931 },
+ { "THORN", 222 },
+ { "Tau", 932 },
+ { "Theta", 920 },
+ { "Uacute", 218 },
+ { "Ucirc", 219 },
+ { "Ugrave", 217 },
+ { "Upsilon", 933 },
+ { "Uuml", 220 },
+ { "Xi", 926 },
+ { "Yacute", 221 },
+ { "Yuml", 376 },
+ { "Zeta", 918 },
+ { "aacute", 225 },
+ { "acirc", 226 },
+ { "acute", 180 },
+ { "aelig", 230 },
+ { "agrave", 224 },
+ { "alpha", 945 },
+ { "atilde", 227 },
+ { "auml", 228 },
+ { "beta", 946 },
+ { "ccedil", 231 },
+ { "chi", 967 },
+ { "dArr", 8659 },
+ { "dagger", 8224 },
+ { "darr", 8595 },
+ { "delta", 948 },
+ { "eacute", 233 },
+ { "ecirc", 234 },
+ { "egrave", 232 },
+ { "epsilon", 949 },
+ { "eta", 951 },
+ { "euml", 235 },
+ { "gamma", 947 },
+ { "hArr", 8660 },
+ { "harr", 8596 },
+ { "iacute", 237 },
+ { "icirc", 238 },
+ { "igrave", 236 },
+ { "iota", 953 },
+ { "iuml", 239 },
+ { "kappa", 954 },
+ { "lArr", 8656 },
+ { "lambda", 955 },
+ { "larr", 8592 },
+ { "mu", 956 },
+ { "ntilde", 241 },
+ { "nu", 957 },
+ { "oacute", 243 },
+ { "ocirc", 244 },
+ { "oelig", 339 },
+ { "ograve", 242 },
+ { "omega", 969 },
+ { "omicron", 959 },
+ { "oslash", 248 },
+ { "otilde", 245 },
+ { "otimes", 8855 },
+ { "ouml", 246 },
+ { "phi", 966 },
+ { "pi", 960 },
+ { "psi", 968 },
+ { "rArr", 8658 },
+ { "rarr", 8594 },
+ { "rho", 961 },
+ { "scaron", 353 },
+ { "sigma", 963 },
+ { "tau", 964 },
+ { "theta", 952 },
+ { "thorn", 254 },
+ { "uArr", 8657 },
+ { "uacute", 250 },
+ { "uarr", 8593 },
+ { "ucirc", 251 },
+ { "ugrave", 249 },
+ { "upsilon", 965 },
+ { "uuml", 252 },
+ { "xi", 958 },
+ { "yacute", 253 },
+ { "yuml", 255 },
+ { "zeta", 950 },
+ { "zwj", 8205 },
+ { "zwnj", 8204 },
+// Case Insensitive entries
+ { "alefsym", 8501 },
+ { "amp", 38 },
+ { "and", 8743 },
+ { "ang", 8736 },
+ { "apos", 39 },
+ { "aring", 229 },
+ { "asymp", 8776 },
+ { "bdquo", 8222 },
+ { "brvbar", 166 },
+ { "bull", 8226 },
+ { "cap", 8745 },
+ { "cedil", 184 },
+ { "cent", 162 },
+ { "circ", 710 },
+ { "clubs", 9827 },
+ { "cong", 8773 },
+ { "copy", 169 },
+ { "crarr", 8629 },
+ { "cup", 8746 },
+ { "curren", 164 },
+ { "deg", 176 },
+ { "diams", 9830 },
+ { "divide", 247 },
+ { "empty", 8709 },
+ { "emsp", 8195 },
+ { "ensp", 8194 },
+ { "equiv", 8801 },
+ { "eth", 240 },
+ { "euro", 8364 },
+ { "exist", 8707 },
+ { "fnof", 402 },
+ { "forall", 8704 },
+ { "frac12", 189 },
+ { "frac14", 188 },
+ { "frac34", 190 },
+ { "frasl", 8260 },
+ { "ge", 8805 },
+ { "gt", 62 },
+ { "hearts", 9829 },
+ { "hellip", 8230 },
+ { "iexcl", 161 },
+ { "image", 8465 },
+ { "infin", 8734 },
+ { "int", 8747 },
+ { "iquest", 191 },
+ { "isin", 8712 },
+ { "lang", 9001 },
+ { "laquo", 171 },
+ { "lceil", 8968 },
+ { "ldquo", 8220 },
+ { "le", 8804 },
+ { "lfloor", 8970 },
+ { "lowast", 8727 },
+ { "loz", 9674 },
+ { "lrm", 8206 },
+ { "lsaquo", 8249 },
+ { "lsquo", 8216 },
+ { "lt", 60 },
+ { "macr", 175 },
+ { "mdash", 8212 },
+ { "micro", 181 },
+ { "middot", 183 },
+ { "minus", 8722 },
+ { "nabla", 8711 },
+ { "nbsp", 160 },
+ { "ndash", 8211 },
+ { "ne", 8800 },
+ { "ni", 8715 },
+ { "not", 172 },
+ { "notin", 8713 },
+ { "nsub", 8836 },
+ { "oline", 8254 },
+ { "oplus", 8853 },
+ { "or", 8744 },
+ { "ordf", 170 },
+ { "ordm", 186 },
+ { "para", 182 },
+ { "part", 8706 },
+ { "permil", 8240 },
+ { "perp", 8869 },
+ { "piv", 982 },
+ { "plusmn", 177 },
+ { "pound", 163 },
+ { "prime", 8242 },
+ { "prod", 8719 },
+ { "prop", 8733 },
+ { "quot", 34 },
+ { "radic", 8730 },
+ { "rang", 9002 },
+ { "raquo", 187 },
+ { "rceil", 8969 },
+ { "rdquo", 8221 },
+ { "real", 8476 },
+ { "reg", 174 },
+ { "rfloor", 8971 },
+ { "rlm", 8207 },
+ { "rsaquo", 8250 },
+ { "rsquo", 8217 },
+ { "sbquo", 8218 },
+ { "sdot", 8901 },
+ { "sect", 167 },
+ { "shy", 173 },
+ { "sigmaf", 962 },
+ { "sim", 8764 },
+ { "spades", 9824 },
+ { "sub", 8834 },
+ { "sube", 8838 },
+ { "sum", 8721 },
+ { "sup", 8835 },
+ { "sup1", 185 },
+ { "sup2", 178 },
+ { "sup3", 179 },
+ { "supe", 8839 },
+ { "szlig", 223 },
+ { "there4", 8756 },
+ { "thetasym", 977 },
+ { "thinsp", 8201 },
+ { "tilde", 732 },
+ { "times", 215 },
+ { "trade", 8482 },
+ { "uml", 168 },
+ { "upsih", 978 },
+ { "weierp", 8472 },
+ { "yen", 165 },
+ { "zwj", 8205 },
+ { "zwnj", 8204 },
+ };
+
+
+// -----------------------------------------------------------------------------
+// CXmlEntity::NewL
+//
+// Two-phased constructor.
+// -----------------------------------------------------------------------------
+//
+CXmlEntity* CXmlEntity::NewL()
+ {
+ CXmlEntity* self = new (ELeave) CXmlEntity();
+
+ CleanupStack::PushL(self);
+ self->ConstructL();
+ CleanupStack::Pop();
+
+ return self;
+ }
+
+
+// -----------------------------------------------------------------------------
+// CXmlEntity::CXmlEntity
+//
+// C++ default constructor can NOT contain any code, that
+// might leave.
+// -----------------------------------------------------------------------------
+//
+CXmlEntity::CXmlEntity():
+ iLeakTracker(CLeakTracker::EXmlEntity), iEntityMappings(15)
+ {
+ }
+
+
+// -----------------------------------------------------------------------------
+// CXmlEntity::ConstructL
+//
+// Symbian 2nd phase constructor can leave.
+// -----------------------------------------------------------------------------
+//
+void CXmlEntity::ConstructL()
+ {
+ }
+
+
+// -----------------------------------------------------------------------------
+// CXmlEntity::~CXmlEntity
+//
+// Deconstructor.
+// -----------------------------------------------------------------------------
+//
+CXmlEntity::~CXmlEntity()
+ {
+ // Delete the cached encoding-map.
+ for (TInt i = 0; i < iEntityMappings.Count(); i++)
+ {
+ xmlFree(iEntityMappings[i].orig);
+ }
+
+ iEntityMappings.Close();
+ }
+
+
+// -----------------------------------------------------------------------------
+// CXmlEntity::ResolveL
+//
+// Resolves the named entity into its char-value. Can handle numeric entities.
+// -----------------------------------------------------------------------------
+//
+void CXmlEntity::ResolveL(const TDesC& aName, TUint16& aUcs2Value)
+ {
+ HBufC8* utf8 = NULL;
+ xmlChar* xmlStr = NULL;
+
+ if (aName.Length() < 1)
+ {
+ User::Leave(KErrCorrupt);
+ }
+
+ // Resolve numeric entities...
+ if (ResolveNumericL(aName, aUcs2Value))
+ {
+ return;
+ }
+
+ // Convert the name to utf8 -- its strdup'ed to zero-terminate it.
+ utf8 = EscapeUtils::ConvertFromUnicodeToUtf8L(aName);
+ CleanupStack::PushL(utf8);
+
+ xmlStr = xmlStrndup(utf8->Ptr(), utf8->Size());
+ User::LeaveIfNull(xmlStr);
+ CleanupLibXml2::PushL(xmlStr);
+
+ // Look up the entity in the static entity table.
+ const CXmlEntity::EntityEntry& entity = LookupUsc2Value(xmlStr);
+ aUcs2Value = entity.ucs2Value;
+
+ // Clean up.
+ CleanupStack::PopAndDestroy(/*xmlStr*/);
+ CleanupStack::PopAndDestroy(utf8);
+ }
+
+
+// -----------------------------------------------------------------------------
+// CXmlEntity::ResolveL
+//
+// Resolves the named entity into its EntityEntry. Can NOT handle numeric entities.
+// -----------------------------------------------------------------------------
+//
+const xmlEntity* CXmlEntity::ResolveL(const xmlChar *aName)
+ {
+ TInt index;
+ xmlEntity entity;
+
+ // Init the entity.
+ memset(&entity, 0x00, sizeof(xmlEntity));
+
+ // Check if the entity has already been resolved.
+ entity.name = aName;
+ index = iEntityMappings.FindInOrder(entity, LinearOrder);
+
+ if (index != KErrNotFound)
+ {
+ return &iEntityMappings[index];
+ }
+
+ // Also check if the entity has already been resolved in a case insensitive way.
+ index = iEntityMappings.FindInOrder(entity, LinearCaseOrder);
+
+ if (index != KErrNotFound)
+ {
+ return &iEntityMappings[index];
+ }
+
+ // Otherwise look it up in the static table, create a new entry and return it.
+ TBuf<2> ucs2;
+ HBufC8* utf8 = NULL;
+ xmlChar* utf8Value = NULL;
+
+ // Get the entity as a ucs2 value from the static table.
+ const EntityEntry& entityEntry = LookupUsc2Value(aName);
+
+ // Convert the value to utf8.
+ ucs2.Append(entityEntry.ucs2Value);
+
+ utf8 = EscapeUtils::ConvertFromUnicodeToUtf8L(ucs2);
+ CleanupStack::PushL(utf8);
+
+ utf8Value = xmlStrndup(utf8->Ptr(), utf8->Size());
+ User::LeaveIfNull(utf8Value);
+ CleanupLibXml2::PushL(utf8Value);
+
+ // Populate the entity.
+ entity.type = XML_ENTITY_DECL;
+ entity.name = BAD_CAST(entityEntry.entityName);
+ entity.orig = const_cast<xmlChar*>(utf8Value);
+ entity.content = const_cast<xmlChar*>(utf8Value);
+ entity.length = xmlStrlen(utf8Value);
+ entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
+
+ // Add the new entry.
+ User::LeaveIfError(iEntityMappings.InsertInOrder(entity, LinearOrder));
+ CleanupStack::Pop(/*utf8Value*/);
+ CleanupStack::PopAndDestroy(utf8);
+
+ // Return the newly added entry.
+ index = iEntityMappings.FindInOrder(entity, LinearOrder);
+
+ if (index == KErrNotFound)
+ {
+ User::Leave(KErrCorrupt);
+ }
+
+ return &iEntityMappings[index];
+ }
+
+
+// -----------------------------------------------------------------------------
+// CXmlEntity::LookupUsc2Value
+//
+// Looks up the named entity in the static table. Can NOT handle numeric entities.
+// -----------------------------------------------------------------------------
+//
+const CXmlEntity::EntityEntry& CXmlEntity::LookupUsc2Value(const xmlChar *aName)
+ {
+ TInt index;
+ const EntityEntry* entry;
+ TInt low;
+ TInt high;
+ TInt res;
+ TInt numEntries;
+
+ numEntries = (sizeof(sEntityMappings) / sizeof(EntityEntry));
+
+ // First do a binary search search in the case sensitive part of the array.
+ low = 0;
+ high = numEntries - KNumCaseInsensitiveEntries - 1;
+ res = 0;
+
+ while (low <= high )
+ {
+ index = (high + low) / 2;
+ entry = &(sEntityMappings[index]);
+
+ // Do a case sensitive string comparison.
+ res = xmlStrcmp(aName, BAD_CAST(entry->entityName));
+
+ if (res > 0)
+ {
+ /* name is ahead of this slot. Increase low bound. */
+ low = index + 1;
+ }
+
+ else if (res < 0)
+ {
+ /* name is behind this slot. Decrease high bound. */
+ high = index - 1;
+ }
+ else
+ {
+ /* Found the entity name. Return its value. */
+ return *entry;
+ }
+ }
+
+ // If no match was found search in the case insensitive part of the table.
+ low = numEntries - KNumCaseInsensitiveEntries;
+ high = numEntries - 1;
+ res = 0;
+
+ while (low <= high )
+ {
+ index = (high + low) / 2;
+ entry = &(sEntityMappings[index]);
+
+ // Do a case insensitive string comparison.
+ res = xmlStrcasecmp(aName, BAD_CAST(entry->entityName));
+
+ if (res > 0)
+ {
+ /* name is ahead of this slot. Increase low bound. */
+ low = index + 1;
+ }
+
+ else if (res < 0)
+ {
+ /* name is behind this slot. Decrease high bound. */
+ high = index - 1;
+ }
+ else
+ {
+ /* Found the entity name. Return its value. */
+ return *entry;
+ }
+ }
+
+ // If no match were found return the space.
+ return sSpaceEntity;
+ }
+
+
+// -----------------------------------------------------------------------------
+// CXmlEntity::ResolveNumeric
+//
+// Resolves the numeric entity into it's value.
+// -----------------------------------------------------------------------------
+//
+TBool CXmlEntity::ResolveNumericL(const TDesC& aName, TUint16& aUcs2Value)
+ {
+ _LIT(KHash, "#");
+ _LIT(KHex, "x");
+ _LIT(KHEX, "X");
+
+ TBool found = EFalse;
+
+ if (aName.Length() < 2)
+ {
+ return EFalse;
+ }
+
+ if (aName.Left(1) == KHash)
+ {
+ TRadix aRadix = EDecimal;
+ TPtrC numeric;
+
+ // Entity of the form, #x123
+ if ((aName.Mid(1, 1) == KHex) || (aName.Mid(1, 1) == KHEX))
+ {
+ numeric.Set(aName.Mid(2, aName.Length() - 2));
+ aRadix = EHex;
+ }
+
+ // Entity of the form, #123
+ else
+ {
+ numeric.Set(aName.Mid(1, aName.Length() - 1));
+ }
+
+ // Convert the text into a ucs2 value.
+ if (numeric.Length() > 0)
+ {
+ TLex temp(numeric);
+
+ temp.Val(aUcs2Value, aRadix);
+ found = ETrue;
+ }
+ }
+
+ return found;
+ }
+
+
+// -----------------------------------------------------------------------------
+// CXmlEntity::LinearOrder
+//
+// Comparison method for iEntityMappings.
+// -----------------------------------------------------------------------------
+//
+TInt CXmlEntity::LinearOrder(const xmlEntity& aFirst, const xmlEntity& aSecond)
+ {
+ return xmlStrcmp(aFirst.name, aSecond.name);
+ }
+
+
+// -----------------------------------------------------------------------------
+// CXmlEntity::LinearCaseOrder
+//
+// Comparison method for iEntityMappings.
+// -----------------------------------------------------------------------------
+//
+TInt CXmlEntity::LinearCaseOrder(const xmlEntity& aFirst, const xmlEntity& aSecond)
+ {
+ return xmlStrcasecmp(aFirst.name, aSecond.name);
+ }
+