xml/xmlexpatparser/src/xmlconstants.h
changeset 0 e35f40988205
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xml/xmlexpatparser/src/xmlconstants.h	Thu Dec 17 09:29:21 2009 +0200
@@ -0,0 +1,399 @@
+// Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+//
+
+#ifndef __XMLCONSTANTS_H__
+#define __XMLCONSTANTS_H__
+
+#include <e32base.h>
+
+/**
+This file describes useful XML constants.
+
+The UTF-8 character representation protocol is described here.
+
+num of bytes| Bits used in encoding	| Bit representation
+
+          1 |                     7 | 0vvvvvvv
+
+          2 |                    11 | 110vvvvv 10vvvvvv
+
+          3 |                    16 | 1110vvvv 10vvvvvv 10vvvvvv
+
+          4 |                    21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
+
+          5 |                    26 | 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
+
+          6 |                    31 | 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
+
+Because each XML entity not accompanied by external encoding information and not in
+UTF-8 or UTF-16, encoding must begin with an XML encoding declaration, in which the
+first characters must be '<?xml', any conforming processor can detect, after two to
+four octets of input, which of the following cases apply. In reading this list, it
+may help to know that in UCS-4, '<' is "0x0000003C" and '?' is "0x0000003F", and the
+Byte Order Mark (BOM) required of UTF-16 data streams is "#xFEFF". 
+
+The notation ## is used to denote any byte value except that two consecutive ##s 
+cannot be both 00.
+
+ASCII characters are represented as ASCII values
+@file
+*/
+
+/**
+Bit mask to capture the utf-8 single byte header character encoding.
+@publishedAll
+*/
+const TUint KUTF8SingleHeaderMask		= 0x80; // 10000000
+
+
+/**
+Bit mask to capture the utf-8 double byte header character encoding.
+@publishedAll
+*/
+const TUint KUTF8DoubleHeaderMask		= 0xE0; // 11100000
+
+/**
+Bit mask to capture the utf-8 triple byte header character encoding.
+@publishedAll
+*/
+const TUint KUTF8TripleHeaderMask		= 0xF0; // 11110000
+
+/**
+Bit mask to capture the utf-8 quadruple byte header character encoding.
+@publishedAll
+*/
+const TUint KUTF8QuadrupleHeaderMask	= 0xF8; // 11111000
+
+/**
+Bit mask to capture the utf-8 quinary byte header character encoding.
+@publishedAll
+*/
+const TUint KUTF8QuinaryHeaderMask		= 0xFC; // 11111100
+
+/**
+Bit mask to capture the utf-8 senary byte header character encoding.
+@publishedAll
+*/
+const TUint KUTF8SenaryHeaderMask		= 0xFE; // 11111110
+
+/**
+The utf-8 bit pattern describing a single byte header character encoding.
+@publishedAll
+*/
+const TUint KUTF8SingleByteHeader		= 0x00; // 00000000
+
+/**
+The utf-8 bit pattern describing a double byte header character encoding.
+@publishedAll
+*/
+const TUint KUTF8DoubleByteHeader		= 0xC0; // 11000000
+
+/**
+The utf-8 bit pattern describing a triple byte header character encoding.
+@publishedAll
+*/
+const TUint KUTF8TripleByteHeader		= 0xE0; // 11100000
+
+/**
+The utf-8 bit pattern describing a quadruple byte header character encoding.
+@publishedAll
+*/
+const TUint KUTF8QuadrupleByteHeader	= 0xF0; // 11110000
+
+/**
+The utf-8 bit pattern describing a quinary byte header character encoding.
+@publishedAll
+*/
+const TUint KUTF8QuinaryByteHeader		= 0xF8; // 11111000
+
+/**
+The utf-8 bit pattern describing a senary byte header character encoding.
+@publishedAll
+*/
+const TUint KUTF8SenaryByteHeader		= 0xFC; // 11111100
+
+/**
+The byte count of a utf-8 single byte character encoding.
+@publishedAll
+*/
+const TInt KUTF8SingleByteCount			= 1; 
+
+/**
+The byte count of a utf-8 double byte character encoding.
+@publishedAll
+*/
+const TInt KUTF8DoubleByteCount			= 2; 
+
+/**
+The byte count of a utf-8 triple byte character encoding.
+@publishedAll
+*/
+const TInt KUTF8TripleByteCount			= 3; 
+
+/**
+The byte count of a utf-8 quadruple byte character encoding.
+@publishedAll
+*/
+const TInt KUTF8QuadrupleByteCount		= 4; 
+
+/**
+The byte count of a utf-8 quinary byte character encoding.
+@publishedAll
+*/
+const TInt KUTF8QuinaryByteCount		= 5; 
+
+/**
+The byte count of a utf-8 senary byte character encoding.
+@publishedAll
+*/
+const TInt KUTF8SenaryByteCount			= 6;
+
+/**
+The byte count required to encode '<?xml'.
+@publishedAll
+*/
+const TInt KEncodingByteCount			= 4;
+
+
+/**
+The encoding text to search for that describes the encoding of an xml document.
+@publishedAll
+*/
+_LIT8(KEncodingTxt, "encoding=\"");
+
+/**
+The end tag (>) symbol used in xml to close the scope of and element.
+@publishedAll
+*/
+const TUint8 KXMLEndTag					= '>';
+
+/**
+The quotation (") symbol used in xml.
+@publishedAll
+*/
+const TUint8 KQuotation					= '\"';
+
+
+enum TParseMode 
+/**
+Lists enumerations used to describe one or more Parse modes. Users can set this information
+via the SetParseMode method on the RXmlParser object.
+@see RXmlParser
+@publishedAll
+*/
+	{
+
+/**
+This enumeration when set specifies the convertion of elements and attributes to lowercase. 
+This can be used for case-insensitive HTML so that a tag can be matched to a static 
+string in the string pool.
+@see RStringPool
+*/
+	EParseModeConvertTagsToLowerCase	= 0x0001,
+
+/**
+This enumeration when set reports an error when unrecognised tags are found.
+*/
+	EParseModeErrorOnUnrecognisedTags	= 0x0002,
+
+/**
+This enumeration when set reports unrecognised tags.
+*/
+	EParseModeReportUnrecognisedTags	= 0x0004,
+
+/**
+This enumeration when set reports the namespace.
+*/
+	EParseModeReportNamespaces			= 0x0008,
+
+/**
+This enumeration when set reports the namespace prefix.
+*/
+	EParseModeReportNamespacePrefixes	= 0x0010,
+
+/**
+This enumeration when set sends all content data for an element in one chunk.
+*/
+	EParseModeSendFullContentInOneChunk	= 0x0020,
+
+/**
+This enumeration when set reports namespace mappings via the OnStartPrefixMapping & 
+OnEndPrefixMapping methods.
+@see MMarkupCallback
+*/
+	EParseModeReportNamespaceMapping	= 0x0040,
+
+/**
+This enumeration when set describes the data in the specified encoding, otherwise
+it is specified in utf-8.
+*/
+	EParseModeRawContent                = 0x0080,
+
+/**
+This enumeration when set states that all string comparisons be non-folded.
+Fold is defined as: The removal of differences between characters that are deemed 
+unimportant for the purposes of inexact or case-insensitive matching. 
+As well as ignoring differences of case, folding ignores any accent on a character.
+*/
+	EParseModeStrict					= 0x0100,
+
+/**
+This enumeration is a mask that covers the total enumerations thus far, and as
+such should be updated to reflect any new enumerations added.
+*/
+	EParseModeAllMask                   = 0x01FF,
+
+	};
+
+
+enum TEncoding
+/**
+Lists enumerations used to describe the encoding of an xml document. 
+The first line of an xml document generally has the encoding described,
+however, the data upto this description is specified in the encoding. The actual 
+description is described in ASCII.
+@publishedAll
+*/
+	{
+
+// With BOM (Byte Order Mark):
+
+/**
+This enumeration represents a BOM subset with the following values 00 00 FE FF.
+Posible encodings include: UCS-4, big-endian machine (1234 order).
+*/
+	EEncodingUCS_4BEBOM = 0,
+
+/**
+This enumeration represents a BOM subset with the following values FF FE 00 00.
+Posible encodings include: UCS-4, little-endian machine (4321 order).
+*/
+	EEncodingUCS_4LEBOM,
+	
+/**
+This enumeration represents a BOM subset with the following values 00 00 FF FE.
+Posible encodings include: UCS-4, unusual octet order (2143).
+*/
+	EEncodingUCS_4UO1BOM,
+
+/**
+This enumeration represents a BOM subset with the following values FE FF 00 00.
+Posible encodings include: UCS-4, unusual octet order (3412).
+*/
+	EEncodingUCS_4UO2BOM,
+
+/**
+This enumeration represents a BOM subset with the following values FE FF ## ##.
+Posible encodings include: UTF-16, big-endian.
+*/
+	EEncodingUTF_16BEBOM,
+
+/**
+This enumeration represents a BOM subset with the following values FF FE ## ##.
+Posible encodings include: UTF-16, little-endian.
+*/
+	EEncodingUTF_16LEBOM,
+
+/**
+This enumeration represents a BOM subset with the following values EF BB BF ##.
+Posible encodings include: UTF-8.
+*/
+	EEncodingUTF_8BOM,
+
+
+// Without a Byte Order Mark:
+
+/**
+This enumeration represents a non BOM subset with the following values 00 00 00 3C.
+Posible encodings include: UCS-4 or other encoding with a 32-bit code unit
+and ASCII characters encoded as ASCII values, in respectively big-endian (1234), 
+little-endian (4321) and two unusual byte orders (2143 and 3412). The encoding 
+declaration must be read to determine which of UCS-4 or other supported 32-bit encodings applies.
+*/
+	EEncodingUCS_4BE,
+
+/**
+This enumeration represents a non BOM subset with the following values 3C 00 00 00.
+Posible encodings include: UCS-4 or other encoding with a 32-bit code unit
+and ASCII characters encoded as ASCII values, in respectively big-endian (1234), 
+little-endian (4321) and two unusual byte orders (2143 and 3412). The encoding 
+declaration must be read to determine which of UCS-4 or other supported 32-bit encodings applies.
+*/
+	EEncodingUCS_4LE,
+
+/**
+This enumeration represents a non BOM subset with the following values 00 00 3C 00.
+Posible encodings include: UCS-4 or other encoding with a 32-bit code unit
+and ASCII characters encoded as ASCII values, in respectively big-endian (1234), 
+little-endian (4321) and two unusual byte orders (2143 and 3412). The encoding 
+declaration must be read to determine which of UCS-4 or other supported 32-bit encodings applies.
+*/
+	EEncodingUCS_4BO1,
+
+/**
+This enumeration represents a non BOM subset with the following values 00 3C 00 00 
+Posible encodings include: UCS-4 or other encoding with a 32-bit code unit
+and ASCII characters encoded as ASCII values, in respectively big-endian (1234), 
+little-endian (4321) and two unusual byte orders (2143 and 3412). The encoding 
+declaration must be read to determine which of UCS-4 or other supported 32-bit encodings applies. 
+*/
+	EEncodingUCS_4BO2,
+
+/**
+This enumeration represents a non BOM subset with the following values 00 3C 00 3F.
+Posible encodings include: UTF-16BE or big-endian ISO-10646-UCS-2 or other encoding
+with a 16-bit code unit in big-endian order and ASCII characters encoded as ASCII 
+values (the encoding declaration must be read to determine which).
+*/
+	EEncodingUTF_16BE,
+	
+/**
+This enumeration represents a non BOM subset with the following values 3C 00 3F 00.
+Posible encodings include: UTF-16LE or little-endian ISO-10646-UCS-2 or other encoding
+with a 16-bit code unit in little-endian order and ASCII characters encoded as ASCII 
+values (the encoding declaration must be read to determine which).
+*/
+	EEncodingUTF_16LE,
+
+/**
+This enumeration represents a non BOM subset with the following values 3C 3F 78 6D.
+Posible encodings include: UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, 
+EUC, or any other 7-bit, 8-bit, or mixed-width encoding which ensures that the 
+characters of ASCII have their normal positions, width, and values; the actual encoding
+declaration must be read to detect which of these applies, but since all of these 
+encodings use the same bit patterns for the relevant ASCII characters, the encoding 
+declaration itself may be read reliably.
+*/
+	EEncodingUTF_8,
+
+/**
+This enumeration represents a non BOM subset with the following values 4C 6F A7 94.
+Posible encodings include: EBCDIC (in some flavor; the full encoding declaration must 
+be read to tell which code page is in use).
+*/
+
+	EEncodingEBCDIC,
+
+/**
+This enumeration represents a non BOM subset with a combination of other values.
+Posible encodings include: Other UTF-8 without an encoding declaration, or else the data 
+stream is mislabeled (lacking a required encoding declaration), corrupt, fragmentary, 
+or enclosed in a wrapper of some kind.
+*/
+	EEncodingOTHER,
+
+	};
+
+#endif // __XMLCONSTANTS_H__