diff -r 889504eac4fb -r 604ca70b6235 xml/cxmllibrary/src/xmlp/src/XMLParser.cpp --- a/xml/cxmllibrary/src/xmlp/src/XMLParser.cpp Tue Aug 31 17:02:56 2010 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2215 +0,0 @@ -/* -* Copyright (c) 2000 - 2001 Nokia Corporation and/or its subsidiary(-ies). -* All rights reserved. -* This component and the accompanying materials are made available -* under the terms of the License "Eclipse Public License v1.0" -* which accompanies this distribution, and is available -* at the URL "http://www.eclipse.org/legal/epl-v10.html". -* -* Initial Contributors: -* Nokia Corporation - initial contribution. -* -* Contributors: -* -* Description: -* -*/ - -#include "cxml_internal.h" -#include -#include -#include - - -#include "cxml_xmlp_entity.h" -#include -#include "cxml_xmlp_int_entity.h" - - - -/* "" len 2 */ -#define NW_XML_String_PiFormStopLength 2 -static -const NW_Uint8 NW_XML_String_PiFormStop[NW_XML_String_PiFormStopLength] = -{ - '?', '>' -}; - -/* "version" len 7 */ -#define NW_XML_String_VersionLength 7 -static -const NW_Uint8 NW_XML_String_Version[NW_XML_String_VersionLength] = -{ - 'v', 'e', 'r', 's', 'i', 'o', 'n' -}; - -/* "encoding" len 8 */ -#define NW_XML_String_EncodingLength 8 -static -const NW_Uint8 NW_XML_String_Encoding[NW_XML_String_EncodingLength] = -{ - 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g' -}; - -/* "standalone" len 10 */ -#define NW_XML_String_StandaloneLength 10 -static -const NW_Uint8 NW_XML_String_Standalone[NW_XML_String_StandaloneLength] = -{ - 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e' -}; - -/* "" len 3 comment end */ -#define NW_XML_String_CommentStopLength 3 -static -const NW_Uint8 NW_XML_String_CommentStop[NW_XML_String_CommentStopLength] = -{ - '-', '-', '>' -}; - -/* "/>" len 2 */ -#define NW_XML_String_EmptyTagEndLength 2 -static -const NW_Uint8 NW_XML_String_EmptyTagEnd[NW_XML_String_EmptyTagEndLength] = -{ - '/', '>' -}; - -/* "" len 3 */ -#define NW_XML_String_CdataEndLength 3 -static -const NW_Uint8 NW_XML_String_CdataEnd[NW_XML_String_CdataEndLength] = -{ - ']', ']', '>' -}; - -/* All case variations of "xml" */ -#define NW_XML_String_XmlNameVariationCount 8 -#define NW_XML_String_XmlLength 3 -static -const NW_Uint8 NW_XML_String_XmlVariations[(NW_XML_String_XmlNameVariationCount - * NW_XML_String_XmlLength)] = -{ - 'x', 'm', 'l', /* all lower case form must be first */ - 'x', 'm', 'L', - 'x', 'M', 'l', - 'x', 'M', 'L', - 'X', 'm', 'l', - 'X', 'm', 'L', - 'X', 'M', 'l', - 'X', 'M', 'L' -}; - - -/* Assumes position in Reader is at the first character of keyword. -returns: *pMatch = 1 if found keyword and advanced over it, 0 if no match -NOTE: Keyword match just means the string of keyword chars -exists at the read position so it does not mean that the keyword -is delimited at the end---it might be followed by more name chars. */ -static -NW_Status_t -NW_XML_Parse_KeywordConsume(NW_XML_Reader_t* pT, NW_XML_Reader_Interval_t* pI, - NW_Uint32 l, const NW_Uint8* pKeyword, - NW_Uint32* pMatch) -{ - NW_Status_t s; - NW_XML_Reader_Interval_Init(pI); - s = NW_XML_Reader_AsciiStringMatch(pT, l, pKeyword, pMatch); - if (NW_STAT_IS_SUCCESS(s) && *pMatch) { - NW_XML_Reader_Interval_Start(pI, pT); - NW_XML_Reader_AdvanceOffset(pT, l); - NW_XML_Reader_Interval_Stop(pI, pT); - } - return s; -} - -/* -Parses an XML Name (productions 5, 4) in Reader. -If no parse error, then *pI marks the Name. -Assumes position in Reader is at the first character of name. -returns: *pMatch = 1 if found name and advanced over it, 0 if no match -*/ -static -NW_Status_t -NW_XML_Parse_NameConsume(NW_XML_Reader_t* pT, NW_XML_Reader_Interval_t* pI, - NW_Uint32* pMatch) -{ - /* - [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' - | CombiningChar | Extender - - Note: combining and extender ignored here. - - [5] Name ::= (Letter | '_' | ':') (NameChar)* - */ - NW_Status_t sl; - NW_Uint32 isLetter; - NW_Status_t su; - NW_Uint32 isUnderscore; - NW_Status_t sc; - NW_Uint32 isColon; - NW_Status_t sd = NW_STAT_SUCCESS; - NW_Uint32 isDigit = 0; - NW_Status_t sp = NW_STAT_SUCCESS; - NW_Uint32 isPeriod = 0; - NW_Status_t sh = NW_STAT_SUCCESS; - NW_Uint32 isHyphen = 0; - NW_Status_t s = NW_STAT_SUCCESS; - - sl = NW_XML_Reader_IsLetter(pT, &isLetter); - su = NW_XML_Reader_AsciiCharMatch(pT, '_', &isUnderscore); - sc = NW_XML_Reader_AsciiCharMatch(pT, ':', &isColon); - *pMatch = 0; - NW_XML_Reader_Interval_Init(pI); - if (NW_STAT_IS_SUCCESS(sl) && NW_STAT_IS_SUCCESS(su) - && NW_STAT_IS_SUCCESS(sc)) { - if (isLetter | isUnderscore | isColon) { - NW_XML_Reader_Interval_Start(pI, pT); - while ((NW_STAT_IS_SUCCESS(sl) && NW_STAT_IS_SUCCESS(su) - && NW_STAT_IS_SUCCESS(sc) && NW_STAT_IS_SUCCESS(sd) - && NW_STAT_IS_SUCCESS(sp) && NW_STAT_IS_SUCCESS(sh) - && NW_STAT_IS_SUCCESS(s)) - && (isLetter | isDigit | isPeriod | isHyphen | isUnderscore - | isColon )) { - s = NW_XML_Reader_Advance(pT); - sl = NW_XML_Reader_IsLetter(pT, &isLetter); - sd = NW_XML_Reader_IsDigit(pT, &isDigit); - sp = NW_XML_Reader_AsciiCharMatch(pT, '.', &isPeriod); - sh = NW_XML_Reader_AsciiCharMatch(pT, '-', &isHyphen); - su = NW_XML_Reader_AsciiCharMatch(pT, '_', &isUnderscore); - sc = NW_XML_Reader_AsciiCharMatch(pT, ':', &isColon); - } - NW_XML_Reader_Interval_Stop(pI, pT); - *pMatch = 1; - } - } - if (NW_STAT_IS_SUCCESS(sl) && NW_STAT_IS_SUCCESS(su) - && NW_STAT_IS_SUCCESS(sc) && NW_STAT_IS_SUCCESS(sd) - && NW_STAT_IS_SUCCESS(sp) && NW_STAT_IS_SUCCESS(sh) - && NW_STAT_IS_SUCCESS(s)) { - return NW_STAT_SUCCESS; - } - return NW_STAT_FAILURE; -} - -/* This function reads data from the pT->pBuf and converts this data to - * the NW_String_t. The string memory is allocated here but it is freed - * in the calling function. - * - * pT --> Parser Structute (IN) - * I_attrVal --> Attribute value Interval parameter (IN) - * *dataStr --> Output string (OUT) - * - * - */ - -static NW_Status_t NW_XML_Data_to_String(NW_XML_Reader_t* pT, NW_XML_Reader_Interval_t* I_attrVal, - NW_String_t* dataStr) -{ - NW_Uint8* pData; - NW_Uint32 numbytes = 0; - NW_Uint32 totalByteCount = 0; - CXML_Uint8* tempAttrValBuf; - CXML_Uint32 tempBufLen = 0; - NW_Uint32 length; - NW_Uint32 i =0; - NW_Ucs2 c; - NW_Status_t s; - - length = I_attrVal->stop - I_attrVal->start; - tempBufLen = length; //Desired bytes need to read. - - s = NW_XML_Reader_DataAddressFromBuffer(pT,I_attrVal->start, - &tempBufLen, - &tempAttrValBuf); - - if (NW_STAT_IS_FAILURE(s)) - { - return s; - } - - if (tempBufLen != length) - { - return NW_STAT_FAILURE; - } - - - - /* pData is not NULL terminated so need to use following method. The num byte - * will be used for both pName and pVlaue. - */ - - - numbytes = NW_String_readChar( (NW_Byte*) tempAttrValBuf,&c,pT->encoding); - - /* Calculate the length of string. Also add the number of characters - * required for the NULL termination. - */ - - totalByteCount = length + numbytes; - - pData = (NW_Uint8*) NW_Mem_Malloc(totalByteCount); - - if (pData != NULL) - { - (void)NW_Mem_memcpy(pData , tempAttrValBuf, length ); - - - for(i=0; i < numbytes; i++) - { - pData[length+i] = '\0'; - } - } /*end if (pName != NULL)*/ - else - { - return NW_STAT_OUT_OF_MEMORY; - } - - s = NW_String_initialize( dataStr , pData, pT->encoding); - - if (NW_STAT_IS_FAILURE(s)) - { - return s; - } - - /* This will the storage of the dataStr by the NW_String_delete() */ - - dataStr->length |= 0x80000000; - - return NW_STAT_SUCCESS; - -}/*end NW_XML_Data_to_String()*/ - -/* Assumes position in Reader is at the opening quote character for value. -* BUG not yet spec compliant. -* The following function is called for Process Instruction and Element -* attribute consume. The predefined entities will be handled in -* element attributes only. So, the "entity" parameter is used to -* distinguish between these two cases. -* Careful about the "entity" parameter passed. This is used both as -* IN/OUT paramter. - -* IN --> if(entity == CXML_TRUE): Then parse attribute value for the -* entities. - -* OUT --> (entity=CXML_TRUE): The entity is found in the attribute value. - -*/ - -static -NW_Status_t -NW_XML_Parse_ValueConsume(NW_XML_Reader_t* pT, NW_XML_Reader_Interval_t* pI, - CXML_Bool* entityCheck, NW_String_t* attrValStr, - const RPointerArray * internalEntityList) -{ - /* - Literal data is any quoted string not containing the quotation mark - used as a delimiter for that string. - - [10] AttValue ::= '"' ([^<&"] | Reference)* '"' - | "'" ([^<&'] | Reference)* "'" - */ - - /* BUG this only pays attention to the quote chars not the value - so it ignores [<&]. */ - NW_Status_t s; - NW_Status_t ssq; - NW_Uint32 isOpenSingleQuote; - NW_Uint32 isCloseSingleQuote = 0; - NW_Status_t sdq; - NW_Uint32 isOpenDoubleQuote; - NW_Uint32 isCloseDoubleQuote = 0; - NW_Status_t slt; - NW_Uint32 isLessThan; - - NW_Uint32 prevIndex; - NW_Uint32 prevCharIndex; - NW_XML_Reader_LineColumn_t prevLineColumn; - CXML_Uint8* intEntityValStr = NULL; - CXML_Uint32 tempBufLen = 0; - NW_String_t* tempStr = NULL; - NW_String_t entityValStr; - CXML_Bool entityFoundLevel_2 = NW_FALSE; - CXML_Bool entityFoundOnce = NW_FALSE; - NW_String_initialize (&entityValStr, NULL, 0); - - - - - NW_XML_Reader_Interval_Init(pI); - ssq = NW_XML_Reader_AsciiCharMatch(pT, '\'', &isOpenSingleQuote); - sdq = NW_XML_Reader_AsciiCharMatch(pT, '\"', &isOpenDoubleQuote); - if (NW_STAT_IS_FAILURE(sdq) || NW_STAT_IS_FAILURE(ssq) - || (!isOpenSingleQuote && !isOpenDoubleQuote)) { - return NW_STAT_FAILURE; - } - /* xor, mutually exclusive */ - NW_ASSERT(isOpenSingleQuote ^ isOpenDoubleQuote); - s = NW_XML_Reader_Advance(pT); - if (NW_STAT_IS_FAILURE(s)) { - return NW_STAT_FAILURE; - } - NW_XML_Reader_Interval_Start(pI, pT); - for (;;) { - - /* Check for the closing quotes. If this is empty attribute value - * then no need for the checking the entities. - */ - - if (isOpenSingleQuote) { - ssq = NW_XML_Reader_AsciiCharMatch(pT, '\'', &isCloseSingleQuote); - } - if (isOpenDoubleQuote) { - sdq = NW_XML_Reader_AsciiCharMatch(pT, '\"', &isCloseDoubleQuote); - } - slt = NW_XML_Reader_AsciiCharMatch(pT, '<', &isLessThan); - if (NW_STAT_IS_FAILURE(ssq) || NW_STAT_IS_FAILURE(sdq) - || NW_STAT_IS_FAILURE(slt) || isLessThan) { - return NW_STAT_FAILURE; - } - - - if( (*entityCheck == CXML_TRUE) && !isCloseSingleQuote && !isCloseDoubleQuote) - { - NW_Uint32 match; - - s = NW_XML_Reader_AsciiCharMatch(pT, '&', &match); - - if (NW_STAT_IS_FAILURE(s)) - { - return NW_STAT_FAILURE; - } - if (match) - { - NW_XML_Reader_Interval_t I_entityData; - NW_Bool entityFoundLevel_1 = NW_FALSE; //If end of entity (;) found - NW_Uint32 entityVal = 0; - NW_XML_Reader_Interval_t* I_attrVal = pI; - - - - entityFoundLevel_2 = NW_FALSE; - NW_XML_Reader_Interval_Stop(I_attrVal, pT); //Contents before entity - - //Will back if not a valid entity - NW_XML_Reader_GetPosition(pT, - &prevIndex, &prevCharIndex, &prevLineColumn); - - s = CXML_XML_Parser_Entity(pT,&I_entityData,&entityFoundLevel_1); - - if (NW_STAT_IS_FAILURE(s)) - { - return NW_STAT_FAILURE; - } - /* - if(entityFoundLevel_1 == NW_FALSE) - { - return NW_STAT_XHTML_BAD_CONTENT; - } - */ - if (entityFoundLevel_1) - { - // Validate the entity - // The following function checks for the character, - // predefined and Internal Entities. - - if( (I_entityData.stop - I_entityData.start) > 0) - { - s = CXML_XML_Handle_entity(pT, - &I_entityData,&entityVal,&intEntityValStr, - &entityFoundLevel_2, - (void*) internalEntityList); - } - else - { - //Not a valid entity e.g. "&&;" test case - entityFoundLevel_2 = NW_FALSE; - } - - if (NW_STAT_IS_FAILURE(s)) - { - return NW_STAT_FAILURE; - } - - if(entityFoundLevel_2 == CXML_TRUE) - { - /* Read contents before entity*/ - - tempBufLen = I_attrVal->stop - I_attrVal->start; - - if(tempBufLen > 0) - { - - if(tempStr == NULL) - { - tempStr = NW_String_new(); - if(tempStr == NULL) - { - return NW_STAT_OUT_OF_MEMORY; - } - - } /*end if(tempStr == NULL)*/ - - s = NW_XML_Data_to_String (pT,I_attrVal,tempStr); - - if (NW_STAT_IS_FAILURE(s)) - { - return s; - } - - s = NW_String_concatenate(attrValStr,tempStr,pT->encoding); - - - if (NW_STAT_IS_FAILURE(s)) - { - return s; - } - - // Do some clean up - - if(tempStr != NULL) - { - NW_String_delete(tempStr); - tempStr = NULL; - } - - }//end if(tempBufLen > 0) - - // Write the entity content now. There are two possibilities for entities. - // - // 1) If it is character or decimal or predefined entities. - // In this case, intEntityValStr = NULL. - // - // 2) If it is "Internal Entity" then In this case, entityVal = 0; - - if (intEntityValStr == NULL) - { - - /* convert contents of the character/predfined entity to string */ - - s = NW_String_entityToString(entityVal,&entityValStr,pT->encoding); - - if (NW_STAT_IS_FAILURE(s)) - { - return s; - } - }/*end if (intEntityValStr == NULL)*/ - else if(entityVal == 0) - { - /* This is a internal entity string */ - - s = NW_String_initialize(&entityValStr,intEntityValStr,pT->encoding); - if (NW_STAT_IS_FAILURE(s)) - { - return s; - } - } /*end else if(entityVal == 0)*/ - - /* Add this entity value to the string */ - - s = NW_String_concatenate(attrValStr,&entityValStr,pT->encoding); - - if (NW_STAT_IS_FAILURE(s)) - { - return s; - } - - - - if(entityValStr.storage != NULL) - { - NW_Mem_Free (entityValStr.storage); - } - // Initialize the entity string for next entity - - NW_String_initialize (&entityValStr, NULL, 0); - - //Again start the top level interval - - NW_XML_Reader_Interval_Init(pI); - NW_XML_Reader_Interval_Start(pI, pT); - - /*Check for the closing quotes after entity parsing */ - - if (isOpenSingleQuote) { - ssq = NW_XML_Reader_AsciiCharMatch(pT, '\'', &isCloseSingleQuote); - } - if (isOpenDoubleQuote) { - sdq = NW_XML_Reader_AsciiCharMatch(pT, '\"', &isCloseDoubleQuote); - } - slt = NW_XML_Reader_AsciiCharMatch(pT, '<', &isLessThan); - if (NW_STAT_IS_FAILURE(ssq) || NW_STAT_IS_FAILURE(sdq) - || NW_STAT_IS_FAILURE(slt) || isLessThan) { - return NW_STAT_FAILURE; - } - entityFoundOnce = CXML_TRUE; - }//endif(entityFoundLevel_2 == CXML_TRUE) - else - { - /* If it is here, the entity is not well formed or a entity is - * not supported. But, it is error for now. - */ - /* - return NW_STAT_XHTML_BAD_CONTENT;*/ - - //No valid entity found. Parse as the normal string - NW_XML_Reader_SetPosition(pT, - prevIndex, - prevCharIndex, - &prevLineColumn); - } - - } //end if (entityFound && inContent) - else - { - //No valid entity found. Parse as the normal string - NW_XML_Reader_SetPosition(pT, - prevIndex, - prevCharIndex, - &prevLineColumn); - }//end else - } //end match - } //end if( !isCloseSingleQuote && !isCloseDoubleQuote) - - - - if ((isOpenSingleQuote & isCloseSingleQuote) - | (isOpenDoubleQuote & isCloseDoubleQuote)) { - break; - } - - if(entityFoundLevel_2 != CXML_TRUE) - { - s = NW_XML_Reader_Advance(pT); - } - else - { - entityFoundLevel_2 = CXML_FALSE; - } - }//end for (;;) - NW_XML_Reader_Interval_Stop(pI, pT); - s = NW_XML_Reader_Advance(pT); - - - if( (*entityCheck == CXML_TRUE) && - ( (entityFoundLevel_2 == CXML_TRUE) || (entityFoundOnce == CXML_TRUE) ) ) - { - /* Get rest of the attribute value contents */ - - - tempBufLen = pI->stop - pI->start; - - if(tempBufLen > 0) - { - if(tempStr == NULL) - { - tempStr = NW_String_new(); - if(tempStr == NULL) - { - return NW_STAT_OUT_OF_MEMORY; - } - - } /*end if(tempStr == NULL)*/ - s = NW_XML_Data_to_String (pT,pI,tempStr); - - if (NW_STAT_IS_FAILURE(s)) - { - return s; - } - - - s = NW_String_concatenate(attrValStr,tempStr,pT->encoding); - - if (NW_STAT_IS_FAILURE(s)) - { - return s; - } - - // Do some clean up - - if(tempStr != NULL) - { - NW_String_delete(tempStr); - tempStr = NULL; - } - - }//endif(tempBufLen > 0) - - }//end if( (*entityCheck == CXML_TRUE) && (entityFoundLevel_2 == CXML_TRUE) ) - else - { - *entityCheck = CXML_FALSE; - } - - return s; -} - -/* -Parses an XML attribute (production ???) in Reader. -If no parse error, then ti_name and ti_attvalue mark the two items. -Allows for leading whitespace. If l is > 0 then p is a "string" of -length l that is the name that must match (parse error if doesn't). -Assumes position in Reader is at whitespace before or first character -of attribute. -return: *pMatch = 1 if keyword or attribute name found - - Careful about the "entity" parameter passed. This is used both as - IN/OUT paramter. - - IN --> if(entity == CXML_TRUE): Then parse attribute value for the - entities. - - OUT --> (entity=CXML_TRUE): The entity is found in the attribute value. -*/ - - -static -NW_Status_t -NW_XML_Parse_AttributeValueConsume(NW_XML_Reader_t* pT, - NW_XML_Reader_Interval_t* pI_name, - NW_XML_Reader_Interval_t* pI_attvalue, - NW_Uint32 l, const NW_Uint8* pKeyword, - NW_Uint32* pMatch, - CXML_Bool* entity, NW_String_t* attrValStr, - const RPointerArray * internalEntityList) -{ - /* - [41] Attribute ::= Name Eq AttValue - where Name is possibly a keyword that must match - */ - NW_Status_t s; - NW_Uint32 match; - NW_Uint32 prevIndex0; - NW_Uint32 prevIndex1; - NW_Uint32 prevCharIndex0; - NW_Uint32 prevCharIndex1; - NW_XML_Reader_LineColumn_t prevLineColumn0; - NW_XML_Reader_LineColumn_t prevLineColumn1; - - *pMatch = 0; - - /* S mandatory */ - NW_XML_Reader_GetPosition(pT, &prevIndex0, &prevCharIndex0, &prevLineColumn0); - s = NW_XML_Reader_SkipSpace(pT); - if (NW_STAT_IS_FAILURE(s)) { - return s; - } - NW_XML_Reader_GetPosition(pT, &prevIndex1, &prevCharIndex1, &prevLineColumn1); - if (prevIndex0 == prevIndex1) { - return NW_STAT_FAILURE; - } - /* Name */ - if ((l != 0U) && (pKeyword != NULL)) { - s = NW_XML_Parse_KeywordConsume(pT, pI_name, l, pKeyword, pMatch); - if (NW_STAT_IS_SUCCESS(s) && !*pMatch) { - /* backup so mandatory space not consumed */ - NW_XML_Reader_SetPosition(pT, prevIndex0, prevCharIndex0, &prevLineColumn0); - } - } else { - s = NW_XML_Parse_NameConsume(pT, pI_name, pMatch); - } - if (NW_STAT_IS_FAILURE(s) || !*pMatch) { - return NW_STAT_FAILURE; - } - /* S */ - s = NW_XML_Reader_SkipSpace(pT); - if (NW_STAT_IS_FAILURE(s)) { - return s; - } - /* = */ - s = NW_XML_Reader_AsciiCharMatch(pT, '=', &match); - if (NW_STAT_IS_FAILURE(s) || !match) { - return NW_STAT_FAILURE; - } - s = NW_XML_Reader_Advance(pT); - if (NW_STAT_IS_FAILURE(s)) { - return NW_STAT_FAILURE; - } - /* S */ - s = NW_XML_Reader_SkipSpace(pT); - if (NW_STAT_IS_FAILURE(s)) { - return s; - } - /* attValue */ - - if(*entity == CXML_FALSE) - { - return NW_XML_Parse_ValueConsume(pT, pI_attvalue,entity,NULL, internalEntityList); - } - else - { - return NW_XML_Parse_ValueConsume(pT, pI_attvalue,entity,attrValStr, internalEntityList); - } -} - -/* -Parses an XML Comment (production 15) in Reader. -If no parse error, then ti marks the Comment--all chars -between the start and end marks including spaces. -Assumes position in Reader is the character after "' - - Note: the pattern with (Char - '-') means that the comment cannot - end with '--->' although it can begin with '