xml/cxmllibrary/src/xmlp/src/XMLReader.c
branchRCL_3
changeset 21 604ca70b6235
parent 20 889504eac4fb
--- a/xml/cxmllibrary/src/xmlp/src/XMLReader.c	Tue Aug 31 17:02:56 2010 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,445 +0,0 @@
-/*
-* Copyright (c) 2000 - 2001 Nokia Corporation and/or its subsidiary(-ies).
-* All rights reserved.
-* This component and the accompanying materials are made available
-* under the terms of the License "Eclipse Public License v1.0"
-* which accompanies this distribution, and is available
-* at the URL "http://www.eclipse.org/legal/epl-v10.html".
-*
-* Initial Contributors:
-* Nokia Corporation - initial contribution.
-*
-* Contributors:
-*
-* Description: 
-*
-*/
-
-
-/*
-This module provides a text (character) stream, pointers into the stream
-and operations on segments of the stream as though they were strings.
-The goal is to isolate the client from stream buffers, cross buffer
-issues and some character set encoding concerns.
-
-This particular version is for input from a single buffer.
-*/
-
-#include "cxml_internal.h"
-#include <xml/cxml/nw_string_char.h>
-#include <xml/cxml/nw_xmlp_xmlreader.h>
-
-static
-NW_Status_t
-NW_XML_Reader_ReadAsciiChar(NW_Uint32 c, NW_Uint32* pReturnChar)
-{
-    /* This looks a bit weird but the idea is to force the conversion
-    of the ASCII character through the same function that is used
-    to read a character from the text.  This will impose the same conversion
-    limitations and the same result encoding. */
-    NW_Int32 byteCount;
-    NW_Uint8 buf[2];
-    NW_Ucs2 c_ucs2;
-    buf[0] = (NW_Uint8)(c & 0xff);
-    buf[1] = 0;
-    /* should only use this function for ASCII */
-    if (c > 127) {
-        return NW_STAT_FAILURE;
-    }
-    /* call it UTF-8 because ASCII doesn't work with NW_String_readChar()
-    at the moment */
-    byteCount = NW_String_readChar((NW_Byte*)buf, &c_ucs2, HTTP_utf_8);
-    if (byteCount != 1) {
-        return NW_STAT_FAILURE;
-    }
-    *pReturnChar = c_ucs2;
-    return NW_STAT_SUCCESS;
-}
-
-/* assumes this is just a handoff of the buffer (i.e., won't make a copy) */
-EXPORT_C NW_Status_t
-NW_XML_Reader_InitFromBuffer(NW_XML_Reader_t* pT, NW_Uint32 length, unsigned char* pBuf)
-{
-    pT->encoding = 0;
-    pT->endianness = NW_NATIVE_ENDIAN;
-    pT->index = 0;
-    pT->charIndex = 0;
-    pT->lineColumn.crCount = 0;
-    pT->lineColumn.lfCount = 0;
-    pT->lineColumn.charsSinceLastCR = 0;
-    pT->lineColumn.charsSinceLastLF = 0;
-    pT->end = 0;
-    pT->length = length;
-    pT->pBuf = pBuf;
-    return NW_STAT_SUCCESS;
-}
-
-EXPORT_C NW_Status_t
-NW_XML_Reader_DataAddressFromBuffer(NW_XML_Reader_t* pT,
-                                 NW_Uint32 start, NW_Uint32* length,
-                                 unsigned char** ppData)
-{
-    NW_ASSERT(start < pT->length);
-    NW_ASSERT(*length <= pT->length);
-    NW_ASSERT((start + *length) <= pT->length);
-    *ppData = NULL;
-    if (start < pT->length) {
-        *ppData = pT->pBuf + start;
-        *length = (((start + *length) <= pT->length) ?
-                   *length : (pT->length - start));
-        return NW_STAT_SUCCESS;
-    }
-    return NW_STAT_FAILURE;
-}
-
-/* peekOrAdvance: first arg "advance": peek = 0, advance = 1 */
-static
-NW_Status_t
-NW_XML_Reader_PeekOrAdvanceOffset(NW_Bool advance, NW_XML_Reader_t* pT,
-                               NW_Uint32 offsetCharCount, NW_Uint32* pC)
-{
-    NW_Ucs2 c_ucs2;
-    NW_Uint32 i;
-    NW_Uint32 charCount = 0;
-    NW_Int32 byteCount = 0;
-    NW_Uint32 crCount = 0;
-    NW_Uint32 lfCount = 0;
-    NW_Uint32 charsPastCR = 0;
-    NW_Uint32 charsPastLF = 0;
-    NW_Bool resetPastCR = 0;
-    NW_Bool resetPastLF = 0;
-
-    NW_ASSERT(!(advance && !offsetCharCount)); 
-
-    if (pT->end) {
-        return NW_STAT_FAILURE;
-    }
-    for (i = pT->index; i < pT->length; i += (NW_Uint32)byteCount) {
-        NW_ASSERT(charCount <= offsetCharCount);
-
-        /* It is assumed that this func returns UNICODE code points. */
-        byteCount = NW_String_readChar((NW_Byte*)&(pT->pBuf[i]),
-                                       &c_ucs2, pT->encoding);
-        *pC = c_ucs2;
-        if (byteCount == -1) {
-            return NW_STAT_FAILURE;
-        }
-        if (charCount == offsetCharCount) {
-            /* This catches NW_String_readChar() reading past buffer end
-            and can be removed when the readChar function does proper
-            error checking. */
-            if ((i + (NW_Uint32)byteCount) > pT->length) {
-                return NW_STAT_FAILURE;
-            }
-            break;
-        }
-        charCount++;
-        charsPastCR++;
-        charsPastLF++;
-        if (c_ucs2 == 0xd /* CR */) {
-            crCount++;
-            resetPastCR = 1;
-            charsPastCR = 0;
-        } else if (c_ucs2 == 0xa /* LF */) {
-            lfCount++;
-            resetPastLF = 1;
-            charsPastLF = 0;
-        }
-    }
-    if (i >= pT->length) {
-        pT->end = 1;
-    }
-    /* This catches NW_String_readChar() reading past buffer end and can be
-    removed when the readChar function does proper error checking. */
-    if (i > pT->length) {
-        return NW_STAT_FAILURE;
-    }
-    if (advance) {
-        pT->index = i;
-        pT->charIndex += charCount;
-        pT->lineColumn.crCount += crCount;
-        pT->lineColumn.lfCount += lfCount;
-        if (resetPastCR) {
-            pT->lineColumn.charsSinceLastCR = charsPastCR;
-        } else {
-            pT->lineColumn.charsSinceLastCR += charsPastCR;
-        }
-        if (resetPastLF) {
-            pT->lineColumn.charsSinceLastLF = charsPastLF;
-        } else {
-            pT->lineColumn.charsSinceLastLF += charsPastLF;
-        }
-    }
-    return NW_STAT_SUCCESS;
-}
-
-EXPORT_C NW_Status_t
-NW_XML_Reader_PeekOffset(NW_XML_Reader_t* pT, NW_Uint32 nChars, NW_Uint32* pC)
-{
-    return NW_XML_Reader_PeekOrAdvanceOffset(0, pT, nChars, pC);
-}
-
-EXPORT_C NW_Status_t
-NW_XML_Reader_AdvanceOffset(NW_XML_Reader_t* pT, NW_Uint32 nChars)
-{
-    NW_Uint32 c;
-    return NW_XML_Reader_PeekOrAdvanceOffset(1, pT, nChars, &c);
-}
-
-EXPORT_C 
-void NW_XML_Reader_GetPosition(NW_XML_Reader_t* pT, NW_Uint32* pByteIndex,
-                            NW_Uint32* pCharIndex,
-                            NW_XML_Reader_LineColumn_t* pLineColumn)
-{
-    *pByteIndex = pT->index;
-    *pCharIndex = pT->charIndex;
-    pLineColumn->crCount = pT->lineColumn.crCount;
-    pLineColumn->lfCount = pT->lineColumn.lfCount;
-    pLineColumn->charsSinceLastCR = pT->lineColumn.charsSinceLastCR;
-    pLineColumn->charsSinceLastLF = pT->lineColumn.charsSinceLastLF;
-}
-
-/* Note: Setting the position (similar to seeking in a file) is in general
-not possible without reading the characters (usually reading forward) because
-character encoding may use a variable numbers of bytes per character. This is
-here so that if you have defined a valid interval, then you can reposition to
-the beginning of the interval. Setting to the position to a bad value will
-not always be caught immediately. Don't forget to also save and set line
-and column with position. */
-EXPORT_C void
-NW_XML_Reader_SetPosition(NW_XML_Reader_t* pT, NW_Uint32 byteIndex,
-                       NW_Uint32 charIndex,
-                       const NW_XML_Reader_LineColumn_t* pLineColumn)
-{
-    pT->index = byteIndex;
-    pT->charIndex = charIndex;
-    pT->lineColumn.crCount = pLineColumn->crCount;
-    pT->lineColumn.lfCount = pLineColumn->lfCount;
-    pT->lineColumn.charsSinceLastCR = pLineColumn->charsSinceLastCR;
-    pT->lineColumn.charsSinceLastLF = pLineColumn->charsSinceLastLF;
-}
-
-/*
-Reader Interval Functions
-*/
-
-EXPORT_C void
-NW_XML_Reader_Interval_Start(NW_XML_Reader_Interval_t* pI, NW_XML_Reader_t* pT)
-{
-    /* set both start and stop for safety in later use */
-    pI->start = pI->stop = pT->index;
-    pI->charStart = pI->charStop = pT->charIndex;
-}
-
-EXPORT_C void
-NW_XML_Reader_Interval_Stop(NW_XML_Reader_Interval_t* pI, NW_XML_Reader_t* pT)
-{
-    pI->stop = pT->index;
-    pI->charStop = pT->charIndex;
-}
-
-/* BEGIN GENERIC Reader CHARACTER AND STRING FUNCTIONS */
-
-/* pMatch is 1 if ASCII character c matches Reader char in its encoding */
-EXPORT_C NW_Status_t
-NW_XML_Reader_AsciiCharMatch(NW_XML_Reader_t* pT, NW_Uint32 asciiC, NW_Uint32* pMatch)
-{
-    NW_Uint32 c_text, c_ascii;
-    NW_Status_t s = NW_XML_Reader_Peek(pT, &c_text);
-    *pMatch = 0;
-    if (NW_STAT_IS_SUCCESS(s)) {
-        s = NW_XML_Reader_ReadAsciiChar(asciiC, &c_ascii);
-        if (NW_STAT_IS_SUCCESS(s)) {
-            *pMatch = (c_text == c_ascii);
-        }
-    }
-    return s;
-}
-
-/* pMatch is 1 if ASCII string matches Reader sequence in its encoding */
-EXPORT_C NW_Status_t
-NW_XML_Reader_AsciiStringMatch(NW_XML_Reader_t* pT, NW_Uint32 length, const NW_Uint8* pString,
-                            NW_Uint32* pMatch)
-{
-    NW_Uint32 c_text, c_ascii;
-    NW_Uint32 i;
-    NW_Status_t s = NW_STAT_SUCCESS;
-    *pMatch = 0;
-    NW_ASSERT(length);
-    for (i = 0; i < length; i++) {
-        s = NW_XML_Reader_PeekOffset(pT, i, &c_text);
-        if (NW_STAT_IS_FAILURE(s)) {
-            break;
-        }
-        s = NW_XML_Reader_ReadAsciiChar(pString[i], &c_ascii);
-        if (NW_STAT_IS_FAILURE(s)) {
-            break;
-        }
-        if (c_text != c_ascii) {
-            break;
-        }
-    }
-    if (i == length) {
-        *pMatch = 1;
-    }
-    return s;
-}
-
-/* Note: For XML, whitespace is only ASCII 0x20 (space),
-0x09 (tab), 0x0d (CR), 0x0a (LF).  The base test used here,
-CXML_Str_Isspace(), includes two other forms of whitespace. */
-EXPORT_C NW_Status_t
-NW_XML_Reader_SkipSpace(NW_XML_Reader_t* pT)
-{
-    NW_Uint32 c;
-    NW_Status_t s = NW_STAT_SUCCESS;
-    for (;;) {
-        s = NW_XML_Reader_Peek(pT, &c);
-        if (NW_STAT_IS_FAILURE(s)) {
-            break;
-        }
-        if (c > 0xffff) { /* validate casting */
-            break;
-        }
-        if (!CXML_Str_Isspace((NW_Ucs2)(c & 0xffff))) {
-            break;
-        }
-        s = NW_XML_Reader_Advance(pT);
-        if (NW_STAT_IS_FAILURE(s)) {
-            break;
-        }
-        if (pT->end){
-         /* At the end so break */
-            break;
-          }
-    }
-    return s;
-}
-
-/* Note: For XML, whitespace is only ASCII 0x20 (space),
-0x09 (tab), 0x0d (CR), 0x0a (LF).  The base test used here,
-CXML_Str_Isspace(), includes two other forms of whitespace. */
-EXPORT_C NW_Status_t
-NW_XML_Reader_IsSpace(NW_XML_Reader_t* pT, NW_Uint32* pMatch)
-{
-    NW_Uint32 c;
-    NW_Status_t s;
-
-    *pMatch = 0;
-    s  = NW_XML_Reader_Peek(pT, &c);
-    if (NW_STAT_IS_FAILURE(s)) {
-        return s;
-    }
-    if (c > 0xffff) { /* validate casting */
-        return NW_STAT_FAILURE;
-    }
-    if (CXML_Str_Isspace((NW_Ucs2)(c & 0xffff))) {
-        *pMatch = 1;
-    }
-    return NW_STAT_SUCCESS;
-}
-
-EXPORT_C NW_Status_t
-NW_XML_Reader_IsLetter(NW_XML_Reader_t* pT, NW_Uint32* pMatch)
-{
-    NW_Uint32 c;
-    NW_Status_t s;
-
-    *pMatch = 0;
-    s = NW_XML_Reader_Peek(pT, &c);
-    if (NW_STAT_IS_FAILURE(s)) {
-        return s;
-    }
-    /* This is an approximation to what XML charaters are "letter".
-    Everything above the 8-bit range is considered to be a "letter".*/
-    if (c >= 0x41 && c <= 0x5a) {
-        *pMatch = 1;
-    }
-    else if (c >= 0x61 && c <= 0x7a) {
-        *pMatch = 1;
-    }
-    else if (c >= 0xc0 && c <= 0xd6) {
-        *pMatch = 1;
-    }
-    else if (c >= 0xd8 && c <= 0xf6) {
-        *pMatch = 1;
-    }
-    else if (c >= 0xf8) {/* letters become anything above 0xf8 */
-        *pMatch = 1;
-    }
-    return NW_STAT_SUCCESS;
-}
-
-/* Note: For XML, digits include not only the ASCII digits but
-other language forms of digits.  The base test used here,
-CXML_Str_Isdigit() only tests for ASCII digits. */
-EXPORT_C NW_Status_t
-NW_XML_Reader_IsDigit(NW_XML_Reader_t* pT, NW_Uint32* pMatch)
-{
-    NW_Uint32 c;
-    NW_Status_t s;
-
-    *pMatch = 0;
-    s = NW_XML_Reader_Peek(pT, &c);
-    if (NW_STAT_IS_FAILURE(s)) {
-        return s;
-    }
-    if (c > 0xffff) {/* validate casting */
-        return NW_STAT_SUCCESS;
-    }
-    if (CXML_Str_Isdigit((NW_Ucs2)(c & 0xffff))) {
-        *pMatch = 1;
-    }
-    return NW_STAT_SUCCESS;
-}
-
-/* Returns an estimate of the current line and column position in the text.
-It is an estimate because it has to guess at what the intended line ending
-sequence is using a count of CR and LF characters.  Line and Column indices
-are 1-based not 0-based. */
-EXPORT_C void
-NW_XML_Reader_GetLineColumn(NW_XML_Reader_t* pT, NW_Uint32* pLine,
-                         NW_Uint32* pColumn)
-{
-    NW_Uint32 crCount, lfCount, charsSinceCR, charsSinceLF;
-    crCount = pT->lineColumn.crCount;
-    lfCount = pT->lineColumn.lfCount;
-    charsSinceCR = pT->lineColumn.charsSinceLastCR;
-    charsSinceLF = pT->lineColumn.charsSinceLastLF;
-    if (crCount == lfCount) {
-        /* assume CR, LF, DOS style */
-        /* use a bias in favor of CR followed by LF
-        which will give the correct column for DOS */
-        *pLine = lfCount + 1;
-        *pColumn = charsSinceLF + 1;
-    } else if (lfCount == 0) {
-        /* assume CR only, Unix style */
-        *pLine = crCount + 1;
-        *pColumn = charsSinceCR + 1;
-    } else if (crCount == 0) {
-        /* assume LF only, Mac style */
-        *pLine = lfCount + 1;
-        *pColumn = charsSinceLF + 1;
-    } else {
-        /* an unclear situation so use
-        thresholds on the ratio to guess */
-        NW_Uint32 ratio;
-        ratio = ((crCount * 100) / lfCount);
-        if (ratio > 300) {/* more than 3 to 1 crCount to lfCount */
-            /* assume CR only, Unix style */
-            *pLine = crCount + 1;
-            *pColumn = charsSinceCR + 1;
-        } else if (ratio < 33) {/* less than 1 to 3 crCount to lfCount */
-            /* assume LF only, Mac style */
-            *pLine = lfCount + 1;
-            *pColumn = charsSinceLF + 1;
-        } else {
-            /* assume CR, LF, DOS style */
-            /* use a bias in favor of CR, LF sequence (DOS style)
-            which will give the correct column */
-            *pLine = lfCount + 1;
-            *pColumn = charsSinceLF + 1;
-        }
-    }
-}
-