xml/cxmllibrary/src/xmlp/src/XMLReader.c
author hgs
Wed, 23 Jun 2010 20:27:15 +0530
changeset 24 74f0b3eb154c
permissions -rw-r--r--
201024
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24
hgs
parents:
diff changeset
     1
/*
hgs
parents:
diff changeset
     2
* Copyright (c) 2000 - 2001 Nokia Corporation and/or its subsidiary(-ies).
hgs
parents:
diff changeset
     3
* All rights reserved.
hgs
parents:
diff changeset
     4
* This component and the accompanying materials are made available
hgs
parents:
diff changeset
     5
* under the terms of the License "Eclipse Public License v1.0"
hgs
parents:
diff changeset
     6
* which accompanies this distribution, and is available
hgs
parents:
diff changeset
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
hgs
parents:
diff changeset
     8
*
hgs
parents:
diff changeset
     9
* Initial Contributors:
hgs
parents:
diff changeset
    10
* Nokia Corporation - initial contribution.
hgs
parents:
diff changeset
    11
*
hgs
parents:
diff changeset
    12
* Contributors:
hgs
parents:
diff changeset
    13
*
hgs
parents:
diff changeset
    14
* Description: 
hgs
parents:
diff changeset
    15
*
hgs
parents:
diff changeset
    16
*/
hgs
parents:
diff changeset
    17
hgs
parents:
diff changeset
    18
hgs
parents:
diff changeset
    19
/*
hgs
parents:
diff changeset
    20
This module provides a text (character) stream, pointers into the stream
hgs
parents:
diff changeset
    21
and operations on segments of the stream as though they were strings.
hgs
parents:
diff changeset
    22
The goal is to isolate the client from stream buffers, cross buffer
hgs
parents:
diff changeset
    23
issues and some character set encoding concerns.
hgs
parents:
diff changeset
    24
hgs
parents:
diff changeset
    25
This particular version is for input from a single buffer.
hgs
parents:
diff changeset
    26
*/
hgs
parents:
diff changeset
    27
hgs
parents:
diff changeset
    28
#include "cxml_internal.h"
hgs
parents:
diff changeset
    29
#include <xml/cxml/nw_string_char.h>
hgs
parents:
diff changeset
    30
#include <xml/cxml/nw_xmlp_xmlreader.h>
hgs
parents:
diff changeset
    31
hgs
parents:
diff changeset
    32
static
hgs
parents:
diff changeset
    33
NW_Status_t
hgs
parents:
diff changeset
    34
NW_XML_Reader_ReadAsciiChar(NW_Uint32 c, NW_Uint32* pReturnChar)
hgs
parents:
diff changeset
    35
{
hgs
parents:
diff changeset
    36
    /* This looks a bit weird but the idea is to force the conversion
hgs
parents:
diff changeset
    37
    of the ASCII character through the same function that is used
hgs
parents:
diff changeset
    38
    to read a character from the text.  This will impose the same conversion
hgs
parents:
diff changeset
    39
    limitations and the same result encoding. */
hgs
parents:
diff changeset
    40
    NW_Int32 byteCount;
hgs
parents:
diff changeset
    41
    NW_Uint8 buf[2];
hgs
parents:
diff changeset
    42
    NW_Ucs2 c_ucs2;
hgs
parents:
diff changeset
    43
    buf[0] = (NW_Uint8)(c & 0xff);
hgs
parents:
diff changeset
    44
    buf[1] = 0;
hgs
parents:
diff changeset
    45
    /* should only use this function for ASCII */
hgs
parents:
diff changeset
    46
    if (c > 127) {
hgs
parents:
diff changeset
    47
        return NW_STAT_FAILURE;
hgs
parents:
diff changeset
    48
    }
hgs
parents:
diff changeset
    49
    /* call it UTF-8 because ASCII doesn't work with NW_String_readChar()
hgs
parents:
diff changeset
    50
    at the moment */
hgs
parents:
diff changeset
    51
    byteCount = NW_String_readChar((NW_Byte*)buf, &c_ucs2, HTTP_utf_8);
hgs
parents:
diff changeset
    52
    if (byteCount != 1) {
hgs
parents:
diff changeset
    53
        return NW_STAT_FAILURE;
hgs
parents:
diff changeset
    54
    }
hgs
parents:
diff changeset
    55
    *pReturnChar = c_ucs2;
hgs
parents:
diff changeset
    56
    return NW_STAT_SUCCESS;
hgs
parents:
diff changeset
    57
}
hgs
parents:
diff changeset
    58
hgs
parents:
diff changeset
    59
/* assumes this is just a handoff of the buffer (i.e., won't make a copy) */
hgs
parents:
diff changeset
    60
EXPORT_C NW_Status_t
hgs
parents:
diff changeset
    61
NW_XML_Reader_InitFromBuffer(NW_XML_Reader_t* pT, NW_Uint32 length, unsigned char* pBuf)
hgs
parents:
diff changeset
    62
{
hgs
parents:
diff changeset
    63
    pT->encoding = 0;
hgs
parents:
diff changeset
    64
    pT->endianness = NW_NATIVE_ENDIAN;
hgs
parents:
diff changeset
    65
    pT->index = 0;
hgs
parents:
diff changeset
    66
    pT->charIndex = 0;
hgs
parents:
diff changeset
    67
    pT->lineColumn.crCount = 0;
hgs
parents:
diff changeset
    68
    pT->lineColumn.lfCount = 0;
hgs
parents:
diff changeset
    69
    pT->lineColumn.charsSinceLastCR = 0;
hgs
parents:
diff changeset
    70
    pT->lineColumn.charsSinceLastLF = 0;
hgs
parents:
diff changeset
    71
    pT->end = 0;
hgs
parents:
diff changeset
    72
    pT->length = length;
hgs
parents:
diff changeset
    73
    pT->pBuf = pBuf;
hgs
parents:
diff changeset
    74
    return NW_STAT_SUCCESS;
hgs
parents:
diff changeset
    75
}
hgs
parents:
diff changeset
    76
hgs
parents:
diff changeset
    77
EXPORT_C NW_Status_t
hgs
parents:
diff changeset
    78
NW_XML_Reader_DataAddressFromBuffer(NW_XML_Reader_t* pT,
hgs
parents:
diff changeset
    79
                                 NW_Uint32 start, NW_Uint32* length,
hgs
parents:
diff changeset
    80
                                 unsigned char** ppData)
hgs
parents:
diff changeset
    81
{
hgs
parents:
diff changeset
    82
    NW_ASSERT(start < pT->length);
hgs
parents:
diff changeset
    83
    NW_ASSERT(*length <= pT->length);
hgs
parents:
diff changeset
    84
    NW_ASSERT((start + *length) <= pT->length);
hgs
parents:
diff changeset
    85
    *ppData = NULL;
hgs
parents:
diff changeset
    86
    if (start < pT->length) {
hgs
parents:
diff changeset
    87
        *ppData = pT->pBuf + start;
hgs
parents:
diff changeset
    88
        *length = (((start + *length) <= pT->length) ?
hgs
parents:
diff changeset
    89
                   *length : (pT->length - start));
hgs
parents:
diff changeset
    90
        return NW_STAT_SUCCESS;
hgs
parents:
diff changeset
    91
    }
hgs
parents:
diff changeset
    92
    return NW_STAT_FAILURE;
hgs
parents:
diff changeset
    93
}
hgs
parents:
diff changeset
    94
hgs
parents:
diff changeset
    95
/* peekOrAdvance: first arg "advance": peek = 0, advance = 1 */
hgs
parents:
diff changeset
    96
static
hgs
parents:
diff changeset
    97
NW_Status_t
hgs
parents:
diff changeset
    98
NW_XML_Reader_PeekOrAdvanceOffset(NW_Bool advance, NW_XML_Reader_t* pT,
hgs
parents:
diff changeset
    99
                               NW_Uint32 offsetCharCount, NW_Uint32* pC)
hgs
parents:
diff changeset
   100
{
hgs
parents:
diff changeset
   101
    NW_Ucs2 c_ucs2;
hgs
parents:
diff changeset
   102
    NW_Uint32 i;
hgs
parents:
diff changeset
   103
    NW_Uint32 charCount = 0;
hgs
parents:
diff changeset
   104
    NW_Int32 byteCount = 0;
hgs
parents:
diff changeset
   105
    NW_Uint32 crCount = 0;
hgs
parents:
diff changeset
   106
    NW_Uint32 lfCount = 0;
hgs
parents:
diff changeset
   107
    NW_Uint32 charsPastCR = 0;
hgs
parents:
diff changeset
   108
    NW_Uint32 charsPastLF = 0;
hgs
parents:
diff changeset
   109
    NW_Bool resetPastCR = 0;
hgs
parents:
diff changeset
   110
    NW_Bool resetPastLF = 0;
hgs
parents:
diff changeset
   111
hgs
parents:
diff changeset
   112
    NW_ASSERT(!(advance && !offsetCharCount)); 
hgs
parents:
diff changeset
   113
hgs
parents:
diff changeset
   114
    if (pT->end) {
hgs
parents:
diff changeset
   115
        return NW_STAT_FAILURE;
hgs
parents:
diff changeset
   116
    }
hgs
parents:
diff changeset
   117
    for (i = pT->index; i < pT->length; i += (NW_Uint32)byteCount) {
hgs
parents:
diff changeset
   118
        NW_ASSERT(charCount <= offsetCharCount);
hgs
parents:
diff changeset
   119
hgs
parents:
diff changeset
   120
        /* It is assumed that this func returns UNICODE code points. */
hgs
parents:
diff changeset
   121
        byteCount = NW_String_readChar((NW_Byte*)&(pT->pBuf[i]),
hgs
parents:
diff changeset
   122
                                       &c_ucs2, pT->encoding);
hgs
parents:
diff changeset
   123
        *pC = c_ucs2;
hgs
parents:
diff changeset
   124
        if (byteCount == -1) {
hgs
parents:
diff changeset
   125
            return NW_STAT_FAILURE;
hgs
parents:
diff changeset
   126
        }
hgs
parents:
diff changeset
   127
        if (charCount == offsetCharCount) {
hgs
parents:
diff changeset
   128
            /* This catches NW_String_readChar() reading past buffer end
hgs
parents:
diff changeset
   129
            and can be removed when the readChar function does proper
hgs
parents:
diff changeset
   130
            error checking. */
hgs
parents:
diff changeset
   131
            if ((i + (NW_Uint32)byteCount) > pT->length) {
hgs
parents:
diff changeset
   132
                return NW_STAT_FAILURE;
hgs
parents:
diff changeset
   133
            }
hgs
parents:
diff changeset
   134
            break;
hgs
parents:
diff changeset
   135
        }
hgs
parents:
diff changeset
   136
        charCount++;
hgs
parents:
diff changeset
   137
        charsPastCR++;
hgs
parents:
diff changeset
   138
        charsPastLF++;
hgs
parents:
diff changeset
   139
        if (c_ucs2 == 0xd /* CR */) {
hgs
parents:
diff changeset
   140
            crCount++;
hgs
parents:
diff changeset
   141
            resetPastCR = 1;
hgs
parents:
diff changeset
   142
            charsPastCR = 0;
hgs
parents:
diff changeset
   143
        } else if (c_ucs2 == 0xa /* LF */) {
hgs
parents:
diff changeset
   144
            lfCount++;
hgs
parents:
diff changeset
   145
            resetPastLF = 1;
hgs
parents:
diff changeset
   146
            charsPastLF = 0;
hgs
parents:
diff changeset
   147
        }
hgs
parents:
diff changeset
   148
    }
hgs
parents:
diff changeset
   149
    if (i >= pT->length) {
hgs
parents:
diff changeset
   150
        pT->end = 1;
hgs
parents:
diff changeset
   151
    }
hgs
parents:
diff changeset
   152
    /* This catches NW_String_readChar() reading past buffer end and can be
hgs
parents:
diff changeset
   153
    removed when the readChar function does proper error checking. */
hgs
parents:
diff changeset
   154
    if (i > pT->length) {
hgs
parents:
diff changeset
   155
        return NW_STAT_FAILURE;
hgs
parents:
diff changeset
   156
    }
hgs
parents:
diff changeset
   157
    if (advance) {
hgs
parents:
diff changeset
   158
        pT->index = i;
hgs
parents:
diff changeset
   159
        pT->charIndex += charCount;
hgs
parents:
diff changeset
   160
        pT->lineColumn.crCount += crCount;
hgs
parents:
diff changeset
   161
        pT->lineColumn.lfCount += lfCount;
hgs
parents:
diff changeset
   162
        if (resetPastCR) {
hgs
parents:
diff changeset
   163
            pT->lineColumn.charsSinceLastCR = charsPastCR;
hgs
parents:
diff changeset
   164
        } else {
hgs
parents:
diff changeset
   165
            pT->lineColumn.charsSinceLastCR += charsPastCR;
hgs
parents:
diff changeset
   166
        }
hgs
parents:
diff changeset
   167
        if (resetPastLF) {
hgs
parents:
diff changeset
   168
            pT->lineColumn.charsSinceLastLF = charsPastLF;
hgs
parents:
diff changeset
   169
        } else {
hgs
parents:
diff changeset
   170
            pT->lineColumn.charsSinceLastLF += charsPastLF;
hgs
parents:
diff changeset
   171
        }
hgs
parents:
diff changeset
   172
    }
hgs
parents:
diff changeset
   173
    return NW_STAT_SUCCESS;
hgs
parents:
diff changeset
   174
}
hgs
parents:
diff changeset
   175
hgs
parents:
diff changeset
   176
EXPORT_C NW_Status_t
hgs
parents:
diff changeset
   177
NW_XML_Reader_PeekOffset(NW_XML_Reader_t* pT, NW_Uint32 nChars, NW_Uint32* pC)
hgs
parents:
diff changeset
   178
{
hgs
parents:
diff changeset
   179
    return NW_XML_Reader_PeekOrAdvanceOffset(0, pT, nChars, pC);
hgs
parents:
diff changeset
   180
}
hgs
parents:
diff changeset
   181
hgs
parents:
diff changeset
   182
EXPORT_C NW_Status_t
hgs
parents:
diff changeset
   183
NW_XML_Reader_AdvanceOffset(NW_XML_Reader_t* pT, NW_Uint32 nChars)
hgs
parents:
diff changeset
   184
{
hgs
parents:
diff changeset
   185
    NW_Uint32 c;
hgs
parents:
diff changeset
   186
    return NW_XML_Reader_PeekOrAdvanceOffset(1, pT, nChars, &c);
hgs
parents:
diff changeset
   187
}
hgs
parents:
diff changeset
   188
hgs
parents:
diff changeset
   189
EXPORT_C 
hgs
parents:
diff changeset
   190
void NW_XML_Reader_GetPosition(NW_XML_Reader_t* pT, NW_Uint32* pByteIndex,
hgs
parents:
diff changeset
   191
                            NW_Uint32* pCharIndex,
hgs
parents:
diff changeset
   192
                            NW_XML_Reader_LineColumn_t* pLineColumn)
hgs
parents:
diff changeset
   193
{
hgs
parents:
diff changeset
   194
    *pByteIndex = pT->index;
hgs
parents:
diff changeset
   195
    *pCharIndex = pT->charIndex;
hgs
parents:
diff changeset
   196
    pLineColumn->crCount = pT->lineColumn.crCount;
hgs
parents:
diff changeset
   197
    pLineColumn->lfCount = pT->lineColumn.lfCount;
hgs
parents:
diff changeset
   198
    pLineColumn->charsSinceLastCR = pT->lineColumn.charsSinceLastCR;
hgs
parents:
diff changeset
   199
    pLineColumn->charsSinceLastLF = pT->lineColumn.charsSinceLastLF;
hgs
parents:
diff changeset
   200
}
hgs
parents:
diff changeset
   201
hgs
parents:
diff changeset
   202
/* Note: Setting the position (similar to seeking in a file) is in general
hgs
parents:
diff changeset
   203
not possible without reading the characters (usually reading forward) because
hgs
parents:
diff changeset
   204
character encoding may use a variable numbers of bytes per character. This is
hgs
parents:
diff changeset
   205
here so that if you have defined a valid interval, then you can reposition to
hgs
parents:
diff changeset
   206
the beginning of the interval. Setting to the position to a bad value will
hgs
parents:
diff changeset
   207
not always be caught immediately. Don't forget to also save and set line
hgs
parents:
diff changeset
   208
and column with position. */
hgs
parents:
diff changeset
   209
EXPORT_C void
hgs
parents:
diff changeset
   210
NW_XML_Reader_SetPosition(NW_XML_Reader_t* pT, NW_Uint32 byteIndex,
hgs
parents:
diff changeset
   211
                       NW_Uint32 charIndex,
hgs
parents:
diff changeset
   212
                       const NW_XML_Reader_LineColumn_t* pLineColumn)
hgs
parents:
diff changeset
   213
{
hgs
parents:
diff changeset
   214
    pT->index = byteIndex;
hgs
parents:
diff changeset
   215
    pT->charIndex = charIndex;
hgs
parents:
diff changeset
   216
    pT->lineColumn.crCount = pLineColumn->crCount;
hgs
parents:
diff changeset
   217
    pT->lineColumn.lfCount = pLineColumn->lfCount;
hgs
parents:
diff changeset
   218
    pT->lineColumn.charsSinceLastCR = pLineColumn->charsSinceLastCR;
hgs
parents:
diff changeset
   219
    pT->lineColumn.charsSinceLastLF = pLineColumn->charsSinceLastLF;
hgs
parents:
diff changeset
   220
}
hgs
parents:
diff changeset
   221
hgs
parents:
diff changeset
   222
/*
hgs
parents:
diff changeset
   223
Reader Interval Functions
hgs
parents:
diff changeset
   224
*/
hgs
parents:
diff changeset
   225
hgs
parents:
diff changeset
   226
EXPORT_C void
hgs
parents:
diff changeset
   227
NW_XML_Reader_Interval_Start(NW_XML_Reader_Interval_t* pI, NW_XML_Reader_t* pT)
hgs
parents:
diff changeset
   228
{
hgs
parents:
diff changeset
   229
    /* set both start and stop for safety in later use */
hgs
parents:
diff changeset
   230
    pI->start = pI->stop = pT->index;
hgs
parents:
diff changeset
   231
    pI->charStart = pI->charStop = pT->charIndex;
hgs
parents:
diff changeset
   232
}
hgs
parents:
diff changeset
   233
hgs
parents:
diff changeset
   234
EXPORT_C void
hgs
parents:
diff changeset
   235
NW_XML_Reader_Interval_Stop(NW_XML_Reader_Interval_t* pI, NW_XML_Reader_t* pT)
hgs
parents:
diff changeset
   236
{
hgs
parents:
diff changeset
   237
    pI->stop = pT->index;
hgs
parents:
diff changeset
   238
    pI->charStop = pT->charIndex;
hgs
parents:
diff changeset
   239
}
hgs
parents:
diff changeset
   240
hgs
parents:
diff changeset
   241
/* BEGIN GENERIC Reader CHARACTER AND STRING FUNCTIONS */
hgs
parents:
diff changeset
   242
hgs
parents:
diff changeset
   243
/* pMatch is 1 if ASCII character c matches Reader char in its encoding */
hgs
parents:
diff changeset
   244
EXPORT_C NW_Status_t
hgs
parents:
diff changeset
   245
NW_XML_Reader_AsciiCharMatch(NW_XML_Reader_t* pT, NW_Uint32 asciiC, NW_Uint32* pMatch)
hgs
parents:
diff changeset
   246
{
hgs
parents:
diff changeset
   247
    NW_Uint32 c_text, c_ascii;
hgs
parents:
diff changeset
   248
    NW_Status_t s = NW_XML_Reader_Peek(pT, &c_text);
hgs
parents:
diff changeset
   249
    *pMatch = 0;
hgs
parents:
diff changeset
   250
    if (NW_STAT_IS_SUCCESS(s)) {
hgs
parents:
diff changeset
   251
        s = NW_XML_Reader_ReadAsciiChar(asciiC, &c_ascii);
hgs
parents:
diff changeset
   252
        if (NW_STAT_IS_SUCCESS(s)) {
hgs
parents:
diff changeset
   253
            *pMatch = (c_text == c_ascii);
hgs
parents:
diff changeset
   254
        }
hgs
parents:
diff changeset
   255
    }
hgs
parents:
diff changeset
   256
    return s;
hgs
parents:
diff changeset
   257
}
hgs
parents:
diff changeset
   258
hgs
parents:
diff changeset
   259
/* pMatch is 1 if ASCII string matches Reader sequence in its encoding */
hgs
parents:
diff changeset
   260
EXPORT_C NW_Status_t
hgs
parents:
diff changeset
   261
NW_XML_Reader_AsciiStringMatch(NW_XML_Reader_t* pT, NW_Uint32 length, const NW_Uint8* pString,
hgs
parents:
diff changeset
   262
                            NW_Uint32* pMatch)
hgs
parents:
diff changeset
   263
{
hgs
parents:
diff changeset
   264
    NW_Uint32 c_text, c_ascii;
hgs
parents:
diff changeset
   265
    NW_Uint32 i;
hgs
parents:
diff changeset
   266
    NW_Status_t s = NW_STAT_SUCCESS;
hgs
parents:
diff changeset
   267
    *pMatch = 0;
hgs
parents:
diff changeset
   268
    NW_ASSERT(length);
hgs
parents:
diff changeset
   269
    for (i = 0; i < length; i++) {
hgs
parents:
diff changeset
   270
        s = NW_XML_Reader_PeekOffset(pT, i, &c_text);
hgs
parents:
diff changeset
   271
        if (NW_STAT_IS_FAILURE(s)) {
hgs
parents:
diff changeset
   272
            break;
hgs
parents:
diff changeset
   273
        }
hgs
parents:
diff changeset
   274
        s = NW_XML_Reader_ReadAsciiChar(pString[i], &c_ascii);
hgs
parents:
diff changeset
   275
        if (NW_STAT_IS_FAILURE(s)) {
hgs
parents:
diff changeset
   276
            break;
hgs
parents:
diff changeset
   277
        }
hgs
parents:
diff changeset
   278
        if (c_text != c_ascii) {
hgs
parents:
diff changeset
   279
            break;
hgs
parents:
diff changeset
   280
        }
hgs
parents:
diff changeset
   281
    }
hgs
parents:
diff changeset
   282
    if (i == length) {
hgs
parents:
diff changeset
   283
        *pMatch = 1;
hgs
parents:
diff changeset
   284
    }
hgs
parents:
diff changeset
   285
    return s;
hgs
parents:
diff changeset
   286
}
hgs
parents:
diff changeset
   287
hgs
parents:
diff changeset
   288
/* Note: For XML, whitespace is only ASCII 0x20 (space),
hgs
parents:
diff changeset
   289
0x09 (tab), 0x0d (CR), 0x0a (LF).  The base test used here,
hgs
parents:
diff changeset
   290
CXML_Str_Isspace(), includes two other forms of whitespace. */
hgs
parents:
diff changeset
   291
EXPORT_C NW_Status_t
hgs
parents:
diff changeset
   292
NW_XML_Reader_SkipSpace(NW_XML_Reader_t* pT)
hgs
parents:
diff changeset
   293
{
hgs
parents:
diff changeset
   294
    NW_Uint32 c;
hgs
parents:
diff changeset
   295
    NW_Status_t s = NW_STAT_SUCCESS;
hgs
parents:
diff changeset
   296
    for (;;) {
hgs
parents:
diff changeset
   297
        s = NW_XML_Reader_Peek(pT, &c);
hgs
parents:
diff changeset
   298
        if (NW_STAT_IS_FAILURE(s)) {
hgs
parents:
diff changeset
   299
            break;
hgs
parents:
diff changeset
   300
        }
hgs
parents:
diff changeset
   301
        if (c > 0xffff) { /* validate casting */
hgs
parents:
diff changeset
   302
            break;
hgs
parents:
diff changeset
   303
        }
hgs
parents:
diff changeset
   304
        if (!CXML_Str_Isspace((NW_Ucs2)(c & 0xffff))) {
hgs
parents:
diff changeset
   305
            break;
hgs
parents:
diff changeset
   306
        }
hgs
parents:
diff changeset
   307
        s = NW_XML_Reader_Advance(pT);
hgs
parents:
diff changeset
   308
        if (NW_STAT_IS_FAILURE(s)) {
hgs
parents:
diff changeset
   309
            break;
hgs
parents:
diff changeset
   310
        }
hgs
parents:
diff changeset
   311
        if (pT->end){
hgs
parents:
diff changeset
   312
         /* At the end so break */
hgs
parents:
diff changeset
   313
            break;
hgs
parents:
diff changeset
   314
          }
hgs
parents:
diff changeset
   315
    }
hgs
parents:
diff changeset
   316
    return s;
hgs
parents:
diff changeset
   317
}
hgs
parents:
diff changeset
   318
hgs
parents:
diff changeset
   319
/* Note: For XML, whitespace is only ASCII 0x20 (space),
hgs
parents:
diff changeset
   320
0x09 (tab), 0x0d (CR), 0x0a (LF).  The base test used here,
hgs
parents:
diff changeset
   321
CXML_Str_Isspace(), includes two other forms of whitespace. */
hgs
parents:
diff changeset
   322
EXPORT_C NW_Status_t
hgs
parents:
diff changeset
   323
NW_XML_Reader_IsSpace(NW_XML_Reader_t* pT, NW_Uint32* pMatch)
hgs
parents:
diff changeset
   324
{
hgs
parents:
diff changeset
   325
    NW_Uint32 c;
hgs
parents:
diff changeset
   326
    NW_Status_t s;
hgs
parents:
diff changeset
   327
hgs
parents:
diff changeset
   328
    *pMatch = 0;
hgs
parents:
diff changeset
   329
    s  = NW_XML_Reader_Peek(pT, &c);
hgs
parents:
diff changeset
   330
    if (NW_STAT_IS_FAILURE(s)) {
hgs
parents:
diff changeset
   331
        return s;
hgs
parents:
diff changeset
   332
    }
hgs
parents:
diff changeset
   333
    if (c > 0xffff) { /* validate casting */
hgs
parents:
diff changeset
   334
        return NW_STAT_FAILURE;
hgs
parents:
diff changeset
   335
    }
hgs
parents:
diff changeset
   336
    if (CXML_Str_Isspace((NW_Ucs2)(c & 0xffff))) {
hgs
parents:
diff changeset
   337
        *pMatch = 1;
hgs
parents:
diff changeset
   338
    }
hgs
parents:
diff changeset
   339
    return NW_STAT_SUCCESS;
hgs
parents:
diff changeset
   340
}
hgs
parents:
diff changeset
   341
hgs
parents:
diff changeset
   342
EXPORT_C NW_Status_t
hgs
parents:
diff changeset
   343
NW_XML_Reader_IsLetter(NW_XML_Reader_t* pT, NW_Uint32* pMatch)
hgs
parents:
diff changeset
   344
{
hgs
parents:
diff changeset
   345
    NW_Uint32 c;
hgs
parents:
diff changeset
   346
    NW_Status_t s;
hgs
parents:
diff changeset
   347
hgs
parents:
diff changeset
   348
    *pMatch = 0;
hgs
parents:
diff changeset
   349
    s = NW_XML_Reader_Peek(pT, &c);
hgs
parents:
diff changeset
   350
    if (NW_STAT_IS_FAILURE(s)) {
hgs
parents:
diff changeset
   351
        return s;
hgs
parents:
diff changeset
   352
    }
hgs
parents:
diff changeset
   353
    /* This is an approximation to what XML charaters are "letter".
hgs
parents:
diff changeset
   354
    Everything above the 8-bit range is considered to be a "letter".*/
hgs
parents:
diff changeset
   355
    if (c >= 0x41 && c <= 0x5a) {
hgs
parents:
diff changeset
   356
        *pMatch = 1;
hgs
parents:
diff changeset
   357
    }
hgs
parents:
diff changeset
   358
    else if (c >= 0x61 && c <= 0x7a) {
hgs
parents:
diff changeset
   359
        *pMatch = 1;
hgs
parents:
diff changeset
   360
    }
hgs
parents:
diff changeset
   361
    else if (c >= 0xc0 && c <= 0xd6) {
hgs
parents:
diff changeset
   362
        *pMatch = 1;
hgs
parents:
diff changeset
   363
    }
hgs
parents:
diff changeset
   364
    else if (c >= 0xd8 && c <= 0xf6) {
hgs
parents:
diff changeset
   365
        *pMatch = 1;
hgs
parents:
diff changeset
   366
    }
hgs
parents:
diff changeset
   367
    else if (c >= 0xf8) {/* letters become anything above 0xf8 */
hgs
parents:
diff changeset
   368
        *pMatch = 1;
hgs
parents:
diff changeset
   369
    }
hgs
parents:
diff changeset
   370
    return NW_STAT_SUCCESS;
hgs
parents:
diff changeset
   371
}
hgs
parents:
diff changeset
   372
hgs
parents:
diff changeset
   373
/* Note: For XML, digits include not only the ASCII digits but
hgs
parents:
diff changeset
   374
other language forms of digits.  The base test used here,
hgs
parents:
diff changeset
   375
CXML_Str_Isdigit() only tests for ASCII digits. */
hgs
parents:
diff changeset
   376
EXPORT_C NW_Status_t
hgs
parents:
diff changeset
   377
NW_XML_Reader_IsDigit(NW_XML_Reader_t* pT, NW_Uint32* pMatch)
hgs
parents:
diff changeset
   378
{
hgs
parents:
diff changeset
   379
    NW_Uint32 c;
hgs
parents:
diff changeset
   380
    NW_Status_t s;
hgs
parents:
diff changeset
   381
hgs
parents:
diff changeset
   382
    *pMatch = 0;
hgs
parents:
diff changeset
   383
    s = NW_XML_Reader_Peek(pT, &c);
hgs
parents:
diff changeset
   384
    if (NW_STAT_IS_FAILURE(s)) {
hgs
parents:
diff changeset
   385
        return s;
hgs
parents:
diff changeset
   386
    }
hgs
parents:
diff changeset
   387
    if (c > 0xffff) {/* validate casting */
hgs
parents:
diff changeset
   388
        return NW_STAT_SUCCESS;
hgs
parents:
diff changeset
   389
    }
hgs
parents:
diff changeset
   390
    if (CXML_Str_Isdigit((NW_Ucs2)(c & 0xffff))) {
hgs
parents:
diff changeset
   391
        *pMatch = 1;
hgs
parents:
diff changeset
   392
    }
hgs
parents:
diff changeset
   393
    return NW_STAT_SUCCESS;
hgs
parents:
diff changeset
   394
}
hgs
parents:
diff changeset
   395
hgs
parents:
diff changeset
   396
/* Returns an estimate of the current line and column position in the text.
hgs
parents:
diff changeset
   397
It is an estimate because it has to guess at what the intended line ending
hgs
parents:
diff changeset
   398
sequence is using a count of CR and LF characters.  Line and Column indices
hgs
parents:
diff changeset
   399
are 1-based not 0-based. */
hgs
parents:
diff changeset
   400
EXPORT_C void
hgs
parents:
diff changeset
   401
NW_XML_Reader_GetLineColumn(NW_XML_Reader_t* pT, NW_Uint32* pLine,
hgs
parents:
diff changeset
   402
                         NW_Uint32* pColumn)
hgs
parents:
diff changeset
   403
{
hgs
parents:
diff changeset
   404
    NW_Uint32 crCount, lfCount, charsSinceCR, charsSinceLF;
hgs
parents:
diff changeset
   405
    crCount = pT->lineColumn.crCount;
hgs
parents:
diff changeset
   406
    lfCount = pT->lineColumn.lfCount;
hgs
parents:
diff changeset
   407
    charsSinceCR = pT->lineColumn.charsSinceLastCR;
hgs
parents:
diff changeset
   408
    charsSinceLF = pT->lineColumn.charsSinceLastLF;
hgs
parents:
diff changeset
   409
    if (crCount == lfCount) {
hgs
parents:
diff changeset
   410
        /* assume CR, LF, DOS style */
hgs
parents:
diff changeset
   411
        /* use a bias in favor of CR followed by LF
hgs
parents:
diff changeset
   412
        which will give the correct column for DOS */
hgs
parents:
diff changeset
   413
        *pLine = lfCount + 1;
hgs
parents:
diff changeset
   414
        *pColumn = charsSinceLF + 1;
hgs
parents:
diff changeset
   415
    } else if (lfCount == 0) {
hgs
parents:
diff changeset
   416
        /* assume CR only, Unix style */
hgs
parents:
diff changeset
   417
        *pLine = crCount + 1;
hgs
parents:
diff changeset
   418
        *pColumn = charsSinceCR + 1;
hgs
parents:
diff changeset
   419
    } else if (crCount == 0) {
hgs
parents:
diff changeset
   420
        /* assume LF only, Mac style */
hgs
parents:
diff changeset
   421
        *pLine = lfCount + 1;
hgs
parents:
diff changeset
   422
        *pColumn = charsSinceLF + 1;
hgs
parents:
diff changeset
   423
    } else {
hgs
parents:
diff changeset
   424
        /* an unclear situation so use
hgs
parents:
diff changeset
   425
        thresholds on the ratio to guess */
hgs
parents:
diff changeset
   426
        NW_Uint32 ratio;
hgs
parents:
diff changeset
   427
        ratio = ((crCount * 100) / lfCount);
hgs
parents:
diff changeset
   428
        if (ratio > 300) {/* more than 3 to 1 crCount to lfCount */
hgs
parents:
diff changeset
   429
            /* assume CR only, Unix style */
hgs
parents:
diff changeset
   430
            *pLine = crCount + 1;
hgs
parents:
diff changeset
   431
            *pColumn = charsSinceCR + 1;
hgs
parents:
diff changeset
   432
        } else if (ratio < 33) {/* less than 1 to 3 crCount to lfCount */
hgs
parents:
diff changeset
   433
            /* assume LF only, Mac style */
hgs
parents:
diff changeset
   434
            *pLine = lfCount + 1;
hgs
parents:
diff changeset
   435
            *pColumn = charsSinceLF + 1;
hgs
parents:
diff changeset
   436
        } else {
hgs
parents:
diff changeset
   437
            /* assume CR, LF, DOS style */
hgs
parents:
diff changeset
   438
            /* use a bias in favor of CR, LF sequence (DOS style)
hgs
parents:
diff changeset
   439
            which will give the correct column */
hgs
parents:
diff changeset
   440
            *pLine = lfCount + 1;
hgs
parents:
diff changeset
   441
            *pColumn = charsSinceLF + 1;
hgs
parents:
diff changeset
   442
        }
hgs
parents:
diff changeset
   443
    }
hgs
parents:
diff changeset
   444
}
hgs
parents:
diff changeset
   445