secureswitools/swisistools/source/xmlparser/xerces/include/xercesc/framework/XMLRecognizer.hpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Mon, 18 Jan 2010 20:28:24 +0200
changeset 2 661f3784fe57
parent 1 c42dffbd5b4f
permissions -rw-r--r--
Revision: 201001 Kit: 201003

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 *  $Id: XMLRecognizer.hpp 568078 2007-08-21 11:43:25Z amassari $
 */

#if !defined(XMLRECOGNIZER_HPP)
#define XMLRECOGNIZER_HPP

#include <xercesc/util/XercesDefs.hpp>
#include <xercesc/util/PlatformUtils.hpp>

XERCES_CPP_NAMESPACE_BEGIN

/**
 *  This class provides some simple code to recognize the encodings of
 *  XML files. This recognition only does very basic sensing of the encoding
 *  in a broad sense. Basically its just enough to let us get started and
 *  read the XMLDecl line. The scanner, once it reads the XMLDecl, will
 *  tell the reader any actual encoding string it found and the reader can
 *  update itself to be more specific at that point.
 */
class XMLPARSER_EXPORT XMLRecognizer
{
public :
    // -----------------------------------------------------------------------
    //  Class types
    //
    //  This enum represents the various encoding families that we have to
    //  deal with individually at the scanner level. This does not indicate
    //  the exact encoding, just the rough family that would let us scan
    //  the XML/TextDecl to find the encoding string.
    //
    //  The 'L's and 'B's stand for little or big endian. We conditionally
    //  create versions that will automatically map to the local UTF-16 and
    //  UCS-4 endian modes.
    //
    //  OtherEncoding means that its some transcoder based encoding, i.e. not
    //  one of the ones that we do internally. Its a special case and should
    //  never be used directly outside of the reader.
    //
    //  NOTE: Keep this in sync with the name map array in the Cpp file!!
    // -----------------------------------------------------------------------
    enum Encodings
    {
        EBCDIC          = 0
        , UCS_4B        = 1
        , UCS_4L        = 2
        , US_ASCII      = 3
        , UTF_8         = 4
        , UTF_16B       = 5
        , UTF_16L       = 6
        , XERCES_XMLCH  = 7

        , Encodings_Count
        , Encodings_Min = EBCDIC
        , Encodings_Max = XERCES_XMLCH

        , OtherEncoding = 999

        #if defined(ENDIANMODE_BIG)
        , Def_UTF16     = UTF_16B
        , Def_UCS4      = UCS_4B
        #else
        , Def_UTF16     = UTF_16L
        , Def_UCS4      = UCS_4L
        #endif
    };


    // -----------------------------------------------------------------------
    //  Public, const static data
    //
    //  These are the byte sequences for each of the encodings that we can
    //  auto sense, and their lengths.
    // -----------------------------------------------------------------------
    static const char           fgASCIIPre[];
    static const unsigned int   fgASCIIPreLen;
    static const XMLByte        fgEBCDICPre[];
    static const unsigned int   fgEBCDICPreLen;
    static const XMLByte        fgUTF16BPre[];
    static const XMLByte        fgUTF16LPre[];
    static const unsigned int   fgUTF16PreLen;
    static const XMLByte        fgUCS4BPre[];
    static const XMLByte        fgUCS4LPre[];
    static const unsigned int   fgUCS4PreLen;
    static const char           fgUTF8BOM[];
    static const unsigned int   fgUTF8BOMLen;


    // -----------------------------------------------------------------------
    //  Encoding recognition methods
    // -----------------------------------------------------------------------
    static Encodings basicEncodingProbe
    (
        const   XMLByte* const      rawBuffer
        , const unsigned int        rawByteCount
    );

    static Encodings encodingForName
    (
        const   XMLCh* const    theEncName
    );

    static const XMLCh* nameForEncoding(const Encodings theEncoding
        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);


protected :
    // -----------------------------------------------------------------------
    //  Unimplemented constructors, operators, and destructor
    //
    //  This class is effectively being used as a namespace for some static
    //  methods.
    //
    //   (these functions are protected rather than private only to get rid of
    //    some annoying compiler warnings.)
    //
    // -----------------------------------------------------------------------
    XMLRecognizer();
    ~XMLRecognizer();

private:
    // -----------------------------------------------------------------------
    //  Unimplemented constructors and operators
    // -----------------------------------------------------------------------
    XMLRecognizer(const XMLRecognizer&);    
    XMLRecognizer& operator=(const XMLRecognizer&);
};

XERCES_CPP_NAMESPACE_END

#endif