secureswitools/swisistools/source/xmlparser/xerces/include/xercesc/util/XMLUri.hpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Mon, 18 Jan 2010 20:28:24 +0200
changeset 2 661f3784fe57
parent 1 c42dffbd5b4f
permissions -rw-r--r--
Revision: 201001 Kit: 201003

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: XMLUri.hpp 568078 2007-08-21 11:43:25Z amassari $
 */

#if !defined(XMLURI_HPP)
#define XMLURI_HPP

#include <xercesc/util/XMemory.hpp>
#include <xercesc/util/XMLString.hpp>

#include <xercesc/internal/XSerializable.hpp>
#include <xercesc/framework/XMLBuffer.hpp>

XERCES_CPP_NAMESPACE_BEGIN

/*
 * This class is a direct port of Java's URI class, to distinguish
 * itself from the XMLURL, we use the name XMLUri instead of
 * XMLURI.
 *
 * TODO: how to relate XMLUri and XMLURL since URL is part of URI.
 *
 */

class XMLUTIL_EXPORT XMLUri : public XSerializable, public XMemory
{
public:

    // -----------------------------------------------------------------------
    //  Constructors and Destructor
    // -----------------------------------------------------------------------

    /**
     * Construct a new URI from a URI specification string.
     *
     * If the specification follows the "generic URI" syntax, (two slashes
     * following the first colon), the specification will be parsed
     * accordingly - setting the
     *                           scheme,
     *                           userinfo,
     *                           host,
     *                           port,
     *                           path,
     *                           querystring and
     *                           fragment
     * fields as necessary.
     *
     * If the specification does not follow the "generic URI" syntax,
     * the specification is parsed into a
     *                           scheme and
     *                           scheme-specific part (stored as the path) only.
     *
     * @param uriSpec the URI specification string (cannot be null or empty)
     *
     * @param manager Pointer to the memory manager to be used to
     *                allocate objects.
     *
     * ctor# 2
     *
     */
    XMLUri(const XMLCh* const    uriSpec,
           MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);

    /**
     * Construct a new URI from a base URI and a URI specification string.
     * The URI specification string may be a relative URI.
     *
     * @param baseURI the base URI (cannot be null if uriSpec is null or
     *                empty)
     *
     * @param uriSpec the URI specification string (cannot be null or
     *                empty if base is null)
     *
     * @param manager Pointer to the memory manager to be used to
     *                allocate objects.
     *
     * ctor# 7 relative ctor
     *
     */
    XMLUri(const XMLUri* const  baseURI
         , const XMLCh* const   uriSpec
         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);

    /**
     * Copy constructor
     */
    XMLUri(const XMLUri& toCopy);
    XMLUri& operator=(const XMLUri& toAssign);

    virtual ~XMLUri();

    // -----------------------------------------------------------------------
    //  Getter methods
    // -----------------------------------------------------------------------
    /**
     * Get the URI as a string specification. See RFC 2396 Section 5.2.
     *
     * @return the URI string specification
     */
    const XMLCh* getUriText() const;

    /**
     * Get the scheme for this URI.
     *
     * @return the scheme for this URI
     */
     const XMLCh* getScheme() const;

    /**
     * Get the userinfo for this URI.
     *
     * @return the userinfo for this URI (null if not specified).
     */
     const XMLCh* getUserInfo() const;


    /**
     * Get the host for this URI.
     *
     * @return the host for this URI (null if not specified).
     */
     const XMLCh* getHost() const;

    /**
     * Get the port for this URI.
     *
     * @return the port for this URI (-1 if not specified).
     */
     int getPort() const;
     
    /**
     * Get the registry based authority for this URI.
     * 
     * @return the registry based authority (null if not specified).
     */
     const XMLCh* getRegBasedAuthority() const;

    /**
     * Get the path for this URI. Note that the value returned is the path
     * only and does not include the query string or fragment.
     *
     * @return the path for this URI.
     */
     const XMLCh* getPath() const;

    /**
     * Get the query string for this URI.
     *
     * @return the query string for this URI. Null is returned if there
     *         was no "?" in the URI spec, empty string if there was a
     *         "?" but no query string following it.
     */
     const XMLCh* getQueryString() const;

    /**
     * Get the fragment for this URI.
     *
     * @return the fragment for this URI. Null is returned if there
     *         was no "#" in the URI spec, empty string if there was a
     *         "#" but no fragment following it.
     */
     const XMLCh* getFragment() const;

    // -----------------------------------------------------------------------
    //  Setter methods
    // -----------------------------------------------------------------------

    /**
     * Set the scheme for this URI. The scheme is converted to lowercase
     * before it is set.
     *
     * @param newScheme the scheme for this URI (cannot be null)
     *
     */
     void setScheme(const XMLCh* const newScheme);

    /**
     * Set the userinfo for this URI. If a non-null value is passed in and
     * the host value is null, then an exception is thrown.
     *
     * @param newUserInfo the userinfo for this URI
     *
     */
     void setUserInfo(const XMLCh* const newUserInfo);

    /**
     * Set the host for this URI. If null is passed in, the userinfo
     * field is also set to null and the port is set to -1.
     *
     * Note: This method overwrites registry based authority if it
     * previously existed in this URI.
     *
     * @param newHost the host for this URI
     *
     */
     void setHost(const XMLCh* const newHost);

    /**
     * Set the port for this URI. -1 is used to indicate that the port is
     * not specified, otherwise valid port numbers are  between 0 and 65535.
     * If a valid port number is passed in and the host field is null,
     * an exception is thrown.
     *
     * @param newPort the port number for this URI
     *
     */
     void setPort(int newPort);
     
    /**
     * Sets the registry based authority for this URI.
     * 
     * Note: This method overwrites server based authority
     * if it previously existed in this URI.
     * 
     * @param newRegAuth the registry based authority for this URI
     */
     void setRegBasedAuthority(const XMLCh* const newRegAuth);

    /**
     * Set the path for this URI.
     *
     * If the supplied path is null, then the
     * query string and fragment are set to null as well.
     *
     * If the supplied path includes a query string and/or fragment,
     * these fields will be parsed and set as well.
     *
     * Note:
     *
     * For URIs following the "generic URI" syntax, the path
     * specified should start with a slash.
     *
     * For URIs that do not follow the generic URI syntax, this method
     * sets the scheme-specific part.
     *
     * @param newPath the path for this URI (may be null)
     *
     */
     void setPath(const XMLCh* const newPath);

    /**
     * Set the query string for this URI. A non-null value is valid only
     * if this is an URI conforming to the generic URI syntax and
     * the path value is not null.
     *
     * @param newQueryString the query string for this URI
     *
     */
     void setQueryString(const XMLCh* const newQueryString);

    /**
     * Set the fragment for this URI. A non-null value is valid only
     * if this is a URI conforming to the generic URI syntax and
     * the path value is not null.
     *
     * @param newFragment the fragment for this URI
     *
     */
     void setFragment(const XMLCh* const newFragment);

     // -----------------------------------------------------------------------
    //  Miscellaneous methods
    // -----------------------------------------------------------------------

    /**
     * Determine whether a given string contains only URI characters (also
     * called "uric" in RFC 2396). uric consist of all reserved
     * characters, unreserved characters and escaped characters.
     *
     * @return true if the string is comprised of uric, false otherwise
     */
    static bool isURIString(const XMLCh* const uric);

    /**
     * Determine whether a given string is a valid URI
     */
    static bool isValidURI( const XMLUri* const baseURI
                          , const XMLCh* const uriStr);
    /**
     * Determine whether a given string is a valid URI
     */
    static bool isValidURI( bool haveBaseURI
                          , const XMLCh* const uriStr);


    static void normalizeURI(const XMLCh*     const systemURI,
                                   XMLBuffer&       normalizedURI);

    /***
     * Support for Serialization/De-serialization
     ***/
    DECL_XSERIALIZABLE(XMLUri)

    XMLUri(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);

private:

    static const XMLCh MARK_OR_RESERVED_CHARACTERS[];
    static const XMLCh RESERVED_CHARACTERS[];
    static const XMLCh MARK_CHARACTERS[];
    static const XMLCh SCHEME_CHARACTERS[];
    static const XMLCh USERINFO_CHARACTERS[];
    static const XMLCh REG_NAME_CHARACTERS[];
    static const XMLCh PATH_CHARACTERS[];

    //helper method for getUriText
    void buildFullText();

    // -----------------------------------------------------------------------
    //  Private helper methods
    // -----------------------------------------------------------------------

    /**
     * Determine whether a character is a reserved character:
     *
     * @return true if the string contains any reserved characters
     */
    static bool isReservedCharacter(const XMLCh theChar);
    
    /**
     * Determine whether a character is a path character:
     *
     * @return true if the character is path character
     */
    static bool isPathCharacter(const XMLCh theChar);

    /**
     * Determine whether a char is an unreserved character.
     *
     * @return true if the char is unreserved, false otherwise
     */
    static bool isUnreservedCharacter(const XMLCh theChar);

    /**
     * Determine whether a char is an reserved or unreserved character.
     *
     * @return true if the char is reserved or unreserved, false otherwise
     */                
    static bool isReservedOrUnreservedCharacter(const XMLCh theChar);

    /**
     * Determine whether a scheme conforms to the rules for a scheme name.
     * A scheme is conformant if it starts with an alphanumeric, and
     * contains only alphanumerics, '+','-' and '.'.
     *
     * @return true if the scheme is conformant, false otherwise
     */
    static bool isConformantSchemeName(const XMLCh* const scheme);

    /**
     * Determine whether a userInfo conforms to the rules for a userinfo.
     *
     * @return true if the scheme is conformant, false otherwise
     */
    static void isConformantUserInfo(const XMLCh* const userInfo
        , MemoryManager* const manager);
    
    /**
     * Determines whether the components host, port, and user info
     * are valid as a server authority.
     *
     * @return true if the given host, port, and userinfo compose
     * a valid server authority
     */
    static bool isValidServerBasedAuthority(const XMLCh* const host
                                           , const int hostLen
                                           , const int port
                                           , const XMLCh* const userinfo
                                           , const int userLen);
                                           
    /**
     * Determines whether the components host, port, and user info
     * are valid as a server authority.
     *
     * @return true if the given host, port, and userinfo compose
     * a valid server authority
     */
    static bool isValidServerBasedAuthority(const XMLCh* const host
                                           , const int port
                                           , const XMLCh* const userinfo
                                           , MemoryManager* const manager);
      
   /**
    * Determines whether the given string is a registry based authority.
    * 
    * @param authority the authority component of a URI
    * 
    * @return true if the given string is a registry based authority
    */
    static bool isValidRegistryBasedAuthority(const XMLCh* const authority
                                             , const int authLen);

   /**
    * Determines whether the given string is a registry based authority.
    * 
    * @param authority the authority component of a URI
    * 
    * @return true if the given string is a registry based authority
    */
    static bool isValidRegistryBasedAuthority(const XMLCh* const authority);

    /**
     * Determine whether a string is syntactically capable of representing
     * a valid IPv4 address, IPv6 reference or the domain name of a network host.
     *
     * A valid IPv4 address consists of four decimal digit groups
     * separated by a '.'.
     *
     * See RFC 2732 Section 3, and RFC 2373 Section 2.2, for the 
     * definition of IPv6 references.
     *
     * A hostname consists of domain labels (each of which must begin and
     * end with an alphanumeric but may contain '-') separated by a '.'.
     * See RFC 2396 Section 3.2.2.
     *
     * @return true if the string is a syntactically valid IPv4 address
     *              or hostname
     */
     static bool isWellFormedAddress(const XMLCh* const addr
         , MemoryManager* const manager);
     
    /**
     * Determines whether a string is an IPv4 address as defined by 
     * RFC 2373, and under the further constraint that it must be a 32-bit
     * address. Though not expressed in the grammar, in order to satisfy 
     * the 32-bit address constraint, each segment of the address cannot 
     * be greater than 255 (8 bits of information).
     *
     * @return true if the string is a syntactically valid IPv4 address
     */
     static bool isWellFormedIPv4Address(const XMLCh* const addr, const int length);
     
    /**
     * Determines whether a string is an IPv6 reference as defined
     * by RFC 2732, where IPv6address is defined in RFC 2373. The 
     * IPv6 address is parsed according to Section 2.2 of RFC 2373,
     * with the additional constraint that the address be composed of
     * 128 bits of information.
     *
     * Note: The BNF expressed in RFC 2373 Appendix B does not 
     * accurately describe section 2.2, and was in fact removed from
     * RFC 3513, the successor of RFC 2373.
     *
     * @return true if the string is a syntactically valid IPv6 reference
     */
     static bool isWellFormedIPv6Reference(const XMLCh* const addr, const int length);
     
    /**
     * Helper function for isWellFormedIPv6Reference which scans the 
     * hex sequences of an IPv6 address. It returns the index of the 
     * next character to scan in the address, or -1 if the string 
     * cannot match a valid IPv6 address. 
     *
     * @param address the string to be scanned
     * @param index the beginning index (inclusive)
     * @param end the ending index (exclusive)
     * @param counter a counter for the number of 16-bit sections read
     * in the address
     *
     * @return the index of the next character to scan, or -1 if the
     * string cannot match a valid IPv6 address
     */
     static int scanHexSequence (const XMLCh* const addr, int index, int end, int& counter);

    /**
     * Get the indicator as to whether this URI uses the "generic URI"
     * syntax.
     *
     * @return true if this URI uses the "generic URI" syntax, false
     *         otherwise
     */
     bool isGenericURI();

    // -----------------------------------------------------------------------
    //  Miscellaneous methods
    // -----------------------------------------------------------------------

    /**
     * Initialize all fields of this URI from another URI.
     *
     * @param toCopy the URI to copy (cannot be null)
     */
     void initialize(const XMLUri& toCopy);

    /**
     * Initializes this URI from a base URI and a URI specification string.
     * See RFC 2396 Section 4 and Appendix B for specifications on parsing
     * the URI and Section 5 for specifications on resolving relative URIs
     * and relative paths.
     *
     * @param baseURI the base URI (may be null if uriSpec is an absolute
     *               URI)
     *
     * @param uriSpec the URI spec string which may be an absolute or
     *                  relative URI (can only be null/empty if base
     *                  is not null)
     *
     */
     void initialize(const XMLUri* const baseURI
                   , const XMLCh*  const uriSpec);

    /**
     * Initialize the scheme for this URI from a URI string spec.
     *
     * @param uriSpec the URI specification (cannot be null)
     *
     */
     void initializeScheme(const XMLCh* const uriSpec);

    /**
     * Initialize the authority (userinfo, host and port) for this
     * URI from a URI string spec.
     *
     * @param uriSpec the URI specification (cannot be null)
     *
     */
     void initializeAuthority(const XMLCh* const uriSpec);

    /**
     * Initialize the path for this URI from a URI string spec.
     *
     * @param uriSpec the URI specification (cannot be null)
     *
     */
     void initializePath(const XMLCh* const uriSpec);

     /**
      * cleanup the data variables
      *
      */
     void cleanUp();

    static bool isConformantSchemeName(const XMLCh* const scheme,
                                       const int schemeLen);
    static bool processScheme(const XMLCh* const uriStr, int& index);
    static bool processAuthority(const XMLCh* const uriStr, const int authLen);
    static bool isWellFormedAddress(const XMLCh* const addr, const int addrLen);
    static bool processPath(const XMLCh* const pathStr, const int pathStrLen,
                            const bool isSchemePresent);

    // -----------------------------------------------------------------------
    //  Data members
    //
    //  for all the data member, we own it,
    //  responsible for the creation and/or deletion for
    //  the memory allocated.
    //
    // -----------------------------------------------------------------------
    int             fPort;
    XMLCh*          fScheme;
    XMLCh*          fUserInfo;
    XMLCh*          fHost;
    XMLCh*          fRegAuth;
    XMLCh*          fPath;
    XMLCh*          fQueryString;
    XMLCh*          fFragment;
    XMLCh*          fURIText;
    MemoryManager*  fMemoryManager;
};

// ---------------------------------------------------------------------------
//  XMLUri: Getter methods
// ---------------------------------------------------------------------------
inline const XMLCh* XMLUri::getScheme() const
{
    return fScheme;
}

inline const XMLCh* XMLUri::getUserInfo() const
{
	return fUserInfo;
}

inline const XMLCh* XMLUri::getHost() const
{
	return fHost;
}

inline int XMLUri::getPort() const
{
	return fPort;
}

inline const XMLCh* XMLUri::getRegBasedAuthority() const
{
	return fRegAuth;
}

inline const XMLCh* XMLUri::getPath() const
{
	return fPath;
}

inline const XMLCh* XMLUri::getQueryString() const
{
	return fQueryString;
}

inline const XMLCh* XMLUri::getFragment() const
{
	return fFragment;
}

inline const XMLCh* XMLUri::getUriText() const
{
    //
    //  Fault it in if not already. Since this is a const method and we
    //  can't use mutable members due the compilers we have to support,
    //  we have to cast off the constness.
    //
    if (!fURIText)
        ((XMLUri*)this)->buildFullText();

    return fURIText;
}

// ---------------------------------------------------------------------------
//  XMLUri: Helper methods
// ---------------------------------------------------------------------------
inline bool XMLUri::isReservedOrUnreservedCharacter(const XMLCh theChar)
{
   return (XMLString::isAlphaNum(theChar) ||
           XMLString::indexOf(MARK_OR_RESERVED_CHARACTERS, theChar) != -1);
}

inline bool XMLUri::isReservedCharacter(const XMLCh theChar)
{
    return (XMLString::indexOf(RESERVED_CHARACTERS, theChar) != -1);
}

inline bool XMLUri::isPathCharacter(const XMLCh theChar)
{
    return (XMLString::indexOf(PATH_CHARACTERS, theChar) != -1);
}

inline bool XMLUri::isUnreservedCharacter(const XMLCh theChar)
{
    return (XMLString::isAlphaNum(theChar) ||
            XMLString::indexOf(MARK_CHARACTERS, theChar) != -1);
}

XERCES_CPP_NAMESPACE_END

#endif