src/xmlpatterns/parser/qxslttokenizer_p.h
changeset 0 1918ee327afb
child 4 3b1da2848fc7
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the QtXmlPatterns module of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 //
       
    43 //  W A R N I N G
       
    44 //  -------------
       
    45 //
       
    46 // This file is not part of the Qt API.  It exists purely as an
       
    47 // implementation detail.  This header file may change from version to
       
    48 // version without notice, or even be removed.
       
    49 //
       
    50 // We mean it.
       
    51 
       
    52 #ifndef Patternist_XSLTTokenizer_H
       
    53 #define Patternist_XSLTTokenizer_H
       
    54 
       
    55 #include <QQueue>
       
    56 #include <QStack>
       
    57 #include <QUrl>
       
    58 
       
    59 #include "qmaintainingreader_p.h"
       
    60 #include "qreportcontext_p.h"
       
    61 #include "qtokenizer_p.h"
       
    62 #include "qxslttokenlookup_p.h"
       
    63 
       
    64 QT_BEGIN_HEADER
       
    65 
       
    66 QT_BEGIN_NAMESPACE
       
    67 
       
    68 namespace QPatternist
       
    69 {
       
    70     /**
       
    71      * @short A TokenSource which contains one Tokenizer::Token.
       
    72      *
       
    73      * One possible way to optimize this is to let SingleTokenContainer
       
    74      * actually contain a list of tokens, such that XSLTTokenizer::queueToken()
       
    75      * could append to that, instead of instantiating a SingleTokenContainer
       
    76      * all the time.
       
    77      *
       
    78      * @author Frans Englich <frans.englich@nokia.com>
       
    79      */
       
    80     class SingleTokenContainer : public TokenSource
       
    81     {
       
    82     public:
       
    83         inline SingleTokenContainer(const Tokenizer::Token &token, // Copied into m_token.
       
    84                                     const YYLTYPE &location);      // Copied into m_location.
       
    85 
       
    86         virtual Tokenizer::Token nextToken(YYLTYPE *const sourceLocator); // Only declared here. NOTE(review): presumably hands out m_token and flips m_hasDelivered - confirm in the .cpp.
       
    87     private:
       
    88         const Tokenizer::Token m_token;        // The token passed to the constructor.
       
    89         const YYLTYPE          m_location;     // The location passed to the constructor.
       
    90         bool                   m_hasDelivered; // Initialized to false by the constructor.
       
    91     };
       
    92 
       
    93     SingleTokenContainer::SingleTokenContainer(const Tokenizer::Token &token, // Definition of the constructor that the class declares as inline.
       
    94                                                const YYLTYPE &location) : m_token(token)
       
    95                                                                         , m_location(location)
       
    96                                                                         , m_hasDelivered(false) // Always starts out false.
       
    97     { // Empty body: the initializer list above sets all three members.
       
    98     }
       
    99 
       
   100     /**
       
   101      * @short Tokenizes XSL-T 2.0 documents.
       
   102      *
       
   103      * XSLTTokenizer takes in its constructor a pointer to a QIODevice which is
       
   104      * supposed to contain an XSL-T document. XSLTTokenizer then rewrites that
       
   105      * document into XQuery tokens delivered via nextToken(), which the regular
       
   106      * XQuery parser then reads. Hence, the XSL-T language is rewritten into
       
   107      * XQuery code, slightly extended to handle the features specific to
       
   108      * XSL-T.
       
   109      *
       
   110      * @author Frans Englich <frans.englich@nokia.com>
       
   111      */
       
   112     class XSLTTokenizer : public Tokenizer
       
   113                         , private MaintainingReader<XSLTTokenLookup>
       
   114     {
       
   115     public:
       
   116         /**
       
   117          * XSLTTokenizer does not own @p queryDevice.
       
   118          */
       
   119         XSLTTokenizer(QIODevice *const queryDevice,
       
   120                       const QUrl &location,
       
   121                       const ReportContext::Ptr &context,
       
   122                       const NamePool::Ptr &np);
       
   123 
       
   124         virtual Token nextToken(YYLTYPE *const sourceLocator);
       
   125 
       
   126         /**
       
   127          * For XSLT we don't need this mechanism, so we do nothing.
       
   128          */
       
   129         virtual int commenceScanOnly();
       
   130 
       
   131         /**
       
   132          * For XSLT we don't need this mechanism, so we do nothing.
       
   133          */
       
   134         virtual void resumeTokenizationFrom(const int position);
       
   135 
       
   136         virtual void setParserContext(const ParserContext::Ptr &parseInfo);
       
   137 
       
   138         virtual QUrl documentURI() const // Returns whatever queryURI() returns.
       
   139         {
       
   140             return queryURI(); // queryURI() is not declared in this header. NOTE(review): presumably inherited from a base class - confirm.
       
   141         }
       
   142 
       
   143     protected:
       
   144         virtual bool isAnyAttributeAllowed() const;
       
   145 
       
   146     private:
       
   147         inline void validateElement() const;
       
   148 
       
   149         YYLTYPE currentSourceLocator() const;
       
   150 
       
   151         enum State // Element type of the m_state stack and argument type of pushState().
       
   152         {
       
   153             OutsideDocumentElement,   // NOTE(review): name matches outsideDocumentElement(); presumably the state it handles - confirm.
       
   154             InsideStylesheetModule,   // NOTE(review): name matches insideStylesheetModule(); presumably the state it handles - confirm.
       
   155             InsideSequenceConstructor // NOTE(review): name matches insideSequenceConstructor(); presumably the state it handles - confirm.
       
   156         };
       
   157 
       
   158         enum VariableType // Argument type of queueVariableDeclaration().
       
   159         {
       
   160             FunctionParameter,   // NOTE(review): the enumerator names suggest the XSL-T constructs
       
   161             GlobalParameter,     // that introduce a variable or parameter; how each one is treated
       
   162             TemplateParameter,   // is decided in the implementation file, not in this header - confirm there.
       
   163             VariableDeclaration,
       
   164             VariableInstruction,
       
   165             WithParamVariable
       
   166         };
       
   167 
       
   168         void queueNamespaceDeclarations(TokenSource::Queue *const ts,
       
   169                                         QStack<Token> *const target,
       
   170                                         const bool isDeclaration = false);
       
   171 
       
   172         inline void queueToken(const Token &token,
       
   173                                TokenSource::Queue *const ts);
       
   174         void queueEmptySequence(TokenSource::Queue *const to);
       
   175         void queueSequenceType(const QString &expr);
       
   176         /**
       
   177          * If @p emptynessAllowed is @c true, the @c select attribute may
       
   178          * be empty while there also is no sequence constructor.
       
   179          */
       
   180         void queueSimpleContentConstructor(const ReportContext::ErrorCode code,
       
   181                                            const bool emptynessAllowed,
       
   182                                            TokenSource::Queue *const to,
       
   183                                            const bool selectOnlyFirst = false);
       
   184         /**
       
   185          * Tokenizes and queues @p expr as if it was an attribute value
       
   186          * template.
       
   187          */
       
   188         void queueAVT(const QString &expr,
       
   189                       TokenSource::Queue *const to);
       
   190 
       
   191         void hasWrittenExpression(bool &beacon);
       
   192         void commencingExpression(bool &hasWrittenExpression,
       
   193                                   TokenSource::Queue *const to);
       
   194 
       
   195         void outsideDocumentElement();
       
   196         void insideChoose(TokenSource::Queue *const to);
       
   197         void insideFunction();
       
   198 
       
   199         bool attributeYesNo(const QString &localName) const;
       
   200 
       
   201         /**
       
   202          * Scans/skips @c xsl:fallback elements only. This is the case of the
       
   203          * children of @c xsl:sequence, for instance.
       
   204          */
       
   205         void parseFallbacksOnly();
       
   206 
       
   207         /**
       
   208          * Returns true if the current element is either @c stylesheet
       
   209          * or the synonym @c transform.
       
   210          *
       
   211          * This function assumes that m_reader is positioned at an element
       
   212          * and that the namespace is XSL-T.
       
   213          */
       
   214         bool isStylesheetElement() const;
       
   215 
       
   216         /**
       
   217          * Returns true if the current element name is @p name.
       
   218          *
       
   219          * It is assumed that the namespace is XSL-T and that the current
       
   220          * state in m_reader is either QXmlStreamReader::StartElement or
       
   221          * QXmlStreamReader::EndElement.
       
   222          */
       
   223         bool isElement(const NodeName &name) const;
       
   224 
       
   225         /**
       
   226          * Queues a text constructor for @p chars, if @p chars is
       
   227          * not empty.
       
   228          */
       
   229         void queueTextConstructor(QString &chars,
       
   230                                   bool &hasWrittenExpression,
       
   231                                   TokenSource::Queue *const to);
       
   232 
       
   233         /**
       
   234          *
       
   235          * @see <a href="http://www.w3.org/TR/xslt20/#stylesheet-structure">XSL
       
   236          * Transformations (XSLT) Version 2, 3.6 Stylesheet Element</a>
       
   237          */
       
   238         void insideStylesheetModule();
       
   239         void insideTemplate();
       
   240 
       
   241         /**
       
   242          * Takes @p expr for an XPath expression, and pushes the necessary
       
   243          * things for having it delivered as a stream of tokens, appropriate
       
   244          * for Effective Boolean Value parsing.
       
   245          */
       
   246         void queueExpression(const QString &expr,
       
   247                              TokenSource::Queue *const to,
       
   248                              const bool wrapWithParantheses = true);
       
   249 
       
   250         void skipBodyOfParam(const ReportContext::ErrorCode code);
       
   251 
       
   252         void queueParams(const NodeName parentName,
       
   253                          TokenSource::Queue *const to);
       
   254 
       
   255         /**
       
   256          * Used for @c xsl:apply-templates and @c xsl:call-template.
       
   257          */
       
   258         void queueWithParams(const NodeName parentName,
       
   259                              TokenSource::Queue *const to,
       
   260                              const bool initialAdvance = true);
       
   261 
       
   262         /**
       
   263          * Queues an @c xsl:variable declaration. @p variableType tells which
       
   264          * kind it is, for instance an instruction (presumably VariableInstruction)
       
   265          * or a top-level declaration element (presumably VariableDeclaration).
       
   266          */
       
   267         void queueVariableDeclaration(const VariableType variableType,
       
   268                                       TokenSource::Queue *const to);
       
   269 
       
   270         /**
       
   271          * Skips the current sub-tree.
       
   272          *
       
   273          * If text nodes that aren't strippable whitespace, or elements are
       
   274          * encountered, @c true is returned, otherwise @c false.
       
   275          *
       
   276          * If @p exitOnContent is @c true, this function exits immediately
       
   277          * if content is encountered for which it would return @c false.
       
   278          */
       
   279         bool skipSubTree(const bool exitOnContent = false);
       
   280 
       
   281         /**
       
   282          * Queues the necessary tokens for the expression that is either
       
   283          * supplied using a @c select attribute or a sequence constructor,
       
   284          * while doing the necessary error handling for ensuring they are
       
   285          * mutually exclusive.
       
   286          *
       
   287          * It is assumed that the current state of m_reader is
       
   288          * QXmlStreamReader::StartElement, or that the attributes for the
       
   289          * element are supplied through @p atts. This function advances m_reader
       
   290          * up until the corresponding QXmlStreamReader::EndElement.
       
   291          *
       
   292          * If @p emptynessAllowed is @c false, the element must either have a
       
   293          * sequence constructor or a @c select attribute. If @c true, both may
       
   294          * be absent.
       
   295          *
       
   296          * Returns @c true if the queued expression was supplied through the
       
   297          * @c select attribute otherwise @c false.
       
   298          */
       
   299         bool queueSelectOrSequenceConstructor(const ReportContext::ErrorCode code,
       
   300                                               const bool emptynessAllowed,
       
   301                                               TokenSource::Queue *const to,
       
   302                                               const QXmlStreamAttributes *const atts = 0,
       
   303                                               const bool queueEmptyOnEmpty = true);
       
   304 
       
   305         /**
       
   306          * If @p initialAdvance is @c true, insideSequenceConstructor() will
       
   307          * advance m_reader, otherwise it won't. Not doing so is useful
       
   308          * when the caller is already inside a sequence constructor.
       
   309          *
       
   310          * Returns @c true if a sequence constructor was found and queued.
       
   311          * Returns @c false if none was found, and the empty sequence was
       
   312          * synthesized.
       
   313          */
       
   314         bool insideSequenceConstructor(TokenSource::Queue *const to,
       
   315                                        const bool initialAdvance = true,
       
   316                                        const bool queueEmptyOnEmpty = true);
       
   317 
       
   318         bool insideSequenceConstructor(TokenSource::Queue *const to,
       
   319                                        QStack<Token> &queueOnExit,
       
   320                                        const bool initialAdvance = true,
       
   321                                        const bool queueEmptyOnEmpty = true);
       
   322 
       
   323         void insideAttributeSet();
       
   324         void pushState(const State nextState);
       
   325         void leaveState();
       
   326 
       
   327         /**
       
   328          * @short Handles @c xml:space and standard attributes.
       
   329          *
       
   330          * If @p isXSLTElement is @c true, the current element is an XSL-T
       
   331          * element, as opposed to a Literal Result Element.
       
   332          *
       
   333          * handleStandardAttributes() must be called before validateElement(),
       
   334          * because the former determines the version in use, and
       
   335          * validateElement() depends on that.
       
   336          *
       
   337          * The core of this function can't be run many times because it pushes
       
   338          * whitespace handling onto m_stripWhitespace.
       
   339          * m_hasHandledStandardAttributes helps protect against this.
       
   340          *
       
   341          * @see validateElement()
       
   342          * @see <a href="http://www.w3.org/TR/xslt20/#standard-attributes">XSL
       
   343          * Transformations (XSLT) Version 2.0, 3.5 Standard Attributes</a>
       
   344          */
       
   345         void handleStandardAttributes(const bool isXSLTElement);
       
   346 
       
   347         /**
       
   348          * @short Sends the tokens in @p source to @p destination.
       
   349          */
       
   350         inline void queueOnExit(QStack<Token> &source,
       
   351                                 TokenSource::Queue *const destination);
       
   352 
       
   353         /**
       
   354          * Handles the @c type and @c validation attribute on instructions and
       
   355          * literal result elements.
       
   356          *
       
   357          * @p isLRE should be true if the current element is not in the XSL-T
       
   358          * namespace, that is if it's a Literal Result Element.
       
   359          *
       
   360          * @see <a href="http://www.w3.org/TR/xslt20/#validation">XSL
       
   361          * Transformations (XSLT) Version 2.0, 19.2 Validation</a>
       
   362          */
       
   363         void handleValidationAttributes(const bool isLRE) const;
       
   364 
       
   365         void unexpectedContent(const ReportContext::ErrorCode code = ReportContext::XTSE0010) const;
       
   366 
       
   367         void checkForParseError() const;
       
   368 
       
   369         inline void startStorageOfCurrent(TokenSource::Queue *const to);
       
   370         inline void endStorageOfCurrent(TokenSource::Queue *const to);
       
   371 
       
   372         /**
       
   373          * Checks that @p attribute has a value in accordance with what
       
   374          * is allowed and supported.
       
   375          */
       
   376         void handleXSLTVersion(TokenSource::Queue *const to,
       
   377                                QStack<Token> *const queueOnExit,
       
   378                                const bool isXSLTElement,
       
   379                                const QXmlStreamAttributes *atts = 0,
       
   380                                const bool generateCode = true,
       
   381                                const bool setGlobalVersion = false);
       
   382 
       
   383         /**
       
   384          * @short Generates code for reflecting @c xml:base attributes.
       
   385          */
       
   386         void handleXMLBase(TokenSource::Queue *const to,
       
   387                            QStack<Token> *const queueOnExit,
       
   388                            const bool isInstruction = true,
       
   389                            const QXmlStreamAttributes *atts = 0);
       
   390 
       
   391         /**
       
   392          * Concatenates text nodes, ignores comments and processing
       
   393          * instructions, and raises errors on everything else.
       
   394          *
       
   395          * Hence, similar to QXmlStreamReader::readElementText(), except
       
   396          * for error handling.
       
   397          */
       
   398         QString readElementText();
       
   399 
       
   400         /**
       
   401          * Tokenizes and validates xsl:sort statements, if any, until
       
   402          * other content is encountered. The produced tokens are returned
       
   403          * in a list.
       
   404          *
       
   405          * If @p oneSortRequired, at least one @c sort element must appear,
       
   406          * otherwise an error is raised.
       
   407          *
       
   408          * If @p speciallyTreatWhitespace whitespace will be treated as if it
       
   409          * was one of the elements mentioned in step 4 in section 4.2 Stripping
       
   410          * Whitespace from the Stylesheet.
       
   411          */
       
   412         void queueSorting(const bool oneSortRequired,
       
   413                           TokenSource::Queue *const to,
       
   414                           const bool speciallyTreatWhitespace = false);
       
   415 
       
   416         static ElementDescription<XSLTTokenLookup>::Hash createElementDescriptions();
       
   417         static QHash<QString, int> createValidationAlternatives();
       
   418         static QSet<NodeName> createStandardAttributes();
       
   419 
       
   420         /**
       
   421          * Reads the attribute by name @p attributeName, and returns @c true if
       
   422          * its value is @p isTrue, @c false if it is @p isFalse, and raise an
       
   423          * error otherwise.
       
   424          */
       
   425         bool readToggleAttribute(const QString &attributeName,
       
   426                                  const QString &isTrue,
       
   427                                  const QString &isFalse,
       
   428                                  const QXmlStreamAttributes *const atts = 0) const;
       
   429 
       
   430         int readAlternativeAttribute(const QHash<QString, int> &alternatives,
       
   431                                      const QXmlStreamAttribute &attr) const;
       
   432 
       
   433         /**
       
   434          * Returns @c true if the current text node can be skipped without
       
   435          * it leading to a validation error, with respect to whitespace.
       
   436          */
       
   437         inline bool whitespaceToSkip() const;
       
   438 
       
   439         const QUrl                                  m_location;
       
   440         const NamePool::Ptr                         m_namePool;
       
   441         QStack<State>                               m_state;
       
   442         TokenSource::Queue                          m_tokenSource;
       
   443 
       
   444         enum ProcessMode // Element type of the m_processingMode stack declared further down in this class.
       
   445         {
       
   446             BackwardsCompatible, // Backwards-Compatible mode.
       
   447             ForwardCompatible,   // Forwards-Compatible mode.
       
   448             NormalProcessing     // NOTE(review): presumably neither compatibility mode applies - confirm.
       
   449         };
       
   450 
       
   451         /**
       
   452          * Whether we're processing in Forwards-Compatible or
       
   453          * Backwards-Compatible mode.
       
   454          *
       
   455          * This is set by handleStandardAttributes().
       
   456          *
       
   457          * ParserContext has similar information in
       
   458          * ParserContext::isBackwardsCompat. A big distinction is that both the
       
   459          * tokenizer and the parser buffer tokens and have positions disjoint
       
   460          * to each other. E.g, the state the parser has when reducing into
       
   461          * non-terminals, is different from the tokenizer's.
       
   462          */
       
   463         QStack<ProcessMode>                         m_processingMode;
       
   464 
       
   465         /**
       
   466          * Returns @c true if the current state in m_reader is in the XSLT
       
   467          * namespace. It is assumed that the current state is an element.
       
   468          */
       
   469         inline bool isXSLT() const;
       
   470 
       
   471         const QHash<QString, int>                   m_validationAlternatives;
       
   472 
       
   473         ParserContext::Ptr                          m_parseInfo;
       
   474     };
       
   475 }
       
   476 
       
   477 QT_END_NAMESPACE
       
   478 
       
   479 QT_END_HEADER
       
   480 
       
   481 #endif