--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/messagingappbase/smilengine/xhtml/inc/xhtmlparser.h Thu Dec 17 08:44:11 2009 +0200
@@ -0,0 +1,751 @@
+/*
+* Copyright (c) 2007 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description: xhtmlparser declaration
+*
+*/
+
+
+
+#ifndef XHTMLPARSER_H
+#define XHTMLPARSER_H
+
+#include <e32base.h>
+#include <s32strm.h>
+#include <txtrich.h>
+#include <gmxmlparser.h>
+
+class CXhtmlStackManager;
+class CXhtmlEntityConverter;
+class CMDXMLNode;
+class CXhtmlFontSpecs;
+class CXhtmlHyperLink;
+
+
+/**
+ * Abstract observer interface for notification when XHTML parsing is complete.
+ * It should be implemented by users of CXhtmlParser.
+ *
+ * @code
+ * ?good_class_usage_example(s)
+ * @endcode
+ *
+ * @lib ?library
+ * @since S60 v3.2
+ */
+NONSHARABLE_CLASS( MXhtmlParserObserver )
+ {
+public:
+
+ /**
+ * Call back function used to inform a client of the Parser
+ * when a parsing operation completes.
+ *
+ * @since S60 v3.2
+ */
+ virtual void ParseCompleteL() = 0;
+
+ /**
+ * Call back function used to inform a client of the Parser
+ * about error.
+ *
+ * @since S60 v3.2
+ */
+ virtual void ParseError( TInt aError ) = 0;
+ };
+
+/**
+ * This class forms the basis for the parser.
+ * It has the call back methods that the parser uses to notify
+ * parser events. These methods then adds the apppropriate text &
+ * formatting to the richtext object.
+ * The xhtml renderer owns this richtext object.
+ *
+ * @code
+ * ?good_class_usage_example(s)
+ * @endcode
+ *
+ * @lib ?library
+ * @since S60 v3.2
+ */
+NONSHARABLE_CLASS( CXhtmlParser ) : public CBase, public MMDXMLParserObserver
+ {
+public:
+
+ /**
+ * Parser phase one constructor
+ *
+ * @since S60 v3.2
+ * @param aParserObserver Observer for notification when XHTML parsing is complete
+ * @return Created CXhtmlParser instance
+ */
+ IMPORT_C static CXhtmlParser* NewL( MXhtmlParserObserver* aParserObserver );
+
+ /**
+ * Destructor.
+ */
+ virtual ~CXhtmlParser();
+
+ /**
+ * Creates a DOM structure from a given XHTML file.
+ *
+ * @since S60 v3.2
+ * @param aRFs -
+ * @param aFileToParse -
+ */
+ IMPORT_C void CreateDomL( RFs &aRFs, const TDesC& aFileToParse );
+
+ /**
+ * Creates a DOM structure from a given XHTML file.
+ *
+ * @since S60 v3.2
+ * @param aFileHandleToParse File handle to XHTML file to parse
+ */
+ IMPORT_C void CreateDomL( RFile& aFileHandleToParse );
+
+ /**
+ * Constructs the richtext object from DOM structure.
+ *
+ * @since S60 v3.2
+ * @param aRichText Richtext object where text, style and format is added based on parsed result
+ */
+ IMPORT_C void ParseL( CRichText& aRichText );
+
+ /**
+ * Cancels the operation.
+ *
+ * @since S60 v3.2
+ */
+ IMPORT_C void Cancel();
+
+
+ /**
+ * Sets fonts used when constructing the richtext object.
+ *
+ * @since S60 v3.2
+ * @param aBigFont Big font
+ * @param aDefaultFont Default font
+ * @param aSmallFont Small font
+ * @param aCourierFont Courier font
+ */
+
+ IMPORT_C void SetFonts( const CFont* aBigFont,
+ const CFont* aDefaultFont,
+ const CFont* aSmallFont,
+ const CFont* aCourierFont );
+
+ /**
+ * Sets text color of default font.
+ *
+ * @since S60 v3.2
+ * @param aColor Color
+ */
+ IMPORT_C void SetDefaultTextColor( TRgb aColor );
+
+ /**
+ * Sets parsing mode.
+ *
+ * @since S60 v3.2
+ * @param aPlainText If true, plain text mode is used (no style and format added)
+ * @param aShowUrls
+ */
+ IMPORT_C void SetMode( TBool aPlainText, TBool aShowUrls );
+
+ /**
+ * Gets information (start position, end position and target URL)
+ * of hyperlink by hyperlink index. Index starts from 0.
+ *
+ * @since S60 v3.2
+ * @param aIndex Index of requested hyperlink
+ * @param aStartPos Start position of hyperlink text. Filled by this function
+ * @param aEndPos End position of hyperlink text. Filled by this function
+ * @param aAddress target URL of hyperlink. Filled by this function
+ * @return KErrNone if completed successfully, KErrArgument if index is out of bounds.
+ */
+ IMPORT_C TInt HyperLink( TInt aIndex, TInt& aStartPos, TInt& aEndPos, TPtrC& aAddress );
+
+ /**
+ * Returns a number of found hyperlinks.
+ *
+ * @since S60 v3.2
+ * @return Number of found hyperlinks
+ */
+ IMPORT_C TInt HyperLinkCount();
+
+
+public: // inlines
+
+ /**
+ * Sets a new limit for nested lists. Default value is 5.
+ *
+ * @param Limit value
+ */
+ inline void SetLimitForNestedLists( TInt aLimit );
+
+ /**
+ * Sets a new width for indent. Default width is width of 4 space characters.
+ *
+ * @param Number of space characters used to calculate width of indent
+ */
+ inline void SetIndentWidth( TInt aCharacters );
+
+
+
+protected:
+
+// from base class MMDXMLParserObserver
+
+
+ /**
+ * From MMDXMLParserObserver.
+ * XML parser calls this when parsing is completed (DOM is filled).
+ * XHTML parser uses this DOM to perform its own parsing.
+ *
+ * @since S60 v3.2
+ */
+ void ParseFileCompleteL();
+
+
+private:
+
+ CXhtmlParser( MXhtmlParserObserver* aParserObserver );
+ void ConstructL( );
+
+ /**
+ * Performs XHTML parsing using XML document filled by XML parser.
+ *
+ * @since S60 v3.2
+ * @param aXmlDocument Given XML document
+ */
+ void ParseDomL( CMDXMLDocument* aXmlDocument );
+
+ /**
+ * Handles a (begin) node in DOM document.
+ * Calls corresponding handler based on node type (Element, Text, Comment etc.)
+ *
+ * @since S60 v3.2
+ * @param aNode Node to be handled.
+ * @return True if completed successfully
+ */
+ void OpenNodeL( CMDXMLNode* aNode );
+
+ /**
+ *
+ * Handles a (end) node in DOM document.
+ *
+ * @since S60 v3.2
+ * @param aNode Node to be handled.
+ */
+ void CloseNodeL( CMDXMLNode* aNode );
+
+ /**
+ * Performs some preparations before handling of begin tag.
+ *
+ * @since S60 v3.2
+ */
+ void PrepareForBeginElementL( const TDesC& aName );
+
+ /**
+ * Performs needed operations for each begin tag. (e.g. for <div>, <p> etc.).
+ * Contains main funtionality of parsing logic. (possible attributes are not
+ * handled here, but in AttributevalueL function)
+ *
+ * @since S60 v3.2
+ * @param aName Name of the tag (without angle brackets)
+ */
+ void BeginElementL( const TDesC& aName );
+
+ /**
+ * Performs needed operations for each end tag. (e.g. for </p>, </div> etc.).
+ * Contains main funtionality of parsing logic.
+ *
+ * @since S60 v3.2
+ * @param aName Name of the tag (without angle brackets)
+ */
+ void EndElementL( const TDesC& aName );
+
+ /**
+ * Begins a new style.
+ *
+ * Creates a new StyleInfo item and adds it to the stack. This info is used later when
+ * a new style is applied to richtext and it contains e.g. the start position of text to
+ * be changed by a new style.
+ *
+ * @since S60 v3.2
+ */
+ void BeginStyleL();
+
+ /**
+ * Ends a style
+ *
+ * Applies a new style to richtext object and removes the StyleInfo item from stack.
+ * Note that a new style is applied only if style is changed by function ChangeCurrentStyleL.
+ * In this case also current CharFormatLayer (created in ChangeCurrentStyleL) is removed by
+ * calling CleanCharLayer.
+ *
+ * @since S60 v3.2
+ */
+ void EndStyleL();
+
+ /**
+ * Changes current style.
+ *
+ * Applies "old" style to the richtext object (from iStylePos to current position).
+ * Creates a new CharFormatLayer with new style (based on old layer in iCurrentCharLayer)
+ * and marks the previous StyleInfo item in stack to be changed.
+ * So, because item is marked to be changed, this new style is recognized in EndStyleL and
+ * applied to the richtext object.
+ *
+ * @since S60 v3.2
+ * @param charFormat ?description
+ * @param charFormatMask ?description
+ */
+ void ChangeCurrentStyleL( TCharFormat charFormat, TCharFormatMask charFormatMask );
+
+ /**
+ * Applies current style to richtext object (from iStylePos to end of document)
+ *
+ * @since S60 v3.2
+ */
+ void ApplyStyleL();
+
+ /**
+ * Begins a new paragraph.
+ *
+ * @since S60 v3.2
+ * @param aAlignment Alignment
+ */
+ void BeginParagraphL( CParaFormat::TAlignment aAlignment );
+
+ /**
+ * Ends a paragraph.
+ *
+ * @since S60 v3.2
+ */
+ void EndParagraphL();
+
+ /**
+ * Applies a paragraph format to richtext object.
+ *
+ * @since S60 v3.2
+ * @param aParaStart Start position of the paragraph
+ * @param aAlignment Alignment
+ */
+ void ApplyParagraphL( TInt aParaStart, CParaFormat::TAlignment aAlignment );
+
+ /**
+ * Begins a new list (with <ol> or <ul> tag.)
+ * Begins a new paragraphs and creates a new ListInfo item to stack to store list information.
+ *
+ * @since S60 v3.2
+ * @param aListContext Type of the current list, ordered or unordered
+ */
+ void BeginListL( TInt aListContext );
+
+ /**
+ * Ends a list.
+ * Ends paragraphs and removes ListInfo item from stack.
+ *
+ * NOTE: TELL HERE MORE ABOUT FUNCTIONALITY!!!!!!!!!!!!!!
+ *
+ * @since S60 v3.2
+ */
+ void EndListL();
+
+ /**
+ * NOTE: TELL HERE MORE ABOUT FUNCTIONALITY!!!!!!!!!!!!!!
+ *
+ * @since S60 v3.2
+ * @return ?description
+ */
+ void BeginListItemL();
+
+ /**
+ * Stores the start position of hyperlink to be used later.
+ * This function is called when <a> tag is found.
+ *
+ * @since S60 v3.2
+ * @param aBeginPosition Start position of the hyperlink
+ */
+ void BeginHyperLink( TInt aBeginPosition );
+
+ /**
+ * Creates a new hyperlink item and adds it to hyperlink array.
+ * This item contains begin and end position of hyperlink text and
+ * target URL of the link.
+ * User is later able to get these hyperlinks by function HyperLink.
+ *
+ * This function is called when </a> tag is found.
+ *
+ * @since S60 v3.2
+ * @param aEndPosition End position of the hyperlink
+ */
+ void EndHyperLinkL( TInt aEndPosition );
+
+ /**
+ * This function adds the actual text to richtext object.
+ * Text can be added as such (if preformatting is in use) or
+ * so that extra whitespaces are removed (preformatting not in use)
+ *
+ * @since S60 v3.2
+ * @param aData Text to be added to richtext object
+ */
+ void ContentL( const TDesC& aData );
+
+ /**
+ * Removes extra whitespace characters from a text.
+ *
+ * @since S60 v3.2
+ * @param aData Text to be collapsed down
+ */
+ void CollapseWhiteSpaces( TDes& aData );
+
+ /**
+ * Handles attributes in begin tags.
+ * Only a few attributes are supported.
+ *
+ * @since S60 v3.2
+ * @param aName Name of the attribute
+ * @param aValue Value of the attribute
+ */
+ void AttributeValueL( const TDesC& aName, const TDesC& aValue );
+
+ /**
+ * Sets target URL of current hyperlink.
+ *
+ * @since S60 v3.2
+ * @param aUrl Target URL of hyperlink
+ */
+ void SetReferenceL( const TDesC& aUrl );
+
+ /**
+ * Removes all space characters from the string.
+ *
+ * @since S60 v3.2
+ * @param aString Text to be handled
+ */
+ void RemoveAllSpace( TDes& aString );
+
+ /**
+ * Splits passed attribute string to name and value.
+ *
+ * @since S60 v3.2
+ * @param aString Attribute string
+ * @param aAttributeName Attribute name
+ * @param aAttributeValue Attribute value
+ */
+ void SplitAttribute( TPtrC& aString, TPtrC& aAttributeName,
+ TPtrC& aAttributeValue );
+
+ /**
+ * Parses color value from given string and creates TRgb object based on it.
+ * Supports both numberic format and name format (e.g. red or #ff0000)
+ *
+ * @since S60 v3.2
+ * @param aString String that contains the color value
+ * @param aTransparent The function sets this true if color is transparent
+ * @return TRgb object based on given color
+ */
+ TRgb ParseColor( const TDesC& aString, TBool& aTransparent );
+
+ /**
+ * CDataL
+ *
+ * @since S60 v3.2
+ * @param aData Data
+ */
+ void CdataL( const TDesC& aData );
+
+ /**
+ * Cleans current CharFormatLayer and sets previous layer to active.
+ * This is used to return to old style at the end of current style handling.
+ *
+ * @since S60 v3.2
+ * @return Previous CharFormatLayer
+ */
+ CCharFormatLayer* CleanCharLayer();
+
+ /**
+ * Converts between arabic-indic digits and european digits based on existing language setting.
+ * So it'll convert any digit from the string
+ * to use either european digits or arabic-indic digits based on current settings.
+ *
+ * @since S60 v3.2
+ * @param aFieldString: Data buffer used in conversion.
+ * @param @param aFieldData: Return converted data in this parameter.
+ */
+ void DoNumberConversion( HBufC* aFieldString, TPtrC& aFieldData ) const;
+
+ /**
+ * Sets all internal variables to initial state.
+ *
+ * @since S60 v3.2
+ */
+ void ResetValues();
+
+ /**
+ * Inserts text to the RichText object.
+ *
+ * @since S60 v3.2
+ * @param aData Text to be written
+ */
+ void InsertTextL( const TDesC& aText );
+
+ /**
+ * Inserts character to the RichText object.
+ *
+ * @since S60 v3.2
+ * @param aData Character to be written
+ */
+ void InsertCharacterL( const TChar aText );
+
+ /**
+ * Appends a new paragraph to the RichText object.
+ *
+ * @since S60 v3.2
+ * @param aPlainText tells if plain text is used or not
+ */
+ void AppendParagraphL( TBool aPlainText );
+
+ /**
+ * Inserts line break to the RichText object.
+ *
+ * @since S60 v3.2
+ */
+ void InsertLineBreak();
+
+ /**
+ * Inserts "forced" line break to the RichText object.
+ * This is called when <br> tag is found.
+ *
+ * @since S60 v3.2
+ */
+ void InsertForcedLineBreak();
+
+
+
+private: // data
+
+ /**
+ * Tells if parsing is allowed or not. If not, no text is applied to RichText.
+ */
+ TBool iParsingAllowed;
+
+ /**
+ * If this is true, XHTML is parsed as plain text.
+ * In this case styles and paragraphs are not applied to rendered text.
+ */
+ TBool iPlainText;
+
+ /**
+ * If this is true, URL addresses are shown after links.
+ */
+ TBool iShowUrls;
+
+ /**
+ * Tells if preformatted parsing is in use. (true between tags <pre> and </pre>).
+ * When preformatted parsing is in use, text is added to RichText in same format
+ * as it is written between tags <pre> and </pre>.
+ * For example all whitespaces are added etc.
+ * However, if there is another tags inside <pre> and </pre> tags, they are parsed
+ * normally. (For example styles are changed normally: <p style="color:red">
+ * changes the text color to red also in preformatted text.)
+ *
+ */
+ TInt iPreformatted;
+
+ /**
+ * Current text alignment (left, center or right). Used in paragraphs.
+ * Changed by tag with corresponding style atribute, for example:
+ * <p style="text-align:right">
+ *
+ */
+ CParaFormat::TAlignment iAlignment;
+
+ /**
+ * Start position of the current style. Every time when a new style is added to
+ * richtext object, it is applied from iStylePos to the end of document. After that,
+ * iStylePos is set to point to the end of document.
+ */
+ TInt iStylePos;
+
+ /**
+ * Tells if a new paragraph has already been added to richtext object AND
+ * no characters are added after that. Used to avoid adding of empty paragraphs
+ * For example. if there are several end tags without text
+ * between them and each end tag creates a new paragraph, this generates unnecessary
+ * paragraphs (also shown as extra lines in text) without handling of the situations with this variable.
+ * So, if a new paragraph has already added and no text after that (=this variable is true),
+ * new paragraphs are not added.
+ */
+ TBool iNewParagraph;
+
+ /**
+ * iNewLines
+ */
+ TInt iNewLines;
+
+ /**
+ * iForcedNewLines
+ */
+ TInt iForcedNewLines;
+
+ /**
+ * iSkipWhiteSpaces
+ */
+ TBool iSkipWhiteSpaces;
+
+ /**
+ * Tells is current alignment has changed by style attribute. (e.g. <p style="text-align:right">)
+ */
+ TBool iAlignmentChanged;
+
+ /**
+ * Tells if tag in question is <img>. Alt attributes are handled only in this case.
+ */
+ TBool iImageFound;
+
+ /**
+ * iFirstTextLineAdded
+ */
+ TBool iFirstTextLineAdded;
+
+ /**
+ * Current list context, ordered or unordered list. Used to define how list items are rendered.
+ * (in orderd list with item number and in unordered list with bullets)
+ */
+ TInt iCurrentListContext;
+
+ /**
+ * Index of current level of definition list, tells in which level we are now.
+ * (There might be several nested definition lists, this is used to support for this).
+ * Used to define margins widths (with other same kind of variables) when a new
+ * paragraph is created.
+ */
+ TInt iDefListLevel;
+
+ /**
+ * Index of current level of block quote, tells in which level we are now.
+ *
+ * (In correct XHTML there may NOT be nested block quotes, but this is needed
+ * also for one-level blocquote. Support also nested block-quotes although they are not allowed
+ * in XHTML).
+ * Used to define margins widths (with other same kind of variables) when a new
+ * paragraph is created.
+ */
+ TInt iBlockQuoteLevel;
+
+ /**
+ * Stores target URL address of current hyperlink.
+ */
+ TInt iHyperLinkPos;
+
+ /**
+ * Default text color. Set by user by calling SetTextColor.
+ */
+ TRgb iTextColor;
+
+ /**
+ * Default margin width of ordered and unordered lists.
+ * Set by user by calling SetMargins.
+ */
+ TInt iListMargin;
+
+ /**
+ * Default margin width of definition lists.
+ * Set by user by calling SetMargins.
+ */
+ TInt iDefListMargin;
+
+ /**
+ * Default margin width of block quote.
+ * Set by user by calling SetMargins.
+ */
+ TInt iBlockQuoteMargin;
+
+ /**
+ * Observer instance for notification when XHTML parsing is complete.
+ * Not own.
+ */
+ MXhtmlParserObserver* iObserver;
+
+ /**
+ * XML parser used to parse from XML file to DOM document.
+ * Own.
+ */
+ CMDXMLParser* iXmlParser;
+
+ /**
+ * RichText object that is modified by XHTML parser.
+ * XHTML parser creates a formatted text based on parsed XHTML and adds
+ * the text to RichText object (with styles and paragraphs).
+ * RichText is passed to parser in both ParseL functions.
+ * The xhtml renderer owns this richtext object.
+ * Not own.
+ */
+ CRichText* iRichText;
+
+
+
+ CMDXMLDocument* iXmlDocument;
+
+ /**
+ * Stackmanager for handling of stacks.
+ * Own.
+ */
+ CXhtmlStackManager* iStackManager;
+
+ /**
+ * Stores font-related information
+ * Own.
+ */
+ CXhtmlFontSpecs* iXhtmlFontSpecs;
+
+ /**
+ * current CharFormatLayer that is based on GlobalCharFormatLayer
+ * of Richtext object. New layers are added in function ChangeCurrentStyle
+ * and removed in function CleanCharlayer. This variable tells the "style" that
+ * is applied to text (e.g. color, fontsize etc.)
+ * Own.
+ */
+ CCharFormatLayer* iCurrentCharLayer;
+
+ /**
+ * Array of hyperlink information. Contains information of all found hyperlinks.
+ * Own.
+ */
+ CArrayPtrFlat<CXhtmlHyperLink>* iHyperLinkArray;
+
+ /**
+ * Stores the value of current "href" attribute of <a> tag.
+ * In practice the value contains target URL of the link.
+ * Not own.
+ */
+ HBufC* iHyperLinkAddress;
+
+ /**
+ * The number of space characters used to calculate the width of indent.
+ * Default value is 4 (space characters).
+ */
+ TInt iIndentWidth;
+
+ /**
+ * Maximum number of nested list levels.
+ * Default value is 5
+ */
+ TInt iMaxListLevels;
+ };
+
+#include <xhtmlparser.inl>
+
+#endif