messagingappbase/smilengine/xhtml/inc/xhtmlparser.h
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Wed, 14 Apr 2010 15:53:21 +0300
branchRCL_3
changeset 15 52d61119153d
parent 0 72b543305e3a
permissions -rw-r--r--
Revision: 201013 Kit: 201015

/*
* Copyright (c) 2007 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:  Declares CXhtmlParser, which renders XHTML into a CRichText
*               object, and its observer interface MXhtmlParserObserver.
*
*/



#ifndef XHTMLPARSER_H
#define XHTMLPARSER_H

#include <e32base.h>
#include <s32strm.h>
#include <txtrich.h>
#include <gmxmlparser.h>

class CXhtmlStackManager;
class CXhtmlEntityConverter;
class CMDXMLNode;
class CXhtmlFontSpecs;
class CXhtmlHyperLink;


/**
 *  Abstract observer interface through which CXhtmlParser reports the
 *  outcome of XHTML parsing (completion or error).
 *  It should be implemented by users of CXhtmlParser; a pointer to the
 *  implementation is passed to CXhtmlParser::NewL.
 *
 *  @since S60 v3.2
 */
NONSHARABLE_CLASS( MXhtmlParserObserver )
	{
public:

    /**
     * Call back function used to inform a client of the Parser
     * when a parsing operation completes.
     *
     * NOTE(review): the trailing 'L' follows the Symbian naming convention
     * for functions that may leave; how the parser treats a leave from this
     * callback is not visible in this header.
     *
     * @since S60 v3.2
     */
	virtual void ParseCompleteL() = 0;
	
	/**
     * Call back function used to inform a client of the Parser
     * about error.
     *
     * @since S60 v3.2
     * @param aError Error code describing the failure. Presumably one of the
     *               system wide error codes - confirm against the implementation.
     */
	virtual void ParseError( TInt aError ) = 0;
	};

/**
 *  XHTML parser that produces formatted rich text from an XHTML document.
 *
 *  The class implements MMDXMLParserObserver, so the XML parser can notify
 *  it when the DOM document is ready. The private handler functions of this
 *  class then add the appropriate text and formatting to the richtext object.
 *  The xhtml renderer owns this richtext object.
 *
 *  The client is notified about the result through MXhtmlParserObserver.
 *
 *  @since S60 v3.2
 */ 
NONSHARABLE_CLASS( CXhtmlParser ) : public CBase, public MMDXMLParserObserver
	{	
public:

    /**
     * Factory function that creates a new parser instance.
     * (The C++ constructor and ConstructL are private.)
     *
     * @since S60 v3.2
     * @param aParserObserver Observer for notification when XHTML parsing is complete.
     *                        Ownership is not transferred (see iObserver).
     * @return Created CXhtmlParser instance
     */	
	IMPORT_C static CXhtmlParser* NewL( MXhtmlParserObserver* aParserObserver );	

    /**
    * Destructor.
    */
    virtual ~CXhtmlParser();
	
    /**
     * Creates a DOM structure from a given XHTML file.
     *
     * NOTE(review): completion is presumably reported asynchronously through
     * ParseFileCompleteL - confirm against the implementation.
     *
     * @since S60 v3.2
     * @param aRFs File server session used to access the file
     * @param aFileToParse Name of the XHTML file to parse
     *                     (presumably a full path - confirm)
     */	
	IMPORT_C void CreateDomL( RFs &aRFs, const TDesC& aFileToParse );
				
    /**
     * Creates a DOM structure from a given XHTML file.
     *
     * @since S60 v3.2
     * @param aFileHandleToParse File handle to XHTML file to parse
     */	
	IMPORT_C void CreateDomL( RFile& aFileHandleToParse );
	
	/**
     * Constructs the richtext object from DOM structure.
     *
     * @since S60 v3.2
     * @param aRichText Richtext object where text, style and format is added based on parsed result
     */	
	IMPORT_C void ParseL( CRichText& aRichText );

	/**
     * Cancels the operation.
     *
     * @since S60 v3.2
     */	
    IMPORT_C void Cancel();


    /**
     * Sets fonts used when constructing the richtext object.
     * 
     * NOTE(review): ownership of the font objects is not stated here;
     * the pointers are const, so presumably they are not owned - confirm.
     *
     * @since S60 v3.2
     * @param aBigFont Big font
     * @param aDefaultFont Default font
     * @param aSmallFont Small font
     * @param aCourierFont Courier font
     */	
     
	IMPORT_C void SetFonts( const CFont* aBigFont,
	                        const CFont* aDefaultFont,
	                        const CFont* aSmallFont,
	                        const CFont* aCourierFont );

    /**
     * Sets text color of default font.
     *
     * @since S60 v3.2
     * @param aColor Color
     */	
	IMPORT_C void SetDefaultTextColor( TRgb aColor );
	
	/**
     * Sets parsing mode.
     *
     * @since S60 v3.2
     * @param aPlainText If true, plain text mode is used (no style and format added) 
     * @param aShowUrls If true, URL addresses are shown after links
     *                  (see iShowUrls)
     */	
	IMPORT_C void SetMode( TBool aPlainText, TBool aShowUrls );

    /**
     * Gets information (start position, end position and target URL)
     * of hyperlink by hyperlink index. Index starts from 0. 
     *
     * @since S60 v3.2
     * @param aIndex Index of requested hyperlink
     * @param aStartPos Start position of hyperlink text. Filled by this function
     * @param aEndPos End position of hyperlink text. Filled by this function
     * @param aAddress target URL of hyperlink. Filled by this function
     * @return KErrNone if completed successfully, KErrArgument if index is out of bounds.
     */	
	IMPORT_C TInt HyperLink( TInt aIndex, TInt& aStartPos, TInt& aEndPos, TPtrC& aAddress );
     
    /**
     * Returns a number of found hyperlinks. 
     *
     * @since S60 v3.2
     * @return Number of found hyperlinks
     */	
	IMPORT_C TInt HyperLinkCount();
	
	
public: // inlines

    /**
     * Sets a new limit for nested lists. Default value is 5. 
     *
     * @param aLimit Maximum number of nested list levels (see iMaxListLevels)
     */
    inline void SetLimitForNestedLists( TInt aLimit );    

    /**
     * Sets a new width for indent. Default width is width of 4 space characters. 
     *
     * @param aCharacters Number of space characters used to calculate width of indent
     */
    inline void SetIndentWidth( TInt aCharacters );    


    
protected:

// from base class MMDXMLParserObserver


    /**
     * From MMDXMLParserObserver.
     * XML parser calls this when parsing is completed (DOM is filled).
     * XHTML parser uses this DOM to perform its own parsing.
     *
     * @since S60 v3.2
     */
    void ParseFileCompleteL();
    
				
private:

    /**
     * C++ constructor. Private; instances are created with NewL.
     *
     * @param aParserObserver Observer to be notified about parsing result
     */
	CXhtmlParser( MXhtmlParserObserver* aParserObserver );

    /**
     * Second phase constructor. Private; called as part of NewL
     * (presumably - the NewL implementation is not visible in this header).
     */
	void ConstructL( );
	
    /**
     * Performs XHTML parsing using XML document filled by XML parser.
     *
     * @since S60 v3.2
     * @param aXmlDocument Given XML document
     */	
	void ParseDomL( CMDXMLDocument* aXmlDocument );

    /**
     * Handles a (begin) node in DOM document.
     * Calls corresponding handler based on node type (Element, Text, Comment etc.) 
     *
     * @since S60 v3.2
     * @param aNode Node to be handled.
     */	
	void OpenNodeL( CMDXMLNode* aNode );

    /**
     * 
     * Handles a (end) node in DOM document.
     *
     * @since S60 v3.2
     * @param aNode Node to be handled.
     */	
	void CloseNodeL( CMDXMLNode* aNode );

    /**
     * Performs some preparations before handling of begin tag.
     *
     * @since S60 v3.2
     * @param aName Name of the tag (presumably without angle brackets,
     *              as in BeginElementL - confirm)
     */	
	void PrepareForBeginElementL( const TDesC& aName );

    /**
     * Performs needed operations for each begin tag. (e.g. for <div>, <p> etc.).
     * Contains main functionality of parsing logic. (possible attributes are not
     * handled here, but in AttributeValueL function)
     *
     * @since S60 v3.2
     * @param aName Name of the tag (without angle brackets)
     */	
	void BeginElementL( const TDesC& aName );

    /**
     * Performs needed operations for each end tag. (e.g. for  </p>, </div> etc.).
     * Contains main functionality of parsing logic.
     *
     * @since S60 v3.2
     * @param aName Name of the tag (without angle brackets)
     */	
	void EndElementL( const TDesC& aName );

   /**
     * Begins a new style. 
     * 
     * Creates a new StyleInfo item and adds it to the stack. This info is used later when
     * a new style is applied to richtext and it contains e.g. the start position of text to
     * be changed by a new style.
     *
     * @since S60 v3.2
     */	
	void BeginStyleL();

    /**
     * Ends a style
     *
     * Applies a new style to richtext object and removes the StyleInfo item from stack.
     * Note that a new style is applied only if style is changed by function ChangeCurrentStyleL.
     * In this case also current CharFormatLayer (created in ChangeCurrentStyleL) is removed by
     * calling CleanCharLayer.
     *
     * @since S60 v3.2
     */	
	void EndStyleL();

    /**
     * Changes current style.
     *
     * Applies "old" style to the richtext object (from iStylePos to current position).
     * Creates a new CharFormatLayer with new style (based on old layer in iCurrentCharLayer)
     * and marks the previous StyleInfo item in stack to be changed.
     * So, because item is marked to be changed, this new style is recognized in EndStyleL and
     * applied to the richtext object.  
     *
     * @since S60 v3.2
     * @param charFormat Character format attributes of the new style
     * @param charFormatMask Mask for charFormat; presumably selects which
     *                       attributes of charFormat are applied - confirm
     */	
	void ChangeCurrentStyleL( TCharFormat charFormat, TCharFormatMask charFormatMask );

    /**
     * Applies current style to richtext object (from iStylePos to end of document)
     *
     * @since S60 v3.2
     */	
	void ApplyStyleL();

    /**
     * Begins a new paragraph. 
     *
     * @since S60 v3.2
     * @param aAlignment Alignment
     */	
	void BeginParagraphL( CParaFormat::TAlignment aAlignment );

    /**
     * Ends a paragraph.
     *
     * @since S60 v3.2
     */	
	void EndParagraphL();

    /**
     * Applies a paragraph format to richtext object.
     *
     * @since S60 v3.2
     * @param aParaStart Start position of the paragraph
     * @param aAlignment Alignment
     */	
	void ApplyParagraphL( TInt aParaStart, CParaFormat::TAlignment aAlignment );

    /**
     * Begins a new list (with <ol> or <ul> tag.)
     * Begins a new paragraph and adds a new ListInfo item to the stack to store list information.
     * 
     * @since S60 v3.2
     * @param aListContext Type of the current list, ordered or unordered
     */	
	void BeginListL( TInt aListContext );

    /**
     * Ends a list.
     * Ends paragraphs and removes ListInfo item from stack.
     *
     * TODO: describe the functionality in more detail.
     *
     * @since S60 v3.2
     */	
	void EndListL();

    /**
     * Begins a new list item.
     * NOTE(review): presumably called when <li> tag is found - confirm
     * against the implementation.
     *
     * TODO: describe the functionality in more detail.
     *
     * @since S60 v3.2
     */	
	void BeginListItemL();

    /**
     * Stores the start position of hyperlink to be used later.
     * This function is called when <a> tag is found.
     *
     * @since S60 v3.2
     * @param aBeginPosition Start position of the hyperlink
     */	
	void BeginHyperLink( TInt aBeginPosition );

    /**
     * Creates a new hyperlink item and adds it to hyperlink array.
     * This item contains begin and end position of hyperlink text and
     * target URL of the link.
     * User is later able to get these hyperlinks by function HyperLink.
     *
     * This function is called when </a> tag is found.
     *
     * @since S60 v3.2
     * @param aEndPosition End position of the hyperlink
     */	
	void EndHyperLinkL( TInt aEndPosition );

    /**
     * This function adds the actual text to richtext object.
     * Text can be added as such (if preformatting is in use) or
     * so that extra whitespaces are removed (preformatting not in use)
     *
     * @since S60 v3.2
     * @param aData Text to be added to richtext object
     */	
	void ContentL( const TDesC& aData );

    /**
     * Removes extra whitespace characters from a text. 
     *
     * @since S60 v3.2
     * @param aData Text to be collapsed down. Modified in place.
     */	
	void CollapseWhiteSpaces( TDes& aData );

    /**
     * Handles attributes in begin tags.
     * Only a few attributes are supported. 
     *
     * @since S60 v3.2
     * @param aName Name of the attribute
     * @param aValue Value of the attribute
     */	
	void AttributeValueL( const TDesC& aName, const TDesC& aValue );

    /**
     * Sets target URL of current hyperlink.
     *
     * @since S60 v3.2
     * @param aUrl Target URL of hyperlink
     */	
	void SetReferenceL( const TDesC& aUrl );

    /**
     * Removes all space characters from the string. 
     *
     * @since S60 v3.2
     * @param aString Text to be handled. Modified in place.
     */	
	void RemoveAllSpace( TDes& aString );

    /**
     * Splits passed attribute string to name and value.
     *
     * NOTE(review): aString is a non-const reference, so it may also be
     * updated by this function (e.g. to point past the consumed attribute) -
     * confirm against the implementation.
     *
     * @since S60 v3.2
     * @param aString Attribute string
     * @param aAttributeName Attribute name. Filled by this function
     * @param aAttributeValue Attribute value. Filled by this function
     */	
	void SplitAttribute( TPtrC& aString, TPtrC& aAttributeName, 
		                    TPtrC& aAttributeValue );
      
    /**
     * Parses color value from given string and creates TRgb object based on it.
     * Supports both numeric format and name format (e.g. red or #ff0000) 
     *
     * @since S60 v3.2
     * @param aString String that contains the color value
     * @param aTransparent The function sets this true if color is transparent
     * @return TRgb object based on given color
     */	
	TRgb ParseColor( const TDesC& aString, TBool& aTransparent );
	
    /**
     * Handles a CDATA section.
     * NOTE(review): how the data is processed is not visible in this header.
     *
     * @since S60 v3.2
     * @param aData Data
     */	
	void CdataL( const TDesC& aData );

    /**
     * Cleans current CharFormatLayer and sets previous layer to active.
     * This is used to return to old style at the end of current style handling.
     *
     * @since S60 v3.2
     * @return Previous CharFormatLayer
     */	
	CCharFormatLayer* CleanCharLayer();

    /**
     * Converts between arabic-indic digits and european digits based on existing language setting.
     * So it'll convert any digit from the string
     * to use either european digits or arabic-indic digits based on current settings.
     *
     * @since S60 v3.2
     * @param aFieldString Data buffer used in conversion.
     * @param aFieldData Return converted data in this parameter.
     */	
	void DoNumberConversion( HBufC* aFieldString, TPtrC& aFieldData ) const;

    /**
     * Sets all internal variables to initial state. 
     *
     * @since S60 v3.2
     */	
	void ResetValues();
	
	/**
     * Inserts text to the RichText object. 
     *
     * @since S60 v3.2
     * @param aText Text to be written
     */	
	void InsertTextL( const TDesC& aText );
	
	/**
     * Inserts character to the RichText object. 
     *
     * @since S60 v3.2
     * @param aText Character to be written
     */	
    void InsertCharacterL( const TChar aText );
    
    /**
     * Appends a new paragraph to the RichText object. 
     *
     * @since S60 v3.2
     * @param aPlainText tells if plain text is used or not
     */	
    void AppendParagraphL( TBool aPlainText );
    
    /**
     * Inserts line break to the RichText object. 
     *
     * @since S60 v3.2
     */	
    void InsertLineBreak();
    
    /**
     * Inserts "forced" line break to the RichText object. 
     * This is called when <br> tag is found.
     *
     * @since S60 v3.2
     */	
    void InsertForcedLineBreak();
  
	                        
                           
private: // data
	
	/**
     * Tells if parsing is allowed or not. If not, no text is applied to RichText.
     */
	TBool iParsingAllowed;
	
	/**
     * If this is true, XHTML is parsed as plain text. 
     * In this case styles and paragraphs are not applied to rendered text.
     */
	TBool iPlainText;
	
	/**
     * If this is true, URL addresses are shown after links.  
     */
	TBool iShowUrls;
	
	/**
     * Tells if preformatted parsing is in use. (true between tags <pre> and </pre>).
     * When preformatted parsing is in use, text is added to RichText in same format
     * as it is written between tags <pre> and </pre>.
     * For example all whitespaces are added etc.
     * However, if there are other tags inside <pre> and </pre> tags, they are parsed
     * normally. (For example styles are changed normally: <p style="color:red">
     * changes the text color to red also in preformatted text.)
     *
     * NOTE(review): declared as TInt rather than TBool; presumably a nesting
     * counter of open <pre> elements - confirm against the implementation.
     */
	TInt iPreformatted;
	
	/**
     * Current text alignment (left, center or right). Used in paragraphs.
     * Changed by tag with corresponding style attribute, for example:
     * <p style="text-align:right">
     *
     */
	CParaFormat::TAlignment iAlignment;
	
	/**
     * Start position of the current style. Every time when a new style is added to
     * richtext object, it is applied from iStylePos to the end of document. After that,
     * iStylePos is set to point to the end of document. 
     */
	TInt iStylePos;
	
	/**
     * Tells if a new paragraph has already been added to richtext object AND
     * no characters are added after that. Used to avoid adding of empty paragraphs.
     * For example, if there are several end tags without text
     * between them and each end tag creates a new paragraph, this generates unnecessary
     * paragraphs (also shown as extra lines in text) unless the situation is handled with this variable.
     * So, if a new paragraph has already been added and no text after that (=this variable is true),
     * new paragraphs are not added.  
     */
	TBool iNewParagraph;
	
	/**
     * iNewLines
     * NOTE(review): presumably a count of pending line breaks
     * (see InsertLineBreak) - confirm against the implementation.
     */
	TInt iNewLines;
	
	/**
     * iForcedNewLines
     * NOTE(review): presumably a count of pending forced line breaks
     * (see InsertForcedLineBreak) - confirm against the implementation.
     */
	TInt iForcedNewLines;
	
	/**
     * iSkipWhiteSpaces 
     * NOTE(review): presumably tells whether whitespace is skipped when
     * text is added - confirm against the implementation.
     */
	TBool iSkipWhiteSpaces;
	
	/**
     * Tells if current alignment has been changed by style attribute. (e.g. <p style="text-align:right">) 
     */
	TBool iAlignmentChanged;
	
    /**
     * Tells if tag in question is <img>. Alt attributes are handled only in this case. 
     */
	TBool iImageFound;
	
	/**
     * iFirstTextLineAdded 
     * NOTE(review): presumably tells whether the first line of text has
     * already been added to the richtext object - confirm.
     */
	TBool iFirstTextLineAdded;

	/**
     * Current list context, ordered or unordered list. Used to define how list items are rendered.
     * (in ordered list with item number and in unordered list with bullets)
     */
	TInt iCurrentListContext;

	/**
     * Index of current level of definition list, tells in which level we are now.
     * (There might be several nested definition lists, this is used to support for this). 
     * Used to define margins widths (with other same kind of variables) when a new
     * paragraph is created.
     */
	TInt iDefListLevel;

	/**
     * Index of current level of block quote, tells in which level we are now.
     * 
     * (In correct XHTML there may NOT be nested block quotes, but this is needed
     * also for one-level blockquote. Support also nested block-quotes although they are not allowed
     * in XHTML).
     * Used to define margins widths (with other same kind of variables) when a new
     * paragraph is created.
     */
	TInt iBlockQuoteLevel;

	/**
     * Position related to the current hyperlink.
     * NOTE(review): the type is TInt, so this cannot hold a URL; presumably
     * it is the start position stored by BeginHyperLink - confirm.
     * (The target URL is held in iHyperLinkAddress.)
     */
	TInt iHyperLinkPos;
	
	/**
     * Default text color.
     * NOTE(review): presumably set by user by calling SetDefaultTextColor - confirm.
     */
	TRgb iTextColor;

	/**
     * Default margin width of ordered and unordered lists.
     * NOTE(review): the original comment referred to a SetMargins function,
     * which is not declared in this header; presumably derived from
     * iIndentWidth (see SetIndentWidth) - confirm.
     */
	TInt iListMargin;

	/**
     * Default margin width of definition lists.
     * NOTE(review): the original comment referred to a SetMargins function,
     * which is not declared in this header; presumably derived from
     * iIndentWidth (see SetIndentWidth) - confirm.
     */
	TInt iDefListMargin;

	/**
     * Default margin width of block quote.
     * NOTE(review): the original comment referred to a SetMargins function,
     * which is not declared in this header; presumably derived from
     * iIndentWidth (see SetIndentWidth) - confirm.
     */
	TInt iBlockQuoteMargin;

	/**
     * Observer instance for notification when XHTML parsing is complete. 
     * Not own.
     */
	MXhtmlParserObserver* iObserver;
	
	/**
     * XML parser used to parse from XML file to DOM document.
     * Own.
     */
	CMDXMLParser* iXmlParser;
	
	/**
     * RichText object that is modified by XHTML parser.
     * XHTML parser creates a formatted text based on parsed XHTML and adds
     * the text to RichText object (with styles and paragraphs).
     * RichText is passed to parser in ParseL.
     * The xhtml renderer owns this richtext object. 
     * Not own.
     */
    CRichText* iRichText;
    
    
    
    /**
     * XML (DOM) document.
     * NOTE(review): presumably the document produced by iXmlParser and
     * consumed by ParseDomL; ownership is not stated in this header - confirm.
     */
    CMDXMLDocument* iXmlDocument;
	
	/**
     * Stackmanager for handling of stacks.
     * Own.
     */
	CXhtmlStackManager* iStackManager;	
	
	/**
     * Stores font-related information
     * Own.
     */
	CXhtmlFontSpecs* iXhtmlFontSpecs;
	
	/**
     * Current CharFormatLayer that is based on GlobalCharFormatLayer
     * of Richtext object. New layers are added in function ChangeCurrentStyleL
     * and removed in function CleanCharLayer. This variable tells the "style" that
     * is applied to text (e.g. color, fontsize etc.) 
     * Own.
     */
	CCharFormatLayer* iCurrentCharLayer;
	
	/**
     * Array of hyperlink information. Contains information of all found hyperlinks.
     * Own.
     */
	CArrayPtrFlat<CXhtmlHyperLink>* iHyperLinkArray;
	
	/**
     * Stores the value of current "href" attribute of <a> tag.
     * In practice the value contains target URL of the link.
     * Not own.
     * NOTE(review): "Not own" is unusual for an HBufC* member; verify who
     * allocates and deletes this buffer (see SetReferenceL / EndHyperLinkL).
     */
	HBufC* iHyperLinkAddress;
	
	/**
     * The number of space characters used to calculate the width of indent.
     * Default value is 4 (space characters).
     */
	TInt iIndentWidth;
	
	/**
     * Maximum number of nested list levels.
     * Default value is 5 
     */
	TInt iMaxListLevels;
	};
	
#include <xhtmlparser.inl>

#endif