email/mail/PluginSrc/MailPlainView/MsgMailViewerHtmlConv.cpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Thu, 17 Dec 2009 08:44:11 +0200
changeset 0 72b543305e3a
permissions -rw-r--r--
Revision: 200949 Kit: 200951

/*
* Copyright (c) 2002 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description: 
*     An HTML stripper
*
*/


// INCLUDE FILES
#include "MsgMailViewerHtmlConv.h"

#include <barsread.h>
#include <eikenv.h>
#include <f32file.h>
#include <aknnotewrappers.h>
#include <charconv.h>
#include <MailPlainView.rsg>
#include <MsgMailViewer.rsg>
#include "MailLog.h"



// LOCAL CONSTANTS AND MACROS
_LIT (KLineBreak, "<br>");
_LIT (KUnnumberedList, "<ul>");
_LIT (KUnnumberedListEnd, "</ul>");
_LIT (KOrderedList, "<ol>");
_LIT (KOrderedListEnd, "</ol>");
_LIT (KListItem, "<li>");
_LIT (KLessThan, "<");
_LIT (KGreaterThan, ">");
_LIT (KAmpersand, "&");
_LIT (KDefinitionList, "<dl>");
_LIT (KDefinitionListEnd, "</dl>");
_LIT (KDefinitionListTerm, "<dt>");
_LIT (KDefinitionListDef, "<dd>");
//_LIT (KDefinitionListDefEnd, "</dd>");
_LIT (KBodyTag, "<body>");
_LIT (KScript, "<script");
_LIT (KScriptEnd, "</script>");
_LIT (KNumberedItemSeparator, ". ");
_LIT (KNewParagraph, "<p>");
_LIT (KHeadStart, "<head>");
_LIT (KHeadEnd, "</head>");
_LIT (KHeading, "<h");
_LIT (KHeadingEnd, "</h");
_LIT (KStartHtml, "<html>");
_LIT (KEndHtml, "</html>");
_LIT (KHorizontalRuler, "<hr>");
_LIT (KImage, "<img");
_LIT (KImageAlt, "alt");
_LIT (KImageSrc, "src");
_LIT (KSpace, " ");
_LIT (KNewline, "\n");
_LIT (KListItemSymbol, "- ");
_LIT (KSemicolon, ";");
//_LIT (KQuotationMark, "\"");
_LIT (KBlock, "<div>");
_LIT (KBlockEnd, "</div>");
_LIT(KCharSet, "charset=");
_LIT( KCommentTagEnd, "-->");
_LIT( KOlId, "olid" );

const TInt KMaxOrdinalLength(5);
// "10000 is a reasonable size for a converted block, it has been
// tested that bigger size doesn't provide any faster conversion"
// Blocksize has been lowered to 1000 to make the UI more
// responsive during processing. Based on tests, the performance 
// is not noticeably changed by the smaller block size.
//
const TInt KMaxConvBlockLength( 1000 );

const TInt KSampleBufferSize = 256;
const TInt KMimimumConfidenceLevel = 50;
const TInt KMaxAmountOfSamples = KSampleBufferSize / 2;

enum TConversionState {
    EDeleteHeader = 1,
    EInitCharConverter,
    EConvert,
    ERemoveTags};

enum TStatusFlags {
    EForceUseCharacterSet = KBit0,
    EOwnsFileNameArray = KBit1
    };

// MODULE DATA STRUCTURES


// ================= MEMBER FUNCTIONS =======================

CStringPair::CStringPair()
    {
    }

CStringPair::~CStringPair()
    {
    delete iName;
    delete iValue;
    }

CStringPair* CStringPair::NewLC(const TDesC& aName,const TDesC& aValue)
    {
    CStringPair* temp = new(ELeave) CStringPair();
    CleanupStack::PushL(temp);
    temp->ConstructL(aName, aValue);
    return temp;
    }

void CStringPair::ConstructL(const TDesC& aName,const TDesC& aValue)
    {
    iName = aName.AllocL();
    iValue = aValue.AllocL();
    }

// C++ default constructor cannot contain any code that might leave
CMsgMailViewerHtmlConv::CMsgMailViewerHtmlConv()
    : CActive( CActive::EPriorityLow ),
    iPos(0), 
    iReturnValue(KErrNone),
    iErrorPos(KMaxTInt),
    iConversionState(EDeleteHeader),
    iCharacterSetId(0),
    iStatusFlags(EOwnsFileNameArray)
    {
    CActiveScheduler::Add( this );
    }

// Symbian OS default constructor can leave.
void CMsgMailViewerHtmlConv::ConstructL(RFile& aHandle,
    TUint aCharSet)
    {
    iFileIdArray = new(ELeave) RPointerArray<HBufC>;
	LOG1("CMsgMailViewerHtmlConv::ConstructL:%08x", aCharSet);
    iCharacterSetId = aCharSet;

    // read HTML content
    TInt size;
    User::LeaveIfError(aHandle.Size(size));
    HBufC8 *buf = HBufC8::NewLC(size);
    TPtr8 ptr(buf->Des());
    User::LeaveIfError(aHandle.Read(ptr));

    iOrigText = HBufC::NewL(buf->Length());
    TPtr ptr16(iOrigText->Des());
    ptr16.Copy(ptr);
    CleanupStack::PopAndDestroy(); // buf
    }


// Two-phased constructor.
CMsgMailViewerHtmlConv* CMsgMailViewerHtmlConv::NewLC(
	RFile& aHandle, TUint aCharset, TBool aForced)
	{
    CMsgMailViewerHtmlConv* self = new (ELeave) CMsgMailViewerHtmlConv();

    CleanupStack::PushL( self );
    if (aForced)
    	{
    	self->iStatusFlags |= EForceUseCharacterSet;
    	}
    self->ConstructL(aHandle, aCharset);

    return self;
	}

CMsgMailViewerHtmlConv* CMsgMailViewerHtmlConv::NewL(
    RFile& aHandle, TUint aCharset, TBool aForced)
    {
    CMsgMailViewerHtmlConv* self = 
        CMsgMailViewerHtmlConv::NewLC( aHandle, aCharset, aForced );
    CleanupStack::Pop( self );
    return self;    
    }
    
// Destructor
CMsgMailViewerHtmlConv::~CMsgMailViewerHtmlConv()
    {
    Cancel();
    delete iOrigText;
    delete iCharSetName;
    if (iMacros)
        {
        iMacros->ResetAndDestroy();
        }
    delete iMacros;

    if (iFileIdArray && ( iStatusFlags & EOwnsFileNameArray ) )
        {
        iFileIdArray->ResetAndDestroy();
        delete iFileIdArray;
        }
    delete iCharConv;
    }

TInt CMsgMailViewerHtmlConv::Convert()
    {
    TRAP( iReturnValue, DoConversionL() );
    LOG1( "CMsgMailViewerHtmlConv::Convert return:%d", iReturnValue );
    return iReturnValue;
    }

void CMsgMailViewerHtmlConv::Convert( TRequestStatus* aStatus )
    {
    LOG( "CMsgMailViewerHtmlConv::Convert (Async)");
    ASSERT( aStatus );
    Cancel();
    iReqStatus = aStatus;
    *aStatus = KRequestPending;
    ContinueAsyncConvert();
    }

void CMsgMailViewerHtmlConv::DoConvertL()
    {
    if ( !iCharConv )
        {
        //missing charconv is considered to be a
        //corrupt message situtation, the html part
        //will be available in attachment view
        ShowErrorL();
        User::Leave( KErrCorrupt );
        }

    // Conversion "cursor" location, initially start of descriptor
    TInt toConvertStart( 0 );
    // Length of text to still convert
    TInt toConvertLength( iOrigText->Length() );
    // Conversion is done in blocks to control heap consumption,
    // size of a block is always <= KMaxConvBlockLength
    TInt blockLength( Min( toConvertLength, KMaxConvBlockLength ) );
    // Initially no block is partially converted -> remainder == 0
    TInt blockRemainder( 0 );
    // Converter's state variable, mustn't be tampered with during conversion
    TInt state( CCnvCharacterSetConverter::KStateDefault );

    // 8bit source buffer and 16bit target buffer for a block. We use
    // first block's size as buffer size, since the following blocks
    // are either of same size or smaller
    HBufC8* buf8 = HBufC8::NewLC( blockLength );
    HBufC* buf = HBufC::NewLC( blockLength );

    // Pointers to the source and target texts.
    TPtr origPtr( iOrigText->Des() );
    TPtr8 srcPtr( buf8->Des() );
    TPtr dstPtr( buf->Des() );

    while ( toConvertLength > 0 ) // conversion loop
        {
        // Create a block for this conversion round
        blockLength = Min( toConvertLength, KMaxConvBlockLength );
        srcPtr.Copy( iOrigText->Mid( toConvertStart, blockLength ) );

        // Convert a block, return number of characters that weren't converted
        blockRemainder = iCharConv->ConvertToUnicode( dstPtr, srcPtr, state );
        // Handle possible errors
        if ( blockRemainder < 0 )
            {
            iPos = toConvertStart;
            ShowErrorL();
            break;
            }

        // Store the contents of the output buffer
        // Here unicodeLength may be less than convertedLength, e.g. in
        // case of "&auml;" ->"ä", and that must be taken into account
        TInt convertedLength = srcPtr.Length() - blockRemainder;
        TInt unicodeLength = dstPtr.Length();
        // Replace convertedLength characters with unicodeLength characters
        origPtr.Replace( toConvertStart, convertedLength, dstPtr );
        // Update progress status, move starting point to next
        // unconverted character
        toConvertStart += unicodeLength;
        toConvertLength = iOrigText->Length() - toConvertStart;
        }

    CleanupStack::PopAndDestroy( 2 ); // buf8, buf
    }

void CMsgMailViewerHtmlConv::DoConversionL()
    {
    switch (iConversionState)
        {
        case EDeleteHeader:
            {
            LOG("EDeleteHeader");

            // Try to find charset name if not set already
            if ( !(iStatusFlags & EForceUseCharacterSet) )
            	{
	            TInt charSetPos(iOrigText->FindF(KCharSet()));
	            if ( charSetPos != KErrNotFound)
	                {
                	// Begin of charset name
                	charSetPos += KCharSet().Length();
	                TInt maxIdLength(charSetPos+20); // Give up if id end not found at this point.
	                for (TInt i = charSetPos; i<maxIdLength; ++i)
	                    {
	                    if((*iOrigText)[i] == '"')
	                        {
	                        iCharSetName = iOrigText->Mid(
	                            charSetPos, i-charSetPos).AllocL();
	                        break;
	                        }
	                    }
	                }
            	}

            // 5 for <body
            TInt bodyPos(iOrigText->FindF(KBodyTag().Left(5)));
            if ( bodyPos != KErrNotFound)
                {
                // Delete from start to body tag
                Delete(bodyPos);
                // Delete body tag
                RemoveTagL();
                }
            else // no body tag, invalid HTML, search for <html> then
                {
                bodyPos = iOrigText->FindF(KStartHtml().Left(5));
                if (bodyPos != KErrNotFound)
                    {
                    Delete(bodyPos);
                    RemoveTagL();
                    }
                }

            iConversionState++;
            break;
            }

        case EInitCharConverter:
            {
            RFs fs = CEikonEnv::Static()->FsSession();
            iCharConv = CCnvCharacterSetConverter::NewL();
            // Try to find character set id from the HTML header
            if ( iCharSetName && iCharSetName->Length() )
                {
                HBufC8* buf8 = HBufC8::NewLC( iCharSetName->Length() );
                    buf8->Des().Copy( *iCharSetName );

                iCharacterSetId = iCharConv->
                    ConvertStandardNameOfCharacterSetToIdentifierL(
                    *buf8, fs);

                CleanupStack::PopAndDestroy(); // buf8
                }

            if (iCharacterSetId > 0)
                {
                LOG1("CMsgMailViewerHtmlConv::EInitCharConverter:%08x",
                	iCharacterSetId);
                const TInt ret(iCharConv->PrepareToConvertToOrFromL(
                    iCharacterSetId,
                    fs));

                if (ret == CCnvCharacterSetConverter::ENotAvailable)
                    {
					LOG( "CMsgMailViewerHtmlConv::EInitCharConverter: ENotAvailable" );
                    iCharacterSetId = 0;
                    delete iCharConv;
                    iCharConv = NULL;
                    }

                }
            else
                {
                //regular method of getting charset id failed.
                //Try autodetect instead.

                TInt confidence=0;
                const CArrayFix<CCnvCharacterSetConverter::SCharacterSet>* availableSets =
                CCnvCharacterSetConverter::CreateArrayOfCharacterSetsAvailableLC( fs );

                HBufC8* sample = HBufC8::NewLC( KSampleBufferSize );
	            TPtr8 sampleDes = sample->Des();

	            sampleDes.Copy(iOrigText->Left( iOrigText->Length() > KMaxAmountOfSamples ?
	                                                KMaxAmountOfSamples : iOrigText->Length()));

                iCharConv->AutoDetectCharSetL(confidence, iCharacterSetId, *availableSets, sampleDes);

                if(confidence > KMimimumConfidenceLevel)
                    {
                    const TInt ret(iCharConv->PrepareToConvertToOrFromL(
                    iCharacterSetId,
                    fs));
                    }
                else
                    {
                    //autodetect not reliable enough.
                    delete iCharConv;
                    iCharConv = NULL;
                    }

                CleanupStack::PopAndDestroy(2); // availableSets, sample
                }

            iConversionState++;
            break;
            }

        case EConvert:
            {
            LOG("EConvert");
            DoConvertL();

            // reset position to beginning before step to next state.
            iPos = 0;
			iConversionState++;
            break;
            }

        case ERemoveTags:
            {
            LOG("ERemoveTags");
            /**
            * This operation is done inside the CAknWaitnoteWrapper StepL
            * (CActive::RunL) and thus it should not take too much time.
            * We use maxloops to break out from time consuming operation,
            * so that wait note can be updated.
            */
            TInt maxloops(10000);
            while (iReturnValue == KErrNone && iPos < iOrigText->Length())
                {
                if (maxloops < 0)
                    {
                    LOG("maxloops");
                    // return to update wait note.
                    return;
                    }
                TPtrC character = iOrigText->Mid(iPos, 1);
                if (character == KAmpersand)
                    {
                    // Replace
                    ReplaceMacroL();
                    // takes more time, so decrease by ten
                    maxloops -= 10;
                    }
                else if (character == KLessThan)
                    {
                    // Remove possible tag
                    HandleTagL(0);
                    // takes more time, so decrease by ten
                    maxloops -= 10;
                    }
                // replace linefeeds with one space
                else if (character[0] == 0x0A || character[0] == 0x0D)
                	{
                	// +1 peek next character
                	if (iPos+1 < iOrigText->Length() &&
                		(iOrigText->Mid(iPos+1, 1)[0] == 0x0A ||
                		iOrigText->Mid(iPos+1, 1)[0] == 0x0D))
                		{
                		Delete(1); // delete character
                   		}
                	else
                		{
                		// insert space
                		TPtr ptr(iOrigText->Des());
                		ptr.Replace(iPos, 1, KSpace);
                	    // move to next char
                        iPos++;
                		}
                	}
                else
                    {
                    // +1 peek next char
                    if (character == KSpace &&
                        iPos+1 < iOrigText->Length() &&
                        iOrigText->Mid(iPos+1, 1) == KSpace)
                        {
                        // delete multible spaces
                        Delete(1);
                        }
                    else
                        {
                        // move to next char
                        iPos++;
                        }
                    maxloops--;
                    }
                }
            iConversionState++;
            // conversion ended
            if (iReturnValue == KErrNone)
                {
                iReturnValue = KErrEof;
                }
            break;
            }

        default:
            LOG("default");
            // conversion ended
            iReturnValue = KErrEof;
            break;
        }
    }


TInt CMsgMailViewerHtmlConv::HandleListL(const TInt aIndent,
                                         const TBool aNumbered)
    {
    TInt item(1);
    TInt len(RemoveTagL()); // remove <ol>
    TInt remaining(len - iPos);
    // length of the buf is 4, so we can find and distinguish <li> and </ol
    TBuf<4> buf(iOrigText->Mid(iPos, Min(4, remaining)));
    buf.LowerCase();
    const TInt currPos(iPos);

    while (iPos < len && buf != (aNumbered ? KOrderedListEnd().Left(4) :
        KUnnumberedListEnd().Left(4)))
        {
        if (buf == KListItem)
            {
            RemoveTagL();

            HBufC *fill = HBufC::NewLC(aIndent + KMaxOrdinalLength);
            TPtr fillPtr = fill->Des();

            fillPtr.AppendFill(' ', aIndent);
            if (aNumbered)
                {
                fillPtr.AppendNum(item);
                fillPtr.Append(KNumberedItemSeparator);
                }
            else
                {
                fillPtr.Append(KListItemSymbol);
                }

            InsertTextL(fillPtr);
            CleanupStack::PopAndDestroy(); // fill

            InsertLinefeedL();

            len = iOrigText->Length();
            iPos += aIndent + (aNumbered ? 3 : 2); // iPos is always moved at least by 1
            item++;
            }
        else if ( buf.Left(1)[0] == 0x0A || buf.Left(1)[0] == 0x0D )
            {
            Delete(1); // delete linefeeds
            }
        else if (buf.Left(1) == KLessThan)
            {
            len = HandleTagL(aIndent + 2);
            }
        else if (buf.Left(1) == KAmpersand)
            {
            len = ReplaceMacroL();
            }
        else
            {
            iPos++;
            }
        remaining = len - iPos;
        buf = iOrigText->Mid(iPos, Min(4, remaining));
        buf.LowerCase();
        }

    if ( iPos != len)
        {
        InsertLinefeedL();
        len = RemoveTagL(); // delete </ol>
        }
    else
        {
        iPos = currPos;
        ShowErrorL();
        len = iOrigText->Length();
        }

    return len;
    }

TInt CMsgMailViewerHtmlConv::HandleDListL(TInt aIndent)
    {
    TInt len(Delete(4)); // remove <dl>
    TInt remaining(len - iPos);
    // length of the buf is 4, so we can find and distinguish <li> and </dl
    TBuf<4> buf(iOrigText->Mid(iPos, Min(4, remaining)));
    buf.LowerCase();
    const TInt currPos(iPos);

    while (iPos < len && buf != KDefinitionListEnd().Left(4))
        {
        if (buf == KDefinitionListTerm)
            {
            len = RemoveTagL();
            HBufC *fill = HBufC::NewLC(aIndent);
            TPtr fillPtr = fill->Des();

            fillPtr.AppendFill(' ', aIndent);
            InsertTextL(fillPtr);
            CleanupStack::PopAndDestroy(); // fill

            InsertLinefeedL();
            iPos += aIndent; // skip spaces
            len = iOrigText->Length();
            }
        else if (buf == KDefinitionListDef)
            {
            len = RemoveTagL();
            InsertTextL(KSpace);
            }
        else if (buf.Left(1) == KLessThan)
            {
            len = HandleTagL(aIndent + 2);
            }
        else if (buf.Left(1) == KAmpersand)
            {
            len = ReplaceMacroL();
            }
        else
            {
            iPos++;
            }
        remaining = len - iPos;
        buf = iOrigText->Mid(iPos, Min(4, remaining));
        buf.LowerCase();
        }

    if ( iPos != len)
        {
        len = RemoveTagL(); // delete </dl>
        }
    else // </dl> missing
        {
        iPos = currPos;
        ShowErrorL();
        len = iOrigText->Length();
        }

    return len;
    }

TInt CMsgMailViewerHtmlConv::HandleTagL(TInt aIndent)
    {
    TInt length(iOrigText->Length());
    const TInt remaining(length - iPos);

    if (!remaining)
        {
        // Nothing to remove
        iReturnValue = KErrEof;
        return iOrigText->Length();
        }

    // Longest HTML tag is 6 characters long, thus six char buffer
    TBuf<6> buf(iOrigText->Mid(iPos+1, Min(6, remaining - 1)));
    buf.LowerCase();
    buf.TrimAll();

    if (buf.Left(2) == KLineBreak().Mid(1,2) ||
        buf.Left(1) == KNewParagraph().Mid(1,1) ||
        buf.Left(2) == KHorizontalRuler().Mid(1,2) ||
		buf.Left(3) == KBlock().Mid(1,3))
        {
        length = RemoveTagL(); // Delete <br>, <p>, <hr> or <div>
        InsertLinefeedL();
        return length;
        }
	else if (buf.Left(4) == KBlockEnd().Mid(1,4))
		{
        length = RemoveTagL(); // Delete </div>
	    if ( (length - iPos) > 6 ) // still data to peek to?
			{
			// peek the next tag and if it's <div>, don't add newline
			TPtrC peekPtr(iOrigText->Mid(iPos));
			TInt found = peekPtr.Find(KLessThan);
			// found + 3 there needs to be at least 3 characters for "div" check
			if ( found != KErrNotFound && iPos+found+3 < length )
				{
				// +1 step over "<"
				TBuf<6> peekbuf( iOrigText->Mid(
				    iPos+found+1, Min(6, length - iPos - 1)) );
			    peekbuf.LowerCase();
				peekbuf.TrimAll();
				// compare "div" part
				if ( peekbuf.Length() > 3 &&
				    peekbuf.Left(3) != KBlock().Mid(1,3) )
				    {
					// next tag is not <div>, add newline
					InsertLinefeedL();
				    }
				}
			}
        return length;
		}
    else if (buf.Left(2) == KUnnumberedList().Mid(1,2))
        {
        return HandleUListL(aIndent);
        }
    else if (buf.Left(2) == KOrderedList().Mid(1,2))
        {
		// check for Outlook's olid tag, not to be confused
		// with ordered list tag <ol>
		if( buf.Left( 4 ).Match( KOlId ) == 0 )
			{
			return RemoveTagL(); // Delete <olid ....>
			}
		return HandleOListL(aIndent);
        }
    else if (buf.Left(2) == KDefinitionList().Mid(1,2))
        {
        return HandleDListL(aIndent);
        }
    else if (buf.Left(4) == KHeadStart().Mid(1,4))
        {
        const TInt currPos(iPos);
        length = RemoveTagL(); // Remove <head ...>
        TPtrC tagPos(iOrigText->Mid(iPos));
        TBool found(EFalse);
        TInt endTag(tagPos.Find(KLessThan));
        while (!found && endTag != KErrNotFound
            && iOrigText->Length() >= endTag+5 )
            {
            buf = iOrigText->Mid(endTag+1, 4);
            buf.TrimAll();
            buf.LowerCase();
            if (buf == KHeadEnd().Mid(1, 4))
                {
                found = ETrue;
                Delete(endTag - iPos);
                return RemoveTagL();
                }
            else
                {
                TPtrC tmpPos(iOrigText->Mid(endTag+1));
                const TInt p(tmpPos.Find(KLessThan));
                endTag = p == KErrNotFound ? KErrNotFound : endTag + p + 1;
                }
            }
        if (endTag == KErrNotFound)
            {
            iPos = currPos;
            ShowErrorL();
            return iOrigText->Length();
            }
        }
    else if (buf.Left(1) == KHeading().Mid(1,1))
        {
        CDesCArrayFlat *headArray = new(ELeave) CDesCArrayFlat(6);
        CleanupStack::PushL(headArray);
        for (TInt i = 1; i < 7; i++)
            {
            TBuf<3> tag = KHeading().Mid(1);
            tag.AppendNum(i);
            tag.Append(KGreaterThan);
            headArray->AppendL(tag);
            }
        TInt foundInPos;
        if (headArray->Find(buf.Left(3), foundInPos) == 0)
            {
            length = RemoveTagL(); // remove <h?>
            InsertLinefeedL();
            CleanupStack::PopAndDestroy(); // headArray
            return length;
            }
        CleanupStack::PopAndDestroy(); // headArray
        }
    else if (buf.Left(2) == KHeadingEnd().Mid(1,2) &&
             buf != KEndHtml().Mid(1, 6))
        {
        CDesCArrayFlat *headArray = new(ELeave) CDesCArrayFlat(6);
        CleanupStack::PushL(headArray);
        for (TInt i = 1; i < 7; i++)
            {
            TBuf<4> tag = KHeadingEnd().Mid(1);
            tag.AppendNum(i);
            tag.Append(KGreaterThan);
            headArray->AppendL(tag);
            }
        TInt foundInPos;
        if (headArray->Find(buf.Left(4), foundInPos) == 0)
            {
            length = RemoveTagL(); // remove </h?>
            InsertLinefeedL();
            CleanupStack::PopAndDestroy(); // headArray
            return length;
            }
        CleanupStack::PopAndDestroy(); // headArray
        }
    else if (buf == KScript().Mid(1))
        {
        const TInt currPos(iPos);
        length = RemoveTagL(); // Remove <script ...>
        TPtrC tagPos(iOrigText->Mid(iPos));
        TBool found(EFalse);
        TInt endTag(tagPos.Find(KLessThan));
        while (!found && endTag != KErrNotFound
            && iOrigText->Length() >= endTag+7 )
            {
            buf = iOrigText->Mid(endTag+1, 6);
            buf.TrimAll();
            buf.LowerCase();
            if (buf == KScriptEnd().Mid(1, 6))
                {
                found = ETrue;
                Delete(endTag - iPos);
                return RemoveTagL();
                }
            else
                {
                TPtrC tmpPos(iOrigText->Mid(endTag+1));
                const TInt p(tmpPos.Find(KLessThan));
                endTag = p == KErrNotFound ? KErrNotFound : endTag + p + 1;
                }
            }
        if (endTag == KErrNotFound)
            {
            iPos = currPos;
            ShowErrorL();
            return iOrigText->Length();
            }
        }
    else if (buf.Left(3) == KImage().Mid(1,3))
        {
        TBool inQuoted(EFalse); // in quoted string
        TBool inAltPart(EFalse);
        TBool inAltQuotedPart(EFalse);
        TBool inSrcPart(EFalse);
        const TInt KInitialSize(50);
        HBufC* inlineImage = HBufC::NewLC(KInitialSize);

        while ((*iOrigText)[iPos] != '>' && iPos < iOrigText->Length())
            {
            TChar ch((*iOrigText)[iPos]);
            const TInt remaining(iOrigText->Length() - iPos);
            HBufC* lowerCaseCopy = iOrigText->Mid(iPos,
                remaining > 3 ? 3 : remaining).AllocLC();
            TPtr lowerCasePtr(lowerCaseCopy->Des());
            lowerCasePtr.LowerCase();

            if (inQuoted && ch != '"')
                {
                if ( inlineImage->Length() == inlineImage->Des().MaxLength() )
                    {
                    HBufC* temp = inlineImage->ReAllocL(
                        inlineImage->Length() + KInitialSize);
                    CleanupStack::Pop(2); //lowerCaseCopy, inlineImage
                    CleanupStack::PushL( inlineImage = temp );
                    CleanupStack::PushL( lowerCaseCopy );
                    }

                // Leave alt quoted to the body
                if ( inAltQuotedPart )
                    {
                    iPos++;
                    }
                else if ( inSrcPart )
                    {
                    inlineImage->Des().Append(ch);
                    Delete(1);
                    }
                else
                    {
                    // other queted parts are deleted
                    Delete(1);
                    }

                }
            else if (ch == '"')
                {
                Delete(1);
                inQuoted = !inQuoted;
                if (inAltPart)
                    {
                    inAltQuotedPart = ETrue;
                    inAltPart = EFalse;
                    }
                else if (inAltQuotedPart)
                    {
                    inAltQuotedPart = EFalse;
                    }
                else if (inSrcPart && !inQuoted)
                    {
                    // end of Src Quoted
                    inSrcPart = EFalse;
                    }
                }
            else if (!lowerCaseCopy->Compare(KImageAlt))
                {
                Delete(KImageAlt().Length()); // delete alt
                inAltPart = ETrue;
                inSrcPart = EFalse;
                }
            else if (!lowerCaseCopy->Compare(KImageSrc))
                {
                Delete(KImageSrc().Length()); // delete src
                inAltPart = EFalse;
                inSrcPart = ETrue;
				}
            else
                {
                Delete(1);
                }
            CleanupStack::PopAndDestroy(); // lowerCaseCopy
            }
        if ( inlineImage->Length() )
            {
            User::LeaveIfError( iFileIdArray->Append(inlineImage) );
            CleanupStack::Pop(); // inlineImage
            }
        else
            {
            CleanupStack::PopAndDestroy(); // inlineImage
            }

        if (iPos < iOrigText->Length())
            {
            Delete(1); // delete >
            }
        return iOrigText->Length();
        }

    return RemoveTagL();
    }

TInt CMsgMailViewerHtmlConv::RemoveTagL()
    {
    const TInt currPos(iPos);
    TPtrC tagPos(iOrigText->Mid(iPos));
    const TInt tagLength(tagPos.Find(KGreaterThan));
    if (tagLength == KErrNotFound)
        {
        iReturnValue = KErrEof; //  ">" not found at the end
        iPos = 0;
        return 0;
        }

    if (tagPos[1] == '!') // start of the comment
        {
        const TInt commentLength( tagPos.Find( KCommentTagEnd ) );
        if (commentLength != KErrNotFound)
            return Delete(commentLength + 3); // 3 for -->

        return Delete(tagLength+1);// not comment delete whole tag;
        }
    // Check that > really belongs to this tag
    TPtrC tmpPos(iOrigText->Mid(iPos+1));
    const TInt firstTagStart(tmpPos.Find(KLessThan));
    if (tagLength != KErrNotFound &&
        ( firstTagStart == KErrNotFound ||
          tagLength <= firstTagStart ) )
        {
        return Delete(tagLength+1);
        }
    else
        {
        // check that found < really belongs to tag which end was found
        // and it's not only wrongly encoded
        if (firstTagStart < tagLength)
            {
            iPos++;
            return iOrigText->Length();
            }
        else
            {
            iPos = currPos;
            ShowErrorL();
            return iOrigText->Length();
            }
        }
    }

RPointerArray<HBufC>* CMsgMailViewerHtmlConv::FileIdArray()
    {
    iStatusFlags &= ~EOwnsFileNameArray;
    return iFileIdArray;
    }

TBool CMsgMailViewerHtmlConv::AutoParsedCharSetNameAndIdentifier() const
    {
    TBool autoParsed = EFalse;
    //
    if ( !(iStatusFlags & EForceUseCharacterSet) )
        {
        autoParsed = ( iCharSetName != NULL ) && ( iCharacterSetId != 0);
        }
    //
    return autoParsed;
    }

TUint CMsgMailViewerHtmlConv::AutoParsedCharSetIdentifier() const
    {
    return iCharacterSetId;
    }



TInt CMsgMailViewerHtmlConv::Delete(const TInt aLen)
    {
    TPtr ptr(iOrigText->Des());
    ptr.Delete(iPos, aLen);
    return iOrigText->Length();
    }

// InsertTextL()
// It is checked that text to be inserted fits into a descriptor and
// a descriptor's size is doubled if not. In practice this should never
// happen because insertions are usually one character to the place where
// there used to be i.e. a 4-char macro.
//
TInt CMsgMailViewerHtmlConv::InsertTextL(const TDesC& aChars)
    {
    if (iOrigText->Des().MaxLength() < iOrigText->Length() + aChars.Length())
        {
        iOrigText = iOrigText->ReAllocL((
            iOrigText->Length() + aChars.Length()) * 2);
        }

    TPtr ptr(iOrigText->Des());
    ptr.Insert(iPos, aChars);
    return iOrigText->Length();
    }

TInt CMsgMailViewerHtmlConv::ReplaceMacroL()
    {
    if (!iMacros)
        {
        InitMacroArrayL();
        }

    TInt len(iOrigText->Length());
    const TPtrC macroPos(iOrigText->Mid(iPos+1));
    const TInt macroLength(macroPos.Find(KSemicolon));
    if (macroLength > 0)
        {
        const TPtrC macroPtr(iOrigText->Mid(iPos+1, macroLength));

        TInt pos;
        // A dummy string pair for Find-method
        TDesC tmpDesC(KSpace);
        CStringPair* tmpPair = CStringPair::NewLC(macroPtr, tmpDesC);
        if (iMacros->FindInOrder(tmpPair, pos,
            TLinearOrder<CStringPair>(CStringPair::Compare)) != KErrNotFound)
            {
            // + 2 for '&' + ';'
            Delete( ((*iMacros)[pos])->GetName()->Length() + 2);
            len = InsertTextL( *((*iMacros)[pos]->GetValue()) );
            iPos++;
            CleanupStack::PopAndDestroy(); // tmpPair
            return len;
            }
        CleanupStack::PopAndDestroy(); // tmpPair
        if (macroPtr[0] == '#') // numeric form
            {
            const TPtrC ptr(iOrigText->Mid(iPos+2, macroLength - 1)); // skip #
            TLex lex(ptr);
            TUint value(0);
            TInt err(KErrNone);
            if (User::LowerCase(lex.Peek()) == 'x')
                { // character in hex
                lex.Inc(1); // skip x in &#x20AC
                err = lex.Val(value, EHex);
                }
            else
                { // character in decimal
                err = lex.Val(value);
                }
            if (err == KErrNone)
                {
                // Check how many chars we have actually consumed.
                TInt offset = lex.Offset(); 
                // NCR could be missing the semicolon.
                if( lex.Peek() == ';' )
                    {
                    offset++;
                    }

                // + 2 for # & ;
                Delete( offset + 2 );
                TBuf<1> buf;
                buf.Append(value);
                len = InsertTextL(buf);
                iPos++;
                return len;
                }
            }
        }

    // this isn't correctly encoded macro, but let's skip it instead of
    // showing an error, so user can see as much of the message as possible
    iPos++;
    return iOrigText->Length();
    }

void CMsgMailViewerHtmlConv::InitMacroArrayL()
    {
    TResourceReader reader;
    CEikonEnv::Static()->CreateResourceReaderLC(reader, R_MAIL_HTML_MACRO);
    const TInt count(reader.ReadInt16());
    iMacros = new(ELeave) RPointerArray<CStringPair>(count);
    for (TInt i=0; i<count; i++)
        {
        HBufC* macro = reader.ReadHBufCL();
        CleanupStack::PushL(macro);
        HBufC* realText = reader.ReadHBufCL();
        CleanupStack::PushL(realText);
        CStringPair* tmp = CStringPair::NewLC(*macro, *realText);
        // this should never fail, because we allocated enough memory in
        // construction
        User::LeaveIfError(iMacros->InsertInOrder(
            tmp, TLinearOrder<CStringPair>(CStringPair::Compare)));
        CleanupStack::Pop(); // tmp
        CleanupStack::PopAndDestroy(2); // macro, realText
        }
    CleanupStack::PopAndDestroy(); // reader
    iMacros->Compress();
    }

void CMsgMailViewerHtmlConv::ShowErrorL()
    {
    iErrorPos = iPos;
    TInt deleteRest(iOrigText->Length() - iPos);
    if (deleteRest > 0)
        {
        Delete(deleteRest);
        }
    InsertLinefeedL();
    HBufC* text =
        CEikonEnv::Static()->AllocReadResourceLC(R_ERROR_IN_HTML_TEXT);
    InsertTextL(*text);
    CleanupStack::PopAndDestroy(); // text
    iReturnValue = KErrCorrupt;
    iPos = iOrigText->Length();
    }

void CMsgMailViewerHtmlConv::InsertLinefeedL()
    {
    if (iPos > 0 && iPos < iOrigText->Length() )
        {
        TPtr ptr(iOrigText->Des());
        ptr.Insert(iPos, KNewline);
        iPos++;
        }
    }

void CMsgMailViewerHtmlConv::RunL()
    {
    TInt err = Convert();
    if( !err ) // KErrNone
        {
        // still data to convert. Continue work on next loop.
        ContinueAsyncConvert();
        }
    else if( err == KErrEof )
        {
        // success
        User::RequestComplete( iReqStatus, KErrNone );
        }
    else
        {
        // error occured
        User::RequestComplete( iReqStatus, err );
        }
    }

void CMsgMailViewerHtmlConv::DoCancel()
    {
    User::RequestComplete( iReqStatus, KErrCancel );
    }

void CMsgMailViewerHtmlConv::ContinueAsyncConvert()
    {
    iStatus = KRequestPending;
    TRequestStatus* status = &iStatus;
    User::RequestComplete( status, KErrNone );
    SetActive();
    }

// ================= OTHER EXPORTED FUNCTIONS ==============


//  End of File