contenthandling/webrecognisers/recweb/RECWEB.CPP
changeset 0 2e3d3ce01487
equal deleted inserted replaced
-1:000000000000 0:2e3d3ce01487
       
     1 // Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 //
       
    15 
       
    16 #include <apmrec.h>
       
    17 #include <apmstd.h>
       
    18 #include <f32file.h>
       
    19 #include "RECWEB.H"
       
    20 
       
    21 #ifdef __UI_FRAMEWORKS_V2__
       
    22 #include <ecom/implementationproxy.h>
       
    23 #endif //__UI_FRAMEWORKS_V2__
       
    24 
       
    25 const TInt KMimeWebRecognizerValue=0x10001315;
       
    26 const TUid KUidMimeWebRecognizer={KMimeWebRecognizerValue};
       
    27 const TInt KWebNumMimeTypes=2;
       
    28 const TInt KWebRecogniserBufferSize=2048;
       
    29 _LIT8(KHtmlMimeType,"text/html");
       
    30 _LIT8(KXmlMimeType,"text/xml");
       
    31 
       
    32 CApaWebRecognizer::CApaWebRecognizer()
       
    33 :	CApaDataRecognizerType(KUidMimeWebRecognizer,CApaDataRecognizerType::ENormal)
       
    34 	// All these mime types have reasonable recognition
       
    35 	{
       
    36 	iCountDataTypes=KWebNumMimeTypes;
       
    37 	}
       
    38 
       
    39 TUint CApaWebRecognizer::PreferredBufSize()
       
    40 	{
       
    41 	return KWebRecogniserBufferSize;
       
    42 	}
       
    43 
       
    44 TDataType CApaWebRecognizer::SupportedDataTypeL(TInt aIndex) const
       
    45 	{
       
    46 	__ASSERT_DEBUG(aIndex>=0 && aIndex<KWebNumMimeTypes,User::Invariant());
       
    47 	switch (aIndex)
       
    48 		{
       
    49 	case 0:
       
    50 		return TDataType(KHtmlMimeType);
       
    51 	default:
       
    52 		return TDataType(KXmlMimeType);
       
    53 		}
       
    54 	}
       
    55 
       
    56 void CApaWebRecognizer::DoRecognizeL(const TDesC& aName, const TDesC8& aBuffer)
       
    57 	{
       
    58 	iConfidence = ENotRecognized;
       
    59 
       
    60 	const TInt positionOfLastDot=aName.LocateReverse('.');
       
    61 
       
    62     if (positionOfLastDot>=0)
       
    63 		{//check the extension
       
    64 		const TPtrC ext=aName.Mid(positionOfLastDot);
       
    65 		_LIT(KDotHtml,".html");
       
    66 		_LIT(KDotHtm,".htm");
       
    67 		_LIT(KDotShtml,".shtml");
       
    68 		_LIT(KDotShtm,".shtm");
       
    69 		_LIT(KDotXhtml,".xhtml");
       
    70 		_LIT(KDotXml,".xml");
       
    71 
       
    72 		if (ext.CompareF(KDotHtml)==0 || ext.CompareF(KDotHtm)==0
       
    73 			|| ext.CompareF(KDotShtml)==0 || ext.CompareF(KDotShtm)==0
       
    74 			|| ext.CompareF(KDotXhtml)==0)
       
    75 			{
       
    76 			iDataType=TDataType(KHtmlMimeType);
       
    77 			iConfidence=EProbable;
       
    78 			}
       
    79 		else if (ext.CompareF(KDotXml)==0)
       
    80 			{
       
    81 			iDataType=TDataType(KXmlMimeType);
       
    82 			iConfidence=EProbable;
       
    83 			}
       
    84 		}
       
    85 
       
    86 	if (aBuffer.Length() > 0 && iConfidence == ENotRecognized)
       
    87 		{ //don't check data if extension is recognised, otherwise check data file
       
    88 		TInt htmlConfidence = 0;
       
    89 		TInt xmlConfidence = 0;
       
    90 
       
    91 													// Description				        Valid in
       
    92 													//								HTML  XHTML  XML
       
    93 													//
       
    94 		_LIT8(KXml, "<?xml");						// Header info					      X      X
       
    95 		_LIT8(KEmbeddedXml, "<xml");				// Embbeded XML content			X     X
       
    96 		_LIT8(KXmlScheme, "<xs:");					// Schema						             X
       
    97 		_LIT8(KHtml, "<html");						// HTML document				X     X
       
    98 		_LIT8(KBody, "<body");						// Body element					X     X
       
    99 		_LIT8(KTitle, "<title");					// Document title				X     X
       
   100 		_LIT8(KHead, "<head");						// Information about document	X     X
       
   101 		_LIT8(KDocType, "<!doctype");				// Document type				X     X      X
       
   102 		_LIT8(KHtmlDocType, "<!DOCTYPE html");		// H/XHTML document				X     X
       
   103 		_LIT8(KXmlStyle, "<?xml-");					// Stylesheet					             X
       
   104 
       
   105 		if (aBuffer.FindF(KXml) >=0)
       
   106 			{
       
   107 			++xmlConfidence;
       
   108 			}
       
   109 
       
   110 		if (aBuffer.FindF(KEmbeddedXml) >=0)
       
   111 			{
       
   112 			++htmlConfidence;
       
   113 			}
       
   114 
       
   115 		if (aBuffer.FindF(KXmlScheme) >=0)
       
   116 			{
       
   117 			++xmlConfidence;
       
   118 			}
       
   119 
       
   120 		if (aBuffer.FindF(KHtml) >=0)
       
   121 			{
       
   122 			++htmlConfidence;
       
   123 			}
       
   124 
       
   125 		if (aBuffer.FindF(KBody) >=0)
       
   126 			{
       
   127 			++htmlConfidence;
       
   128 			}
       
   129 
       
   130 		if (aBuffer.FindF(KTitle) >=0)
       
   131 			{
       
   132 			++htmlConfidence;
       
   133 			}
       
   134 
       
   135 		if (aBuffer.FindF(KHead) >=0)
       
   136 			{
       
   137 			++htmlConfidence;
       
   138 			}
       
   139 
       
   140 		if (aBuffer.FindF(KDocType) >=0)
       
   141 			{
       
   142 			++xmlConfidence;
       
   143 			++htmlConfidence;
       
   144 			}
       
   145 
       
   146 		if (aBuffer.FindF(KHtmlDocType) >=0)
       
   147 			{
       
   148 			++htmlConfidence;
       
   149 			}
       
   150 
       
   151 		if (aBuffer.FindF(KXmlStyle) >=0)
       
   152 			{
       
   153 			++xmlConfidence;
       
   154 			}
       
   155 
       
   156 		// Use the xmlConfidence and htmlConfidence values and decide on the type.
       
   157 		// The datatype will be set to text/xml if 2 or more xml tags match.
       
   158 		// If there are 3 or more html tags the data type is overridden to text/html
       
   159 		// as if we have xml and html tags, it is likely that it is an xhtml file
       
   160 		// which has the text.html datatype. If there are more than 3 html tags
       
   161 		// the confidence is increased.
       
   162 		// If there is an extension but it doesn't match data, knock down confidence.
       
   163 		if(xmlConfidence>1)
       
   164 			{
       
   165 			iDataType=TDataType(KXmlMimeType);
       
   166 			//knock down confidence if extension is not recognised
       
   167 			iConfidence=(positionOfLastDot>=0) ? EPossible : EProbable;
       
   168 			}
       
   169 
       
   170 		if(htmlConfidence>2)
       
   171 			{
       
   172 			iDataType=TDataType(KHtmlMimeType);
       
   173 			//knock down confidence if extension is not recognised
       
   174 			iConfidence=(positionOfLastDot>=0) ? EPossible : EProbable;
       
   175 			}
       
   176 
       
   177 		if(htmlConfidence>3)
       
   178 			iConfidence=EProbable;
       
   179 		}
       
   180 	}
       
   181 
       
   182 #ifdef __UI_FRAMEWORKS_V2__
       
   183 
       
   184 const TImplementationProxy ImplementationTable[]=
       
   185 	{
       
   186     IMPLEMENTATION_PROXY_ENTRY(0x10001315,CApaWebRecognizer::NewL)
       
   187 	};
       
   188 
       
   189 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
       
   190     {
       
   191     aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
       
   192     return ImplementationTable;
       
   193     }
       
   194 
       
   195 CApaDataRecognizerType* CApaWebRecognizer::NewL()
       
   196 	{
       
   197     return new (ELeave) CApaWebRecognizer();
       
   198     }
       
   199 
       
   200 #else
       
   201 
       
   202 EXPORT_C CApaDataRecognizerType* CreateRecognizer()
       
   203 // The gate function - ordinal 1
       
   204 //
       
   205 	{
       
   206 	CApaDataRecognizerType* self = new CApaWebRecognizer();
       
   207 	return self; // NULL if new failed
       
   208 	}
       
   209 
       
   210 #endif //__UI_FRAMEWORKS_V2__
       
   211 
       
   212 
       
   213 #ifndef EKA2
       
   214 GLDEF_C TInt E32Dll(TDllReason /*aReason*/)
       
   215 //
       
   216 // DLL entry point
       
   217 //
       
   218 	{
       
   219 	return KErrNone;
       
   220 	}
       
   221 #endif // EKA2