// Copyright (c) 2002-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
//
// INCLUDE FILES
#include <e32svr.h>
#include <tuladdressstringtokenizer.h>
#include <tulpanics.h>
#include "languagespecificnumberconverter.h"
// DEFINE
const TInt KFindItemMaxNumbers = 48; // Maximum phone number supported in Contacts
const TInt KFindItemMinNumbers = 3;
const TInt KDotsInIpAddress = 3;
const TInt KNumbersInIpAddress = 3;
const TInt KCharLinefeed = 0x000A;
const TInt KCharFormfeed = 0x000C;
const TInt KCharCarriageReturn = 0x000D;
const TInt KCharLineSeparator = 0x2028;
const TInt KCharParagraphSeparator = 0x2029;
// Valid characters for different search cases
_LIT( KPhoneNumberChars, "1234567890*#-./");
_LIT( KEmailChars, "!#$?%&*+-/");
_LIT( KEmailHostChars, "abcdefghijklmnopqrstuvwxyz1234567890._-");
_LIT( KUrlChars, "%/-~?=:&,#+|");
// generic URI scheme checking
_LIT( KURISchemeStartCharacters, "abcdefghijklmnopqrstuvwxyz" );
_LIT( KURISchemeBodyCharacters, "abcdefghijklmnopqrstuvwxyz1234567890+-.");
_LIT( KURISchemeTerminator, ":");
_LIT( KURICharacters, "abcdefghijklmnopqrstuvwxyz1234567890?/;:@&=+$,%-_.!~*'()#|");
// Url Address beginnings (to support deprecated search cases)
_LIT( KHttpUrlAddress, "http://");
_LIT( KRtspUrlAddress, "rtsp://");
_LIT( KHttpsUrlAddress, "https://");
// Non-schematic URL address beginnings
_LIT( KWwwUrlAddress, "www.");
_LIT( KWapUrlAddress, "wap.");
// IP address pattern to match
_LIT( KIPAddress, "*.*.*.*");
// Panic
_LIT( KFindItemPanic, "ETUL-Panic");
GLDEF_C void Panic(TTulPanic aPanic)
{
User::Panic(KFindItemPanic, aPanic);
}
// ================= MEMBER FUNCTIONS =======================
/**
C++ default constructor.
*/
CTulAddressStringTokenizer::CTulAddressStringTokenizer()
{
// C++ default constructor must NOT contain any code, that might leave.
}
/**
Symbian OS constructor
@param aText Text that will be parsed
@param aSearchCases Identifies what items are we looking for:
EFindItemSearchPhoneNumberBin
EFindItemSearchMailAddressBin
EFindItemSearchURLBin
EFindItemSearchScheme
Any combination of these flags can be given
as a bit mask.
@param aMinNumbers Minimum count of numbers in a string when
the string is considered as a phone number.
*/
void CTulAddressStringTokenizer::ConstructL(const TDesC& aText, TInt aSearchCases, TInt aMinNumbers)
{
iMinNumbers = aMinNumbers;
HBufC* buf = NULL;
TLanguage language = User::Language();
if (language == ELangArabic || language == ELangHebrew || language == ELangUrdu ||
language == ELangFarsi || language == ELangHindi)
{
buf = aText.AllocLC();
TPtr ptr = buf->Des();
LanguageSpecificNumberConverter::ConvertToWesternNumbers( ptr );
PerformSearchL( *buf, aSearchCases );
}
else
PerformSearchL( aText, aSearchCases );
if(buf)
CleanupStack::PopAndDestroy(buf);
}
/**
Performs the search. Uses search algorithms SearchGenericUriL(), SearchMailAddressL(),
SearchUrlL() and SearchPhoneNumberL().
*/
void CTulAddressStringTokenizer::PerformSearchL(const TDesC& aText , TInt aSearchCases)
{
ASSERT(!iFoundItems);
iFoundItems = new (ELeave) CArrayFixFlat<SFoundItem>(2);
TBool somethingSearched = EFalse;
// first detect binary mask cases
if ( (aSearchCases & EFindItemSearchScheme) == EFindItemSearchScheme)
{
SearchGenericUriL( aText );
somethingSearched = ETrue;
}
// Prioritize mail address found before URL search. Address name@123.123.123.123
// would otherwise be detected as IP number.
if ( (aSearchCases & EFindItemSearchMailAddressBin) == EFindItemSearchMailAddressBin)
{
SearchMailAddressL( aText );
somethingSearched = ETrue;
}
if ( (aSearchCases & EFindItemSearchURLBin) == EFindItemSearchURLBin)
{
SearchUrlL( aText, ETrue );
somethingSearched = ETrue;
}
// Search phone numbers last as all others can contain number sequences
if ( (aSearchCases & EFindItemSearchPhoneNumberBin) == EFindItemSearchPhoneNumberBin)
{
SearchPhoneNumberL( aText );
somethingSearched = ETrue;
}
// if no binary cases found
if ( !somethingSearched )
{
__ASSERT_DEBUG( 1, Panic(ETulPanicInvalidTokenizerSearchCase) );
}
}
/**
Two-phase constructor method that is used to create a new instance
of the CTulAddressStringTokenizer class. This instance can then be queried for
the items defined by the second parameter. The actual search is
executed during construction.
@param aText will be parsed.
@param aSearchCases identifies what items we are looking for:
EFindItemSearchPhoneNumberBin
EFindItemSearchMailAddressBin
EFindItemSearchURLBin
EFindItemSearchScheme
Any combination of these flags can be given as a bit mask.
@return a pointer to a new instance of CTulAddressStringTokenizer class.
@panic ETulPanicInvalidTokenizerSearchCase in debug build if there is no valid search case.
@panic ETulPanicDescriptorLength in debug build if item's position
and/or length is out of the document's range.
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
*/
EXPORT_C CTulAddressStringTokenizer* CTulAddressStringTokenizer::NewL(const TDesC& aText, TInt aSearchCases)
{
CTulAddressStringTokenizer* self = new (ELeave) CTulAddressStringTokenizer;
CleanupStack::PushL(self);
self->ConstructL(aText, aSearchCases, KFindItemMinNumbers);
CleanupStack::Pop();
return self;
}
/**
Two-phase constructor method that is used to create a new instance
of the CTulAddressStringTokenizer class. This instance can then be queried for
the items defined by the second parameter. The actual search is
executed during construction.
@param aText will be parsed.
@param aSearchCases identifies what items we are looking for:
EFindItemSearchPhoneNumberBin
EFindItemSearchMailAddressBin
EFindItemSearchURLBin
EFindItemSearchScheme
Any combination of these flags can be given as a bit mask.
@param aMinNumbers defines a minimum count of numbers in a phone
number string, during a phone number
search.
@return a pointer to an new instance of CTulAddressStringTokenizer class.
@panic ETulPanicInvalidTokenizerSearchCase in debug build if there is no valid search case.
@panic ETulPanicDescriptorLength in debug build if item's position
and/or length is out of the document's range.
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
*/
EXPORT_C CTulAddressStringTokenizer* CTulAddressStringTokenizer::NewL(const TDesC& aText, TInt aSearchCases,
TInt aMinNumbers )
{
CTulAddressStringTokenizer* self = new (ELeave) CTulAddressStringTokenizer;
CleanupStack::PushL( self );
self->ConstructL( aText, aSearchCases, aMinNumbers );
CleanupStack::Pop( );
return self;
}
/**
Destructor.
*/
EXPORT_C CTulAddressStringTokenizer::~CTulAddressStringTokenizer()
{
delete iFoundItems;
}
// API methods for using engine separately from FindItemUI
/**
Executes a new search with the already created CTulAddressStringTokenizer
instance. The position in the found items array is reset to the
beginning of the array.
@param aText will be parsed.
@param aSearchCases identifies what items are we looking for:
EFindItemSearchPhoneNumberBin
EFindItemSearchMailAddressBin
EFindItemSearchURLBin
EFindItemSearchScheme
Any combination of these flags can be given as a bit mask.
@return number of found items.
@panic ETulPanicInvalidTokenizerSearchCase in debug build if there is no valid search case.
@panic ETulPanicDescriptorLength in debug build if item's position
and/or length is out of the document's range.
@leave one of the Symbian error codes.
*/
EXPORT_C TInt CTulAddressStringTokenizer::DoNewSearchL(const TDesC& aText, TInt aSearchCases)
{
return DoNewSearchL( aText, aSearchCases, KFindItemMinNumbers );
}
// API methods for using engine separately from FindItemUI
/**
Executes a new search with the already created CTulAddressStringTokenizer
instance. The position in the found items array is reset to the
beginning of the array.
@param aText will be parsed.
@param aSearchCases identifies what items are we looking for:
EFindItemSearchPhoneNumberBin
EFindItemSearchMailAddressBin
EFindItemSearchURLBin
EFindItemSearchScheme
Any combination of these flags can be given as a bit mask.
@param aMinNumbers defines a minimum count of numbers in a phone
number string, during a phone number
search.
@return number of found items.
@panic ETulPanicInvalidTokenizerSearchCase in debug build if there is no valid search case.
@panic ETulPanicDescriptorLength in debug build if item's position
and/or length is out of the document's range.
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
*/
EXPORT_C TInt CTulAddressStringTokenizer::DoNewSearchL(const TDesC& aText, TInt aSearchCases, TInt aMinNumbers)
{
delete iFoundItems;
iFoundItems = NULL;
iPosition = 0;
ConstructL(aText, aSearchCases, aMinNumbers);
return ItemCount();
}
// ---------------------------------------------------------
// Search Algorithms and helpers
// ---------------------------------------------------------
/**
Character information methods
@param charac a Character to be investigated
@return ETrue if the parameter for phone number was valid, else returns EFalse
*/
TBool CTulAddressStringTokenizer::IsValidPhoneNumberChar(const TChar& aCharac)
{
// Returns ETrue if the parameter is a valid character in a phonenumber
const TDesC& array = KPhoneNumberChars;
return (array.Locate(aCharac) != KErrNotFound);
}
/**
Character information methods
@param charac a Character to be investigated
@return ETrue if the parameter for login part of the e-mail address is valid, else returns EFalse
*/
TBool CTulAddressStringTokenizer::IsValidEmailChar(const TChar& aCharac)
{
// Returns ETrue if the parameter is a valid character for username part of e-mail address
// ASCII 33 - 47 (without '"', ',', ''', '(' and ')' because they can be used to separate address
// from rest of the text )
const TDesC& array = KEmailChars;
return (array.Locate(aCharac) != KErrNotFound || IsValidEmailHostChar(aCharac));
}
/**
Character information methods
@param charac a Character to be investigated
@return ETrue if the parameter for host part of the e-mail address is valid, else returns EFalse
*/
TBool CTulAddressStringTokenizer::IsValidEmailHostChar(const TChar& aCharac)
{
// Returns ETrue if the parameter is a valid character for a host part of e-mail address
const TDesC& array = KEmailHostChars;
return (array.Locate( aCharac.GetLowerCase() ) != KErrNotFound);
}
/**
Character information methods
@param charac a Character to be investigated
@return ETrue if the parameter for URL is valid, else returns EFalse
*/
TBool CTulAddressStringTokenizer::IsValidUrlChar(const TChar& aCharac)
{
// Returns ETrue if the parameter is a valid character for url
if (IsValidEmailHostChar(aCharac))
return ETrue;
const TDesC& array = KUrlChars;
return (array.Locate(aCharac) != KErrNotFound);
}
/**
Search algorithm for searching e-mail addresses
@param aText Text that will be parsed
@return ETrue if any EMail items were found else returns EFalse
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
@panic ETulPanicDescriptorLength in debug build if item's position
and/or length is out of the document's range.
*/
TBool CTulAddressStringTokenizer::SearchMailAddressL( const TDesC& aText )
{
TInt searchStart = 0;
TInt searchResult = 0;
const TInt end = aText.Length(); // end of document
do
{
TPtrC segment = aText.Right( end - searchStart );
searchResult = segment.LocateF('@');
if (searchResult != KErrNotFound)
{ // @ found
// There should be valid characters (not a period) before and after the @ character
if ( searchResult == 0 // first char
|| (searchResult >= segment.Length() - 1) // last char
|| !(IsValidEmailChar(segment[searchResult - 1]))
|| !(IsValidEmailHostChar(segment[searchResult + 1]))
|| segment[searchResult - 1] == '.'
|| segment[searchResult + 1] == '.'
)
{
searchStart += searchResult + 1;
continue;
}
TBool wasPeriod = EFalse; // To prevent sequential periods
// Get TLex from the pointer to get a better API for parsing
TLexMark startPos;
TLexMark endPos;
TLex token = segment;
// Go to searchResult and un-get until the beginning of e-mail address is reached
token.Inc( searchResult );
token.Mark();
do
{
token.UnGet();
if ( token.Peek() == '.' )
{ // If it was a period
if (wasPeriod) // and if the former was also -> break
break;
else // else mark that this one was a period
wasPeriod = ETrue;
}
else
wasPeriod = EFalse;
}
while (token.Offset() > 0 && IsValidEmailChar(token.Peek()));
if (token.Offset() != 0 || !IsValidEmailChar(token.Peek()))
token.Inc();
// Get rid of periods from the start of address
// Does it have to start with a number or char(abc...).
// If it does, the loop should check that it gets rid of all special chars also.
while (token.Peek() == '.')
token.Inc();
token.Mark( startPos ); // Mark the beginning of address
token.UnGetToMark();
wasPeriod = EFalse;
do // Go forward until a nonvalid character
{
token.Inc();
if ( token.Peek() == '.' )
{ // If it was a period
if ( wasPeriod ) // and if the former was also -> break
break;
else // else mark that this one was a period
wasPeriod = ETrue;
}
else
wasPeriod = EFalse;
}
while ( !token.Eos() && IsValidEmailHostChar( token.Peek() ) );
// If address ends with a period take it away
token.UnGet();
if (token.Peek() != '.')
token.Inc();
token.Mark( endPos ); // Mark the beginning of address
// Append the found string to the array
__ASSERT_DEBUG( searchStart + token.MarkedOffset( startPos )
+ token.MarkedOffset( endPos )
- token.MarkedOffset( startPos ) <= aText.Length(),
Panic(ETulPanicDescriptorLength) );
AddItemL( searchStart + token.MarkedOffset( startPos ),
token.MarkedOffset( endPos ) - token.MarkedOffset( startPos ),
EFindItemSearchMailAddressBin);
searchStart += token.MarkedOffset( endPos ) + 1;
}
}
while ( searchResult != KErrNotFound && searchStart < end );
return (iFoundItems->Count() > 0);
}
/**
Search algorithm for searching phone numbers
@param aText Text that will be parsed
@return ETrue if any Phone Number items were found else returns EFalse
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
@panic ETulPanicDescriptorLength in debug build if item's position
and/or length is out of the document's range.
*/
TBool CTulAddressStringTokenizer::SearchPhoneNumberL( const TDesC& aText )
{
TLexMark startMark; // Points to the start of the found phone number
TLexMark endMark; // Points to the end of the found phone number
TLexMark mark;
const TInt end = aText.Length();
TLex number = aText;
while ( !(number.Eos()) )
{
TInt numberCount = 0; // How many real numbers (1234567890)
TInt bracketsOpen = 0; // How many brackets are currently open
TInt brackets = 0; // How many brackets overall
TChar charac = number.Peek();
while( (!(IsValidPhoneNumberChar( charac ) || charac == '+'
|| charac == '(' ) || charac == '-' || charac == '.' || charac == '/')
&& !(number.Eos()) && number.Offset() < end )
{
number.Inc();
charac = number.Peek();
}
if ( number.Offset() >= end )
break;
if ( number.Peek() == '#' )
{
number.Inc();
if (number.Peek() == '.' )
continue;
number.UnGet();
}
if ( number.Peek() == '+' )
{ // '+' has to be followed by a number (not # or * ...)
number.Inc();
if ( !(number.Peek().IsDigit()) )
continue;
number.UnGet();
}
if ( number.Peek() == '(' )
{ // '(' has to be followed by valid phone number
// character (whitespaces are allowed before) or '+' is a next character
number.Inc();
if ( !(number.Peek() == '+') )
{
number.Mark(mark);
number.SkipSpace();
charac = number.Peek();
if ( !( IsValidPhoneNumberChar(charac) || charac == '+'
|| charac == '(' ) || charac == '-' || charac == '.' || charac == '/')
{
number.Inc();
continue;
}
else
{
number.UnGetToMark(mark);
number.UnGet();
number.Mark(startMark);
}
}
else
{
number.UnGet();
number.Mark(startMark);
number.Inc();
}
bracketsOpen++;
brackets++;
}
else
number.Mark(startMark);
if ( number.Peek().IsDigit() ) // If the character was a number
numberCount++;
else if ( bracketsOpen > 0 )
{
number.Inc();
TChar next = number.Peek();
TInt bracketsOpen2 = bracketsOpen;
while( (IsValidPhoneNumberChar( next ) || next.IsSpace()
|| next == '(' || next == ')' || next == 'p' || next == '+'
|| next == 'w' ) && !(number.Eos()) && number.Offset() < end)
{
if ( next == '(' )
bracketsOpen2++;
else if ( next == ')' )
bracketsOpen2--;
if ( bracketsOpen2 == 0 )
break;
number.Inc();
next = number.Peek();
}
number.UnGetToMark(startMark);
if ( bracketsOpen2 != 0 )
{
number.Inc();
continue;
}
}
number.Inc();
while ( number.Peek() == '(' && !(number.Eos()) && bracketsOpen > 0 )
{
number.Inc();
bracketsOpen++;
}
if ( number.Peek() == '+' && bracketsOpen > 0 )
number.Inc();
// a Valid first character has been found. Let's go forward as long as valid characters are found.
charac = number.Peek();
while( (IsValidPhoneNumberChar( charac ) || charac.IsSpace()
|| charac == '(' || charac == ')' || charac == 'p'
|| charac == 'w' ) && !(number.Eos()) && number.Offset() < end
&& charac != KCharLinefeed && charac != KCharFormfeed
&& charac != KCharCarriageReturn
&& charac != KCharLineSeparator
&& charac != KCharParagraphSeparator )
{
if ( number.Peek() == '(' )
{ // '(' can't be the last character in phone number
number.Mark(mark);
number.Inc();
TChar spaceJump = number.Peek();
while ( !number.Eos() && spaceJump.IsSpace() && spaceJump != KCharLinefeed
&& spaceJump != KCharFormfeed && spaceJump != KCharCarriageReturn
&& charac != KCharLineSeparator && spaceJump != KCharParagraphSeparator)
{
number.Inc();
spaceJump = number.Peek();
}
if ( !(IsValidPhoneNumberChar(number.Peek())) && number.Peek() != ')'
&& number.Peek() != '(' )
{
number.UnGetToMark(mark);
break;
}
TChar next = number.Peek();
TInt bracketsOpen2 = bracketsOpen + 1;
while( (IsValidPhoneNumberChar( next ) || next.IsSpace()
|| next == '(' || next == ')' || next == 'p'
|| next == 'w' ) && !(number.Eos()) && number.Offset() < end)
{
if ( next == '(' )
bracketsOpen2++;
else if ( next == ')' )
bracketsOpen2--;
if ( bracketsOpen2 == 0 )
break;
number.Inc();
next = number.Peek();
}
number.UnGetToMark(mark);
if ( bracketsOpen2 != 0 )
break;
bracketsOpen++;
brackets++;
}
else if ( number.Peek() == ')' )
{
if ( bracketsOpen <= 0 ) // there has to be equal number of brackets
break;
bracketsOpen--;
number.Mark(mark);
number.Inc();
if ( number.Peek() == '.' ) // '.' is not allowed after ')'
break;
number.UnGetToMark(mark);
}
else if ( number.Peek() == '-' || number.Peek() == 'w'
|| number.Peek() == 'p' || number.Peek() == '.' || number.Peek() == '/')
{ // Hyphen mark and 'p' & 'w' chars must be followed by a number
TChar last = number.Peek();
number.Mark(mark);
number.Inc();
TChar spaceJump = number.Peek();
while ( !number.Eos() && spaceJump.IsSpace() && spaceJump != KCharLinefeed
&& spaceJump != KCharFormfeed && spaceJump != KCharCarriageReturn
&& charac != KCharLineSeparator && spaceJump != KCharParagraphSeparator )
{
number.Inc();
spaceJump = number.Peek();
}
if ( !(number.Peek().IsDigit()) )
{
if (last == '.' && number.Peek() == ')' && bracketsOpen > 0 )
continue;
else
{
number.UnGetToMark(mark);
break;
}
}
number.UnGetToMark(mark);
}
else if ( number.Peek().IsDigit() )
numberCount++;
number.Inc();
charac = number.Peek();
}
// Get rid of whitespaces from the end
number.UnGet();
while( number.Peek().IsSpace() && !(number.Eos()))
number.UnGet();
number.Inc();
// ------------------------------------
number.Mark(endMark);
// If they exist, remove brackets from the beginning and the end
number.Mark(mark); // Let's mark where to continue the search
TBool endBrackets = ETrue;
do
{
number.UnGet();
if ( number.Peek() == ')' )
{
number.UnGetToMark(startMark);
if ( number.Peek() == '(' )
{
// If there's more than one pair of brackets -> don't strip them.
if ( brackets > 1 )
break;
number.Inc();
number.Mark(startMark);
number.UnGetToMark(endMark);
number.UnGet();
number.Mark(endMark);
// Get rid of whitespaces and periods from the end and from the beginning
number.UnGet();
while ( (number.Peek().IsSpace() || number.Peek() == '.')
&& number.Offset() > number.MarkedOffset(startMark) )
{ // from the end
number.UnGet();
}
number.Inc();
number.Mark(endMark);
number.UnGetToMark(startMark);
while ( (number.Peek().IsSpace() || number.Peek() == '.')
&& number.Offset() < number.MarkedOffset(endMark) )
{ // from the beginning
number.Inc();
}
number.Mark(startMark);
number.UnGetToMark(endMark);
// ----
}
else
endBrackets = EFalse;
}
else
endBrackets = EFalse;
}
while ( endBrackets );
number.UnGetToMark(mark);
// ----------------
if ( numberCount <= KFindItemMaxNumbers && numberCount >= iMinNumbers )
{
TPtrC tokenPtr = number.MarkedToken(startMark);
TInt tokensEnd = tokenPtr.Length();
TInt numbers = 0;
TInt partialNumber = 0;
TBool wasValidPhoneNumber = ETrue;
TInt i = 0;
for ( ; i < tokensEnd; i++ )
{
if ( tokenPtr[i] == '.' )
partialNumber = 0;
else if ( ((TChar)tokenPtr[i]).IsDigit() )
{
numbers++;
partialNumber++;
}
if ( ( partialNumber == 1 || partialNumber == 2 ) && i + 1 < tokensEnd )
{
if ( tokenPtr[i + 1] == '.' )
wasValidPhoneNumber = EFalse;
}
}
if (!wasValidPhoneNumber && numbers > 6)
wasValidPhoneNumber = ETrue;
if (wasValidPhoneNumber)
{
__ASSERT_DEBUG( number.MarkedOffset(startMark) + number.MarkedOffset(endMark)
- number.MarkedOffset(startMark) <= aText.Length(),
Panic(ETulPanicDescriptorLength) );
AddItemL( number.MarkedOffset(startMark),
number.MarkedOffset(endMark) - number.MarkedOffset(startMark),
EFindItemSearchPhoneNumberBin );
}
}
}
return (iFoundItems->Count() > 0);
}
/**
Parses URL from a token. Is used by SearchUrlL method and if a URL
was found it's appended to item array. Note that parsing for generic URIs
is done with SearchGenericUriL -method.
@param aType a Type of URL to seach, i.e.
www.
wap.
IP e.g.127.0.0.1
@param aTokenPtr Pointer to token that will be parsed
@param aTextOffset Offset of the token (start position in the whole text)
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
@return ETrue if the parameter for phone number is valid, else returns EFalse
*/
TBool CTulAddressStringTokenizer::ParseUrlL(const TDesC& aType, const TPtrC& aTokenPtr, TInt aTextOffset)
{
TBool wasValidUrl = EFalse;
TLex url;
TInt position = aTokenPtr.FindF( aType );
if ( position != KErrNotFound )
{ // address start found
url = aTokenPtr.Right( aTokenPtr.Length() - position );
url.Inc( aType.Length() );
while( IsValidUrlChar( url.Peek() ) && !(url.Eos()) )
{
if( url.Peek() == ':' )
{
url.Inc();
if ( !url.Peek().IsDigit() )
{
url.UnGet();
break;
}
}
else
url.Inc();
}
// If a period or question mark was followed by a whitespace remove it
if ( url.Eos() ) // Can't be followed by white space if it's
{ // the last character at token
url.UnGet();
if ( url.Peek() != '.' && url.Peek() != '?' && url.Peek() != ',' ) // If it wasn't a period or question mark
url.Inc();
}
url.Mark();
wasValidUrl = ETrue;
}
if ( wasValidUrl && ( url.MarkedOffset() > aType.Length() ) )
{
AddItemL( aTextOffset - aTokenPtr.Length() + position, url.MarkedOffset(), EFindItemSearchURLBin );
return ETrue;
}
return EFalse;
}
/**
Search fixed start URLs, i.e. URLs without schema (www., wap.).
Also finds IPv4 addresses (*.*.*.*).
As a special case, supports deprecated hardcoded schematic addresses finding
(http://, https://, rtsp://) to make sure deprecated search cases work
as they did previously.
@param aText Text that will be parsed
@param aFindFixedSchemas If true, will find old fixed schematic URLs also
@return ETrue if any URL are found else returns EFalse
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
@panic ETulPanicDescriptorLength in debug build if item's position and/or length is out of the document's range.
*/
TBool CTulAddressStringTokenizer::SearchUrlL( const TDesC& aText, const TBool aFindFixedSchemas )
{
TLex text = aText;
while ( !text.Eos() )
{
while( !(text.Eos()) && !IsValidUrlChar( text.Peek() ) )
text.Inc();
text.Mark();
while( !(text.Eos()) && IsValidUrlChar( text.Peek() ) )
text.Inc();
TPtrC tokenPtr = text.MarkedToken();
TBool wasValidUrl = EFalse;
if ( aFindFixedSchemas ) // Search for http://
wasValidUrl = ParseUrlL( KHttpUrlAddress, tokenPtr, text.Offset() );
if (aFindFixedSchemas && !wasValidUrl) // Search for https://
wasValidUrl = ParseUrlL( KHttpsUrlAddress, tokenPtr, text.Offset() );
if (aFindFixedSchemas && !wasValidUrl) // Search for rtsp://
wasValidUrl = ParseUrlL( KRtspUrlAddress, tokenPtr, text.Offset() );
if ( !wasValidUrl ) // Search for www.
wasValidUrl = ParseUrlL( KWwwUrlAddress, tokenPtr, text.Offset() );
if ( !wasValidUrl ) // Search for wap.
wasValidUrl = ParseUrlL( KWapUrlAddress, tokenPtr, text.Offset() );
if ( !wasValidUrl ) // Search for IP-address (xxx.xxx.xxx.xxx)
{
if ( tokenPtr.Match( KIPAddress ) != KErrNotFound )
{
TInt periods = 0;
wasValidUrl = ETrue;
TBool endWithPunctuation = EFalse;
TBool betweenBrackets = EFalse;
// First see if token ends with ",",".","!","?",";" or ":"
TChar charac = tokenPtr[tokenPtr.Length() - 1];
TChar charac0 = tokenPtr[0];
if ( charac == ',' || charac == '.' ||
charac == '!' || charac == '?' ||
charac == ';' || charac == ':' )
{
endWithPunctuation = ETrue;
}
// Or if it starts and ends with brackets or quotation marks
else if ( ( charac0 == '(' && charac == ')' )
|| ( charac0 == '"' && charac == '"' )
|| ( charac0 == '[' && charac == ']' )
|| ( charac0 == '<' && charac == '>' ) )
{
betweenBrackets = ETrue;
}
TInt i = 0;
TInt tokensEnd = tokenPtr.Length();
if ( endWithPunctuation )
tokensEnd--;
else if ( betweenBrackets )
{
i = 1;
tokensEnd--;
}
// Take a closer look to see if a valid IP-address
TBuf<3> ipPart;
TInt numbers = 0;
for ( ; i < tokensEnd; i++ )
{
if ( !( ((TChar)tokenPtr[i]).IsDigit() || tokenPtr[i] == '.' ) )
{
wasValidUrl = EFalse;
break;
}
if ( tokenPtr[i] == '.' )
periods++;
else
numbers++;
if ( numbers > KNumbersInIpAddress || periods > KDotsInIpAddress )
{
wasValidUrl = EFalse;
break;
}
if ( ((TChar)tokenPtr[i]).IsDigit() )
{
ipPart.Append( tokenPtr[i] );
TBool checkInt = EFalse;
if ( i + 1 < tokensEnd )
{
if ( tokenPtr[i+1] == '.' )
checkInt = ETrue;
}
if ( i == tokensEnd - 1 || checkInt )
{
TLex val = ipPart;
TInt numberInt;
TInt error = val.Val( numberInt );
if ( error != KErrNone || numberInt > 255 )
{
wasValidUrl = EFalse;
break;
}
numbers = 0;
ipPart.Delete( 0, ipPart.Length() );
}
}
}
if ( wasValidUrl && periods == KDotsInIpAddress )
{
TInt startPos = text.Offset() - tokenPtr.Length();
TInt length = tokenPtr.Length();
// If there was a punctuation at the end or brackets, let's take it/them away
if ( endWithPunctuation || betweenBrackets)
{
length--;
if ( betweenBrackets )
{
startPos++;
length--;
}
}
__ASSERT_DEBUG( startPos + length <= aText.Length(), Panic(ETulPanicDescriptorLength) );
AddItemL( startPos, length, EFindItemSearchURLBin );
}
}
}
}
return (iFoundItems->Count() > 0);
}
/**
Search algorithm for searching generic URIs
@param aText Text that will be parsed
@return ETrue if any generic URI is found else returns EFalse
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
*/
TBool CTulAddressStringTokenizer::SearchGenericUriL( const TDesC& aText )
{
// Detect generic URI within the token
const TDesC& schemeStartArray = KURISchemeStartCharacters;
const TDesC& schemeBodyArray = KURISchemeBodyCharacters;
const TDesC& schemeTerminatorArray = KURISchemeTerminator;
const TDesC& URIArray = KURICharacters;
TBool wasValidUri = EFalse;
TLex text = aText;
while ( !text.Eos() )
{
// Discard characters until URI scheme terminator is found
while( !(text.Eos()) && schemeTerminatorArray.Locate(text.Peek()) == KErrNotFound )
text.Inc();
// if at end of the text, no legit URI found
if ( !text.Eos() )
{
// Store the schema end offset (+1 to skip ':')
TInt schemeEndOffset = text.Offset() + 1;
// Scheme must be at least 1 character long at the beginning of the text to be valid
if ( text.Offset() > 0 )
{
// Un-get last scheme character to begin examination
text.UnGet();
// Rewind until beginning of the URI
while ( text.Offset() > 0 && schemeBodyArray.Locate(text.Peek().GetLowerCase()) != KErrNotFound )
text.UnGet();
// Now text pointer is at first character of the URI
// Do go back through the scheme until a legal beginning character for URI
// is found or back to the (schemeEndOffset - 1) i.e. URI scheme terminator
while ( schemeStartArray.Locate(text.Peek().GetLowerCase()) == KErrNotFound && (text.Offset() + 1) < schemeEndOffset )
text.Inc();
// check if terminated because a valid start character was found when
// scheme terminator was reached.
if ( schemeStartArray.Locate(text.Peek().GetLowerCase()) != KErrNotFound )
{
// First character is a valid URI char, so the scheme is valid ->
// marks the beginning of the array
text.Mark();
// fast forward to the end of the scheme
while( text.Offset() < schemeEndOffset )
text.Inc();
// Get characters until end of schema
while( !(text.Eos()) && URIArray.Locate( text.Peek().GetLowerCase() ) != KErrNotFound )
text.Inc();
// remove certain punctuation from end of the URI, as it is likely
// to be part of the surrounding text.
text.UnGet();
//special processing for bracket
//only remove the end bracket if there is no open bracket in the uri
//not counting bracket pairs for efficiency
if (text.Peek()!=')' || text.MarkedToken().Locate(TChar('('))!=-1)
text.Inc();
text.UnGet();
if ( text.Peek() != '.' && text.Peek() != '?' && text.Peek() != ',')
text.Inc();
// URI cannot contain only scheme, so check that pointer was increased
// by at least one character
if ( schemeEndOffset != text.Offset() )
{
// Append found text to item array (it is now known to be
// syntactically valid URI as it contains characters after the scheme)
AddItemL( text.MarkedOffset(), text.Offset() - text.MarkedOffset(), EFindItemSearchScheme );
wasValidUri = ETrue;
}
}
else // First character of scheme is not legit, fast forward to end of the
// scheme anyway to continue search
{
while( text.Offset() < schemeEndOffset )
text.Inc();
}
}
else
text.Inc();
}
}
return wasValidUri;
}
// ---------------------------------------------------------
// Position and count methods
// ---------------------------------------------------------
/**
Gets the number of items in the found items array.
@return the number of items in the found items array.
*/
EXPORT_C TInt CTulAddressStringTokenizer::ItemCount() const
{
return (iFoundItems ? iFoundItems->Count() : 0);
}
/**
Gets the current position (or the position of the currently selected item)
in the found items array.
@return the current position in the found items array of the
CTulAddressStringTokenizer instance. If no items are in the array, zero is returned.
*/
EXPORT_C TInt CTulAddressStringTokenizer::Position() const
{
return iPosition;
}
/**
Resets the position in item array to zero (beginning of the array).
*/
EXPORT_C void CTulAddressStringTokenizer::ResetPosition()
{
iPosition = 0;
}
// ---------------------------------------------------------
// GetItem methods
// ---------------------------------------------------------
/**
Gets the array of found items. Returns a constant pointer to the
found items array of the CTulAddressStringTokenizer instance. The items cannot
be modified through this pointer, only accessed. The ownership of
the array stays with CTulAddressStringTokenizer.
@return a constant pointer to the array of found items. Ownership
stays with CTulAddressStringTokenizer.
*/
EXPORT_C const CArrayFixFlat<CTulAddressStringTokenizer::SFoundItem>* CTulAddressStringTokenizer::ItemArray() const
{
return iFoundItems;
}
/**
Gets the currently 'selected' item in the array of found items.
@param aItem contains the currently selected item after returning.
@return ETrue if the item was found. EFalse if the item wasn't found.
*/
EXPORT_C TBool CTulAddressStringTokenizer::Item( SFoundItem& aItem ) const
{ // Return EFalse if no items were found
if ( iFoundItems->Count() <= 0 )
{
aItem.iStartPos = 0;
aItem.iLength = 0;
aItem.iItemType = EFindItemSearchPhoneNumberBin;
return EFalse;
}
aItem = iFoundItems->At( iPosition );
return ETrue;
}
/**
Gets the next found item relative to the currently selected item.
Moves the selection to point to the next item in the array of
found items.
@param aItem contains the next item after returning.
@return ETrue if the item was found. EFalse if there's no next item.
*/
EXPORT_C TBool CTulAddressStringTokenizer::NextItem( SFoundItem& aItem )
{
if (iFoundItems->Count() <= (iPosition + 1))
return EFalse;
iPosition++;
aItem = iFoundItems->At( iPosition );
return ETrue;
}
/**
Gets the previous found item relative to the currently selected
item. Moves the selection to point to the previous item in the
array of found items..
@param aItem contains the previous item after returning.
@return ETrue if the item was found. EFalse if there's no previous item.
*/
EXPORT_C TBool CTulAddressStringTokenizer::PrevItem( SFoundItem& aItem )
{
if ( iPosition <= 0 )
return EFalse;
iPosition--;
aItem = iFoundItems->At( iPosition );
return ETrue;
}
/**
Adds item to search arrays. Adding is done so that arrays are always sorted.
If added element would overlap a previously found element, it is not added.
@param aStartPos Start position of the found item
@param aLength Length of found item
@param aType Type of the found item
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
*/
void CTulAddressStringTokenizer::AddItemL(TInt aStartPos, TInt aLength, TTokenizerSearchCase aType )
{
// Create item element
SFoundItem foundItem;
foundItem.iStartPos = aStartPos;
foundItem.iLength = aLength;
foundItem.iItemType = aType;
// Add item data to arrays
TKeyArrayFix insertKey(0, ECmpTInt);
iFoundItems->InsertIsqAllowDuplicatesL(foundItem, insertKey);
}
// End of File