diff -r 000000000000 -r 2f259fa3e83a lafagnosticuifoundation/uigraphicsutils/tulsrc/tuladdressstringtokenizer.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lafagnosticuifoundation/uigraphicsutils/tulsrc/tuladdressstringtokenizer.cpp Tue Feb 02 01:00:49 2010 +0200 @@ -0,0 +1,1251 @@ +// Copyright (c) 2002-2009 Nokia Corporation and/or its subsidiary(-ies). +// All rights reserved. +// This component and the accompanying materials are made available +// under the terms of "Eclipse Public License v1.0" +// which accompanies this distribution, and is available +// at the URL "http://www.eclipse.org/legal/epl-v10.html". +// +// Initial Contributors: +// Nokia Corporation - initial contribution. +// +// Contributors: +// +// Description: +// + + + +// INCLUDE FILES +#include +#include +#include +#include "languagespecificnumberconverter.h" + +// DEFINE + +const TInt KFindItemMaxNumbers = 20; +const TInt KFindItemMinNumbers = 3; +const TInt KDotsInIpAddress = 3; +const TInt KNumbersInIpAddress = 3; + +const TInt KCharLinefeed = 0x000A; +const TInt KCharFormfeed = 0x000C; +const TInt KCharCarriageReturn = 0x000D; +const TInt KCharLineSeparator = 0x2028; +const TInt KCharParagraphSeparator = 0x2029; + +// Valid characters for different search cases +_LIT( KPhoneNumberChars, "1234567890*#-./"); +_LIT( KEmailChars, "!#$?%&*+-/"); +_LIT( KEmailHostChars, "abcdefghijklmnopqrstuvwxyz1234567890._-"); +_LIT( KUrlChars, "%/-~?=:&,#+|"); + +// generic URI scheme checking +_LIT( KURISchemeStartCharacters, "abcdefghijklmnopqrstuvwxyz" ); +_LIT( KURISchemeBodyCharacters, "abcdefghijklmnopqrstuvwxyz1234567890+-."); +_LIT( KURISchemeTerminator, ":"); +_LIT( KURICharacters, "abcdefghijklmnopqrstuvwxyz1234567890?/;:@&=+$,%-_.!~*'()#|"); + +// Url Address beginnings (to support deprecated search cases) +_LIT( KHttpUrlAddress, "http://"); +_LIT( KRtspUrlAddress, "rtsp://"); +_LIT( KHttpsUrlAddress, "https://"); + +// Non-schematic URL address beginnings +_LIT( KWwwUrlAddress, 
"www."); +_LIT( KWapUrlAddress, "wap."); + +// IP address pattern to match +_LIT( KIPAddress, "*.*.*.*"); + +// Panic +_LIT( KFindItemPanic, "ETUL-Panic"); + +GLDEF_C void Panic(TTulPanic aPanic) + { + User::Panic(KFindItemPanic, aPanic); + } + + +// ================= MEMBER FUNCTIONS ======================= + +/** +C++ default constructor. +*/ +CTulAddressStringTokenizer::CTulAddressStringTokenizer() + { + // C++ default constructor must NOT contain any code, that might leave. + } + +/** +Symbian OS constructor +@param aText Text that will be parsed +@param aSearchCases Identifies what items are we looking for: + EFindItemSearchPhoneNumberBin + EFindItemSearchMailAddressBin + EFindItemSearchURLBin + EFindItemSearchScheme + Any combination of these flags can be given + as a bit mask. +@param aMinNumbers Minimum count of numbers in a string when + the string is considered as a phone number. +*/ +void CTulAddressStringTokenizer::ConstructL(const TDesC& aText, TInt aSearchCases, TInt aMinNumbers) + { + iMinNumbers = aMinNumbers; + HBufC* buf = NULL; + TLanguage language = User::Language(); + + if (language == ELangArabic || language == ELangHebrew || language == ELangUrdu || + language == ELangFarsi || language == ELangHindi) + { + buf = aText.AllocLC(); + TPtr ptr = buf->Des(); + LanguageSpecificNumberConverter::ConvertToWesternNumbers( ptr ); + PerformSearchL( *buf, aSearchCases ); + } + else + PerformSearchL( aText, aSearchCases ); + + if(buf) + CleanupStack::PopAndDestroy(buf); + } + +/** +Performs the search. Uses search algorithms SearchGenericUriL(), SearchMailAddressL(), +SearchUrlL() and SearchPhoneNumberL(). 
+*/ +void CTulAddressStringTokenizer::PerformSearchL(const TDesC& aText , TInt aSearchCases) + { + ASSERT(!iFoundItems); + iFoundItems = new (ELeave) CArrayFixFlat(2); + + TBool somethingSearched = EFalse; + + // first detect binary mask cases + if ( (aSearchCases & EFindItemSearchScheme) == EFindItemSearchScheme) + { + SearchGenericUriL( aText ); + somethingSearched = ETrue; + } + + // Prioritize mail address found before URL search. Address name@123.123.123.123 + // would otherwise be detected as IP number. + if ( (aSearchCases & EFindItemSearchMailAddressBin) == EFindItemSearchMailAddressBin) + { + SearchMailAddressL( aText ); + somethingSearched = ETrue; + } + + if ( (aSearchCases & EFindItemSearchURLBin) == EFindItemSearchURLBin) + { + SearchUrlL( aText, ETrue ); + somethingSearched = ETrue; + } + + // Search phone numbers last as all others can contain number sequences + if ( (aSearchCases & EFindItemSearchPhoneNumberBin) == EFindItemSearchPhoneNumberBin) + { + SearchPhoneNumberL( aText ); + somethingSearched = ETrue; + } + + // if no binary cases found + if ( !somethingSearched ) + { + __ASSERT_DEBUG( 1, Panic(ETulPanicInvalidTokenizerSearchCase) ); + } + } + +/** +Two-phase constructor method that is used to create a new instance +of the CTulAddressStringTokenizer class. This instance can then be queried for +the items defined by the second parameter. The actual search is +executed during construction. + +@param aText will be parsed. +@param aSearchCases identifies what items we are looking for: + EFindItemSearchPhoneNumberBin + EFindItemSearchMailAddressBin + EFindItemSearchURLBin + EFindItemSearchScheme +Any combination of these flags can be given as a bit mask. +@return a pointer to a new instance of CTulAddressStringTokenizer class. + +@panic ETulPanicInvalidTokenizerSearchCase in debug build if there is no valid search case. +@panic ETulPanicDescriptorLength in debug build if item's position +and/or length is out of the document's range. 
+@leave KErrNone, if successful; otherwise one of the other system-wide error codes. +*/ +EXPORT_C CTulAddressStringTokenizer* CTulAddressStringTokenizer::NewL(const TDesC& aText, TInt aSearchCases) + { + CTulAddressStringTokenizer* self = new (ELeave) CTulAddressStringTokenizer; + CleanupStack::PushL(self); + self->ConstructL(aText, aSearchCases, KFindItemMinNumbers); + CleanupStack::Pop(); + return self; + } + +/** +Two-phase constructor method that is used to create a new instance +of the CTulAddressStringTokenizer class. This instance can then be queried for +the items defined by the second parameter. The actual search is +executed during construction. + +@param aText will be parsed. +@param aSearchCases identifies what items we are looking for: + EFindItemSearchPhoneNumberBin + EFindItemSearchMailAddressBin + EFindItemSearchURLBin + EFindItemSearchScheme +Any combination of these flags can be given as a bit mask. +@param aMinNumbers defines a minimum count of numbers in a phone +number string, during a phone number +search. +@return a pointer to an new instance of CTulAddressStringTokenizer class. + +@panic ETulPanicInvalidTokenizerSearchCase in debug build if there is no valid search case. +@panic ETulPanicDescriptorLength in debug build if item's position +and/or length is out of the document's range. +@leave KErrNone, if successful; otherwise one of the other system-wide error codes. +*/ +EXPORT_C CTulAddressStringTokenizer* CTulAddressStringTokenizer::NewL(const TDesC& aText, TInt aSearchCases, + TInt aMinNumbers ) + { + CTulAddressStringTokenizer* self = new (ELeave) CTulAddressStringTokenizer; + CleanupStack::PushL( self ); + self->ConstructL( aText, aSearchCases, aMinNumbers ); + CleanupStack::Pop( ); + return self; + } + + +/** +Destructor. 
+*/ +EXPORT_C CTulAddressStringTokenizer::~CTulAddressStringTokenizer() + { + delete iFoundItems; + } + +// API methods for using engine separately from FindItemUI + +/** +Executes a new search with the already created CTulAddressStringTokenizer +instance. The position in the found items array is reset to the +beginning of the array. + +@param aText will be parsed. +@param aSearchCases identifies what items are we looking for: + EFindItemSearchPhoneNumberBin + EFindItemSearchMailAddressBin + EFindItemSearchURLBin + EFindItemSearchScheme +Any combination of these flags can be given as a bit mask. +@return number of found items. + +@panic ETulPanicInvalidTokenizerSearchCase in debug build if there is no valid search case. +@panic ETulPanicDescriptorLength in debug build if item's position +and/or length is out of the document's range. +@leave one of the Symbian error codes. +*/ +EXPORT_C TInt CTulAddressStringTokenizer::DoNewSearchL(const TDesC& aText, TInt aSearchCases) + { + return DoNewSearchL( aText, aSearchCases, KFindItemMinNumbers ); + } + +// API methods for using engine separately from FindItemUI + +/** +Executes a new search with the already created CTulAddressStringTokenizer +instance. The position in the found items array is reset to the +beginning of the array. + +@param aText will be parsed. +@param aSearchCases identifies what items are we looking for: + EFindItemSearchPhoneNumberBin + EFindItemSearchMailAddressBin + EFindItemSearchURLBin + EFindItemSearchScheme +Any combination of these flags can be given as a bit mask. +@param aMinNumbers defines a minimum count of numbers in a phone +number string, during a phone number +search. +@return number of found items. + +@panic ETulPanicInvalidTokenizerSearchCase in debug build if there is no valid search case. +@panic ETulPanicDescriptorLength in debug build if item's position +and/or length is out of the document's range. +@leave KErrNone, if successful; otherwise one of the other system-wide error codes. 
+*/ +EXPORT_C TInt CTulAddressStringTokenizer::DoNewSearchL(const TDesC& aText, TInt aSearchCases, TInt aMinNumbers) + { + delete iFoundItems; + iFoundItems = NULL; + iPosition = 0; + ConstructL(aText, aSearchCases, aMinNumbers); + return ItemCount(); + } + +// --------------------------------------------------------- +// Search Algorithms and helpers +// --------------------------------------------------------- +/** +Character information methods + +@param charac a Character to be investigated +@return ETrue if the parameter for phone number was valid, else returns EFalse +*/ +TBool CTulAddressStringTokenizer::IsValidPhoneNumberChar(const TChar& aCharac) + { + // Returns ETrue if the parameter is a valid character in a phonenumber + const TDesC& array = KPhoneNumberChars; + return (array.Locate(aCharac) != KErrNotFound); + } + +/** +Character information methods + +@param charac a Character to be investigated +@return ETrue if the parameter for login part of the e-mail address is valid, else returns EFalse +*/ +TBool CTulAddressStringTokenizer::IsValidEmailChar(const TChar& aCharac) + { + // Returns ETrue if the parameter is a valid character for username part of e-mail address + // ASCII 33 - 47 (without '"', ',', ''', '(' and ')' because they can be used to separate address + // from rest of the text ) + const TDesC& array = KEmailChars; + return (array.Locate(aCharac) != KErrNotFound || IsValidEmailHostChar(aCharac)); + } + +/** +Character information methods + +@param charac a Character to be investigated +@return ETrue if the parameter for host part of the e-mail address is valid, else returns EFalse +*/ +TBool CTulAddressStringTokenizer::IsValidEmailHostChar(const TChar& aCharac) + { + // Returns ETrue if the parameter is a valid character for a host part of e-mail address + const TDesC& array = KEmailHostChars; + return (array.Locate( aCharac.GetLowerCase() ) != KErrNotFound); + } + +/** +Character information methods + +@param charac a Character to be 
investigated
@return ETrue if the parameter for URL is valid, else returns EFalse
*/
TBool CTulAddressStringTokenizer::IsValidUrlChar(const TChar& aCharac)
    {
    // Returns ETrue if the parameter is a valid character for url:
    // any e-mail host character, or one of the characters in KUrlChars.
    if (IsValidEmailHostChar(aCharac))
        return ETrue;

    const TDesC& array = KUrlChars;
    return (array.Locate(aCharac) != KErrNotFound);
    }

/**
Search algorithm for searching e-mail addresses.

Repeatedly locates the next '@' in the not yet examined tail of the text,
then extends the candidate backwards over valid user-name characters and
forwards over valid host characters. Two periods in a row stop the scan in
either direction; leading periods and one trailing period are dropped.
Every candidate found is added with AddItemL() as EFindItemSearchMailAddressBin.

@param aText Text that will be parsed
@return ETrue if the found items array is non-empty after the search (the
array may also hold items added by other searches), else returns EFalse
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
@panic ETulPanicDescriptorLength in debug build if item's position
and/or length is out of the document's range.
*/
TBool CTulAddressStringTokenizer::SearchMailAddressL( const TDesC& aText )
    {
    TInt searchStart = 0; // offset in aText where the current segment begins
    TInt searchResult = 0; // offset of '@' inside the current segment
    const TInt end = aText.Length(); // end of document

    do
        {
        // Examine only the part of the text that has not been processed yet
        TPtrC segment = aText.Right( end - searchStart );
        searchResult = segment.LocateF('@');

        if (searchResult != KErrNotFound)
            { // @ found
            // There should be valid characters (not a period) before and after the @ character
            if ( searchResult == 0 // first char
                || (searchResult >= segment.Length() - 1) // last char
                || !(IsValidEmailChar(segment[searchResult - 1]))
                || !(IsValidEmailHostChar(segment[searchResult + 1]))
                || segment[searchResult - 1] == '.'
                || segment[searchResult + 1] == '.'
                )
                {
                // Not an address: continue searching right after this '@'
                searchStart += searchResult + 1;
                continue;
                }

            TBool wasPeriod = EFalse; // To prevent sequential periods
            // Get TLex from the pointer to get a better API for parsing
            TLexMark startPos;
            TLexMark endPos;
            TLex token = segment;

            // Go to searchResult and un-get until the beginning of e-mail address is reached
            token.Inc( searchResult );
            token.Mark(); // default mark remembers the position of '@'
            do
                {
                token.UnGet();
                if ( token.Peek() == '.' )
                    { // If it was a period
                    if (wasPeriod) // and if the former was also -> break
                        break;
                    else // else mark that this one was a period
                        wasPeriod = ETrue;
                    }
                else
                    wasPeriod = EFalse;
                }
            while (token.Offset() > 0 && IsValidEmailChar(token.Peek()));

            // Step forward again unless the scan ended on a valid character
            // at the very start of the segment
            if (token.Offset() != 0 || !IsValidEmailChar(token.Peek()))
                token.Inc();

            // Get rid of periods from the start of address
            // Does it have to start with a number or char(abc...).
            // If it does, the loop should check that it gets rid of all special chars also.
            while (token.Peek() == '.')
                token.Inc();

            token.Mark( startPos ); // Mark the beginning of address
            token.UnGetToMark(); // back to the '@' remembered above
            wasPeriod = EFalse;

            do // Go forward until a nonvalid character
                {
                token.Inc();
                if ( token.Peek() == '.' )
                    { // If it was a period
                    if ( wasPeriod ) // and if the former was also -> break
                        break;
                    else // else mark that this one was a period
                        wasPeriod = ETrue;
                    }
                else
                    wasPeriod = EFalse;
                }
            while ( !token.Eos() && IsValidEmailHostChar( token.Peek() ) );

            // If address ends with a period take it away
            token.UnGet();
            if (token.Peek() != '.')
                token.Inc();

            token.Mark( endPos ); // Mark the end of address

            // Append the found string to the array
            // (offsets of the marks are relative to the segment, hence + searchStart)
            __ASSERT_DEBUG( searchStart + token.MarkedOffset( startPos )
                            + token.MarkedOffset( endPos )
                            - token.MarkedOffset( startPos ) <= aText.Length(),
                            Panic(ETulPanicDescriptorLength) );
            AddItemL( searchStart + token.MarkedOffset( startPos ),
                      token.MarkedOffset( endPos ) - token.MarkedOffset( startPos ),
                      EFindItemSearchMailAddressBin);
            // Continue after the address that was just added
            searchStart += token.MarkedOffset( endPos ) + 1;
            }
        }
    while ( searchResult != KErrNotFound && searchStart < end );

    return (iFoundItems->Count() > 0);
    }

/**
Search algorithm for searching phone numbers

@param aText Text that will be parsed
@return ETrue if any Phone Number items were found else returns EFalse
@leave KErrNone, if successful; otherwise one of the other
system-wide error codes. +@panic ETulPanicDescriptorLength in debug build if item's position +and/or length is out of the document's range. +*/ +TBool CTulAddressStringTokenizer::SearchPhoneNumberL( const TDesC& aText ) + { + TLexMark startMark; // Points to the start of the found phone number + TLexMark endMark; // Points to the end of the found phone number + TLexMark mark; + const TInt end = aText.Length(); + + TLex number = aText; + + while ( !(number.Eos()) ) + { + TInt numberCount = 0; // How many real numbers (1234567890) + TInt bracketsOpen = 0; // How many brackets are currently open + TInt brackets = 0; // How many brackets overall + + TChar charac = number.Peek(); + + while( (!(IsValidPhoneNumberChar( charac ) || charac == '+' + || charac == '(' ) || charac == '-' || charac == '.' || charac == '/') + && !(number.Eos()) && number.Offset() < end ) + { + number.Inc(); + charac = number.Peek(); + } + + if ( number.Offset() >= end ) + break; + + if ( number.Peek() == '#' ) + { + number.Inc(); + if (number.Peek() == '.' ) + continue; + + number.UnGet(); + } + + if ( number.Peek() == '+' ) + { // '+' has to be followed by a number (not # or * ...) + number.Inc(); + if ( !(number.Peek().IsDigit()) ) + continue; + + number.UnGet(); + } + + if ( number.Peek() == '(' ) + { // '(' has to be followed by valid phone number + // character (whitespaces are allowed before) or '+' is a next character + number.Inc(); + if ( !(number.Peek() == '+') ) + { + number.Mark(mark); + number.SkipSpace(); + charac = number.Peek(); + if ( !( IsValidPhoneNumberChar(charac) || charac == '+' + || charac == '(' ) || charac == '-' || charac == '.' 
|| charac == '/') + { + number.Inc(); + continue; + } + else + { + number.UnGetToMark(mark); + number.UnGet(); + number.Mark(startMark); + } + } + else + { + number.UnGet(); + number.Mark(startMark); + number.Inc(); + } + + bracketsOpen++; + brackets++; + } + else + number.Mark(startMark); + + if ( number.Peek().IsDigit() ) // If the character was a number + numberCount++; + else if ( bracketsOpen > 0 ) + { + number.Inc(); + TChar next = number.Peek(); + TInt bracketsOpen2 = bracketsOpen; + while( (IsValidPhoneNumberChar( next ) || next.IsSpace() + || next == '(' || next == ')' || next == 'p' || next == '+' + || next == 'w' ) && !(number.Eos()) && number.Offset() < end) + { + if ( next == '(' ) + bracketsOpen2++; + else if ( next == ')' ) + bracketsOpen2--; + + if ( bracketsOpen2 == 0 ) + break; + + number.Inc(); + next = number.Peek(); + } + + number.UnGetToMark(startMark); + if ( bracketsOpen2 != 0 ) + { + number.Inc(); + continue; + } + } + + number.Inc(); + while ( number.Peek() == '(' && !(number.Eos()) && bracketsOpen > 0 ) + { + number.Inc(); + bracketsOpen++; + } + + if ( number.Peek() == '+' && bracketsOpen > 0 ) + number.Inc(); + + // a Valid first character has been found. Let's go forward as long as valid characters are found. 
+ charac = number.Peek(); + + while( (IsValidPhoneNumberChar( charac ) || charac.IsSpace() + || charac == '(' || charac == ')' || charac == 'p' + || charac == 'w' ) && !(number.Eos()) && number.Offset() < end + && charac != KCharLinefeed && charac != KCharFormfeed + && charac != KCharCarriageReturn + && charac != KCharLineSeparator + && charac != KCharParagraphSeparator ) + { + if ( number.Peek() == '(' ) + { // '(' can't be the last character in phone number + number.Mark(mark); + number.Inc(); + + TChar spaceJump = number.Peek(); + while ( !number.Eos() && spaceJump.IsSpace() && spaceJump != KCharLinefeed + && spaceJump != KCharFormfeed && spaceJump != KCharCarriageReturn + && charac != KCharLineSeparator && spaceJump != KCharParagraphSeparator) + { + number.Inc(); + spaceJump = number.Peek(); + } + + if ( !(IsValidPhoneNumberChar(number.Peek())) && number.Peek() != ')' + && number.Peek() != '(' ) + { + number.UnGetToMark(mark); + break; + } + + TChar next = number.Peek(); + TInt bracketsOpen2 = bracketsOpen + 1; + while( (IsValidPhoneNumberChar( next ) || next.IsSpace() + || next == '(' || next == ')' || next == 'p' + || next == 'w' ) && !(number.Eos()) && number.Offset() < end) + { + if ( next == '(' ) + bracketsOpen2++; + else if ( next == ')' ) + bracketsOpen2--; + + if ( bracketsOpen2 == 0 ) + break; + + number.Inc(); + next = number.Peek(); + } + + number.UnGetToMark(mark); + + if ( bracketsOpen2 != 0 ) + break; + + bracketsOpen++; + brackets++; + } + else if ( number.Peek() == ')' ) + { + if ( bracketsOpen <= 0 ) // there has to be equal number of brackets + break; + + bracketsOpen--; + number.Mark(mark); + number.Inc(); + if ( number.Peek() == '.' ) // '.' is not allowed after ')' + break; + + number.UnGetToMark(mark); + } + else if ( number.Peek() == '-' || number.Peek() == 'w' + || number.Peek() == 'p' || number.Peek() == '.' 
|| number.Peek() == '/') + { // Hyphen mark and 'p' & 'w' chars must be followed by a number + TChar last = number.Peek(); + number.Mark(mark); + number.Inc(); + + TChar spaceJump = number.Peek(); + while ( !number.Eos() && spaceJump.IsSpace() && spaceJump != KCharLinefeed + && spaceJump != KCharFormfeed && spaceJump != KCharCarriageReturn + && charac != KCharLineSeparator && spaceJump != KCharParagraphSeparator ) + { + number.Inc(); + spaceJump = number.Peek(); + } + + if ( !(number.Peek().IsDigit()) ) + { + if (last == '.' && number.Peek() == ')' && bracketsOpen > 0 ) + continue; + else + { + number.UnGetToMark(mark); + break; + } + } + + number.UnGetToMark(mark); + } + else if ( number.Peek().IsDigit() ) + numberCount++; + + number.Inc(); + charac = number.Peek(); + } + + // Get rid of whitespaces from the end + number.UnGet(); + while( number.Peek().IsSpace() && !(number.Eos())) + number.UnGet(); + + number.Inc(); + // ------------------------------------ + number.Mark(endMark); + + // If they exist, remove brackets from the beginning and the end + number.Mark(mark); // Let's mark where to continue the search + TBool endBrackets = ETrue; + do + { + number.UnGet(); + + if ( number.Peek() == ')' ) + { + number.UnGetToMark(startMark); + if ( number.Peek() == '(' ) + { + // If there's more than one pair of brackets -> don't strip them. 
+ if ( brackets > 1 ) + break; + + number.Inc(); + number.Mark(startMark); + number.UnGetToMark(endMark); + number.UnGet(); + number.Mark(endMark); + // Get rid of whitespaces and periods from the end and from the beginning + number.UnGet(); + while ( (number.Peek().IsSpace() || number.Peek() == '.') + && number.Offset() > number.MarkedOffset(startMark) ) + { // from the end + number.UnGet(); + } + number.Inc(); + number.Mark(endMark); + number.UnGetToMark(startMark); + while ( (number.Peek().IsSpace() || number.Peek() == '.') + && number.Offset() < number.MarkedOffset(endMark) ) + { // from the beginning + number.Inc(); + } + number.Mark(startMark); + number.UnGetToMark(endMark); + // ---- + } + else + endBrackets = EFalse; + } + else + endBrackets = EFalse; + } + while ( endBrackets ); + + number.UnGetToMark(mark); + // ---------------- + + if ( numberCount <= KFindItemMaxNumbers && numberCount >= iMinNumbers ) + { + TPtrC tokenPtr = number.MarkedToken(startMark); + TInt tokensEnd = tokenPtr.Length(); + TInt numbers = 0; + TInt partialNumber = 0; + TBool wasValidPhoneNumber = ETrue; + TInt i = 0; + + for ( ; i < tokensEnd; i++ ) + { + if ( tokenPtr[i] == '.' ) + partialNumber = 0; + else if ( ((TChar)tokenPtr[i]).IsDigit() ) + { + numbers++; + partialNumber++; + } + + if ( ( partialNumber == 1 || partialNumber == 2 ) && i + 1 < tokensEnd ) + { + if ( tokenPtr[i + 1] == '.' ) + wasValidPhoneNumber = EFalse; + } + } + + if (!wasValidPhoneNumber && numbers > 6) + wasValidPhoneNumber = ETrue; + + if (wasValidPhoneNumber) + { + __ASSERT_DEBUG( number.MarkedOffset(startMark) + number.MarkedOffset(endMark) + - number.MarkedOffset(startMark) <= aText.Length(), + Panic(ETulPanicDescriptorLength) ); + + AddItemL( number.MarkedOffset(startMark), + number.MarkedOffset(endMark) - number.MarkedOffset(startMark), + EFindItemSearchPhoneNumberBin ); + } + } + + } + + return (iFoundItems->Count() > 0); + } + + +/** +Parses URL from a token. 
Is used by SearchUrlL method and if a URL +was found it's appended to item array. Note that parsing for generic URIs +is done with SearchGenericUriL -method. + +@param aType a Type of URL to seach, i.e. + www. + wap. + IP e.g.127.0.0.1 +@param aTokenPtr Pointer to token that will be parsed +@param aTextOffset Offset of the token (start position in the whole text) +@leave KErrNone, if successful; otherwise one of the other system-wide error codes. +@return ETrue if the parameter for phone number is valid, else returns EFalse +*/ +TBool CTulAddressStringTokenizer::ParseUrlL(const TDesC& aType, const TPtrC& aTokenPtr, TInt aTextOffset) + { + TBool wasValidUrl = EFalse; + TLex url; + + TInt position = aTokenPtr.FindF( aType ); + if ( position != KErrNotFound ) + { // address start found + url = aTokenPtr.Right( aTokenPtr.Length() - position ); + url.Inc( aType.Length() ); + + while( IsValidUrlChar( url.Peek() ) && !(url.Eos()) ) + { + if( url.Peek() == ':' ) + { + url.Inc(); + if ( !url.Peek().IsDigit() ) + { + url.UnGet(); + break; + } + } + else + url.Inc(); + } + + // If a period or question mark was followed by a whitespace remove it + if ( url.Eos() ) // Can't be followed by white space if it's + { // the last character at token + url.UnGet(); + if ( url.Peek() != '.' && url.Peek() != '?' && url.Peek() != ',' ) // If it wasn't a period or question mark + url.Inc(); + } + + url.Mark(); + wasValidUrl = ETrue; + } + + if ( wasValidUrl && ( url.MarkedOffset() > aType.Length() ) ) + { + AddItemL( aTextOffset - aTokenPtr.Length() + position, url.MarkedOffset(), EFindItemSearchURLBin ); + return ETrue; + } + + return EFalse; + } + +/** +Search fixed start URLs, i.e. URLs without schema (www., wap.). +Also finds IPv4 addresses (*.*.*.*). +As a special case, supports deprecated hardcoded schematic addresses finding +(http://, https://, rtsp://) to make sure deprecated search cases work +as they did previously. 

The text is split into tokens, each a maximal run of characters accepted by
IsValidUrlChar(). Every token is tried against the address beginnings in a
fixed order (http://, https://, rtsp://, www., wap.) and, if none of them
produced an item, against the IPv4 pattern.

@param aText Text that will be parsed
@param aFindFixedSchemas If true, will find old fixed schematic URLs also
@return ETrue if the found items array is non-empty after the search (the
array may also hold items added by other searches), else returns EFalse
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
@panic ETulPanicDescriptorLength in debug build if item's position and/or length is out of the document's range.
*/
TBool CTulAddressStringTokenizer::SearchUrlL( const TDesC& aText, const TBool aFindFixedSchemas )
    {
    TLex text = aText;
    while ( !text.Eos() )
        {
        // Skip separators, then collect one token of valid URL characters
        while( !(text.Eos()) && !IsValidUrlChar( text.Peek() ) )
            text.Inc();

        text.Mark();
        while( !(text.Eos()) && IsValidUrlChar( text.Peek() ) )
            text.Inc();

        // text.Offset() is now the end of the token; ParseUrlL derives the
        // token start from it.
        TPtrC tokenPtr = text.MarkedToken();
        TBool wasValidUrl = EFalse;

        if ( aFindFixedSchemas ) // Search for http://
            wasValidUrl = ParseUrlL( KHttpUrlAddress, tokenPtr, text.Offset() );

        if (aFindFixedSchemas && !wasValidUrl) // Search for https://
            wasValidUrl = ParseUrlL( KHttpsUrlAddress, tokenPtr, text.Offset() );

        if (aFindFixedSchemas && !wasValidUrl) // Search for rtsp://
            wasValidUrl = ParseUrlL( KRtspUrlAddress, tokenPtr, text.Offset() );

        if ( !wasValidUrl ) // Search for www.
            wasValidUrl = ParseUrlL( KWwwUrlAddress, tokenPtr, text.Offset() );

        if ( !wasValidUrl ) // Search for wap.
            wasValidUrl = ParseUrlL( KWapUrlAddress, tokenPtr, text.Offset() );

        if ( !wasValidUrl ) // Search for IP-address (xxx.xxx.xxx.xxx)
            {
            // Cheap pre-check: the token must contain at least three periods
            if ( tokenPtr.Match( KIPAddress ) != KErrNotFound )
                {
                TInt periods = 0;
                wasValidUrl = ETrue;
                TBool endWithPunctuation = EFalse;
                TBool betweenBrackets = EFalse;

                // First see if token ends with ",",".","!","?",";" or ":"
                TChar charac = tokenPtr[tokenPtr.Length() - 1];
                TChar charac0 = tokenPtr[0];
                if ( charac == ',' || charac == '.' ||
                     charac == '!' || charac == '?' ||
                     charac == ';' || charac == ':' )
                    {
                    endWithPunctuation = ETrue;
                    }
                // Or if it starts and ends with brackets or quotation marks
                // NOTE(review): tokens only contain characters accepted by
                // IsValidUrlChar(), which does not appear to include brackets,
                // quotes, '!' or ';' - this branch looks unreachable; confirm.
                else if ( ( charac0 == '(' && charac == ')' )
                       || ( charac0 == '"' && charac == '"' )
                       || ( charac0 == '[' && charac == ']' )
                       || ( charac0 == '<' && charac == '>' ) )
                    {
                    betweenBrackets = ETrue;
                    }

                // Range of the token that has to consist of digits and periods
                TInt i = 0;
                TInt tokensEnd = tokenPtr.Length();
                if ( endWithPunctuation )
                    tokensEnd--;
                else if ( betweenBrackets )
                    {
                    i = 1;
                    tokensEnd--;
                    }

                // Take a closer look to see if a valid IP-address
                TBuf<3> ipPart; // digits of the address part currently being read
                TInt numbers = 0; // digit count of the current part
                for ( ; i < tokensEnd; i++ )
                    {
                    if ( !( ((TChar)tokenPtr[i]).IsDigit() || tokenPtr[i] == '.' ) )
                        {
                        wasValidUrl = EFalse;
                        break;
                        }

                    if ( tokenPtr[i] == '.' )
                        periods++;
                    else
                        numbers++;

                    // At most three digits per part and three periods overall.
                    // This check runs before Append(), so ipPart cannot overflow.
                    if ( numbers > KNumbersInIpAddress || periods > KDotsInIpAddress )
                        {
                        wasValidUrl = EFalse;
                        break;
                        }

                    if ( ((TChar)tokenPtr[i]).IsDigit() )
                        {
                        ipPart.Append( tokenPtr[i] );
                        // The part is complete when a period follows ...
                        TBool checkInt = EFalse;
                        if ( i + 1 < tokensEnd )
                            {
                            if ( tokenPtr[i+1] == '.' )
                                checkInt = ETrue;
                            }

                        // ... or when this was the last character to examine
                        if ( i == tokensEnd - 1 || checkInt )
                            {
                            TLex val = ipPart;
                            TInt numberInt;
                            TInt error = val.Val( numberInt );
                            if ( error != KErrNone || numberInt > 255 )
                                {
                                wasValidUrl = EFalse;
                                break;
                                }

                            // Start collecting the next part
                            numbers = 0;
                            ipPart.Delete( 0, ipPart.Length() );
                            }
                        }
                    }

                if ( wasValidUrl && periods == KDotsInIpAddress )
                    {
                    TInt startPos = text.Offset() - tokenPtr.Length();
                    TInt length = tokenPtr.Length();
                    // If there was a punctuation at the end or brackets, let's take it/them away
                    if ( endWithPunctuation || betweenBrackets)
                        {
                        length--;
                        if ( betweenBrackets )
                            {
                            startPos++;
                            length--;
                            }
                        }

                    __ASSERT_DEBUG( startPos + length <= aText.Length(), Panic(ETulPanicDescriptorLength) );
                    AddItemL( startPos, length, EFindItemSearchURLBin );
                    }
                }
            }
        }

    return (iFoundItems->Count() > 0);
    }

/**
Search algorithm for searching generic URIs

@param aText Text that will be parsed
@return ETrue if any generic URI is found else returns EFalse
@leave KErrNone, if successful; otherwise one of the other system-wide error codes.
+*/ +TBool CTulAddressStringTokenizer::SearchGenericUriL( const TDesC& aText ) + { + // Detect generic URI within the token + const TDesC& schemeStartArray = KURISchemeStartCharacters; + const TDesC& schemeBodyArray = KURISchemeBodyCharacters; + const TDesC& schemeTerminatorArray = KURISchemeTerminator; + const TDesC& URIArray = KURICharacters; + + TBool wasValidUri = EFalse; + TLex text = aText; + + while ( !text.Eos() ) + { + // Discard characters until URI scheme terminator is found + while( !(text.Eos()) && schemeTerminatorArray.Locate(text.Peek()) == KErrNotFound ) + text.Inc(); + + // if at end of the text, no legit URI found + if ( !text.Eos() ) + { + // Store the schema end offset (+1 to skip ':') + TInt schemeEndOffset = text.Offset() + 1; + + // Scheme must be at least 1 character long at the beginning of the text to be valid + if ( text.Offset() > 0 ) + { + // Un-get last scheme character to begin examination + text.UnGet(); + + // Rewind until beginning of the URI + while ( text.Offset() > 0 && schemeBodyArray.Locate(text.Peek().GetLowerCase()) != KErrNotFound ) + text.UnGet(); + + // Now text pointer is at first character of the URI + // Do go back through the scheme until a legal beginning character for URI + // is found or back to the (schemeEndOffset - 1) i.e. URI scheme terminator + while ( schemeStartArray.Locate(text.Peek().GetLowerCase()) == KErrNotFound && (text.Offset() + 1) < schemeEndOffset ) + text.Inc(); + + // check if terminated because a valid start character was found when + // scheme terminator was reached. 
+ if ( schemeStartArray.Locate(text.Peek().GetLowerCase()) != KErrNotFound ) + { + // First character is a valid URI char, so the scheme is valid -> + // marks the beginning of the array + text.Mark(); + + // fast forward to the end of the scheme + while( text.Offset() < schemeEndOffset ) + text.Inc(); + + // Get characters until end of schema + while( !(text.Eos()) && URIArray.Locate( text.Peek().GetLowerCase() ) != KErrNotFound ) + text.Inc(); + + // remove certain punctuation from end of the URI, as it is likely + // to be part of the surrounding text. + text.UnGet(); + + //special processing for bracket + //only remove the end bracket if there is no open bracket in the uri + //not counting bracket pairs for efficiency + if (text.Peek()!=')' || text.MarkedToken().Locate(TChar('('))!=-1) + text.Inc(); + + text.UnGet(); + if ( text.Peek() != '.' && text.Peek() != '?' && text.Peek() != ',') + text.Inc(); + + // URI cannot contain only scheme, so check that pointer was increased + // by at least one character + if ( schemeEndOffset != text.Offset() ) + { + // Append found text to item array (it is now known to be + // syntactically valid URI as it contains characters after the scheme) + AddItemL( text.MarkedOffset(), text.Offset() - text.MarkedOffset(), EFindItemSearchScheme ); + wasValidUri = ETrue; + } + } + else // First character of scheme is not legit, fast forward to end of the + // scheme anyway to continue search + { + while( text.Offset() < schemeEndOffset ) + text.Inc(); + } + } + else + text.Inc(); + } + } + + return wasValidUri; + } + +// --------------------------------------------------------- +// Position and count methods +// --------------------------------------------------------- + +/** +Gets the number of items in the found items array. + +@return the number of items in the found items array. +*/ +EXPORT_C TInt CTulAddressStringTokenizer::ItemCount() const + { + return (iFoundItems ? 
iFoundItems->Count() : 0); + } + +/** +Gets the current position (or the position of the currently selected item) +in the found items array. + +@return the current position in the found items array of the +CTulAddressStringTokenizer instance. If no items are in the array, zero is returned. +*/ +EXPORT_C TInt CTulAddressStringTokenizer::Position() const + { + return iPosition; + } + +/** +Resets the position in item array to zero (beginning of the array). +*/ +EXPORT_C void CTulAddressStringTokenizer::ResetPosition() + { + iPosition = 0; + } + +// --------------------------------------------------------- +// GetItem methods +// --------------------------------------------------------- + +/** +Gets the array of found items. Returns a constant pointer to the +found items array of the CTulAddressStringTokenizer instance. The items cannot +be modified through this pointer, only accessed. The ownership of +the array stays with CTulAddressStringTokenizer. + +@return a constant pointer to the array of found items. Ownership +stays with CTulAddressStringTokenizer. +*/ +EXPORT_C const CArrayFixFlat* CTulAddressStringTokenizer::ItemArray() const + { + return iFoundItems; + } + +/** +Gets the currently 'selected' item in the array of found items. + +@param aItem contains the currently selected item after returning. +@return ETrue if the item was found. EFalse if the item wasn't found. +*/ +EXPORT_C TBool CTulAddressStringTokenizer::Item( SFoundItem& aItem ) const + { // Return EFalse if no items were found + if ( iFoundItems->Count() <= 0 ) + { + aItem.iStartPos = 0; + aItem.iLength = 0; + aItem.iItemType = EFindItemSearchPhoneNumberBin; + return EFalse; + } + + aItem = iFoundItems->At( iPosition ); + return ETrue; + } + +/** +Gets the next found item relative to the currently selected item. +Moves the selection to point to the next item in the array of +found items. + +@param aItem contains the next item after returning. +@return ETrue if the item was found. 
EFalse if there's no next item. +*/ +EXPORT_C TBool CTulAddressStringTokenizer::NextItem( SFoundItem& aItem ) + { + if (iFoundItems->Count() <= (iPosition + 1)) + return EFalse; + + iPosition++; + aItem = iFoundItems->At( iPosition ); + return ETrue; + } + +/** +Gets the previous found item relative to the currently selected +item. Moves the selection to point to the previous item in the +array of found items.. + +@param aItem contains the previous item after returning. +@return ETrue if the item was found. EFalse if there's no previous item. +*/ +EXPORT_C TBool CTulAddressStringTokenizer::PrevItem( SFoundItem& aItem ) + { + if ( iPosition <= 0 ) + return EFalse; + + iPosition--; + aItem = iFoundItems->At( iPosition ); + return ETrue; + } + +/** +Adds item to search arrays. Adding is done so that arrays are always sorted. +If added element would overlap a previously found element, it is not added. + +@param aStartPos Start position of the found item +@param aLength Length of found item +@param aType Type of the found item +@leave KErrNone, if successful; otherwise one of the other system-wide error codes. +*/ +void CTulAddressStringTokenizer::AddItemL(TInt aStartPos, TInt aLength, TTokenizerSearchCase aType ) + { + // Create item element + SFoundItem foundItem; + foundItem.iStartPos = aStartPos; + foundItem.iLength = aLength; + foundItem.iItemType = aType; + + // Add item data to arrays + TKeyArrayFix insertKey(0, ECmpTInt); + iFoundItems->InsertIsqAllowDuplicatesL(foundItem, insertKey); + } + +// End of File