diff -r 000000000000 -r 671dee74050a searchengine/util/cpixtools/inc/public/cpixstrtools.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/util/cpixtools/inc/public/cpixstrtools.h Mon Apr 19 14:40:16 2010 +0300 @@ -0,0 +1,850 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ +#ifndef CPIXTOOLS_CPIXSTRTOOLS_H_ +#define CPIXTOOLS_CPIXSTRTOOLS_H_ + +#include +#include + +namespace Cpt +{ + + + /** + * Breaks down a string on certain delimiters. + * + * @param orig the original string + * + * @param delimiters the zero terminated string containing the + * delimiter characters + * + * @param target the list to put the substrings into + */ + void splitstring(const char * orig, + const char * delimiters, + std::list & target); + + + /** + * @param first the first string - this should be the substring of + * the second or equal to it + * + * @param second the second string + * + * @return true if the first string is the substring of the second + * or equal to it, false otherwise + */ + template + bool issubstroforequal(const CTYPE * first, + const CTYPE * second) + { + for ( ; + *first != static_cast(0) + && *second != static_cast(0) + && *first == *second; + ++first, ++second) + ; + + return *first == static_cast(0); + } + + + /** + * Poor man's substitute to OpenC's leaking printf("%f") formatting + * function, because printf float formatting leak memory. + * + * NOTE: Last number WILL NOT be rounded properly. It will + * be rounded up, when it should be rounded to closest. + */ + int wsnprintdouble(wchar_t* buf, size_t n, double number, int decimals); + + + + /** + * Exception class to tell about conversion failures. + */ + class ConversionExc : public std::exception + { + private: + // + // private members + // + + std::string what_; + + public: + // + // public operators + // + + /** + * Overriden from std::exception + */ + virtual const char * what() const throw(); + + + // + // Lifecycle management + // + + /** + * Constructs this exception with printf style format string + * and potential detail arguments. + */ + ConversionExc(const char * format, + ...); + }; + + + + /** + * A very simple lifetime mgmt class for arrays. (std::auto_ptr + * should not be used on C-strings or arrays of a type, as it uses + * delete and not delete[] to clean up. + */ + template + class auto_array + { + private: + // + // private members + // + T * p_; + + + // no value semantics + auto_array(const auto_array &); + auto_array & operator=(const auto_array &); + + public: + // + // public operators + // + + + /** + * Gives up ownership of stored array to the caller. + * + * @returns the pointer to the array - the caller will have to + * manage it from now on. + */ + T * release(); + + + /** + * Resets the array owned by this, any old arrays are + * discarded. + */ + void reset(T * p); + + + /** + * @returns the stored array - ownership is not transferred + */ + T * get(); + + + /** + * Default constructor + */ + auto_array(); + + + /** + * Constructor taking ownership of given array pointer by p. + */ + auto_array(T * p); + + + + /** + * Named constant for the default (auto) case for string + * length parameter in the conversion constructor. + * + * The value should be max of size_t, and unfortunately + * std::numeric_limits::max() is no use here as it is + * a function providing a value at runtime and not a constant + * expression you can use at compile time. So much about the + * usefulness of numeric_limits. So, using the fact that + * size_t is always an unsigned type, we are going to use -1 + * cast to size_t as MAX. Bit obscure but should provide the + * numerical value for the maximal unsigned number size_t can + * represent. + */ + enum { AUTO_STRLEN = static_cast(-1) }; + + + /** + * Constructor for the case when both T and CHAR are character + * types but they are not the same (wchar_t, char). + * + * In this case, auto_array will behave as a simple string + * class that was initialized by copying and converting an + * original string of different character type. + * + * @param src a c-style string (char or wchar_t). + * + * @param length if not AUTO_STRLEN (default value), then + * min(length, STRLEN(src)) amount of characters will be + * copied to this auto_array instance. STRLEN(src) means + * either strlen(src) or wcslen(src) depending on whether CHAR + * is plain or wide character. + * + * NOTE: T has to be also char or wchar_t. + * + * NOTE: DO NOT attempt to use this constructor in the case + * where the types T and CHAR are the same. In that case, the + * overloaded constructor will be applied (cf above) by the + * C++ compiler and that has completely different ownership + * semantics: it does not copy (and convert), it takes + * ownership of given string! + * + * @throws ConversionExc if conversion fails for some reason + * (invalid byte sequences etc) + */ + template + explicit auto_array(const CHAR * src, + size_t length = AUTO_STRLEN); + + + + /** + * Destructor + */ + ~auto_array(); + }; + + + /** + * @return the used dynamic memory in bytes. + */ + uint32_t getUsedDynamicMemory(); + + + + /******************************************************* + * + * Conversion utilities (wide string -> value types) + * + */ + + /** + * Attempts to convert the given wide string to a value. For + * integer types, this will be equivalent to wconvertInteger call. + * + * @param to the pointer to the value to put the results, must not + * be NULL + * + * @param fromStr wide string, must not be NULL, and it must + * contain only the string representation for the value, nothing + * more. Failure to eat all of the string and put it to the value + * shall result in error. + * + * @return NULL if successful, and some string about what sort of + * value/formatting was attempted on failure. + */ + template + const char * wconvert(V * to, + const wchar_t * fromStr); + + + /** + * Attempts to convert a given wide string to an integer type + * (char, short, integer, long, long long). The string is + * interpreted in decimal notation. + * + * @param to the pointer to the value to put the results, must not + * be NULL + * + * @param fromStr wide string, must not be NULL, and it must + * contain only the string representation for the value, nothing + * more. Failure to eat all of the string and put it to the value + * shall result in error. + * + * @return NULL if successful, and some string about what sort of + * value/formatting was attempted on failure. + */ + template + const char * wconvertDecimal(INT * to, + const wchar_t * fromStr); + + + /** + * Attempts to convert a given wide string to an integer type + * (char, short, integer, long, long long). The string is + * interpreted generally: 0x prefix will cause hexadecimal + * interpretation, 0 prefix octal and otherwise decimal base will + * be used. + * + * @param to the pointer to the value to put the results, must not + * be NULL + * + * @param fromStr wide string, must not be NULL, and it must + * contain only the string representation for the value, nothing + * more. Failure to eat all of the string and put it to the value + * shall result in error. + * + * @return NULL if successful, and some string about what sort of + * value/formatting was attempted on failure. + */ + template + const char * wconvertInteger(INT * to, + const wchar_t * fromStr); + + + /** + * Attempts to convert a given wide string to an unsigned integer + * type. Octal base is used. + * + * @param to the pointer to the value to put the results, must not + * be NULL + * + * @param fromStr wide string, must not be NULL, and it must + * contain only the string representation for the value, nothing + * more. Failure to eat all of the string and put it to the value + * shall result in error. + * + * @return NULL if successful, and some string about what sort of + * value/formatting was attempted on failure. + */ + template + const char * wconvertOctal(INT * to, + const wchar_t * fromStr); + + + /** + * Attempts to convert a given wide string to an unsigned integer + * type. Decimal base is used. + * + * @param to the pointer to the value to put the results, must not + * be NULL + * + * @param fromStr wide string, must not be NULL, and it must + * contain only the string representation for the value, nothing + * more. Failure to eat all of the string and put it to the value + * shall result in error. + * + * @return NULL if successful, and some string about what sort of + * value/formatting was attempted on failure. + */ + template + const char * wconvertUnsignedInteger(INT * to, + const wchar_t * fromStr); + + /** + * Attempts to convert a given wide string to an unsigned integer + * type. Hexadecimal base is used. + * + * @param to the pointer to the value to put the results, must not + * be NULL + * + * @param fromStr wide string, must not be NULL, and it must + * contain only the string representation for the value, nothing + * more. Failure to eat all of the string and put it to the value + * shall result in error. + * + * @return NULL if successful, and some string about what sort of + * value/formatting was attempted on failure. + */ + template + const char * wconvertHexadecimal(INT * to, + const wchar_t * fromStr); + +} + + + + +/************************************************************** + * + * IMPLEMENTATION OF TEMPLATES + * + */ +namespace Cpt +{ + + ////////////////////////////////////////////////////////// + // + // auto_array + // + template + T * auto_array::release() + { + T + * rv = p_; + + p_ = NULL; + + return rv; + } + + + template + void auto_array::reset(T * p) + { + if (p_ != p) + { + delete[] p_; + p_ = p; + } + } + + + template + T * auto_array::get() + { + return p_; + } + + + template + auto_array::auto_array() + : p_(NULL) + { + ; + } + + + template + auto_array::auto_array(T * p) + : p_(p) + { + ; + } + + + // + // namespace for auto_array copy-and-convert-string ctor impl + // details + namespace Impl + { + template + struct AssertCharType + { + ~AssertCharType() + { + // For non-character types, we don't support the + // special auto_array templated conversion + // constructor. For decent compilers, a compile time + // error can be generated, for non-decent compilers + // like the old ARMCC version we use, we have to do + // with a runtime exception generated. +#ifdef __ARMCC__ + throw ConversionExc("PANIC auto_array conversion constructor is used for non-char type %s", + typeid(T).name()); +#else + compile_error(Template_type_T_is_not_a_character_type); +#endif + } + }; + + + template<> + struct AssertCharType + { + ~AssertCharType() + { + ; + } + }; + + + template<> + struct AssertCharType + { + ~AssertCharType() + { + ; + } + }; + + + /** + * This template class (or rather, some of its specializations + * for enabled types) will create a new buffer, copy an + * original string into it converting. + * + * Allocation happens with new[] (you need to use delete[]). + */ + template + struct CopyConverter + { + }; + + + /** + * The OpenC wcstombs is impossible to use correctly AND + * effectively (RAM consumptionwise) at the same time. It does + * not tell how much original wchar_t-s it has consumed, and + * it will not put a terminating zero at the end if there is + * not place left. So we need our own (non-trivial) logic. + * + * Copies and converts the whole src string, and sets the dst + * to the newly acquired buffer. + * + * @throws ConversionExc + */ + void ProperWcsToMbs(char * & dst, + const wchar_t * src, + size_t length); + + + /** + * Copies and converts the whole src string, and sets the dst + * to the newly acquired buffer. + * + * @throws ConversionExc + */ + void ProperMbsToWcs(wchar_t * & dst, + const char * src, + size_t length); + + + template<> + struct CopyConverter + { + static void CopyConvert(char * & dst, + const wchar_t * src, + size_t length) + { + ProperWcsToMbs(dst, + src, + length); + } + }; + + + template<> + struct CopyConverter + { + static void CopyConvert(wchar_t * & dst, + const char * src, + size_t length) + { + ProperMbsToWcs(dst, + src, + length); + } + }; + + } // ns + + + + template + template + auto_array::auto_array(const CHAR * src, + size_t length) + : p_(NULL) + { + Impl::AssertCharType + a1; + Impl::AssertCharType + a2; + + Impl::CopyConverter::CopyConvert(p_, + src, + length); + } + + + + template + auto_array::~auto_array() + { + reset(NULL); + } + + + ////////////////////////////////////////////////////////// + // + // conversion functions + // + + + /** + * Implementation detail struct storing the conversion string for + * scanf type of functions and the displayname of the type being + * converted to (for error messages). + */ + struct ValueType + { + const wchar_t * formatStr_; + const char * displayName_; + }; + + + /** + * Table of value-type structs specifying the conversion format + * strings and display names. + */ + extern const ValueType ValueTypes[]; + + + /************ + * + * The table ValueTypes contains format specifications, but for a + * typename-inputformat pair (V,INPUTFORMAT) we have to know what + * is the index in the ValueTypes table where we can find the + * format string and the display name for that tuple. + * + * For instance, format string for floats and int will look + * different, as well well format strings for integers in decimal + * notations and integers in hexadecimal notations will differ. + */ + template + struct ValueTypeIndex + { + // If during compilation, you find that there is no IDX + // member, then it means that you are attempting to convert a + // value type - inputformat tuple that is not supported. + // + // There are two cases. + // + // It can be so, that you tuple (V,INPUTFORMAT) does not make + // any sense. For instance, scanf expects unsigned integers + // for hecadecimal strings, so you can't dream to put there + // signed ints or floats. + // + // The other case is that the conversion is okay, but it is + // just not enabled / supported yet. Steps for enabling: + // + // 1 Go to definition of valuetypes in cpixstrtools.cpp, and + // add a new entry with the proper scanf format and + // display name. NOTE: there will be two conversion + // specifiers, first the ones you define and the second a + // mandatory %n. + // + // 2 Define a template specialication below for your tuple + // and define the IDX enum value to be the index of your + // new entry in the ValueTypes table. + // + // enum { IDX = ??? }; + }; + + + /** + * In the case of integer value types (char, short, int, long, + * long long, and their unsigned counterpart), there are several + * formats the string can use, these can be controlled with the + * enums here. + * + * Most types will just used the default (INPUTFORMAT_DEFAULT). + */ + enum InputFormat + { + // default, for integer types it is equivalent to + // INPUTFORMAT_INTEGER + INPUTFORMAT_DEFAULT = 0, + + // target variable is signed + INPUTFORMAT_DECIMAL = 1, + + // target variable is signed + INPUTFORMAT_INTEGER = 2, + + // target variable is unsigned + INPUTFORMAT_OCTAL = 3, + + // target variable is unsigned + INPUTFORMAT_UINTEGER = 4, + + // target variable is unsigned + INPUTFORMAT_HEXADECIMAL = 5 + }; + + + template<> + struct ValueTypeIndex + { + enum { IDX = 0 }; + }; + + template<> + struct ValueTypeIndex + { + enum { IDX = 1 }; + }; + + template<> + struct ValueTypeIndex + { + enum { IDX = 0 }; + }; + + template<> + struct ValueTypeIndex + { + enum { IDX = 2 }; + }; + + template<> + struct ValueTypeIndex + { + enum { IDX = 3 }; + }; + + template<> + struct ValueTypeIndex + { + enum { IDX = 4 }; + }; + + + template<> + struct ValueTypeIndex + { + enum { IDX = 5 }; + }; + + template<> + struct ValueTypeIndex + { + enum { IDX = 6 }; + }; + + + + /** + * Attempts to convert the given wide string to the value. This is + * the common workhorse function for all conversions. + * + * The template parameter INPUTFORMAT can be one of the + * InputFormat enums. Mostly, you want to use INPUTFORMAT_DEFAULT + * (cf overloaded convert function). If you are dealing with + * integers and special formatted values (octal, hex, etc ...), + * you can use the enum values above. + * + * NOTE: some of the input enum values imply signed, other imply + * unsigned value. Trying to use the wrong type of variable for + * 'to' will result in compilation error. + * + * @param to the pointer to the value to put the results, must not + * be NULL + * + * @param wide string, must not be NULL, and it must contain only + * the string representation for the value, nothing more. Failure + * to eat all of the string and put it to the value shall result + * in error. + * + * @return NULL if successful, and some string about what sort of + * value/formatting was attempted on failure. + */ + template + const char * wconvert(V * to, + const wchar_t * fromStr) + { + const char + * rv = NULL; + + int + valueTypeIndex = ValueTypeIndex::IDX; + + const ValueType + & valueType = ValueTypes[valueTypeIndex]; + + int + charsRead = 0; + int + result = swscanf(fromStr, + valueType.formatStr_, + to, + &charsRead); + + if (result != 1 + || charsRead != (*wcslen)(fromStr)) + { + rv = valueType.displayName_; + } + + return rv; + } + + + template + const char * wconvert(V * to, + const wchar_t * fromStr) + { + return wconvert(to, + fromStr); + } + + + template + const char * wconvertDecimal(INT * to, + const wchar_t * fromStr) + { + return wconvert(to, + fromStr); + } + + + template + const char * wconvertInteger(INT * to, + const wchar_t * fromStr) + { + return wconvert(to, + fromStr); + } + + + template + const char * wconvertOctal(INT * to, + const wchar_t * fromStr) + { + return wconvert(to, + fromStr); + } + + + template + const char * wconvertUnsignedInteger(INT * to, + const wchar_t * fromStr) + { + return wconvert(to, + fromStr); + } + + template + const char * wconvertHexadecimal(INT * to, + const wchar_t * fromStr) + { + return wconvert(to, + fromStr); + } + + + +} + + +#endif + + + +