JavaScriptCore/icu/unicode/ucol.h
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2 *******************************************************************************
       
     3 * Copyright (c) 1996-2005, International Business Machines Corporation and others.
       
     4 * All Rights Reserved.
       
     5 *******************************************************************************
       
     6 */
       
     7 
       
     8 #ifndef UCOL_H
       
     9 #define UCOL_H
       
    10 
       
    11 #include "unicode/utypes.h"
       
    12 
       
    13 #if !UCONFIG_NO_COLLATION
       
    14 
       
    15 #include "unicode/unorm.h"
       
    16 #include "unicode/parseerr.h"
       
    17 #include "unicode/uloc.h"
       
    18 #include "unicode/uset.h"
       
    19 
       
    20 /**
       
    21  * \file
       
    22  * \brief C API: Collator 
       
    23  *
       
    24  * <h2> Collator C API </h2>
       
    25  *
       
    26  * The C API for Collator performs locale-sensitive
       
    27  * string comparison. You use this service to build
       
    28  * searching and sorting routines for natural language text.
       
    29  * <em>Important: </em>The ICU collation service has been reimplemented 
       
    30  * in order to achieve better performance and UCA compliance. 
       
    31  * For details, see the 
       
    32  * <a href="http://icu.sourceforge.net/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm">
       
    33  * collation design document</a>.
       
    34  * <p>
       
    35  * For more information about the collation service see 
       
    36  * <a href="http://icu.sourceforge.net/icu/userguide/Collate_Intro.html">the users guide</a>.
       
    37  * <p>
       
    38  * Collation service provides correct sorting orders for most locales supported in ICU. 
       
     39  * If specific data for a locale is not available, the order eventually falls back
       
    40  * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 
       
    41  * <p>
       
    42  * Sort ordering may be customized by providing your own set of rules. For more on
       
    43  * this subject see the 
       
    44  * <a href="http://icu.sourceforge.net/icu/userguide/Collate_Customization.html">
       
    45  * Collation customization</a> section of the users guide.
       
    46  * <p>
       
    47  * @see         UCollationResult
       
    48  * @see         UNormalizationMode
       
    49  * @see         UCollationStrength
       
    50  * @see         UCollationElements
       
    51  */
       
    52 
       
     53 /** A collation element iterator (forward declaration of an incomplete struct type).
       
     54 *  For usage in C programs.
       
     55 */
       
     56 struct collIterate;
       
     57 /** Typedef so the collation element iterator struct can be named without the struct keyword 
       
     58  * @stable ICU 2.0
       
     59  */
       
     60 typedef struct collIterate collIterate;
       
    61 
       
     62 /** A collator (forward declaration of an incomplete struct type).
       
     63 *  For usage in C programs.
       
     64 */
       
     65 struct UCollator;
       
     66 /** Typedef naming the collator object type; see ucol_open, ucol_openRules and ucol_close 
       
     67  * @stable ICU 2.0
       
     68  */
       
     69 typedef struct UCollator UCollator;
       
    70 
       
    71 
       
     72 /**
       
     73  * UCOL_LESS is returned if source string is compared to be less than target
       
     74  * string in the ucol_strcoll() function.
       
     75  * UCOL_EQUAL is returned if source string is compared to be equal to target
       
     76  * string in the ucol_strcoll() function.
       
     77  * UCOL_GREATER is returned if source string is compared to be greater than
       
     78  * target string in the ucol_strcoll() function.
       
     79  * @see ucol_strcoll()
       
     80  * <p>
       
     81  * Possible values for a comparison result 
       
     82  * @stable ICU 2.0
       
     83  */
       
     84 typedef enum {
       
     85   /** string a == string b */
       
     86   UCOL_EQUAL    = 0,
       
     87   /** string a > string b */
       
     88   UCOL_GREATER    = 1,
       
     89   /** string a < string b */
       
     90   UCOL_LESS    = -1
       
     91 } UCollationResult ;
       
    92 
       
    93 
       
     94 /** Enum containing attribute values for controlling collation behavior.
       
     95  * Here are all the allowable values. Not every attribute can take every value. The only
       
     96  * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined  
       
     97  * value for that locale 
       
     98  * @stable ICU 2.0
       
     99  */
       
    100 typedef enum {
       
    101   /** accepted by most attributes */
       
    102   UCOL_DEFAULT = -1,
       
    103 
       
    104   /** Primary collation strength */
       
    105   UCOL_PRIMARY = 0,
       
    106   /** Secondary collation strength */
       
    107   UCOL_SECONDARY = 1,
       
    108   /** Tertiary collation strength */
       
    109   UCOL_TERTIARY = 2,
       
    110   /** Default collation strength */
       
    111   UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
       
    112   UCOL_CE_STRENGTH_LIMIT, /**< implicit value 3 (UCOL_TERTIARY + 1); numerically equal to UCOL_QUATERNARY */
       
    113   /** Quaternary collation strength */
       
    114   UCOL_QUATERNARY=3,
       
    115   /** Identical collation strength */
       
    116   UCOL_IDENTICAL=15,
       
    117   UCOL_STRENGTH_LIMIT, /**< implicit value 16 (UCOL_IDENTICAL + 1); numerically equal to UCOL_OFF */
       
    118 
       
    119   /** Turn the feature off - works for UCOL_FRENCH_COLLATION, 
       
    120       UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
       
    121       & UCOL_DECOMPOSITION_MODE*/
       
    122   UCOL_OFF = 16,
       
    123   /** Turn the feature on - works for UCOL_FRENCH_COLLATION, 
       
    124       UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
       
    125       & UCOL_DECOMPOSITION_MODE*/
       
    126   UCOL_ON = 17,
       
    127   
       
    128   /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
       
    129   UCOL_SHIFTED = 20,
       
    130   /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
       
    131   UCOL_NON_IGNORABLE = 21,
       
    132 
       
    133   /** Valid for UCOL_CASE_FIRST - 
       
    134       lower case sorts before upper case */
       
    135   UCOL_LOWER_FIRST = 24,
       
    136   /** Valid for UCOL_CASE_FIRST - upper case sorts before lower case */
       
    137   UCOL_UPPER_FIRST = 25,
       
    138 
       
    139   UCOL_ATTRIBUTE_VALUE_COUNT /**< implicit value 26 (UCOL_UPPER_FIRST + 1); the values above are not contiguous, so this is an upper bound, not the number of enumerators */
       
    140 
       
    141 } UColAttributeValue;
       
   142 
       
    143 /**
       
    144  * Base letter represents a primary difference.  Set comparison
       
    145  * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
       
    146  * Use this to set the strength of a Collator object.
       
    147  * Example of primary difference, "abc" &lt; "abd"
       
    148  * 
       
    149  * Diacritical differences on the same base letter represent a secondary
       
    150  * difference.  Set comparison level to UCOL_SECONDARY to ignore tertiary
       
    151  * differences. Use this to set the strength of a Collator object.
       
    152  * Example of secondary difference, "ä" &gt;&gt; "a".
       
    153  *
       
    154  * Uppercase and lowercase versions of the same character represent a
       
    155  * tertiary difference.  Set comparison level to UCOL_TERTIARY to include
       
    156  * all comparison differences. Use this to set the strength of a Collator
       
    157  * object.
       
    158  * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
       
    159  *
       
    160  * Two characters are considered "identical" when they have the same
       
    161  * Unicode spellings; use UCOL_IDENTICAL to compare at this level.
       
    162  * For example, "ä" == "ä".
       
    163  *
       
    164  * UCollationStrength is also used to determine the strength of sort keys 
       
    165  * generated from UCollator objects.
       
    166  * These values can now be found in the UColAttributeValue enum.
       
    167  * @stable ICU 2.0
       
    168  **/
       
    169 typedef UColAttributeValue UCollationStrength;
       
   170 
       
    171 /** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
       
    172  * value, as well as the values specific to each one. 
       
    173  * @stable ICU 2.0
       
    174  */
       
    175 typedef enum {
       
    176      /** Attribute for direction of secondary weights - used in French.\ 
       
    177       * Acceptable values are UCOL_ON, which results in secondary weights
       
    178       * being considered backwards and UCOL_OFF which treats secondary
       
    179       * weights in the order they appear.*/
       
    180      UCOL_FRENCH_COLLATION, 
       
    181      /** Attribute for handling variable elements.\ 
       
    182       * Acceptable values are UCOL_NON_IGNORABLE (default)
       
    183       * which treats all the codepoints with non-ignorable 
       
    184       * primary weights in the same way,
       
    185       * and UCOL_SHIFTED which causes codepoints with primary 
       
    186       * weights that are equal to or below the variable top value
       
    187       * to be ignored on primary level and moved to the quaternary 
       
    188       * level.*/
       
    189      UCOL_ALTERNATE_HANDLING, 
       
    190      /** Controls the ordering of upper and lower case letters.\ 
       
    191       * Acceptable values are UCOL_OFF (default), which orders
       
    192       * upper and lower case letters in accordance with their tertiary
       
    193       * weights, UCOL_UPPER_FIRST which forces upper case letters to 
       
    194       * sort before lower case letters, and UCOL_LOWER_FIRST which does 
       
    195       * the opposite. */
       
    196      UCOL_CASE_FIRST, 
       
    197      /** Controls whether an extra case level (positioned before the third
       
    198       * level) is generated or not.\ Acceptable values are UCOL_OFF (default), 
       
    199       * when case level is not generated, and UCOL_ON which causes the case
       
    200       * level to be generated.\ Contents of the case level are affected by
       
    201       * the value of UCOL_CASE_FIRST attribute.\ A simple way to ignore 
       
    202       * accent differences in a string is to set the strength to UCOL_PRIMARY
       
    203       * and enable case level. */
       
    204      UCOL_CASE_LEVEL,
       
    205      /** Controls whether the normalization check and necessary normalizations
       
    206       * are performed.\ When set to UCOL_OFF (default) no normalization check
       
    207       * is performed.\ The correctness of the result is guaranteed only if the 
       
    208       * input data is in so-called FCD form (see users manual for more info).\ 
       
    209       * When set to UCOL_ON, an incremental check is performed to see whether the input data
       
    210       * is in the FCD form.\ If the data is not in the FCD form, incremental 
       
    211       * NFD normalization is performed. */
       
    212      UCOL_NORMALIZATION_MODE, 
       
    213      /** An alias for UCOL_NORMALIZATION_MODE attribute */
       
    214      UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
       
    215      /** The strength attribute.\ Can be either UCOL_PRIMARY, UCOL_SECONDARY,
       
    216       * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL.\ The usual strength
       
    217       * for most locales (except Japanese) is tertiary.\ Quaternary strength 
       
    218       * is useful when combined with shifted setting for alternate handling
       
    219       * attribute and for JIS X 4061 collation, when it is used to distinguish
       
    220       * between Katakana  and Hiragana (this is achieved by setting the 
       
    221       * UCOL_HIRAGANA_QUATERNARY_MODE attribute to on).\ Otherwise, quaternary level
       
    222       * is affected only by the number of non ignorable code points in
       
    223       * the string.\ Identical strength is rarely useful, as it amounts 
       
    224       * to codepoints of the NFD form of the string. */
       
    225      UCOL_STRENGTH,  
       
    226      /** when turned on, this attribute 
       
    227       * positions Hiragana before all  
       
    228       * non-ignorables on quaternary level
       
    229       * This is a sneaky way to produce JIS
       
    230       * sort order */     
       
    231      UCOL_HIRAGANA_QUATERNARY_MODE,
       
    232      /** when turned on, this attribute 
       
    233       * generates a collation key
       
    234       * for the numeric value of substrings
       
    235       * of digits. This is a way to get '100' 
       
    236       * to sort AFTER '2'.*/          
       
    237      UCOL_NUMERIC_COLLATION, 
       
    238      UCOL_ATTRIBUTE_COUNT /**< implicit value 8: one past UCOL_NUMERIC_COLLATION (UCOL_DECOMPOSITION_MODE shares the value of UCOL_NORMALIZATION_MODE) */
       
    239 } UColAttribute;
       
   240 
       
    241 /** Options for retrieving the rule string 
       
    242  *  @stable ICU 2.0
       
    243  */
       
    244 typedef enum {
       
    245   /** Retrieve tailoring only (implicit value 0) */
       
    246   UCOL_TAILORING_ONLY, 
       
    247   /** Retrieve UCA rules and tailoring (implicit value 1) */
       
    248   UCOL_FULL_RULES 
       
    249 } UColRuleOption ;
       
   250 
       
   251 /**
       
   252  * Open a UCollator for comparing strings.
       
   253  * The UCollator pointer is used in all the calls to the Collation 
       
    254  * service. When finished, the collator must be disposed of by calling
       
   255  * {@link #ucol_close }.
       
   256  * @param loc The locale containing the required collation rules. 
       
   257  *            Special values for locales can be passed in - 
       
   258  *            if NULL is passed for the locale, the default locale
       
   259  *            collation rules will be used. If empty string ("") or
       
   260  *            "root" are passed, UCA rules will be used.
       
   261  * @param status A pointer to an UErrorCode to receive any errors
       
   262  * @return A pointer to a UCollator, or 0 if an error occurred.
       
   263  * @see ucol_openRules
       
   264  * @see ucol_safeClone
       
   265  * @see ucol_close
       
   266  * @stable ICU 2.0
       
   267  */
       
   268 U_STABLE UCollator* U_EXPORT2 
       
   269 ucol_open(const char *loc, UErrorCode *status);
       
   270 
       
   271 /**
       
   272  * Produce an UCollator instance according to the rules supplied.
       
   273  * The rules are used to change the default ordering, defined in the
       
   274  * UCA in a process called tailoring. The resulting UCollator pointer
       
    275  * can be used in the same way as the one obtained by {@link #ucol_open }.
       
   276  * @param rules A string describing the collation rules. For the syntax
       
   277  *              of the rules please see users guide.
       
   278  * @param rulesLength The length of rules, or -1 if null-terminated.
       
   279  * @param normalizationMode The normalization mode: One of
       
   280  *             UCOL_OFF     (expect the text to not need normalization),
       
   281  *             UCOL_ON      (normalize), or
       
   282  *             UCOL_DEFAULT (set the mode according to the rules)
       
   283  * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
       
   284  * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules.
       
    285  * @param parseError  A pointer to UParseError to receive information about errors
       
   286  *                    occurred during parsing. This argument can currently be set
       
    287  *                    to NULL, but at the user's own risk. Please provide a real structure.
       
   288  * @param status A pointer to an UErrorCode to receive any errors
       
   289  * @return A pointer to a UCollator.\ It is not guaranteed that NULL be returned in case
       
   290  *         of error - please use status argument to check for errors.
       
   291  * @see ucol_open
       
   292  * @see ucol_safeClone
       
   293  * @see ucol_close
       
   294  * @stable ICU 2.0
       
   295  */
       
   296 U_STABLE UCollator* U_EXPORT2 
       
   297 ucol_openRules( const UChar        *rules,
       
   298                 int32_t            rulesLength,
       
   299                 UColAttributeValue normalizationMode,
       
   300                 UCollationStrength strength,
       
   301                 UParseError        *parseError,
       
   302                 UErrorCode         *status);
       
   303 
       
   304 /** 
       
   305  * Open a collator defined by a short form string.
       
   306  * The structure and the syntax of the string is defined in the "Naming collators"
       
   307  * section of the users guide: 
       
   308  * http://icu.sourceforge.net/icu/userguide/Collate_Concepts.html#Naming_Collators
       
    309  * Attributes are overridden by the subsequent attributes. So, for "S2_S3", final
       
   310  * strength will be 3. 3066bis locale overrides individual locale parts.
       
   311  * The call to this function is equivalent to a call to ucol_open, followed by a 
       
   312  * series of calls to ucol_setAttribute and ucol_setVariableTop.
       
   313  * @param definition A short string containing a locale and a set of attributes. 
       
   314  *                   Attributes not explicitly mentioned are left at the default
       
   315  *                   state for a locale.
       
   316  * @param parseError if not NULL, structure that will get filled with error's pre
       
   317  *                   and post context in case of error.
       
   318  * @param forceDefaults if FALSE, the settings that are the same as the collator 
       
   319  *                   default settings will not be applied (for example, setting
       
   320  *                   French secondary on a French collator would not be executed). 
       
   321  *                   If TRUE, all the settings will be applied regardless of the 
       
   322  *                   collator default value. If the definition
       
   323  *                   strings are to be cached, should be set to FALSE.
       
   324  * @param status     Error code. Apart from regular error conditions connected to 
       
   325  *                   instantiating collators (like out of memory or similar), this
       
   326  *                   API will return an error if an invalid attribute or attribute/value
       
   327  *                   combination is specified.
       
    328  * @return           A pointer to a UCollator or 0 if an error occurred (including an 
       
   329  *                   invalid attribute).
       
   330  * @see ucol_open
       
   331  * @see ucol_setAttribute
       
   332  * @see ucol_setVariableTop
       
   333  * @see ucol_getShortDefinitionString
       
   334  * @see ucol_normalizeShortDefinitionString
       
   335  * @draft ICU 3.0
       
   336  *
       
   337  */
       
   338 U_CAPI UCollator* U_EXPORT2
       
   339 ucol_openFromShortString( const char *definition,
       
   340                           UBool forceDefaults,
       
   341                           UParseError *parseError,
       
   342                           UErrorCode *status);
       
   343 
       
   344 /**
       
   345  * Get a set containing the contractions defined by the collator. The set includes
       
   346  * both the UCA contractions and the contractions defined by the collator. This set
       
   347  * will contain only strings. If a tailoring explicitly suppresses contractions from 
       
   348  * the UCA (like Russian), removed contractions will not be in the resulting set.
       
   349  * @param coll collator 
       
   350  * @param conts the set to hold the result. It gets emptied before
       
   351  *              contractions are added. 
       
   352  * @param status to hold the error code
       
   353  * @return the size of the contraction set
       
   354  *
       
   355  * @draft ICU 3.0
       
   356  */
       
   357 U_CAPI int32_t U_EXPORT2
       
   358 ucol_getContractions( const UCollator *coll,
       
   359                   USet *conts,
       
   360                   UErrorCode *status);
       
   361 
       
   362 
       
   363 /** 
       
   364  * Close a UCollator.
       
   365  * Once closed, a UCollator should not be used.\ Every open collator should
       
   366  * be closed.\ Otherwise, a memory leak will result.
       
   367  * @param coll The UCollator to close.
       
   368  * @see ucol_open
       
   369  * @see ucol_openRules
       
   370  * @see ucol_safeClone
       
   371  * @stable ICU 2.0
       
   372  */
       
   373 U_STABLE void U_EXPORT2 
       
   374 ucol_close(UCollator *coll);
       
   375 
       
   376 /**
       
   377  * Compare two strings.
       
   378  * The strings will be compared using the options already specified.
       
   379  * @param coll The UCollator containing the comparison rules.
       
   380  * @param source The source string.
       
   381  * @param sourceLength The length of source, or -1 if null-terminated.
       
   382  * @param target The target string.
       
   383  * @param targetLength The length of target, or -1 if null-terminated.
       
   384  * @return The result of comparing the strings; one of UCOL_EQUAL,
       
   385  * UCOL_GREATER, UCOL_LESS
       
   386  * @see ucol_greater
       
   387  * @see ucol_greaterOrEqual
       
   388  * @see ucol_equal
       
   389  * @stable ICU 2.0
       
   390  */
       
   391 U_STABLE UCollationResult U_EXPORT2 
       
   392 ucol_strcoll(    const    UCollator    *coll,
       
   393         const    UChar        *source,
       
   394         int32_t            sourceLength,
       
   395         const    UChar        *target,
       
   396         int32_t            targetLength);
       
   397 
       
   398 /**
       
   399  * Determine if one string is greater than another.
       
   400  * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER
       
   401  * @param coll The UCollator containing the comparison rules.
       
   402  * @param source The source string.
       
   403  * @param sourceLength The length of source, or -1 if null-terminated.
       
   404  * @param target The target string.
       
   405  * @param targetLength The length of target, or -1 if null-terminated.
       
   406  * @return TRUE if source is greater than target, FALSE otherwise.
       
   407  * @see ucol_strcoll
       
   408  * @see ucol_greaterOrEqual
       
   409  * @see ucol_equal
       
   410  * @stable ICU 2.0
       
   411  */
       
   412 U_STABLE UBool U_EXPORT2 
       
   413 ucol_greater(const UCollator *coll,
       
   414              const UChar     *source, int32_t sourceLength,
       
   415              const UChar     *target, int32_t targetLength);
       
   416 
       
   417 /**
       
   418  * Determine if one string is greater than or equal to another.
       
   419  * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS
       
   420  * @param coll The UCollator containing the comparison rules.
       
   421  * @param source The source string.
       
   422  * @param sourceLength The length of source, or -1 if null-terminated.
       
   423  * @param target The target string.
       
   424  * @param targetLength The length of target, or -1 if null-terminated.
       
   425  * @return TRUE if source is greater than or equal to target, FALSE otherwise.
       
   426  * @see ucol_strcoll
       
   427  * @see ucol_greater
       
   428  * @see ucol_equal
       
   429  * @stable ICU 2.0
       
   430  */
       
   431 U_STABLE UBool U_EXPORT2 
       
   432 ucol_greaterOrEqual(const UCollator *coll,
       
   433                     const UChar     *source, int32_t sourceLength,
       
   434                     const UChar     *target, int32_t targetLength);
       
   435 
       
   436 /**
       
   437  * Compare two strings for equality.
       
   438  * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL
       
   439  * @param coll The UCollator containing the comparison rules.
       
   440  * @param source The source string.
       
   441  * @param sourceLength The length of source, or -1 if null-terminated.
       
   442  * @param target The target string.
       
   443  * @param targetLength The length of target, or -1 if null-terminated.
       
   444  * @return TRUE if source is equal to target, FALSE otherwise
       
   445  * @see ucol_strcoll
       
   446  * @see ucol_greater
       
   447  * @see ucol_greaterOrEqual
       
   448  * @stable ICU 2.0
       
   449  */
       
   450 U_STABLE UBool U_EXPORT2 
       
   451 ucol_equal(const UCollator *coll,
       
   452            const UChar     *source, int32_t sourceLength,
       
   453            const UChar     *target, int32_t targetLength);
       
   454 
       
   455 /**
       
    456  * Compare two UTF-8 encoded strings.
       
   457  * The strings will be compared using the options already specified.
       
   458  * @param coll The UCollator containing the comparison rules.
       
   459  * @param sIter The source string iterator.
       
   460  * @param tIter The target string iterator.
       
   461  * @return The result of comparing the strings; one of UCOL_EQUAL,
       
   462  * UCOL_GREATER, UCOL_LESS
       
   463  * @param status A pointer to an UErrorCode to receive any errors
       
   464  * @see ucol_strcoll
       
   465  * @stable ICU 2.6
       
   466  */
       
   467 U_STABLE UCollationResult U_EXPORT2 
       
   468 ucol_strcollIter(  const    UCollator    *coll,
       
   469                   UCharIterator *sIter,
       
   470                   UCharIterator *tIter,
       
   471                   UErrorCode *status);
       
   472 
       
   473 /**
       
   474  * Get the collation strength used in a UCollator.
       
   475  * The strength influences how strings are compared.
       
   476  * @param coll The UCollator to query.
       
   477  * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
       
   478  * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
       
   479  * @see ucol_setStrength
       
   480  * @stable ICU 2.0
       
   481  */
       
   482 U_STABLE UCollationStrength U_EXPORT2 
       
   483 ucol_getStrength(const UCollator *coll);
       
   484 
       
   485 /**
       
   486  * Set the collation strength used in a UCollator.
       
   487  * The strength influences how strings are compared.
       
   488  * @param coll The UCollator to set.
       
   489  * @param strength The desired collation strength; one of UCOL_PRIMARY, 
       
   490  * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT
       
   491  * @see ucol_getStrength
       
   492  * @stable ICU 2.0
       
   493  */
       
   494 U_STABLE void U_EXPORT2 
       
   495 ucol_setStrength(UCollator *coll,
       
   496                  UCollationStrength strength);
       
   497 
       
   498 /**
       
   499  * Get the display name for a UCollator.
       
   500  * The display name is suitable for presentation to a user.
       
   501  * @param objLoc The locale of the collator in question.
       
   502  * @param dispLoc The locale for display.
       
   503  * @param result A pointer to a buffer to receive the attribute.
       
   504  * @param resultLength The maximum size of result.
       
   505  * @param status A pointer to an UErrorCode to receive any errors
       
   506  * @return The total buffer size needed; if greater than resultLength,
       
   507  * the output was truncated.
       
   508  * @stable ICU 2.0
       
   509  */
       
   510 U_STABLE int32_t U_EXPORT2 
       
   511 ucol_getDisplayName(    const    char        *objLoc,
       
   512             const    char        *dispLoc,
       
   513             UChar             *result,
       
   514             int32_t         resultLength,
       
   515             UErrorCode        *status);
       
   516 
       
   517 /**
       
   518  * Get a locale for which collation rules are available.
       
   519  * A UCollator in a locale returned by this function will perform the correct
       
   520  * collation for the locale.
       
   521  * @param index The index of the desired locale.
       
   522  * @return A locale for which collation rules are available, or 0 if none.
       
   523  * @see ucol_countAvailable
       
   524  * @stable ICU 2.0
       
   525  */
       
   526 U_STABLE const char* U_EXPORT2 
       
   527 ucol_getAvailable(int32_t index);
       
   528 
       
   529 /**
       
   530  * Determine how many locales have collation rules available.
       
    531  * This function is most useful for determining the loop ending condition for
       
   532  * calls to {@link #ucol_getAvailable }.
       
   533  * @return The number of locales for which collation rules are available.
       
   534  * @see ucol_getAvailable
       
   535  * @stable ICU 2.0
       
   536  */
       
   537 U_STABLE int32_t U_EXPORT2 
       
   538 ucol_countAvailable(void);
       
   539 
       
   540 #if !UCONFIG_NO_SERVICE
       
   541 /**
       
   542  * Create a string enumerator of all locales for which a valid
       
   543  * collator may be opened.
       
   544  * @param status input-output error code
       
   545  * @return a string enumeration over locale strings. The caller is
       
   546  * responsible for closing the result.
       
   547  * @draft ICU 3.0
       
   548  */
       
   549 U_DRAFT UEnumeration* U_EXPORT2
       
   550 ucol_openAvailableLocales(UErrorCode *status);
       
   551 #endif
       
   552 
       
   553 /**
       
   554  * Create a string enumerator of all possible keywords that are relevant to
       
   555  * collation. At this point, the only recognized keyword for this
       
   556  * service is "collation".
       
   557  * @param status input-output error code
       
    558  * @return a string enumeration over keyword strings. The caller is
       
   559  * responsible for closing the result.
       
   560  * @draft ICU 3.0
       
   561  */
       
   562 U_DRAFT UEnumeration* U_EXPORT2
       
   563 ucol_getKeywords(UErrorCode *status);
       
   564 
       
   565 /**
       
   566  * Given a keyword, create a string enumeration of all values
       
   567  * for that keyword that are currently in use.
       
   568  * @param keyword a particular keyword as enumerated by
       
   569  * ucol_getKeywords. If any other keyword is passed in, *status is set
       
   570  * to U_ILLEGAL_ARGUMENT_ERROR.
       
   571  * @param status input-output error code
       
   572  * @return a string enumeration over collation keyword values, or NULL
       
   573  * upon error. The caller is responsible for closing the result.
       
   574  * @draft ICU 3.0
       
   575  */
       
   576 U_DRAFT UEnumeration* U_EXPORT2
       
   577 ucol_getKeywordValues(const char *keyword, UErrorCode *status);
       
   578 
       
   579 /**
       
   580  * Return the functionally equivalent locale for the given
       
   581  * requested locale, with respect to given keyword, for the
       
   582  * collation service.  If two locales return the same result, then
       
   583  * collators instantiated for these locales will behave
       
   584  * equivalently.  The converse is not always true; two collators
       
   585  * may in fact be equivalent, but return different results, due to
       
   586  * internal details.  The return result has no other meaning than
       
   587  * that stated above, and implies nothing as to the relationship
       
   588  * between the two locales.  This is intended for use by
       
   589  * applications that wish to cache collators, or otherwise reuse
       
   590  * collators when possible.  The functional equivalent may change
       
   591  * over time.  For more information, please see the <a
       
   592  * href="http://icu.sourceforge.net/icu/userguide/locale.html#services">
       
   593  * Locales and Services</a> section of the ICU User Guide.
       
   594  * @param result fillin for the functionally equivalent locale
       
   595  * @param resultCapacity capacity of the fillin buffer
       
   596  * @param keyword a particular keyword as enumerated by
       
   597  * ucol_getKeywords.
       
   598  * @param locale the requested locale
       
   599  * @param isAvailable if non-NULL, pointer to a fillin parameter that
       
   600  * indicates whether the requested locale was 'available' to the
       
   601  * collation service. A locale is defined as 'available' if it
       
   602  * physically exists within the collation locale data.
       
   603  * @param status pointer to input-output error code
       
   604  * @return the actual buffer size needed for the locale.  If greater
       
   605  * than resultCapacity, the returned full name will be truncated and
       
   606  * an error code will be returned.
       
   607  * @draft ICU 3.0
       
   608  */
       
   609 U_DRAFT int32_t U_EXPORT2
       
   610 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
       
   611                              const char* keyword, const char* locale,
       
   612                              UBool* isAvailable, UErrorCode* status);
       
   613 
       
   614 /**
       
   615  * Get the collation rules from a UCollator.
       
   616  * The rules will follow the rule syntax.
       
   617  * @param coll The UCollator to query.
       
   618  * @param length Output pointer; presumably receives the length of the returned rules (confirm units against the implementation).
       
   619  * @return The collation rules.
       
   620  * @stable ICU 2.0
       
   621  */
       
   622 U_STABLE const UChar* U_EXPORT2 
       
   623 ucol_getRules(    const    UCollator    *coll, 
       
   624         int32_t            *length);
       
   625 
       
   626 /** Get the short definition string for a collator. This API harvests the collator's
       
   627  *  locale and the attribute set and produces a string that can be used for opening 
       
   628  *  a collator with the same properties using the ucol_openFromShortString API.
       
   629  *  This string will be normalized.
       
   630  *  The structure and the syntax of the string is defined in the "Naming collators"
       
   631  *  section of the users guide: 
       
   632  *  http://icu.sourceforge.net/icu/userguide/Collate_Concepts.html#Naming_Collators
       
   633  *  This API supports preflighting.
       
   634  *  @param coll a collator
       
   635  *  @param locale a locale that will appear as a collator's locale in the resulting
       
   636  *                short string definition. If NULL, the locale will be harvested 
       
   637  *                from the collator.
       
   638  *  @param buffer space to hold the resulting string
       
   639  *  @param capacity capacity of the buffer
       
   640  *  @param status for returning errors. All the preflighting errors are featured
       
   641  *  @return length of the resulting string
       
   642  *  @see ucol_openFromShortString
       
   643  *  @see ucol_normalizeShortDefinitionString
       
   644  *  @draft ICU 3.0
       
   645  */
       
   646 U_CAPI int32_t U_EXPORT2
       
   647 ucol_getShortDefinitionString(const UCollator *coll,
       
   648                               const char *locale,
       
   649                               char *buffer,
       
   650                               int32_t capacity,
       
   651                               UErrorCode *status);
       
   652 
       
   653 /** Verifies and normalizes short definition string.
       
   654  *  A normalized short definition string has all the options sorted by the argument name,
       
   655  *  so that equivalent definition strings are the same. 
       
   656  *  This API supports preflighting.
       
   657  *  @param source definition string
       
   658  *  @param destination space to hold the resulting string
       
   659  *  @param capacity capacity of the buffer
       
   660  *  @param parseError if not NULL, structure that will get filled with error's pre
       
   661  *                   and post context in case of error.
       
   662  *  @param status     Error code. This API will return an error if an invalid attribute 
       
   663  *                    or attribute/value combination is specified. All the preflighting 
       
   664  *                    errors are also featured
       
   665  *  @return length of the resulting normalized string.
       
   666  *
       
   667  *  @see ucol_openFromShortString
       
   668  *  @see ucol_getShortDefinitionString
       
   669  * 
       
   670  *  @draft ICU 3.0
       
   671  */
       
   672 
       
   673 U_CAPI int32_t U_EXPORT2
       
   674 ucol_normalizeShortDefinitionString(const char *source,
       
   675                                     char *destination,
       
   676                                     int32_t capacity,
       
   677                                     UParseError *parseError,
       
   678                                     UErrorCode *status);
       
   679         
       
   680 
       
   681 /**
       
   682  * Get a sort key for a string from a UCollator.
       
   683  * Sort keys may be compared using <TT>strcmp</TT>.
       
   684  * @param coll The UCollator containing the collation rules.
       
   685  * @param source The string to transform.
       
   686  * @param sourceLength The length of source, or -1 if null-terminated.
       
   687  * @param result A pointer to a buffer to receive the sort key.
       
   688  * @param resultLength The maximum size of result.
       
   689  * @return The size needed to fully store the sort key.
       
   690  * @see ucol_keyHashCode
       
   691  * @stable ICU 2.0
       
   692  */
       
   693 U_STABLE int32_t U_EXPORT2 
       
   694 ucol_getSortKey(const    UCollator    *coll,
       
   695         const    UChar        *source,
       
   696         int32_t        sourceLength,
       
   697         uint8_t        *result,
       
   698         int32_t        resultLength);
       
   699 
       
   700 
       
   701 /** Gets the next count bytes of a sort key. Caller needs
       
   702  *  to preserve state array between calls and to provide
       
   703  *  the same type of UCharIterator set with the same string.
       
   704  *  The destination buffer provided must be big enough to store
       
   705  *  the number of requested bytes. Generated sortkey is not 
       
   706  *  compatible with sortkeys generated using ucol_getSortKey
       
   707  *  API, since we don't do any compression. If uncompressed
       
   708  *  sortkeys are required, this API can be used.
       
   709  *  @param coll The UCollator containing the collation rules.
       
   710  *  @param iter UCharIterator containing the string we need 
       
   711  *              the sort key to be calculated for.
       
   712  *  @param state Opaque state of sortkey iteration.
       
   713  *  @param dest Buffer to hold the resulting sortkey part
       
   714  *  @param count number of sort key bytes required.
       
   715  *  @param status error code indicator.
       
   716  *  @return the actual number of bytes of a sortkey. It can be
       
   717  *          smaller than count if we have reached the end of 
       
   718  *          the sort key.
       
   719  *  @stable ICU 2.6
       
   720  */
       
   721 U_STABLE int32_t U_EXPORT2 
       
   722 ucol_nextSortKeyPart(const UCollator *coll,
       
   723                      UCharIterator *iter,
       
   724                      uint32_t state[2],
       
   725                      uint8_t *dest, int32_t count,
       
   726                      UErrorCode *status);
       
   727 
       
   728 /** Bound mode taken by the ucol_getBound API.
       
   729  * See the ucol_getBound documentation below for an explanation.
       
   730  * Do not change the values assigned to the
       
   731  * members of this enum: the underlying code
       
   732  * depends on them having these exact numbers.
       
   733  * @stable ICU 2.0
       
   734  */
       
   735 typedef enum {
       
   736   /** lower bound */
       
   737   UCOL_BOUND_LOWER = 0,
       
   738   /** upper bound that will match strings of exact size */
       
   739   UCOL_BOUND_UPPER = 1,
       
   740   /** upper bound that will match all the strings that have the same initial substring as the given string */
       
   741   UCOL_BOUND_UPPER_LONG = 2,
       
   742   UCOL_BOUND_VALUE_COUNT /**< one past the last bound mode, i.e. the number of modes listed above */
       
   743 } UColBoundMode;
       
   744 
       
   745 /**
       
   746  * Produce a bound for a given sortkey and a number of levels.
       
   747  * Return value is always the number of bytes needed, regardless of 
       
   748  * whether the result buffer was big enough or even valid.<br>
       
   749  * Resulting bounds can be used to produce a range of strings that are
       
   750  * between upper and lower bounds. For example, if bounds are produced
       
   751  * for a sortkey of string "smith", strings between upper and lower 
       
   752  * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
       
   753  * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
       
   754  * is produced, strings matched would be as above. However, if bound
       
   755  * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
       
   756  * also match "Smithsonian" and similar.<br>
       
   757  * For more on usage, see example in cintltst/capitst.c in procedure
       
   758  * TestBounds.
       
   759  * Sort keys may be compared using <TT>strcmp</TT>.
       
   760  * @param source The source sortkey.
       
   761  * @param sourceLength The length of source, or -1 if null-terminated. 
       
   762  *                     (If an unmodified sortkey is passed, it is always null 
       
   763  *                      terminated).
       
   764  * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which 
       
   765  *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, that 
       
   766  *                  produces upper bound that matches strings of the same length 
       
   767  *                  or UCOL_BOUND_UPPER_LONG that matches strings that have the 
       
   768  *                  same starting substring as the source string.
       
   769  * @param noOfLevels  Number of levels required in the resulting bound (for most 
       
   770  *                    uses, the recommended value is 1). See users guide for 
       
   771  *                    explanation on number of levels a sortkey can have.
       
   772  * @param result A pointer to a buffer to receive the resulting sortkey.
       
   773  * @param resultLength The maximum size of result.
       
   774  * @param status Used for returning error code if something went wrong. If the 
       
   775  *               number of levels requested is higher than the number of levels
       
   776  *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 
       
   777  *               issued.
       
   778  * @return The size needed to fully store the bound. 
       
   779  * @see ucol_keyHashCode
       
   780  * @stable ICU 2.1
       
   781  */
       
   782 U_STABLE int32_t U_EXPORT2 
       
   783 ucol_getBound(const uint8_t       *source,
       
   784         int32_t             sourceLength,
       
   785         UColBoundMode       boundType,
       
   786         uint32_t            noOfLevels,
       
   787         uint8_t             *result,
       
   788         int32_t             resultLength,
       
   789         UErrorCode          *status);
       
   790         
       
   791 /**
       
   792  * Gets the version information for a Collator. Version is currently
       
   793  * an opaque 32-bit number which depends, among other things, on major
       
   794  * versions of the collator tailoring and UCA.
       
   795  * @param coll The UCollator to query.
       
   796  * @param info the version # information, the result will be filled in
       
   797  * @stable ICU 2.0
       
   798  */
       
   799 U_STABLE void U_EXPORT2
       
   800 ucol_getVersion(const UCollator* coll, UVersionInfo info);
       
   801 
       
   802 /**
       
   803  * Gets the UCA version information for a Collator. Version is the
       
   804  * UCA version number (3.1.1, 4.0).
       
   805  * @param coll The UCollator to query.
       
   806  * @param info the version # information, the result will be filled in
       
   807  * @draft ICU 2.8
       
   808  */
       
   809 U_DRAFT void U_EXPORT2
       
   810 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
       
   811 
       
   812 /** 
       
   813  * Merge two sort keys. The levels are merged with their corresponding counterparts
       
   814  * (primaries with primaries, secondaries with secondaries etc.). Between the values
       
   815  * from the same level a separator is inserted.
       
   816  * example (uncompressed): 
       
   817  * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00
       
   818  * will be merged as 
       
   819  * 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00
       
   820  * This allows for concatenating of first and last names for sorting, among other things.
       
   821  * If the destination buffer is not big enough, the results are undefined.
       
   822  * If any of source lengths are zero or any of source pointers are NULL/undefined, 
       
   823  * result is of size zero.
       
   824  * @param src1 pointer to the first sortkey
       
   825  * @param src1Length length of the first sortkey
       
   826  * @param src2 pointer to the second sortkey
       
   827  * @param src2Length length of the second sortkey
       
   828  * @param dest buffer to hold the result
       
   829  * @param destCapacity size of the buffer for the result
       
   830  * @return size of the result. If the buffer is big enough size is always
       
   831  *         src1Length+src2Length-1
       
   832  * @stable ICU 2.0
       
   833  */
       
   834 U_STABLE int32_t U_EXPORT2 
       
   835 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
       
   836                    const uint8_t *src2, int32_t src2Length,
       
   837                    uint8_t *dest, int32_t destCapacity);
       
   838 
       
   839 /**
       
   840  * Universal attribute setter
       
   841  * @param coll collator whose attributes are to be changed
       
   842  * @param attr attribute type 
       
   843  * @param value attribute value
       
   844  * @param status to indicate whether the operation went on smoothly or there were errors
       
   845  * @see UColAttribute
       
   846  * @see UColAttributeValue
       
   847  * @see ucol_getAttribute
       
   848  * @stable ICU 2.0
       
   849  */
       
   850 U_STABLE void U_EXPORT2 
       
   851 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
       
   852 
       
   853 /**
       
   854  * Universal attribute getter
       
   855  * @param coll collator whose attributes are to be queried
       
   856  * @param attr attribute type
       
   857  * @param status to indicate whether the operation went on smoothly or there were errors
       
   858  * @return attribute value
       
   859  * @see UColAttribute
       
   860  * @see UColAttributeValue
       
   861  * @see ucol_setAttribute
       
   862  * @stable ICU 2.0
       
   863  */
       
   864 U_STABLE UColAttributeValue  U_EXPORT2 
       
   865 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
       
   866 
       
   867 /** Variable top
       
   868  * is a two byte primary value which causes all the codepoints with primary values that
       
   869  * are less than or equal to the variable top to be shifted when alternate handling is set
       
   870  * to UCOL_SHIFTED.
       
   871  * Sets the variable top to a collation element value of a string supplied. 
       
   872  * @param coll collator whose variable top needs to be changed
       
   873  * @param varTop one or more (if contraction) UChars to which the variable top should be set
       
   874  * @param len length of variable top string. If -1 it is considered to be zero terminated.
       
   875  * @param status error code. If error code is set, the return value is undefined. 
       
   876  *               Errors set by this function are: <br>
       
   877  *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such 
       
   878  *    contraction<br>
       
   879  *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
       
   880  * @return a 32 bit value containing the value of the variable top in upper 16 bits. 
       
   881  *         Lower 16 bits are undefined.
       
   882  * @see ucol_getVariableTop
       
   883  * @see ucol_restoreVariableTop
       
   884  * @stable ICU 2.0
       
   885  */
       
   886 U_STABLE uint32_t U_EXPORT2 
       
   887 ucol_setVariableTop(UCollator *coll, 
       
   888                     const UChar *varTop, int32_t len, 
       
   889                     UErrorCode *status);
       
   890 
       
   891 /** 
       
   892  * Gets the variable top value of a Collator. 
       
   893  * Lower 16 bits are undefined and should be ignored.
       
   894  * @param coll collator whose variable top needs to be retrieved
       
   895  * @param status error code (not changed by function). If error code is set, 
       
   896  *               the return value is undefined.
       
   897  * @return the variable top value of a Collator.
       
   898  * @see ucol_setVariableTop
       
   899  * @see ucol_restoreVariableTop
       
   900  * @stable ICU 2.0
       
   901  */
       
   902 U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
       
   903 
       
   904 /** 
       
   905  * Sets the variable top to a collation element value supplied. Variable top is 
       
   906  * set to the upper 16 bits. 
       
   907  * Lower 16 bits are ignored.
       
   908  * @param coll collator whose variable top needs to be changed
       
   909  * @param varTop CE value, as returned by ucol_setVariableTop or ucol_getVariableTop
       
   910  * @param status error code (not changed by function)
       
   911  * @see ucol_getVariableTop
       
   912  * @see ucol_setVariableTop
       
   913  * @stable ICU 2.0
       
   914  */
       
   915 U_STABLE void U_EXPORT2 
       
   916 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status);
       
   917 
       
   918 /**
       
   919  * Thread safe cloning operation. The result is a clone of a given collator.
       
   920  * @param coll collator to be cloned
       
   921  * @param stackBuffer user allocated space for the new clone. 
       
   922  * If NULL new memory will be allocated. 
       
   923  *  If buffer is not large enough, new memory will be allocated.
       
   924  *  Clients can use the U_COL_SAFECLONE_BUFFERSIZE. 
       
   925  *  This will probably be enough to avoid memory allocations.
       
   926  * @param pBufferSize pointer to size of allocated space. 
       
   927  *  If *pBufferSize == 0, a sufficient size for use in cloning will 
       
   928  *  be returned ('pre-flighting')
       
   929  *  If *pBufferSize is not enough for a stack-based safe clone, 
       
   930  *  new memory will be allocated.
       
   931  * @param status to indicate whether the operation went on smoothly or there were errors
       
   932  *    An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any
       
   933  * allocations were necessary.
       
   934  * @return pointer to the new clone
       
   935  * @see ucol_open
       
   936  * @see ucol_openRules
       
   937  * @see ucol_close
       
   938  * @stable ICU 2.0
       
   939  */
       
   940 U_STABLE UCollator* U_EXPORT2 
       
   941 ucol_safeClone(const UCollator *coll,
       
   942                void            *stackBuffer,
       
   943                int32_t         *pBufferSize,
       
   944                UErrorCode      *status);
       
   945 
       
   946 /** default memory size for the new clone. It needs to be this large for os/400 large pointers 
       
   947  * @stable ICU 2.0
       
   948  */
       
   949 #define U_COL_SAFECLONE_BUFFERSIZE 512
       
   950 
       
   951 /**
       
   952  * Returns current rules. Delta defines whether full rules are returned or just the tailoring. 
       
   953  * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough 
       
   954  * to store rules, will store up to available space.
       
   955  * @param coll collator to get the rules from
       
   956  * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 
       
   957  * @param buffer buffer to store the result in. If NULL, you'll get no rules.
       
   958  * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in.
       
   959  * @return number of UChars needed to store the rules
       
   960  * @stable ICU 2.0
       
   961  */
       
   962 U_STABLE int32_t U_EXPORT2 
       
   963 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
       
   964 
       
   965 /**
       
   966  * gets the locale name of the collator. If the collator
       
   967  * is instantiated from the rules, then this function returns
       
   968  * NULL.
       
   969  * @param coll The UCollator for which the locale is needed
       
   970  * @param type You can choose between requested, valid and actual
       
   971  *             locale. For description see the definition of
       
   972  *             ULocDataLocaleType in uloc.h
       
   973  * @param status error code of the operation
       
   974  * @return real locale name from which the collation data comes. 
       
   975  *         If the collator was instantiated from rules, returns
       
   976  *         NULL.
       
   977  * @deprecated ICU 2.8 Use ucol_getLocaleByType instead
       
   978  */
       
   979 U_DEPRECATED const char * U_EXPORT2
       
   980 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
       
   981 
       
   982 
       
   983 /**
       
   984  * gets the locale name of the collator. If the collator
       
   985  * is instantiated from the rules, then this function returns
       
   986  * NULL.
       
   987  * @param coll The UCollator for which the locale is needed
       
   988  * @param type You can choose between requested, valid and actual
       
   989  *             locale. For description see the definition of
       
   990  *             ULocDataLocaleType in uloc.h
       
   991  * @param status error code of the operation
       
   992  * @return real locale name from which the collation data comes. 
       
   993  *         If the collator was instantiated from rules, returns
       
   994  *         NULL.
       
   995  * @draft ICU 2.8 likely to change in ICU 3.0, based on feedback
       
   996  */
       
   997 U_DRAFT const char * U_EXPORT2
       
   998 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
       
   999 
       
  1000 /**
       
  1001  * Get a Unicode set that contains all the characters and sequences tailored in 
       
  1002  * this collator. The result must be disposed of by using uset_close.
       
  1003  * @param coll        The UCollator for which we want to get tailored chars
       
  1004  * @param status      error code of the operation
       
  1005  * @return a pointer to a newly created USet. Must be disposed of by using uset_close
       
  1006  * @see ucol_openRules
       
  1007  * @see uset_close
       
  1008  * @stable ICU 2.4
       
  1009  */
       
  1010 U_STABLE USet * U_EXPORT2
       
  1011 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
       
  1012 
       
  1013 /**
       
  1014  * Returned by ucol_collatorToIdentifier to signify that collator is
       
  1015  * not encodable as an identifier.
       
  1016  * @internal ICU 3.0
       
  1017  */
       
  1018 #define UCOL_SIT_COLLATOR_NOT_ENCODABLE 0x80000000
       
  1019 
       
  1020 /**
       
  1021  * Get a 31-bit identifier given a collator. 
       
  1022  * @param coll UCollator
       
  1023  *  @param locale a locale that will appear as a collator's locale in the resulting
       
  1024  *                short string definition. If NULL, the locale will be harvested 
       
  1025  *                from the collator.
       
  1026  * @param status holds error messages
       
  1027  * @return 31-bit identifier. MSB is used if the collator cannot be encoded. In that
       
  1028  *         case UCOL_SIT_COLLATOR_NOT_ENCODABLE is returned
       
  1029  * @see ucol_openFromIdentifier
       
  1030  * @see ucol_identifierToShortString
       
  1031  * @internal ICU 3.0
       
  1032  */
       
  1033 U_INTERNAL uint32_t U_EXPORT2
       
  1034 ucol_collatorToIdentifier(const UCollator *coll,
       
  1035                           const char *locale,
       
  1036                           UErrorCode *status);
       
  1037 
       
  1038 /**
       
  1039  * Open a collator given a 31-bit identifier
       
  1040  * @param identifier 31-bit identifier, encoded by calling ucol_collatorToIdentifier
       
  1041  * @param forceDefaults if FALSE, the settings that are the same as the collator 
       
  1042  *                   default settings will not be applied (for example, setting
       
  1043  *                   French secondary on a French collator would not be executed). 
       
  1044  *                   If TRUE, all the settings will be applied regardless of the 
       
  1045  *                   collator default value. If the definition
       
  1046  *                   strings that can be produced from a collator instantiated by 
       
  1047  *                   calling this API are to be cached, should be set to FALSE.
       
  1048  * @param status for returning errors
       
  1049  * @return UCollator object
       
  1050  * @see ucol_collatorToIdentifier
       
  1051  * @see ucol_identifierToShortString
       
  1052  * @internal ICU 3.0
       
  1053  */
       
  1054 U_INTERNAL UCollator* U_EXPORT2
       
  1055 ucol_openFromIdentifier(uint32_t identifier,
       
  1056                         UBool forceDefaults,
       
  1057                         UErrorCode *status);
       
  1058 
       
  1059 
       
  1060 /**
       
  1061  * Calculate the short definition string given an identifier. Supports preflighting.
       
  1062  * @param identifier 31-bit identifier, encoded by calling ucol_collatorToIdentifier
       
  1063  * @param buffer buffer to store the result
       
  1064  * @param capacity buffer capacity
       
  1065  * @param forceDefaults whether the settings that are the same as the default setting
       
  1066  *                      should be forced anyway. Setting this argument to FALSE reduces
       
  1067  *                      the number of different configurations, but decreases performance
       
  1068  *                      as a collator has to be instantiated.
       
  1069  * @param status for returning errors
       
  1070  * @return length of the short definition string
       
  1071  * @see ucol_collatorToIdentifier
       
  1072  * @see ucol_openFromIdentifier
       
  1073  * @see ucol_shortStringToIdentifier
       
  1074  * @internal ICU 3.0
       
  1075  */
       
  1076 U_INTERNAL int32_t U_EXPORT2
       
  1077 ucol_identifierToShortString(uint32_t identifier,
       
  1078                              char *buffer,
       
  1079                              int32_t capacity,
       
  1080                              UBool forceDefaults,
       
  1081                              UErrorCode *status);
       
  1082 
       
  1083 /**
       
  1084  * Calculate the identifier given a short definition string. Supports preflighting.
       
  1085  * @param definition short string definition
       
  1086  * @param forceDefaults whether the settings that are the same as the default setting
       
  1087  *                      should be forced anyway. Setting this argument to FALSE reduces
       
  1088  *                      the number of different configurations, but decreases performance
       
  1089  *                      as a collator has to be instantiated.
       
  1090  * @param status for returning errors
       
  1091  * @return identifier
       
  1092  * @see ucol_collatorToIdentifier
       
  1093  * @see ucol_openFromIdentifier
       
  1094  * @see ucol_identifierToShortString
       
  1095  * @internal ICU 3.0
       
  1096  */
       
  1097 U_INTERNAL uint32_t U_EXPORT2
       
  1098 ucol_shortStringToIdentifier(const char *definition,
       
  1099                              UBool forceDefaults,
       
  1100                              UErrorCode *status);
       
  1101 
       
  1102 
       
  1103 
       
  1104 /**
       
  1105  * Universal attribute getter that returns UCOL_DEFAULT if the value is default
       
  1106  * @param coll collator whose attributes are to be queried
       
  1107  * @param attr attribute type
       
  1108  * @param status to indicate whether the operation went on smoothly or there were errors
       
  1109  * @return attribute value or UCOL_DEFAULT if the value is default
       
  1110  * @see UColAttribute
       
  1111  * @see UColAttributeValue
       
  1112  * @see ucol_setAttribute
       
  1113  * @internal ICU 3.0
       
  1114  */
       
  1115 U_INTERNAL UColAttributeValue  U_EXPORT2
       
  1116 ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status);
       
  1117 
       
  1118 /** Check whether two collators are equal. Collators are considered equal if they
       
  1119  *  will sort strings the same. This means that both the current attributes and the
       
  1120  *  rules must be equivalent. Currently used for RuleBasedCollator::operator==.
       
  1121  *  @param source first collator
       
  1122  *  @param target second collator
       
  1123  *  @return TRUE or FALSE
       
  1124  *  @internal ICU 3.0
       
  1125  */
       
  1126 U_INTERNAL UBool U_EXPORT2
       
  1127 ucol_equals(const UCollator *source, const UCollator *target);
       
  1128 
       
  1129 /** Calculates the set of unsafe code points, given a collator.
       
  1130  *   A character is unsafe if you could append any character and cause the ordering to alter significantly.
       
  1131  *   Collation sorts in normalized order, so anything that rearranges in normalization can cause this.
       
  1132  *   Thus if you have a character like a_umlaut, and you add a lower_dot to it,
       
  1133  *   then it normalizes to a_lower_dot + umlaut, and sorts differently.
       
  1134  *  @param coll Collator
       
  1135  *  @param unsafe a fill-in set to receive the unsafe code points
       
  1136  *  @param status for catching errors
       
  1137  *  @return number of elements in the set
       
  1138  *  @internal ICU 3.0
       
  1139  */
       
  1140 U_INTERNAL int32_t U_EXPORT2
       
  1141 ucol_getUnsafeSet( const UCollator *coll,
       
  1142                   USet *unsafe,
       
  1143                   UErrorCode *status);
       
  1144 
       
  1145 /** Reset UCA's static pointers. You don't want to use this, unless your static memory can go away.
       
  1146  * @internal ICU 3.2.1
       
  1147  */
       
  1148 U_INTERNAL void U_EXPORT2
       
  1149 ucol_forgetUCA(void);
       
  1150 
       
  1151 /** Touches all resources needed for instantiating a collator from a short string definition,
       
  1152  *  thus filling up the cache.
       
  1153  * @param definition A short string containing a locale and a set of attributes. 
       
  1154  *                   Attributes not explicitly mentioned are left at the default
       
  1155  *                   state for a locale.
       
  1156  * @param parseError if not NULL, structure that will get filled with error's pre
       
  1157  *                   and post context in case of error.
       
  1158  * @param forceDefaults if FALSE, the settings that are the same as the collator 
       
  1159  *                   default settings will not be applied (for example, setting
       
  1160  *                   French secondary on a French collator would not be executed). 
       
  1161  *                   If TRUE, all the settings will be applied regardless of the 
       
  1162  *                   collator default value. If the definition
       
  1163  *                   strings are to be cached, this should be set to FALSE.
       
  1164  * @param status     Error code. Apart from regular error conditions connected to 
       
  1165  *                   instantiating collators (like out of memory or similar), this
       
  1166  *                   API will return an error if an invalid attribute or attribute/value
       
  1167  *                   combination is specified.
       
  1168  * @see ucol_openFromShortString
       
  1169  * @internal ICU 3.2.1
       
  1170  */
       
  1171 U_INTERNAL void U_EXPORT2
       
  1172 ucol_prepareShortStringOpen( const char *definition,
       
  1173                           UBool forceDefaults,
       
  1174                           UParseError *parseError,
       
  1175                           UErrorCode *status);
       
  1176 
       
  1177 /** Creates a binary image of a collator. This binary image can be stored and 
       
  1178  *  later used to instantiate a collator using ucol_openBinary.
       
  1179  *  This API supports preflighting.
       
  1180  *  @param coll Collator
       
  1181  *  @param buffer a fill-in buffer to receive the binary image
       
  1182  *  @param capacity capacity of the destination buffer
       
  1183  *  @param status for catching errors
       
  1184  *  @return size of the image
       
  1185  *  @see ucol_openBinary
       
  1186  *  @draft ICU 3.2
       
  1187  */
       
  1188 U_DRAFT int32_t U_EXPORT2
       
  1189 ucol_cloneBinary(const UCollator *coll,
       
  1190                  uint8_t *buffer, int32_t capacity,
       
  1191                  UErrorCode *status);
       
  1192 
       
  1193 /** Opens a collator from a collator binary image created using
       
  1194  *  ucol_cloneBinary. Binary image used in instantiation of the 
       
  1195  *  collator remains owned by the user and should stay around for 
       
  1196  *  the lifetime of the collator. The API also takes a base collator
       
  1197  *  which usually should be UCA.
       
  1198  *  @param bin binary image owned by the user and required through the
       
  1199  *             lifetime of the collator
       
  1200  *  @param length size of the image. If negative, the API will try to
       
  1201  *                figure out the length of the image
       
  1202  *  @param base fallback collator, usually UCA. Base is required to be
       
  1203  *              present through the lifetime of the collator. Currently 
       
  1204  *              it cannot be NULL.
       
  1205  *  @param status for catching errors
       
  1206  *  @return newly created collator
       
  1207  *  @see ucol_cloneBinary
       
  1208  *  @draft ICU 3.2
       
  1209  */
       
  1210 U_DRAFT UCollator* U_EXPORT2
       
  1211 ucol_openBinary(const uint8_t *bin, int32_t length, 
       
  1212                 const UCollator *base, 
       
  1213                 UErrorCode *status);
       
  1214 
       
  1215 
       
  1216 #endif /* #if !UCONFIG_NO_COLLATION */
       
  1217 
       
  1218 #endif
       
  1219