WebCore/icu/unicode/ucoleitr.h
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2 *******************************************************************************
       
     3 *   Copyright (C) 2001-2004, International Business Machines
       
     4 *   Corporation and others.  All Rights Reserved.
       
     5 *******************************************************************************
       
     6 *
       
     7 * File ucoleitr.h
       
     8 *
       
     9 * Modification History:
       
    10 *
       
    11 * Date        Name        Description
       
    12 * 02/15/2001  synwee      Modified all methods to process its own function 
       
    13 *                         instead of calling the equivalent c++ api (coleitr.h)
       
    14 *******************************************************************************/
       
    15 
       
    16 #ifndef UCOLEITR_H
       
    17 #define UCOLEITR_H
       
    18 
       
    19 #include "unicode/utypes.h"
       
    20 
       
    21 #if !UCONFIG_NO_COLLATION
       
    22 
       
    23 /**  
       
    24  * This indicates that an error has occurred during processing or that no more CEs are
       
    25  * to be returned.
       
    26  * @stable ICU 2.0
       
    27  */
       
    28 #define UCOL_NULLORDER        ((int32_t)0xFFFFFFFF)
       
    29 
       
    30 #include "unicode/ucol.h"
       
    31 
       
    32 /** 
       
    33  * The UCollationElements struct.
       
    34  * For usage in C programs.
       
    35  * @stable ICU 2.0
       
    36  */
       
    37 typedef struct UCollationElements UCollationElements;
       
    38 
       
    39 /**
       
    40  * \file
       
    41  * \brief C API: UCollationElements
       
    42  *
       
    43  * The UCollationElements API is used as an iterator to walk through each 
       
    44  * character of an international string. Use the iterator to return the
       
    45  * ordering priority of the positioned character. The ordering priority of a 
       
    46  * character, which we refer to as a key, defines how a character is collated 
       
    47  * in the given collation object.
       
    48  * For example, consider the following in Spanish:
       
    49  * <pre>
       
    50  * .       "ca" -> the first key is key('c') and second key is key('a').
       
    51  * .       "cha" -> the first key is key('ch') and second key is key('a').
       
    52  * </pre>
       
    53  * And in German,
       
    54  * <pre>
       
    55  * .       "<ae ligature>b"-> the first key is key('a'), the second key is key('e'), and
       
    56  * .       the third key is key('b').
       
    57  * </pre>
       
    58  * <p>Example of the iterator usage: (without error checking)
       
    59  * <pre>
       
    60  * .  void CollationElementIterator_Example()
       
    61  * .  {
       
    62  * .      UChar *s;
       
    63  * .      int32_t order, primaryOrder;
       
    64  * .      UCollationElements *c;
       
    65  * .      UCollator *coll;
       
    66  * .      UErrorCode success = U_ZERO_ERROR;
       
    67  * .      s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) );
       
    68  * .      u_uastrcpy(s, "This is a test");
       
    69  * .      coll = ucol_open(NULL, &success);
       
    70  * .      c = ucol_openElements(coll, s, u_strlen(s), &success);
       
    71  * .      order = ucol_next(c, &success);
       
    72  * .      ucol_reset(c);
       
    73  * .      order = ucol_previous(c, &success);
       
    74  * .      free(s);
       
    75  * .      ucol_close(coll);
       
    76  * .      ucol_closeElements(c);
       
    77  * .  }
       
    78  * </pre>
       
    79  * <p>
       
    80  * ucol_next() returns the collation order of the next character.
       
    81  * ucol_previous() returns the collation order of the previous character.
       
    82  * The Collation Element Iterator moves only in one direction between calls to
       
    83  * ucol_reset(). That is, ucol_next() and ucol_previous() cannot be interleaved.
       
    84  * Whenever ucol_previous() is to be called after ucol_next() or vice versa,
       
    85  * ucol_reset has to be called first to reset the status, shifting pointers to 
       
    86  * either the end or the start of the string. Hence at the next call of 
       
    87  * ucol_previous() or ucol_next(), the last or first collation order will be returned.
       
    88  * If a change of direction is done without a ucol_reset, the result is 
       
    89  * undefined.
       
    90  * The result of a forward iterate (ucol_next) and reversed result of the  
       
    91  * backward iterate (ucol_previous) on the same string are equivalent, if
       
    92  * collation orders with the value UCOL_IGNORABLE are ignored.
       
    93  * Character based on the comparison level of the collator.  A collation order 
       
    94  * consists of primary order, secondary order and tertiary order.  The data 
       
    95  * type of the collation order is <strong>int32_t</strong>.
       
    96  *
       
    97  * @see UCollator
       
    98  */
       
    99 
       
   100 /**
       
   101  * Open the collation elements for a string.
       
   102  *
       
   103  * @param coll The collator containing the desired collation rules.
       
   104  * @param text The text to iterate over.
       
   105  * @param textLength The number of characters in text, or -1 if null-terminated
       
   106  * @param status A pointer to an UErrorCode to receive any errors.
       
   107  * @return a struct containing collation element information
       
   108  * @stable ICU 2.0
       
   109  */
       
   110 U_STABLE UCollationElements* U_EXPORT2 
       
   111 ucol_openElements(const UCollator  *coll,
       
   112                   const UChar      *text,
       
   113                         int32_t    textLength,
       
   114                         UErrorCode *status);
       
   115 
       
   116 /**
       
   117  * get a hash code for a key... Not very useful!
       
   118  * @param key    the given key.
       
   119  * @param length the size of the key array.
       
   120  * @return       the hash code.
       
   121  * @stable ICU 2.0
       
   122  */
       
   123 U_STABLE int32_t U_EXPORT2 
       
   124 ucol_keyHashCode(const uint8_t* key, int32_t length);
       
   125 
       
   126 /**
       
   127  * Close a UCollationElements.
       
   128  * Once closed, a UCollationElements may no longer be used.
       
   129  * @param elems The UCollationElements to close.
       
   130  * @stable ICU 2.0
       
   131  */
       
   132 U_STABLE void U_EXPORT2 
       
   133 ucol_closeElements(UCollationElements *elems);
       
   134 
       
   135 /**
       
   136  * Reset the collation elements to their initial state.
       
   137  * This will move the 'cursor' to the beginning of the text.
       
   138  * Property settings for collation will be reset to the current status.
       
   139  * @param elems The UCollationElements to reset.
       
   140  * @see ucol_next
       
   141  * @see ucol_previous
       
   142  * @stable ICU 2.0
       
   143  */
       
   144 U_STABLE void U_EXPORT2 
       
   145 ucol_reset(UCollationElements *elems);
       
   146 
       
   147 /**
       
   148  * Get the ordering priority of the next collation element in the text.
       
   149  * A single character may contain more than one collation element.
       
   150  * @param elems The UCollationElements containing the text.
       
   151  * @param status A pointer to an UErrorCode to receive any errors.
       
   152  * @return The next collation elements ordering, otherwise returns UCOL_NULLORDER
       
   153  *         if an error has occurred or if the end of string has been reached
       
   154  * @stable ICU 2.0
       
   155  */
       
   156 U_STABLE int32_t U_EXPORT2 
       
   157 ucol_next(UCollationElements *elems, UErrorCode *status);
       
   158 
       
   159 /**
       
   160  * Get the ordering priority of the previous collation element in the text.
       
   161  * A single character may contain more than one collation element.
       
   162  * Note that internally a stack is used to store buffered collation elements. 
       
   163  * It is very rare that the stack will overflow, however if such a case is 
       
   164  * encountered, the problem can be solved by increasing the size 
       
   165  * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
       
   166  * @param elems The UCollationElements containing the text.
       
   167  * @param status A pointer to an UErrorCode to receive any errors. Notably
       
   168  *               a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
       
   169  *               buffer has been exhausted.
       
   170  * @return The previous collation elements ordering, otherwise returns 
       
   171  *         UCOL_NULLORDER if an error has occurred or if the start of string has
       
   172  *         been reached.
       
   173  * @stable ICU 2.0
       
   174  */
       
   175 U_STABLE int32_t U_EXPORT2 
       
   176 ucol_previous(UCollationElements *elems, UErrorCode *status);
       
   177 
       
   178 /**
       
   179  * Get the maximum length of any expansion sequences that end with the 
       
   180  * specified comparison order.
       
   181  * This is useful for .... ?
       
   182  * @param elems The UCollationElements containing the text.
       
   183  * @param order A collation order returned by previous or next.
       
   184  * @return maximum size of the expansion sequences ending with the collation 
       
   185  *         element or 1 if collation element does not occur at the end of any 
       
   186  *         expansion sequence
       
   187  * @stable ICU 2.0
       
   188  */
       
   189 U_STABLE int32_t U_EXPORT2 
       
   190 ucol_getMaxExpansion(const UCollationElements *elems, int32_t order);
       
   191 
       
   192 /**
       
   193  * Set the text containing the collation elements.
       
   194  * Property settings for collation will remain the same.
       
   195  * In order to reset the iterator to the current collation property settings,
       
   196  * the API ucol_reset() has to be called.
       
   197  * @param elems The UCollationElements to set.
       
   198  * @param text The source text containing the collation elements.
       
   199  * @param textLength The length of text, or -1 if null-terminated.
       
   200  * @param status A pointer to an UErrorCode to receive any errors.
       
   201  * @see ucol_getText
       
   202  * @stable ICU 2.0
       
   203  */
       
   204 U_STABLE void U_EXPORT2 
       
   205 ucol_setText(      UCollationElements *elems, 
       
   206              const UChar              *text,
       
   207                    int32_t            textLength,
       
   208                    UErrorCode         *status);
       
   209 
       
   210 /**
       
   211  * Get the offset of the current source character.
       
   212  * This is an offset into the text of the character containing the current
       
   213  * collation elements.
       
   214  * @param elems The UCollationElements to query.
       
   215  * @return The offset of the current source character.
       
   216  * @see ucol_setOffset
       
   217  * @stable ICU 2.0
       
   218  */
       
   219 U_STABLE int32_t U_EXPORT2 
       
   220 ucol_getOffset(const UCollationElements *elems);
       
   221 
       
   222 /**
       
   223  * Set the offset of the current source character.
       
   224  * This is an offset into the text of the character to be processed.
       
   225  * Property settings for collation will remain the same.
       
   226  * In order to reset the iterator to the current collation property settings,
       
   227  * the API ucol_reset() has to be called.
       
   228  * @param elems The UCollationElements to set.
       
   229  * @param offset The desired character offset.
       
   230  * @param status A pointer to an UErrorCode to receive any errors.
       
   231  * @see ucol_getOffset
       
   232  * @stable ICU 2.0
       
   233  */
       
   234 U_STABLE void U_EXPORT2 
       
   235 ucol_setOffset(UCollationElements *elems,
       
   236                int32_t        offset,
       
   237                UErrorCode         *status);
       
   238 
       
   239 /**
       
   240 * Get the primary order of a collation order.
       
   241 * @param order the collation order
       
   242 * @return the primary order of a collation order.
       
   243 * @stable ICU 2.6
       
   244 */
       
   245 U_STABLE int32_t U_EXPORT2
       
   246 ucol_primaryOrder (int32_t order); 
       
   247 
       
   248 /**
       
   249 * Get the secondary order of a collation order.
       
   250 * @param order the collation order
       
   251 * @return the secondary order of a collation order.
       
   252 * @stable ICU 2.6
       
   253 */
       
   254 U_STABLE int32_t U_EXPORT2
       
   255 ucol_secondaryOrder (int32_t order); 
       
   256 
       
   257 /**
       
   258 * Get the tertiary order of a collation order.
       
   259 * @param order the collation order
       
   260 * @return the tertiary order of a collation order.
       
   261 * @stable ICU 2.6
       
   262 */
       
   263 U_STABLE int32_t U_EXPORT2
       
   264 ucol_tertiaryOrder (int32_t order); 
       
   265 
       
   266 #endif /* #if !UCONFIG_NO_COLLATION */
       
   267 
       
   268 #endif