WebCore/icu/unicode/ubrk.h
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2 * Copyright (C) 1996-2004, International Business Machines Corporation and others. All Rights Reserved.
       
     3 *****************************************************************************************
       
     4 */
       
     5 
       
     6 #ifndef UBRK_H
       
     7 #define UBRK_H
       
     8 
       
     9 #include "unicode/utypes.h"
       
    10 #include "unicode/uloc.h"
       
    11 
       
    12 /**
       
    13  * A text-break iterator.
       
    14  *  For usage in C programs.
       
    15  */
       
    16 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
       
    17 #   define UBRK_TYPEDEF_UBREAK_ITERATOR
       
    18     /**
       
    19      *  Opaque type representing an ICU break iterator object; in this header it is only handled through a UBreakIterator pointer.
       
    20      *  @stable ICU 2.0
       
    21      */
       
    22     typedef void UBreakIterator;
       
    23 #endif
       
    24 
       
    25 #if !UCONFIG_NO_BREAK_ITERATION
       
    26 
       
    27 #include "unicode/parseerr.h"
       
    28 
       
    29 /**
       
    30  * \file
       
    31  * \brief C API: BreakIterator
       
    32  *
       
    33  * <h2> BreakIterator C API </h2>
       
    34  *
       
    35  * The BreakIterator C API defines  methods for finding the location
       
    36  * of boundaries in text. A pointer to a UBreakIterator maintains a
       
    37  * current position and scans over text, returning the index of characters
       
    38  * where boundaries occur.
       
    39  * <P>
       
    40  * Line boundary analysis determines where a text string can be broken
       
    41  * when line-wrapping. The mechanism correctly handles punctuation and
       
    42  * hyphenated words.
       
    43  * <P>
       
    44  * Sentence boundary analysis allows selection with correct
       
    45  * interpretation of periods within numbers and abbreviations, and
       
    46  * trailing punctuation marks such as quotation marks and parentheses.
       
    47  * <P>
       
    48  * Word boundary analysis is used by search and replace functions, as
       
    49  * well as within text editing applications that allow the user to
       
    50  * select words with a double click. Word selection provides correct
       
    51  * interpretation of punctuation marks within and following
       
    52  * words. Characters that are not part of a word, such as symbols or
       
    53  * punctuation marks, have word-breaks on both sides.
       
    54  * <P>
       
    55  * Character boundary analysis allows users to interact with
       
    56  * characters as they expect to, for example, when moving the cursor
       
    57  * through a text string. Character boundary analysis provides correct
       
    58  * navigation through character strings, regardless of how the
       
    59  * character is stored.  For example, an accented character might be
       
    60  * stored as a base character and a diacritical mark. What users
       
    61  * consider to be a character can differ between languages.
       
    62  * <P>
       
    63  * Title boundary analysis locates all positions,
       
    64  * typically starts of words, that should be set to Title Case
       
    65  * when title casing the text.
       
    66  * <P>
       
    67  *
       
    68  * This is the interface for all text boundaries.
       
    69  * <P>
       
    70  * Examples:
       
    71  * <P>
       
    72  * Helper function to output text
       
    73  * <pre>
       
    74  * \code
       
    75  *    void printTextRange(UChar* str, int32_t start, int32_t end ) {
       
    76  *         UChar* result;
       
    77  *         UChar* temp;
       
    78  *         const char* res;
       
    79  *         temp=(UChar*)malloc(sizeof(UChar) * ((u_strlen(str)-start)+1));
       
    80  *         result=(UChar*)malloc(sizeof(UChar) * ((end-start)+1));
       
    81  *         u_strcpy(temp, &str[start]);
       
    82  *         u_strncpy(result, temp, end-start);
       
    83  *         res=(char*)malloc(sizeof(char) * (u_strlen(result)+1));
       
    84  *         u_austrcpy(res, result);
       
    85  *         printf("%s\n", res);
       
    86  *    }
       
    87  * \endcode
       
    88  * </pre>
       
    89  * Print each element in order:
       
    90  * <pre>
       
    91  * \code
       
    92  *    void printEachForward( UBreakIterator* boundary, UChar* str) {
       
    93  *       int32_t end;
       
    94  *       int32_t start = ubrk_first(boundary);
       
    95  *       for (end = ubrk_next(boundary); end != UBRK_DONE; start = end, end = ubrk_next(boundary)) {
       
    96  *             printTextRange(str, start, end );
       
    97  *         }
       
    98  *    }
       
    99  * \endcode
       
   100  * </pre>
       
   101  * Print each element in reverse order:
       
   102  * <pre>
       
   103  * \code
       
   104  *    void printEachBackward( UBreakIterator* boundary, UChar* str) {
       
   105  *       int32_t start;
       
   106  *       int32_t end = ubrk_last(boundary);
       
   107  *       for (start = ubrk_previous(boundary); start != UBRK_DONE;  end = start, start =ubrk_previous(boundary)) {
       
   108  *             printTextRange( str, start, end );
       
   109  *         }
       
   110  *    }
       
   111  * \endcode
       
   112  * </pre>
       
   113  * Print first element
       
   114  * <pre>
       
   115  * \code
       
   116  *    void printFirst(UBreakIterator* boundary, UChar* str) {
       
   117  *        int32_t end;
       
   118  *        int32_t start = ubrk_first(boundary);
       
   119  *        end = ubrk_next(boundary);
       
   120  *        printTextRange( str, start, end );
       
   121  *    }
       
   122  * \endcode
       
   123  * </pre>
       
   124  * Print last element
       
   125  * <pre>
       
   126  * \code
       
   127  *    void printLast(UBreakIterator* boundary, UChar* str) {
       
   128  *        int32_t start;
       
   129  *        int32_t end = ubrk_last(boundary);
       
   130  *        start = ubrk_previous(boundary);
       
   131  *        printTextRange(str, start, end );
       
   132  *    }
       
   133  * \endcode
       
   134  * </pre>
       
   135  * Print the element at a specified position
       
   136  * <pre>
       
   137  * \code
       
   138  *    void printAt(UBreakIterator* boundary, int32_t pos , UChar* str) {
       
   139  *        int32_t start;
       
   140  *        int32_t end = ubrk_following(boundary, pos);
       
   141  *        start = ubrk_previous(boundary);
       
   142  *        printTextRange(str, start, end );
       
   143  *    }
       
   144  * \endcode
       
   145  * </pre>
       
   146  * Creating and using text boundaries
       
   147  * <pre>
       
   148  * \code
       
   149  *       void BreakIterator_Example( void ) {
       
   150  *           UBreakIterator* boundary;
       
   151  *           UChar *stringToExamine;
       
   152  *           stringToExamine=(UChar*)malloc(sizeof(UChar) * (strlen("Aaa bbb ccc. Ddd eee fff.")+1) );
       
   153  *           u_uastrcpy(stringToExamine, "Aaa bbb ccc. Ddd eee fff.");
       
   154  *           printf("Examining: Aaa bbb ccc. Ddd eee fff.\n");
       
   155  *
       
   156  *           //print each sentence in forward and reverse order
       
   157  *           boundary = ubrk_open(UBRK_SENTENCE, "en_us", stringToExamine, u_strlen(stringToExamine), &status);
       
   158  *           printf("----- forward: -----------\n");
       
   159  *           printEachForward(boundary, stringToExamine);
       
   160  *           printf("----- backward: ----------\n");
       
   161  *           printEachBackward(boundary, stringToExamine);
       
   162  *           ubrk_close(boundary);
       
   163  *
       
   164  *           //print each word in order
       
   165  *           boundary = ubrk_open(UBRK_WORD, "en_us", stringToExamine, u_strlen(stringToExamine), &status);
       
   166  *           printf("----- forward: -----------\n");
       
   167  *           printEachForward(boundary, stringToExamine);
       
   168  *           printf("----- backward: ----------\n");
       
   169  *           printEachBackward(boundary, stringToExamine);
       
   170  *           //print first element
       
   171  *           printf("----- first: -------------\n");
       
   172  *           printFirst(boundary, stringToExamine);
       
   173  *           //print last element
       
   174  *           printf("----- last: --------------\n");
       
   175  *           printLast(boundary, stringToExamine);
       
   176  *           //print word at charpos 10
       
   177  *           printf("----- at pos 10: ---------\n");
       
   178  *           printAt(boundary, 10 , stringToExamine);
       
   179  *
       
   180  *           ubrk_close(boundary);
       
   181  *       }
       
   182  * \endcode
       
   183  * </pre>
       
   184  */
       
   185 
       
   186 /** The possible types of text boundaries.  @stable ICU 2.0 */
       
   187 typedef enum UBreakIteratorType {
       
   188   /** Character breaks  @stable ICU 2.0 */
       
   189   UBRK_CHARACTER,
       
   190   /** Word breaks @stable ICU 2.0 */
       
   191   UBRK_WORD,
       
   192   /** Line breaks @stable ICU 2.0 */
       
   193   UBRK_LINE,
       
   194   /** Sentence breaks @stable ICU 2.0 */
       
   195   UBRK_SENTENCE,
       
   196 
       
   197 #ifndef U_HIDE_DEPRECATED_API
       
   198   /**
       
   199    * Title Case breaks (declared only when U_HIDE_DEPRECATED_API is not defined).
       
   200    * The iterator created using this type locates title boundaries as described for
       
   201    * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
       
   202    * please use Word Boundary iterator.
       
   203    *
       
   204    * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
       
   205    */
       
   206   UBRK_TITLE
       
   207 #endif /* U_HIDE_DEPRECATED_API */
       
   208 
       
   209 } UBreakIteratorType;
       
   210 
       
   211 /** Value returned by iteration functions such as ubrk_next() and ubrk_previous() once all text boundaries have been returned.
       
   212  *  @stable ICU 2.0
       
   213  */
       
   214 #define UBRK_DONE ((int32_t) -1)
       
   215 
       
   216 
       
   217 /**
       
   218  *  Enum constants for the word break tags returned by
       
   219  *  ubrk_getRuleStatus().  A range of values is defined for each category of
       
   220  *  word, to allow for further subdivisions of a category in future releases.
       
   221  *  Applications should check for tag values falling within the range
       
   222  *  (lower value <= tag < the matching _LIMIT value), not for single values.
       
   223  *  @stable ICU 2.2
       
   224 */
       
   225 typedef enum UWordBreak {
       
   226     /** Tag value for "words" that do not fit into any of the other categories.
       
   227      *  Includes spaces and most punctuation. */
       
   228     UBRK_WORD_NONE           = 0,
       
   229     /** Upper bound (exclusive) for tags for uncategorized words. */
       
   230     UBRK_WORD_NONE_LIMIT     = 100,
       
   231     /** Tag value for words that appear to be numbers, lower limit.    */
       
   232     UBRK_WORD_NUMBER         = 100,
       
   233     /** Tag value for words that appear to be numbers, upper limit (exclusive).    */
       
   234     UBRK_WORD_NUMBER_LIMIT   = 200,
       
   235     /** Tag value for words that contain letters, excluding
       
   236      *  hiragana, katakana or ideographic characters, lower limit.    */
       
   237     UBRK_WORD_LETTER         = 200,
       
   238     /** Tag value for words containing letters, upper limit (exclusive)  */
       
   239     UBRK_WORD_LETTER_LIMIT   = 300,
       
   240     /** Tag value for words containing kana characters, lower limit */
       
   241     UBRK_WORD_KANA           = 300,
       
   242     /** Tag value for words containing kana characters, upper limit (exclusive) */
       
   243     UBRK_WORD_KANA_LIMIT     = 400,
       
   244     /** Tag value for words containing ideographic characters, lower limit */
       
   245     UBRK_WORD_IDEO           = 400,
       
   246     /** Tag value for words containing ideographic characters, upper limit (exclusive) */
       
   247     UBRK_WORD_IDEO_LIMIT     = 500
       
   248 } UWordBreak;
       
   249 
       
   250 /**
       
   251  *  Enum constants for the line break tags returned by ubrk_getRuleStatus().
       
   252  *  A range of values is defined for each category of
       
   253  *  line break, to allow for further subdivisions of a category in future releases.
       
   254  *  Applications should check for tag values falling within the range, rather
       
   255  *  than for single individual values.
       
   256  *  @draft ICU 2.8
       
   257 */
       
   258 typedef enum ULineBreakTag {
       
   259     /** Tag value for soft line breaks, positions at which a line break
       
   260       *  is acceptable but not required                */
       
   261     UBRK_LINE_SOFT            = 0,
       
   262     /** Upper bound (exclusive) for soft line breaks.  */
       
   263     UBRK_LINE_SOFT_LIMIT      = 100,
       
   264     /** Tag value for a hard, or mandatory line break  */
       
   265     UBRK_LINE_HARD            = 100,
       
   266     /** Upper bound (exclusive) for hard line breaks.  */
       
   267     UBRK_LINE_HARD_LIMIT      = 200
       
   268 } ULineBreakTag;
       
   269 
       
   270 
       
   271 
       
   272 /**
       
   273  *  Enum constants for the sentence break tags returned by ubrk_getRuleStatus().
       
   274  *  A range of values is defined for each category of
       
   275  *  sentence, to allow for further subdivisions of a category in future releases.
       
   276  *  Applications should check for tag values falling within the range, rather
       
   277  *  than for single individual values.
       
   278  *  @draft ICU 2.8
       
   279 */
       
   280 typedef enum USentenceBreakTag {
       
   281     /** Tag value for sentences ending with a sentence terminator
       
   282       * ('.', '?', '!', etc.) character, possibly followed by a
       
   283       * hard separator (CR, LF, PS, etc.)
       
   284       */
       
   285     UBRK_SENTENCE_TERM       = 0,
       
   286     /** Upper bound (exclusive) for tags for sentences ended by sentence terminators.    */
       
   287     UBRK_SENTENCE_TERM_LIMIT = 100,
       
   288     /** Tag value for sentences that do not contain an ending
       
   289       * sentence terminator ('.', '?', '!', etc.) character, but
       
   290       * are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
       
   291       */
       
   292     UBRK_SENTENCE_SEP        = 100,
       
   293     /** Upper bound (exclusive) for tags for sentences ended by a separator.  */
       
   294     UBRK_SENTENCE_SEP_LIMIT  = 200
       
   295 
       
   296 } USentenceBreakTag;
       
   297 
       
   298 
       
   299 /**
       
   300  * Open a new UBreakIterator for locating text boundaries for a specified locale.
       
   301  * A UBreakIterator may be used for detecting character, line, word,
       
   302  * and sentence breaks in text.
       
   303  * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
       
   304  * UBRK_LINE, UBRK_SENTENCE
       
   305  * @param locale The locale specifying the text-breaking conventions.
       
   306  * @param text The text to be iterated over.
       
   307  * @param textLength The number of characters in text, or -1 if null-terminated.
       
   308  * @param status A UErrorCode to receive any errors.
       
   309  * @return A UBreakIterator for the specified locale.
       
   310  * @see ubrk_openRules
       
   311  * @stable ICU 2.0
       
   312  */
       
   313 U_STABLE UBreakIterator* U_EXPORT2
       
   314 ubrk_open(UBreakIteratorType type,
       
   315       const char *locale,
       
   316       const UChar *text,
       
   317       int32_t textLength,
       
   318       UErrorCode *status);
       
   319 
       
   320 /**
       
   321  * Open a new UBreakIterator for locating text boundaries using specified breaking rules.
       
   322  * The rule syntax is ... (TBD)
       
   323  * @param rules A set of rules specifying the text breaking conventions.
       
   324  * @param rulesLength The number of characters in rules, or -1 if null-terminated.
       
   325  * @param text The text to be iterated over.  May be null, in which case ubrk_setText() is
       
   326  *        used to specify the text to be iterated.
       
   327  * @param textLength The number of characters in text, or -1 if null-terminated.
       
   328  * @param parseErr   Receives position and context information for any syntax errors
       
   329  *                   detected while parsing the rules.
       
   330  * @param status A UErrorCode to receive any errors.
       
   331  * @return A UBreakIterator for the specified rules.
       
   332  * @see ubrk_open
       
   333  * @stable ICU 2.2
       
   334  */
       
   335 U_STABLE UBreakIterator* U_EXPORT2
       
   336 ubrk_openRules(const UChar     *rules,
       
   337                int32_t         rulesLength,
       
   338                const UChar     *text,
       
   339                int32_t          textLength,
       
   340                UParseError     *parseErr,
       
   341                UErrorCode      *status);
       
   342 
       
   343 /**
       
   344  * Thread safe cloning operation
       
   345  * @param bi iterator to be cloned
       
   346  * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
       
   347  *  If buffer is not large enough, new memory will be allocated.
       
   348  *  Clients can use the U_BRK_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
       
   349  * @param pBufferSize pointer to size of allocated space.
       
   350  *  If *pBufferSize == 0, a sufficient size for use in cloning will
       
   351  *  be returned ('pre-flighting')
       
   352  *  If *pBufferSize is not enough for a stack-based safe clone,
       
   353  *  new memory will be allocated.
       
   354  * @param status to indicate whether the operation went on smoothly or there were errors
       
   355  *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
       
   356  * @return pointer to the new clone
       
   357  * @stable ICU 2.0
       
   358  */
       
   359 U_STABLE UBreakIterator * U_EXPORT2
       
   360 ubrk_safeClone(
       
   361           const UBreakIterator *bi,
       
   362           void *stackBuffer,
       
   363           int32_t *pBufferSize,
       
   364           UErrorCode *status);
       
   365 
       
   366 /**
       
   367   * A recommended size (in bytes) for the memory buffer to be passed to ubrk_safeClone().
       
   368   * @stable ICU 2.0
       
   369   */
       
   370 #define U_BRK_SAFECLONE_BUFFERSIZE 512
       
   371 
       
   372 /**
       
   373 * Close a UBreakIterator.
       
   374 * Once closed, a UBreakIterator may no longer be used.
       
   375 * @param bi The break iterator to close.
       
   376  * @stable ICU 2.0
       
   377 */
       
   378 U_STABLE void U_EXPORT2
       
   379 ubrk_close(UBreakIterator *bi);
       
   380 
       
   381 /**
       
   382  * Sets an existing iterator to point to a new piece of text
       
   383  * @param bi The iterator to use
       
   384  * @param text The text to be set
       
   385  * @param textLength The length of the text
       
   386  * @param status The error code
       
   387  * @stable ICU 2.0
       
   388  */
       
   389 U_STABLE void U_EXPORT2
       
   390 ubrk_setText(UBreakIterator* bi,
       
   391              const UChar*    text,
       
   392              int32_t         textLength,
       
   393              UErrorCode*     status);
       
   394 
       
   395 /**
       
   396  * Determine the most recently-returned text boundary.
       
   397  *
       
   398  * @param bi The break iterator to use.
       
   399  * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
       
   400  * \ref ubrk_first, or \ref ubrk_last.
       
   401  * @stable ICU 2.0
       
   402  */
       
   403 U_STABLE int32_t U_EXPORT2
       
   404 ubrk_current(const UBreakIterator *bi);
       
   405 
       
   406 /**
       
   407  * Determine the text boundary following the current text boundary.
       
   408  *
       
   409  * @param bi The break iterator to use.
       
   410  * @return The character index of the next text boundary, or UBRK_DONE
       
   411  * if all text boundaries have been returned.
       
   412  * @see ubrk_previous
       
   413  * @stable ICU 2.0
       
   414  */
       
   415 U_STABLE int32_t U_EXPORT2
       
   416 ubrk_next(UBreakIterator *bi);
       
   417 
       
   418 /**
       
   419  * Determine the text boundary preceding the current text boundary.
       
   420  *
       
   421  * @param bi The break iterator to use.
       
   422  * @return The character index of the preceding text boundary, or UBRK_DONE
       
   423  * if all text boundaries have been returned.
       
   424  * @see ubrk_next
       
   425  * @stable ICU 2.0
       
   426  */
       
   427 U_STABLE int32_t U_EXPORT2
       
   428 ubrk_previous(UBreakIterator *bi);
       
   429 
       
   430 /**
       
   431  * Determine the index of the first character in the text being scanned.
       
   432  * This is not always the same as index 0 of the text.
       
   433  * @param bi The break iterator to use.
       
   434  * @return The character index of the first character in the text being scanned.
       
   435  * @see ubrk_last
       
   436  * @stable ICU 2.0
       
   437  */
       
   438 U_STABLE int32_t U_EXPORT2
       
   439 ubrk_first(UBreakIterator *bi);
       
   440 
       
   441 /**
       
   442  * Determine the index immediately <EM>beyond</EM> the last character in the text being
       
   443  * scanned.
       
   444  * This is not the same as the last character.
       
   445  * @param bi The break iterator to use.
       
   446  * @return The character offset immediately <EM>beyond</EM> the last character in the
       
   447  * text being scanned.
       
   448  * @see ubrk_first
       
   449  * @stable ICU 2.0
       
   450  */
       
   451 U_STABLE int32_t U_EXPORT2
       
   452 ubrk_last(UBreakIterator *bi);
       
   453 
       
   454 /**
       
   455  * Determine the text boundary preceding the specified offset.
       
   456  * The value returned is always smaller than offset, or UBRK_DONE.
       
   457  * @param bi The break iterator to use.
       
   458  * @param offset The offset to begin scanning.
       
   459  * @return The text boundary preceding offset, or UBRK_DONE.
       
   460  * @see ubrk_following
       
   461  * @stable ICU 2.0
       
   462  */
       
   463 U_STABLE int32_t U_EXPORT2
       
   464 ubrk_preceding(UBreakIterator *bi,
       
   465            int32_t offset);
       
   466 
       
   467 /**
       
   468  * Determine the text boundary following the specified offset.
       
   469  * The value returned is always greater than offset, or UBRK_DONE.
       
   470  * @param bi The break iterator to use.
       
   471  * @param offset The offset to begin scanning.
       
   472  * @return The text boundary following offset, or UBRK_DONE.
       
   473  * @see ubrk_preceding
       
   474  * @stable ICU 2.0
       
   475  */
       
   476 U_STABLE int32_t U_EXPORT2
       
   477 ubrk_following(UBreakIterator *bi,
       
   478            int32_t offset);
       
   479 
       
   480 /**
       
   481 * Get a locale for which text breaking information is available.
       
   482 * A UBreakIterator in a locale returned by this function will perform the correct
       
   483 * text breaking for the locale.
       
   484 * @param index The index of the desired locale.
       
   485 * @return A locale for which text breaking information is available, or 0 if none.
       
   486 * @see ubrk_countAvailable
       
   487 * @stable ICU 2.0
       
   488 */
       
   489 U_STABLE const char* U_EXPORT2
       
   490 ubrk_getAvailable(int32_t index);
       
   491 
       
   492 /**
       
   493 * Determine how many locales have text breaking information available.
       
   494 * This function is most useful as determining the loop ending condition for
       
   495 * calls to \ref ubrk_getAvailable.
       
   496 * @return The number of locales for which text breaking information is available.
       
   497 * @see ubrk_getAvailable
       
   498 * @stable ICU 2.0
       
   499 */
       
   500 U_STABLE int32_t U_EXPORT2
       
   501 ubrk_countAvailable(void);
       
   502 
       
   503 
       
   504 /**
       
   505 * Returns true if the specified position is a boundary position.  As a side
       
   506 * effect, leaves the iterator pointing to the first boundary position at
       
   507 * or after "offset".
       
   508 * @param bi The break iterator to use.
       
   509 * @param offset the offset to check.
       
   510 * @return True if "offset" is a boundary position.
       
   511 * @stable ICU 2.0
       
   512 */
       
   513 U_STABLE  UBool U_EXPORT2
       
   514 ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
       
   515 
       
   516 /**
       
   517  * Return the status from the break rule that determined the most recently
       
   518  * returned break position.  The values appear in the rule source
       
   519  * within brackets, {123}, for example.  For rules that do not specify a
       
   520  * status, a default value of 0 is returned.
       
   521  * <p>
       
   522  * For word break iterators, the possible values are defined in enum UWordBreak.
       
   523  * @stable ICU 2.2
       
   524  */
       
   525 U_STABLE  int32_t U_EXPORT2
       
   526 ubrk_getRuleStatus(UBreakIterator *bi);
       
   527 
       
   528 /**
       
   529  * Get the statuses from the break rules that determined the most recently
       
   530  * returned break position.  The values appear in the rule source
       
   531  * within brackets, {123}, for example.  The default status value for rules
       
   532  * that do not explicitly provide one is zero.
       
   533  * <p>
       
   534  * For word break iterators, the possible values are defined in enum UWordBreak.
       
   535  * @param bi        The break iterator to use
       
   536  * @param fillInVec an array to be filled in with the status values.  
       
   537  * @param capacity  the length of the supplied vector.  A length of zero causes
       
   538  *                  the function to return the number of status values, in the
       
   539  *                  normal way, without attempting to store any values.
       
   540  * @param status    receives error codes.  
       
   541  * @return          The number of rule status values from rules that determined 
       
   542  *                  the most recent boundary returned by the break iterator.
       
   543  * @draft ICU 3.0
       
   544  */
       
   545 U_DRAFT  int32_t U_EXPORT2
       
   546 ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
       
   547 
       
   548 /**
       
   549  * Return the locale of the break iterator. You can choose between the valid and
       
   550  * the actual locale.
       
   551  * @param bi break iterator
       
   552  * @param type locale type (valid or actual)
       
   553  * @param status error code
       
   554  * @return locale string
       
   555  * @draft ICU 2.8 likely to change in ICU 3.0, based on feedback
       
   556  */
       
   557 U_DRAFT const char* U_EXPORT2
       
   558 ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
       
   559 
       
   560 
       
   561 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
       
   562 
       
   563 #endif