fontservices/textshaperplugin/IcuSource/common/unicode/uidna.h
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2  *******************************************************************************
       
     3  *
       
     4  *   Copyright (C) 2003-2005, International Business Machines
       
     5  *   Corporation and others.  All Rights Reserved.
       
     6  *
       
     7  *******************************************************************************
       
     8  *   file name:  uidna.h
       
     9  *   encoding:   US-ASCII
       
    10  *   tab size:   8 (not used)
       
    11  *   indentation:4
       
    12  *
       
    13  *   created on: 2003feb1
       
    14  *   created by: Ram Viswanadha
       
    15  */
       
    16 
       
    17 #ifndef __UIDNA_H__
       
    18 #define __UIDNA_H__
       
    19 
       
    20 #include "unicode/utypes.h"
       
    21 
       
    22 #if !UCONFIG_NO_IDNA
       
    23 
       
    24 #include "unicode/parseerr.h"
       
    25   
       
    26 /**
       
    27  * \file
       
    28  * \brief C API: Internationalized Domain Names in Applications Transformation
       
    29  *
       
    30  * UIDNA API implements the IDNA protocol as defined in the IDNA RFC 
       
    31  * (http://www.ietf.org/rfc/rfc3490.txt).
       
    32  * The RFC defines 2 operations: ToASCII and ToUnicode. Domain labels 
       
    33  * containing non-ASCII code points are required to be processed by
       
    34  * ToASCII operation before passing it to resolver libraries. Domain names
       
    35  * that are obtained from resolver libraries are required to be processed by
       
    36  * ToUnicode operation before displaying the domain name to the user.
       
    37  * IDNA requires that implementations process input strings with Nameprep
       
    38  * (http://www.ietf.org/rfc/rfc3491.txt), 
       
    39  * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt), 
       
    40  * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt). 
       
    41  * Implementations of IDNA MUST fully implement Nameprep and Punycode; 
       
    42  * neither Nameprep nor Punycode are optional.
       
    43  * The input and output of ToASCII and ToUnicode operations are Unicode 
       
    44  * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
       
    45  * multiple times to an input string will yield the same result as applying the operation
       
    46  * once.
       
    47  * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) 
       
    48  * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
       
    49  *
       
    50  */
       
    51 
       
    52 #ifndef U_HIDE_DRAFT_API
       
    53 
       
    54 /** 
       
    55  * Option to prohibit processing of unassigned codepoints in the input and
       
    56  * to not check if the input conforms to STD-3 ASCII rules.
       
    57  * 
       
    58  * @see  uidna_toASCII uidna_toUnicode
       
    59  * @stable ICU 2.6
       
    60  */
       
    61 #define UIDNA_DEFAULT          0x0000
       
    62 /** 
       
    63  * Option to allow processing of unassigned codepoints in the input
       
    64  * 
       
    65  * @see  uidna_toASCII uidna_toUnicode
       
    66  * @stable ICU 2.6
       
    67  */
       
    68 #define UIDNA_ALLOW_UNASSIGNED 0x0001
       
    69 /** 
       
    70  * Option to check if input conforms to STD-3 ASCII rules
       
    71  * 
       
    72  * @see  uidna_toASCII uidna_toUnicode
       
    73  * @stable ICU 2.6
       
    74  */
       
    75 #define UIDNA_USE_STD3_RULES   0x0002
       
    76 
       
    77 #endif /*U_HIDE_DRAFT_API*/
       
    78     
       
    79 /**
       
    80  * This function implements the ToASCII operation as defined in the IDNA RFC.
       
    81  * This operation is done on <b>single labels</b> before sending it to something that expects
       
    82  * ASCII names. A label is an individual part of a domain name. Labels are usually
       
    83  * separated by dots; e.g., "www.example.com" is composed of 3 labels
       
    84  * "www","example", and "com".
       
    85  *
       
    86  *
       
    87  * @param src               Input UChar array containing label in Unicode.
       
    88  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
       
    89  * @param dest              Output UChar array with ASCII (ACE encoded) label.
       
    90  * @param destCapacity      Size of dest.
       
    91  * @param options           A bit set of options:
       
    92  *
       
    93  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
       
    94  *                              and do not use STD3 ASCII rules
       
    95  *                              If unassigned code points are found the operation fails with 
       
    96  *                              U_IDNA_UNASSIGNED_ERROR error code.
       
    97  *
       
    98  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
       
    99  *                              If this option is set, the unassigned code points in the input
       
   100  *                              are treated as normal Unicode code points.
       
   101  *                          
       
   102  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
       
   103  *                              If this option is set and the input does not satisfy STD3 rules,  
       
   104  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
       
   105  *
       
   106  * @param parseError        Pointer to UParseError struct to receive information on position 
       
   107  *                          of error if an error is encountered. Can be NULL.
       
   108  * @param status            ICU in/out error code parameter.
       
   109  *                          U_INVALID_CHAR_FOUND if src contains
       
   110  *                          unmatched single surrogates.
       
   111  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
       
   112  *                          too many code points.
       
   113  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
       
   114  * @return                  Number of ASCII characters converted.
       
   115  * @stable ICU 2.6
       
   116  */
       
   117 U_STABLE int32_t U_EXPORT2
       
   118 uidna_toASCII(const UChar* src, int32_t srcLength, 
       
   119               UChar* dest, int32_t destCapacity,
       
   120               int32_t options,
       
   121               UParseError* parseError,
       
   122               UErrorCode* status);
       
   123 
       
   124 
       
   125 /**
       
   126  * This function implements the ToUnicode operation as defined in the IDNA RFC.
       
   127  * This operation is done on <b>single labels</b> before sending it to something that expects
       
   128  * Unicode names. A label is an individual part of a domain name. Labels are usually
       
   129  * separated by dots; e.g., "www.example.com" is composed of 3 labels
       
   130  * "www","example", and "com".
       
   131  *
       
   132  * @param src               Input UChar array containing ASCII (ACE encoded) label.
       
   133  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
       
   134  * @param dest              Output UChar array containing the converted Unicode equivalent of the label.
       
   135  * @param destCapacity      Size of dest.
       
   136  * @param options           A bit set of options:
       
   137  *  
       
   138  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
       
   139  *                              and do not use STD3 ASCII rules
       
   140  *                              If unassigned code points are found the operation fails with 
       
   141  *                              U_IDNA_UNASSIGNED_ERROR error code.
       
   142  *
       
   143  *  - UIDNA_ALLOW_UNASSIGNED      Unassigned values can be converted to ASCII for query operations
       
   144  *                              If this option is set, the unassigned code points in the input
       
   145  *                              are treated as normal Unicode code points. <b> Note: </b> This option is 
       
   146  *                              required on toUnicode operation because the RFC mandates 
       
   147  *                              verification of decoded ACE input by applying toASCII and comparing
       
   148  *                              its output with source
       
   149  *
       
   150  *                          
       
   151  *                          
       
   152  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
       
   153  *                              If this option is set and the input does not satisfy STD3 rules,  
       
   154  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
       
   155  *
       
   156  * @param parseError        Pointer to UParseError struct to receive information on position 
       
   157  *                          of error if an error is encountered. Can be NULL.
       
   158  * @param status            ICU in/out error code parameter.
       
   159  *                          U_INVALID_CHAR_FOUND if src contains
       
   160  *                          unmatched single surrogates.
       
   161  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
       
   162  *                          too many code points.
       
   163  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
       
   164  * @return                  Number of Unicode characters converted.
       
   165  * @stable ICU 2.6
       
   166  */
       
   167 U_STABLE int32_t U_EXPORT2
       
   168 uidna_toUnicode(const UChar* src, int32_t srcLength,
       
   169                 UChar* dest, int32_t destCapacity,
       
   170                 int32_t options,
       
   171                 UParseError* parseError,
       
   172                 UErrorCode* status);
       
   173 
       
   174 
       
   175 /**
       
   176  * Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
       
   177  * This operation is done on complete domain names, e.g: "www.example.com". 
       
   178  * It is important to note that this operation can fail. If it fails, then the input 
       
   179  * domain name cannot be used as an Internationalized Domain Name and the application
       
   180  * should have methods defined to deal with the failure.
       
   181  * 
       
   182  * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
       
   183  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 
       
   184  * and then convert. This function does not offer that level of granularity. The options once  
       
   185  * set will apply to all labels in the domain name
       
   186  *
       
   187  * @param src               Input UChar array containing IDN in Unicode.
       
   188  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
       
   189  * @param dest              Output UChar array with ASCII (ACE encoded) IDN.
       
   190  * @param destCapacity      Size of dest.
       
   191  * @param options           A bit set of options:
       
   192  *  
       
   193  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
       
   194  *                              and do not use STD3 ASCII rules
       
   195  *                              If unassigned code points are found the operation fails with 
       
   196  *                              U_IDNA_UNASSIGNED_ERROR error code.
       
   197  *
       
   198  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
       
   199  *                              If this option is set, the unassigned code points in the input
       
   200  *                              are treated as normal Unicode code points.
       
   201  *                          
       
   202  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
       
   203  *                              If this option is set and the input does not satisfy STD3 rules,  
       
   204  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
       
   205  * 
       
   206  * @param parseError        Pointer to UParseError struct to receive information on position 
       
   207  *                          of error if an error is encountered. Can be NULL.
       
   208  * @param status            ICU in/out error code parameter.
       
   209  *                          U_INVALID_CHAR_FOUND if src contains
       
   210  *                          unmatched single surrogates.
       
   211  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
       
   212  *                          too many code points.
       
   213  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
       
   214  * @return                  Number of ASCII characters converted.
       
   215  * @stable ICU 2.6
       
   216  */
       
   217 U_STABLE int32_t U_EXPORT2
       
   218 uidna_IDNToASCII(  const UChar* src, int32_t srcLength,
       
   219                    UChar* dest, int32_t destCapacity,
       
   220                    int32_t options,
       
   221                    UParseError* parseError,
       
   222                    UErrorCode* status);
       
   223 
       
   224 /**
       
   225  * Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
       
   226  * This operation is done on complete domain names, e.g: "www.example.com". 
       
   227  *
       
   228  * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
       
   229  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 
       
   230  * and then convert. This function does not offer that level of granularity. The options once  
       
   231  * set will apply to all labels in the domain name
       
   232  *
       
   233  * @param src               Input UChar array containing IDN in ASCII (ACE encoded) form.
       
   234  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
       
   235  * @param dest              Output UChar array containing Unicode equivalent of source IDN.
       
   236  * @param destCapacity      Size of dest.
       
   237  * @param options           A bit set of options:
       
   238  *  
       
   239  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
       
   240  *                              and do not use STD3 ASCII rules
       
   241  *                              If unassigned code points are found the operation fails with 
       
   242  *                              U_IDNA_UNASSIGNED_ERROR error code.
       
   243  *
       
   244  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
       
   245  *                              If this option is set, the unassigned code points in the input
       
   246  *                              are treated as normal Unicode code points.
       
   247  *                          
       
   248  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
       
   249  *                              If this option is set and the input does not satisfy STD3 rules,  
       
   250  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
       
   251  *
       
   252  * @param parseError        Pointer to UParseError struct to receive information on position 
       
   253  *                          of error if an error is encountered. Can be NULL.
       
   254  * @param status            ICU in/out error code parameter.
       
   255  *                          U_INVALID_CHAR_FOUND if src contains
       
   256  *                          unmatched single surrogates.
       
   257  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
       
   258  *                          too many code points.
       
   259  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
       
   260  * @return                  Number of Unicode characters converted.
       
   261  * @stable ICU 2.6
       
   262  */
       
   263 U_STABLE int32_t U_EXPORT2
       
   264 uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
       
   265                      UChar* dest, int32_t destCapacity,
       
   266                      int32_t options,
       
   267                      UParseError* parseError,
       
   268                      UErrorCode* status);
       
   269 
       
   270 /**
       
   271  * Compare two IDN strings for equivalence.
       
   272  * This function splits the domain names into labels and compares them.
       
   273  * According to IDN RFC, whenever two labels are compared, they are 
       
   274  * considered equal if and only if their ASCII forms (obtained by 
       
   275  * applying toASCII) match using a case-insensitive ASCII comparison.
       
   276  * Two domain names are considered a match if and only if all labels 
       
   277  * match regardless of whether label separators match.
       
   278  *
       
   279  * @param s1                First source string.
       
   280  * @param length1           Length of first source string, or -1 if NUL-terminated.
       
   281  *
       
   282  * @param s2                Second source string.
       
   283  * @param length2           Length of second source string, or -1 if NUL-terminated.
       
   284  * @param options           A bit set of options:
       
   285  *  
       
   286  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
       
   287  *                              and do not use STD3 ASCII rules
       
   288  *                              If unassigned code points are found the operation fails with 
       
   289  *                              U_IDNA_UNASSIGNED_ERROR error code.
       
   290  *
       
   291  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
       
   292  *                              If this option is set, the unassigned code points in the input
       
   293  *                              are treated as normal Unicode code points.
       
   294  *                          
       
   295  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
       
   296  *                              If this option is set and the input does not satisfy STD3 rules,  
       
   297  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
       
   298  *
       
   299  * @param status            ICU error code in/out parameter.
       
   300  *                          Must fulfill U_SUCCESS before the function call.
       
   301  * @return <0 or 0 or >0 as usual for string comparisons
       
   302  * @stable ICU 2.6
       
   303  */
       
   304 U_STABLE int32_t U_EXPORT2
       
   305 uidna_compare(  const UChar *s1, int32_t length1,
       
   306                 const UChar *s2, int32_t length2,
       
   307                 int32_t options,
       
   308                 UErrorCode* status);
       
   309 
       
   310 #endif /* #if !UCONFIG_NO_IDNA */
       
   311 
       
   312 #endif