mobilemessaging/smsui/charconvplugin/GSM0338.TXT
changeset 0 72b543305e3a
equal deleted inserted replaced
-1:000000000000 0:72b543305e3a
       
     1 #
       
     2 #	Name:             GSM 03.38 to Unicode
       
     3 #	Unicode version:  3.0
       
     4 #	Table version:    1.1
       
     5 #	Table format:     Format A
       
     6 #	Date:             2000 May 30
       
     7 #	Authors:          Ken Whistler <kenw@sybase.com>,
       
     8 #                         Kent Karlsson <keka@im.se>,
       
     9 #                         Markus Kuhn <mkuhn@acm.org>
       
    10 #
       
    11 #	Copyright (c) 2000 Unicode, Inc.  All Rights reserved.
       
    12 #
       
    13 #	This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
       
    14 #	No claims are made as to fitness for any particular purpose.  No
       
    15 #	warranties of any kind are expressed or implied.  The recipient
       
    16 #	agrees to determine applicability of information provided.  If this
       
    17 #	file has been provided on optical media by Unicode, Inc., the sole
       
    18 #	remedy for any claim will be exchange of defective media within 90
       
    19 #	days of receipt.
       
    20 #
       
    21 #	Unicode, Inc. hereby grants the right to freely use the information
       
    22 #	supplied in this file in the creation of products supporting the
       
    23 #	Unicode Standard, and to make copies of this file in any form for
       
    24 #	internal or external distribution as long as this notice remains
       
    25 #	attached.
       
    26 #
       
    27 #	General notes:
       
    28 #
       
    29 #	This table contains the data the Unicode Consortium has on how
       
    30 #       ETSI GSM 03.38 7-bit default alphabet characters map into Unicode.
       
    31 #	This mapping is based on ETSI TS 100 900 V7.2.0 (1999-07), with
       
    32 #	a correction of 0x09 to *small* c-cedilla, instead of *capital*
       
    33 #	C-cedilla.
       
    34 #
       
    35 #	Format:  Three tab-separated columns
       
    36 #		 Column #1 is the ETSI GSM 03.38 7-bit default alphabet 
       
    37 #                             code (in hex as 0xXX, or 0xXXXX for double-byte
       
    38 #                             sequences)
       
    39 #		 Column #2 is the Unicode scalar value (in hex as 0xXXXX)
       
     40 #		 Column #3 is the Unicode name (follows a comment sign, '#')
       
    41 #
       
    42 #	The entries are in ETSI GSM 03.38 7-bit default alphabet code order.
       
    43 #
       
    44 #       Note that ETSI GSM 03.38 also allows for the use of UCS-2 (UTF-16
       
    45 #       restricted to the BMP) in GSM/SMS messages.
       
    46 #
       
    47 #	Note also that there are commented Greek mappings for some 
       
    48 #	capital Latin characters. This follows from the clear intent
       
    49 #	of the ETSI GSM 03.38 to have glyph coverage for the uppercase
       
    50 #	Greek alphabet by reusing Latin letters that have the same 
       
    51 #	form as an uppercase Greek letter. Conversion implementations 
       
    52 #	should be aware of this fact.
       
    53 #
       
    54 #       The ETSI GSM 03.38 specification shows an uppercase C-cedilla
       
    55 #       glyph at 0x09. This may be the result of limited display
       
    56 #       capabilities for handling characters with descenders. However, the
       
    57 #       language coverage intent is clearly for the lowercase c-cedilla, as shown
       
    58 #       in the mapping below. The mapping for uppercase C-cedilla is shown
       
    59 #       in a commented line in the mapping table.
       
    60 #
       
    61 #	The ESC character 0x1B is
       
    62 #	mapped to the no-break space character, unless it is part of a
       
    63 #	valid ESC sequence, to facilitate round-trip compatibility in
       
    64 #	the presence of unknown ESC sequences.
       
    65 #
       
    66 #	0x00 is NULL (when followed only by 0x00 up to the
       
    67 #	end of (fixed byte length) message, possibly also up to
       
     68 #	FORM FEED).  But 0x00 is also the code for COMMERCIAL AT
       
    69 #	when some other character (CARRIAGE RETURN if nothing else)
       
    70 #	comes after the 0x00.
       
    71 #
       
    72 #	Version history
       
    73 #	1.0 version: first creation
       
    74 #	1.1 version: fixed problem with the wrong line being a comment,
       
    75 #			added text regarding 0x00's interpretation,
       
    76 #                       added second mapping for C-cedilla,
       
    77 #                       added mapping of 0x1B escape to NBSP for display.
       
    78 #
       
    79 #	Updated versions of this file may be found in:
       
    80 #		<ftp://ftp.unicode.org/Public/MAPPINGS/>
       
    81 #
       
     82 #	For any comments or problems, contact <errata@unicode.org>
       
    83 #	Please note that <errata@unicode.org> is an archival address;
       
    84 #	notices will be checked, but do not expect an immediate response.
       
    85 #
       
    86 0x00	0x0040	#	COMMERCIAL AT
       
    87 #0x00	0x0000	#	NULL (see note above)
       
    88 0x01	0x00A3	#	POUND SIGN
       
    89 0x02	0x0024	#	DOLLAR SIGN
       
    90 0x03	0x00A5	#	YEN SIGN
       
    91 0x04	0x00E8	#	LATIN SMALL LETTER E WITH GRAVE
       
    92 0x05	0x00E9	#	LATIN SMALL LETTER E WITH ACUTE
       
    93 0x06	0x00F9	#	LATIN SMALL LETTER U WITH GRAVE
       
    94 0x07	0x00EC	#	LATIN SMALL LETTER I WITH GRAVE
       
    95 0x08	0x00F2	#	LATIN SMALL LETTER O WITH GRAVE
       
    96 0x09	0x00E7	#	LATIN SMALL LETTER C WITH CEDILLA
       
    97 #0x09	0x00C7	#	LATIN CAPITAL LETTER C WITH CEDILLA (see note above)
       
    98 0x0A	0x000A	#	LINE FEED
       
    99 0x0B	0x00D8	#	LATIN CAPITAL LETTER O WITH STROKE
       
   100 0x0C	0x00F8	#	LATIN SMALL LETTER O WITH STROKE
       
   101 0x0D	0x000D	#	CARRIAGE RETURN
       
   102 0x0E	0x00C5	#	LATIN CAPITAL LETTER A WITH RING ABOVE
       
   103 0x0F	0x00E5	#	LATIN SMALL LETTER A WITH RING ABOVE
       
   104 0x10	0x0394	#	GREEK CAPITAL LETTER DELTA
       
   105 0x11	0x005F	#	LOW LINE
       
   106 0x12	0x03A6	#	GREEK CAPITAL LETTER PHI
       
   107 0x13	0x0393	#	GREEK CAPITAL LETTER GAMMA
       
   108 0x14	0x039B	#	GREEK CAPITAL LETTER LAMDA
       
   109 0x15	0x03A9	#	GREEK CAPITAL LETTER OMEGA
       
   110 0x16	0x03A0	#	GREEK CAPITAL LETTER PI
       
   111 0x17	0x03A8	#	GREEK CAPITAL LETTER PSI
       
   112 0x18	0x03A3	#	GREEK CAPITAL LETTER SIGMA
       
   113 0x19	0x0398	#	GREEK CAPITAL LETTER THETA
       
   114 0x1A	0x039E	#	GREEK CAPITAL LETTER XI
       
   115 #0x1B	0x00A0	#	ESCAPE TO EXTENSION TABLE (or displayed as NBSP, see note above)
       
   116 0x1B0A	0x000C	#	FORM FEED
       
   117 0x1B14	0x005E	#	CIRCUMFLEX ACCENT
       
   118 0x1B28	0x007B	#	LEFT CURLY BRACKET
       
   119 0x1B29	0x007D	#	RIGHT CURLY BRACKET
       
   120 0x1B2F	0x005C	#	REVERSE SOLIDUS
       
   121 0x1B3C	0x005B	#	LEFT SQUARE BRACKET
       
   122 0x1B3D	0x007E	#	TILDE
       
   123 0x1B3E	0x005D	#	RIGHT SQUARE BRACKET
       
   124 0x1B40	0x007C	#	VERTICAL LINE
       
   125 0x1B65	0x20AC	#	EURO SIGN
       
   126 0x1C	0x00C6	#	LATIN CAPITAL LETTER AE
       
   127 0x1D	0x00E6	#	LATIN SMALL LETTER AE
       
   128 0x1E	0x00DF	#	LATIN SMALL LETTER SHARP S (German)
       
   129 0x1F	0x00C9	#	LATIN CAPITAL LETTER E WITH ACUTE
       
   130 0x20	0x0020	#	SPACE
       
   131 0x21	0x0021	#	EXCLAMATION MARK
       
   132 0x22	0x0022	#	QUOTATION MARK
       
   133 0x23	0x0023	#	NUMBER SIGN
       
   134 0x24	0x00A4	#	CURRENCY SIGN
       
   135 0x25	0x0025	#	PERCENT SIGN
       
   136 0x26	0x0026	#	AMPERSAND
       
   137 0x27	0x0027	#	APOSTROPHE
       
   138 0x28	0x0028	#	LEFT PARENTHESIS
       
   139 0x29	0x0029	#	RIGHT PARENTHESIS
       
   140 0x2A	0x002A	#	ASTERISK
       
   141 0x2B	0x002B	#	PLUS SIGN
       
   142 0x2C	0x002C	#	COMMA
       
   143 0x2D	0x002D	#	HYPHEN-MINUS
       
   144 0x2E	0x002E	#	FULL STOP
       
   145 0x2F	0x002F	#	SOLIDUS
       
   146 0x30	0x0030	#	DIGIT ZERO
       
   147 0x31	0x0031	#	DIGIT ONE
       
   148 0x32	0x0032	#	DIGIT TWO
       
   149 0x33	0x0033	#	DIGIT THREE
       
   150 0x34	0x0034	#	DIGIT FOUR
       
   151 0x35	0x0035	#	DIGIT FIVE
       
   152 0x36	0x0036	#	DIGIT SIX
       
   153 0x37	0x0037	#	DIGIT SEVEN
       
   154 0x38	0x0038	#	DIGIT EIGHT
       
   155 0x39	0x0039	#	DIGIT NINE
       
   156 0x3A	0x003A	#	COLON
       
   157 0x3B	0x003B	#	SEMICOLON
       
   158 0x3C	0x003C	#	LESS-THAN SIGN
       
   159 0x3D	0x003D	#	EQUALS SIGN
       
   160 0x3E	0x003E	#	GREATER-THAN SIGN
       
   161 0x3F	0x003F	#	QUESTION MARK
       
   162 0x40	0x00A1	#	INVERTED EXCLAMATION MARK
       
   163 0x41	0x0041	#	LATIN CAPITAL LETTER A
       
   164 #0x41	0x0391	#	GREEK CAPITAL LETTER ALPHA
       
   165 0x42	0x0042	#	LATIN CAPITAL LETTER B
       
   166 #0x42	0x0392	#	GREEK CAPITAL LETTER BETA
       
   167 0x43	0x0043	#	LATIN CAPITAL LETTER C
       
   168 0x44	0x0044	#	LATIN CAPITAL LETTER D
       
   169 0x45	0x0045	#	LATIN CAPITAL LETTER E
       
   170 #0x45	0x0395	#	GREEK CAPITAL LETTER EPSILON
       
   171 0x46	0x0046	#	LATIN CAPITAL LETTER F
       
   172 0x47	0x0047	#	LATIN CAPITAL LETTER G
       
   173 0x48	0x0048	#	LATIN CAPITAL LETTER H
       
   174 #0x48	0x0397	#	GREEK CAPITAL LETTER ETA
       
   175 0x49	0x0049	#	LATIN CAPITAL LETTER I
       
   176 #0x49	0x0399	#	GREEK CAPITAL LETTER IOTA
       
   177 0x4A	0x004A	#	LATIN CAPITAL LETTER J
       
   178 0x4B	0x004B	#	LATIN CAPITAL LETTER K
       
   179 #0x4B	0x039A	#	GREEK CAPITAL LETTER KAPPA
       
   180 0x4C	0x004C	#	LATIN CAPITAL LETTER L
       
   181 0x4D	0x004D	#	LATIN CAPITAL LETTER M
       
   182 #0x4D	0x039C	#	GREEK CAPITAL LETTER MU
       
   183 0x4E	0x004E	#	LATIN CAPITAL LETTER N
       
   184 #0x4E	0x039D	#	GREEK CAPITAL LETTER NU
       
   185 0x4F	0x004F	#	LATIN CAPITAL LETTER O
       
   186 #0x4F	0x039F	#	GREEK CAPITAL LETTER OMICRON
       
   187 0x50	0x0050	#	LATIN CAPITAL LETTER P
       
   188 #0x50	0x03A1	#	GREEK CAPITAL LETTER RHO
       
   189 0x51	0x0051	#	LATIN CAPITAL LETTER Q
       
   190 0x52	0x0052	#	LATIN CAPITAL LETTER R
       
   191 0x53	0x0053	#	LATIN CAPITAL LETTER S
       
   192 0x54	0x0054	#	LATIN CAPITAL LETTER T
       
   193 #0x54	0x03A4	#	GREEK CAPITAL LETTER TAU
       
   194 0x55	0x0055	#	LATIN CAPITAL LETTER U
       
   195 #0x55	0x03A5	#	GREEK CAPITAL LETTER UPSILON
       
   196 0x56	0x0056	#	LATIN CAPITAL LETTER V
       
   197 0x57	0x0057	#	LATIN CAPITAL LETTER W
       
   198 0x58	0x0058	#	LATIN CAPITAL LETTER X
       
   199 #0x58	0x03A7	#	GREEK CAPITAL LETTER CHI
       
   200 0x59	0x0059	#	LATIN CAPITAL LETTER Y
       
   201 0x5A	0x005A	#	LATIN CAPITAL LETTER Z
       
   202 #0x5A	0x0396	#	GREEK CAPITAL LETTER ZETA
       
   203 0x5B	0x00C4	#	LATIN CAPITAL LETTER A WITH DIAERESIS
       
   204 0x5C	0x00D6	#	LATIN CAPITAL LETTER O WITH DIAERESIS
       
   205 0x5D	0x00D1	#	LATIN CAPITAL LETTER N WITH TILDE
       
   206 0x5E	0x00DC	#	LATIN CAPITAL LETTER U WITH DIAERESIS
       
   207 0x5F	0x00A7	#	SECTION SIGN
       
   208 0x60	0x00BF	#	INVERTED QUESTION MARK
       
   209 0x61	0x0061	#	LATIN SMALL LETTER A
       
   210 0x62	0x0062	#	LATIN SMALL LETTER B
       
   211 0x63	0x0063	#	LATIN SMALL LETTER C
       
   212 0x64	0x0064	#	LATIN SMALL LETTER D
       
   213 0x65	0x0065	#	LATIN SMALL LETTER E
       
   214 0x66	0x0066	#	LATIN SMALL LETTER F
       
   215 0x67	0x0067	#	LATIN SMALL LETTER G
       
   216 0x68	0x0068	#	LATIN SMALL LETTER H
       
   217 0x69	0x0069	#	LATIN SMALL LETTER I
       
   218 0x6A	0x006A	#	LATIN SMALL LETTER J
       
   219 0x6B	0x006B	#	LATIN SMALL LETTER K
       
   220 0x6C	0x006C	#	LATIN SMALL LETTER L
       
   221 0x6D	0x006D	#	LATIN SMALL LETTER M
       
   222 0x6E	0x006E	#	LATIN SMALL LETTER N
       
   223 0x6F	0x006F	#	LATIN SMALL LETTER O
       
   224 0x70	0x0070	#	LATIN SMALL LETTER P
       
   225 0x71	0x0071	#	LATIN SMALL LETTER Q
       
   226 0x72	0x0072	#	LATIN SMALL LETTER R
       
   227 0x73	0x0073	#	LATIN SMALL LETTER S
       
   228 0x74	0x0074	#	LATIN SMALL LETTER T
       
   229 0x75	0x0075	#	LATIN SMALL LETTER U
       
   230 0x76	0x0076	#	LATIN SMALL LETTER V
       
   231 0x77	0x0077	#	LATIN SMALL LETTER W
       
   232 0x78	0x0078	#	LATIN SMALL LETTER X
       
   233 0x79	0x0079	#	LATIN SMALL LETTER Y
       
   234 0x7A	0x007A	#	LATIN SMALL LETTER Z
       
   235 0x7B	0x00E4	#	LATIN SMALL LETTER A WITH DIAERESIS
       
   236 0x7C	0x00F6	#	LATIN SMALL LETTER O WITH DIAERESIS
       
   237 0x7D	0x00F1	#	LATIN SMALL LETTER N WITH TILDE
       
   238 0x7E	0x00FC	#	LATIN SMALL LETTER U WITH DIAERESIS
       
   239 0x7F	0x00E0	#	LATIN SMALL LETTER A WITH GRAVE