""" Encoding Aliases Support

    This module is used by the encodings package search function to
    map encoding names to module names.

    Note that the search function normalizes the encoding names before
    doing the lookup, so the mapping has to map *normalized* encoding
    names to module names.  In practice this means every key should be
    written in lower case, with underscores in place of hyphens and
    spaces; a key spelled any other way is never matched by a lookup.

    Contents:

        The following aliases dictionary contains mappings of all IANA
        character set names for which the Python core library provides
        codecs. In addition to these, a few Python specific codec
        aliases have also been added.

"""
aliases = {

    # Please keep this list sorted alphabetically by value !

    # ascii codec
    '646': 'ascii',
    'ansi_x3.4_1968': 'ascii',
    'ansi_x3_4_1968': 'ascii',  # some email headers use this non-standard name
    'ansi_x3.4_1986': 'ascii',
    'cp367': 'ascii',
    'csascii': 'ascii',
    'ibm367': 'ascii',
    'iso646_us': 'ascii',
    'iso_646.irv_1991': 'ascii',
    'iso_ir_6': 'ascii',
    'us': 'ascii',
    'us_ascii': 'ascii',

    # base64_codec codec
    'base64': 'base64_codec',
    'base_64': 'base64_codec',

    # big5 codec
    'big5_tw': 'big5',
    'csbig5': 'big5',

    # big5hkscs codec
    'big5_hkscs': 'big5hkscs',
    'hkscs': 'big5hkscs',

    # bz2_codec codec
    'bz2': 'bz2_codec',

    # cp037 codec
    '037': 'cp037',
    'csibm037': 'cp037',
    'ebcdic_cp_ca': 'cp037',
    'ebcdic_cp_nl': 'cp037',
    'ebcdic_cp_us': 'cp037',
    'ebcdic_cp_wt': 'cp037',
    'ibm037': 'cp037',
    'ibm039': 'cp037',

    # cp1026 codec
    '1026': 'cp1026',
    'csibm1026': 'cp1026',
    'ibm1026': 'cp1026',

    # cp1140 codec
    '1140': 'cp1140',
    'ibm1140': 'cp1140',

    # cp1250 codec
    '1250': 'cp1250',
    'windows_1250': 'cp1250',

    # cp1251 codec
    '1251': 'cp1251',
    'windows_1251': 'cp1251',

    # cp1252 codec
    '1252': 'cp1252',
    'windows_1252': 'cp1252',

    # cp1253 codec
    '1253': 'cp1253',
    'windows_1253': 'cp1253',

    # cp1254 codec
    '1254': 'cp1254',
    'windows_1254': 'cp1254',

    # cp1255 codec
    '1255': 'cp1255',
    'windows_1255': 'cp1255',

    # cp1256 codec
    '1256': 'cp1256',
    'windows_1256': 'cp1256',

    # cp1257 codec
    '1257': 'cp1257',
    'windows_1257': 'cp1257',

    # cp1258 codec
    '1258': 'cp1258',
    'windows_1258': 'cp1258',

    # cp424 codec
    '424': 'cp424',
    'csibm424': 'cp424',
    'ebcdic_cp_he': 'cp424',
    'ibm424': 'cp424',

    # cp437 codec
    '437': 'cp437',
    'cspc8codepage437': 'cp437',
    'ibm437': 'cp437',

    # cp500 codec
    '500': 'cp500',
    'csibm500': 'cp500',
    'ebcdic_cp_be': 'cp500',
    'ebcdic_cp_ch': 'cp500',
    'ibm500': 'cp500',

    # cp775 codec
    '775': 'cp775',
    'cspc775baltic': 'cp775',
    'ibm775': 'cp775',

    # cp850 codec
    '850': 'cp850',
    'cspc850multilingual': 'cp850',
    'ibm850': 'cp850',

    # cp852 codec
    '852': 'cp852',
    'cspcp852': 'cp852',
    'ibm852': 'cp852',

    # cp855 codec
    '855': 'cp855',
    'csibm855': 'cp855',
    'ibm855': 'cp855',

    # cp857 codec
    '857': 'cp857',
    'csibm857': 'cp857',
    'ibm857': 'cp857',

    # cp860 codec
    '860': 'cp860',
    'csibm860': 'cp860',
    'ibm860': 'cp860',

    # cp861 codec
    '861': 'cp861',
    'cp_is': 'cp861',
    'csibm861': 'cp861',
    'ibm861': 'cp861',

    # cp862 codec
    '862': 'cp862',
    'cspc862latinhebrew': 'cp862',
    'ibm862': 'cp862',

    # cp863 codec
    '863': 'cp863',
    'csibm863': 'cp863',
    'ibm863': 'cp863',

    # cp864 codec
    '864': 'cp864',
    'csibm864': 'cp864',
    'ibm864': 'cp864',

    # cp865 codec
    '865': 'cp865',
    'csibm865': 'cp865',
    'ibm865': 'cp865',

    # cp866 codec
    '866': 'cp866',
    'csibm866': 'cp866',
    'ibm866': 'cp866',

    # cp869 codec
    '869': 'cp869',
    'cp_gr': 'cp869',
    'csibm869': 'cp869',
    'ibm869': 'cp869',

    # cp932 codec
    '932': 'cp932',
    'ms932': 'cp932',
    'mskanji': 'cp932',
    'ms_kanji': 'cp932',

    # cp949 codec
    '949': 'cp949',
    'ms949': 'cp949',
    'uhc': 'cp949',

    # cp950 codec
    '950': 'cp950',
    'ms950': 'cp950',

    # euc_jis_2004 codec
    'jisx0213': 'euc_jis_2004',
    'eucjis2004': 'euc_jis_2004',
    'euc_jis2004': 'euc_jis_2004',

    # euc_jisx0213 codec
    'eucjisx0213': 'euc_jisx0213',

    # euc_jp codec
    'eucjp': 'euc_jp',
    'ujis': 'euc_jp',
    'u_jis': 'euc_jp',

    # euc_kr codec
    'euckr': 'euc_kr',
    'korean': 'euc_kr',
    'ksc5601': 'euc_kr',
    'ks_c_5601': 'euc_kr',
    'ks_c_5601_1987': 'euc_kr',
    'ksx1001': 'euc_kr',
    'ks_x_1001': 'euc_kr',

    # gb18030 codec
    'gb18030_2000': 'gb18030',

    # gb2312 codec
    'chinese': 'gb2312',
    'csiso58gb231280': 'gb2312',
    'euc_cn': 'gb2312',
    'euccn': 'gb2312',
    'eucgb2312_cn': 'gb2312',
    'gb2312_1980': 'gb2312',
    'gb2312_80': 'gb2312',
    'iso_ir_58': 'gb2312',

    # gbk codec
    '936': 'gbk',
    'cp936': 'gbk',
    'ms936': 'gbk',

    # hex_codec codec
    'hex': 'hex_codec',

    # hp_roman8 codec
    'roman8': 'hp_roman8',
    'r8': 'hp_roman8',
    # Lookups use lower-case names, so the lower-case spelling is the one
    # that actually resolves; the mixed-case IANA spelling is kept only so
    # that code indexing this dictionary directly keeps working.
    'cshproman8': 'hp_roman8',
    'csHPRoman8': 'hp_roman8',

    # hz codec
    'hzgb': 'hz',
    'hz_gb': 'hz',
    'hz_gb_2312': 'hz',

    # iso2022_jp codec
    'csiso2022jp': 'iso2022_jp',
    'iso2022jp': 'iso2022_jp',
    'iso_2022_jp': 'iso2022_jp',

    # iso2022_jp_1 codec
    'iso2022jp_1': 'iso2022_jp_1',
    'iso_2022_jp_1': 'iso2022_jp_1',

    # iso2022_jp_2 codec
    'iso2022jp_2': 'iso2022_jp_2',
    'iso_2022_jp_2': 'iso2022_jp_2',

    # iso2022_jp_2004 codec
    'iso_2022_jp_2004': 'iso2022_jp_2004',
    'iso2022jp_2004': 'iso2022_jp_2004',

    # iso2022_jp_3 codec
    'iso2022jp_3': 'iso2022_jp_3',
    'iso_2022_jp_3': 'iso2022_jp_3',

    # iso2022_jp_ext codec
    'iso2022jp_ext': 'iso2022_jp_ext',
    'iso_2022_jp_ext': 'iso2022_jp_ext',

    # iso2022_kr codec
    'csiso2022kr': 'iso2022_kr',
    'iso2022kr': 'iso2022_kr',
    'iso_2022_kr': 'iso2022_kr',

    # iso8859_10 codec
    'csisolatin6': 'iso8859_10',
    'iso_8859_10': 'iso8859_10',
    'iso_8859_10_1992': 'iso8859_10',
    'iso_ir_157': 'iso8859_10',
    'l6': 'iso8859_10',
    'latin6': 'iso8859_10',

    # iso8859_11 codec
    'thai': 'iso8859_11',
    'iso_8859_11': 'iso8859_11',
    'iso_8859_11_2001': 'iso8859_11',

    # iso8859_13 codec
    'iso_8859_13': 'iso8859_13',
    'l7': 'iso8859_13',
    'latin7': 'iso8859_13',

    # iso8859_14 codec
    'iso_8859_14': 'iso8859_14',
    'iso_8859_14_1998': 'iso8859_14',
    'iso_celtic': 'iso8859_14',
    'iso_ir_199': 'iso8859_14',
    'l8': 'iso8859_14',
    'latin8': 'iso8859_14',

    # iso8859_15 codec
    'iso_8859_15': 'iso8859_15',
    'l9': 'iso8859_15',
    'latin9': 'iso8859_15',

    # iso8859_16 codec
    'iso_8859_16': 'iso8859_16',
    'iso_8859_16_2001': 'iso8859_16',
    'iso_ir_226': 'iso8859_16',
    'l10': 'iso8859_16',
    'latin10': 'iso8859_16',

    # iso8859_2 codec
    'csisolatin2': 'iso8859_2',
    'iso_8859_2': 'iso8859_2',
    'iso_8859_2_1987': 'iso8859_2',
    'iso_ir_101': 'iso8859_2',
    'l2': 'iso8859_2',
    'latin2': 'iso8859_2',

    # iso8859_3 codec
    'csisolatin3': 'iso8859_3',
    'iso_8859_3': 'iso8859_3',
    'iso_8859_3_1988': 'iso8859_3',
    'iso_ir_109': 'iso8859_3',
    'l3': 'iso8859_3',
    'latin3': 'iso8859_3',

    # iso8859_4 codec
    'csisolatin4': 'iso8859_4',
    'iso_8859_4': 'iso8859_4',
    'iso_8859_4_1988': 'iso8859_4',
    'iso_ir_110': 'iso8859_4',
    'l4': 'iso8859_4',
    'latin4': 'iso8859_4',

    # iso8859_5 codec
    'csisolatincyrillic': 'iso8859_5',
    'cyrillic': 'iso8859_5',
    'iso_8859_5': 'iso8859_5',
    'iso_8859_5_1988': 'iso8859_5',
    'iso_ir_144': 'iso8859_5',

    # iso8859_6 codec
    'arabic': 'iso8859_6',
    'asmo_708': 'iso8859_6',
    'csisolatinarabic': 'iso8859_6',
    'ecma_114': 'iso8859_6',
    'iso_8859_6': 'iso8859_6',
    'iso_8859_6_1987': 'iso8859_6',
    'iso_ir_127': 'iso8859_6',

    # iso8859_7 codec
    'csisolatingreek': 'iso8859_7',
    'ecma_118': 'iso8859_7',
    'elot_928': 'iso8859_7',
    'greek': 'iso8859_7',
    'greek8': 'iso8859_7',
    'iso_8859_7': 'iso8859_7',
    'iso_8859_7_1987': 'iso8859_7',
    'iso_ir_126': 'iso8859_7',

    # iso8859_8 codec
    'csisolatinhebrew': 'iso8859_8',
    'hebrew': 'iso8859_8',
    'iso_8859_8': 'iso8859_8',
    'iso_8859_8_1988': 'iso8859_8',
    'iso_ir_138': 'iso8859_8',

    # iso8859_9 codec
    'csisolatin5': 'iso8859_9',
    'iso_8859_9': 'iso8859_9',
    'iso_8859_9_1989': 'iso8859_9',
    'iso_ir_148': 'iso8859_9',
    'l5': 'iso8859_9',
    'latin5': 'iso8859_9',

    # johab codec
    'cp1361': 'johab',
    'ms1361': 'johab',

    # koi8_r codec
    'cskoi8r': 'koi8_r',

    # latin_1 codec
    #
    # Note that the latin_1 codec is implemented internally in C and a
    # lot faster than the charmap codec iso8859_1 which uses the same
    # encoding. This is why we discourage the use of the iso8859_1
    # codec and alias it to latin_1 instead.
    #
    '8859': 'latin_1',
    'cp819': 'latin_1',
    'csisolatin1': 'latin_1',
    'ibm819': 'latin_1',
    'iso8859': 'latin_1',
    'iso8859_1': 'latin_1',
    'iso_8859_1': 'latin_1',
    'iso_8859_1_1987': 'latin_1',
    'iso_ir_100': 'latin_1',
    'l1': 'latin_1',
    'latin': 'latin_1',
    'latin1': 'latin_1',

    # mac_cyrillic codec
    'maccyrillic': 'mac_cyrillic',

    # mac_greek codec
    'macgreek': 'mac_greek',

    # mac_iceland codec
    'maciceland': 'mac_iceland',

    # mac_latin2 codec
    'maccentraleurope': 'mac_latin2',
    'maclatin2': 'mac_latin2',

    # mac_roman codec
    'macroman': 'mac_roman',

    # mac_turkish codec
    'macturkish': 'mac_turkish',

    # mbcs codec
    'dbcs': 'mbcs',

    # ptcp154 codec
    'csptcp154': 'ptcp154',
    'pt154': 'ptcp154',
    'cp154': 'ptcp154',
    # Hyphens are turned into underscores before the lookup, so the
    # underscore spelling is the one that actually resolves; the hyphenated
    # key is kept only for code indexing this dictionary directly.
    'cyrillic_asian': 'ptcp154',
    'cyrillic-asian': 'ptcp154',

    # quopri_codec codec
    'quopri': 'quopri_codec',
    'quoted_printable': 'quopri_codec',
    'quotedprintable': 'quopri_codec',

    # rot_13 codec
    'rot13': 'rot_13',

    # shift_jis codec
    'csshiftjis': 'shift_jis',
    'shiftjis': 'shift_jis',
    'sjis': 'shift_jis',
    's_jis': 'shift_jis',

    # shift_jis_2004 codec
    'shiftjis2004': 'shift_jis_2004',
    'sjis_2004': 'shift_jis_2004',
    's_jis_2004': 'shift_jis_2004',

    # shift_jisx0213 codec
    'shiftjisx0213': 'shift_jisx0213',
    'sjisx0213': 'shift_jisx0213',
    's_jisx0213': 'shift_jisx0213',

    # tactis codec
    'tis260': 'tactis',

    # tis_620 codec
    'tis620': 'tis_620',
    'tis_620_0': 'tis_620',
    'tis_620_2529_0': 'tis_620',
    'tis_620_2529_1': 'tis_620',
    'iso_ir_166': 'tis_620',

    # utf_16 codec
    'u16': 'utf_16',
    'utf16': 'utf_16',

    # utf_16_be codec
    'unicodebigunmarked': 'utf_16_be',
    'utf_16be': 'utf_16_be',

    # utf_16_le codec
    'unicodelittleunmarked': 'utf_16_le',
    'utf_16le': 'utf_16_le',

    # utf_32 codec
    'u32': 'utf_32',
    'utf32': 'utf_32',

    # utf_32_be codec
    'utf_32be': 'utf_32_be',

    # utf_32_le codec
    'utf_32le': 'utf_32_le',

    # utf_7 codec
    'u7': 'utf_7',
    'utf7': 'utf_7',
    'unicode_1_1_utf_7': 'utf_7',

    # utf_8 codec
    'u8': 'utf_8',
    'utf': 'utf_8',
    'utf8': 'utf_8',
    'utf8_ucs2': 'utf_8',
    'utf8_ucs4': 'utf_8',

    # uu_codec codec
    'uu': 'uu_codec',

    # zlib_codec codec
    'zip': 'zlib_codec',
    'zlib': 'zlib_codec',

}