|
"""
Python 'utf-32' Codec
"""
|
4 import codecs, sys |
|
5 |
|
### Codec APIs
|
7 |
|
# Stateless encoder: a direct alias of the built-in UTF-32 encoder, so every
# call returns a (bytes, characters_consumed) tuple that starts with a BOM.
encode = codecs.utf_32_encode
|
9 |
|
def decode(input, errors='strict'):
    """Decode a complete UTF-32 byte string in one call.

    Args:
        input: The encoded bytes to decode.
        errors: Error-handling scheme forwarded to the underlying decoder.

    Returns:
        The ``(text, bytes_consumed)`` tuple produced by
        ``codecs.utf_32_decode``.
    """
    # The third argument is the decoder's ``final`` flag: the input is treated
    # as the whole message rather than one chunk of a longer stream.
    return codecs.utf_32_decode(input, errors, True)
|
12 |
|
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-32 encoder.

    The first chunk is encoded with ``codecs.utf_32_encode``; every later
    chunk is encoded with the little- or big-endian encoder that matches
    ``sys.byteorder``.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None until the first chunk has been encoded.
        self.encoder = None

    @staticmethod
    def _native_encoder():
        """Return the fixed-endian encoder matching this platform's order."""
        if sys.byteorder == 'little':
            return codecs.utf_32_le_encode
        return codecs.utf_32_be_encode

    def encode(self, input, final=False):
        """Encode *input* and return the resulting bytes.

        ``final`` is accepted for interface compatibility and is not used.
        """
        if self.encoder is None:
            # Encode first and switch encoders afterwards, so a failed first
            # call leaves the encoder in its initial state.
            result = codecs.utf_32_encode(input, self.errors)[0]
            self.encoder = self._native_encoder()
            return result
        return self.encoder(input, self.errors)[0]

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return 2 if nothing has been encoded yet, otherwise 0."""
        # state info we return to the caller:
        # 0: stream is in natural order for this platform
        # 2: endianness hasn't been determined yet
        # (we're never writing in unnatural order)
        return (2 if self.encoder is None else 0)

    def setstate(self, state):
        """Restore a state previously returned by ``getstate()``.

        Any truthy value means "nothing encoded yet"; 0 selects the
        native-order encoder.
        """
        if state:
            self.encoder = None
        else:
            self.encoder = self._native_encoder()
|
47 |
|
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-32 decoder that picks its byte order from the BOM.

    ``self.decoder`` is None until a BOM has been seen; after that it is
    ``codecs.utf_32_le_decode`` or ``codecs.utf_32_be_decode``.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input* and return ``(text, bytes_consumed)``.

        Raises:
            UnicodeError: if at least four bytes were consumed and no BOM
                was found at the start of the stream.
        """
        if self.decoder is None:
            # Byte order 0 asks the decoder to detect the order itself; it
            # reports what it found as the third element of the result.
            (output, consumed, byteorder) = \
                codecs.utf_32_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_32_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_32_be_decode
            elif consumed >= 4:
                raise UnicodeError("UTF-32 stream does not start with BOM")
            return (output, consumed)
        return self.decoder(input, self.errors, final)

    def reset(self):
        """Drop buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_input, byte_order_flag)``."""
        # additional state info from the base class must be None here,
        # as it isn't passed along to the caller
        state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        # additional state info we pass to the caller:
        # 0: stream is in natural order for this platform
        # 1: stream is in unnatural order
        # 2: endianness hasn't been determined yet
        if self.decoder is None:
            return (state, 2)
        addstate = int((sys.byteorder == "big") !=
                       (self.decoder is codecs.utf_32_be_decode))
        return (state, addstate)

    def setstate(self, state):
        """Restore a ``(buffered_input, byte_order_flag)`` pair."""
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        state = state[1]
        if state == 0:
            # Natural order: the decoder that matches the platform.
            self.decoder = (codecs.utf_32_be_decode
                            if sys.byteorder == "big"
                            else codecs.utf_32_le_decode)
        elif state == 1:
            # Unnatural order: the decoder opposite to the platform.
            self.decoder = (codecs.utf_32_le_decode
                            if sys.byteorder == "big"
                            else codecs.utf_32_be_decode)
        else:
            self.decoder = None
|
98 |
|
class StreamWriter(codecs.StreamWriter):
    """Stream writer that emits a BOM before the first chunk only.

    The first ``encode()`` call uses ``codecs.utf_32_encode`` and then
    replaces ``self.encode`` with the fixed-endian encoder matching
    ``sys.byteorder``. ``reset()`` undoes that replacement.
    """

    def __init__(self, stream, errors='strict'):
        self.bom_written = False
        codecs.StreamWriter.__init__(self, stream, errors)

    def reset(self):
        """Return to the initial state so the next write starts with a BOM."""
        codecs.StreamWriter.reset(self)
        self.bom_written = False
        # encode() shadows itself with an instance attribute after its first
        # call; removing that attribute re-exposes the method defined below.
        try:
            del self.encode
        except AttributeError:
            # Nothing has been written yet, so there is no override to drop.
            pass

    def encode(self, input, errors='strict'):
        """Encode the first chunk and return ``(bytes, consumed)``."""
        self.bom_written = True
        result = codecs.utf_32_encode(input, errors)
        # Later chunks go straight to the native-order encoder.
        if sys.byteorder == 'little':
            self.encode = codecs.utf_32_le_encode
        else:
            self.encode = codecs.utf_32_be_encode
        return result
|
112 |
|
class StreamReader(codecs.StreamReader):
    """Stream reader that picks its byte order from the leading BOM.

    The first ``decode()`` call detects the byte order and then replaces
    ``self.decode`` with the matching fixed-endian decoder.
    """

    def reset(self):
        """Reset the base reader and forget the detected byte order."""
        codecs.StreamReader.reset(self)
        # Removing the instance-level override re-exposes the BOM-detecting
        # decode() method defined below.
        try:
            del self.decode
        except AttributeError:
            # No byte order was detected yet, so there is nothing to remove.
            pass

    def decode(self, input, errors='strict'):
        """Decode *input* and return ``(text, bytes_consumed)``.

        Raises:
            UnicodeError: if at least four bytes were consumed and no BOM
                was found at the start of the stream.
        """
        # Byte order 0 asks the decoder to detect the order itself; it
        # reports what it found as the third element of the result.
        (output, consumed, byteorder) = \
            codecs.utf_32_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_32_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_32_be_decode
        elif consumed >= 4:
            raise UnicodeError("UTF-32 stream does not start with BOM")
        return (output, consumed)
|
132 |
|
### encodings module API
|
134 |
|
def getregentry():
    """Return the ``codecs.CodecInfo`` record describing this codec.

    The record registers the name ``'utf-32'`` and bundles this module's
    stateless functions with its incremental and stream classes.
    """
    return codecs.CodecInfo(
        name='utf-32',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )