|
/*! \file translator.cpp
 *  \brief Implementation of generally used translator methods.
 *
 *  This file contains implementation of the translator methods that
 *  are not expected to be reimplemented by derived translator classes.
 *  It also contains static data tables used by the methods.
 *
 */
|
9 #include "translator.h" |
|
10 |
|
11 /*! The translation table used by Win1250ToISO88592() method. */ |
|
12 const char Translator::Win1250ToISO88592Tab[] = |
|
13 { |
|
14 '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', |
|
15 '\x88', '\x89', '\xA9', '\x8B', '\xA6', '\xAB', '\xAE', '\xAC', |
|
16 '\x90', '\x91', '\x92', '\x93', '\x94', '\x2E', '\x96', '\x97', |
|
17 '\x98', '\x99', '\xB9', '\x9B', '\xB6', '\xBB', '\xBE', '\xBC', |
|
18 '\xA0', '\x20', '\x20', '\xA3', '\xA4', '\xA1', '\xA6', '\xA7', |
|
19 '\x22', '\xA9', '\xAA', '\x3C', '\xAC', '\x2D', '\xAE', '\xAF', |
|
20 '\x2E', '\x2B', '\x20', '\xB3', '\x27', '\x75', '\xB6', '\xB7', |
|
21 '\x20', '\xB1', '\xBA', '\x3E', '\xA5', '\x22', '\xB5', '\xBF', |
|
22 '\xC0', '\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', |
|
23 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', '\xCF', |
|
24 '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD7', |
|
25 '\xD8', '\xD9', '\xDA', '\xDB', '\xDC', '\xDD', '\xDE', '\xDF', |
|
26 '\xE0', '\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', |
|
27 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', '\xEF', |
|
28 '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\x2D', |
|
29 '\xF8', '\xF9', '\xFA', '\xFB', '\xFC', '\xFD', '\xFE', '\xFF', |
|
30 '\0' |
|
31 }; |
|
32 |
|
33 |
|
34 /*! The translation table used by ISO88592ToWin1250() method. */ |
|
35 const char Translator::ISO88592ToWin1250Tab[] = { |
|
36 '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', |
|
37 '\x88', '\x89', '\x8A', '\x8B', '\x8C', '\x8D', '\x8E', '\x8F', |
|
38 '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', |
|
39 '\x98', '\x99', '\x9A', '\x9B', '\x9C', '\x9D', '\x9E', '\x9F', |
|
40 '\xA0', '\xA5', '\xA2', '\xA3', '\xA4', '\xBC', '\x8C', '\xA7', |
|
41 '\xA8', '\x8A', '\xAA', '\x8D', '\x8F', '\xAD', '\x8E', '\xAF', |
|
42 '\xB0', '\xB9', '\xB2', '\xB3', '\xB4', '\xBE', '\x9C', '\xB7', |
|
43 '\xB8', '\x9A', '\xBA', '\x9D', '\x9F', '\xBD', '\x9E', '\xBF', |
|
44 '\xC0', '\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', |
|
45 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', '\xCF', |
|
46 '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD7', |
|
47 '\xD8', '\xD9', '\xDA', '\xDB', '\xDC', '\xDD', '\xDE', '\xDF', |
|
48 '\xE0', '\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', |
|
49 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', '\xEF', |
|
50 '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\xF7', |
|
51 '\xF8', '\xF9', '\xFA', '\xFB', '\xFC', '\xFD', '\xFE', '\xFF', |
|
52 '\0' |
|
53 }; |
|
54 |
|
55 |
|
56 /*! The translation table used by Koi8RToWindows1251() method. */ |
|
57 const unsigned char Translator::Koi8RToWindows1251Tab[128] = |
|
58 { 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, |
|
59 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, |
|
60 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, |
|
61 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, |
|
62 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238, |
|
63 239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250, |
|
64 222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206, |
|
65 207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218 |
|
66 }; |
|
67 |
|
68 |
|
69 /*! The translation table used by Windows1251ToKoi8R() method. */ |
|
70 const unsigned char Translator::Windows1251ToKoi8RTab[128] = |
|
71 { 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, |
|
72 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, |
|
73 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, |
|
74 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, |
|
75 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240, |
|
76 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241, |
|
77 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208, |
|
78 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209 |
|
79 }; |
|
80 |
|
81 /*! Returns the string converted from windows-1250 to iso-8859-2. */ |
|
82 /* The method was designed initially for translator_cz.h. |
|
83 * It is used for on-line encoding conversion related to |
|
84 * conditional compilation in Unix/MS Windows environments |
|
85 * (both use different encoding). Later, the translator_hr.h |
|
86 * (by Boris Bralo) used and improved the same style. As the |
|
87 * method with the translation table was the same, the |
|
88 * decision to move it to this base class was made. The same |
|
89 * holds for ISO88592ToWin1250() method. |
|
90 * |
|
91 * Alexandr Chelpanov used the same approach for |
|
92 * Koi8RToWindows1251() and Windows1251ToKoi8R() methods. Notice, |
|
93 * that he uses Unicode tables. |
|
94 * |
|
95 * It is recommended for possibly other similar methods in future. |
|
96 */ |
|
97 QCString Translator::Win1250ToISO88592(const QCString & sInput) |
|
98 { |
|
99 // The conversion table for characters >127 |
|
100 // |
|
101 |
|
102 QCString result; |
|
103 int len = sInput.length(); |
|
104 |
|
105 for (int i = 0; i < len; ++i) |
|
106 { |
|
107 unsigned int c = sInput[i]; |
|
108 result += (c > 127) ? Win1250ToISO88592Tab[c & 0x7F] : c; |
|
109 } |
|
110 return result; |
|
111 } |
|
112 |
|
113 |
|
114 /*! returns the string converted from iso-8859-2 to windows-1250 */ |
|
115 /* See the comments of the Win1250ToISO88592() method for details. */ |
|
116 QCString Translator::ISO88592ToWin1250(const QCString & sInput) |
|
117 { |
|
118 // The conversion table for characters >127 |
|
119 // |
|
120 QCString result; |
|
121 int len = sInput.length(); |
|
122 |
|
123 for (int i = 0; i < len; ++i) |
|
124 { |
|
125 unsigned int c = sInput[i]; |
|
126 result += (c > 127) ? ISO88592ToWin1250Tab[c & 0x7F] : c; |
|
127 } |
|
128 return result; |
|
129 } |
|
130 |
|
131 |
|
132 /*! Returns the string converted from koi8-r to windows-1251. */ |
|
133 /* The method was designed initially for translator_cz.h. |
|
134 It is used for on-line encoding conversion related to conditional |
|
135 compilation in Unix/MS Windows environments (both use different |
|
136 encoding). Encoding table got from QT:qtextcodec.cpp |
|
137 */ |
|
138 QCString Translator::Koi8RToWindows1251( const QCString & sInput ) |
|
139 { |
|
140 |
|
141 QCString result(sInput); |
|
142 int len = sInput.length(); |
|
143 |
|
144 const unsigned char * c = (const unsigned char *)(const char*)sInput; |
|
145 unsigned char *dc = (unsigned char*)(const char*)result; |
|
146 for( int i=0; i<len; i++ ) { |
|
147 if ( c[i] > 127 ) |
|
148 dc[i] = Koi8RToWindows1251Tab[c[i]-128]; |
|
149 } |
|
150 return result; |
|
151 } |
|
152 |
|
153 |
|
154 /*! returns the string converted from Windows-1251 to koi8-r */ |
|
155 /* See the comments of the Koi8RToWindows1251() method for details. |
|
156 Encoding table got from QT:qtextcodec.cpp */ |
|
157 QCString Translator::Windows1251ToKoi8R( const QCString & sInput ) |
|
158 { |
|
159 QCString result(sInput); |
|
160 int len = sInput.length(); |
|
161 |
|
162 const unsigned char * c = (const unsigned char *)(const char*)sInput; |
|
163 unsigned char *dc = (unsigned char*)(const char*)result; |
|
164 for( int i=0; i<len; i++ ) { |
|
165 if ( c[i] > 127 ) |
|
166 dc[i] = Windows1251ToKoi8RTab[c[i]-128]; |
|
167 } |
|
168 return result; |
|
169 } |
|
170 |
|
/*! Returns the character converted from hankaku-kana to zenkaku-kana.
 *
 *  Values of \a hankaku in the range 0xa0..0xdf are mapped through a
 *  64-entry table indexed by (hankaku - 0xa0); any other value yields 0.
 *  euc2sjis() uses the returned value as its two-byte JIS code.
 *
 *  Thanks Yongmao Ni http://alfin.mine.utsunomiya-u.ac.jp/~niy/algo/
 */
unsigned int hankaku2zen(int hankaku)
{
  static const unsigned int zenkakuCodes[64] =
  {
    0x2121, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,  /* 0xa0 */
    0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,  /* 0xa8 */
    0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,  /* 0xb0 */
    0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,  /* 0xb8 */
    0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,  /* 0xc0 */
    0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,  /* 0xc8 */
    0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,  /* 0xd0 */
    0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c   /* 0xd8 */
  };

  const bool inTableRange = (hankaku >= 0xa0) && (hankaku <= 0xdf);
  if (inTableRange)
  {
    return zenkakuCodes[hankaku - 0xa0];
  }
  return 0;
}
|
188 |
|
189 /*! returns the character converted from japaneseEUC to SJIS |
|
190 Thanks Yongmao Ni http://alfin.mine.utsunomiya-u.ac.jp/~niy/algo/ */ |
|
191 unsigned int euc2sjis(unsigned int euc) |
|
192 { |
|
193 unsigned int jis; |
|
194 unsigned int hib, lob; |
|
195 |
|
196 if ((euc & 0xff00) == 0x8e00) |
|
197 jis = hankaku2zen(euc & 0xff); |
|
198 else jis = euc & ~0x8080; |
|
199 |
|
200 hib = (jis >> 8) & 0xff; |
|
201 lob = jis & 0xff; |
|
202 lob += (hib & 1) ? 0x1f : 0x7d; |
|
203 if (lob >= 0x7f) lob++; |
|
204 hib = ((hib - 0x21) >> 1) + 0x81; |
|
205 if (hib > 0x9f) hib += 0x40; |
|
206 |
|
207 return (hib << 8) | lob; |
|
208 } |
|
209 |
|
210 |
|
211 /*! returns the string converted from Japanese-EUC to SJIS */ |
|
212 |
|
213 QCString Translator::JapaneseEucToSjis( const QCString & sInput ) |
|
214 { |
|
215 QString result; |
|
216 int len = sInput.length(); |
|
217 int c1,c2,sj; |
|
218 |
|
219 result.setUnicode(0, len); |
|
220 QChar* uc = (QChar*)result.unicode(); // const_cast |
|
221 const unsigned char * c = (const unsigned char *)(const char*)sInput; |
|
222 |
|
223 for( int i=0; i<len;) |
|
224 { |
|
225 c1 = c[i]; |
|
226 |
|
227 if( c1 == EOF ) break; |
|
228 |
|
229 /* if MSB=0 then the character is ascii */ |
|
230 if(!( c1 & 0x80)) |
|
231 { |
|
232 uc[i] = c[i]; |
|
233 i=i+1; |
|
234 } |
|
235 else |
|
236 { |
|
237 c2 = c[i+1]; |
|
238 if( c2 == EOF ) break; |
|
239 sj = euc2sjis( (c1 << 8) + c2 ); |
|
240 uc[i] = sj >> 8; |
|
241 uc[i+1] = sj & 0xff; |
|
242 i+=2; |
|
243 } |
|
244 } |
|
245 |
|
246 return result.latin1(); |
|
247 |
|
248 } |