Orb/Doxygen/src/translator.cpp
changeset 3 d8fccb2cd802
parent 0 42188c7ea2d9
equal deleted inserted replaced
2:932c358ece3e 3:d8fccb2cd802
       
     1 /*! \file translator.cpp 
       
     2  *  \brief Implementation of generally used translator methods.
       
     3  * 
       
     4  * This file contains implementation of the translator methods that
       
     5  * are not expected to be reimplemented by derived translator classes.
       
     6  * It also contains static data tables used by the methods.
       
     7  *  
       
     8  */
       
     9 #include "translator.h"
       
    10 
       
    11 /*! The translation table used by Win1250ToISO88592() method. */
       
    12 const char Translator::Win1250ToISO88592Tab[] = 
       
    13 {
       
    14   '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
       
    15   '\x88', '\x89', '\xA9', '\x8B', '\xA6', '\xAB', '\xAE', '\xAC',
       
    16   '\x90', '\x91', '\x92', '\x93', '\x94', '\x2E', '\x96', '\x97',
       
    17   '\x98', '\x99', '\xB9', '\x9B', '\xB6', '\xBB', '\xBE', '\xBC',
       
    18   '\xA0', '\x20', '\x20', '\xA3', '\xA4', '\xA1', '\xA6', '\xA7',
       
    19   '\x22', '\xA9', '\xAA', '\x3C', '\xAC', '\x2D', '\xAE', '\xAF',
       
    20   '\x2E', '\x2B', '\x20', '\xB3', '\x27', '\x75', '\xB6', '\xB7',
       
    21   '\x20', '\xB1', '\xBA', '\x3E', '\xA5', '\x22', '\xB5', '\xBF',
       
    22   '\xC0', '\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
       
    23   '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', '\xCF',
       
    24   '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD7',
       
    25   '\xD8', '\xD9', '\xDA', '\xDB', '\xDC', '\xDD', '\xDE', '\xDF',
       
    26   '\xE0', '\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
       
    27   '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', '\xEF',
       
    28   '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\x2D',
       
    29   '\xF8', '\xF9', '\xFA', '\xFB', '\xFC', '\xFD', '\xFE', '\xFF',
       
    30   '\0'
       
    31 };
       
    32 
       
    33 
       
    34 /*! The translation table used by ISO88592ToWin1250() method. */
       
    35 const char Translator::ISO88592ToWin1250Tab[] = {
       
    36   '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
       
    37   '\x88', '\x89', '\x8A', '\x8B', '\x8C', '\x8D', '\x8E', '\x8F',
       
    38   '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
       
    39   '\x98', '\x99', '\x9A', '\x9B', '\x9C', '\x9D', '\x9E', '\x9F',
       
    40   '\xA0', '\xA5', '\xA2', '\xA3', '\xA4', '\xBC', '\x8C', '\xA7',
       
    41   '\xA8', '\x8A', '\xAA', '\x8D', '\x8F', '\xAD', '\x8E', '\xAF',
       
    42   '\xB0', '\xB9', '\xB2', '\xB3', '\xB4', '\xBE', '\x9C', '\xB7',
       
    43   '\xB8', '\x9A', '\xBA', '\x9D', '\x9F', '\xBD', '\x9E', '\xBF',
       
    44   '\xC0', '\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
       
    45   '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', '\xCF',
       
    46   '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD7',
       
    47   '\xD8', '\xD9', '\xDA', '\xDB', '\xDC', '\xDD', '\xDE', '\xDF',
       
    48   '\xE0', '\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
       
    49   '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', '\xEF',
       
    50   '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\xF7',
       
    51   '\xF8', '\xF9', '\xFA', '\xFB', '\xFC', '\xFD', '\xFE', '\xFF',
       
    52   '\0'
       
    53 };
       
    54 
       
    55 
       
    56 /*! The translation table used by Koi8RToWindows1251() method. */
       
    57 const unsigned char Translator::Koi8RToWindows1251Tab[128] =
       
    58 { 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
       
    59   144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
       
    60   160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
       
    61   176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
       
    62   254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
       
    63   239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
       
    64   222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206,
       
    65   207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218 
       
    66 };
       
    67 
       
    68 
       
    69 /*! The translation table used by Windows1251ToKoi8R() method. */
       
    70 const unsigned char Translator::Windows1251ToKoi8RTab[128] =
       
    71 { 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
       
    72   144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
       
    73   160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
       
    74   176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
       
    75   225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
       
    76   242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
       
    77   193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
       
    78   210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209
       
    79 };
       
    80 
       
    81 /*! Returns the string converted from windows-1250 to iso-8859-2. */
       
    82 /* The method was designed initially for translator_cz.h. 
       
    83  * It is used for on-line encoding conversion related to
       
    84  * conditional compilation in Unix/MS Windows environments
       
    85  * (both use different encoding).  Later, the translator_hr.h
       
    86  * (by Boris Bralo) used and improved the same style. As the
       
    87  * method with the translation table was the same, the
       
    88  * decision to move it to this base class was made. The same
       
    89  * holds for ISO88592ToWin1250() method. 
       
    90  * 
       
    91  * Alexandr Chelpanov used the same approach for
       
    92  * Koi8RToWindows1251() and Windows1251ToKoi8R() methods.  Notice,
       
    93  * that he uses Unicode tables.
       
    94  * 
       
    95  * It is recommended for possibly other similar methods in future.
       
    96  */
       
    97 QCString Translator::Win1250ToISO88592(const QCString & sInput)
       
    98 {
       
    99   // The conversion table for characters >127
       
   100   // 
       
   101   
       
   102   QCString result;
       
   103   int len = sInput.length();
       
   104 
       
   105   for (int i = 0; i < len; ++i)
       
   106   {
       
   107     unsigned int c = sInput[i];  
       
   108     result += (c > 127) ? Win1250ToISO88592Tab[c & 0x7F] : c;
       
   109   }
       
   110   return result;
       
   111 }
       
   112 
       
   113 
       
   114 /*! returns the string converted from iso-8859-2 to windows-1250 */
       
   115 /* See the comments of the Win1250ToISO88592() method for details. */
       
   116 QCString Translator::ISO88592ToWin1250(const QCString & sInput)
       
   117 {
       
   118   // The conversion table for characters >127
       
   119   // 
       
   120   QCString result;
       
   121   int len = sInput.length();
       
   122 
       
   123   for (int i = 0; i < len; ++i)
       
   124   {
       
   125     unsigned int c = sInput[i];  
       
   126     result += (c > 127) ? ISO88592ToWin1250Tab[c & 0x7F] : c;
       
   127   }
       
   128   return result;
       
   129 }
       
   130 
       
   131 
       
   132 /*! Returns the string converted from koi8-r to windows-1251. */
       
   133 /* The method was designed initially for translator_cz.h. 
       
   134    It is used for on-line encoding conversion related to conditional
       
   135    compilation in Unix/MS Windows environments (both use different
       
   136    encoding). Encoding table got from QT:qtextcodec.cpp
       
   137  */
       
   138 QCString Translator::Koi8RToWindows1251( const QCString & sInput )
       
   139 {
       
   140 
       
   141   QCString result(sInput);
       
   142   int len = sInput.length();
       
   143 
       
   144   const unsigned char * c = (const unsigned char *)(const char*)sInput;
       
   145   unsigned char *dc = (unsigned char*)(const char*)result;
       
   146   for( int i=0; i<len; i++ ) {
       
   147     if ( c[i] > 127 )
       
   148       dc[i] = Koi8RToWindows1251Tab[c[i]-128];
       
   149   }
       
   150   return result;
       
   151 }
       
   152 
       
   153 
       
   154 /*! returns the string converted from Windows-1251 to koi8-r */
       
   155 /* See the comments of the Koi8RToWindows1251() method for details.
       
   156    Encoding table got from QT:qtextcodec.cpp */
       
   157 QCString Translator::Windows1251ToKoi8R( const QCString & sInput )
       
   158 {
       
   159   QCString result(sInput);
       
   160   int len = sInput.length();
       
   161 
       
   162   const unsigned char * c = (const unsigned char *)(const char*)sInput;
       
   163   unsigned char *dc = (unsigned char*)(const char*)result;
       
   164   for( int i=0; i<len; i++ ) {
       
   165     if ( c[i] > 127 )
       
   166       dc[i] = Windows1251ToKoi8RTab[c[i]-128];
       
   167   }
       
   168   return result;
       
   169 }
       
   170 
       
   171 /*! returns the caracter converted from hankaku-kana to zenkakukana. 
       
   172   Thanks Yongmao Ni http://alfin.mine.utsunomiya-u.ac.jp/~niy/algo/ */
       
   173 unsigned int hankaku2zen(int hankaku)
       
   174 {
       
   175     static unsigned int z[64] = {
       
   176         0x2121,0x2123,0x2156,0x2157,0x2122,0x2126,0x2572,0x2521,
       
   177         0x2523,0x2525,0x2527,0x2529,0x2563,0x2565,0x2567,0x2543,
       
   178         0x213c,0x2522,0x2524,0x2526,0x2528,0x252a,0x252b,0x252d,
       
   179         0x252f,0x2531,0x2533,0x2535,0x2537,0x2539,0x253b,0x253d,
       
   180         0x253f,0x2541,0x2544,0x2546,0x2548,0x254a,0x254b,0x254c,
       
   181         0x254d,0x254e,0x254f,0x2552,0x2555,0x2558,0x255b,0x255e,
       
   182         0x255f,0x2560,0x2561,0x2562,0x2564,0x2566,0x2568,0x2569,
       
   183         0x256a,0x256b,0x256c,0x256d,0x256f,0x2573,0x212b,0x212c };
       
   184 
       
   185     if (hankaku < 0xa0 || hankaku > 0xdf) return 0;
       
   186     return z[hankaku - 0xa0];
       
   187 }
       
   188 
       
   189 /*! returns the character converted from japaneseEUC to SJIS
       
   190   Thanks Yongmao Ni http://alfin.mine.utsunomiya-u.ac.jp/~niy/algo/ */
       
   191 unsigned int euc2sjis(unsigned int euc)
       
   192 {
       
   193     unsigned int jis;
       
   194     unsigned int hib, lob;
       
   195 
       
   196     if ((euc & 0xff00) == 0x8e00)
       
   197         jis = hankaku2zen(euc & 0xff);
       
   198     else jis = euc & ~0x8080;
       
   199     
       
   200     hib = (jis >> 8) & 0xff;
       
   201     lob = jis & 0xff;
       
   202     lob += (hib & 1) ? 0x1f : 0x7d;
       
   203     if (lob >= 0x7f) lob++;
       
   204     hib = ((hib - 0x21) >> 1) + 0x81;
       
   205     if (hib > 0x9f) hib += 0x40;
       
   206 
       
   207     return (hib << 8) | lob;
       
   208 }
       
   209 
       
   210 
       
   211 /*! returns the string converted from Japanese-EUC to SJIS */
       
   212 
       
   213 QCString Translator::JapaneseEucToSjis( const QCString & sInput )
       
   214 {
       
   215   QString result;
       
   216   int len = sInput.length();
       
   217   int c1,c2,sj;
       
   218 
       
   219   result.setUnicode(0, len);
       
   220   QChar* uc = (QChar*)result.unicode(); // const_cast
       
   221   const unsigned char * c = (const unsigned char *)(const char*)sInput;
       
   222   
       
   223   for( int i=0; i<len;)
       
   224     {
       
   225       c1 = c[i];
       
   226 
       
   227       if( c1 == EOF ) break;
       
   228       
       
   229       /* if MSB=0 then the character is ascii */
       
   230       if(!( c1 & 0x80))
       
   231 	{
       
   232 	  uc[i] = c[i];
       
   233 	  i=i+1;
       
   234 	}
       
   235       else
       
   236 	{
       
   237 	  c2 = c[i+1];
       
   238 	  if( c2 == EOF ) break;
       
   239 	  sj     = euc2sjis( (c1 << 8) + c2 );
       
   240 	  uc[i]   = sj >> 8;
       
   241 	  uc[i+1] = sj & 0xff;
       
   242 	  i+=2;
       
   243 	}
       
   244     }
       
   245 
       
   246   return result.latin1();
       
   247 
       
   248 }