diff -r 59758314f811 -r d4524d6a4472 Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/chartrans_8c_source.html --- a/Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/chartrans_8c_source.html Fri Jun 11 15:24:34 2010 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,736 +0,0 @@ - - -
- -00001 /* -00002 * chartrans.c -00003 * Copyright (C) 1999-2004 A.J. van Os; Released under GNU GPL -00004 * -00005 * Description: -00006 * Translate Word characters to local representation -00007 */ -00008 -00009 #include <stdlib.h> -00010 #include <string.h> -00011 #include <ctype.h> -00012 #if defined(__STDC_ISO_10646__) -00013 #include <wctype.h> -00014 #endif /* __STDC_ISO_10646__ */ -00015 #include "antiword.h" -00016 -00017 static const USHORT usCp850[] = { /* DOS implementation of Latin1 */ -00018 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, -00019 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, -00020 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, -00021 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192, -00022 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, -00023 0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, -00024 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x00c0, -00025 0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5, 0x2510, -00026 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3, -00027 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4, -00028 0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce, -00029 0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580, -00030 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe, -00031 0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4, -00032 0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8, -00033 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0, -00034 }; -00035 -00036 static const USHORT usCp1250[] = { /* Windows implementation of Latin2 */ -00037 0x20ac, 0x003f, 0x201a, 0x003f, 0x201e, 0x2026, 0x2020, 0x2021, -00038 0x003f, 0x2030, 0x0160, 0x2039, 0x015a, 0x0164, 0x017d, 0x0179, -00039 0x003f, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, -00040 0x003f, 0x2122, 0x0161, 0x203a, 0x015b, 0x0165, 0x017e, 0x017a, -00041 0x00a0, 0x02c7, 0x02d8, 0x0141, 0x00a4, 0x0104, 0x00a6, 0x00a7, -00042 0x00a8, 0x00a9, 0x015e, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x017b, -00043 0x00b0, 0x00b1, 0x02db, 0x0142, 0x00b4, 0x00b5, 0x00b6, 0x00b7, -00044 0x00b8, 0x0105, 0x015f, 0x00bb, 0x013d, 0x02dd, 0x013e, 0x017c, -00045 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, -00046 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, -00047 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, -00048 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, -00049 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, -00050 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, -00051 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, -00052 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, -00053 }; -00054 -00055 static const USHORT usCp1251[] = { /* Windows implementation of Cyrillic */ -00056 0x0402, 0x0403, 0x201a, 0x0453, 0x201e, 0x2026, 0x2020, 0x2021, -00057 0x20ac, 0x2030, 0x0409, 0x2039, 0x040a, 0x040c, 0x040b, 0x040f, -00058 0x0452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, -00059 0x00f3, 0x2122, 0x0459, 0x203a, 0x045a, 0x045c, 0x045b, 0x045f, -00060 0x00a0, 0x040e, 0x045e, 0x0408, 0x00a4, 0x0490, 0x00a6, 0x00a7, -00061 0x0401, 0x00a9, 0x0404, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0407, -00062 0x00b0, 0x00b1, 0x0406, 0x0456, 0x0491, 0x00b5, 0x00b6, 0x00b7, -00063 0x0451, 0x2116, 0x0454, 0x00bb, 0x0458, 0x0405, 0x0455, 0x0457, -00064 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, -00065 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, -00066 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, -00067 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, -00068 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, -00069 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, -00070 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, -00071 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, -00072 }; -00073 -00074 static const USHORT usCp1252[] = { /* Windows implementation of Latin1 */ -00075 0x20ac, 0x003f, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, -00076 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x003f, 0x017d, 0x003f, -00077 0x003f, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, -00078 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x003f, 0x017e, 0x0178, -00079 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, -00080 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, -00081 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, -00082 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, -00083 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, -00084 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, -00085 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, -00086 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, -00087 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, -00088 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, -00089 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, -00090 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, -00091 }; -00092 -00093 static const USHORT usMacRoman[] = { /* Apple implementation of Latin1 */ -00094 0x00c4, 0x00c5, 0x00c7, 0x00c9, 0x00d1, 0x00d6, 0x00dc, 0x00e1, -00095 0x00e0, 0x00e2, 0x00e4, 0x00e3, 0x00e5, 0x00e7, 0x00e9, 0x00e8, -00096 0x00ea, 0x00eb, 0x00ed, 0x00ec, 0x00ee, 0x00ef, 0x00f1, 0x00f3, -00097 0x00f2, 0x00f4, 0x00f6, 0x00f5, 0x00fa, 0x00f9, 0x00fb, 0x00fc, -00098 0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df, -00099 0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8, -00100 0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211, -00101 0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x2126, 0x00e6, 0x00f8, -00102 0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab, -00103 0x00bb, 0x2026, 0x00a0, 0x00c0, 0x00c3, 0x00d5, 0x0152, 0x0153, -00104 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca, -00105 0x00ff, 0x0178, 0x2044, 0x00a4, 0x2039, 0x203a, 0xfb01, 0xfb02, -00106 0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x00c2, 0x00ca, 0x00c1, -00107 0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf, 0x00cc, 0x00d3, 0x00d4, -00108 0x003f, 0x00d2, 0x00da, 0x00db, 0x00d9, 0x0131, 0x02c6, 0x02dc, -00109 0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7, -00110 }; -00111 -00112 static const USHORT usPrivateArea[] = { -00113 0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220d, -00114 0x0028, 0x0029, 0x2217, 0x002b, 0x002c, 0x2212, 0x002e, 0x002f, -00115 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, -00116 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x2019, 0x003e, 0x003f, -00117 0x201d, 0x201c, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393, -00118 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f, -00119 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9, -00120 0x039e, 0x03a8, 0x0396, 0x005b, 0x2234, 0x005d, 0x22a5, 0x005f, -00121 0x003f, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3, -00122 0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf, -00123 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9, -00124 0x03be, 0x03c8, 0x03b6, 0x007b, 0x007c, 0x007d, 0x223c, 0x003f, -00125 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, -00126 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, -00127 0x003f, 0x003f, 0x003f, 0x2022, 0x003f, 0x003f, 0x003f, 0x003f, -00128 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, -00129 0x20ac, 0x03d2, 0x2032, 0x2264, 0x2044, 0x221e, 0x0192, 0x2663, -00130 0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193, -00131 0x00b0, 0x00b1, 0x2033, 0x2265, 0x00d7, 0x221d, 0x2202, 0x2022, -00132 0x00f7, 0x2260, 0x2261, 0x2248, 0x2026, 0x007c, 0x23af, 0x21b5, -00133 0x2135, 0x2111, 0x211c, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229, -00134 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209, -00135 0x2220, 0x2207, 0x00ae, 0x00a9, 0x2122, 0x220f, 0x221a, 0x22c5, -00136 0x00ac, 0x2227, 0x2228, 0x21d4, 0x21d0, 0x21d1, 0x21d2, 0x21d3, -00137 0x22c4, 0x3008, 0x00ae, 0x00a9, 0x2122, 0x2211, 0x239b, 0x239c, -00138 0x239d, 0x23a1, 0x23a2, 0x23a3, 0x23a7, 0x23a8, 0x23a9, 0x23aa, -00139 0x003f, 0x3009, 0x222b, 0x2320, 0x23ae, 0x2321, 0x239e, 0x239f, -00140 0x23a0, 0x23a4, 0x23a5, 0x23a6, 0x23ab, 0x23ac, 0x23ad, 0x003f, -00141 }; -00142 -00143 typedef struct char_table_tag { -00144 UCHAR ucLocal; -00145 USHORT usUnicode; -00146 } char_table_type; -00147 -00148 static char_table_type atCharTable[256]; -00149 static size_t tNextPosFree = 0; -00150 -00151 -00152 /* -00153 * iCompare - compare two records -00154 * -00155 * Compares two records. For use by qsort(3C) and bsearch(3C). -00156 * -00157 * returns -1 if rec1 < rec2, 0 if rec1 == rec2, 1 if rec1 > rec2 -00158 */ -00159 static int -00160 iCompare(const void *pvRecord1, const void *pvRecord2) -00161 { -00162 USHORT usUnicode1, usUnicode2; -00163 -00164 usUnicode1 = ((char_table_type *)pvRecord1)->usUnicode; -00165 usUnicode2 = ((char_table_type *)pvRecord2)->usUnicode; -00166 -00167 if (usUnicode1 < usUnicode2) { -00168 return -1; -00169 } -00170 if (usUnicode1 > usUnicode2) { -00171 return 1; -00172 } -00173 return 0; -00174 } /* end of iCompare */ -00175 -00176 /* -00177 * pGetCharTableRecord - get the character table record -00178 * -00179 * returns a pointer to the record when found, otherwise NULL -00180 */ -00181 static const char_table_type * -00182 pGetCharTableRecord(USHORT usUnicode) -00183 { -00184 char_table_type tKey; -00185 -00186 if (tNextPosFree == 0) { -00187 return NULL; -00188 } -00189 tKey.usUnicode = usUnicode; -00190 tKey.ucLocal = 0; -00191 return (char_table_type *)bsearch(&tKey, -00192 atCharTable, -00193 tNextPosFree, sizeof(atCharTable[0]), -00194 iCompare); -00195 } /* end of pGetCharTableRecord */ -00196 -00197 /* -00198 * ucGetBulletCharacter - get the local representation of the bullet -00199 */ -00200 UCHAR -00201 ucGetBulletCharacter(conversion_type eConversionType, encoding_type eEncoding) -00202 { -00203 #if defined(__riscos) -00204 return 0x8f; -00205 #else -00206 const char_table_type *pRec; -00207 -00208 fail(eEncoding == encoding_utf_8); -00209 -00210 if (eEncoding == encoding_latin_1 && -00211 (eConversionType == conversion_ps || -00212 eConversionType == conversion_pdf)) { -00213 /* Ugly, but it makes the PostScript and PDF look better */ -00214 return (UCHAR)143; -00215 } -00216 if (eConversionType != conversion_text && -00217 eConversionType != conversion_fmt_text) { -00218 pRec = pGetCharTableRecord(UNICODE_BULLET); -00219 if (pRec != NULL) { -00220 return pRec->ucLocal; -00221 } -00222 pRec = pGetCharTableRecord(UNICODE_BULLET_OPERATOR); -00223 if (pRec != NULL) { -00224 return pRec->ucLocal; -00225 } -00226 pRec = pGetCharTableRecord(UNICODE_MIDDLE_DOT); -00227 if (pRec != NULL) { -00228 return pRec->ucLocal; -00229 } -00230 } -00231 return (UCHAR)'.'; -00232 #endif /* __riscos */ -00233 } /* end of ucGetBulletCharacter */ -00234 -00235 /* -00236 * ucGetNbspCharacter - get the local representation of the non-breaking space -00237 */ -00238 UCHAR -00239 ucGetNbspCharacter(void) -00240 { -00241 const char_table_type *pRec; -00242 -00243 pRec = pGetCharTableRecord(0x00a0); /* Unicode non-breaking space */ -00244 if (pRec == NULL) { -00245 DBG_MSG("Non-breaking space record not found"); -00246 /* No value found, use the best guess */ -00247 return (UCHAR)0xa0; -00248 } -00249 return pRec->ucLocal; -00250 } /* end of ucGetNbspCharacter */ -00251 -00252 /* -00253 * bReadCharacterMappingTable - read the mapping table -00254 * -00255 * Read the character mapping table from file and have the contents sorted -00256 * -00257 * returns TRUE if successful, otherwise FALSE -00258 */ -00259 BOOL -00260 bReadCharacterMappingTable(FILE *pFile) -00261 { -00262 char *pcTmp; -00263 ULONG ulUnicode; -00264 UINT uiLocal; -00265 int iFields; -00266 char szLine[81]; -00267 -00268 if (pFile == NULL) { -00269 return FALSE; -00270 } -00271 -00272 /* Clean the table first */ -00273 (void)memset(atCharTable, 0, sizeof(atCharTable)); -00274 -00275 /* Fill the table */ -00276 while (fgets(szLine, (int)sizeof(szLine), pFile)) { -00277 if (szLine[0] == '#' || -00278 szLine[0] == '\r' || -00279 szLine[0] == '\n') { -00280 /* Comment or empty line */ -00281 continue; -00282 } -00283 iFields = sscanf(szLine, "%x %lx %*s", &uiLocal, &ulUnicode); -00284 if (iFields != 2) { -00285 pcTmp = strchr(szLine, '\r'); -00286 if (pcTmp != NULL) { -00287 *pcTmp = '\0'; -00288 } -00289 pcTmp = strchr(szLine, '\n'); -00290 if (pcTmp != NULL) { -00291 *pcTmp = '\0'; -00292 } -00293 werr(0, "Syntax error in: '%s'", szLine); -00294 continue; -00295 } -00296 if (uiLocal > 0xff || ulUnicode > 0xffff) { -00297 werr(0, "Syntax error in: '%02x %04lx'", -00298 uiLocal, ulUnicode); -00299 continue; -00300 } -00301 /* Store only the relevant entries */ -00302 if (uiLocal != ulUnicode || uiLocal >= 0x80) { -00303 atCharTable[tNextPosFree].ucLocal = (UCHAR)uiLocal; -00304 atCharTable[tNextPosFree].usUnicode = (USHORT)ulUnicode; -00305 tNextPosFree++; -00306 } -00307 if (tNextPosFree >= elementsof(atCharTable)) { -00308 werr(0, "Too many entries in the character mapping " -00309 "file. Ignoring the rest."); -00310 break; -00311 } -00312 } -00313 -00314 if (tNextPosFree != 0) { -00315 DBG_HEX(atCharTable[0].usUnicode); -00316 DBG_HEX(atCharTable[tNextPosFree - 1].usUnicode); -00317 -00318 qsort(atCharTable, -00319 tNextPosFree, sizeof(atCharTable[0]), -00320 iCompare); -00321 -00322 DBG_HEX(atCharTable[0].usUnicode); -00323 DBG_HEX(atCharTable[tNextPosFree - 1].usUnicode); -00324 } -00325 -00326 return TRUE; -00327 } /* end of bReadCharacterMappingTable */ -00328 -00329 /* -00330 * ulTranslateCharacters - Translate characters to local representation -00331 * -00332 * Translate all characters to local representation -00333 * -00334 * returns the translated character -00335 */ -00336 ULONG -00337 ulTranslateCharacters(USHORT usChar, ULONG ulFileOffset, int iWordVersion, -00338 conversion_type eConversionType, encoding_type eEncoding, -00339 BOOL bUseMacCharSet) -00340 { -00341 const char_table_type *pTmp; -00342 const USHORT *usCharSet; -00343 -00344 usCharSet = NULL; -00345 if (bUseMacCharSet) { -00346 /* Macintosh character set */ -00347 usCharSet = usMacRoman; -00348 } else if (iWordVersion == 0) { -00349 /* DOS character set */ -00350 usCharSet = usCp850; -00351 } else { -00352 /* Windows character set */ -00353 switch (eEncoding) { -00354 case encoding_latin_2: -00355 usCharSet = usCp1250; -00356 break; -00357 case encoding_cyrillic: -00358 usCharSet = usCp1251; -00359 break; -00360 case encoding_latin_1: -00361 default: -00362 usCharSet = usCp1252; -00363 break; -00364 } -00365 } -00366 fail(usCharSet == NULL); -00367 if (usChar >= 0x80 && usChar <= 0x9f) { -00368 /* Translate implementation defined characters */ -00369 usChar = usCharSet[usChar - 0x80]; -00370 } else if (iWordVersion < 8 && usChar >= 0xa0 && usChar <= 0xff) { -00371 /* Translate old character set to Unixcode */ -00372 usChar = usCharSet[usChar - 0x80]; -00373 } -00374 -00375 /* Microsoft Unicode to real Unicode */ -00376 if (usChar >= 0xf020 && usChar <= 0xf0ff) { -00377 DBG_HEX_C(usPrivateArea[usChar - 0xf020] == 0x003f, usChar); -00378 usChar = usPrivateArea[usChar - 0xf020]; -00379 } -00380 -00381 /* Characters with a special meaning in Word */ -00382 switch (usChar) { -00383 case IGNORE_CHARACTER: -00384 case FOOTNOTE_SEPARATOR: -00385 case FOOTNOTE_CONTINUATION: -00386 case ANNOTATION: -00387 case FRAME: -00388 case LINE_FEED: -00389 case WORD_SOFT_HYPHEN: -00390 case UNICODE_HYPHENATION_POINT: -00391 return IGNORE_CHARACTER; -00392 case PICTURE: -00393 case TABLE_SEPARATOR: -00394 case TAB: -00395 case HARD_RETURN: -00396 case PAGE_BREAK: -00397 case PAR_END: -00398 case COLUMN_FEED: -00399 return (ULONG)usChar; -00400 case FOOTNOTE_OR_ENDNOTE: -00401 NO_DBG_HEX(ulFileOffset); -00402 switch (eGetNotetype(ulFileOffset)) { -00403 case notetype_is_footnote: -00404 return FOOTNOTE_CHAR; -00405 case notetype_is_endnote: -00406 return ENDNOTE_CHAR; -00407 default: -00408 return UNKNOWN_NOTE_CHAR; -00409 } -00410 case WORD_UNBREAKABLE_JOIN: -00411 return (ULONG)OUR_UNBREAKABLE_JOIN; -00412 default: -00413 break; -00414 } -00415 -00416 if (eEncoding != encoding_utf_8) { -00417 /* Latin characters in an oriental text */ -00418 if (usChar >= 0xff01 && usChar <= 0xff5e) { -00419 usChar -= 0xfee0; -00420 } -00421 } -00422 -00423 if (eEncoding == encoding_latin_1 && -00424 (eConversionType == conversion_ps || -00425 eConversionType == conversion_pdf)) { -00426 /* Ugly, but it makes the PostScript and PDF look better */ -00427 switch (usChar) { -00428 case UNICODE_ELLIPSIS: -00429 return 140; -00430 case UNICODE_TRADEMARK_SIGN: -00431 return 141; -00432 case UNICODE_PER_MILLE_SIGN: -00433 return 142; -00434 case UNICODE_BULLET: -00435 case UNICODE_BULLET_OPERATOR: -00436 case UNICODE_BLACK_CLUB_SUIT: -00437 return 143; -00438 case UNICODE_LEFT_SINGLE_QMARK: -00439 return 144; -00440 case UNICODE_RIGHT_SINGLE_QMARK: -00441 return 145; -00442 case UNICODE_SINGLE_LEFT_ANGLE_QMARK: -00443 return 146; -00444 case UNICODE_SINGLE_RIGHT_ANGLE_QMARK: -00445 return 147; -00446 case UNICODE_LEFT_DOUBLE_QMARK: -00447 return 148; -00448 case UNICODE_RIGHT_DOUBLE_QMARK: -00449 return 149; -00450 case UNICODE_DOUBLE_LOW_9_QMARK: -00451 return 150; -00452 case UNICODE_EN_DASH: -00453 return 151; -00454 case UNICODE_EM_DASH: -00455 return 152; -00456 case UNICODE_MINUS_SIGN: -00457 return 153; -00458 case UNICODE_CAPITAL_LIGATURE_OE: -00459 return 154; -00460 case UNICODE_SMALL_LIGATURE_OE: -00461 return 155; -00462 case UNICODE_DAGGER: -00463 return 156; -00464 case UNICODE_DOUBLE_DAGGER: -00465 return 157; -00466 case UNICODE_SMALL_LIGATURE_FI: -00467 return 158; -00468 case UNICODE_SMALL_LIGATURE_FL: -00469 return 159; -00470 default: -00471 break; -00472 } -00473 } -00474 -00475 if (eConversionType == conversion_pdf) { -00476 if (eEncoding == encoding_latin_1) { -00477 switch (usChar) { -00478 case UNICODE_EURO_SIGN: -00479 return 128; -00480 default: -00481 break; -00482 } -00483 } else if (eEncoding == encoding_latin_2) { -00484 switch (usChar) { -00485 case UNICODE_CAPITAL_D_WITH_STROKE: -00486 case UNICODE_SMALL_D_WITH_STROKE: -00487 return 0x3f; -00488 default: -00489 break; -00490 } -00491 } -00492 } -00493 -00494 if (usChar < 0x80) { -00495 /* US ASCII */ -00496 if (usChar < 0x20 || usChar == 0x7f) { -00497 /* Ignore control characters */ -00498 DBG_HEX(usChar); -00499 DBG_FIXME(); -00500 return IGNORE_CHARACTER; -00501 } -00502 return (ULONG)usChar; -00503 } -00504 -00505 if (eEncoding == encoding_utf_8) { -00506 /* No need to convert Unicode characters */ -00507 return (ULONG)usChar; -00508 } -00509 -00510 /* Unicode to local representation */ -00511 pTmp = pGetCharTableRecord(usChar); -00512 if (pTmp != NULL) { -00513 DBG_HEX_C(usChar >= 0x7f && usChar <= 0x9f, usChar); -00514 return (ULONG)pTmp->ucLocal; -00515 } -00516 -00517 /* Fancy characters to simple US ASCII */ -00518 switch (usChar) { -00519 case UNICODE_SMALL_F_HOOK: -00520 return (ULONG)'f'; -00521 case UNICODE_GREEK_CAPITAL_CHI: -00522 return (ULONG)'X'; -00523 case UNICODE_GREEK_SMALL_UPSILON: -00524 return (ULONG)'v'; -00525 case UNICODE_MODIFIER_CIRCUMFLEX: -00526 case UNICODE_UPWARDS_ARROW: -00527 return (ULONG)'^'; -00528 case UNICODE_SMALL_TILDE: -00529 case UNICODE_TILDE_OPERATOR: -00530 return (ULONG)'~'; -00531 case UNICODE_EN_QUAD: -00532 case UNICODE_EM_QUAD: -00533 case UNICODE_EN_SPACE: -00534 case UNICODE_EM_SPACE: -00535 case UNICODE_THREE_PER_EM_SPACE: -00536 case UNICODE_FOUR_PER_EM_SPACE: -00537 case UNICODE_SIX_PER_EM_SPACE: -00538 case UNICODE_FIGURE_SPACE: -00539 case UNICODE_PUNCTUATION_SPACE: -00540 case UNICODE_THIN_SPACE: -00541 case UNICODE_NARROW_NO_BREAK_SPACE: -00542 case UNICODE_LIGHT_SHADE: -00543 case UNICODE_MEDIUM_SHADE: -00544 case UNICODE_DARK_SHADE: -00545 return (ULONG)' '; -00546 case UNICODE_LEFT_DOUBLE_QMARK: -00547 case UNICODE_RIGHT_DOUBLE_QMARK: -00548 case UNICODE_DOUBLE_LOW_9_QMARK: -00549 case UNICODE_DOUBLE_HIGH_REV_9_QMARK: -00550 case UNICODE_DOUBLE_PRIME: -00551 return (ULONG)'"'; -00552 case UNICODE_LEFT_SINGLE_QMARK: -00553 case UNICODE_RIGHT_SINGLE_QMARK: -00554 case UNICODE_SINGLE_LOW_9_QMARK: -00555 case UNICODE_SINGLE_HIGH_REV_9_QMARK: -00556 case UNICODE_PRIME: -00557 return (ULONG)'\''; -00558 case UNICODE_HYPHEN: -00559 case UNICODE_NON_BREAKING_HYPHEN: -00560 case UNICODE_FIGURE_DASH: -00561 case UNICODE_EN_DASH: -00562 case UNICODE_EM_DASH: -00563 case UNICODE_HORIZONTAL_BAR: -00564 case UNICODE_MINUS_SIGN: -00565 case UNICODE_BD_LIGHT_HORIZONTAL: -00566 case UNICODE_BD_DOUBLE_HORIZONTAL: -00567 return (ULONG)'-'; -00568 case UNICODE_DOUBLE_VERTICAL_LINE: -00569 case UNICODE_BD_LIGHT_VERTICAL: -00570 case UNICODE_BD_DOUBLE_VERTICAL: -00571 return (ULONG)'|'; -00572 case UNICODE_DOUBLE_LOW_LINE: -00573 return (ULONG)'_'; -00574 case UNICODE_DAGGER: -00575 return (ULONG)'+'; -00576 case UNICODE_DOUBLE_DAGGER: -00577 return (ULONG)'#'; -00578 case UNICODE_BULLET: -00579 case UNICODE_BULLET_OPERATOR: -00580 case UNICODE_BLACK_CLUB_SUIT: -00581 return (ULONG)ucGetBulletCharacter(eConversionType, eEncoding); -00582 case UNICODE_ONE_DOT_LEADER: -00583 case UNICODE_TWO_DOT_LEADER: -00584 return (ULONG)'.'; -00585 case UNICODE_ELLIPSIS: -00586 #if defined(__riscos) -00587 return (ULONG)OUR_ELLIPSIS; -00588 #else -00589 if (ulFileOffset == 0) { -00590 return (ULONG)OUR_ELLIPSIS; -00591 } -00592 return UNICODE_ELLIPSIS; -00593 #endif /* __riscos */ -00594 case UNICODE_DOUBLE_LEFT_ANGLE_QMARK: -00595 case UNICODE_TRIANGULAR_BULLET: -00596 case UNICODE_SINGLE_LEFT_ANGLE_QMARK: -00597 case UNICODE_LEFTWARDS_ARROW: -00598 return (ULONG)'<'; -00599 case UNICODE_DOUBLE_RIGHT_ANGLE_QMARK: -00600 case UNICODE_SINGLE_RIGHT_ANGLE_QMARK: -00601 case UNICODE_RIGHTWARDS_ARROW: -00602 return (ULONG)'>'; -00603 case UNICODE_UNDERTIE: -00604 return (ULONG)'-'; -00605 case UNICODE_N_ARY_SUMMATION: -00606 return (ULONG)'S'; -00607 case UNICODE_EURO_SIGN: -00608 return (ULONG)'E'; -00609 case UNICODE_CIRCLE: -00610 case UNICODE_SQUARE: -00611 return (ULONG)'O'; -00612 case UNICODE_DIAMOND: -00613 return (ULONG)OUR_DIAMOND; -00614 case UNICODE_NUMERO_SIGN: -00615 return (ULONG)'N'; -00616 case UNICODE_KELVIN_SIGN: -00617 return (ULONG)'K'; -00618 case UNICODE_DOWNWARDS_ARROW: -00619 return (ULONG)'v'; -00620 case UNICODE_FRACTION_SLASH: -00621 case UNICODE_DIVISION_SLASH: -00622 return (ULONG)'/'; -00623 case UNICODE_ASTERISK_OPERATOR: -00624 return (ULONG)'*'; -00625 case UNICODE_RATIO: -00626 return (ULONG)':'; -00627 case UNICODE_BD_LIGHT_DOWN_RIGHT: -00628 case UNICODE_BD_LIGHT_DOWN_AND_LEFT: -00629 case UNICODE_BD_LIGHT_UP_AND_RIGHT: -00630 case UNICODE_BD_LIGHT_UP_AND_LEFT: -00631 case UNICODE_BD_LIGHT_VERTICAL_AND_RIGHT: -00632 case UNICODE_BD_LIGHT_VERTICAL_AND_LEFT: -00633 case UNICODE_BD_LIGHT_DOWN_AND_HORIZONTAL: -00634 case UNICODE_BD_LIGHT_UP_AND_HORIZONTAL: -00635 case UNICODE_BD_LIGHT_VERTICAL_AND_HORIZONTAL: -00636 case UNICODE_BD_DOUBLE_DOWN_AND_RIGHT: -00637 case UNICODE_BD_DOUBLE_DOWN_AND_LEFT: -00638 case UNICODE_BD_DOUBLE_UP_AND_RIGHT: -00639 case UNICODE_BD_DOUBLE_UP_AND_LEFT: -00640 case UNICODE_BD_DOUBLE_VERTICAL_AND_RIGHT: -00641 case UNICODE_BD_DOUBLE_VERTICAL_AND_LEFT: -00642 case UNICODE_BD_DOUBLE_DOWN_AND_HORIZONTAL: -00643 case UNICODE_BD_DOUBLE_UP_AND_HORIZONTAL: -00644 case UNICODE_BD_DOUBLE_VERTICAL_AND_HORIZONTAL: -00645 case UNICODE_BLACK_SQUARE: -00646 return (ULONG)'+'; -00647 case UNICODE_HAIR_SPACE: -00648 case UNICODE_ZERO_WIDTH_SPACE: -00649 case UNICODE_ZERO_WIDTH_NON_JOINER: -00650 case UNICODE_ZERO_WIDTH_JOINER: -00651 case UNICODE_LEFT_TO_RIGHT_MARK: -00652 case UNICODE_RIGHT_TO_LEFT_MARK: -00653 case UNICODE_LEFT_TO_RIGHT_EMBEDDING: -00654 case UNICODE_RIGHT_TO_LEFT_EMBEDDING: -00655 case UNICODE_POP_DIRECTIONAL_FORMATTING: -00656 case UNICODE_LEFT_TO_RIGHT_OVERRIDE: -00657 case UNICODE_RIGHT_TO_LEFT_OVERRIDE: -00658 case UNICODE_ZERO_WIDTH_NO_BREAK_SPACE: -00659 return IGNORE_CHARACTER; -00660 default: -00661 break; -00662 } -00663 -00664 if (usChar == UNICODE_TRADEMARK_SIGN) { -00665 /* -00666 * No local representation, it doesn't look like anything in -00667 * US-ASCII and a question mark does more harm than good. -00668 */ -00669 return IGNORE_CHARACTER; -00670 } -00671 -00672 if (usChar >= 0xa0 && usChar <= 0xff) { -00673 /* Before Word 97, Word did't use Unicode */ -00674 return (ULONG)usChar; -00675 } -00676 -00677 DBG_HEX_C(usChar < 0x3000 || usChar >= 0xd800, ulFileOffset); -00678 DBG_HEX_C(usChar < 0x3000 || usChar >= 0xd800, usChar); -00679 DBG_MSG_C(usChar >= 0xe000 && usChar < 0xf900, "Private Use Area"); -00680 -00681 /* Untranslated Unicode character */ -00682 return 0x3f; -00683 } /* end of ulTranslateCharacters */ -00684 -00685 /* -00686 * ulToUpper - convert letter to upper case -00687 * -00688 * This function converts a letter to upper case. Unlike toupper(3) this -00689 * function is independent from the settings of locale. This comes in handy -00690 * for people who have to read Word documents in more than one language or -00691 * contain more than one language. -00692 * -00693 * returns the converted letter, or ulChar if the conversion was not possible. -00694 */ -00695 ULONG -00696 ulToUpper(ULONG ulChar) -00697 { -00698 if (ulChar < 0x80) { -00699 /* US ASCII: use standard function */ -00700 return (ULONG)toupper((int)ulChar); -00701 } -00702 if (ulChar >= 0xe0 && ulChar <= 0xfe && ulChar != 0xf7) { -00703 /* -00704 * Lower case accented characters -00705 * 0xf7 is Division sign; 0xd7 is Multiplication sign -00706 * 0xff is y with diaeresis; 0xdf is Sharp s -00707 */ -00708 return ulChar & ~0x20; -00709 } -00710 #if defined(__STDC_ISO_10646__) -00711 /* -00712 * If this is ISO C99 and all locales have wchar_t = ISO 10646 -00713 * (e.g., glibc 2.2 or newer), then use standard function -00714 */ -00715 if (ulChar > 0xff) { -00716 return (ULONG)towupper((wint_t)ulChar); -00717 } -00718 #endif /* __STDC_ISO_10646__ */ -00719 return ulChar; -00720 } /* end of ulToUpper */ -