diff -r f345bda72bc4 -r 43e37759235e Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/wordlib_8c_source.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/wordlib_8c_source.html Tue Mar 30 16:16:55 2010 +0100 @@ -0,0 +1,375 @@ + + +
+ +00001 /* +00002 * wordlib.c +00003 * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL +00004 * +00005 * Description: +00006 * Deal with the internals of a MS Word file +00007 */ +00008 +00009 #include "antiword.h" +00010 +00011 static BOOL bOldMacFile = FALSE; +00012 +00013 +00014 /* +00015 * Common part of the file checking functions +00016 */ +00017 static BOOL +00018 bCheckBytes(FILE *pFile, const UCHAR *aucBytes, size_t tBytes) +00019 { +00020 int iIndex, iChar; +00021 +00022 fail(pFile == NULL || aucBytes == NULL || tBytes == 0); +00023 +00024 rewind(pFile); +00025 +00026 for (iIndex = 0; iIndex < (int)tBytes; iIndex++) { +00027 iChar = getc(pFile); +00028 if (iChar == EOF || iChar != (int)aucBytes[iIndex]) { +00029 NO_DBG_HEX(iChar); +00030 NO_DBG_HEX(aucBytes[iIndex]); +00031 return FALSE; +00032 } +00033 } +00034 return TRUE; +00035 } /* end of bCheckBytes */ +00036 +00037 /* +00038 * This function checks whether the given file is or is not a "Word for DOS" +00039 * document +00040 */ +00041 BOOL +00042 bIsWordForDosFile(FILE *pFile, long lFilesize) +00043 { +00044 static UCHAR aucBytes[] = +00045 { 0x31, 0xbe, 0x00, 0x00, 0x00, 0xab }; /* Word for DOS */ +00046 +00047 DBG_MSG("bIsWordForDosFile"); +00048 +00049 if (pFile == NULL || lFilesize < 0) { +00050 DBG_MSG("No proper file given"); +00051 return FALSE; +00052 } +00053 if (lFilesize < 128) { +00054 DBG_MSG("File too small to be a Word document"); +00055 return FALSE; +00056 } +00057 return bCheckBytes(pFile, aucBytes, elementsof(aucBytes)); +00058 } /* end of bIsWordForDosFile */ +00059 +00060 /* +00061 * This function checks whether the given file is or is not a file with an +00062 * OLE envelope (That is a document made by Word 6 or later) +00063 */ +00064 static BOOL +00065 bIsWordFileWithOLE(FILE *pFile, long lFilesize) +00066 { +00067 static UCHAR aucBytes[] = +00068 { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; +00069 int iTailLen; +00070 +00071 if (pFile == NULL || lFilesize < 0) { +00072 DBG_MSG("No proper file given"); +00073 return FALSE; +00074 } +00075 if (lFilesize < (long)BIG_BLOCK_SIZE * 3) { +00076 DBG_MSG("This file is too small to be a Word document"); +00077 return FALSE; +00078 } +00079 +00080 iTailLen = (int)(lFilesize % BIG_BLOCK_SIZE); +00081 switch (iTailLen) { +00082 case 0: /* No tail, as it should be */ +00083 break; +00084 case 1: +00085 case 2: /* Filesize mismatch or a buggy email program */ +00086 if ((int)(lFilesize % 3) == iTailLen) { +00087 DBG_DEC(lFilesize); +00088 return FALSE; +00089 } +00090 /* +00091 * Ignore extra bytes caused by buggy email programs. +00092 * They have bugs in their base64 encoding or decoding. +00093 * 3 bytes -> 4 ascii chars -> 3 bytes +00094 */ +00095 DBG_MSG("Document with extra bytes"); +00096 break; +00097 default: /* Wrong filesize for a Word document */ +00098 DBG_DEC(lFilesize); +00099 DBG_DEC(iTailLen); +00100 return FALSE; +00101 } +00102 return bCheckBytes(pFile, aucBytes, elementsof(aucBytes)); +00103 } /* end of bIsWordFileWithOLE */ +00104 +00105 /* +00106 * This function checks whether the given file is or is not a RTF document +00107 */ +00108 BOOL +00109 bIsRtfFile(FILE *pFile) +00110 { +00111 static UCHAR aucBytes[] = +00112 { '{', '\\', 'r', 't', 'f', '1' }; +00113 +00114 DBG_MSG("bIsRtfFile"); +00115 +00116 return bCheckBytes(pFile, aucBytes, elementsof(aucBytes)); +00117 } /* end of bIsRtfFile */ +00118 +00119 /* +00120 * This function checks whether the given file is or is not a WP document +00121 */ +00122 BOOL +00123 bIsWordPerfectFile(FILE *pFile) +00124 { +00125 static UCHAR aucBytes[] = +00126 { 0xff, 'W', 'P', 'C' }; +00127 +00128 DBG_MSG("bIsWordPerfectFile"); +00129 +00130 return bCheckBytes(pFile, aucBytes, elementsof(aucBytes)); +00131 } /* end of bIsWordPerfectFile */ +00132 +00133 /* +00134 * This function checks whether the given file is or is not a "Win Word 1 or 2" +00135 * document +00136 */ +00137 BOOL +00138 bIsWinWord12File(FILE *pFile, long lFilesize) +00139 { +00140 static UCHAR aucBytes[2][4] = { +00141 { 0x9b, 0xa5, 0x21, 0x00 }, /* Win Word 1.x */ +00142 { 0xdb, 0xa5, 0x2d, 0x00 }, /* Win Word 2.0 */ +00143 }; +00144 int iIndex; +00145 +00146 DBG_MSG("bIsWinWord12File"); +00147 +00148 if (pFile == NULL || lFilesize < 0) { +00149 DBG_MSG("No proper file given"); +00150 return FALSE; +00151 } +00152 if (lFilesize < 384) { +00153 DBG_MSG("This file is too small to be a Word document"); +00154 return FALSE; +00155 } +00156 +00157 for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) { +00158 if (bCheckBytes(pFile, +00159 aucBytes[iIndex], +00160 elementsof(aucBytes[iIndex]))) { +00161 return TRUE; +00162 } +00163 } +00164 return FALSE; +00165 } /* end of bIsWinWord12File */ +00166 +00167 /* +00168 * This function checks whether the given file is or is not a "Mac Word 4 or 5" +00169 * document +00170 */ +00171 BOOL +00172 bIsMacWord45File(FILE *pFile) +00173 { +00174 static UCHAR aucBytes[2][6] = { +00175 { 0xfe, 0x37, 0x00, 0x1c, 0x00, 0x00 }, /* Mac Word 4 */ +00176 { 0xfe, 0x37, 0x00, 0x23, 0x00, 0x00 }, /* Mac Word 5 */ +00177 }; +00178 int iIndex; +00179 +00180 DBG_MSG("bIsMacWord45File"); +00181 +00182 for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) { +00183 if (bCheckBytes(pFile, +00184 aucBytes[iIndex], +00185 elementsof(aucBytes[iIndex]))) { +00186 return TRUE; +00187 } +00188 } +00189 return FALSE; +00190 } /* end of bIsMacWord45File */ +00191 +00192 /* +00193 * iGuessVersionNumber - guess the Word version number from first few bytes +00194 * +00195 * Returns the guessed version number or -1 when no guess it possible +00196 */ +00197 int +00198 iGuessVersionNumber(FILE *pFile, long lFilesize) +00199 { +00200 if(bIsWordForDosFile(pFile, lFilesize)) { +00201 return 0; +00202 } +00203 if (bIsWinWord12File(pFile, lFilesize)) { +00204 return 2; +00205 } +00206 if (bIsMacWord45File(pFile)) { +00207 return 5; +00208 } +00209 if (bIsWordFileWithOLE(pFile, lFilesize)) { +00210 return 6; +00211 } +00212 return -1; +00213 } /* end of iGuessVersionNumber */ +00214 +00215 /* +00216 * iGetVersionNumber - get the Word version number from the header +00217 * +00218 * Returns the version number or -1 when unknown +00219 */ +00220 int +00221 iGetVersionNumber(const UCHAR *aucHeader) +00222 { +00223 USHORT usFib, usChse; +00224 +00225 usFib = usGetWord(0x02, aucHeader); +00226 if (usFib >= 0x1000) { +00227 /* To big: must be MacWord using Big Endian */ +00228 DBG_HEX(usFib); +00229 usFib = usGetWordBE(0x02, aucHeader); +00230 } +00231 DBG_DEC(usFib); +00232 bOldMacFile = FALSE; +00233 switch (usFib) { +00234 case 0: +00235 DBG_MSG("Word for DOS"); +00236 return 0; +00237 case 28: +00238 DBG_MSG("Word 4 for Macintosh"); +00239 bOldMacFile = TRUE; +00240 return 4; +00241 case 33: +00242 DBG_MSG("Word 1.x for Windows"); +00243 return 1; +00244 case 35: +00245 DBG_MSG("Word 5 for Macintosh"); +00246 bOldMacFile = TRUE; +00247 return 5; +00248 case 45: +00249 DBG_MSG("Word 2 for Windows"); +00250 return 2; +00251 case 101: +00252 case 102: +00253 DBG_MSG("Word 6 for Windows"); +00254 return 6; +00255 case 103: +00256 case 104: +00257 usChse = usGetWord(0x14, aucHeader); +00258 DBG_DEC(usChse); +00259 switch (usChse) { +00260 case 0: +00261 DBG_MSG("Word 7 for Win95"); +00262 return 7; +00263 case 256: +00264 DBG_MSG("Word 6 for Macintosh"); +00265 bOldMacFile = TRUE; +00266 return 6; +00267 default: +00268 DBG_FIXME(); +00269 if ((int)ucGetByte(0x05, aucHeader) == 0xe0) { +00270 DBG_MSG("Word 7 for Win95"); +00271 return 7; +00272 } +00273 DBG_MSG("Word 6 for Macintosh"); +00274 bOldMacFile = TRUE; +00275 return 6; +00276 } +00277 default: +00278 usChse = usGetWord(0x14, aucHeader); +00279 DBG_DEC(usChse); +00280 if (usFib < 192) { +00281 /* Unknown or unsupported version of Word */ +00282 DBG_DEC(usFib); +00283 return -1; +00284 } +00285 DBG_MSG_C(usChse != 256, "Word97 for Win95/98/NT"); +00286 DBG_MSG_C(usChse == 256, "Word98 for Macintosh"); +00287 return 8; +00288 } +00289 } /* end of iGetVersionNumber */ +00290 +00291 /* +00292 * TRUE if the current file was made by Word version 6 or older on an +00293 * Apple Macintosh, otherwise FALSE. +00294 * This function hides the methode of how to find out from the rest of the +00295 * program. +00296 */ +00297 BOOL +00298 bIsOldMacFile(void) +00299 { +00300 return bOldMacFile; +00301 } /* end of bIsOldMacFile */ +00302 +00303 /* +00304 * iInitDocument - initialize a document +00305 * +00306 * Returns the version of Word that made the document or -1 +00307 */ +00308 int +00309 iInitDocument(FILE *pFile, long lFilesize) +00310 { +00311 int iGuess, iWordVersion; +00312 +00313 iGuess = iGuessVersionNumber(pFile, lFilesize); +00314 switch (iGuess) { +00315 case 0: +00316 iWordVersion = iInitDocumentDOS(pFile, lFilesize); +00317 break; +00318 case 2: +00319 iWordVersion = iInitDocumentWIN(pFile, lFilesize); +00320 break; +00321 case 5: +00322 iWordVersion = iInitDocumentMAC(pFile, lFilesize); +00323 break; +00324 case 6: +00325 iWordVersion = iInitDocumentOLE(pFile, lFilesize); +00326 break; +00327 default: +00328 DBG_DEC(iGuess); +00329 iWordVersion = -1; +00330 break; +00331 } +00332 return iWordVersion; +00333 } /* end of iInitDocument */ +00334 +00335 /* +00336 * vFreeDocument - free a document by free-ing its parts +00337 */ +00338 void +00339 vFreeDocument(void) +00340 { +00341 DBG_MSG("vFreeDocument"); +00342 +00343 /* Free the memory */ +00344 vDestroyTextBlockList(); +00345 vDestroyDataBlockList(); +00346 vDestroyListInfoList(); +00347 vDestroyRowInfoList(); +00348 vDestroyStyleInfoList(); +00349 vDestroyFontInfoList(); +00350 vDestroyStylesheetList(); +00351 vDestroyPictInfoList(); +00352 vDestroyDocumentInfoList(); +00353 vDestroySectionInfoList(); +00354 vDestroyHdrFtrInfoList(); +00355 vDestroyPropModList(); +00356 vDestroyNotesInfoLists(); +00357 vDestroyFontTable(); +00358 vDestroySummaryInfo(); +00359 } /* end of vFreeDocument */ +