diff -r 59758314f811 -r d4524d6a4472 Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/wordlib_8c_source.html --- a/Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/wordlib_8c_source.html Fri Jun 11 15:24:34 2010 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,375 +0,0 @@ - - -
- -00001 /* -00002 * wordlib.c -00003 * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL -00004 * -00005 * Description: -00006 * Deal with the internals of a MS Word file -00007 */ -00008 -00009 #include "antiword.h" -00010 -00011 static BOOL bOldMacFile = FALSE; -00012 -00013 -00014 /* -00015 * Common part of the file checking functions -00016 */ -00017 static BOOL -00018 bCheckBytes(FILE *pFile, const UCHAR *aucBytes, size_t tBytes) -00019 { -00020 int iIndex, iChar; -00021 -00022 fail(pFile == NULL || aucBytes == NULL || tBytes == 0); -00023 -00024 rewind(pFile); -00025 -00026 for (iIndex = 0; iIndex < (int)tBytes; iIndex++) { -00027 iChar = getc(pFile); -00028 if (iChar == EOF || iChar != (int)aucBytes[iIndex]) { -00029 NO_DBG_HEX(iChar); -00030 NO_DBG_HEX(aucBytes[iIndex]); -00031 return FALSE; -00032 } -00033 } -00034 return TRUE; -00035 } /* end of bCheckBytes */ -00036 -00037 /* -00038 * This function checks whether the given file is or is not a "Word for DOS" -00039 * document -00040 */ -00041 BOOL -00042 bIsWordForDosFile(FILE *pFile, long lFilesize) -00043 { -00044 static UCHAR aucBytes[] = -00045 { 0x31, 0xbe, 0x00, 0x00, 0x00, 0xab }; /* Word for DOS */ -00046 -00047 DBG_MSG("bIsWordForDosFile"); -00048 -00049 if (pFile == NULL || lFilesize < 0) { -00050 DBG_MSG("No proper file given"); -00051 return FALSE; -00052 } -00053 if (lFilesize < 128) { -00054 DBG_MSG("File too small to be a Word document"); -00055 return FALSE; -00056 } -00057 return bCheckBytes(pFile, aucBytes, elementsof(aucBytes)); -00058 } /* end of bIsWordForDosFile */ -00059 -00060 /* -00061 * This function checks whether the given file is or is not a file with an -00062 * OLE envelope (That is a document made by Word 6 or later) -00063 */ -00064 static BOOL -00065 bIsWordFileWithOLE(FILE *pFile, long lFilesize) -00066 { -00067 static UCHAR aucBytes[] = -00068 { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; -00069 int iTailLen; -00070 -00071 if (pFile == NULL || lFilesize < 0) { -00072 DBG_MSG("No proper file given"); -00073 return FALSE; -00074 } -00075 if (lFilesize < (long)BIG_BLOCK_SIZE * 3) { -00076 DBG_MSG("This file is too small to be a Word document"); -00077 return FALSE; -00078 } -00079 -00080 iTailLen = (int)(lFilesize % BIG_BLOCK_SIZE); -00081 switch (iTailLen) { -00082 case 0: /* No tail, as it should be */ -00083 break; -00084 case 1: -00085 case 2: /* Filesize mismatch or a buggy email program */ -00086 if ((int)(lFilesize % 3) == iTailLen) { -00087 DBG_DEC(lFilesize); -00088 return FALSE; -00089 } -00090 /* -00091 * Ignore extra bytes caused by buggy email programs. -00092 * They have bugs in their base64 encoding or decoding. -00093 * 3 bytes -> 4 ascii chars -> 3 bytes -00094 */ -00095 DBG_MSG("Document with extra bytes"); -00096 break; -00097 default: /* Wrong filesize for a Word document */ -00098 DBG_DEC(lFilesize); -00099 DBG_DEC(iTailLen); -00100 return FALSE; -00101 } -00102 return bCheckBytes(pFile, aucBytes, elementsof(aucBytes)); -00103 } /* end of bIsWordFileWithOLE */ -00104 -00105 /* -00106 * This function checks whether the given file is or is not a RTF document -00107 */ -00108 BOOL -00109 bIsRtfFile(FILE *pFile) -00110 { -00111 static UCHAR aucBytes[] = -00112 { '{', '\\', 'r', 't', 'f', '1' }; -00113 -00114 DBG_MSG("bIsRtfFile"); -00115 -00116 return bCheckBytes(pFile, aucBytes, elementsof(aucBytes)); -00117 } /* end of bIsRtfFile */ -00118 -00119 /* -00120 * This function checks whether the given file is or is not a WP document -00121 */ -00122 BOOL -00123 bIsWordPerfectFile(FILE *pFile) -00124 { -00125 static UCHAR aucBytes[] = -00126 { 0xff, 'W', 'P', 'C' }; -00127 -00128 DBG_MSG("bIsWordPerfectFile"); -00129 -00130 return bCheckBytes(pFile, aucBytes, elementsof(aucBytes)); -00131 } /* end of bIsWordPerfectFile */ -00132 -00133 /* -00134 * This function checks whether the given file is or is not a "Win Word 1 or 2" -00135 * document -00136 */ -00137 BOOL -00138 bIsWinWord12File(FILE *pFile, long lFilesize) -00139 { -00140 static UCHAR aucBytes[2][4] = { -00141 { 0x9b, 0xa5, 0x21, 0x00 }, /* Win Word 1.x */ -00142 { 0xdb, 0xa5, 0x2d, 0x00 }, /* Win Word 2.0 */ -00143 }; -00144 int iIndex; -00145 -00146 DBG_MSG("bIsWinWord12File"); -00147 -00148 if (pFile == NULL || lFilesize < 0) { -00149 DBG_MSG("No proper file given"); -00150 return FALSE; -00151 } -00152 if (lFilesize < 384) { -00153 DBG_MSG("This file is too small to be a Word document"); -00154 return FALSE; -00155 } -00156 -00157 for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) { -00158 if (bCheckBytes(pFile, -00159 aucBytes[iIndex], -00160 elementsof(aucBytes[iIndex]))) { -00161 return TRUE; -00162 } -00163 } -00164 return FALSE; -00165 } /* end of bIsWinWord12File */ -00166 -00167 /* -00168 * This function checks whether the given file is or is not a "Mac Word 4 or 5" -00169 * document -00170 */ -00171 BOOL -00172 bIsMacWord45File(FILE *pFile) -00173 { -00174 static UCHAR aucBytes[2][6] = { -00175 { 0xfe, 0x37, 0x00, 0x1c, 0x00, 0x00 }, /* Mac Word 4 */ -00176 { 0xfe, 0x37, 0x00, 0x23, 0x00, 0x00 }, /* Mac Word 5 */ -00177 }; -00178 int iIndex; -00179 -00180 DBG_MSG("bIsMacWord45File"); -00181 -00182 for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) { -00183 if (bCheckBytes(pFile, -00184 aucBytes[iIndex], -00185 elementsof(aucBytes[iIndex]))) { -00186 return TRUE; -00187 } -00188 } -00189 return FALSE; -00190 } /* end of bIsMacWord45File */ -00191 -00192 /* -00193 * iGuessVersionNumber - guess the Word version number from first few bytes -00194 * -00195 * Returns the guessed version number or -1 when no guess it possible -00196 */ -00197 int -00198 iGuessVersionNumber(FILE *pFile, long lFilesize) -00199 { -00200 if(bIsWordForDosFile(pFile, lFilesize)) { -00201 return 0; -00202 } -00203 if (bIsWinWord12File(pFile, lFilesize)) { -00204 return 2; -00205 } -00206 if (bIsMacWord45File(pFile)) { -00207 return 5; -00208 } -00209 if (bIsWordFileWithOLE(pFile, lFilesize)) { -00210 return 6; -00211 } -00212 return -1; -00213 } /* end of iGuessVersionNumber */ -00214 -00215 /* -00216 * iGetVersionNumber - get the Word version number from the header -00217 * -00218 * Returns the version number or -1 when unknown -00219 */ -00220 int -00221 iGetVersionNumber(const UCHAR *aucHeader) -00222 { -00223 USHORT usFib, usChse; -00224 -00225 usFib = usGetWord(0x02, aucHeader); -00226 if (usFib >= 0x1000) { -00227 /* To big: must be MacWord using Big Endian */ -00228 DBG_HEX(usFib); -00229 usFib = usGetWordBE(0x02, aucHeader); -00230 } -00231 DBG_DEC(usFib); -00232 bOldMacFile = FALSE; -00233 switch (usFib) { -00234 case 0: -00235 DBG_MSG("Word for DOS"); -00236 return 0; -00237 case 28: -00238 DBG_MSG("Word 4 for Macintosh"); -00239 bOldMacFile = TRUE; -00240 return 4; -00241 case 33: -00242 DBG_MSG("Word 1.x for Windows"); -00243 return 1; -00244 case 35: -00245 DBG_MSG("Word 5 for Macintosh"); -00246 bOldMacFile = TRUE; -00247 return 5; -00248 case 45: -00249 DBG_MSG("Word 2 for Windows"); -00250 return 2; -00251 case 101: -00252 case 102: -00253 DBG_MSG("Word 6 for Windows"); -00254 return 6; -00255 case 103: -00256 case 104: -00257 usChse = usGetWord(0x14, aucHeader); -00258 DBG_DEC(usChse); -00259 switch (usChse) { -00260 case 0: -00261 DBG_MSG("Word 7 for Win95"); -00262 return 7; -00263 case 256: -00264 DBG_MSG("Word 6 for Macintosh"); -00265 bOldMacFile = TRUE; -00266 return 6; -00267 default: -00268 DBG_FIXME(); -00269 if ((int)ucGetByte(0x05, aucHeader) == 0xe0) { -00270 DBG_MSG("Word 7 for Win95"); -00271 return 7; -00272 } -00273 DBG_MSG("Word 6 for Macintosh"); -00274 bOldMacFile = TRUE; -00275 return 6; -00276 } -00277 default: -00278 usChse = usGetWord(0x14, aucHeader); -00279 DBG_DEC(usChse); -00280 if (usFib < 192) { -00281 /* Unknown or unsupported version of Word */ -00282 DBG_DEC(usFib); -00283 return -1; -00284 } -00285 DBG_MSG_C(usChse != 256, "Word97 for Win95/98/NT"); -00286 DBG_MSG_C(usChse == 256, "Word98 for Macintosh"); -00287 return 8; -00288 } -00289 } /* end of iGetVersionNumber */ -00290 -00291 /* -00292 * TRUE if the current file was made by Word version 6 or older on an -00293 * Apple Macintosh, otherwise FALSE. -00294 * This function hides the methode of how to find out from the rest of the -00295 * program. -00296 */ -00297 BOOL -00298 bIsOldMacFile(void) -00299 { -00300 return bOldMacFile; -00301 } /* end of bIsOldMacFile */ -00302 -00303 /* -00304 * iInitDocument - initialize a document -00305 * -00306 * Returns the version of Word that made the document or -1 -00307 */ -00308 int -00309 iInitDocument(FILE *pFile, long lFilesize) -00310 { -00311 int iGuess, iWordVersion; -00312 -00313 iGuess = iGuessVersionNumber(pFile, lFilesize); -00314 switch (iGuess) { -00315 case 0: -00316 iWordVersion = iInitDocumentDOS(pFile, lFilesize); -00317 break; -00318 case 2: -00319 iWordVersion = iInitDocumentWIN(pFile, lFilesize); -00320 break; -00321 case 5: -00322 iWordVersion = iInitDocumentMAC(pFile, lFilesize); -00323 break; -00324 case 6: -00325 iWordVersion = iInitDocumentOLE(pFile, lFilesize); -00326 break; -00327 default: -00328 DBG_DEC(iGuess); -00329 iWordVersion = -1; -00330 break; -00331 } -00332 return iWordVersion; -00333 } /* end of iInitDocument */ -00334 -00335 /* -00336 * vFreeDocument - free a document by free-ing its parts -00337 */ -00338 void -00339 vFreeDocument(void) -00340 { -00341 DBG_MSG("vFreeDocument"); -00342 -00343 /* Free the memory */ -00344 vDestroyTextBlockList(); -00345 vDestroyDataBlockList(); -00346 vDestroyListInfoList(); -00347 vDestroyRowInfoList(); -00348 vDestroyStyleInfoList(); -00349 vDestroyFontInfoList(); -00350 vDestroyStylesheetList(); -00351 vDestroyPictInfoList(); -00352 vDestroyDocumentInfoList(); -00353 vDestroySectionInfoList(); -00354 vDestroyHdrFtrInfoList(); -00355 vDestroyPropModList(); -00356 vDestroyNotesInfoLists(); -00357 vDestroyFontTable(); -00358 vDestroySummaryInfo(); -00359 } /* end of vFreeDocument */ -