diff -r 59758314f811 -r d4524d6a4472 Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/wordlib_8c_source.html --- a/Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/wordlib_8c_source.html Fri Jun 11 15:24:34 2010 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,375 +0,0 @@ - - - - -TB9.2 Example Applications: examples/PIPS/antiword/src/wordlib.c Source File - - - - - -

examples/PIPS/antiword/src/wordlib.c

00001 /*
-00002  * wordlib.c
-00003  * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL
-00004  *
-00005  * Description:
-00006  * Deal with the internals of a MS Word file
-00007  */
-00008 
-00009 #include "antiword.h"
-00010 
-00011 static BOOL     bOldMacFile = FALSE;
-00012 
-00013 
-00014 /*
-00015  * Common part of the file checking functions
-00016  */
-00017 static BOOL
-00018 bCheckBytes(FILE *pFile, const UCHAR *aucBytes, size_t tBytes)
-00019 {
-00020         int     iIndex, iChar;
-00021 
-00022         fail(pFile == NULL || aucBytes == NULL || tBytes == 0);
-00023 
-00024         rewind(pFile);
-00025 
-00026         for (iIndex = 0; iIndex < (int)tBytes; iIndex++) {
-00027                 iChar = getc(pFile);
-00028                 if (iChar == EOF || iChar != (int)aucBytes[iIndex]) {
-00029                         NO_DBG_HEX(iChar);
-00030                         NO_DBG_HEX(aucBytes[iIndex]);
-00031                         return FALSE;
-00032                 }
-00033         }
-00034         return TRUE;
-00035 } /* end of bCheckBytes */
-00036 
-00037 /*
-00038  * This function checks whether the given file is or is not a "Word for DOS"
-00039  * document
-00040  */
-00041 BOOL
-00042 bIsWordForDosFile(FILE *pFile, long lFilesize)
-00043 {
-00044         static UCHAR    aucBytes[] =
-00045                 { 0x31, 0xbe, 0x00, 0x00, 0x00, 0xab }; /* Word for DOS */
-00046 
-00047         DBG_MSG("bIsWordForDosFile");
-00048 
-00049         if (pFile == NULL || lFilesize < 0) {
-00050                 DBG_MSG("No proper file given");
-00051                 return FALSE;
-00052         }
-00053         if (lFilesize < 128) {
-00054                 DBG_MSG("File too small to be a Word document");
-00055                 return FALSE;
-00056         }
-00057         return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
-00058 } /* end of bIsWordForDosFile */
-00059 
-00060 /*
-00061  * This function checks whether the given file is or is not a file with an
-00062  * OLE envelope (That is a document made by Word 6 or later)
-00063  */
-00064 static BOOL
-00065 bIsWordFileWithOLE(FILE *pFile, long lFilesize)
-00066 {
-00067         static UCHAR    aucBytes[] =
-00068                 { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
-00069         int     iTailLen;
-00070 
-00071         if (pFile == NULL || lFilesize < 0) {
-00072                 DBG_MSG("No proper file given");
-00073                 return FALSE;
-00074         }
-00075         if (lFilesize < (long)BIG_BLOCK_SIZE * 3) {
-00076                 DBG_MSG("This file is too small to be a Word document");
-00077                 return FALSE;
-00078         }
-00079 
-00080         iTailLen = (int)(lFilesize % BIG_BLOCK_SIZE);
-00081         switch (iTailLen) {
-00082         case 0:         /* No tail, as it should be */
-00083                 break;
-00084         case 1:
-00085         case 2:         /* Filesize mismatch or a buggy email program */
-00086                 if ((int)(lFilesize % 3) == iTailLen) {
-00087                         DBG_DEC(lFilesize);
-00088                         return FALSE;
-00089                 }
-00090                 /*
-00091                  * Ignore extra bytes caused by buggy email programs.
-00092                  * They have bugs in their base64 encoding or decoding.
-00093                  * 3 bytes -> 4 ascii chars -> 3 bytes
-00094                  */
-00095                 DBG_MSG("Document with extra bytes");
-00096                 break;
-00097         default:        /* Wrong filesize for a Word document */
-00098                 DBG_DEC(lFilesize);
-00099                 DBG_DEC(iTailLen);
-00100                 return FALSE;
-00101         }
-00102         return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
-00103 } /* end of bIsWordFileWithOLE */
-00104 
-00105 /*
-00106  * This function checks whether the given file is or is not a RTF document
-00107  */
-00108 BOOL
-00109 bIsRtfFile(FILE *pFile)
-00110 {
-00111         static UCHAR    aucBytes[] =
-00112                 { '{', '\\', 'r', 't', 'f', '1' };
-00113 
-00114         DBG_MSG("bIsRtfFile");
-00115 
-00116         return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
-00117 } /* end of bIsRtfFile */
-00118 
-00119 /*
-00120  * This function checks whether the given file is or is not a WP document
-00121  */
-00122 BOOL
-00123 bIsWordPerfectFile(FILE *pFile)
-00124 {
-00125         static UCHAR    aucBytes[] =
-00126                 { 0xff, 'W', 'P', 'C' };
-00127 
-00128         DBG_MSG("bIsWordPerfectFile");
-00129 
-00130         return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
-00131 } /* end of bIsWordPerfectFile */
-00132 
-00133 /*
-00134  * This function checks whether the given file is or is not a "Win Word 1 or 2"
-00135  * document
-00136  */
-00137 BOOL
-00138 bIsWinWord12File(FILE *pFile, long lFilesize)
-00139 {
-00140         static UCHAR    aucBytes[2][4] = {
-00141                 { 0x9b, 0xa5, 0x21, 0x00 },     /* Win Word 1.x */
-00142                 { 0xdb, 0xa5, 0x2d, 0x00 },     /* Win Word 2.0 */
-00143         };
-00144         int     iIndex;
-00145 
-00146         DBG_MSG("bIsWinWord12File");
-00147 
-00148         if (pFile == NULL || lFilesize < 0) {
-00149                 DBG_MSG("No proper file given");
-00150                 return FALSE;
-00151         }
-00152         if (lFilesize < 384) {
-00153                 DBG_MSG("This file is too small to be a Word document");
-00154                 return FALSE;
-00155         }
-00156 
-00157         for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) {
-00158                 if (bCheckBytes(pFile,
-00159                                 aucBytes[iIndex],
-00160                                 elementsof(aucBytes[iIndex]))) {
-00161                         return TRUE;
-00162                 }
-00163         }
-00164         return FALSE;
-00165 } /* end of bIsWinWord12File */
-00166 
-00167 /*
-00168  * This function checks whether the given file is or is not a "Mac Word 4 or 5"
-00169  * document
-00170  */
-00171 BOOL
-00172 bIsMacWord45File(FILE *pFile)
-00173 {
-00174         static UCHAR    aucBytes[2][6] = {
-00175                 { 0xfe, 0x37, 0x00, 0x1c, 0x00, 0x00 }, /* Mac Word 4 */
-00176                 { 0xfe, 0x37, 0x00, 0x23, 0x00, 0x00 }, /* Mac Word 5 */
-00177         };
-00178         int     iIndex;
-00179 
-00180         DBG_MSG("bIsMacWord45File");
-00181 
-00182         for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) {
-00183                 if (bCheckBytes(pFile,
-00184                                 aucBytes[iIndex],
-00185                                 elementsof(aucBytes[iIndex]))) {
-00186                         return TRUE;
-00187                 }
-00188         }
-00189         return FALSE;
-00190 } /* end of bIsMacWord45File */
-00191 
-00192 /*
-00193  * iGuessVersionNumber - guess the Word version number from first few bytes
-00194  *
-00195  * Returns the guessed version number or -1 when no guess is possible
-00196  */
-00197 int
-00198 iGuessVersionNumber(FILE *pFile, long lFilesize)
-00199 {
-00200         if(bIsWordForDosFile(pFile, lFilesize)) {
-00201                 return 0;
-00202         }
-00203         if (bIsWinWord12File(pFile, lFilesize)) {
-00204                 return 2;
-00205         }
-00206         if (bIsMacWord45File(pFile)) {
-00207                 return 5;
-00208         }
-00209         if (bIsWordFileWithOLE(pFile, lFilesize)) {
-00210                 return 6;
-00211         }
-00212         return -1;
-00213 } /* end of iGuessVersionNumber */
-00214 
-00215 /*
-00216  * iGetVersionNumber - get the Word version number from the header
-00217  *
-00218  * Returns the version number or -1 when unknown
-00219  */
-00220 int
-00221 iGetVersionNumber(const UCHAR *aucHeader)
-00222 {
-00223         USHORT  usFib, usChse;
-00224 
-00225         usFib = usGetWord(0x02, aucHeader);
-00226         if (usFib >= 0x1000) {
-00227                 /* Too big: must be MacWord using Big Endian */
-00228                 DBG_HEX(usFib);
-00229                 usFib = usGetWordBE(0x02, aucHeader);
-00230         }
-00231         DBG_DEC(usFib);
-00232         bOldMacFile = FALSE;
-00233         switch (usFib) {
-00234         case   0:
-00235                 DBG_MSG("Word for DOS");
-00236                 return 0;
-00237         case  28:
-00238                 DBG_MSG("Word 4 for Macintosh");
-00239                 bOldMacFile = TRUE;
-00240                 return 4;
-00241         case  33:
-00242                 DBG_MSG("Word 1.x for Windows");
-00243                 return 1;
-00244         case  35:
-00245                 DBG_MSG("Word 5 for Macintosh");
-00246                 bOldMacFile = TRUE;
-00247                 return 5;
-00248         case  45:
-00249                 DBG_MSG("Word 2 for Windows");
-00250                 return 2;
-00251         case 101:
-00252         case 102:
-00253                 DBG_MSG("Word 6 for Windows");
-00254                 return 6;
-00255         case 103:
-00256         case 104:
-00257                 usChse = usGetWord(0x14, aucHeader);
-00258                 DBG_DEC(usChse);
-00259                 switch (usChse) {
-00260                 case 0:
-00261                         DBG_MSG("Word 7 for Win95");
-00262                         return 7;
-00263                 case 256:
-00264                         DBG_MSG("Word 6 for Macintosh");
-00265                         bOldMacFile = TRUE;
-00266                         return 6;
-00267                 default:
-00268                         DBG_FIXME();
-00269                         if ((int)ucGetByte(0x05, aucHeader) == 0xe0) {
-00270                                 DBG_MSG("Word 7 for Win95");
-00271                                 return 7;
-00272                         }
-00273                         DBG_MSG("Word 6 for Macintosh");
-00274                         bOldMacFile = TRUE;
-00275                         return 6;
-00276                 }
-00277         default:
-00278                 usChse = usGetWord(0x14, aucHeader);
-00279                 DBG_DEC(usChse);
-00280                 if (usFib < 192) {
-00281                         /* Unknown or unsupported version of Word */
-00282                         DBG_DEC(usFib);
-00283                         return -1;
-00284                 }
-00285                 DBG_MSG_C(usChse != 256, "Word97 for Win95/98/NT");
-00286                 DBG_MSG_C(usChse == 256, "Word98 for Macintosh");
-00287                 return 8;
-00288         }
-00289 } /* end of iGetVersionNumber */
-00290 
-00291 /*
-00292  * TRUE if the current file was made by Word version 6 or older on an
-00293  * Apple Macintosh, otherwise FALSE.
-00294  * This function hides the method of finding this out from the rest of the
-00295  * program.
-00296  */
-00297 BOOL
-00298 bIsOldMacFile(void)
-00299 {
-00300         return bOldMacFile;
-00301 } /* end of bIsOldMacFile */
-00302 
-00303 /*
-00304  * iInitDocument - initialize a document
-00305  *
-00306  * Returns the version of Word that made the document or -1
-00307  */
-00308 int
-00309 iInitDocument(FILE *pFile, long lFilesize)
-00310 {
-00311         int     iGuess, iWordVersion;
-00312 
-00313         iGuess = iGuessVersionNumber(pFile, lFilesize);
-00314         switch (iGuess) {
-00315         case 0:
-00316                 iWordVersion = iInitDocumentDOS(pFile, lFilesize);
-00317                 break;
-00318         case 2:
-00319                 iWordVersion = iInitDocumentWIN(pFile, lFilesize);
-00320                 break;
-00321         case 5:
-00322                 iWordVersion = iInitDocumentMAC(pFile, lFilesize);
-00323                 break;
-00324         case 6:
-00325                 iWordVersion = iInitDocumentOLE(pFile, lFilesize);
-00326                 break;
-00327         default:
-00328                 DBG_DEC(iGuess);
-00329                 iWordVersion = -1;
-00330                 break;
-00331         }
-00332         return iWordVersion;
-00333 } /* end of iInitDocument */
-00334 
-00335 /*
-00336  * vFreeDocument - free a document by free-ing its parts
-00337  */
-00338 void
-00339 vFreeDocument(void)
-00340 {
-00341         DBG_MSG("vFreeDocument");
-00342 
-00343         /* Free the memory */
-00344         vDestroyTextBlockList();
-00345         vDestroyDataBlockList();
-00346         vDestroyListInfoList();
-00347         vDestroyRowInfoList();
-00348         vDestroyStyleInfoList();
-00349         vDestroyFontInfoList();
-00350         vDestroyStylesheetList();
-00351         vDestroyPictInfoList();
-00352         vDestroyDocumentInfoList();
-00353         vDestroySectionInfoList();
-00354         vDestroyHdrFtrInfoList();
-00355         vDestroyPropModList();
-00356         vDestroyNotesInfoLists();
-00357         vDestroyFontTable();
-00358         vDestroySummaryInfo();
-00359 } /* end of vFreeDocument */
-
-
Generated by  - -doxygen 1.6.2
- -