diff -r f345bda72bc4 -r 43e37759235e Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/wordlib_8c_source.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/wordlib_8c_source.html Tue Mar 30 16:16:55 2010 +0100 @@ -0,0 +1,375 @@ + + + + +TB9.2 Example Applications: examples/PIPS/antiword/src/wordlib.c Source File + + + + + +

examples/PIPS/antiword/src/wordlib.c

00001 /*
+00002  * wordlib.c
+00003  * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL
+00004  *
+00005  * Description:
+00006  * Deal with the internals of a MS Word file
+00007  */
+00008 
+00009 #include "antiword.h"
+00010 
+00011 static BOOL     bOldMacFile = FALSE;
+00012 
+00013 
+00014 /*
+00015  * Common part of the file checking functions
+00016  */
+00017 static BOOL
+00018 bCheckBytes(FILE *pFile, const UCHAR *aucBytes, size_t tBytes)
+00019 {
+00020         int     iIndex, iChar;
+00021 
+00022         fail(pFile == NULL || aucBytes == NULL || tBytes == 0);
+00023 
+00024         rewind(pFile);
+00025 
+00026         for (iIndex = 0; iIndex < (int)tBytes; iIndex++) {
+00027                 iChar = getc(pFile);
+00028                 if (iChar == EOF || iChar != (int)aucBytes[iIndex]) {
+00029                         NO_DBG_HEX(iChar);
+00030                         NO_DBG_HEX(aucBytes[iIndex]);
+00031                         return FALSE;
+00032                 }
+00033         }
+00034         return TRUE;
+00035 } /* end of bCheckBytes */
+00036 
+00037 /*
+00038  * This function checks whether the given file is or is not a "Word for DOS"
+00039  * document
+00040  */
+00041 BOOL
+00042 bIsWordForDosFile(FILE *pFile, long lFilesize)
+00043 {
+00044         static UCHAR    aucBytes[] =
+00045                 { 0x31, 0xbe, 0x00, 0x00, 0x00, 0xab }; /* Word for DOS */
+00046 
+00047         DBG_MSG("bIsWordForDosFile");
+00048 
+00049         if (pFile == NULL || lFilesize < 0) {
+00050                 DBG_MSG("No proper file given");
+00051                 return FALSE;
+00052         }
+00053         if (lFilesize < 128) {
+00054                 DBG_MSG("File too small to be a Word document");
+00055                 return FALSE;
+00056         }
+00057         return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
+00058 } /* end of bIsWordForDosFile */
+00059 
+00060 /*
+00061  * This function checks whether the given file is or is not a file with an
+00062  * OLE envelope (That is a document made by Word 6 or later)
+00063  */
+00064 static BOOL
+00065 bIsWordFileWithOLE(FILE *pFile, long lFilesize)
+00066 {
+00067         static UCHAR    aucBytes[] =
+00068                 { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
+00069         int     iTailLen;
+00070 
+00071         if (pFile == NULL || lFilesize < 0) {
+00072                 DBG_MSG("No proper file given");
+00073                 return FALSE;
+00074         }
+00075         if (lFilesize < (long)BIG_BLOCK_SIZE * 3) {
+00076                 DBG_MSG("This file is too small to be a Word document");
+00077                 return FALSE;
+00078         }
+00079 
+00080         iTailLen = (int)(lFilesize % BIG_BLOCK_SIZE);
+00081         switch (iTailLen) {
+00082         case 0:         /* No tail, as it should be */
+00083                 break;
+00084         case 1:
+00085         case 2:         /* Filesize mismatch or a buggy email program */
+00086                 if ((int)(lFilesize % 3) == iTailLen) {
+00087                         DBG_DEC(lFilesize);
+00088                         return FALSE;
+00089                 }
+00090                 /*
+00091                  * Ignore extra bytes caused by buggy email programs.
+00092                  * They have bugs in their base64 encoding or decoding.
+00093                  * 3 bytes -> 4 ascii chars -> 3 bytes
+00094                  */
+00095                 DBG_MSG("Document with extra bytes");
+00096                 break;
+00097         default:        /* Wrong filesize for a Word document */
+00098                 DBG_DEC(lFilesize);
+00099                 DBG_DEC(iTailLen);
+00100                 return FALSE;
+00101         }
+00102         return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
+00103 } /* end of bIsWordFileWithOLE */
+00104 
+00105 /*
+00106  * This function checks whether the given file is or is not a RTF document
+00107  */
+00108 BOOL
+00109 bIsRtfFile(FILE *pFile)
+00110 {
+00111         static UCHAR    aucBytes[] =
+00112                 { '{', '\\', 'r', 't', 'f', '1' };
+00113 
+00114         DBG_MSG("bIsRtfFile");
+00115 
+00116         return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
+00117 } /* end of bIsRtfFile */
+00118 
+00119 /*
+00120  * This function checks whether the given file is or is not a WP document
+00121  */
+00122 BOOL
+00123 bIsWordPerfectFile(FILE *pFile)
+00124 {
+00125         static UCHAR    aucBytes[] =
+00126                 { 0xff, 'W', 'P', 'C' };
+00127 
+00128         DBG_MSG("bIsWordPerfectFile");
+00129 
+00130         return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
+00131 } /* end of bIsWordPerfectFile */
+00132 
+00133 /*
+00134  * This function checks whether the given file is or is not a "Win Word 1 or 2"
+00135  * document
+00136  */
+00137 BOOL
+00138 bIsWinWord12File(FILE *pFile, long lFilesize)
+00139 {
+00140         static UCHAR    aucBytes[2][4] = {
+00141                 { 0x9b, 0xa5, 0x21, 0x00 },     /* Win Word 1.x */
+00142                 { 0xdb, 0xa5, 0x2d, 0x00 },     /* Win Word 2.0 */
+00143         };
+00144         int     iIndex;
+00145 
+00146         DBG_MSG("bIsWinWord12File");
+00147 
+00148         if (pFile == NULL || lFilesize < 0) {
+00149                 DBG_MSG("No proper file given");
+00150                 return FALSE;
+00151         }
+00152         if (lFilesize < 384) {
+00153                 DBG_MSG("This file is too small to be a Word document");
+00154                 return FALSE;
+00155         }
+00156 
+00157         for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) {
+00158                 if (bCheckBytes(pFile,
+00159                                 aucBytes[iIndex],
+00160                                 elementsof(aucBytes[iIndex]))) {
+00161                         return TRUE;
+00162                 }
+00163         }
+00164         return FALSE;
+00165 } /* end of bIsWinWord12File */
+00166 
+00167 /*
+00168  * This function checks whether the given file is or is not a "Mac Word 4 or 5"
+00169  * document
+00170  */
+00171 BOOL
+00172 bIsMacWord45File(FILE *pFile)
+00173 {
+00174         static UCHAR    aucBytes[2][6] = {
+00175                 { 0xfe, 0x37, 0x00, 0x1c, 0x00, 0x00 }, /* Mac Word 4 */
+00176                 { 0xfe, 0x37, 0x00, 0x23, 0x00, 0x00 }, /* Mac Word 5 */
+00177         };
+00178         int     iIndex;
+00179 
+00180         DBG_MSG("bIsMacWord45File");
+00181 
+00182         for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) {
+00183                 if (bCheckBytes(pFile,
+00184                                 aucBytes[iIndex],
+00185                                 elementsof(aucBytes[iIndex]))) {
+00186                         return TRUE;
+00187                 }
+00188         }
+00189         return FALSE;
+00190 } /* end of bIsMacWord45File */
+00191 
+00192 /*
+00193  * iGuessVersionNumber - guess the Word version number from first few bytes
+00194  *
+00195  * Returns the guessed version number or -1 when no guess is possible
+00196  */
+00197 int
+00198 iGuessVersionNumber(FILE *pFile, long lFilesize)
+00199 {
+00200         if(bIsWordForDosFile(pFile, lFilesize)) {
+00201                 return 0;
+00202         }
+00203         if (bIsWinWord12File(pFile, lFilesize)) {
+00204                 return 2;
+00205         }
+00206         if (bIsMacWord45File(pFile)) {
+00207                 return 5;
+00208         }
+00209         if (bIsWordFileWithOLE(pFile, lFilesize)) {
+00210                 return 6;
+00211         }
+00212         return -1;
+00213 } /* end of iGuessVersionNumber */
+00214 
+00215 /*
+00216  * iGetVersionNumber - get the Word version number from the header
+00217  *
+00218  * Returns the version number or -1 when unknown
+00219  */
+00220 int
+00221 iGetVersionNumber(const UCHAR *aucHeader)
+00222 {
+00223         USHORT  usFib, usChse;
+00224 
+00225         usFib = usGetWord(0x02, aucHeader);
+00226         if (usFib >= 0x1000) {
+00227                 /* Too big: must be MacWord using Big Endian */
+00228                 DBG_HEX(usFib);
+00229                 usFib = usGetWordBE(0x02, aucHeader);
+00230         }
+00231         DBG_DEC(usFib);
+00232         bOldMacFile = FALSE;
+00233         switch (usFib) {
+00234         case   0:
+00235                 DBG_MSG("Word for DOS");
+00236                 return 0;
+00237         case  28:
+00238                 DBG_MSG("Word 4 for Macintosh");
+00239                 bOldMacFile = TRUE;
+00240                 return 4;
+00241         case  33:
+00242                 DBG_MSG("Word 1.x for Windows");
+00243                 return 1;
+00244         case  35:
+00245                 DBG_MSG("Word 5 for Macintosh");
+00246                 bOldMacFile = TRUE;
+00247                 return 5;
+00248         case  45:
+00249                 DBG_MSG("Word 2 for Windows");
+00250                 return 2;
+00251         case 101:
+00252         case 102:
+00253                 DBG_MSG("Word 6 for Windows");
+00254                 return 6;
+00255         case 103:
+00256         case 104:
+00257                 usChse = usGetWord(0x14, aucHeader);
+00258                 DBG_DEC(usChse);
+00259                 switch (usChse) {
+00260                 case 0:
+00261                         DBG_MSG("Word 7 for Win95");
+00262                         return 7;
+00263                 case 256:
+00264                         DBG_MSG("Word 6 for Macintosh");
+00265                         bOldMacFile = TRUE;
+00266                         return 6;
+00267                 default:
+00268                         DBG_FIXME();
+00269                         if ((int)ucGetByte(0x05, aucHeader) == 0xe0) {
+00270                                 DBG_MSG("Word 7 for Win95");
+00271                                 return 7;
+00272                         }
+00273                         DBG_MSG("Word 6 for Macintosh");
+00274                         bOldMacFile = TRUE;
+00275                         return 6;
+00276                 }
+00277         default:
+00278                 usChse = usGetWord(0x14, aucHeader);
+00279                 DBG_DEC(usChse);
+00280                 if (usFib < 192) {
+00281                         /* Unknown or unsupported version of Word */
+00282                         DBG_DEC(usFib);
+00283                         return -1;
+00284                 }
+00285                 DBG_MSG_C(usChse != 256, "Word97 for Win95/98/NT");
+00286                 DBG_MSG_C(usChse == 256, "Word98 for Macintosh");
+00287                 return 8;
+00288         }
+00289 } /* end of iGetVersionNumber */
+00290 
+00291 /*
+00292  * TRUE if the current file was made by Word version 6 or older on an
+00293  * Apple Macintosh, otherwise FALSE.
+00294  * This function hides the method used to find this out from the rest of the
+00295  * program.
+00296  */
+00297 BOOL
+00298 bIsOldMacFile(void)
+00299 {
+00300         return bOldMacFile;
+00301 } /* end of bIsOldMacFile */
+00302 
+00303 /*
+00304  * iInitDocument - initialize a document
+00305  *
+00306  * Returns the version of Word that made the document or -1
+00307  */
+00308 int
+00309 iInitDocument(FILE *pFile, long lFilesize)
+00310 {
+00311         int     iGuess, iWordVersion;
+00312 
+00313         iGuess = iGuessVersionNumber(pFile, lFilesize);
+00314         switch (iGuess) {
+00315         case 0:
+00316                 iWordVersion = iInitDocumentDOS(pFile, lFilesize);
+00317                 break;
+00318         case 2:
+00319                 iWordVersion = iInitDocumentWIN(pFile, lFilesize);
+00320                 break;
+00321         case 5:
+00322                 iWordVersion = iInitDocumentMAC(pFile, lFilesize);
+00323                 break;
+00324         case 6:
+00325                 iWordVersion = iInitDocumentOLE(pFile, lFilesize);
+00326                 break;
+00327         default:
+00328                 DBG_DEC(iGuess);
+00329                 iWordVersion = -1;
+00330                 break;
+00331         }
+00332         return iWordVersion;
+00333 } /* end of iInitDocument */
+00334 
+00335 /*
+00336  * vFreeDocument - free a document by free-ing its parts
+00337  */
+00338 void
+00339 vFreeDocument(void)
+00340 {
+00341         DBG_MSG("vFreeDocument");
+00342 
+00343         /* Free the memory */
+00344         vDestroyTextBlockList();
+00345         vDestroyDataBlockList();
+00346         vDestroyListInfoList();
+00347         vDestroyRowInfoList();
+00348         vDestroyStyleInfoList();
+00349         vDestroyFontInfoList();
+00350         vDestroyStylesheetList();
+00351         vDestroyPictInfoList();
+00352         vDestroyDocumentInfoList();
+00353         vDestroySectionInfoList();
+00354         vDestroyHdrFtrInfoList();
+00355         vDestroyPropModList();
+00356         vDestroyNotesInfoLists();
+00357         vDestroyFontTable();
+00358         vDestroySummaryInfo();
+00359 } /* end of vFreeDocument */
+
+
Generated by  + +doxygen 1.6.2
+ +