diff -r f345bda72bc4 -r 43e37759235e Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/findtext_8c_source.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/findtext_8c_source.html Tue Mar 30 16:16:55 2010 +0100 @@ -0,0 +1,305 @@ + + + + +TB9.2 Example Applications: examples/PIPS/antiword/src/findtext.c Source File + + + + + +

examples/PIPS/antiword/src/findtext.c

00001 /*
+00002  * findtext.c
+00003  * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL
+00004  *
+00005  * Description:
+00006  * Find the blocks that contain the text of MS Word files
+00007  */
+00008 
+00009 #include <stdio.h>
+00010 #include <stdlib.h>
+00011 #include "antiword.h"
+00012 
+00013 
+00014 /*
+00015  * bAddTextBlocks - Add the blocks to the text block list
+00016  *
+00017  * Returns TRUE when successful, FALSE if not
+00018  */
+00019 BOOL
+00020 bAddTextBlocks(ULONG ulCharPosFirst, ULONG ulTotalLength,
+00021         BOOL bUsesUnicode, USHORT usPropMod,
+00022         ULONG ulStartBlock, const ULONG *aulBBD, size_t tBBDLen)
+00023 {
+00024         text_block_type tTextBlock;
+00025         ULONG   ulCharPos, ulOffset, ulIndex;
+00026         long    lToGo;
+00027 
+00028         fail(ulTotalLength > (ULONG)LONG_MAX / 2);
+00029         fail(ulStartBlock > MAX_BLOCKNUMBER && ulStartBlock != END_OF_CHAIN);
+00030         fail(aulBBD == NULL);
+00031 
+00032         NO_DBG_HEX(ulCharPosFirst);
+00033         NO_DBG_DEC(ulTotalLength);
+00034 
+00035         if (bUsesUnicode) {
+00036                 /* One character equals two bytes */
+00037                 NO_DBG_MSG("Uses Unicode");
+00038                 lToGo = (long)ulTotalLength * 2;
+00039         } else {
+00040                 /* One character equals one byte */
+00041                 NO_DBG_MSG("Uses ASCII");
+00042                 lToGo = (long)ulTotalLength;
+00043         }
+00044 
+00045         ulCharPos = ulCharPosFirst;
+00046         ulOffset = ulCharPosFirst;
+00047         for (ulIndex = ulStartBlock;
+00048              ulIndex != END_OF_CHAIN && lToGo > 0;
+00049              ulIndex = aulBBD[ulIndex]) {
+00050                 if (ulIndex >= (ULONG)tBBDLen) {
+00051                         DBG_DEC(ulIndex);
+00052                         DBG_DEC(tBBDLen);
+00053                         werr(1, "The Big Block Depot is damaged");
+00054                 }
+00055                 if (ulOffset >= BIG_BLOCK_SIZE) {
+00056                         ulOffset -= BIG_BLOCK_SIZE;
+00057                         continue;
+00058                 }
+00059                 tTextBlock.ulFileOffset =
+00060                         (ulIndex + 1) * BIG_BLOCK_SIZE + ulOffset;
+00061                 tTextBlock.ulCharPos = ulCharPos;
+00062                 tTextBlock.ulLength = min(BIG_BLOCK_SIZE - ulOffset,
+00063                                                 (ULONG)lToGo);
+00064                 tTextBlock.bUsesUnicode = bUsesUnicode;
+00065                 tTextBlock.usPropMod = usPropMod;
+00066                 ulOffset = 0;
+00067                 if (!bAdd2TextBlockList(&tTextBlock)) {
+00068                         DBG_HEX(tTextBlock.ulFileOffset);
+00069                         DBG_HEX(tTextBlock.ulCharPos);
+00070                         DBG_DEC(tTextBlock.ulLength);
+00071                         DBG_DEC(tTextBlock.bUsesUnicode);
+00072                         DBG_DEC(tTextBlock.usPropMod);
+00073                         return FALSE;
+00074                 }
+00075                 ulCharPos += tTextBlock.ulLength;
+00076                 lToGo -= (long)tTextBlock.ulLength;
+00077         }
+00078         DBG_DEC_C(lToGo != 0, lToGo);
+00079         return lToGo == 0;
+00080 } /* end of bAddTextBlocks */
+00081 
+00082 /*
+00083  * bGet6DocumentText - make a list of the text blocks of Word 6/7 files
+00084  *
+00085  * Code for "fast saved" files.
+00086  *
+00087  * Returns TRUE when successful, FALSE if not
+00088  */
+00089 BOOL
+00090 bGet6DocumentText(FILE *pFile, BOOL bUsesUnicode, ULONG ulStartBlock,
+00091         const ULONG *aulBBD, size_t tBBDLen, const UCHAR *aucHeader)
+00092 {
+00093         UCHAR   *aucBuffer;
+00094         ULONG   ulBeginTextInfo, ulTextOffset, ulTotLength;
+00095         size_t  tTextInfoLen;
+00096         int     iIndex, iType, iOff, iLen, iPieces;
+00097         USHORT  usPropMod;
+00098 
+00099         DBG_MSG("bGet6DocumentText");
+00100 
+00101         fail(pFile == NULL);
+00102         fail(aulBBD == NULL);
+00103         fail(aucHeader == NULL);
+00104 
+00105         ulBeginTextInfo = ulGetLong(0x160, aucHeader);  /* fcClx */
+00106         DBG_HEX(ulBeginTextInfo);
+00107         tTextInfoLen = (size_t)ulGetLong(0x164, aucHeader);     /* lcbClx */
+00108         DBG_DEC(tTextInfoLen);
+00109 
+00110         aucBuffer = xmalloc(tTextInfoLen);
+00111         if (!bReadBuffer(pFile, ulStartBlock,
+00112                         aulBBD, tBBDLen, BIG_BLOCK_SIZE,
+00113                         aucBuffer, ulBeginTextInfo, tTextInfoLen)) {
+00114                 aucBuffer = xfree(aucBuffer);
+00115                 return FALSE;
+00116         }
+00117         NO_DBG_PRINT_BLOCK(aucBuffer, tTextInfoLen);
+00118 
+00119         iOff = 0;
+00120         while ((size_t)iOff < tTextInfoLen) {
+00121                 iType = (int)ucGetByte(iOff, aucBuffer);
+00122                 iOff++;
+00123                 if (iType == 0) {
+00124                         DBG_FIXME();
+00125                         iOff++;
+00126                         continue;
+00127                 }
+00128                 if (iType == 1) {
+00129                         iLen = (int)usGetWord(iOff, aucBuffer);
+00130                         vAdd2PropModList(aucBuffer + iOff);
+00131                         iOff += iLen + 2;
+00132                         continue;
+00133                 }
+00134                 if (iType != 2) {
+00135                         werr(0, "Unknown type of 'fastsaved' format");
+00136                         aucBuffer = xfree(aucBuffer);
+00137                         return FALSE;
+00138                 }
+00139                 /* Type 2 */
+00140                 iLen = (int)usGetWord(iOff, aucBuffer);
+00141                 NO_DBG_DEC(iLen);
+00142                 iOff += 4;
+00143                 iPieces = (iLen - 4) / 12;
+00144                 DBG_DEC(iPieces);
+00145                 for (iIndex = 0; iIndex < iPieces; iIndex++) {
+00146                         ulTextOffset = ulGetLong(
+00147                                 iOff + (iPieces + 1) * 4 + iIndex * 8 + 2,
+00148                                 aucBuffer);
+00149                         usPropMod = usGetWord(
+00150                                 iOff + (iPieces + 1) * 4 + iIndex * 8 + 6,
+00151                                 aucBuffer);
+00152                         ulTotLength = ulGetLong(iOff + (iIndex + 1) * 4,
+00153                                                 aucBuffer) -
+00154                                         ulGetLong(iOff + iIndex * 4,
+00155                                                 aucBuffer);
+00156                         NO_DBG_HEX_C(usPropMod != 0, usPropMod);
+00157                         if (!bAddTextBlocks(ulTextOffset, ulTotLength,
+00158                                         bUsesUnicode, usPropMod,
+00159                                         ulStartBlock,
+00160                                         aulBBD, tBBDLen)) {
+00161                                 aucBuffer = xfree(aucBuffer);
+00162                                 return FALSE;
+00163                         }
+00164                 }
+00165                 break;
+00166         }
+00167         aucBuffer = xfree(aucBuffer);
+00168         return TRUE;
+00169 } /* end of bGet6DocumentText */
+00170 
+00171 /*
+00172  * bGet8DocumentText - make a list of the text blocks of Word 8/97 files
+00173  *
+00174  * Returns TRUE when successful, FALSE if not
+00175  */
+00176 BOOL
+00177 bGet8DocumentText(FILE *pFile, const pps_info_type *pPPS,
+00178         const ULONG *aulBBD, size_t tBBDLen,
+00179         const ULONG *aulSBD, size_t tSBDLen,
+00180         const UCHAR *aucHeader)
+00181 {
+00182         const ULONG     *aulBlockDepot;
+00183         UCHAR   *aucBuffer;
+00184         ULONG   ulTextOffset, ulBeginTextInfo;
+00185         ULONG   ulTotLength, ulLen;
+00186         long    lIndex, lPieces, lOff;
+00187         size_t  tTextInfoLen, tBlockDepotLen, tBlockSize;
+00188         int     iType, iLen;
+00189         BOOL    bUsesUnicode;
+00190         USHORT  usPropMod;
+00191 
+00192         DBG_MSG("bGet8DocumentText");
+00193 
+00194         fail(pFile == NULL || pPPS == NULL);
+00195         fail(aulBBD == NULL || aulSBD == NULL);
+00196         fail(aucHeader == NULL);
+00197 
+00198         ulBeginTextInfo = ulGetLong(0x1a2, aucHeader);  /* fcClx */
+00199         DBG_HEX(ulBeginTextInfo);
+00200         tTextInfoLen = (size_t)ulGetLong(0x1a6, aucHeader);     /* lcbClx */
+00201         DBG_DEC(tTextInfoLen);
+00202 
+00203         DBG_DEC(pPPS->tTable.ulSB);
+00204         DBG_HEX(pPPS->tTable.ulSize);
+00205         if (pPPS->tTable.ulSize == 0) {
+00206                 return FALSE;
+00207         }
+00208 
+00209         if (pPPS->tTable.ulSize < MIN_SIZE_FOR_BBD_USE) {
+00210                 /* Use the Small Block Depot */
+00211                 aulBlockDepot = aulSBD;
+00212                 tBlockDepotLen = tSBDLen;
+00213                 tBlockSize = SMALL_BLOCK_SIZE;
+00214         } else {
+00215                 /* Use the Big Block Depot */
+00216                 aulBlockDepot = aulBBD;
+00217                 tBlockDepotLen = tBBDLen;
+00218                 tBlockSize = BIG_BLOCK_SIZE;
+00219         }
+00220         aucBuffer = xmalloc(tTextInfoLen);
+00221         if (!bReadBuffer(pFile, pPPS->tTable.ulSB,
+00222                         aulBlockDepot, tBlockDepotLen, tBlockSize,
+00223                         aucBuffer, ulBeginTextInfo, tTextInfoLen)) {
+00224                 aucBuffer = xfree(aucBuffer);
+00225                 return FALSE;
+00226         }
+00227         NO_DBG_PRINT_BLOCK(aucBuffer, tTextInfoLen);
+00228 
+00229         lOff = 0;
+00230         while (lOff < (long)tTextInfoLen) {
+00231                 iType = (int)ucGetByte(lOff, aucBuffer);
+00232                 lOff++;
+00233                 if (iType == 0) {
+00234                         DBG_FIXME();
+00235                         lOff++;
+00236                         continue;
+00237                 }
+00238                 if (iType == 1) {
+00239                         iLen = (int)usGetWord(lOff, aucBuffer);
+00240                         vAdd2PropModList(aucBuffer + lOff);
+00241                         lOff += (long)iLen + 2;
+00242                         continue;
+00243                 }
+00244                 if (iType != 2) {
+00245                         werr(0, "Unknown type of 'fastsaved' format");
+00246                         aucBuffer = xfree(aucBuffer);
+00247                         return FALSE;
+00248                 }
+00249                 /* Type 2 */
+00250                 ulLen = ulGetLong(lOff, aucBuffer);
+00251                 if (ulLen < 4) {
+00252                         DBG_DEC(ulLen);
+00253                         return FALSE;
+00254                 }
+00255                 lOff += 4;
+00256                 lPieces = (long)((ulLen - 4) / 12);
+00257                 DBG_DEC(lPieces);
+00258                 for (lIndex = 0; lIndex < lPieces; lIndex++) {
+00259                         ulTextOffset = ulGetLong(
+00260                                 lOff + (lPieces + 1) * 4 + lIndex * 8 + 2,
+00261                                 aucBuffer);
+00262                         usPropMod = usGetWord(
+00263                                 lOff + (lPieces + 1) * 4 + lIndex * 8 + 6,
+00264                                 aucBuffer);
+00265                         ulTotLength = ulGetLong(lOff + (lIndex + 1) * 4,
+00266                                                 aucBuffer) -
+00267                                         ulGetLong(lOff + lIndex * 4,
+00268                                                 aucBuffer);
+00269                         if ((ulTextOffset & BIT(30)) == 0) {
+00270                                 bUsesUnicode = TRUE;
+00271                         } else {
+00272                                 bUsesUnicode = FALSE;
+00273                                 ulTextOffset &= ~BIT(30);
+00274                                 ulTextOffset /= 2;
+00275                         }
+00276                         NO_DBG_HEX_C(usPropMod != 0, usPropMod);
+00277                         if (!bAddTextBlocks(ulTextOffset, ulTotLength,
+00278                                         bUsesUnicode, usPropMod,
+00279                                         pPPS->tWordDocument.ulSB,
+00280                                         aulBBD, tBBDLen)) {
+00281                                 aucBuffer = xfree(aucBuffer);
+00282                                 return FALSE;
+00283                         }
+00284                 }
+00285                 break;
+00286         }
+00287         aucBuffer = xfree(aucBuffer);
+00288         return TRUE;
+00289 } /* end of bGet8DocumentText */
+
+
Generated by  + +doxygen 1.6.2
+ +