diff -r f345bda72bc4 -r 43e37759235e Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/findtext_8c_source.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/findtext_8c_source.html Tue Mar 30 16:16:55 2010 +0100 @@ -0,0 +1,305 @@ + + +
+ +00001 /* +00002 * findtext.c +00003 * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL +00004 * +00005 * Description: +00006 * Find the blocks that contain the text of MS Word files +00007 */ +00008 +00009 #include <stdio.h> +00010 #include <stdlib.h> +00011 #include "antiword.h" +00012 +00013 +00014 /* +00015 * bAddTextBlocks - Add the blocks to the text block list +00016 * +00017 * Returns TRUE when successful, FALSE if not +00018 */ +00019 BOOL +00020 bAddTextBlocks(ULONG ulCharPosFirst, ULONG ulTotalLength, +00021 BOOL bUsesUnicode, USHORT usPropMod, +00022 ULONG ulStartBlock, const ULONG *aulBBD, size_t tBBDLen) +00023 { +00024 text_block_type tTextBlock; +00025 ULONG ulCharPos, ulOffset, ulIndex; +00026 long lToGo; +00027 +00028 fail(ulTotalLength > (ULONG)LONG_MAX / 2); +00029 fail(ulStartBlock > MAX_BLOCKNUMBER && ulStartBlock != END_OF_CHAIN); +00030 fail(aulBBD == NULL); +00031 +00032 NO_DBG_HEX(ulCharPosFirst); +00033 NO_DBG_DEC(ulTotalLength); +00034 +00035 if (bUsesUnicode) { +00036 /* One character equals two bytes */ +00037 NO_DBG_MSG("Uses Unicode"); +00038 lToGo = (long)ulTotalLength * 2; +00039 } else { +00040 /* One character equals one byte */ +00041 NO_DBG_MSG("Uses ASCII"); +00042 lToGo = (long)ulTotalLength; +00043 } +00044 +00045 ulCharPos = ulCharPosFirst; +00046 ulOffset = ulCharPosFirst; +00047 for (ulIndex = ulStartBlock; +00048 ulIndex != END_OF_CHAIN && lToGo > 0; +00049 ulIndex = aulBBD[ulIndex]) { +00050 if (ulIndex >= (ULONG)tBBDLen) { +00051 DBG_DEC(ulIndex); +00052 DBG_DEC(tBBDLen); +00053 werr(1, "The Big Block Depot is damaged"); +00054 } +00055 if (ulOffset >= BIG_BLOCK_SIZE) { +00056 ulOffset -= BIG_BLOCK_SIZE; +00057 continue; +00058 } +00059 tTextBlock.ulFileOffset = +00060 (ulIndex + 1) * BIG_BLOCK_SIZE + ulOffset; +00061 tTextBlock.ulCharPos = ulCharPos; +00062 tTextBlock.ulLength = min(BIG_BLOCK_SIZE - ulOffset, +00063 (ULONG)lToGo); +00064 tTextBlock.bUsesUnicode = bUsesUnicode; +00065 tTextBlock.usPropMod = usPropMod; +00066 ulOffset = 0; +00067 if (!bAdd2TextBlockList(&tTextBlock)) { +00068 DBG_HEX(tTextBlock.ulFileOffset); +00069 DBG_HEX(tTextBlock.ulCharPos); +00070 DBG_DEC(tTextBlock.ulLength); +00071 DBG_DEC(tTextBlock.bUsesUnicode); +00072 DBG_DEC(tTextBlock.usPropMod); +00073 return FALSE; +00074 } +00075 ulCharPos += tTextBlock.ulLength; +00076 lToGo -= (long)tTextBlock.ulLength; +00077 } +00078 DBG_DEC_C(lToGo != 0, lToGo); +00079 return lToGo == 0; +00080 } /* end of bAddTextBlocks */ +00081 +00082 /* +00083 * bGet6DocumentText - make a list of the text blocks of Word 6/7 files +00084 * +00085 * Code for "fast saved" files. +00086 * +00087 * Returns TRUE when successful, FALSE if not +00088 */ +00089 BOOL +00090 bGet6DocumentText(FILE *pFile, BOOL bUsesUnicode, ULONG ulStartBlock, +00091 const ULONG *aulBBD, size_t tBBDLen, const UCHAR *aucHeader) +00092 { +00093 UCHAR *aucBuffer; +00094 ULONG ulBeginTextInfo, ulTextOffset, ulTotLength; +00095 size_t tTextInfoLen; +00096 int iIndex, iType, iOff, iLen, iPieces; +00097 USHORT usPropMod; +00098 +00099 DBG_MSG("bGet6DocumentText"); +00100 +00101 fail(pFile == NULL); +00102 fail(aulBBD == NULL); +00103 fail(aucHeader == NULL); +00104 +00105 ulBeginTextInfo = ulGetLong(0x160, aucHeader); /* fcClx */ +00106 DBG_HEX(ulBeginTextInfo); +00107 tTextInfoLen = (size_t)ulGetLong(0x164, aucHeader); /* lcbClx */ +00108 DBG_DEC(tTextInfoLen); +00109 +00110 aucBuffer = xmalloc(tTextInfoLen); +00111 if (!bReadBuffer(pFile, ulStartBlock, +00112 aulBBD, tBBDLen, BIG_BLOCK_SIZE, +00113 aucBuffer, ulBeginTextInfo, tTextInfoLen)) { +00114 aucBuffer = xfree(aucBuffer); +00115 return FALSE; +00116 } +00117 NO_DBG_PRINT_BLOCK(aucBuffer, tTextInfoLen); +00118 +00119 iOff = 0; +00120 while ((size_t)iOff < tTextInfoLen) { +00121 iType = (int)ucGetByte(iOff, aucBuffer); +00122 iOff++; +00123 if (iType == 0) { +00124 DBG_FIXME(); +00125 iOff++; +00126 continue; +00127 } +00128 if (iType == 1) { +00129 iLen = (int)usGetWord(iOff, aucBuffer); +00130 vAdd2PropModList(aucBuffer + iOff); +00131 iOff += iLen + 2; +00132 continue; +00133 } +00134 if (iType != 2) { +00135 werr(0, "Unknown type of 'fastsaved' format"); +00136 aucBuffer = xfree(aucBuffer); +00137 return FALSE; +00138 } +00139 /* Type 2 */ +00140 iLen = (int)usGetWord(iOff, aucBuffer); +00141 NO_DBG_DEC(iLen); +00142 iOff += 4; +00143 iPieces = (iLen - 4) / 12; +00144 DBG_DEC(iPieces); +00145 for (iIndex = 0; iIndex < iPieces; iIndex++) { +00146 ulTextOffset = ulGetLong( +00147 iOff + (iPieces + 1) * 4 + iIndex * 8 + 2, +00148 aucBuffer); +00149 usPropMod = usGetWord( +00150 iOff + (iPieces + 1) * 4 + iIndex * 8 + 6, +00151 aucBuffer); +00152 ulTotLength = ulGetLong(iOff + (iIndex + 1) * 4, +00153 aucBuffer) - +00154 ulGetLong(iOff + iIndex * 4, +00155 aucBuffer); +00156 NO_DBG_HEX_C(usPropMod != 0, usPropMod); +00157 if (!bAddTextBlocks(ulTextOffset, ulTotLength, +00158 bUsesUnicode, usPropMod, +00159 ulStartBlock, +00160 aulBBD, tBBDLen)) { +00161 aucBuffer = xfree(aucBuffer); +00162 return FALSE; +00163 } +00164 } +00165 break; +00166 } +00167 aucBuffer = xfree(aucBuffer); +00168 return TRUE; +00169 } /* end of bGet6DocumentText */ +00170 +00171 /* +00172 * bGet8DocumentText - make a list of the text blocks of Word 8/97 files +00173 * +00174 * Returns TRUE when successful, FALSE if not +00175 */ +00176 BOOL +00177 bGet8DocumentText(FILE *pFile, const pps_info_type *pPPS, +00178 const ULONG *aulBBD, size_t tBBDLen, +00179 const ULONG *aulSBD, size_t tSBDLen, +00180 const UCHAR *aucHeader) +00181 { +00182 const ULONG *aulBlockDepot; +00183 UCHAR *aucBuffer; +00184 ULONG ulTextOffset, ulBeginTextInfo; +00185 ULONG ulTotLength, ulLen; +00186 long lIndex, lPieces, lOff; +00187 size_t tTextInfoLen, tBlockDepotLen, tBlockSize; +00188 int iType, iLen; +00189 BOOL bUsesUnicode; +00190 USHORT usPropMod; +00191 +00192 DBG_MSG("bGet8DocumentText"); +00193 +00194 fail(pFile == NULL || pPPS == NULL); +00195 fail(aulBBD == NULL || aulSBD == NULL); +00196 fail(aucHeader == NULL); +00197 +00198 ulBeginTextInfo = ulGetLong(0x1a2, aucHeader); /* fcClx */ +00199 DBG_HEX(ulBeginTextInfo); +00200 tTextInfoLen = (size_t)ulGetLong(0x1a6, aucHeader); /* lcbClx */ +00201 DBG_DEC(tTextInfoLen); +00202 +00203 DBG_DEC(pPPS->tTable.ulSB); +00204 DBG_HEX(pPPS->tTable.ulSize); +00205 if (pPPS->tTable.ulSize == 0) { +00206 return FALSE; +00207 } +00208 +00209 if (pPPS->tTable.ulSize < MIN_SIZE_FOR_BBD_USE) { +00210 /* Use the Small Block Depot */ +00211 aulBlockDepot = aulSBD; +00212 tBlockDepotLen = tSBDLen; +00213 tBlockSize = SMALL_BLOCK_SIZE; +00214 } else { +00215 /* Use the Big Block Depot */ +00216 aulBlockDepot = aulBBD; +00217 tBlockDepotLen = tBBDLen; +00218 tBlockSize = BIG_BLOCK_SIZE; +00219 } +00220 aucBuffer = xmalloc(tTextInfoLen); +00221 if (!bReadBuffer(pFile, pPPS->tTable.ulSB, +00222 aulBlockDepot, tBlockDepotLen, tBlockSize, +00223 aucBuffer, ulBeginTextInfo, tTextInfoLen)) { +00224 aucBuffer = xfree(aucBuffer); +00225 return FALSE; +00226 } +00227 NO_DBG_PRINT_BLOCK(aucBuffer, tTextInfoLen); +00228 +00229 lOff = 0; +00230 while (lOff < (long)tTextInfoLen) { +00231 iType = (int)ucGetByte(lOff, aucBuffer); +00232 lOff++; +00233 if (iType == 0) { +00234 DBG_FIXME(); +00235 lOff++; +00236 continue; +00237 } +00238 if (iType == 1) { +00239 iLen = (int)usGetWord(lOff, aucBuffer); +00240 vAdd2PropModList(aucBuffer + lOff); +00241 lOff += (long)iLen + 2; +00242 continue; +00243 } +00244 if (iType != 2) { +00245 werr(0, "Unknown type of 'fastsaved' format"); +00246 aucBuffer = xfree(aucBuffer); +00247 return FALSE; +00248 } +00249 /* Type 2 */ +00250 ulLen = ulGetLong(lOff, aucBuffer); +00251 if (ulLen < 4) { +00252 DBG_DEC(ulLen); +00253 return FALSE; +00254 } +00255 lOff += 4; +00256 lPieces = (long)((ulLen - 4) / 12); +00257 DBG_DEC(lPieces); +00258 for (lIndex = 0; lIndex < lPieces; lIndex++) { +00259 ulTextOffset = ulGetLong( +00260 lOff + (lPieces + 1) * 4 + lIndex * 8 + 2, +00261 aucBuffer); +00262 usPropMod = usGetWord( +00263 lOff + (lPieces + 1) * 4 + lIndex * 8 + 6, +00264 aucBuffer); +00265 ulTotLength = ulGetLong(lOff + (lIndex + 1) * 4, +00266 aucBuffer) - +00267 ulGetLong(lOff + lIndex * 4, +00268 aucBuffer); +00269 if ((ulTextOffset & BIT(30)) == 0) { +00270 bUsesUnicode = TRUE; +00271 } else { +00272 bUsesUnicode = FALSE; +00273 ulTextOffset &= ~BIT(30); +00274 ulTextOffset /= 2; +00275 } +00276 NO_DBG_HEX_C(usPropMod != 0, usPropMod); +00277 if (!bAddTextBlocks(ulTextOffset, ulTotLength, +00278 bUsesUnicode, usPropMod, +00279 pPPS->tWordDocument.ulSB, +00280 aulBBD, tBBDLen)) { +00281 aucBuffer = xfree(aucBuffer); +00282 return FALSE; +00283 } +00284 } +00285 break; +00286 } +00287 aucBuffer = xfree(aucBuffer); +00288 return TRUE; +00289 } /* end of bGet8DocumentText */ +