diff -r 59758314f811 -r d4524d6a4472 Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/findtext_8c_source.html --- a/Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/findtext_8c_source.html Fri Jun 11 15:24:34 2010 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,305 +0,0 @@ - - -
- -00001 /* -00002 * findtext.c -00003 * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL -00004 * -00005 * Description: -00006 * Find the blocks that contain the text of MS Word files -00007 */ -00008 -00009 #include <stdio.h> -00010 #include <stdlib.h> -00011 #include "antiword.h" -00012 -00013 -00014 /* -00015 * bAddTextBlocks - Add the blocks to the text block list -00016 * -00017 * Returns TRUE when successful, FALSE if not -00018 */ -00019 BOOL -00020 bAddTextBlocks(ULONG ulCharPosFirst, ULONG ulTotalLength, -00021 BOOL bUsesUnicode, USHORT usPropMod, -00022 ULONG ulStartBlock, const ULONG *aulBBD, size_t tBBDLen) -00023 { -00024 text_block_type tTextBlock; -00025 ULONG ulCharPos, ulOffset, ulIndex; -00026 long lToGo; -00027 -00028 fail(ulTotalLength > (ULONG)LONG_MAX / 2); -00029 fail(ulStartBlock > MAX_BLOCKNUMBER && ulStartBlock != END_OF_CHAIN); -00030 fail(aulBBD == NULL); -00031 -00032 NO_DBG_HEX(ulCharPosFirst); -00033 NO_DBG_DEC(ulTotalLength); -00034 -00035 if (bUsesUnicode) { -00036 /* One character equals two bytes */ -00037 NO_DBG_MSG("Uses Unicode"); -00038 lToGo = (long)ulTotalLength * 2; -00039 } else { -00040 /* One character equals one byte */ -00041 NO_DBG_MSG("Uses ASCII"); -00042 lToGo = (long)ulTotalLength; -00043 } -00044 -00045 ulCharPos = ulCharPosFirst; -00046 ulOffset = ulCharPosFirst; -00047 for (ulIndex = ulStartBlock; -00048 ulIndex != END_OF_CHAIN && lToGo > 0; -00049 ulIndex = aulBBD[ulIndex]) { -00050 if (ulIndex >= (ULONG)tBBDLen) { -00051 DBG_DEC(ulIndex); -00052 DBG_DEC(tBBDLen); -00053 werr(1, "The Big Block Depot is damaged"); -00054 } -00055 if (ulOffset >= BIG_BLOCK_SIZE) { -00056 ulOffset -= BIG_BLOCK_SIZE; -00057 continue; -00058 } -00059 tTextBlock.ulFileOffset = -00060 (ulIndex + 1) * BIG_BLOCK_SIZE + ulOffset; -00061 tTextBlock.ulCharPos = ulCharPos; -00062 tTextBlock.ulLength = min(BIG_BLOCK_SIZE - ulOffset, -00063 (ULONG)lToGo); -00064 tTextBlock.bUsesUnicode = bUsesUnicode; -00065 tTextBlock.usPropMod = usPropMod; -00066 ulOffset = 0; -00067 if (!bAdd2TextBlockList(&tTextBlock)) { -00068 DBG_HEX(tTextBlock.ulFileOffset); -00069 DBG_HEX(tTextBlock.ulCharPos); -00070 DBG_DEC(tTextBlock.ulLength); -00071 DBG_DEC(tTextBlock.bUsesUnicode); -00072 DBG_DEC(tTextBlock.usPropMod); -00073 return FALSE; -00074 } -00075 ulCharPos += tTextBlock.ulLength; -00076 lToGo -= (long)tTextBlock.ulLength; -00077 } -00078 DBG_DEC_C(lToGo != 0, lToGo); -00079 return lToGo == 0; -00080 } /* end of bAddTextBlocks */ -00081 -00082 /* -00083 * bGet6DocumentText - make a list of the text blocks of Word 6/7 files -00084 * -00085 * Code for "fast saved" files. -00086 * -00087 * Returns TRUE when successful, FALSE if not -00088 */ -00089 BOOL -00090 bGet6DocumentText(FILE *pFile, BOOL bUsesUnicode, ULONG ulStartBlock, -00091 const ULONG *aulBBD, size_t tBBDLen, const UCHAR *aucHeader) -00092 { -00093 UCHAR *aucBuffer; -00094 ULONG ulBeginTextInfo, ulTextOffset, ulTotLength; -00095 size_t tTextInfoLen; -00096 int iIndex, iType, iOff, iLen, iPieces; -00097 USHORT usPropMod; -00098 -00099 DBG_MSG("bGet6DocumentText"); -00100 -00101 fail(pFile == NULL); -00102 fail(aulBBD == NULL); -00103 fail(aucHeader == NULL); -00104 -00105 ulBeginTextInfo = ulGetLong(0x160, aucHeader); /* fcClx */ -00106 DBG_HEX(ulBeginTextInfo); -00107 tTextInfoLen = (size_t)ulGetLong(0x164, aucHeader); /* lcbClx */ -00108 DBG_DEC(tTextInfoLen); -00109 -00110 aucBuffer = xmalloc(tTextInfoLen); -00111 if (!bReadBuffer(pFile, ulStartBlock, -00112 aulBBD, tBBDLen, BIG_BLOCK_SIZE, -00113 aucBuffer, ulBeginTextInfo, tTextInfoLen)) { -00114 aucBuffer = xfree(aucBuffer); -00115 return FALSE; -00116 } -00117 NO_DBG_PRINT_BLOCK(aucBuffer, tTextInfoLen); -00118 -00119 iOff = 0; -00120 while ((size_t)iOff < tTextInfoLen) { -00121 iType = (int)ucGetByte(iOff, aucBuffer); -00122 iOff++; -00123 if (iType == 0) { -00124 DBG_FIXME(); -00125 iOff++; -00126 continue; -00127 } -00128 if (iType == 1) { -00129 iLen = (int)usGetWord(iOff, aucBuffer); -00130 vAdd2PropModList(aucBuffer + iOff); -00131 iOff += iLen + 2; -00132 continue; -00133 } -00134 if (iType != 2) { -00135 werr(0, "Unknown type of 'fastsaved' format"); -00136 aucBuffer = xfree(aucBuffer); -00137 return FALSE; -00138 } -00139 /* Type 2 */ -00140 iLen = (int)usGetWord(iOff, aucBuffer); -00141 NO_DBG_DEC(iLen); -00142 iOff += 4; -00143 iPieces = (iLen - 4) / 12; -00144 DBG_DEC(iPieces); -00145 for (iIndex = 0; iIndex < iPieces; iIndex++) { -00146 ulTextOffset = ulGetLong( -00147 iOff + (iPieces + 1) * 4 + iIndex * 8 + 2, -00148 aucBuffer); -00149 usPropMod = usGetWord( -00150 iOff + (iPieces + 1) * 4 + iIndex * 8 + 6, -00151 aucBuffer); -00152 ulTotLength = ulGetLong(iOff + (iIndex + 1) * 4, -00153 aucBuffer) - -00154 ulGetLong(iOff + iIndex * 4, -00155 aucBuffer); -00156 NO_DBG_HEX_C(usPropMod != 0, usPropMod); -00157 if (!bAddTextBlocks(ulTextOffset, ulTotLength, -00158 bUsesUnicode, usPropMod, -00159 ulStartBlock, -00160 aulBBD, tBBDLen)) { -00161 aucBuffer = xfree(aucBuffer); -00162 return FALSE; -00163 } -00164 } -00165 break; -00166 } -00167 aucBuffer = xfree(aucBuffer); -00168 return TRUE; -00169 } /* end of bGet6DocumentText */ -00170 -00171 /* -00172 * bGet8DocumentText - make a list of the text blocks of Word 8/97 files -00173 * -00174 * Returns TRUE when successful, FALSE if not -00175 */ -00176 BOOL -00177 bGet8DocumentText(FILE *pFile, const pps_info_type *pPPS, -00178 const ULONG *aulBBD, size_t tBBDLen, -00179 const ULONG *aulSBD, size_t tSBDLen, -00180 const UCHAR *aucHeader) -00181 { -00182 const ULONG *aulBlockDepot; -00183 UCHAR *aucBuffer; -00184 ULONG ulTextOffset, ulBeginTextInfo; -00185 ULONG ulTotLength, ulLen; -00186 long lIndex, lPieces, lOff; -00187 size_t tTextInfoLen, tBlockDepotLen, tBlockSize; -00188 int iType, iLen; -00189 BOOL bUsesUnicode; -00190 USHORT usPropMod; -00191 -00192 DBG_MSG("bGet8DocumentText"); -00193 -00194 fail(pFile == NULL || pPPS == NULL); -00195 fail(aulBBD == NULL || aulSBD == NULL); -00196 fail(aucHeader == NULL); -00197 -00198 ulBeginTextInfo = ulGetLong(0x1a2, aucHeader); /* fcClx */ -00199 DBG_HEX(ulBeginTextInfo); -00200 tTextInfoLen = (size_t)ulGetLong(0x1a6, aucHeader); /* lcbClx */ -00201 DBG_DEC(tTextInfoLen); -00202 -00203 DBG_DEC(pPPS->tTable.ulSB); -00204 DBG_HEX(pPPS->tTable.ulSize); -00205 if (pPPS->tTable.ulSize == 0) { -00206 return FALSE; -00207 } -00208 -00209 if (pPPS->tTable.ulSize < MIN_SIZE_FOR_BBD_USE) { -00210 /* Use the Small Block Depot */ -00211 aulBlockDepot = aulSBD; -00212 tBlockDepotLen = tSBDLen; -00213 tBlockSize = SMALL_BLOCK_SIZE; -00214 } else { -00215 /* Use the Big Block Depot */ -00216 aulBlockDepot = aulBBD; -00217 tBlockDepotLen = tBBDLen; -00218 tBlockSize = BIG_BLOCK_SIZE; -00219 } -00220 aucBuffer = xmalloc(tTextInfoLen); -00221 if (!bReadBuffer(pFile, pPPS->tTable.ulSB, -00222 aulBlockDepot, tBlockDepotLen, tBlockSize, -00223 aucBuffer, ulBeginTextInfo, tTextInfoLen)) { -00224 aucBuffer = xfree(aucBuffer); -00225 return FALSE; -00226 } -00227 NO_DBG_PRINT_BLOCK(aucBuffer, tTextInfoLen); -00228 -00229 lOff = 0; -00230 while (lOff < (long)tTextInfoLen) { -00231 iType = (int)ucGetByte(lOff, aucBuffer); -00232 lOff++; -00233 if (iType == 0) { -00234 DBG_FIXME(); -00235 lOff++; -00236 continue; -00237 } -00238 if (iType == 1) { -00239 iLen = (int)usGetWord(lOff, aucBuffer); -00240 vAdd2PropModList(aucBuffer + lOff); -00241 lOff += (long)iLen + 2; -00242 continue; -00243 } -00244 if (iType != 2) { -00245 werr(0, "Unknown type of 'fastsaved' format"); -00246 aucBuffer = xfree(aucBuffer); -00247 return FALSE; -00248 } -00249 /* Type 2 */ -00250 ulLen = ulGetLong(lOff, aucBuffer); -00251 if (ulLen < 4) { -00252 DBG_DEC(ulLen); -00253 return FALSE; -00254 } -00255 lOff += 4; -00256 lPieces = (long)((ulLen - 4) / 12); -00257 DBG_DEC(lPieces); -00258 for (lIndex = 0; lIndex < lPieces; lIndex++) { -00259 ulTextOffset = ulGetLong( -00260 lOff + (lPieces + 1) * 4 + lIndex * 8 + 2, -00261 aucBuffer); -00262 usPropMod = usGetWord( -00263 lOff + (lPieces + 1) * 4 + lIndex * 8 + 6, -00264 aucBuffer); -00265 ulTotLength = ulGetLong(lOff + (lIndex + 1) * 4, -00266 aucBuffer) - -00267 ulGetLong(lOff + lIndex * 4, -00268 aucBuffer); -00269 if ((ulTextOffset & BIT(30)) == 0) { -00270 bUsesUnicode = TRUE; -00271 } else { -00272 bUsesUnicode = FALSE; -00273 ulTextOffset &= ~BIT(30); -00274 ulTextOffset /= 2; -00275 } -00276 NO_DBG_HEX_C(usPropMod != 0, usPropMod); -00277 if (!bAddTextBlocks(ulTextOffset, ulTotLength, -00278 bUsesUnicode, usPropMod, -00279 pPPS->tWordDocument.ulSB, -00280 aulBBD, tBBDLen)) { -00281 aucBuffer = xfree(aucBuffer); -00282 return FALSE; -00283 } -00284 } -00285 break; -00286 } -00287 aucBuffer = xfree(aucBuffer); -00288 return TRUE; -00289 } /* end of bGet8DocumentText */ -