diff -r f345bda72bc4 -r 43e37759235e Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/wordole_8c_source.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/wordole_8c_source.html Tue Mar 30 16:16:55 2010 +0100 @@ -0,0 +1,820 @@ + + +
+ +00001 /* +00002 * wordole.c +00003 * Copyright (C) 1998-2004 A.J. van Os; Released under GPL +00004 * +00005 * Description: +00006 * Deal with the OLE internals of a MS Word file +00007 */ +00008 +00009 #include <string.h> +00010 #include "antiword.h" +00011 +00012 /* Private type for Property Set Storage entries */ +00013 typedef struct pps_entry_tag { +00014 ULONG ulNext; +00015 ULONG ulPrevious; +00016 ULONG ulDir; +00017 ULONG ulSB; +00018 ULONG ulSize; +00019 int iLevel; +00020 char szName[32]; +00021 UCHAR ucType; +00022 } pps_entry_type; +00023 +00024 /* Show that a PPS number or index should not be used */ +00025 #define PPS_NUMBER_INVALID 0xffffffffUL +00026 +00027 +00028 /* Macro to make sure all such statements will be identical */ +00029 #define FREE_ALL() \ +00030 do {\ +00031 vDestroySmallBlockList();\ +00032 aulRootList = xfree(aulRootList);\ +00033 aulSbdList = xfree(aulSbdList);\ +00034 aulBbdList = xfree(aulBbdList);\ +00035 aulSBD = xfree(aulSBD);\ +00036 aulBBD = xfree(aulBBD);\ +00037 } while(0) +00038 +00039 +00040 /* +00041 * ulReadLong - read four bytes from the given file and offset +00042 */ +00043 static ULONG +00044 ulReadLong(FILE *pFile, ULONG ulOffset) +00045 { +00046 UCHAR aucBytes[4]; +00047 +00048 fail(pFile == NULL); +00049 +00050 if (!bReadBytes(aucBytes, 4, ulOffset, pFile)) { +00051 werr(1, "Read long 0x%lx not possible", ulOffset); +00052 } +00053 return ulGetLong(0, aucBytes); +00054 } /* end of ulReadLong */ +00055 +00056 /* +00057 * vName2String - turn the name into a proper string. +00058 */ +00059 static void +00060 vName2String(char *szName, const UCHAR *aucBytes, size_t tNameSize) +00061 { +00062 char *pcChar; +00063 size_t tIndex; +00064 +00065 fail(aucBytes == NULL || szName == NULL); +00066 +00067 if (tNameSize < 2) { +00068 szName[0] = '\0'; +00069 return; +00070 } +00071 for (tIndex = 0, pcChar = szName; +00072 tIndex < 2 * tNameSize; +00073 tIndex += 2, pcChar++) { +00074 *pcChar = (char)aucBytes[tIndex]; +00075 } +00076 szName[tNameSize - 1] = '\0'; +00077 } /* end of vName2String */ +00078 +00079 /* +00080 * tReadBlockIndices - read the Big/Small Block Depot indices +00081 * +00082 * Returns the number of indices read +00083 */ +00084 static size_t +00085 tReadBlockIndices(FILE *pFile, ULONG *aulBlockDepot, +00086 size_t tMaxRec, ULONG ulOffset) +00087 { +00088 size_t tDone; +00089 int iIndex; +00090 UCHAR aucBytes[BIG_BLOCK_SIZE]; +00091 +00092 fail(pFile == NULL || aulBlockDepot == NULL); +00093 fail(tMaxRec == 0); +00094 +00095 /* Read a big block with BBD or SBD indices */ +00096 if (!bReadBytes(aucBytes, BIG_BLOCK_SIZE, ulOffset, pFile)) { +00097 werr(0, "Reading big block from 0x%lx is not possible", +00098 ulOffset); +00099 return 0; +00100 } +00101 /* Split the big block into indices, an index is four bytes */ +00102 tDone = min(tMaxRec, (size_t)BIG_BLOCK_SIZE / 4); +00103 for (iIndex = 0; iIndex < (int)tDone; iIndex++) { +00104 aulBlockDepot[iIndex] = ulGetLong(4 * iIndex, aucBytes); +00105 NO_DBG_DEC(aulBlockDepot[iIndex]); +00106 } +00107 return tDone; +00108 } /* end of tReadBlockIndices */ +00109 +00110 /* +00111 * bGetBBD - get the Big Block Depot indices from the index-blocks +00112 */ +00113 static BOOL +00114 bGetBBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen, +00115 ULONG *aulBBD, size_t tBBDLen) +00116 { +00117 ULONG ulBegin; +00118 size_t tToGo, tDone; +00119 int iIndex; +00120 +00121 fail(pFile == NULL || aulDepot == NULL || aulBBD == NULL); +00122 +00123 DBG_MSG("bGetBBD"); +00124 +00125 tToGo = tBBDLen; +00126 for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) { +00127 ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE; +00128 NO_DBG_HEX(ulBegin); +00129 tDone = tReadBlockIndices(pFile, aulBBD, tToGo, ulBegin); +00130 fail(tDone > tToGo); +00131 if (tDone == 0) { +00132 return FALSE; +00133 } +00134 aulBBD += tDone; +00135 tToGo -= tDone; +00136 } +00137 return tToGo == 0; +00138 } /* end of bGetBBD */ +00139 +00140 /* +00141 * bGetSBD - get the Small Block Depot indices from the index-blocks +00142 */ +00143 static BOOL +00144 bGetSBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen, +00145 ULONG *aulSBD, size_t tSBDLen) +00146 { +00147 ULONG ulBegin; +00148 size_t tToGo, tDone; +00149 int iIndex; +00150 +00151 fail(pFile == NULL || aulDepot == NULL || aulSBD == NULL); +00152 +00153 DBG_MSG("bGetSBD"); +00154 +00155 tToGo = tSBDLen; +00156 for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) { +00157 fail(aulDepot[iIndex] >= ULONG_MAX / BIG_BLOCK_SIZE); +00158 ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE; +00159 NO_DBG_HEX(ulBegin); +00160 tDone = tReadBlockIndices(pFile, aulSBD, tToGo, ulBegin); +00161 fail(tDone > tToGo); +00162 if (tDone == 0) { +00163 return FALSE; +00164 } +00165 aulSBD += tDone; +00166 tToGo -= tDone; +00167 } +00168 return tToGo == 0; +00169 } /* end of bGetSBD */ +00170 +00171 /* +00172 * vComputePPSlevels - compute the levels of the Property Set Storage entries +00173 */ +00174 static void +00175 vComputePPSlevels(pps_entry_type *atPPSlist, pps_entry_type *pNode, +00176 int iLevel, int iRecursionLevel) +00177 { +00178 fail(atPPSlist == NULL || pNode == NULL); +00179 fail(iLevel < 0 || iRecursionLevel < 0); +00180 +00181 if (iRecursionLevel > 25) { +00182 /* This removes the possibility of an infinite recursion */ +00183 DBG_DEC(iRecursionLevel); +00184 return; +00185 } +00186 if (pNode->iLevel <= iLevel) { +00187 /* Avoid entering a loop */ +00188 DBG_DEC(iLevel); +00189 DBG_DEC(pNode->iLevel); +00190 return; +00191 } +00192 +00193 pNode->iLevel = iLevel; +00194 +00195 if (pNode->ulDir != PPS_NUMBER_INVALID) { +00196 vComputePPSlevels(atPPSlist, +00197 &atPPSlist[pNode->ulDir], +00198 iLevel + 1, +00199 iRecursionLevel + 1); +00200 } +00201 if (pNode->ulNext != PPS_NUMBER_INVALID) { +00202 vComputePPSlevels(atPPSlist, +00203 &atPPSlist[pNode->ulNext], +00204 iLevel, +00205 iRecursionLevel + 1); +00206 } +00207 if (pNode->ulPrevious != PPS_NUMBER_INVALID) { +00208 vComputePPSlevels(atPPSlist, +00209 &atPPSlist[pNode->ulPrevious], +00210 iLevel, +00211 iRecursionLevel + 1); +00212 } +00213 } /* end of vComputePPSlevels */ +00214 +00215 /* +00216 * bGetPPS - search the Property Set Storage for three sets +00217 * +00218 * Return TRUE if the WordDocument PPS is found +00219 */ +00220 static BOOL +00221 bGetPPS(FILE *pFile, +00222 const ULONG *aulRootList, size_t tRootListLen, pps_info_type *pPPS) +00223 { +00224 pps_entry_type *atPPSlist; +00225 ULONG ulBegin, ulOffset, ulTmp; +00226 size_t tNbrOfPPS, tNameSize; +00227 int iIndex, iStartBlock, iRootIndex; +00228 BOOL bWord, bExcel; +00229 UCHAR aucBytes[PROPERTY_SET_STORAGE_SIZE]; +00230 +00231 fail(pFile == NULL || aulRootList == NULL || pPPS == NULL); +00232 +00233 DBG_MSG("bGetPPS"); +00234 +00235 NO_DBG_DEC(tRootListLen); +00236 +00237 bWord = FALSE; +00238 bExcel = FALSE; +00239 (void)memset(pPPS, 0, sizeof(*pPPS)); +00240 +00241 /* Read and store all the Property Set Storage entries */ +00242 +00243 tNbrOfPPS = tRootListLen * BIG_BLOCK_SIZE / PROPERTY_SET_STORAGE_SIZE; +00244 atPPSlist = xcalloc(tNbrOfPPS, sizeof(pps_entry_type)); +00245 iRootIndex = 0; +00246 +00247 for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) { +00248 ulTmp = (ULONG)iIndex * PROPERTY_SET_STORAGE_SIZE; +00249 iStartBlock = (int)(ulTmp / BIG_BLOCK_SIZE); +00250 ulOffset = ulTmp % BIG_BLOCK_SIZE; +00251 ulBegin = (aulRootList[iStartBlock] + 1) * BIG_BLOCK_SIZE + +00252 ulOffset; +00253 NO_DBG_HEX(ulBegin); +00254 if (!bReadBytes(aucBytes, PROPERTY_SET_STORAGE_SIZE, +00255 ulBegin, pFile)) { +00256 werr(0, "Reading PPS %d is not possible", iIndex); +00257 atPPSlist = xfree(atPPSlist); +00258 return FALSE; +00259 } +00260 tNameSize = (size_t)usGetWord(0x40, aucBytes); +00261 tNameSize = (tNameSize + 1) / 2; +00262 vName2String(atPPSlist[iIndex].szName, aucBytes, tNameSize); +00263 atPPSlist[iIndex].ucType = ucGetByte(0x42, aucBytes); +00264 if (atPPSlist[iIndex].ucType == 5) { +00265 iRootIndex = iIndex; +00266 } +00267 atPPSlist[iIndex].ulPrevious = ulGetLong(0x44, aucBytes); +00268 atPPSlist[iIndex].ulNext = ulGetLong(0x48, aucBytes); +00269 atPPSlist[iIndex].ulDir = ulGetLong(0x4c, aucBytes); +00270 atPPSlist[iIndex].ulSB = ulGetLong(0x74, aucBytes); +00271 atPPSlist[iIndex].ulSize = ulGetLong(0x78, aucBytes); +00272 atPPSlist[iIndex].iLevel = INT_MAX; +00273 if ((atPPSlist[iIndex].ulPrevious >= (ULONG)tNbrOfPPS && +00274 atPPSlist[iIndex].ulPrevious != PPS_NUMBER_INVALID) || +00275 (atPPSlist[iIndex].ulNext >= (ULONG)tNbrOfPPS && +00276 atPPSlist[iIndex].ulNext != PPS_NUMBER_INVALID) || +00277 (atPPSlist[iIndex].ulDir >= (ULONG)tNbrOfPPS && +00278 atPPSlist[iIndex].ulDir != PPS_NUMBER_INVALID)) { +00279 DBG_DEC(iIndex); +00280 DBG_DEC(atPPSlist[iIndex].ulPrevious); +00281 DBG_DEC(atPPSlist[iIndex].ulNext); +00282 DBG_DEC(atPPSlist[iIndex].ulDir); +00283 DBG_DEC(tNbrOfPPS); +00284 werr(0, "The Property Set Storage is damaged"); +00285 atPPSlist = xfree(atPPSlist); +00286 return FALSE; +00287 } +00288 } +00289 +00290 #if 0 /* defined(DEBUG) */ +00291 DBG_MSG("Before"); +00292 for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) { +00293 DBG_MSG(atPPSlist[iIndex].szName); +00294 DBG_HEX(atPPSlist[iIndex].ulDir); +00295 DBG_HEX(atPPSlist[iIndex].ulPrevious); +00296 DBG_HEX(atPPSlist[iIndex].ulNext); +00297 DBG_DEC(atPPSlist[iIndex].ulSB); +00298 DBG_HEX(atPPSlist[iIndex].ulSize); +00299 DBG_DEC(atPPSlist[iIndex].iLevel); +00300 } +00301 #endif /* DEBUG */ +00302 +00303 /* Add level information to each entry */ +00304 vComputePPSlevels(atPPSlist, &atPPSlist[iRootIndex], 0, 0); +00305 +00306 /* Check the entries on level 1 for the required information */ +00307 NO_DBG_MSG("After"); +00308 for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) { +00309 #if 0 /* defined(DEBUG) */ +00310 DBG_MSG(atPPSlist[iIndex].szName); +00311 DBG_HEX(atPPSlist[iIndex].ulDir); +00312 DBG_HEX(atPPSlist[iIndex].ulPrevious); +00313 DBG_HEX(atPPSlist[iIndex].ulNext); +00314 DBG_DEC(atPPSlist[iIndex].ulSB); +00315 DBG_HEX(atPPSlist[iIndex].ulSize); +00316 DBG_DEC(atPPSlist[iIndex].iLevel); +00317 #endif /* DEBUG */ +00318 if (atPPSlist[iIndex].iLevel != 1 || +00319 atPPSlist[iIndex].ucType != 2 || +00320 atPPSlist[iIndex].szName[0] == '\0' || +00321 atPPSlist[iIndex].ulSize == 0) { +00322 /* This entry can be ignored */ +00323 continue; +00324 } +00325 if (pPPS->tWordDocument.ulSize == 0 && +00326 STREQ(atPPSlist[iIndex].szName, "WordDocument")) { +00327 pPPS->tWordDocument.ulSB = atPPSlist[iIndex].ulSB; +00328 pPPS->tWordDocument.ulSize = atPPSlist[iIndex].ulSize; +00329 bWord = TRUE; +00330 } else if (pPPS->tData.ulSize == 0 && +00331 STREQ(atPPSlist[iIndex].szName, "Data")) { +00332 pPPS->tData.ulSB = atPPSlist[iIndex].ulSB; +00333 pPPS->tData.ulSize = atPPSlist[iIndex].ulSize; +00334 } else if (pPPS->t0Table.ulSize == 0 && +00335 STREQ(atPPSlist[iIndex].szName, "0Table")) { +00336 pPPS->t0Table.ulSB = atPPSlist[iIndex].ulSB; +00337 pPPS->t0Table.ulSize = atPPSlist[iIndex].ulSize; +00338 } else if (pPPS->t1Table.ulSize == 0 && +00339 STREQ(atPPSlist[iIndex].szName, "1Table")) { +00340 pPPS->t1Table.ulSB = atPPSlist[iIndex].ulSB; +00341 pPPS->t1Table.ulSize = atPPSlist[iIndex].ulSize; +00342 } else if (pPPS->tSummaryInfo.ulSize == 0 && +00343 STREQ(atPPSlist[iIndex].szName, +00344 "\005SummaryInformation")) { +00345 pPPS->tSummaryInfo.ulSB = atPPSlist[iIndex].ulSB; +00346 pPPS->tSummaryInfo.ulSize = atPPSlist[iIndex].ulSize; +00347 } else if (pPPS->tDocSummaryInfo.ulSize == 0 && +00348 STREQ(atPPSlist[iIndex].szName, +00349 "\005DocumentSummaryInformation")) { +00350 pPPS->tDocSummaryInfo.ulSB = atPPSlist[iIndex].ulSB; +00351 pPPS->tDocSummaryInfo.ulSize = atPPSlist[iIndex].ulSize; +00352 } else if (STREQ(atPPSlist[iIndex].szName, "Book") || +00353 STREQ(atPPSlist[iIndex].szName, "Workbook")) { +00354 bExcel = TRUE; +00355 } +00356 } +00357 +00358 /* Free the space for the Property Set Storage entries */ +00359 atPPSlist = xfree(atPPSlist); +00360 +00361 /* Draw your conclusions */ +00362 if (bWord) { +00363 return TRUE; +00364 } +00365 +00366 if (bExcel) { +00367 werr(0, "Sorry, but this is an Excel spreadsheet"); +00368 } else { +00369 werr(0, "This OLE file does not contain a Word document"); +00370 } +00371 return FALSE; +00372 } /* end of bGetPPS */ +00373 +00374 /* +00375 * vGetBbdList - make a list of the places to find big blocks +00376 */ +00377 static void +00378 vGetBbdList(FILE *pFile, int iNbr, ULONG *aulBbdList, ULONG ulOffset) +00379 { +00380 int iIndex; +00381 +00382 fail(pFile == NULL); +00383 fail(iNbr > 127); +00384 fail(aulBbdList == NULL); +00385 +00386 NO_DBG_DEC(iNbr); +00387 for (iIndex = 0; iIndex < iNbr; iIndex++) { +00388 aulBbdList[iIndex] = +00389 ulReadLong(pFile, ulOffset + 4 * (ULONG)iIndex); +00390 NO_DBG_DEC(iIndex); +00391 NO_DBG_HEX(aulBbdList[iIndex]); +00392 } +00393 } /* end of vGetBbdList */ +00394 +00395 /* +00396 * bGetDocumentText - make a list of the text blocks of a Word document +00397 * +00398 * Return TRUE when succesful, otherwise FALSE +00399 */ +00400 static BOOL +00401 bGetDocumentText(FILE *pFile, const pps_info_type *pPPS, +00402 const ULONG *aulBBD, size_t tBBDLen, +00403 const ULONG *aulSBD, size_t tSBDLen, +00404 const UCHAR *aucHeader, int iWordVersion) +00405 { +00406 ULONG ulBeginOfText; +00407 ULONG ulTextLen, ulFootnoteLen, ulEndnoteLen; +00408 ULONG ulHdrFtrLen, ulMacroLen, ulAnnotationLen; +00409 ULONG ulTextBoxLen, ulHdrTextBoxLen; +00410 UINT uiQuickSaves; +00411 BOOL bFarEastWord, bTemplate, bFastSaved, bEncrypted, bSuccess; +00412 USHORT usIdent, usDocStatus; +00413 +00414 fail(pFile == NULL || pPPS == NULL); +00415 fail(aulBBD == NULL); +00416 fail(aulSBD == NULL); +00417 +00418 DBG_MSG("bGetDocumentText"); +00419 +00420 /* Get the "magic number" from the header */ +00421 usIdent = usGetWord(0x00, aucHeader); +00422 DBG_HEX(usIdent); +00423 bFarEastWord = usIdent == 0x8098 || usIdent == 0x8099 || +00424 usIdent == 0xa697 || usIdent == 0xa699; +00425 /* Get the status flags from the header */ +00426 usDocStatus = usGetWord(0x0a, aucHeader); +00427 DBG_HEX(usDocStatus); +00428 bTemplate = (usDocStatus & BIT(0)) != 0; +00429 DBG_MSG_C(bTemplate, "This document is a Template"); +00430 bFastSaved = (usDocStatus & BIT(2)) != 0; +00431 uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4; +00432 DBG_MSG_C(bFastSaved, "This document is Fast Saved"); +00433 DBG_DEC_C(bFastSaved, uiQuickSaves); +00434 bEncrypted = (usDocStatus & BIT(8)) != 0; +00435 if (bEncrypted) { +00436 werr(0, "Encrypted documents are not supported"); +00437 return FALSE; +00438 } +00439 +00440 /* Get length information */ +00441 ulBeginOfText = ulGetLong(0x18, aucHeader); +00442 DBG_HEX(ulBeginOfText); +00443 switch (iWordVersion) { +00444 case 6: +00445 case 7: +00446 ulTextLen = ulGetLong(0x34, aucHeader); +00447 ulFootnoteLen = ulGetLong(0x38, aucHeader); +00448 ulHdrFtrLen = ulGetLong(0x3c, aucHeader); +00449 ulMacroLen = ulGetLong(0x40, aucHeader); +00450 ulAnnotationLen = ulGetLong(0x44, aucHeader); +00451 ulEndnoteLen = ulGetLong(0x48, aucHeader); +00452 ulTextBoxLen = ulGetLong(0x4c, aucHeader); +00453 ulHdrTextBoxLen = ulGetLong(0x50, aucHeader); +00454 break; +00455 case 8: +00456 ulTextLen = ulGetLong(0x4c, aucHeader); +00457 ulFootnoteLen = ulGetLong(0x50, aucHeader); +00458 ulHdrFtrLen = ulGetLong(0x54, aucHeader); +00459 ulMacroLen = ulGetLong(0x58, aucHeader); +00460 ulAnnotationLen = ulGetLong(0x5c, aucHeader); +00461 ulEndnoteLen = ulGetLong(0x60, aucHeader); +00462 ulTextBoxLen = ulGetLong(0x64, aucHeader); +00463 ulHdrTextBoxLen = ulGetLong(0x68, aucHeader); +00464 break; +00465 default: +00466 werr(0, "This version of Word is not supported"); +00467 return FALSE; +00468 } +00469 DBG_DEC(ulTextLen); +00470 DBG_DEC(ulFootnoteLen); +00471 DBG_DEC(ulHdrFtrLen); +00472 DBG_DEC(ulMacroLen); +00473 DBG_DEC(ulAnnotationLen); +00474 DBG_DEC(ulEndnoteLen); +00475 DBG_DEC(ulTextBoxLen); +00476 DBG_DEC(ulHdrTextBoxLen); +00477 +00478 /* Make a list of the text blocks */ +00479 switch (iWordVersion) { +00480 case 6: +00481 case 7: +00482 if (bFastSaved) { +00483 bSuccess = bGet6DocumentText(pFile, +00484 bFarEastWord, +00485 pPPS->tWordDocument.ulSB, +00486 aulBBD, tBBDLen, +00487 aucHeader); +00488 } else { +00489 bSuccess = bAddTextBlocks(ulBeginOfText, +00490 ulTextLen + +00491 ulFootnoteLen + +00492 ulHdrFtrLen + +00493 ulMacroLen + ulAnnotationLen + +00494 ulEndnoteLen + +00495 ulTextBoxLen + ulHdrTextBoxLen, +00496 bFarEastWord, +00497 IGNORE_PROPMOD, +00498 pPPS->tWordDocument.ulSB, +00499 aulBBD, tBBDLen); +00500 } +00501 break; +00502 case 8: +00503 bSuccess = bGet8DocumentText(pFile, +00504 pPPS, +00505 aulBBD, tBBDLen, aulSBD, tSBDLen, +00506 aucHeader); +00507 break; +00508 default: +00509 werr(0, "This version of Word is not supported"); +00510 bSuccess = FALSE; +00511 break; +00512 } +00513 +00514 if (bSuccess) { +00515 vSplitBlockList(pFile, +00516 ulTextLen, +00517 ulFootnoteLen, +00518 ulHdrFtrLen, +00519 ulMacroLen, +00520 ulAnnotationLen, +00521 ulEndnoteLen, +00522 ulTextBoxLen, +00523 ulHdrTextBoxLen, +00524 !bFastSaved && iWordVersion == 8); +00525 } else { +00526 vDestroyTextBlockList(); +00527 werr(0, "I can't find the text of this document"); +00528 } +00529 return bSuccess; +00530 } /* end of bGetDocumentText */ +00531 +00532 /* +00533 * vGetDocumentData - make a list of the data blocks of a Word document +00534 */ +00535 static void +00536 vGetDocumentData(FILE *pFile, const pps_info_type *pPPS, +00537 const ULONG *aulBBD, size_t tBBDLen, +00538 const UCHAR *aucHeader, int iWordVersion) +00539 { +00540 options_type tOptions; +00541 ULONG ulBeginOfText; +00542 BOOL bFastSaved, bHasImages, bSuccess; +00543 USHORT usDocStatus; +00544 +00545 fail(pFile == NULL); +00546 fail(pPPS == NULL); +00547 fail(aulBBD == NULL); +00548 +00549 /* Get the options */ +00550 vGetOptions(&tOptions); +00551 +00552 /* Get the status flags from the header */ +00553 usDocStatus = usGetWord(0x0a, aucHeader); +00554 DBG_HEX(usDocStatus); +00555 bFastSaved = (usDocStatus & BIT(2)) != 0; +00556 bHasImages = (usDocStatus & BIT(3)) != 0; +00557 +00558 if (!bHasImages || +00559 tOptions.eConversionType == conversion_text || +00560 tOptions.eConversionType == conversion_fmt_text || +00561 tOptions.eConversionType == conversion_xml || +00562 tOptions.eImageLevel == level_no_images) { +00563 /* +00564 * No images in the document or text-only output or +00565 * no images wanted, so no data blocks will be needed +00566 */ +00567 vDestroyDataBlockList(); +00568 return; +00569 } +00570 +00571 /* Get length information */ +00572 ulBeginOfText = ulGetLong(0x18, aucHeader); +00573 DBG_HEX(ulBeginOfText); +00574 +00575 /* Make a list of the data blocks */ +00576 switch (iWordVersion) { +00577 case 6: +00578 case 7: +00579 /* +00580 * The data blocks are in the text stream. The text stream +00581 * is in "fast saved" format or "normal saved" format +00582 */ +00583 if (bFastSaved) { +00584 bSuccess = bGet6DocumentData(pFile, +00585 pPPS->tWordDocument.ulSB, +00586 aulBBD, tBBDLen, +00587 aucHeader); +00588 } else { +00589 bSuccess = bAddDataBlocks(ulBeginOfText, +00590 (ULONG)LONG_MAX, +00591 pPPS->tWordDocument.ulSB, +00592 aulBBD, tBBDLen); +00593 } +00594 break; +00595 case 8: +00596 /* +00597 * The data blocks are in the data stream. The data stream +00598 * is always in "normal saved" format +00599 */ +00600 bSuccess = bAddDataBlocks(0, (ULONG)LONG_MAX, +00601 pPPS->tData.ulSB, aulBBD, tBBDLen); +00602 break; +00603 default: +00604 werr(0, "This version of Word is not supported"); +00605 bSuccess = FALSE; +00606 break; +00607 } +00608 +00609 if (!bSuccess) { +00610 vDestroyDataBlockList(); +00611 werr(0, "I can't find the data of this document"); +00612 } +00613 } /* end of vGetDocumentData */ +00614 +00615 /* +00616 * iInitDocumentOLE - initialize an OLE document +00617 * +00618 * Returns the version of Word that made the document or -1 +00619 */ +00620 int +00621 iInitDocumentOLE(FILE *pFile, long lFilesize) +00622 { +00623 pps_info_type PPS_info; +00624 ULONG *aulBBD, *aulSBD; +00625 ULONG *aulRootList, *aulBbdList, *aulSbdList; +00626 ULONG ulBdbListStart, ulAdditionalBBDlist; +00627 ULONG ulRootStartblock, ulSbdStartblock, ulSBLstartblock; +00628 ULONG ulStart, ulTmp; +00629 long lMaxBlock; +00630 size_t tBBDLen, tSBDLen, tNumBbdBlocks, tRootListLen; +00631 int iWordVersion, iIndex, iToGo; +00632 BOOL bSuccess; +00633 USHORT usIdent, usDocStatus; +00634 UCHAR aucHeader[HEADER_SIZE]; +00635 +00636 fail(pFile == NULL); +00637 +00638 lMaxBlock = lFilesize / BIG_BLOCK_SIZE - 2; +00639 DBG_DEC(lMaxBlock); +00640 if (lMaxBlock < 1) { +00641 return -1; +00642 } +00643 tBBDLen = (size_t)(lMaxBlock + 1); +00644 tNumBbdBlocks = (size_t)ulReadLong(pFile, 0x2c); +00645 DBG_DEC(tNumBbdBlocks); +00646 ulRootStartblock = ulReadLong(pFile, 0x30); +00647 DBG_DEC(ulRootStartblock); +00648 ulSbdStartblock = ulReadLong(pFile, 0x3c); +00649 DBG_DEC(ulSbdStartblock); +00650 ulAdditionalBBDlist = ulReadLong(pFile, 0x44); +00651 DBG_HEX(ulAdditionalBBDlist); +00652 ulSBLstartblock = ulReadLong(pFile, +00653 (ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x74); +00654 DBG_DEC(ulSBLstartblock); +00655 tSBDLen = (size_t)(ulReadLong(pFile, +00656 (ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x78) / +00657 SMALL_BLOCK_SIZE); +00658 /* All to be xcalloc-ed pointers to NULL */ +00659 aulRootList = NULL; +00660 aulSbdList = NULL; +00661 aulBbdList = NULL; +00662 aulSBD = NULL; +00663 aulBBD = NULL; +00664 /* Big Block Depot */ +00665 aulBbdList = xcalloc(tNumBbdBlocks, sizeof(ULONG)); +00666 aulBBD = xcalloc(tBBDLen, sizeof(ULONG)); +00667 iToGo = (int)tNumBbdBlocks; +00668 vGetBbdList(pFile, min(iToGo, 109), aulBbdList, 0x4c); +00669 ulStart = 109; +00670 iToGo -= 109; +00671 while (ulAdditionalBBDlist != END_OF_CHAIN && iToGo > 0) { +00672 ulBdbListStart = (ulAdditionalBBDlist + 1) * BIG_BLOCK_SIZE; +00673 vGetBbdList(pFile, min(iToGo, 127), +00674 aulBbdList + ulStart, ulBdbListStart); +00675 ulAdditionalBBDlist = ulReadLong(pFile, +00676 ulBdbListStart + 4 * 127); +00677 DBG_DEC(ulAdditionalBBDlist); +00678 DBG_HEX(ulAdditionalBBDlist); +00679 ulStart += 127; +00680 iToGo -= 127; +00681 } +00682 if (!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen)) { +00683 FREE_ALL(); +00684 return -1; +00685 } +00686 aulBbdList = xfree(aulBbdList); +00687 /* Small Block Depot */ +00688 aulSbdList = xcalloc(tBBDLen, sizeof(ULONG)); +00689 aulSBD = xcalloc(tSBDLen, sizeof(ULONG)); +00690 for (iIndex = 0, ulTmp = ulSbdStartblock; +00691 iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN; +00692 iIndex++, ulTmp = aulBBD[ulTmp]) { +00693 if (ulTmp >= (ULONG)tBBDLen) { +00694 DBG_DEC(ulTmp); +00695 DBG_DEC(tBBDLen); +00696 werr(1, "The Big Block Depot is damaged"); +00697 } +00698 aulSbdList[iIndex] = ulTmp; +00699 NO_DBG_HEX(aulSbdList[iIndex]); +00700 } +00701 if (!bGetSBD(pFile, aulSbdList, tBBDLen, aulSBD, tSBDLen)) { +00702 FREE_ALL(); +00703 return -1; +00704 } +00705 aulSbdList = xfree(aulSbdList); +00706 /* Root list */ +00707 for (tRootListLen = 0, ulTmp = ulRootStartblock; +00708 tRootListLen < tBBDLen && ulTmp != END_OF_CHAIN; +00709 tRootListLen++, ulTmp = aulBBD[ulTmp]) { +00710 if (ulTmp >= (ULONG)tBBDLen) { +00711 DBG_DEC(ulTmp); +00712 DBG_DEC(tBBDLen); +00713 werr(1, "The Big Block Depot is damaged"); +00714 } +00715 } +00716 if (tRootListLen == 0) { +00717 werr(0, "No Rootlist found"); +00718 FREE_ALL(); +00719 return -1; +00720 } +00721 aulRootList = xcalloc(tRootListLen, sizeof(ULONG)); +00722 for (iIndex = 0, ulTmp = ulRootStartblock; +00723 iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN; +00724 iIndex++, ulTmp = aulBBD[ulTmp]) { +00725 if (ulTmp >= (ULONG)tBBDLen) { +00726 DBG_DEC(ulTmp); +00727 DBG_DEC(tBBDLen); +00728 werr(1, "The Big Block Depot is damaged"); +00729 } +00730 aulRootList[iIndex] = ulTmp; +00731 NO_DBG_DEC(aulRootList[iIndex]); +00732 } +00733 fail(tRootListLen != (size_t)iIndex); +00734 bSuccess = bGetPPS(pFile, aulRootList, tRootListLen, &PPS_info); +00735 aulRootList = xfree(aulRootList); +00736 if (!bSuccess) { +00737 FREE_ALL(); +00738 return -1; +00739 } +00740 /* Small block list */ +00741 if (!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen)) { +00742 FREE_ALL(); +00743 return -1; +00744 } +00745 +00746 if (PPS_info.tWordDocument.ulSize < MIN_SIZE_FOR_BBD_USE) { +00747 DBG_DEC(PPS_info.tWordDocument.ulSize); +00748 FREE_ALL(); +00749 werr(0, "I'm afraid the text stream of this file " +00750 "is too small to handle."); +00751 return -1; +00752 } +00753 /* Read the headerblock */ +00754 if (!bReadBuffer(pFile, PPS_info.tWordDocument.ulSB, +00755 aulBBD, tBBDLen, BIG_BLOCK_SIZE, +00756 aucHeader, 0, HEADER_SIZE)) { +00757 FREE_ALL(); +00758 return -1; +00759 } +00760 usIdent = usGetWord(0x00, aucHeader); +00761 DBG_HEX(usIdent); +00762 fail(usIdent != 0x8098 && /* Word 7 for oriental languages */ +00763 usIdent != 0x8099 && /* Word 7 for oriental languages */ +00764 usIdent != 0xa5dc && /* Word 6 & 7 */ +00765 usIdent != 0xa5ec && /* Word 7 & 97 & 98 */ +00766 usIdent != 0xa697 && /* Word 7 for oriental languages */ +00767 usIdent != 0xa699); /* Word 7 for oriental languages */ +00768 iWordVersion = iGetVersionNumber(aucHeader); +00769 if (iWordVersion < 6) { +00770 FREE_ALL(); +00771 werr(0, "This file is from a version of Word before Word 6."); +00772 return -1; +00773 } +00774 +00775 /* Get the status flags from the header */ +00776 usDocStatus = usGetWord(0x0a, aucHeader); +00777 if (usDocStatus & BIT(9)) { +00778 PPS_info.tTable = PPS_info.t1Table; +00779 } else { +00780 PPS_info.tTable = PPS_info.t0Table; +00781 } +00782 /* Clean the entries that should not be used */ +00783 memset(&PPS_info.t0Table, 0, sizeof(PPS_info.t0Table)); +00784 memset(&PPS_info.t1Table, 0, sizeof(PPS_info.t1Table)); +00785 +00786 bSuccess = bGetDocumentText(pFile, &PPS_info, +00787 aulBBD, tBBDLen, aulSBD, tSBDLen, +00788 aucHeader, iWordVersion); +00789 if (bSuccess) { +00790 vGetDocumentData(pFile, &PPS_info, +00791 aulBBD, tBBDLen, aucHeader, iWordVersion); +00792 vGetPropertyInfo(pFile, &PPS_info, +00793 aulBBD, tBBDLen, aulSBD, tSBDLen, +00794 aucHeader, iWordVersion); +00795 vSetDefaultTabWidth(pFile, &PPS_info, +00796 aulBBD, tBBDLen, aulSBD, tSBDLen, +00797 aucHeader, iWordVersion); +00798 vGetNotesInfo(pFile, &PPS_info, +00799 aulBBD, tBBDLen, aulSBD, tSBDLen, +00800 aucHeader, iWordVersion); +00801 } +00802 FREE_ALL(); +00803 return bSuccess ? iWordVersion : -1; +00804 } /* end of iInitDocumentOLE */ +