TB9.2 Example Applications: examples/PIPS/antiword/src/prop0.c Source File

00001 /*
+00002  * prop0.c
+00003  * Copyright (C) 2002-2004 A.J. van Os; Released under GNU GPL
+00004  *
+00005  * Description:
+00006  * Read the property information from a Word for DOS file
+00007  */
+00008 
#include <ctype.h>
#include <string.h>
#include <time.h>
#include "antiword.h"
+00012 
+00013 
/*
 * iGetDosDateField - read a one or two digit decimal number
 *
 * Reads the number at *ppucPos and moves *ppucPos past the digits used.
 *
 * returns the number (0-99) or -1 when *ppucPos is not at a digit
 */
static int
iGetDosDateField(const unsigned char **ppucPos)
{
	const unsigned char	*pucTmp;
	int	iValue;

	pucTmp = *ppucPos;
	if (!isdigit(*pucTmp)) {
		return -1;
	}
	iValue = (int)(*pucTmp - '0');
	pucTmp++;
	if (isdigit(*pucTmp)) {
		iValue = iValue * 10 + (int)(*pucTmp - '0');
		pucTmp++;
	}
	*ppucPos = pucTmp;
	return iValue;
} /* end of iGetDosDateField */

/*
 * iSkipDosDateSeparator - step over a single separator character
 *
 * Any character that is not a letter or a digit counts as a separator,
 * except the end-of-string marker: stepping over that one would make the
 * caller read beyond the end of the string.
 *
 * returns 1 when a separator was skipped, otherwise 0
 */
static int
iSkipDosDateSeparator(const unsigned char **ppucPos)
{
	if (**ppucPos == '\0' || isalnum(**ppucPos)) {
		return 0;
	}
	(*ppucPos)++;
	return 1;
} /* end of iSkipDosDateSeparator */

/*
 * tConvertDosDate - convert DOS date format
 *
 * The expected format is M[M]<sep>D[D]<sep>Y[Y], where <sep> is one
 * character that is neither a letter nor a digit. Anything following the
 * year is ignored, so no more than eight characters are examined.
 * Years below 80 are taken to be in the 2000s, all others in the 1900s.
 * The date is interpreted as local time (midnight) by mktime().
 *
 * returns Unix time_t or -1
 */
static time_t
tConvertDosDate(const char *szDosDate)
{
	struct tm	tTime;
	const unsigned char	*pucTmp;

	if (szDosDate == NULL) {
		return (time_t)-1;
	}

	memset(&tTime, 0, sizeof(tTime));
	/*
	 * Work on unsigned characters: the <ctype.h> functions are not
	 * defined for negative values other than EOF.
	 */
	pucTmp = (const unsigned char *)szDosDate;

	/* Get the month and the first separator */
	tTime.tm_mon = iGetDosDateField(&pucTmp);
	if (tTime.tm_mon < 0 || !iSkipDosDateSeparator(&pucTmp)) {
		return (time_t)-1;
	}
	/* Get the day and the second separator */
	tTime.tm_mday = iGetDosDateField(&pucTmp);
	if (tTime.tm_mday < 0 || !iSkipDosDateSeparator(&pucTmp)) {
		return (time_t)-1;
	}
	/* Get the year */
	tTime.tm_year = iGetDosDateField(&pucTmp);
	if (tTime.tm_year < 0) {
		return (time_t)-1;
	}

	/* Check the values; mktime would silently normalise bad ones */
	if (tTime.tm_mon < 1 || tTime.tm_mon > 12 ||
	    tTime.tm_mday < 1 || tTime.tm_mday > 31) {
		return (time_t)-1;
	}

	/* Correct the values */
	tTime.tm_mon--;		/* From 01-12 to 00-11 */
	if (tTime.tm_year < 80) {
		tTime.tm_year += 100;	/* 00 means 2000 is 100 */
	}
	tTime.tm_isdst = -1;	/* Let mktime decide about daylight saving */
	return mktime(&tTime);
} /* end of tConvertDosDate */
+00085 
+00086 /*
+00087  * Build the lists with Document Property Information for Word for DOS files
+00088  */
+00089 void
+00090 vGet0DopInfo(FILE *pFile, const UCHAR *aucHeader)
+00091 {
+00092         document_block_type     tDocument;
+00093         UCHAR   *aucBuffer;
+00094         ULONG   ulBeginSumdInfo, ulBeginNextBlock;
+00095         size_t  tLen;
+00096         USHORT  usOffset;
+00097 
+00098         tDocument.ucHdrFtrSpecification = 0;
+00099         tDocument.usDefaultTabWidth = usGetWord(0x70, aucHeader); /* dxaTab */
+00100         tDocument.tCreateDate = (time_t)-1;
+00101         tDocument.tRevisedDate = (time_t)-1;
+00102 
+00103         ulBeginSumdInfo = 128 * (ULONG)usGetWord(0x1c, aucHeader);
+00104         DBG_HEX(ulBeginSumdInfo);
+00105         ulBeginNextBlock = 128 * (ULONG)usGetWord(0x6a, aucHeader);
+00106         DBG_HEX(ulBeginNextBlock);
+00107 
+00108         if (ulBeginSumdInfo < ulBeginNextBlock && ulBeginNextBlock != 0) {
+00109                 /* There is a summary information block */
+00110                 tLen = (size_t)(ulBeginNextBlock - ulBeginSumdInfo);
+00111                 aucBuffer = xmalloc(tLen);
+00112                 /* Read the summary information block */
+00113                 if (bReadBytes(aucBuffer, tLen, ulBeginSumdInfo, pFile)) {
+00114                         usOffset = usGetWord(12, aucBuffer);
+00115                         if (aucBuffer[usOffset] != 0) {
+00116                                 NO_DBG_STRN(aucBuffer + usOffset, 8);
+00117                                 tDocument.tRevisedDate =
+00118                                 tConvertDosDate((char *)aucBuffer + usOffset);
+00119                         }
+00120                         usOffset = usGetWord(14, aucBuffer);
+00121                         if (aucBuffer[usOffset] != 0) {
+00122                                 NO_DBG_STRN(aucBuffer + usOffset, 8);
+00123                                 tDocument.tCreateDate =
+00124                                 tConvertDosDate((char *)aucBuffer + usOffset);
+00125                         }
+00126                 }
+00127                 aucBuffer = xfree(aucBuffer);
+00128         }
+00129         vCreateDocumentInfoList(&tDocument);
+00130 } /* end of vGet0DopInfo */
+00131 
+00132 /*
+00133  * Fill the section information block with information
+00134  * from a Word for DOS file.
+00135  */
+00136 static void
+00137 vGet0SectionInfo(const UCHAR *aucGrpprl, size_t tBytes,
+00138                 section_block_type *pSection)
+00139 {
+00140         USHORT  usCcol;
+00141         UCHAR   ucTmp;
+00142 
+00143         fail(aucGrpprl == NULL || pSection == NULL);
+00144 
+00145         if (tBytes < 2) {
+00146                 return;
+00147         }
+00148         /* bkc */
+00149         ucTmp = ucGetByte(1, aucGrpprl);
+00150         DBG_HEX(ucTmp);
+00151         ucTmp &= 0x07;
+00152         DBG_HEX(ucTmp);
+00153         pSection->bNewPage = ucTmp != 0 && ucTmp != 1;
+00154         if (tBytes < 18) {
+00155                 return;
+00156         }
+00157         /* ccolM1 */
+00158         usCcol = (USHORT)ucGetByte(17, aucGrpprl);
+00159         DBG_DEC(usCcol);
+00160 } /* end of vGet0SectionInfo */
+00161 
+00162 /*
+00163  * Build the lists with Section Property Information for Word for DOS files
+00164  */
+00165 void
+00166 vGet0SepInfo(FILE *pFile, const UCHAR *aucHeader)
+00167 {
+00168         section_block_type      tSection;
+00169         UCHAR   *aucBuffer;
+00170         ULONG   ulBeginOfText, ulTextOffset, ulBeginSectInfo;
+00171         ULONG   ulCharPos, ulSectPage, ulBeginNextBlock;
+00172         size_t  tSectInfoLen, tIndex, tSections, tBytes;
+00173         UCHAR   aucTmp[2], aucFpage[35];
+00174 
+00175         fail(pFile == NULL || aucHeader == NULL);
+00176 
+00177         ulBeginOfText = 128;
+00178         NO_DBG_HEX(ulBeginOfText);
+00179         ulBeginSectInfo = 128 * (ULONG)usGetWord(0x18, aucHeader);
+00180         DBG_HEX(ulBeginSectInfo);
+00181         ulBeginNextBlock = 128 * (ULONG)usGetWord(0x1a, aucHeader);
+00182         DBG_HEX(ulBeginNextBlock);
+00183         if (ulBeginSectInfo == ulBeginNextBlock) {
+00184                 /* There is no section information block */
+00185                 return;
+00186         }
+00187 
+00188         /* Get the the number of sections */
+00189         if (!bReadBytes(aucTmp, 2, ulBeginSectInfo, pFile)) {
+00190                 return;
+00191         }
+00192         tSections = (size_t)usGetWord(0, aucTmp);
+00193         NO_DBG_DEC(tSections);
+00194 
+00195         /* Read the Section Descriptors */
+00196         tSectInfoLen = 10 * tSections;
+00197         NO_DBG_DEC(tSectInfoLen);
+00198         aucBuffer = xmalloc(tSectInfoLen);
+00199         if (!bReadBytes(aucBuffer, tSectInfoLen, ulBeginSectInfo + 4, pFile)) {
+00200                 aucBuffer = xfree(aucBuffer);
+00201                 return;
+00202         }
+00203         NO_DBG_PRINT_BLOCK(aucBuffer, tSectInfoLen);
+00204 
+00205         /* Read the Section Properties */
+00206         for (tIndex = 0; tIndex < tSections; tIndex++) {
+00207                 ulTextOffset = ulGetLong(10 * tIndex, aucBuffer);
+00208                 NO_DBG_HEX(ulTextOffset);
+00209                 ulCharPos = ulBeginOfText + ulTextOffset;
+00210                 NO_DBG_HEX(ulTextOffset);
+00211                 ulSectPage = ulGetLong(10 * tIndex + 6, aucBuffer);
+00212                 NO_DBG_HEX(ulSectPage);
+00213                 if (ulSectPage == FC_INVALID ||         /* Must use defaults */
+00214                     ulSectPage < 128 ||                 /* Should not happen */
+00215                     ulSectPage >= ulBeginSectInfo) {    /* Should not happen */
+00216                         DBG_HEX_C(ulSectPage != FC_INVALID, ulSectPage);
+00217                         vDefault2SectionInfoList(ulCharPos);
+00218                         continue;
+00219                 }
+00220                 /* Get the number of bytes to read */
+00221                 if (!bReadBytes(aucTmp, 1, ulSectPage, pFile)) {
+00222                         continue;
+00223                 }
+00224                 tBytes = 1 + (size_t)ucGetByte(0, aucTmp);
+00225                 NO_DBG_DEC(tBytes);
+00226                 if (tBytes > sizeof(aucFpage)) {
+00227                         DBG_DEC(tBytes);
+00228                         tBytes = sizeof(aucFpage);
+00229                 }
+00230                 /* Read the bytes */
+00231                 if (!bReadBytes(aucFpage, tBytes, ulSectPage, pFile)) {
+00232                         continue;
+00233                 }
+00234                 NO_DBG_PRINT_BLOCK(aucFpage, tBytes);
+00235                 /* Process the bytes */
+00236                 vGetDefaultSection(&tSection);
+00237                 vGet0SectionInfo(aucFpage + 1, tBytes - 1, &tSection);
+00238                 vAdd2SectionInfoList(&tSection, ulCharPos);
+00239         }
+00240         /* Clean up before you leave */
+00241         aucBuffer = xfree(aucBuffer);
+00242 } /* end of vGet0SepInfo */
+00243 
+00244 /*
+00245  * Fill the style information block with information
+00246  * from a Word for DOS file.
+00247  */
+00248 static void
+00249 vGet0StyleInfo(int iFodo, const UCHAR *aucGrpprl, style_block_type *pStyle)
+00250 {
+00251         int     iBytes;
+00252         UCHAR   ucTmp;
+00253 
+00254         fail(iFodo <= 0 || aucGrpprl == NULL || pStyle == NULL);
+00255 
+00256         pStyle->usIstdNext = ISTD_NORMAL;
+00257 
+00258         iBytes = (int)ucGetByte(iFodo, aucGrpprl);
+00259         if (iBytes < 1) {
+00260                 return;
+00261         }
+00262         /* stc if styled */
+00263         ucTmp = ucGetByte(iFodo + 1, aucGrpprl);
+00264         if ((ucTmp & BIT(0)) != 0) {
+00265                 ucTmp >>= 1;
+00266                 if (ucTmp >= 88 && ucTmp <= 94) {
+00267                         /* Header levels 1 through 7 */
+00268                         pStyle->usIstd = ucTmp - 87;
+00269                         pStyle->ucNumLevel = 1;
+00270                 }
+00271         }
+00272         if (iBytes < 2) {
+00273                 return;
+00274         }
+00275         /* jc */
+00276         ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
+00277         pStyle->ucAlignment = ucTmp & 0x02;
+00278         if (iBytes < 3) {
+00279                 return;
+00280         }
+00281         /* stc */
+00282         ucTmp = ucGetByte(iFodo + 3, aucGrpprl);
+00283         ucTmp &= 0x7f;
+00284         if (ucTmp >= 88 && ucTmp <= 94) {
+00285                 /* Header levels 1 through 7 */
+00286                 pStyle->usIstd = ucTmp - 87;
+00287                 pStyle->ucNumLevel = 1;
+00288         }
+00289         if (iBytes < 6) {
+00290                 return;
+00291         }
+00292         /* dxaRight */
+00293         pStyle->sRightIndent = (short)usGetWord(iFodo + 5, aucGrpprl);
+00294         NO_DBG_DEC(pStyle->sRightIndent);
+00295         if (iBytes < 8) {
+00296                 return;
+00297         }
+00298         /* dxaLeft */
+00299         pStyle->sLeftIndent = (short)usGetWord(iFodo + 7, aucGrpprl);
+00300         NO_DBG_DEC(pStyle->sLeftIndent);
+00301         if (iBytes < 10) {
+00302                 return;
+00303         }
+00304         /* dxaLeft1 */
+00305         pStyle->sLeftIndent1 = (short)usGetWord(iFodo + 9, aucGrpprl);
+00306         NO_DBG_DEC(pStyle->sLeftIndent1);
+00307         if (iBytes < 14) {
+00308                 return;
+00309         }
+00310         /* dyaBefore */
+00311         pStyle->usBeforeIndent = usGetWord(iFodo + 13, aucGrpprl);
+00312         NO_DBG_DEC(pStyle->usBeforeIndent);
+00313         if (iBytes < 16) {
+00314                 return;
+00315         }
+00316         /* dyaAfter */
+00317         pStyle->usAfterIndent = usGetWord(iFodo + 15, aucGrpprl);
+00318         NO_DBG_DEC(pStyle->usAfterIndent);
+00319 } /* end of vGet0StyleInfo */
+00320 
+00321 /*
+00322  * Build the lists with Paragraph Information for Word for DOS files
+00323  */
+00324 void
+00325 vGet0PapInfo(FILE *pFile, const UCHAR *aucHeader)
+00326 {
+00327         style_block_type        tStyle;
+00328         ULONG   ulBeginParfInfo, ulCharPos, ulCharPosNext;
+00329         int     iIndex, iRun, iFodo;
+00330         UCHAR   aucFpage[128];
+00331 
+00332         fail(pFile == NULL || aucHeader == NULL);
+00333 
+00334         ulBeginParfInfo = 128 * (ULONG)usGetWord(0x12, aucHeader);
+00335         NO_DBG_HEX(ulBeginParfInfo);
+00336 
+00337         do {
+00338                 if (!bReadBytes(aucFpage, 128, ulBeginParfInfo, pFile)) {
+00339                         return;
+00340                 }
+00341                 NO_DBG_PRINT_BLOCK(aucFpage, 128);
+00342                 ulCharPosNext = ulGetLong(0, aucFpage);
+00343                 iRun = (int)ucGetByte(0x7f, aucFpage);
+00344                 NO_DBG_DEC(iRun);
+00345                 for (iIndex = 0; iIndex < iRun; iIndex++) {
+00346                         iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
+00347                         if (iFodo <= 0 || iFodo > 0x79) {
+00348                                 DBG_DEC_C(iFodo != (int)0xffff, iFodo);
+00349                                 continue;
+00350                         }
+00351                         vFillStyleFromStylesheet(0, &tStyle);
+00352                         vGet0StyleInfo(iFodo, aucFpage + 4, &tStyle);
+00353                         ulCharPos = ulCharPosNext;
+00354                         ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
+00355                         tStyle.ulFileOffset = ulCharPos;
+00356                         vAdd2StyleInfoList(&tStyle);
+00357                 }
+00358                 ulBeginParfInfo += 128;
+00359         } while (ulCharPosNext == ulBeginParfInfo);
+00360 } /* end of vGet0PapInfo */
+00361 
+00362 /*
+00363  * Fill the font information block with information
+00364  * from a Word for DOS file.
+00365  */
+00366 static void
+00367 vGet0FontInfo(int iFodo, const UCHAR *aucGrpprl, font_block_type *pFont)
+00368 {
+00369         int     iBytes;
+00370         UCHAR   ucTmp;
+00371 
+00372         fail(iFodo <= 0 || aucGrpprl == NULL || pFont == NULL);
+00373 
+00374         iBytes = (int)ucGetByte(iFodo, aucGrpprl);
+00375         if (iBytes < 2) {
+00376                 return;
+00377         }
+00378         /* fBold, fItalic, cFtc */
+00379         ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
+00380         if ((ucTmp & BIT(0)) != 0) {
+00381                 pFont->usFontStyle |= FONT_BOLD;
+00382         }
+00383         if ((ucTmp & BIT(1)) != 0) {
+00384                 pFont->usFontStyle |= FONT_ITALIC;
+00385         }
+00386         pFont->ucFontNumber = ucTmp >> 2;
+00387         NO_DBG_DEC(pFont->ucFontNumber);
+00388         if (iBytes < 3) {
+00389                 return;
+00390         }
+00391         /* cHps */
+00392         pFont->usFontSize = (USHORT)ucGetByte(iFodo + 3, aucGrpprl);
+00393         NO_DBG_DEC(pFont->usFontSize);
+00394         if (iBytes < 4) {
+00395                 return;
+00396         }
+00397         /* cKul, fStrike, fCaps, fSmallCaps, fVanish */
+00398         ucTmp = ucGetByte(iFodo + 4, aucGrpprl);
+00399         if ((ucTmp & BIT(0)) != 0 || (ucTmp & BIT(2)) != 0) {
+00400                 pFont->usFontStyle |= FONT_UNDERLINE;
+00401         }
+00402         if ((ucTmp & BIT(1)) != 0) {
+00403                 pFont->usFontStyle |= FONT_STRIKE;
+00404         }
+00405         if ((ucTmp & BIT(4)) != 0) {
+00406                 pFont->usFontStyle |= FONT_CAPITALS;
+00407         }
+00408         if ((ucTmp & BIT(5)) != 0) {
+00409                 pFont->usFontStyle |= FONT_SMALL_CAPITALS;
+00410         }
+00411         if ((ucTmp & BIT(7)) != 0) {
+00412                 pFont->usFontStyle |= FONT_HIDDEN;
+00413         }
+00414         DBG_HEX(pFont->usFontStyle);
+00415         if (iBytes < 6) {
+00416                 return;
+00417         }
+00418         /* cIss */
+00419         ucTmp = ucGetByte(iFodo + 6, aucGrpprl);
+00420         if (ucTmp != 0) {
+00421                 if (ucTmp < 128) {
+00422                         pFont->usFontStyle |= FONT_SUPERSCRIPT;
+00423                         DBG_MSG("Superscript");
+00424                 } else {
+00425                         pFont->usFontStyle |= FONT_SUBSCRIPT;
+00426                         DBG_MSG("Subscript");
+00427                 }
+00428         }
+00429         if (iBytes < 7) {
+00430                 return;
+00431         }
+00432         /* cIco */
+00433         ucTmp = ucGetByte(iFodo + 7, aucGrpprl);
+00434         switch (ucTmp & 0x07) {
+00435         case 0: pFont->ucFontColor = FONT_COLOR_BLACK; break;
+00436         case 1: pFont->ucFontColor = FONT_COLOR_RED; break;
+00437         case 2: pFont->ucFontColor = FONT_COLOR_GREEN; break;
+00438         case 3: pFont->ucFontColor = FONT_COLOR_BLUE; break;
+00439         case 4: pFont->ucFontColor = FONT_COLOR_CYAN; break;
+00440         case 5: pFont->ucFontColor = FONT_COLOR_MAGENTA; break;
+00441         case 6: pFont->ucFontColor = FONT_COLOR_YELLOW; break;
+00442         case 7: pFont->ucFontColor = FONT_COLOR_WHITE; break;
+00443         default:pFont->ucFontColor = FONT_COLOR_BLACK; break;
+00444         }
+00445         NO_DBG_DEC(pFont->ucFontColor);
+00446 } /* end of vGet0FontInfo */
+00447 
+00448 /*
+00449  * Build the lists with Character Information for Word for DOS files
+00450  */
+00451 void
+00452 vGet0ChrInfo(FILE *pFile, const UCHAR *aucHeader)
+00453 {
+00454         font_block_type         tFont;
+00455         ULONG   ulBeginCharInfo, ulCharPos, ulCharPosNext;
+00456         int     iIndex, iRun, iFodo;
+00457         UCHAR   aucFpage[128];
+00458 
+00459         fail(pFile == NULL || aucHeader == NULL);
+00460 
+00461         ulBeginCharInfo = ulGetLong(0x0e, aucHeader);
+00462         NO_DBG_HEX(ulBeginCharInfo);
+00463         ulBeginCharInfo = ROUND128(ulBeginCharInfo);
+00464         NO_DBG_HEX(ulBeginCharInfo);
+00465 
+00466         do {
+00467                 if (!bReadBytes(aucFpage, 128, ulBeginCharInfo, pFile)) {
+00468                         return;
+00469                 }
+00470                 NO_DBG_PRINT_BLOCK(aucFpage, 128);
+00471                 ulCharPosNext = ulGetLong(0, aucFpage);
+00472                 iRun = (int)ucGetByte(0x7f, aucFpage);
+00473                 NO_DBG_DEC(iRun);
+00474                 for (iIndex = 0; iIndex < iRun; iIndex++) {
+00475                         iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
+00476                         if (iFodo <= 0 || iFodo > 0x79) {
+00477                                 DBG_DEC_C(iFodo != (int)0xffff, iFodo);
+00478                                 continue;
+00479                         }
+00480                         vFillFontFromStylesheet(0, &tFont);
+00481                         vGet0FontInfo(iFodo, aucFpage + 4, &tFont);
+00482                         ulCharPos = ulCharPosNext;
+00483                         ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
+00484                         tFont.ulFileOffset = ulCharPos;
+00485                         vAdd2FontInfoList(&tFont);
+00486                 }
+00487                 ulBeginCharInfo += 128;
+00488         } while (ulCharPosNext == ulBeginCharInfo);
+00489 } /* end of vGet0ChrInfo */
+