symbian-qemu-0.9.1-12/zlib-1.2.3/examples/gzappend.c
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 /* gzappend -- command to append to a gzip file
       
     2 
       
     3   Copyright (C) 2003 Mark Adler, all rights reserved
       
     4   version 1.1, 4 Nov 2003
       
     5 
       
     6   This software is provided 'as-is', without any express or implied
       
     7   warranty.  In no event will the author be held liable for any damages
       
     8   arising from the use of this software.
       
     9 
       
    10   Permission is granted to anyone to use this software for any purpose,
       
    11   including commercial applications, and to alter it and redistribute it
       
    12   freely, subject to the following restrictions:
       
    13 
       
    14   1. The origin of this software must not be misrepresented; you must not
       
    15      claim that you wrote the original software. If you use this software
       
    16      in a product, an acknowledgment in the product documentation would be
       
    17      appreciated but is not required.
       
    18   2. Altered source versions must be plainly marked as such, and must not be
       
    19      misrepresented as being the original software.
       
    20   3. This notice may not be removed or altered from any source distribution.
       
    21 
       
    22   Mark Adler    madler@alumni.caltech.edu
       
    23  */
       
    24 
       
    25 /*
       
    26  * Change history:
       
    27  *
       
    28  * 1.0  19 Oct 2003     - First version
       
    29  * 1.1   4 Nov 2003     - Expand and clarify some comments and notes
       
    30  *                      - Add version and copyright to help
       
    31  *                      - Send help to stdout instead of stderr
       
    32  *                      - Add some preemptive typecasts
       
    33  *                      - Add L to constants in lseek() calls
       
    34  *                      - Remove some debugging information in error messages
       
    35  *                      - Use new data_type definition for zlib 1.2.1
       
    36  *                      - Simplify and unify file operations
       
    37  *                      - Finish off gzip file in gztack()
       
    38  *                      - Use deflatePrime() instead of adding empty blocks
       
    39  *                      - Keep gzip file clean on appended file read errors
       
    40  *                      - Use in-place rotate instead of auxiliary buffer
       
    41  *                        (Why you ask?  Because it was fun to write!)
       
    42  */
       
    43 
       
    44 /*
       
    45    gzappend takes a gzip file and appends to it, compressing files from the
       
    46    command line or data from stdin.  The gzip file is written to directly, to
       
    47    avoid copying that file, in case it's large.  Note that this results in the
       
    48    unfriendly behavior that if gzappend fails, the gzip file is corrupted.
       
    49 
       
    50    This program was written to illustrate the use of the new Z_BLOCK option of
       
    51    zlib 1.2.x's inflate() function.  This option returns from inflate() at each
       
    52    block boundary to facilitate locating and modifying the last block bit at
       
    53    the start of the final deflate block.  Also whether using Z_BLOCK or not,
       
    54    another required feature of zlib 1.2.x is that inflate() now provides the
       
    55    number of unused bits in the last input byte used.  gzappend will not work
       
    56    with versions of zlib earlier than 1.2.1.
       
    57 
       
    58    gzappend first decompresses the gzip file internally, discarding all but
       
    59    the last 32K of uncompressed data, and noting the location of the last block
       
    60    bit and the number of unused bits in the last byte of the compressed data.
       
    61    The gzip trailer containing the CRC-32 and length of the uncompressed data
       
    62    is verified.  This trailer will be later overwritten.
       
    63 
       
    64    Then the last block bit is cleared by seeking back in the file and rewriting
       
    65    the byte that contains it.  Seeking forward, the last byte of the compressed
       
    66    data is saved along with the number of unused bits to initialize deflate.
       
    67 
       
    68    A deflate process is initialized, using the last 32K of the uncompressed
       
    69    data from the gzip file to initialize the dictionary.  If the total
       
    70    uncompressed data was less than 32K, then all of it is used to initialize
       
    71    the dictionary.  The deflate output bit buffer is also initialized with the
       
    72    last bits from the original deflate stream.  From here on, the data to
       
    73    append is simply compressed using deflate, and written to the gzip file.
       
    74    When that is complete, the new CRC-32 and uncompressed length are written
       
    75    as the trailer of the gzip file.
       
    76  */
       
    77 
       
    78 #include <stdio.h>
       
    79 #include <stdlib.h>
       
    80 #include <string.h>
       
    81 #include <fcntl.h>
       
    82 #include <unistd.h>
       
    83 #include "zlib.h"
       
    84 
       
    85 #define local static
       
    86 #define LGCHUNK 14
       
    87 #define CHUNK (1U << LGCHUNK)
       
    88 #define DSIZE 32768U
       
    89 
       
    /* Print "gzappend error: " followed by msg1 and msg2 (concatenated) and a
       newline to stderr, then terminate the process with exit status 1.  This
       function never returns.  The messages are only read, so they are
       const-qualified; callers may pass string literals or writable strings. */
    static void bye(const char *msg1, const char *msg2)
    {
        fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2);
        exit(1);
    }
       
    96 
       
    /* Return the greatest common divisor of a and b; if either argument is zero
       the other one is returned.  This is a subtractive form of Euclid's
       algorithm: each step subtracts from the larger value the smaller value
       multiplied by a power of two, which keeps it fast when one argument is much
       greater than the other and avoids swapping the operands. */
    static unsigned gcd(unsigned a, unsigned b)
    {
        while (a != 0 && b != 0) {
            unsigned *big = a > b ? &a : &b;    /* the value to reduce */
            unsigned step = a > b ? b : a;      /* starts as the smaller value */

            /* double step while the doubled value still fits in *big; the
               shift cannot overflow because it is only taken when
               2 * step <= *big */
            while (*big - step >= step)
                step <<= 1;
            *big -= step;
        }
        return a + b;                           /* one of the two is zero here */
    }

    /* Rotate list[0..len-1] left by rot positions, in place: afterwards the
       element that was at index (i + rot) % len is at index i.  rot may be any
       value, it is reduced modulo len.  Lists with fewer than two elements are
       left untouched. */
    static void rotate(unsigned char *list, unsigned len, unsigned rot)
    {
        unsigned char tmp;
        unsigned cycles, start, from, to;

        /* normalize rot and handle degenerate cases */
        if (len < 2) return;
        if (rot >= len) rot %= len;
        if (rot == 0) return;

        /* simple left shift by one -- source and destination overlap, so this
           must be memmove(), memcpy() is undefined for overlapping regions */
        if (rot == 1) {
            tmp = list[0];
            memmove(list, list + 1, len - 1);
            list[len - 1] = tmp;
            return;
        }

        /* simple right shift by one */
        if (rot == len - 1) {
            tmp = list[len - 1];
            memmove(list + 1, list, len - 1);
            list[0] = tmp;
            return;
        }

        /* otherwise rotate as gcd(len, rot) independent cycles; indices are used
           instead of pointers so that no pointer beyond the end of the list is
           ever formed */
        cycles = gcd(len, rot);
        do {
            start = from = cycles;          /* start index is arbitrary */
            tmp = list[from];               /* save entry to be overwritten */
            for (;;) {
                to = from;                  /* next step in cycle */
                /* go right rot positions, wrapping around at len without
                   letting the index exceed len - 1 */
                from = from >= len - rot ? from - (len - rot) : from + rot;
                if (from == start) break;   /* all but one shifted */
                list[to] = list[from];      /* shift left */
            }
            list[to] = tmp;                 /* complete the circle */
        } while (--cycles);
    }
       
   166 
       
   /* Buffered-reader state for the gzip file being scanned. */
   typedef struct {
       /* descriptor the buffer is filled from */
       int fd;
       /* log2 of the buffer capacity: buf holds 1 << size bytes */
       int size;
       /* number of unread bytes remaining at next */
       unsigned left;
       /* start of the buffer */
       unsigned char *buf;
       /* next unread byte within buf */
       unsigned char *next;
       /* file name, used in error messages */
       char *name;
   } file;
       
   176 
       
   177 /* reload buffer */
       
   178 local int readin(file *in)
       
   179 {
       
   180     int len;
       
   181 
       
   182     len = read(in->fd, in->buf, 1 << in->size);
       
   183     if (len == -1) bye("error reading ", in->name);
       
   184     in->left = (unsigned)len;
       
   185     in->next = in->buf;
       
   186     return len;
       
   187 }
       
   188 
       
   189 /* read from file in, exit if end-of-file */
       
   190 local int readmore(file *in)
       
   191 {
       
   192     if (readin(in) == 0) bye("unexpected end of ", in->name);
       
   193     return 0;
       
   194 }
       
   195 
       
   /* Yield the next byte from in (a file *), refilling the buffer through
      readmore() first when it is empty.  The argument is parenthesized at every
      use so that any pointer expression works, but it is evaluated more than
      once, so it must not have side effects. */
   #define read1(in) ((in)->left == 0 ? readmore(in) : 0, \
                      (in)->left--, *((in)->next)++)
       
   198 
       
   199 /* skip over n bytes of in */
       
   200 local void skip(file *in, unsigned n)
       
   201 {
       
   202     unsigned bypass;
       
   203 
       
   204     if (n > in->left) {
       
   205         n -= in->left;
       
   206         bypass = n & ~((1U << in->size) - 1);
       
   207         if (bypass) {
       
   208             if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1)
       
   209                 bye("seeking ", in->name);
       
   210             n -= bypass;
       
   211         }
       
   212         readmore(in);
       
   213         if (n > in->left)
       
   214             bye("unexpected end of ", in->name);
       
   215     }
       
   216     in->left -= n;
       
   217     in->next += n;
       
   218 }
       
   219 
       
   220 /* read a four-byte unsigned integer, little-endian, from in */
       
   221 unsigned long read4(file *in)
       
   222 {
       
   223     unsigned long val;
       
   224 
       
   225     val = read1(in);
       
   226     val += (unsigned)read1(in) << 8;
       
   227     val += (unsigned long)read1(in) << 16;
       
   228     val += (unsigned long)read1(in) << 24;
       
   229     return val;
       
   230 }
       
   231 
       
   232 /* skip over gzip header */
       
   233 local void gzheader(file *in)
       
   234 {
       
   235     int flags;
       
   236     unsigned n;
       
   237 
       
   238     if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file");
       
   239     if (read1(in) != 8) bye("unknown compression method in", in->name);
       
   240     flags = read1(in);
       
   241     if (flags & 0xe0) bye("unknown header flags set in", in->name);
       
   242     skip(in, 6);
       
   243     if (flags & 4) {
       
   244         n = read1(in);
       
   245         n += (unsigned)(read1(in)) << 8;
       
   246         skip(in, n);
       
   247     }
       
   248     if (flags & 8) while (read1(in) != 0) ;
       
   249     if (flags & 16) while (read1(in) != 0) ;
       
   250     if (flags & 2) skip(in, 2);
       
   251 }
       
   252 
       
   253 /* decompress gzip file "name", return strm with a deflate stream ready to
       
   254    continue compression of the data in the gzip file, and return a file
       
   255    descriptor pointing to where to write the compressed data -- the deflate
       
   256    stream is initialized to compress using level "level" */
       
   257 local int gzscan(char *name, z_stream *strm, int level)
       
   258 {
       
   259     int ret, lastbit, left, full;
       
   260     unsigned have;
       
   261     unsigned long crc, tot;
       
   262     unsigned char *window;
       
   263     off_t lastoff, end;
       
   264     file gz;
       
   265 
       
   266     /* open gzip file */
       
   267     gz.name = name;
       
   268     gz.fd = open(name, O_RDWR, 0);
       
   269     if (gz.fd == -1) bye("cannot open ", name);
       
   270     gz.buf = malloc(CHUNK);
       
   271     if (gz.buf == NULL) bye("out of memory", "");
       
   272     gz.size = LGCHUNK;
       
   273     gz.left = 0;
       
   274 
       
   275     /* skip gzip header */
       
   276     gzheader(&gz);
       
   277 
       
   278     /* prepare to decompress */
       
   279     window = malloc(DSIZE);
       
   280     if (window == NULL) bye("out of memory", "");
       
   281     strm->zalloc = Z_NULL;
       
   282     strm->zfree = Z_NULL;
       
   283     strm->opaque = Z_NULL;
       
   284     ret = inflateInit2(strm, -15);
       
   285     if (ret != Z_OK) bye("out of memory", " or library mismatch");
       
   286 
       
   287     /* decompress the deflate stream, saving append information */
       
   288     lastbit = 0;
       
   289     lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
       
   290     left = 0;
       
   291     strm->avail_in = gz.left;
       
   292     strm->next_in = gz.next;
       
   293     crc = crc32(0L, Z_NULL, 0);
       
   294     have = full = 0;
       
   295     do {
       
   296         /* if needed, get more input */
       
   297         if (strm->avail_in == 0) {
       
   298             readmore(&gz);
       
   299             strm->avail_in = gz.left;
       
   300             strm->next_in = gz.next;
       
   301         }
       
   302 
       
   303         /* set up output to next available section of sliding window */
       
   304         strm->avail_out = DSIZE - have;
       
   305         strm->next_out = window + have;
       
   306 
       
   307         /* inflate and check for errors */
       
   308         ret = inflate(strm, Z_BLOCK);
       
   309         if (ret == Z_STREAM_ERROR) bye("internal stream error!", "");
       
   310         if (ret == Z_MEM_ERROR) bye("out of memory", "");
       
   311         if (ret == Z_DATA_ERROR)
       
   312             bye("invalid compressed data--format violated in", name);
       
   313 
       
   314         /* update crc and sliding window pointer */
       
   315         crc = crc32(crc, window + have, DSIZE - have - strm->avail_out);
       
   316         if (strm->avail_out)
       
   317             have = DSIZE - strm->avail_out;
       
   318         else {
       
   319             have = 0;
       
   320             full = 1;
       
   321         }
       
   322 
       
   323         /* process end of block */
       
   324         if (strm->data_type & 128) {
       
   325             if (strm->data_type & 64)
       
   326                 left = strm->data_type & 0x1f;
       
   327             else {
       
   328                 lastbit = strm->data_type & 0x1f;
       
   329                 lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in;
       
   330             }
       
   331         }
       
   332     } while (ret != Z_STREAM_END);
       
   333     inflateEnd(strm);
       
   334     gz.left = strm->avail_in;
       
   335     gz.next = strm->next_in;
       
   336 
       
   337     /* save the location of the end of the compressed data */
       
   338     end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
       
   339 
       
   340     /* check gzip trailer and save total for deflate */
       
   341     if (crc != read4(&gz))
       
   342         bye("invalid compressed data--crc mismatch in ", name);
       
   343     tot = strm->total_out;
       
   344     if ((tot & 0xffffffffUL) != read4(&gz))
       
   345         bye("invalid compressed data--length mismatch in", name);
       
   346 
       
   347     /* if not at end of file, warn */
       
   348     if (gz.left || readin(&gz))
       
   349         fprintf(stderr,
       
   350             "gzappend warning: junk at end of gzip file overwritten\n");
       
   351 
       
   352     /* clear last block bit */
       
   353     lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET);
       
   354     if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
       
   355     *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7)));
       
   356     lseek(gz.fd, -1L, SEEK_CUR);
       
   357     if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name);
       
   358 
       
   359     /* if window wrapped, build dictionary from window by rotating */
       
   360     if (full) {
       
   361         rotate(window, DSIZE, have);
       
   362         have = DSIZE;
       
   363     }
       
   364 
       
   365     /* set up deflate stream with window, crc, total_in, and leftover bits */
       
   366     ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
       
   367     if (ret != Z_OK) bye("out of memory", "");
       
   368     deflateSetDictionary(strm, window, have);
       
   369     strm->adler = crc;
       
   370     strm->total_in = tot;
       
   371     if (left) {
       
   372         lseek(gz.fd, --end, SEEK_SET);
       
   373         if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
       
   374         deflatePrime(strm, 8 - left, *gz.buf);
       
   375     }
       
   376     lseek(gz.fd, end, SEEK_SET);
       
   377 
       
   378     /* clean up and return */
       
   379     free(window);
       
   380     free(gz.buf);
       
   381     return gz.fd;
       
   382 }
       
   383 
       
   384 /* append file "name" to gzip file gd using deflate stream strm -- if last
       
   385    is true, then finish off the deflate stream at the end */
       
   386 local void gztack(char *name, int gd, z_stream *strm, int last)
       
   387 {
       
   388     int fd, len, ret;
       
   389     unsigned left;
       
   390     unsigned char *in, *out;
       
   391 
       
   392     /* open file to compress and append */
       
   393     fd = 0;
       
   394     if (name != NULL) {
       
   395         fd = open(name, O_RDONLY, 0);
       
   396         if (fd == -1)
       
   397             fprintf(stderr, "gzappend warning: %s not found, skipping ...\n",
       
   398                     name);
       
   399     }
       
   400 
       
   401     /* allocate buffers */
       
   402     in = fd == -1 ? NULL : malloc(CHUNK);
       
   403     out = malloc(CHUNK);
       
   404     if (out == NULL) bye("out of memory", "");
       
   405 
       
   406     /* compress input file and append to gzip file */
       
   407     do {
       
   408         /* get more input */
       
   409         len = fd == -1 ? 0 : read(fd, in, CHUNK);
       
   410         if (len == -1) {
       
   411             fprintf(stderr,
       
   412                     "gzappend warning: error reading %s, skipping rest ...\n",
       
   413                     name);
       
   414             len = 0;
       
   415         }
       
   416         strm->avail_in = (unsigned)len;
       
   417         strm->next_in = in;
       
   418         if (len) strm->adler = crc32(strm->adler, in, (unsigned)len);
       
   419 
       
   420         /* compress and write all available output */
       
   421         do {
       
   422             strm->avail_out = CHUNK;
       
   423             strm->next_out = out;
       
   424             ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH);
       
   425             left = CHUNK - strm->avail_out;
       
   426             while (left) {
       
   427                 len = write(gd, out + CHUNK - strm->avail_out - left, left);
       
   428                 if (len == -1) bye("writing gzip file", "");
       
   429                 left -= (unsigned)len;
       
   430             }
       
   431         } while (strm->avail_out == 0 && ret != Z_STREAM_END);
       
   432     } while (len != 0);
       
   433 
       
   434     /* write trailer after last entry */
       
   435     if (last) {
       
   436         deflateEnd(strm);
       
   437         out[0] = (unsigned char)(strm->adler);
       
   438         out[1] = (unsigned char)(strm->adler >> 8);
       
   439         out[2] = (unsigned char)(strm->adler >> 16);
       
   440         out[3] = (unsigned char)(strm->adler >> 24);
       
   441         out[4] = (unsigned char)(strm->total_in);
       
   442         out[5] = (unsigned char)(strm->total_in >> 8);
       
   443         out[6] = (unsigned char)(strm->total_in >> 16);
       
   444         out[7] = (unsigned char)(strm->total_in >> 24);
       
   445         len = 8;
       
   446         do {
       
   447             ret = write(gd, out + 8 - len, len);
       
   448             if (ret == -1) bye("writing gzip file", "");
       
   449             len -= ret;
       
   450         } while (len);
       
   451         close(gd);
       
   452     }
       
   453 
       
   454     /* clean up and return */
       
   455     free(out);
       
   456     if (in != NULL) free(in);
       
   457     if (fd > 0) close(fd);
       
   458 }
       
   459 
       
   460 /* process the compression level option if present, scan the gzip file, and
       
   461    append the specified files, or append the data from stdin if no other file
       
   462    names are provided on the command line -- the gzip file must be writable
       
   463    and seekable */
       
   464 int main(int argc, char **argv)
       
   465 {
       
   466     int gd, level;
       
   467     z_stream strm;
       
   468 
       
   469     /* ignore command name */
       
   470     argv++;
       
   471 
       
   472     /* provide usage if no arguments */
       
   473     if (*argv == NULL) {
       
   474         printf("gzappend 1.1 (4 Nov 2003) Copyright (C) 2003 Mark Adler\n");
       
   475         printf(
       
   476             "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n");
       
   477         return 0;
       
   478     }
       
   479 
       
   480     /* set compression level */
       
   481     level = Z_DEFAULT_COMPRESSION;
       
   482     if (argv[0][0] == '-') {
       
   483         if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0)
       
   484             bye("invalid compression level", "");
       
   485         level = argv[0][1] - '0';
       
   486         if (*++argv == NULL) bye("no gzip file name after options", "");
       
   487     }
       
   488 
       
   489     /* prepare to append to gzip file */
       
   490     gd = gzscan(*argv++, &strm, level);
       
   491 
       
   492     /* append files on command line, or from stdin if none */
       
   493     if (*argv == NULL)
       
   494         gztack(NULL, gd, &strm, 1);
       
   495     else
       
   496         do {
       
   497             gztack(*argv, gd, &strm, argv[1] == NULL);
       
   498         } while (*++argv != NULL);
       
   499     return 0;
       
   500 }