|
1 /* |
|
2 * gzlog.c |
|
3 * Copyright (C) 2004 Mark Adler |
|
4 * For conditions of distribution and use, see copyright notice in gzlog.h |
|
5 * version 1.0, 26 Nov 2004 |
|
6 * |
|
7 */ |
|
8 |
|
#include <errno.h>      /* errno, EWOULDBLOCK, EINTR */
#include <string.h>     /* memcmp() */
#include <stdlib.h>     /* malloc(), free(), NULL */
#include <sys/types.h>  /* size_t, off_t */
#include <unistd.h>     /* read(), close(), sleep(), ftruncate(), */
                        /* lseek() */
#include <fcntl.h>      /* open() */
#include <sys/file.h>   /* flock() */
#include "zlib.h"       /* deflateInit2(), deflate(), deflateEnd() */

#include "gzlog.h"      /* interface */
|
/* "local" is this file's shorthand for the static storage-class specifier */
#define local static
|
20 |
|
/* tag stored in gz_log.id so the public entry points can reject pointers
   that do not refer to a log object */
#define GZLOGID 19334

/* state kept for one open log file */
typedef struct gz_log_s {
    int id;             /* set to GZLOGID by gzlog_open() */
    int fd;             /* descriptor of the open log file */
    off_t extra;        /* file offset of the 16-byte "ap" sub-field data */
    off_t mark_off;     /* first offset stored in "ap": marked data */
    off_t last_off;     /* second offset stored in "ap": last block */
    unsigned long crc;  /* running crc of the uncompressed data */
    unsigned long len;  /* uncompressed length (modulo 2^32) */
    unsigned stored;    /* bytes written so far to the current stored block */
} gz_log;
|
34 |
|
#define LOCK_RETRY 1        /* seconds to sleep between lock attempts */
#define LOCK_PATIENCE 1200  /* seconds (about twenty minutes) before giving up */

/*
 * Acquire an exclusive advisory lock on fd.
 *
 * The lock is requested without blocking and retried every LOCK_RETRY
 * seconds for LOCK_PATIENCE seconds while another holder has the file
 * locked.  Returns 0 once the lock is held, or -1 if patience runs out or
 * flock() fails for a reason that waiting cannot cure (for example an
 * invalid descriptor).
 */
static int lock(int fd)
{
    int patience;

    patience = LOCK_PATIENCE;
    do {
        if (flock(fd, LOCK_EX | LOCK_NB) == 0)
            return 0;

        /* only contention (or an interrupted call) is worth retrying; any
           other error would just burn the whole patience interval */
        if (errno != EWOULDBLOCK && errno != EINTR)
            return -1;

        (void)sleep(LOCK_RETRY);
        patience -= LOCK_RETRY;
    } while (patience > 0);

    /* we've run out of patience -- give up */
    return -1;
}
|
55 |
|
/* drop the advisory lock held on fd; the result of flock() is ignored */
static void unlock(int fd)
{
    int status;

    status = flock(fd, LOCK_UN);
    (void)status;
}
|
61 |
|
62 /* release a log object */ |
|
63 local void log_clean(gz_log *log) |
|
64 { |
|
65 unlock(log->fd); |
|
66 (void)close(log->fd); |
|
67 free(log); |
|
68 } |
|
69 |
|
/* assemble the four bytes at buf, least significant byte first, into an
   unsigned long */
static unsigned long make_ulg(unsigned char *buf)
{
    unsigned long val = 0;
    int i;

    /* walk from the most significant byte down so each step is shift-and-or */
    for (i = 3; i >= 0; i--)
        val = (val << 8) | (unsigned long)buf[i];
    return val;
}
|
81 |
|
/*
 * Read an off_t from the eight bytes at buf, stored little-endian.
 *
 * The value is assembled in an unsigned 64-bit accumulator and converted to
 * off_t once at the end.  Shifting a signed off_t directly is undefined
 * when the top byte is 0x80 or more, or when off_t is narrower than 64 bits.
 */
static off_t make_off(unsigned char *buf)
{
    unsigned long long val = 0;
    int n;

    /* fold in bytes from most to least significant */
    for (n = 7; n >= 0; n--)
        val = (val << 8) | (unsigned long long)buf[n];
    return (off_t)val;
}
|
93 |
|
/* store the low 32 bits of val in the four bytes at buf, least significant
   byte first */
static void dice_ulg(unsigned long val, unsigned char *buf)
{
    buf[0] = (unsigned char)(val & 0xff);
    buf[1] = (unsigned char)((val >> 8) & 0xff);
    buf[2] = (unsigned char)((val >> 16) & 0xff);
    buf[3] = (unsigned char)((val >> 24) & 0xff);
}
|
104 |
|
/* store val in the eight bytes at buf, least significant byte first */
static void dice_off(off_t val, unsigned char *buf)
{
    unsigned char *stop = buf + 8;

    /* peel off one byte per pass, shifting the remainder down */
    while (buf != stop) {
        *buf++ = val & 0xff;
        val >>= 8;
    }
}
|
115 |
|
/*
 * Initial, empty gzip file for appending.
 *
 * Declared as unsigned char because several bytes (0x8b, 0xff) do not fit in
 * a signed char, and const because the template is only ever written out.
 * The layout is a 10-byte gzip header, a 2-byte extra length, the 20-byte
 * "ap" sub-field, then an empty final stored block and the gzip trailer.
 */
static const unsigned char empty_gz[] = {
    0x1f, 0x8b,                 /* magic gzip id */
    8,                          /* compression method is deflate */
    4,                          /* there is an extra field */
    0, 0, 0, 0,                 /* no modification time provided */
    0, 0xff,                    /* no extra flags, no OS */
    20, 0, 'a', 'p', 16, 0,     /* extra field with "ap" subfield */
    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of uncompressed data */
    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of last block */
    1, 0, 0, 0xff, 0xff,        /* empty stored block (last) */
    0, 0, 0, 0,                 /* crc */
    0, 0, 0, 0                  /* uncompressed length */
};
|
130 |
|
131 /* initialize a log object with locking */ |
|
132 void *gzlog_open(char *path) |
|
133 { |
|
134 unsigned xlen; |
|
135 unsigned char temp[20]; |
|
136 unsigned sub_len; |
|
137 int good; |
|
138 gz_log *log; |
|
139 |
|
140 /* allocate log structure */ |
|
141 log = malloc(sizeof(gz_log)); |
|
142 if (log == NULL) |
|
143 return NULL; |
|
144 log->id = GZLOGID; |
|
145 |
|
146 /* open file, creating it if necessary, and locking it */ |
|
147 log->fd = open(path, O_RDWR | O_CREAT, 0600); |
|
148 if (log->fd < 0) { |
|
149 free(log); |
|
150 return NULL; |
|
151 } |
|
152 if (lock(log->fd)) { |
|
153 close(log->fd); |
|
154 free(log); |
|
155 return NULL; |
|
156 } |
|
157 |
|
158 /* if file is empty, write new gzip stream */ |
|
159 if (lseek(log->fd, 0, SEEK_END) == 0) { |
|
160 if (write(log->fd, empty_gz, sizeof(empty_gz)) != sizeof(empty_gz)) { |
|
161 log_clean(log); |
|
162 return NULL; |
|
163 } |
|
164 } |
|
165 |
|
166 /* check gzip header */ |
|
167 (void)lseek(log->fd, 0, SEEK_SET); |
|
168 if (read(log->fd, temp, 12) != 12 || temp[0] != 0x1f || |
|
169 temp[1] != 0x8b || temp[2] != 8 || (temp[3] & 4) == 0) { |
|
170 log_clean(log); |
|
171 return NULL; |
|
172 } |
|
173 |
|
174 /* process extra field to find "ap" sub-field */ |
|
175 xlen = temp[10] + (temp[11] << 8); |
|
176 good = 0; |
|
177 while (xlen) { |
|
178 if (xlen < 4 || read(log->fd, temp, 4) != 4) |
|
179 break; |
|
180 sub_len = temp[2]; |
|
181 sub_len += temp[3] << 8; |
|
182 xlen -= 4; |
|
183 if (memcmp(temp, "ap", 2) == 0 && sub_len == 16) { |
|
184 good = 1; |
|
185 break; |
|
186 } |
|
187 if (xlen < sub_len) |
|
188 break; |
|
189 (void)lseek(log->fd, sub_len, SEEK_CUR); |
|
190 xlen -= sub_len; |
|
191 } |
|
192 if (!good) { |
|
193 log_clean(log); |
|
194 return NULL; |
|
195 } |
|
196 |
|
197 /* read in "ap" sub-field */ |
|
198 log->extra = lseek(log->fd, 0, SEEK_CUR); |
|
199 if (read(log->fd, temp, 16) != 16) { |
|
200 log_clean(log); |
|
201 return NULL; |
|
202 } |
|
203 log->mark_off = make_off(temp); |
|
204 log->last_off = make_off(temp + 8); |
|
205 |
|
206 /* get crc, length of gzip file */ |
|
207 (void)lseek(log->fd, log->last_off, SEEK_SET); |
|
208 if (read(log->fd, temp, 13) != 13 || |
|
209 memcmp(temp, "\001\000\000\377\377", 5) != 0) { |
|
210 log_clean(log); |
|
211 return NULL; |
|
212 } |
|
213 log->crc = make_ulg(temp + 5); |
|
214 log->len = make_ulg(temp + 9); |
|
215 |
|
216 /* set up to write over empty last block */ |
|
217 (void)lseek(log->fd, log->last_off + 5, SEEK_SET); |
|
218 log->stored = 0; |
|
219 return (void *)log; |
|
220 } |
|
221 |
|
222 /* maximum amount to put in a stored block before starting a new one */ |
|
223 #define MAX_BLOCK 16384 |
|
224 |
|
225 /* write a block to a log object */ |
|
226 int gzlog_write(void *obj, char *data, size_t len) |
|
227 { |
|
228 size_t some; |
|
229 unsigned char temp[5]; |
|
230 gz_log *log; |
|
231 |
|
232 /* check object */ |
|
233 log = (gz_log *)obj; |
|
234 if (log == NULL || log->id != GZLOGID) |
|
235 return 1; |
|
236 |
|
237 /* write stored blocks until all of the input is written */ |
|
238 do { |
|
239 some = MAX_BLOCK - log->stored; |
|
240 if (some > len) |
|
241 some = len; |
|
242 if (write(log->fd, data, some) != some) |
|
243 return 1; |
|
244 log->crc = crc32(log->crc, data, some); |
|
245 log->len += some; |
|
246 len -= some; |
|
247 data += some; |
|
248 log->stored += some; |
|
249 |
|
250 /* if the stored block is full, end it and start another */ |
|
251 if (log->stored == MAX_BLOCK) { |
|
252 (void)lseek(log->fd, log->last_off, SEEK_SET); |
|
253 temp[0] = 0; |
|
254 dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), |
|
255 temp + 1); |
|
256 if (write(log->fd, temp, 5) != 5) |
|
257 return 1; |
|
258 log->last_off = lseek(log->fd, log->stored, SEEK_CUR); |
|
259 (void)lseek(log->fd, 5, SEEK_CUR); |
|
260 log->stored = 0; |
|
261 } |
|
262 } while (len); |
|
263 return 0; |
|
264 } |
|
265 |
|
266 /* recompress the remaining stored deflate data in place */ |
|
267 local int recomp(gz_log *log) |
|
268 { |
|
269 z_stream strm; |
|
270 size_t len, max; |
|
271 unsigned char *in; |
|
272 unsigned char *out; |
|
273 unsigned char temp[16]; |
|
274 |
|
275 /* allocate space and read it all in (it's around 1 MB) */ |
|
276 len = log->last_off - log->mark_off; |
|
277 max = len + (len >> 12) + (len >> 14) + 11; |
|
278 out = malloc(max); |
|
279 if (out == NULL) |
|
280 return 1; |
|
281 in = malloc(len); |
|
282 if (in == NULL) { |
|
283 free(out); |
|
284 return 1; |
|
285 } |
|
286 (void)lseek(log->fd, log->mark_off, SEEK_SET); |
|
287 if (read(log->fd, in, len) != len) { |
|
288 free(in); |
|
289 free(out); |
|
290 return 1; |
|
291 } |
|
292 |
|
293 /* recompress in memory, decoding stored data as we go */ |
|
294 /* note: this assumes that unsigned is four bytes or more */ |
|
295 /* consider not making that assumption */ |
|
296 strm.zalloc = Z_NULL; |
|
297 strm.zfree = Z_NULL; |
|
298 strm.opaque = Z_NULL; |
|
299 if (deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8, |
|
300 Z_DEFAULT_STRATEGY) != Z_OK) { |
|
301 free(in); |
|
302 free(out); |
|
303 return 1; |
|
304 } |
|
305 strm.next_in = in; |
|
306 strm.avail_out = max; |
|
307 strm.next_out = out; |
|
308 while (len >= 5) { |
|
309 if (strm.next_in[0] != 0) |
|
310 break; |
|
311 strm.avail_in = strm.next_in[1] + (strm.next_in[2] << 8); |
|
312 strm.next_in += 5; |
|
313 len -= 5; |
|
314 if (strm.avail_in != 0) { |
|
315 if (len < strm.avail_in) |
|
316 break; |
|
317 len -= strm.avail_in; |
|
318 (void)deflate(&strm, Z_NO_FLUSH); |
|
319 if (strm.avail_in != 0 || strm.avail_out == 0) |
|
320 break; |
|
321 } |
|
322 } |
|
323 (void)deflate(&strm, Z_SYNC_FLUSH); |
|
324 (void)deflateEnd(&strm); |
|
325 free(in); |
|
326 if (len != 0 || strm.avail_out == 0) { |
|
327 free(out); |
|
328 return 1; |
|
329 } |
|
330 |
|
331 /* overwrite stored data with compressed data */ |
|
332 (void)lseek(log->fd, log->mark_off, SEEK_SET); |
|
333 len = max - strm.avail_out; |
|
334 if (write(log->fd, out, len) != len) { |
|
335 free(out); |
|
336 return 1; |
|
337 } |
|
338 free(out); |
|
339 |
|
340 /* write last empty block, crc, and length */ |
|
341 log->mark_off = log->last_off = lseek(log->fd, 0, SEEK_CUR); |
|
342 temp[0] = 1; |
|
343 dice_ulg(0xffffL << 16, temp + 1); |
|
344 dice_ulg(log->crc, temp + 5); |
|
345 dice_ulg(log->len, temp + 9); |
|
346 if (write(log->fd, temp, 13) != 13) |
|
347 return 1; |
|
348 |
|
349 /* truncate file to discard remaining stored data and old trailer */ |
|
350 ftruncate(log->fd, lseek(log->fd, 0, SEEK_CUR)); |
|
351 |
|
352 /* update extra field to point to new last empty block */ |
|
353 (void)lseek(log->fd, log->extra, SEEK_SET); |
|
354 dice_off(log->mark_off, temp); |
|
355 dice_off(log->last_off, temp + 8); |
|
356 if (write(log->fd, temp, 16) != 16) |
|
357 return 1; |
|
358 return 0; |
|
359 } |
|
360 |
|
361 /* maximum accumulation of stored blocks before compressing */ |
|
362 #define MAX_STORED 1048576 |
|
363 |
|
364 /* close log object */ |
|
365 int gzlog_close(void *obj) |
|
366 { |
|
367 unsigned char temp[8]; |
|
368 gz_log *log; |
|
369 |
|
370 /* check object */ |
|
371 log = (gz_log *)obj; |
|
372 if (log == NULL || log->id != GZLOGID) |
|
373 return 1; |
|
374 |
|
375 /* go to start of most recent block being written */ |
|
376 (void)lseek(log->fd, log->last_off, SEEK_SET); |
|
377 |
|
378 /* if some stuff was put there, update block */ |
|
379 if (log->stored) { |
|
380 temp[0] = 0; |
|
381 dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), |
|
382 temp + 1); |
|
383 if (write(log->fd, temp, 5) != 5) |
|
384 return 1; |
|
385 log->last_off = lseek(log->fd, log->stored, SEEK_CUR); |
|
386 } |
|
387 |
|
388 /* write last block (empty) */ |
|
389 if (write(log->fd, "\001\000\000\377\377", 5) != 5) |
|
390 return 1; |
|
391 |
|
392 /* write updated crc and uncompressed length */ |
|
393 dice_ulg(log->crc, temp); |
|
394 dice_ulg(log->len, temp + 4); |
|
395 if (write(log->fd, temp, 8) != 8) |
|
396 return 1; |
|
397 |
|
398 /* put offset of that last block in gzip extra block */ |
|
399 (void)lseek(log->fd, log->extra + 8, SEEK_SET); |
|
400 dice_off(log->last_off, temp); |
|
401 if (write(log->fd, temp, 8) != 8) |
|
402 return 1; |
|
403 |
|
404 /* if more than 1 MB stored, then time to compress it */ |
|
405 if (log->last_off - log->mark_off > MAX_STORED) { |
|
406 if (recomp(log)) |
|
407 return 1; |
|
408 } |
|
409 |
|
410 /* unlock and close file */ |
|
411 log_clean(log); |
|
412 return 0; |
|
413 } |