|
1 # Copyright (c) 1995-98 Gurusamy Sarathy. All rights reserved. |
|
2 # |
|
3 # Copyright (c) 1998 Raphael Manfredi. |
|
4 # |
|
5 # This program is free software; you can redistribute it and/or |
|
6 # modify it under the same terms as Perl itself. |
|
7 |
|
8 # |
|
9 # MLDBM.pm |
|
10 # |
|
11 # store multi-level hash structure in single level tied hash (read DBM) |
|
12 # |
|
13 # Documentation at the __END__ |
|
14 # |
|
15 # Gurusamy Sarathy <gsar@umich.edu> |
|
16 # Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com> |
|
17 # |
|
18 |
|
19 require 5.004; |
|
20 use strict; |
|
21 |
|
22 #################################################################### |
|
23 package MLDBM::Serializer; ## deferred |
|
24 |
|
25 use Carp; |
|
26 |
|
27 # |
|
28 # The serialization interface comprises of just three methods: |
|
29 # new(), serialize() and deserialize(). Only the last two are |
|
30 # _required_ to be implemented by any MLDBM serialization wrapper. |
|
31 # |
|
32 |
|
33 sub new { bless {}, shift }; |
|
34 |
|
35 sub serialize { confess "deferred" }; |
|
36 |
|
37 sub deserialize { confess "deferred" }; |
|
38 |
|
39 |
|
40 # |
|
41 # Attributes: |
|
42 # |
|
43 # dumpmeth: |
|
44 # the preferred dumping method. |
|
45 # |
|
46 # removetaint: |
|
47 # untainting flag; when true, data will be untainted after |
|
48 # extraction from the database. |
|
49 # |
|
50 # key: |
|
51 # the magic string used to recognize non-natively stored data. |
|
52 # |
|
53 # Attribute access methods: |
|
54 # |
|
55 # These defaults allow readonly access. Sub-class may override |
|
56 # them to allow write access if any of these attributes |
|
57 # makes sense for it. |
|
58 # |
|
59 |
|
60 sub DumpMeth { |
|
61 my $s = shift; |
|
62 confess "can't set dumpmeth with " . ref($s) if @_; |
|
63 $s->_attrib('dumpmeth'); |
|
64 } |
|
65 |
|
66 sub RemoveTaint { |
|
67 my $s = shift; |
|
68 confess "can't set untaint with " . ref($s) if @_; |
|
69 $s->_attrib('removetaint'); |
|
70 } |
|
71 |
|
72 sub Key { |
|
73 my $s = shift; |
|
74 confess "can't set key with " . ref($s) if @_; |
|
75 $s->_attrib('key'); |
|
76 } |
|
77 |
|
78 sub _attrib { |
|
79 my ($s, $a, $v) = @_; |
|
80 if (ref $s and @_ > 2) { |
|
81 $s->{$a} = $v; |
|
82 return $s; |
|
83 } |
|
84 $s->{$a}; |
|
85 } |
|
86 |
|
87 #################################################################### |
|
88 package MLDBM; |
|
89 |
|
90 $MLDBM::VERSION = $MLDBM::VERSION = '2.00'; |
|
91 |
|
92 require Tie::Hash; |
|
93 @MLDBM::ISA = 'Tie::Hash'; |
|
94 |
|
95 use Carp; |
|
96 |
|
97 # |
|
98 # the DB package to use (we default to SDBM since it comes with perl) |
|
99 # you might want to change this default to something more efficient |
|
100 # like DB_File (you can always override it in the use list) |
|
101 # |
|
102 $MLDBM::UseDB = "SDBM_File" unless $MLDBM::UseDB; |
|
103 $MLDBM::Serializer = 'Data::Dumper' unless $MLDBM::Serializer; |
|
104 $MLDBM::Key = '$MlDbM' unless $MLDBM::Key; |
|
105 $MLDBM::DumpMeth = "" unless $MLDBM::DumpMeth; |
|
106 $MLDBM::RemoveTaint = 0 unless $MLDBM::RemoveTaint; |
|
107 |
|
108 # |
|
109 # A private way to load packages at runtime. |
|
110 my $loadpack = sub { |
|
111 my $pack = shift; |
|
112 $pack =~ s|::|/|g; |
|
113 $pack .= ".pm"; |
|
114 eval { require $pack }; |
|
115 if ($@) { |
|
116 carp "MLDBM error: " . |
|
117 "Please make sure $pack is a properly installed package.\n" . |
|
118 "\tPerl says: \"$@\""; |
|
119 return undef; |
|
120 } |
|
121 1; |
|
122 }; |
|
123 |
|
124 |
|
125 # |
|
126 # TIEHASH interface methods |
|
127 # |
|
128 sub TIEHASH { |
|
129 my $c = shift; |
|
130 my $s = bless {}, $c; |
|
131 |
|
132 # |
|
133 # Create the right serializer object. |
|
134 my $szr = $MLDBM::Serializer; |
|
135 unless (ref $szr) { |
|
136 $szr = "MLDBM::Serializer::$szr" # allow convenient short names |
|
137 unless $szr =~ /^MLDBM::Serializer::/; |
|
138 &$loadpack($szr) or return undef; |
|
139 $szr = $szr->new($MLDBM::DumpMeth, |
|
140 $MLDBM::RemoveTaint, |
|
141 $MLDBM::Key); |
|
142 } |
|
143 $s->Serializer($szr); |
|
144 |
|
145 # |
|
146 # Create the right TIEHASH object. |
|
147 my $db = $MLDBM::UseDB; |
|
148 unless (ref $db) { |
|
149 &$loadpack($db) or return undef; |
|
150 $db = $db->TIEHASH(@_) |
|
151 or carp "MLDBM error: Second level tie failed, \"$!\"" |
|
152 and return undef; |
|
153 } |
|
154 $s->UseDB($db); |
|
155 |
|
156 return $s; |
|
157 } |
|
158 |
|
159 sub FETCH { |
|
160 my ($s, $k) = @_; |
|
161 my $ret = $s->{DB}->FETCH($k); |
|
162 $s->{SR}->deserialize($ret); |
|
163 } |
|
164 |
|
165 sub STORE { |
|
166 my ($s, $k, $v) = @_; |
|
167 $v = $s->{SR}->serialize($v); |
|
168 $s->{DB}->STORE($k, $v); |
|
169 } |
|
170 |
|
171 sub DELETE { my $s = shift; $s->{DB}->DELETE(@_); } |
|
172 sub FIRSTKEY { my $s = shift; $s->{DB}->FIRSTKEY(@_); } |
|
173 sub NEXTKEY { my $s = shift; $s->{DB}->NEXTKEY(@_); } |
|
174 sub EXISTS { my $s = shift; $s->{DB}->EXISTS(@_); } |
|
175 sub CLEAR { my $s = shift; $s->{DB}->CLEAR(@_); } |
|
176 |
|
177 sub new { &TIEHASH } |
|
178 |
|
179 # |
|
180 # delegate messages to the underlying DBM |
|
181 # |
|
182 sub AUTOLOAD { |
|
183 return if $MLDBM::AUTOLOAD =~ /::DESTROY$/; |
|
184 my $s = shift; |
|
185 if (ref $s) { # twas a method call |
|
186 my $dbname = ref($s->{DB}); |
|
187 # permit inheritance |
|
188 $MLDBM::AUTOLOAD =~ s/^.*::([^:]+)$/$dbname\:\:$1/; |
|
189 $s->{DB}->$MLDBM::AUTOLOAD(@_); |
|
190 } |
|
191 } |
|
192 |
|
193 # |
|
194 # delegate messages to the underlying Serializer |
|
195 # |
|
196 sub DumpMeth { my $s = shift; $s->{SR}->DumpMeth(@_); } |
|
197 sub RemoveTaint { my $s = shift; $s->{SR}->RemoveTaint(@_); } |
|
198 sub Key { my $s = shift; $s->{SR}->Key(@_); } |
|
199 |
|
200 # |
|
201 # get/set the DB object |
|
202 # |
|
203 sub UseDB { my $s = shift; @_ ? ($s->{DB} = shift) : $s->{DB}; } |
|
204 |
|
205 # |
|
206 # get/set the Serializer object |
|
207 # |
|
208 sub Serializer { my $s = shift; @_ ? ($s->{SR} = shift) : $s->{SR}; } |
|
209 |
|
210 # |
|
211 # stuff to do at 'use' time |
|
212 # |
|
213 sub import { |
|
214 my ($pack, $dbpack, $szr, $dumpmeth, $removetaint, $key) = @_; |
|
215 $MLDBM::UseDB = $dbpack if defined $dbpack and $dbpack; |
|
216 $MLDBM::Serializer = $szr if defined $szr and $szr; |
|
217 # undocumented, may change! |
|
218 $MLDBM::DumpMeth = $dumpmeth if defined $dumpmeth; |
|
219 $MLDBM::RemoveTaint = $removetaint if defined $removetaint; |
|
220 $MLDBM::Key = $key if defined $key and $key; |
|
221 } |
|
222 |
|
223 1; |
|
224 |
|
225 __END__ |
|
226 |
|
227 =head1 NAME |
|
228 |
|
229 MLDBM - store multi-level hash structure in single level tied hash |
|
230 |
|
231 =head1 SYNOPSIS |
|
232 |
|
233 use MLDBM; # this gets the default, SDBM |
|
234 #use MLDBM qw(DB_File FreezeThaw); # use FreezeThaw for serializing |
|
235 #use MLDBM qw(DB_File Storable); # use Storable for serializing |
|
236 |
|
237 $dbm = tie %o, 'MLDBM' [..other DBM args..] or die $!; |
|
238 |
|
239 =head1 DESCRIPTION |
|
240 |
|
241 This module can serve as a transparent interface to any TIEHASH package |
|
242 that is required to store arbitrary perl data, including nested references. |
|
243 Thus, this module can be used for storing references and other arbitrary data |
|
244 within DBM databases. |
|
245 |
|
246 It works by serializing the references in the hash into a single string. In the |
|
247 underlying TIEHASH package (usually a DBM database), it is this string that |
|
248 gets stored. When the value is fetched again, the string is deserialized to |
|
249 reconstruct the data structure into memory. |
|
250 |
|
251 For historical and practical reasons, it requires the B<Data::Dumper> package, |
|
252 available at any CPAN site. B<Data::Dumper> gives you really nice-looking dumps of |
|
253 your data structures, in case you wish to look at them on the screen, and |
|
254 it was the only serializing engine before version 2.00. However, as of version |
|
255 2.00, you can use any of B<Data::Dumper>, B<FreezeThaw> or B<Storable> to |
|
256 perform the underlying serialization, as hinted at by the L<SYNOPSIS> overview |
|
257 above. Using B<Storable> is usually much faster than the other methods. |
|
258 |
|
259 See the L<BUGS> section for important limitations. |
|
260 |
|
261 =head2 Changing the Defaults |
|
262 |
|
263 B<MLDBM> relies on an underlying TIEHASH implementation (usually a |
|
264 DBM package), and an underlying serialization package. The respective |
|
265 defaults are B<SDBM_File> and D<Data::Dumper>. Both of these defaults |
|
266 can be changed. Changing the B<SDBM_File> default is strongly recommended. |
|
267 See L<WARNINGS> below. |
|
268 |
|
269 Three serialization wrappers are currently supported: B<Data::Dumper>, |
|
270 B<Storable>, and B<FreezeThaw>. Additional serializers can be |
|
271 supported by writing a wrapper that implements the interface required by |
|
272 B<MLDBM::Serializer>. See the supported wrappers and the B<MLDBM::Serializer> |
|
273 source for details. |
|
274 |
|
275 In the following, I<$OBJ> stands for the tied object, as in: |
|
276 |
|
277 $obj = tie %o, .... |
|
278 $obj = tied %o; |
|
279 |
|
280 =over 4 |
|
281 |
|
282 =item $MLDBM::UseDB I<or> I<$OBJ>->UseDB(I<[TIEDOBJECT]>) |
|
283 |
|
284 The global C<$MLDBM::UseDB> can be set to default to something other than |
|
285 C<SDBM_File>, in case you have a more efficient DBM, or if you want to use |
|
286 this with some other TIEHASH implementation. Alternatively, you can specify |
|
287 the name of the package at C<use> time, as the first "parameter". |
|
288 Nested module names can be specified as "Foo::Bar". |
|
289 |
|
290 The corresponding method call returns the underlying TIEHASH object when |
|
291 called without arguments. It can be called with any object that |
|
292 implements Perl's TIEHASH interface, to set that value. |
|
293 |
|
294 =item $MLDBM::Serializer I<or> I<$OBJ>->Serializer(I<[SZROBJECT]>) |
|
295 |
|
296 The global C<$MLDBM::Serializer> can be set to the name of the serializing |
|
297 package to be used. Currently can be set to one of C<Data::Dumper>, |
|
298 C<Storable>, or C<FreezeThaw>. Defaults to C<Data::Dumper>. Alternatively, |
|
299 you can specify the name of the serializer package at C<use> time, as the |
|
300 second "parameter". |
|
301 |
|
302 The corresponding method call returns the underlying MLDBM serializer object |
|
303 when called without arguments. It can be called with an object that |
|
304 implements the MLDBM serializer interface, to set that value. |
|
305 |
|
306 =back |
|
307 |
|
308 =head2 Controlling Serializer Properties |
|
309 |
|
310 These methods are meant to supply an interface to the properties of the |
|
311 underlying serializer used. Do B<not> call or set them without |
|
312 understanding the consequences in full. The defaults are usually sensible. |
|
313 |
|
314 Not all of these necessarily apply to all the supplied serializers, so we |
|
315 specify when to apply them. Failure to respect this will usually lead to |
|
316 an exception. |
|
317 |
|
318 =over 4 |
|
319 |
|
320 =item $MLDBM::DumpMeth I<or> I<$OBJ>->DumpMeth(I<[METHNAME]>) |
|
321 |
|
322 If the serializer provides alternative serialization methods, this |
|
323 can be used to set them. |
|
324 |
|
325 With B<Data::Dumper> (which offers a pure Perl and an XS verion |
|
326 of its serializing routine), this is set to C<Dumpxs> by default if that |
|
327 is supported in your installation. Otherwise, defaults to the slower |
|
328 C<Dump> method. |
|
329 |
|
330 With B<Storable>, a value of C<portable> requests that serialization be |
|
331 architecture neutral, i.e. the deserialization can later occur on another |
|
332 platform. Of course, this only makes sense if your database files are |
|
333 themselves architecture neutral. By default, native format is used for |
|
334 greater serializing speed in B<Storable>. Both B<Data::Dumper> and |
|
335 B<FreezeThaw> are always architecture neutral. |
|
336 |
|
337 B<FreezeThaw> does not honor this attribute. |
|
338 |
|
339 =item $MLDBM::Key I<or> I<$OBJ>->Key(I<[KEYSTRING]>) |
|
340 |
|
341 If the serializer only deals with part of the data (perhaps because |
|
342 the TIEHASH object can natively store some types of data), it may need |
|
343 a unique key string to recognize the data it handles. This can be used |
|
344 to set that string. Best left alone. |
|
345 |
|
346 Defaults to the magic string used to recognize MLDBM data. It is a six |
|
347 character wide, unique string. This is best left alone, unless you know |
|
348 what you are doing. |
|
349 |
|
350 B<Storable> and B<FreezeThaw> do not honor this attribute. |
|
351 |
|
352 =item $MLDBM::RemoveTaint I<or> I<$OBJ>->RemoveTaint(I<[BOOL]>) |
|
353 |
|
354 If the serializer can optionally untaint any retrieved data subject to |
|
355 taint checks in Perl, this can be used to request that feature. Data |
|
356 that comes from external sources (like disk-files) must always be |
|
357 viewed with caution, so use this only when you are sure that that is |
|
358 not an issue. |
|
359 |
|
360 B<Data::Dumper> uses C<eval()> to deserialize and is therefore subject to |
|
361 taint checks. Can be set to a true value to make the B<Data::Dumper> |
|
362 serializer untaint the data retrieved. It is not enabled by default. |
|
363 Use with care. |
|
364 |
|
365 B<Storable> and B<FreezeThaw> do not honor this attribute. |
|
366 |
|
367 =back |
|
368 |
|
369 =head1 EXAMPLES |
|
370 |
|
371 Here is a simple example. Note that does not depend upon the underlying |
|
372 serializing package--most real life examples should not, usually. |
|
373 |
|
374 use MLDBM; # this gets SDBM and Data::Dumper |
|
375 #use MLDBM qw(SDBM_File Storable); # SDBM and Storable |
|
376 use Fcntl; # to get 'em constants |
|
377 |
|
378 $dbm = tie %o, 'MLDBM', 'testmldbm', O_CREAT|O_RDWR, 0640 or die $!; |
|
379 |
|
380 $c = [\ 'c']; |
|
381 $b = {}; |
|
382 $a = [1, $b, $c]; |
|
383 $b->{a} = $a; |
|
384 $b->{b} = $a->[1]; |
|
385 $b->{c} = $a->[2]; |
|
386 @o{qw(a b c)} = ($a, $b, $c); |
|
387 |
|
388 # |
|
389 # to see what was stored |
|
390 # |
|
391 use Data::Dumper; |
|
392 print Data::Dumper->Dump([@o{qw(a b c)}], [qw(a b c)]); |
|
393 |
|
394 # |
|
395 # to modify data in a substructure |
|
396 # |
|
397 $tmp = $o{a}; |
|
398 $tmp->[0] = 'foo'; |
|
399 $o{a} = $tmp; |
|
400 |
|
401 # |
|
402 # can access the underlying DBM methods transparently |
|
403 # |
|
404 #print $dbm->fd, "\n"; # DB_File method |
|
405 |
|
406 Here is another small example using Storable, in a portable format: |
|
407 |
|
408 use MLDBM qw(DB_File Storable); # DB_File and Storable |
|
409 |
|
410 tie %o, 'MLDBM', 'testmldbm', O_CREAT|O_RDWR, 0640 or die $!; |
|
411 |
|
412 (tied %o)->DumpMeth('portable'); # Ask for portable binary |
|
413 $o{'ENV'} = \%ENV; # Stores the whole environment |
|
414 |
|
415 |
|
416 =head1 BUGS |
|
417 |
|
418 =over 4 |
|
419 |
|
420 =item 1. |
|
421 |
|
422 Adding or altering substructures to a hash value is not entirely transparent |
|
423 in current perl. If you want to store a reference or modify an existing |
|
424 reference value in the DBM, it must first be retrieved and stored in a |
|
425 temporary variable for further modifications. In particular, something like |
|
426 this will NOT work properly: |
|
427 |
|
428 $mldb{key}{subkey}[3] = 'stuff'; # won't work |
|
429 |
|
430 Instead, that must be written as: |
|
431 |
|
432 $tmp = $mldb{key}; # retrieve value |
|
433 $tmp->{subkey}[3] = 'stuff'; |
|
434 $mldb{key} = $tmp; # store value |
|
435 |
|
436 This limitation exists because the perl TIEHASH interface currently has no |
|
437 support for multidimensional ties. |
|
438 |
|
439 =item 2. |
|
440 |
|
441 The B<Data::Dumper> serializer uses eval(). A lot. Try the B<Storable> |
|
442 serializer, which is generally the most efficient. |
|
443 |
|
444 =back |
|
445 |
|
446 =head1 WARNINGS |
|
447 |
|
448 =over 4 |
|
449 |
|
450 =item 1. |
|
451 |
|
452 Many DBM implementations have arbitrary limits on the size of records |
|
453 that can be stored. For example, SDBM and many ODBM or NDBM |
|
454 implementations have a default limit of 1024 bytes for the size of a |
|
455 record. MLDBM can easily exceed these limits when storing large data |
|
456 structures, leading to mysterious failures. Although SDBM_File is |
|
457 used by MLDBM by default, it is not a good choice if you're storing |
|
458 large data structures. Berkeley DB and GDBM both do not have these |
|
459 limits, so I recommend using either of those instead. |
|
460 |
|
461 =item 2. |
|
462 |
|
463 MLDBM does well with data structures that are not too deep and not |
|
464 too wide. You also need to be careful about how many C<FETCH>es your |
|
465 code actually ends up doing. Meaning, you should get the most mileage |
|
466 out of a C<FETCH> by holding on to the highest level value for as long |
|
467 as you need it. Remember that every toplevel access of the tied hash, |
|
468 for example C<$mldb{foo}>, translates to a MLDBM C<FETCH()> call. |
|
469 |
|
470 Too often, people end up writing something like this: |
|
471 |
|
472 tie %h, 'MLDBM', ...; |
|
473 for my $k (keys %{$h{something}}) { |
|
474 print $h{something}{$k}[0]{foo}{bar}; # FETCH _every_ time! |
|
475 } |
|
476 |
|
477 when it should be written this for efficiency: |
|
478 |
|
479 tie %h, 'MLDBM', ...; |
|
480 my $root = $h{something}; # FETCH _once_ |
|
481 for my $k (keys %$root) { |
|
482 print $k->[0]{foo}{bar}; |
|
483 } |
|
484 |
|
485 |
|
486 =back |
|
487 |
|
488 =head1 AUTHORS |
|
489 |
|
490 Gurusamy Sarathy <F<gsar@umich.edu>>. |
|
491 |
|
492 Support for multiple serializing packages by |
|
493 Raphael Manfredi <F<Raphael_Manfredi@grenoble.hp.com>>. |
|
494 |
|
495 Copyright (c) 1995-98 Gurusamy Sarathy. All rights reserved. |
|
496 |
|
497 Copyright (c) 1998 Raphael Manfredi. |
|
498 |
|
499 This program is free software; you can redistribute it and/or |
|
500 modify it under the same terms as Perl itself. |
|
501 |
|
502 =head1 VERSION |
|
503 |
|
504 Version 2.00 10 May 1998 |
|
505 |
|
506 =head1 SEE ALSO |
|
507 |
|
508 perl(1), perltie(1), perlfunc(1), Data::Dumper(3), FreezeThaw(3), Storable(3). |
|
509 |
|
510 =cut |