releasing/cbrtools/perl/MLDBM.pm
changeset 602 3145852acc89
equal deleted inserted replaced
600:6d08f4a05d93 602:3145852acc89
       
     1 # Copyright (c) 1995-98 Gurusamy Sarathy.  All rights reserved.
       
     2 #
       
     3 # Copyright (c) 1998 Raphael Manfredi.
       
     4 #
       
     5 # This program is free software; you can redistribute it and/or
       
     6 # modify it under the same terms as Perl itself.
       
     7 
       
     8 #
       
     9 # MLDBM.pm
       
    10 #
       
    11 # store multi-level hash structure in single level tied hash (read DBM)
       
    12 #
       
    13 # Documentation at the __END__
       
    14 #
       
    15 # Gurusamy Sarathy <gsar@umich.edu>
       
    16 # Raphael Manfredi <Raphael_Manfredi@grenoble.hp.com>
       
    17 #
       
    18 
       
    19 require 5.004;
       
    20 use strict;
       
    21 
       
    22 ####################################################################
       
    23 package MLDBM::Serializer;	## deferred
       
    24 
       
    25 use Carp;
       
    26 
       
    27 #
       
    28 # The serialization interface comprises of just three methods:
       
    29 # new(), serialize() and deserialize().  Only the last two are
       
    30 # _required_ to be implemented by any MLDBM serialization wrapper.
       
    31 #
       
    32 
       
    33 sub new { bless {}, shift };
       
    34 
       
    35 sub serialize { confess "deferred" };
       
    36 
       
    37 sub deserialize { confess "deferred" };
       
    38 
       
    39 
       
    40 #
       
    41 # Attributes:
       
    42 #
       
    43 #    dumpmeth:
       
    44 #	the preferred dumping method.
       
    45 #
       
    46 #    removetaint:
       
    47 #	untainting flag; when true, data will be untainted after
       
    48 #	extraction from the database.
       
    49 #
       
    50 #    key:
       
    51 #	the magic string used to recognize non-natively stored data.
       
    52 #
       
    53 # Attribute access methods:
       
    54 #
       
    55 #	These defaults allow readonly access. Sub-class may override
       
    56 #	them to allow write access if any of these attributes
       
    57 #	makes sense for it.
       
    58 #
       
    59 
       
    60 sub DumpMeth	{
       
    61     my $s = shift;
       
    62     confess "can't set dumpmeth with " . ref($s) if @_;
       
    63     $s->_attrib('dumpmeth');
       
    64 }
       
    65 
       
    66 sub RemoveTaint	{
       
    67     my $s = shift;
       
    68     confess "can't set untaint with " . ref($s) if @_;
       
    69     $s->_attrib('removetaint');
       
    70 }
       
    71 
       
    72 sub Key	{
       
    73     my $s = shift;
       
    74     confess "can't set key with " . ref($s) if @_;
       
    75     $s->_attrib('key');
       
    76 }
       
    77 
       
    78 sub _attrib {
       
    79     my ($s, $a, $v) = @_;
       
    80     if (ref $s and @_ > 2) {
       
    81 	$s->{$a} = $v;
       
    82 	return $s;
       
    83     }
       
    84     $s->{$a};
       
    85 }
       
    86 
       
    87 ####################################################################
       
    88 package MLDBM;
       
    89 
       
    90 $MLDBM::VERSION = $MLDBM::VERSION = '2.00';
       
    91 
       
    92 require Tie::Hash;
       
    93 @MLDBM::ISA = 'Tie::Hash';
       
    94 
       
    95 use Carp;
       
    96 
       
    97 #
       
    98 # the DB package to use (we default to SDBM since it comes with perl)
       
    99 # you might want to change this default to something more efficient
       
   100 # like DB_File (you can always override it in the use list)
       
   101 #
       
   102 $MLDBM::UseDB		= "SDBM_File"		unless $MLDBM::UseDB;
       
   103 $MLDBM::Serializer	= 'Data::Dumper'	unless $MLDBM::Serializer;
       
   104 $MLDBM::Key		= '$MlDbM'		unless $MLDBM::Key;
       
   105 $MLDBM::DumpMeth	= ""			unless $MLDBM::DumpMeth;
       
   106 $MLDBM::RemoveTaint	= 0			unless $MLDBM::RemoveTaint;
       
   107 
       
   108 #
       
   109 # A private way to load packages at runtime.
       
   110 my $loadpack = sub {
       
   111     my $pack = shift;
       
   112     $pack =~ s|::|/|g;
       
   113     $pack .= ".pm";
       
   114     eval { require $pack };
       
   115     if ($@) {
       
   116 	carp "MLDBM error: " . 
       
   117 	  "Please make sure $pack is a properly installed package.\n" .
       
   118 	    "\tPerl says: \"$@\"";
       
   119 	return undef;
       
   120     }
       
   121     1;
       
   122 };
       
   123 
       
   124 
       
   125 #
       
   126 # TIEHASH interface methods
       
   127 #
       
   128 sub TIEHASH {
       
   129     my $c = shift;
       
   130     my $s = bless {}, $c;
       
   131 
       
   132     #
       
   133     # Create the right serializer object.
       
   134     my $szr = $MLDBM::Serializer;
       
   135     unless (ref $szr) {
       
   136 	$szr = "MLDBM::Serializer::$szr"	# allow convenient short names
       
   137 	  unless $szr =~ /^MLDBM::Serializer::/;
       
   138 	&$loadpack($szr) or return undef;
       
   139 	$szr = $szr->new($MLDBM::DumpMeth,
       
   140 			 $MLDBM::RemoveTaint,
       
   141 			 $MLDBM::Key);
       
   142     }
       
   143     $s->Serializer($szr);
       
   144 
       
   145     #
       
   146     # Create the right TIEHASH  object.
       
   147     my $db = $MLDBM::UseDB;
       
   148     unless (ref $db) {
       
   149 	&$loadpack($db) or return undef;
       
   150 	$db = $db->TIEHASH(@_)
       
   151 	  or carp "MLDBM error: Second level tie failed, \"$!\""
       
   152 	    and return undef;
       
   153     }
       
   154     $s->UseDB($db);
       
   155 
       
   156     return $s;
       
   157 }
       
   158 
       
   159 sub FETCH {
       
   160     my ($s, $k) = @_;
       
   161     my $ret = $s->{DB}->FETCH($k);
       
   162     $s->{SR}->deserialize($ret);
       
   163 }
       
   164 
       
   165 sub STORE {
       
   166     my ($s, $k, $v) = @_;
       
   167     $v = $s->{SR}->serialize($v);
       
   168     $s->{DB}->STORE($k, $v);
       
   169 }
       
   170 
       
   171 sub DELETE	{ my $s = shift; $s->{DB}->DELETE(@_); }
       
   172 sub FIRSTKEY	{ my $s = shift; $s->{DB}->FIRSTKEY(@_); }
       
   173 sub NEXTKEY	{ my $s = shift; $s->{DB}->NEXTKEY(@_); }
       
   174 sub EXISTS	{ my $s = shift; $s->{DB}->EXISTS(@_); }
       
   175 sub CLEAR	{ my $s = shift; $s->{DB}->CLEAR(@_); }
       
   176 
       
   177 sub new		{ &TIEHASH }
       
   178 
       
   179 #
       
   180 # delegate messages to the underlying DBM
       
   181 #
       
   182 sub AUTOLOAD {
       
   183     return if $MLDBM::AUTOLOAD =~ /::DESTROY$/;
       
   184     my $s = shift;
       
   185     if (ref $s) {			# twas a method call
       
   186 	my $dbname = ref($s->{DB});
       
   187 	# permit inheritance
       
   188 	$MLDBM::AUTOLOAD =~ s/^.*::([^:]+)$/$dbname\:\:$1/;
       
   189 	$s->{DB}->$MLDBM::AUTOLOAD(@_);
       
   190     }
       
   191 }
       
   192 
       
   193 #
       
   194 # delegate messages to the underlying Serializer
       
   195 #
       
   196 sub DumpMeth	{ my $s = shift; $s->{SR}->DumpMeth(@_); }
       
   197 sub RemoveTaint	{ my $s = shift; $s->{SR}->RemoveTaint(@_); }
       
   198 sub Key		{ my $s = shift; $s->{SR}->Key(@_); }
       
   199 
       
   200 #
       
   201 # get/set the DB object
       
   202 #
       
   203 sub UseDB 	{ my $s = shift; @_ ? ($s->{DB} = shift) : $s->{DB}; }
       
   204 
       
   205 #
       
   206 # get/set the Serializer object
       
   207 #
       
   208 sub Serializer	{ my $s = shift; @_ ? ($s->{SR} = shift) : $s->{SR}; }
       
   209 
       
   210 #
       
   211 # stuff to do at 'use' time
       
   212 #
       
   213 sub import {
       
   214     my ($pack, $dbpack, $szr, $dumpmeth, $removetaint, $key) = @_;
       
   215     $MLDBM::UseDB = $dbpack if defined $dbpack and $dbpack;
       
   216     $MLDBM::Serializer = $szr if defined $szr and $szr;
       
   217     # undocumented, may change!
       
   218     $MLDBM::DumpMeth = $dumpmeth if defined $dumpmeth;
       
   219     $MLDBM::RemoveTaint = $removetaint if defined $removetaint;
       
   220     $MLDBM::Key = $key if defined $key and $key;
       
   221 }
       
   222 
       
   223 1;
       
   224 
       
   225 __END__
       
   226 
       
   227 =head1 NAME
       
   228 
       
   229 MLDBM - store multi-level hash structure in single level tied hash
       
   230 
       
   231 =head1 SYNOPSIS
       
   232 
       
   233     use MLDBM;				# this gets the default, SDBM
       
   234     #use MLDBM qw(DB_File FreezeThaw);	# use FreezeThaw for serializing
       
   235     #use MLDBM qw(DB_File Storable);	# use Storable for serializing
       
   236     
       
   237     $dbm = tie %o, 'MLDBM' [..other DBM args..] or die $!;
       
   238 
       
   239 =head1 DESCRIPTION
       
   240 
       
   241 This module can serve as a transparent interface to any TIEHASH package
       
   242 that is required to store arbitrary perl data, including nested references.
       
   243 Thus, this module can be used for storing references and other arbitrary data
       
   244 within DBM databases.
       
   245 
       
   246 It works by serializing the references in the hash into a single string. In the
       
   247 underlying TIEHASH package (usually a DBM database), it is this string that
       
   248 gets stored.  When the value is fetched again, the string is deserialized to
       
   249 reconstruct the data structure into memory.
       
   250 
       
   251 For historical and practical reasons, it requires the B<Data::Dumper> package,
       
   252 available at any CPAN site. B<Data::Dumper> gives you really nice-looking dumps of
       
   253 your data structures, in case you wish to look at them on the screen, and
       
   254 it was the only serializing engine before version 2.00.  However, as of version
       
   255 2.00, you can use any of B<Data::Dumper>, B<FreezeThaw> or B<Storable> to
       
   256 perform the underlying serialization, as hinted at by the L<SYNOPSIS> overview
       
   257 above.  Using B<Storable> is usually much faster than the other methods.
       
   258 
       
   259 See the L<BUGS> section for important limitations.
       
   260 
       
   261 =head2 Changing the Defaults
       
   262 
       
   263 B<MLDBM> relies on an underlying TIEHASH implementation (usually a
       
   264 DBM package), and an underlying serialization package.  The respective
       
   265 defaults are B<SDBM_File> and D<Data::Dumper>.  Both of these defaults
       
   266 can be changed.  Changing the B<SDBM_File> default is strongly recommended.
       
   267 See L<WARNINGS> below.
       
   268 
       
   269 Three serialization wrappers are currently supported: B<Data::Dumper>,
       
   270 B<Storable>, and B<FreezeThaw>.  Additional serializers can be
       
   271 supported by writing a wrapper that implements the interface required by
       
   272 B<MLDBM::Serializer>.  See the supported wrappers and the B<MLDBM::Serializer>
       
   273 source for details.
       
   274 
       
   275 In the following, I<$OBJ> stands for the tied object, as in:
       
   276 
       
   277 	$obj = tie %o, ....
       
   278 	$obj = tied %o;
       
   279 
       
   280 =over 4
       
   281 
       
   282 =item $MLDBM::UseDB	I<or>	I<$OBJ>->UseDB(I<[TIEDOBJECT]>)
       
   283 
       
   284 The global C<$MLDBM::UseDB> can be set to default to something other than
       
   285 C<SDBM_File>, in case you have a more efficient DBM, or if you want to use
       
   286 this with some other TIEHASH implementation.  Alternatively, you can specify
       
   287 the name of the package at C<use> time, as the first "parameter".
       
   288 Nested module names can be specified as "Foo::Bar".
       
   289 
       
   290 The corresponding method call returns the underlying TIEHASH object when
       
   291 called without arguments.  It can be called with any object that
       
   292 implements Perl's TIEHASH interface, to set that value.
       
   293 
       
   294 =item $MLDBM::Serializer	I<or>	I<$OBJ>->Serializer(I<[SZROBJECT]>)
       
   295 
       
   296 The global C<$MLDBM::Serializer> can be set to the name of the serializing
       
   297 package to be used. Currently can be set to one of C<Data::Dumper>,
       
   298 C<Storable>, or C<FreezeThaw>. Defaults to C<Data::Dumper>.  Alternatively,
       
   299 you can specify the name of the serializer package at C<use> time, as the
       
   300 second "parameter".
       
   301 
       
   302 The corresponding method call returns the underlying MLDBM serializer object
       
   303 when called without arguments.  It can be called with an object that
       
   304 implements the MLDBM serializer interface, to set that value.
       
   305 
       
   306 =back
       
   307 
       
   308 =head2 Controlling Serializer Properties
       
   309 
       
   310 These methods are meant to supply an interface to the properties of the
       
   311 underlying serializer used.  Do B<not> call or set them without
       
   312 understanding the consequences in full.  The defaults are usually sensible.
       
   313 
       
   314 Not all of these necessarily apply to all the supplied serializers, so we
       
   315 specify when to apply them.  Failure to respect this will usually lead to
       
   316 an exception.
       
   317 
       
   318 =over 4
       
   319 
       
   320 =item $MLDBM::DumpMeth	I<or>  I<$OBJ>->DumpMeth(I<[METHNAME]>)
       
   321 
       
   322 If the serializer provides alternative serialization methods, this
       
   323 can be used to set them.
       
   324 
       
   325 With B<Data::Dumper> (which offers a pure Perl and an XS verion
       
   326 of its serializing routine), this is set to C<Dumpxs> by default if that
       
   327 is supported in your installation.  Otherwise, defaults to the slower
       
   328 C<Dump> method.
       
   329 
       
   330 With B<Storable>, a value of C<portable> requests that serialization be
       
   331 architecture neutral, i.e. the deserialization can later occur on another
       
   332 platform. Of course, this only makes sense if your database files are
       
   333 themselves architecture neutral.  By default, native format is used for
       
   334 greater serializing speed in B<Storable>.  Both B<Data::Dumper> and
       
   335 B<FreezeThaw> are always architecture neutral.
       
   336 
       
   337 B<FreezeThaw> does not honor this attribute.
       
   338 
       
   339 =item $MLDBM::Key  I<or>  I<$OBJ>->Key(I<[KEYSTRING]>)
       
   340 
       
   341 If the serializer only deals with part of the data (perhaps because
       
   342 the TIEHASH object can natively store some types of data), it may need
       
   343 a unique key string to recognize the data it handles.  This can be used
       
   344 to set that string.  Best left alone.
       
   345 
       
   346 Defaults to the magic string used to recognize MLDBM data. It is a six
       
   347 character wide, unique string. This is best left alone, unless you know
       
   348 what you are doing. 
       
   349 
       
   350 B<Storable> and B<FreezeThaw> do not honor this attribute.
       
   351 
       
   352 =item $MLDBM::RemoveTaint  I<or>  I<$OBJ>->RemoveTaint(I<[BOOL]>)
       
   353 
       
   354 If the serializer can optionally untaint any retrieved data subject to
       
   355 taint checks in Perl, this can be used to request that feature.  Data
       
   356 that comes from external sources (like disk-files) must always be
       
   357 viewed with caution, so use this only when you are sure that that is
       
   358 not an issue.
       
   359 
       
   360 B<Data::Dumper> uses C<eval()> to deserialize and is therefore subject to
       
   361 taint checks.  Can be set to a true value to make the B<Data::Dumper>
       
   362 serializer untaint the data retrieved. It is not enabled by default.
       
   363 Use with care.
       
   364 
       
   365 B<Storable> and B<FreezeThaw> do not honor this attribute.
       
   366 
       
   367 =back
       
   368 
       
   369 =head1 EXAMPLES
       
   370 
       
   371 Here is a simple example.  Note that does not depend upon the underlying
       
   372 serializing package--most real life examples should not, usually.
       
   373 
       
   374     use MLDBM;				# this gets SDBM and Data::Dumper
       
   375     #use MLDBM qw(SDBM_File Storable);	# SDBM and Storable
       
   376     use Fcntl;				# to get 'em constants
       
   377      
       
   378     $dbm = tie %o, 'MLDBM', 'testmldbm', O_CREAT|O_RDWR, 0640 or die $!;
       
   379     
       
   380     $c = [\ 'c'];
       
   381     $b = {};
       
   382     $a = [1, $b, $c];
       
   383     $b->{a} = $a;
       
   384     $b->{b} = $a->[1];
       
   385     $b->{c} = $a->[2];
       
   386     @o{qw(a b c)} = ($a, $b, $c);
       
   387     
       
   388     #
       
   389     # to see what was stored
       
   390     #
       
   391     use Data::Dumper;
       
   392     print Data::Dumper->Dump([@o{qw(a b c)}], [qw(a b c)]);
       
   393     
       
   394     #
       
   395     # to modify data in a substructure
       
   396     #
       
   397     $tmp = $o{a};
       
   398     $tmp->[0] = 'foo';
       
   399     $o{a} = $tmp;
       
   400     
       
   401     #
       
   402     # can access the underlying DBM methods transparently
       
   403     #
       
   404     #print $dbm->fd, "\n";		# DB_File method
       
   405 
       
   406 Here is another small example using Storable, in a portable format:
       
   407 
       
   408     use MLDBM qw(DB_File Storable);	# DB_File and Storable
       
   409     
       
   410     tie %o, 'MLDBM', 'testmldbm', O_CREAT|O_RDWR, 0640 or die $!;
       
   411     
       
   412     (tied %o)->DumpMeth('portable');	# Ask for portable binary
       
   413     $o{'ENV'} = \%ENV;			# Stores the whole environment
       
   414     
       
   415 
       
   416 =head1 BUGS
       
   417 
       
   418 =over 4
       
   419 
       
   420 =item 1.
       
   421 
       
   422 Adding or altering substructures to a hash value is not entirely transparent
       
   423 in current perl.  If you want to store a reference or modify an existing
       
   424 reference value in the DBM, it must first be retrieved and stored in a
       
   425 temporary variable for further modifications.  In particular, something like
       
   426 this will NOT work properly:
       
   427 
       
   428 	$mldb{key}{subkey}[3] = 'stuff';	# won't work
       
   429 
       
   430 Instead, that must be written as:
       
   431 
       
   432 	$tmp = $mldb{key};			# retrieve value
       
   433 	$tmp->{subkey}[3] = 'stuff';
       
   434 	$mldb{key} = $tmp;			# store value
       
   435 
       
   436 This limitation exists because the perl TIEHASH interface currently has no
       
   437 support for multidimensional ties.
       
   438 
       
   439 =item 2.
       
   440 
       
   441 The B<Data::Dumper> serializer uses eval().  A lot.  Try the B<Storable>
       
   442 serializer, which is generally the most efficient.
       
   443 
       
   444 =back
       
   445 
       
   446 =head1 WARNINGS
       
   447 
       
   448 =over 4
       
   449 
       
   450 =item 1.
       
   451 
       
   452 Many DBM implementations have arbitrary limits on the size of records
       
   453 that can be stored.  For example, SDBM and many ODBM or NDBM
       
   454 implementations have a default limit of 1024 bytes for the size of a
       
   455 record.  MLDBM can easily exceed these limits when storing large data
       
   456 structures, leading to mysterious failures.  Although SDBM_File is
       
   457 used by MLDBM by default, it is not a good choice if you're storing
       
   458 large data structures.  Berkeley DB and GDBM both do not have these
       
   459 limits, so I recommend using either of those instead.
       
   460 
       
   461 =item 2.
       
   462 
       
   463 MLDBM does well with data structures that are not too deep and not
       
   464 too wide.  You also need to be careful about how many C<FETCH>es your
       
   465 code actually ends up doing.  Meaning, you should get the most mileage
       
   466 out of a C<FETCH> by holding on to the highest level value for as long
       
   467 as you need it.  Remember that every toplevel access of the tied hash,
       
   468 for example C<$mldb{foo}>, translates to a MLDBM C<FETCH()> call.
       
   469 
       
   470 Too often, people end up writing something like this:
       
   471 
       
   472         tie %h, 'MLDBM', ...;
       
   473         for my $k (keys %{$h{something}}) {
       
   474             print $h{something}{$k}[0]{foo}{bar};  # FETCH _every_ time!
       
   475         }
       
   476 
       
   477 when it should be written this for efficiency:
       
   478 
       
   479         tie %h, 'MLDBM', ...;
       
   480         my $root = $h{something};                  # FETCH _once_
       
   481         for my $k (keys %$root) {
       
   482             print $k->[0]{foo}{bar};
       
   483         }
       
   484 
       
   485 
       
   486 =back
       
   487 
       
   488 =head1 AUTHORS
       
   489 
       
   490 Gurusamy Sarathy <F<gsar@umich.edu>>.
       
   491 
       
   492 Support for multiple serializing packages by
       
   493 Raphael Manfredi <F<Raphael_Manfredi@grenoble.hp.com>>.
       
   494 
       
   495 Copyright (c) 1995-98 Gurusamy Sarathy.  All rights reserved.
       
   496 
       
   497 Copyright (c) 1998 Raphael Manfredi.
       
   498 
       
   499 This program is free software; you can redistribute it and/or
       
   500 modify it under the same terms as Perl itself.
       
   501 
       
   502 =head1 VERSION
       
   503 
       
   504 Version 2.00	10 May 1998
       
   505 
       
   506 =head1 SEE ALSO
       
   507 
       
   508 perl(1), perltie(1), perlfunc(1), Data::Dumper(3), FreezeThaw(3), Storable(3).
       
   509 
       
   510 =cut