diff -r 22486c9c7b15 -r 378360dbbdba releasing/cbrtools/perl/MLDBM.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/releasing/cbrtools/perl/MLDBM.pm Wed Jun 30 11:35:58 2010 +0800 @@ -0,0 +1,510 @@ +# Copyright (c) 1995-98 Gurusamy Sarathy. All rights reserved. +# +# Copyright (c) 1998 Raphael Manfredi. +# +# This program is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. + +# +# MLDBM.pm +# +# store multi-level hash structure in single level tied hash (read DBM) +# +# Documentation at the __END__ +# +# Gurusamy Sarathy +# Raphael Manfredi +# + +require 5.004; +use strict; + +#################################################################### +package MLDBM::Serializer; ## deferred + +use Carp; + +# +# The serialization interface comprises of just three methods: +# new(), serialize() and deserialize(). Only the last two are +# _required_ to be implemented by any MLDBM serialization wrapper. +# + +sub new { bless {}, shift }; + +sub serialize { confess "deferred" }; + +sub deserialize { confess "deferred" }; + + +# +# Attributes: +# +# dumpmeth: +# the preferred dumping method. +# +# removetaint: +# untainting flag; when true, data will be untainted after +# extraction from the database. +# +# key: +# the magic string used to recognize non-natively stored data. +# +# Attribute access methods: +# +# These defaults allow readonly access. Sub-class may override +# them to allow write access if any of these attributes +# makes sense for it. +# + +sub DumpMeth { + my $s = shift; + confess "can't set dumpmeth with " . ref($s) if @_; + $s->_attrib('dumpmeth'); +} + +sub RemoveTaint { + my $s = shift; + confess "can't set untaint with " . ref($s) if @_; + $s->_attrib('removetaint'); +} + +sub Key { + my $s = shift; + confess "can't set key with " . ref($s) if @_; + $s->_attrib('key'); +} + +sub _attrib { + my ($s, $a, $v) = @_; + if (ref $s and @_ > 2) { + $s->{$a} = $v; + return $s; + } + $s->{$a}; +} + +#################################################################### +package MLDBM; + +$MLDBM::VERSION = $MLDBM::VERSION = '2.00'; + +require Tie::Hash; +@MLDBM::ISA = 'Tie::Hash'; + +use Carp; + +# +# the DB package to use (we default to SDBM since it comes with perl) +# you might want to change this default to something more efficient +# like DB_File (you can always override it in the use list) +# +$MLDBM::UseDB = "SDBM_File" unless $MLDBM::UseDB; +$MLDBM::Serializer = 'Data::Dumper' unless $MLDBM::Serializer; +$MLDBM::Key = '$MlDbM' unless $MLDBM::Key; +$MLDBM::DumpMeth = "" unless $MLDBM::DumpMeth; +$MLDBM::RemoveTaint = 0 unless $MLDBM::RemoveTaint; + +# +# A private way to load packages at runtime. +my $loadpack = sub { + my $pack = shift; + $pack =~ s|::|/|g; + $pack .= ".pm"; + eval { require $pack }; + if ($@) { + carp "MLDBM error: " . + "Please make sure $pack is a properly installed package.\n" . + "\tPerl says: \"$@\""; + return undef; + } + 1; +}; + + +# +# TIEHASH interface methods +# +sub TIEHASH { + my $c = shift; + my $s = bless {}, $c; + + # + # Create the right serializer object. + my $szr = $MLDBM::Serializer; + unless (ref $szr) { + $szr = "MLDBM::Serializer::$szr" # allow convenient short names + unless $szr =~ /^MLDBM::Serializer::/; + &$loadpack($szr) or return undef; + $szr = $szr->new($MLDBM::DumpMeth, + $MLDBM::RemoveTaint, + $MLDBM::Key); + } + $s->Serializer($szr); + + # + # Create the right TIEHASH object. + my $db = $MLDBM::UseDB; + unless (ref $db) { + &$loadpack($db) or return undef; + $db = $db->TIEHASH(@_) + or carp "MLDBM error: Second level tie failed, \"$!\"" + and return undef; + } + $s->UseDB($db); + + return $s; +} + +sub FETCH { + my ($s, $k) = @_; + my $ret = $s->{DB}->FETCH($k); + $s->{SR}->deserialize($ret); +} + +sub STORE { + my ($s, $k, $v) = @_; + $v = $s->{SR}->serialize($v); + $s->{DB}->STORE($k, $v); +} + +sub DELETE { my $s = shift; $s->{DB}->DELETE(@_); } +sub FIRSTKEY { my $s = shift; $s->{DB}->FIRSTKEY(@_); } +sub NEXTKEY { my $s = shift; $s->{DB}->NEXTKEY(@_); } +sub EXISTS { my $s = shift; $s->{DB}->EXISTS(@_); } +sub CLEAR { my $s = shift; $s->{DB}->CLEAR(@_); } + +sub new { &TIEHASH } + +# +# delegate messages to the underlying DBM +# +sub AUTOLOAD { + return if $MLDBM::AUTOLOAD =~ /::DESTROY$/; + my $s = shift; + if (ref $s) { # twas a method call + my $dbname = ref($s->{DB}); + # permit inheritance + $MLDBM::AUTOLOAD =~ s/^.*::([^:]+)$/$dbname\:\:$1/; + $s->{DB}->$MLDBM::AUTOLOAD(@_); + } +} + +# +# delegate messages to the underlying Serializer +# +sub DumpMeth { my $s = shift; $s->{SR}->DumpMeth(@_); } +sub RemoveTaint { my $s = shift; $s->{SR}->RemoveTaint(@_); } +sub Key { my $s = shift; $s->{SR}->Key(@_); } + +# +# get/set the DB object +# +sub UseDB { my $s = shift; @_ ? ($s->{DB} = shift) : $s->{DB}; } + +# +# get/set the Serializer object +# +sub Serializer { my $s = shift; @_ ? ($s->{SR} = shift) : $s->{SR}; } + +# +# stuff to do at 'use' time +# +sub import { + my ($pack, $dbpack, $szr, $dumpmeth, $removetaint, $key) = @_; + $MLDBM::UseDB = $dbpack if defined $dbpack and $dbpack; + $MLDBM::Serializer = $szr if defined $szr and $szr; + # undocumented, may change! + $MLDBM::DumpMeth = $dumpmeth if defined $dumpmeth; + $MLDBM::RemoveTaint = $removetaint if defined $removetaint; + $MLDBM::Key = $key if defined $key and $key; +} + +1; + +__END__ + +=head1 NAME + +MLDBM - store multi-level hash structure in single level tied hash + +=head1 SYNOPSIS + + use MLDBM; # this gets the default, SDBM + #use MLDBM qw(DB_File FreezeThaw); # use FreezeThaw for serializing + #use MLDBM qw(DB_File Storable); # use Storable for serializing + + $dbm = tie %o, 'MLDBM' [..other DBM args..] or die $!; + +=head1 DESCRIPTION + +This module can serve as a transparent interface to any TIEHASH package +that is required to store arbitrary perl data, including nested references. +Thus, this module can be used for storing references and other arbitrary data +within DBM databases. + +It works by serializing the references in the hash into a single string. In the +underlying TIEHASH package (usually a DBM database), it is this string that +gets stored. When the value is fetched again, the string is deserialized to +reconstruct the data structure into memory. + +For historical and practical reasons, it requires the B package, +available at any CPAN site. B gives you really nice-looking dumps of +your data structures, in case you wish to look at them on the screen, and +it was the only serializing engine before version 2.00. However, as of version +2.00, you can use any of B, B or B to +perform the underlying serialization, as hinted at by the L overview +above. Using B is usually much faster than the other methods. + +See the L section for important limitations. + +=head2 Changing the Defaults + +B relies on an underlying TIEHASH implementation (usually a +DBM package), and an underlying serialization package. The respective +defaults are B and D. Both of these defaults +can be changed. Changing the B default is strongly recommended. +See L below. + +Three serialization wrappers are currently supported: B, +B, and B. Additional serializers can be +supported by writing a wrapper that implements the interface required by +B. See the supported wrappers and the B +source for details. + +In the following, I<$OBJ> stands for the tied object, as in: + + $obj = tie %o, .... + $obj = tied %o; + +=over 4 + +=item $MLDBM::UseDB I I<$OBJ>->UseDB(I<[TIEDOBJECT]>) + +The global C<$MLDBM::UseDB> can be set to default to something other than +C, in case you have a more efficient DBM, or if you want to use +this with some other TIEHASH implementation. Alternatively, you can specify +the name of the package at C time, as the first "parameter". +Nested module names can be specified as "Foo::Bar". + +The corresponding method call returns the underlying TIEHASH object when +called without arguments. It can be called with any object that +implements Perl's TIEHASH interface, to set that value. + +=item $MLDBM::Serializer I I<$OBJ>->Serializer(I<[SZROBJECT]>) + +The global C<$MLDBM::Serializer> can be set to the name of the serializing +package to be used. Currently can be set to one of C, +C, or C. Defaults to C. Alternatively, +you can specify the name of the serializer package at C time, as the +second "parameter". + +The corresponding method call returns the underlying MLDBM serializer object +when called without arguments. It can be called with an object that +implements the MLDBM serializer interface, to set that value. + +=back + +=head2 Controlling Serializer Properties + +These methods are meant to supply an interface to the properties of the +underlying serializer used. Do B call or set them without +understanding the consequences in full. The defaults are usually sensible. + +Not all of these necessarily apply to all the supplied serializers, so we +specify when to apply them. Failure to respect this will usually lead to +an exception. + +=over 4 + +=item $MLDBM::DumpMeth I I<$OBJ>->DumpMeth(I<[METHNAME]>) + +If the serializer provides alternative serialization methods, this +can be used to set them. + +With B (which offers a pure Perl and an XS verion +of its serializing routine), this is set to C by default if that +is supported in your installation. Otherwise, defaults to the slower +C method. + +With B, a value of C requests that serialization be +architecture neutral, i.e. the deserialization can later occur on another +platform. Of course, this only makes sense if your database files are +themselves architecture neutral. By default, native format is used for +greater serializing speed in B. Both B and +B are always architecture neutral. + +B does not honor this attribute. + +=item $MLDBM::Key I I<$OBJ>->Key(I<[KEYSTRING]>) + +If the serializer only deals with part of the data (perhaps because +the TIEHASH object can natively store some types of data), it may need +a unique key string to recognize the data it handles. This can be used +to set that string. Best left alone. + +Defaults to the magic string used to recognize MLDBM data. It is a six +character wide, unique string. This is best left alone, unless you know +what you are doing. + +B and B do not honor this attribute. + +=item $MLDBM::RemoveTaint I I<$OBJ>->RemoveTaint(I<[BOOL]>) + +If the serializer can optionally untaint any retrieved data subject to +taint checks in Perl, this can be used to request that feature. Data +that comes from external sources (like disk-files) must always be +viewed with caution, so use this only when you are sure that that is +not an issue. + +B uses C to deserialize and is therefore subject to +taint checks. Can be set to a true value to make the B +serializer untaint the data retrieved. It is not enabled by default. +Use with care. + +B and B do not honor this attribute. + +=back + +=head1 EXAMPLES + +Here is a simple example. Note that does not depend upon the underlying +serializing package--most real life examples should not, usually. + + use MLDBM; # this gets SDBM and Data::Dumper + #use MLDBM qw(SDBM_File Storable); # SDBM and Storable + use Fcntl; # to get 'em constants + + $dbm = tie %o, 'MLDBM', 'testmldbm', O_CREAT|O_RDWR, 0640 or die $!; + + $c = [\ 'c']; + $b = {}; + $a = [1, $b, $c]; + $b->{a} = $a; + $b->{b} = $a->[1]; + $b->{c} = $a->[2]; + @o{qw(a b c)} = ($a, $b, $c); + + # + # to see what was stored + # + use Data::Dumper; + print Data::Dumper->Dump([@o{qw(a b c)}], [qw(a b c)]); + + # + # to modify data in a substructure + # + $tmp = $o{a}; + $tmp->[0] = 'foo'; + $o{a} = $tmp; + + # + # can access the underlying DBM methods transparently + # + #print $dbm->fd, "\n"; # DB_File method + +Here is another small example using Storable, in a portable format: + + use MLDBM qw(DB_File Storable); # DB_File and Storable + + tie %o, 'MLDBM', 'testmldbm', O_CREAT|O_RDWR, 0640 or die $!; + + (tied %o)->DumpMeth('portable'); # Ask for portable binary + $o{'ENV'} = \%ENV; # Stores the whole environment + + +=head1 BUGS + +=over 4 + +=item 1. + +Adding or altering substructures to a hash value is not entirely transparent +in current perl. If you want to store a reference or modify an existing +reference value in the DBM, it must first be retrieved and stored in a +temporary variable for further modifications. In particular, something like +this will NOT work properly: + + $mldb{key}{subkey}[3] = 'stuff'; # won't work + +Instead, that must be written as: + + $tmp = $mldb{key}; # retrieve value + $tmp->{subkey}[3] = 'stuff'; + $mldb{key} = $tmp; # store value + +This limitation exists because the perl TIEHASH interface currently has no +support for multidimensional ties. + +=item 2. + +The B serializer uses eval(). A lot. Try the B +serializer, which is generally the most efficient. + +=back + +=head1 WARNINGS + +=over 4 + +=item 1. + +Many DBM implementations have arbitrary limits on the size of records +that can be stored. For example, SDBM and many ODBM or NDBM +implementations have a default limit of 1024 bytes for the size of a +record. MLDBM can easily exceed these limits when storing large data +structures, leading to mysterious failures. Although SDBM_File is +used by MLDBM by default, it is not a good choice if you're storing +large data structures. Berkeley DB and GDBM both do not have these +limits, so I recommend using either of those instead. + +=item 2. + +MLDBM does well with data structures that are not too deep and not +too wide. You also need to be careful about how many Ces your +code actually ends up doing. Meaning, you should get the most mileage +out of a C by holding on to the highest level value for as long +as you need it. Remember that every toplevel access of the tied hash, +for example C<$mldb{foo}>, translates to a MLDBM C call. + +Too often, people end up writing something like this: + + tie %h, 'MLDBM', ...; + for my $k (keys %{$h{something}}) { + print $h{something}{$k}[0]{foo}{bar}; # FETCH _every_ time! + } + +when it should be written this for efficiency: + + tie %h, 'MLDBM', ...; + my $root = $h{something}; # FETCH _once_ + for my $k (keys %$root) { + print $k->[0]{foo}{bar}; + } + + +=back + +=head1 AUTHORS + +Gurusamy Sarathy >. + +Support for multiple serializing packages by +Raphael Manfredi >. + +Copyright (c) 1995-98 Gurusamy Sarathy. All rights reserved. + +Copyright (c) 1998 Raphael Manfredi. + +This program is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=head1 VERSION + +Version 2.00 10 May 1998 + +=head1 SEE ALSO + +perl(1), perltie(1), perlfunc(1), Data::Dumper(3), FreezeThaw(3), Storable(3). + +=cut