dslinux/user/perl/lib/Memoize AnyDBM_File.pm Expire.pm ExpireFile.pm ExpireTest.pm NDBM_File.pm README SDBM_File.pm Storable.pm TODO

cayenne dslinux_cayenne at user.in-berlin.de
Tue Dec 5 05:27:14 CET 2006


Update of /cvsroot/dslinux/dslinux/user/perl/lib/Memoize
In directory antilope:/tmp/cvs-serv7729/lib/Memoize

Added Files:
	AnyDBM_File.pm Expire.pm ExpireFile.pm ExpireTest.pm 
	NDBM_File.pm README SDBM_File.pm Storable.pm TODO 
Log Message:
Adding fresh perl source to HEAD to branch from

--- NEW FILE: SDBM_File.pm ---
package Memoize::SDBM_File;

=head1 NAME

Memoize::SDBM_File - glue to provide EXISTS for SDBM_File for Storable use

=head1 DESCRIPTION

See L<Memoize>.

=cut

use SDBM_File;
@ISA = qw(SDBM_File);
$VERSION = 0.65;

$Verbose = 0;

sub AUTOLOAD {
  warn "Nonexistent function $AUTOLOAD invoked in Memoize::SDBM_File\n";
}

sub import {
  warn "Importing Memoize::SDBM_File\n" if $Verbose;
}


my %keylist;

# This is so ridiculous...
sub _backhash {
  my $self = shift;
  my %fakehash;
  my $k; 
  for ($k = $self->FIRSTKEY(); defined $k; $k = $self->NEXTKEY($k)) {
    $fakehash{$k} = undef;
  }
  $keylist{$self} = \%fakehash;
}

sub EXISTS {
  warn "Memoize::SDBM_File EXISTS (@_)\n" if $Verbose;
  my $self = shift;
  _backhash($self)  unless exists $keylist{$self};
  my $r = exists $keylist{$self}{$_[0]};
  warn "Memoize::SDBM_File EXISTS (@_) ==> $r\n" if $Verbose;
  $r;
}

sub DEFINED {
  warn "Memoize::SDBM_File DEFINED (@_)\n" if $Verbose;
  my $self = shift;
  _backhash($self)  unless exists $keylist{$self};
  defined $keylist{$self}{$_[0]};
}

sub DESTROY {
  warn "Memoize::SDBM_File DESTROY (@_)\n" if $Verbose;
  my $self = shift;
  delete $keylist{$self};   # So much for reference counting...
  $self->SUPER::DESTROY(@_);
}

# Maybe establish the keylist at TIEHASH time instead?

sub STORE {
  warn "Memoize::SDBM_File STORE (@_)\n" if $VERBOSE;
  my $self = shift;
  $keylist{$self}{$_[0]} = undef;
  $self->SUPER::STORE(@_);
}

# Inherit FETCH and TIEHASH

1;

--- NEW FILE: ExpireFile.pm ---
package Memoize::ExpireFile;

=head1 NAME

Memoize::ExpireFile - test for Memoize expiration semantics

=head1 DESCRIPTION

See L<Memoize::Expire>.

=cut

$VERSION = 1.01;
use Carp;

my $Zero = pack("N", 0);

sub TIEHASH {
  my ($package, %args) = @_;
  my $cache = $args{HASH} || {};
  bless {ARGS => \%args, C => $cache} => $package;
}


sub STORE {
#  print "Expiry manager STORE handler\n";
  my ($self, $key, $data) = @_;
  my $cache = $self->{C};
  my $cur_date = pack("N", (stat($key))[9]);
  $cache->{"C$key"} = $data;
  $cache->{"T$key"} = $cur_date;
}

sub FETCH {
  my ($self, $key) = @_;
  $self->{C}{"C$key"};
}

sub EXISTS {
#  print "Expiry manager EXISTS handler\n";
  my ($self, $key) = @_;
  my $cache_date = $self->{C}{"T$key"} || $Zero;
  my $file_date = pack("N", (stat($key))[9]);#
#  if ($self->{ARGS}{CHECK_DATE} && $old_date gt $cur_date) {
#    return $self->{ARGS}{CHECK_DATE}->($key, $old_date, $cur_date);
#  } 
  my $res = $cache_date ge $file_date;
#  print $res ? "... still good\n" : "... expired\n";
  $res;
}

1;

--- NEW FILE: NDBM_File.pm ---
package Memoize::NDBM_File;

=head1 NAME

Memoize::NDBM_File - glue to provide EXISTS for NDBM_File for Storable use

=head1 DESCRIPTION

See L<Memoize>.

=cut

use NDBM_File;
@ISA = qw(NDBM_File);
$VERSION = 0.65;

$Verbose = 0;

sub AUTOLOAD {
  warn "Nonexistent function $AUTOLOAD invoked in Memoize::NDBM_File\n";
}

sub import {
  warn "Importing Memoize::NDBM_File\n" if $Verbose;
}


my %keylist;

# This is so ridiculous...
sub _backhash {
  my $self = shift;
  my %fakehash;
  my $k; 
  for ($k = $self->FIRSTKEY(); defined $k; $k = $self->NEXTKEY($k)) {
    $fakehash{$k} = undef;
  }
  $keylist{$self} = \%fakehash;
}

sub EXISTS {
  warn "Memoize::NDBM_File EXISTS (@_)\n" if $Verbose;
  my $self = shift;
  _backhash($self)  unless exists $keylist{$self};
  my $r = exists $keylist{$self}{$_[0]};
  warn "Memoize::NDBM_File EXISTS (@_) ==> $r\n" if $Verbose;
  $r;
}

sub DEFINED {
  warn "Memoize::NDBM_File DEFINED (@_)\n" if $Verbose;
  my $self = shift;
  _backhash($self)  unless exists $keylist{$self};
  defined $keylist{$self}{$_[0]};
}

sub DESTROY {
  warn "Memoize::NDBM_File DESTROY (@_)\n" if $Verbose;
  my $self = shift;
  delete $keylist{$self};   # So much for reference counting...
  $self->SUPER::DESTROY(@_);
}

# Maybe establish the keylist at TIEHASH time instead?

sub STORE {
  warn "Memoize::NDBM_File STORE (@_)\n" if $VERBOSE;
  my $self = shift;
  $keylist{$self}{$_[0]} = undef;
  $self->SUPER::STORE(@_);
}



# Inherit FETCH and TIEHASH

1;

--- NEW FILE: ExpireTest.pm ---
package Memoize::ExpireTest;

=head1 NAME

Memoize::ExpireTest - test for Memoize expiration semantics

=head1 DESCRIPTION

This module is just for testing expiration semantics.  It's not a very
good example of how to write an expiration module.

If you are looking for an example, I recommend that you look at the
simple example in the Memoize::Expire documentation, or at the code
for Memoize::Expire itself.

If you have questions, I will be happy to answer them if you send them
to mjd-perl-memoize+ at plover.com.

=cut

$VERSION = 0.65;
my %cache;

sub TIEHASH {	
  my ($pack) = @_;
  bless \%cache => $pack;
}

sub EXISTS {
  my ($cache, $key) = @_;
  exists $cache->{$key} ? 1 : 0;
}

sub FETCH {
  my ($cache, $key) = @_;
  $cache->{$key};
}

sub STORE {
  my ($cache, $key, $val) = @_;
  $cache->{$key} = $val;
}

sub expire {
  my ($key) = @_;
  delete $cache{$key};
}

1;

--- NEW FILE: Expire.pm ---

package Memoize::Expire;
# require 5.00556;
use Carp;
$DEBUG = 0;
$VERSION = '1.00';

# This package will implement expiration by prepending a fixed-length header
# to the font of the cached data.  The format of the header will be:
# (4-byte number of last-access-time)  (For LRU when I implement it)
# (4-byte expiration time: unsigned seconds-since-unix-epoch)
# (2-byte number-of-uses-before-expire)

sub _header_fmt () { "N N n" }
sub _header_size () { length(_header_fmt) }

# Usage:  memoize func 
#         TIE => [Memoize::Expire, LIFETIME => sec, NUM_USES => n,
#                 TIE => [...] ]

BEGIN {
  eval {require Time::HiRes};
  unless ($@) {
    Time::HiRes->import('time');
  }
}

sub TIEHASH {
  my ($package, %args) = @_;
  my %cache;
  if ($args{TIE}) {
    my ($module, @opts) = @{$args{TIE}};
    my $modulefile = $module . '.pm';
    $modulefile =~ s{::}{/}g;
    eval { require $modulefile };
    if ($@) {
      croak "Memoize::Expire: Couldn't load hash tie module `$module': $@; aborting";
    }
    my $rc = (tie %cache => $module, @opts);
    unless ($rc) {
      croak "Memoize::Expire: Couldn't tie hash to `$module': $@; aborting";
    }
  }
  $args{LIFETIME} ||= 0;
  $args{NUM_USES} ||= 0;
  $args{C} = \%cache;
  bless \%args => $package;
}

sub STORE {
  $DEBUG and print STDERR " >> Store $_[1] $_[2]\n";
  my ($self, $key, $value) = @_;
  my $expire_time = $self->{LIFETIME} > 0 ? $self->{LIFETIME} + time : 0;
  # The call that results in a value to store into the cache is the
  # first of the NUM_USES allowed calls.
  my $header = _make_header(time, $expire_time, $self->{NUM_USES}-1);
  $self->{C}{$key} = $header . $value;
  $value;
}

sub FETCH {
  $DEBUG and print STDERR " >> Fetch cached value for $_[1]\n";
  my ($data, $last_access, $expire_time, $num_uses_left) = _get_item($_[0]{C}{$_[1]});
  $DEBUG and print STDERR " >>   (ttl: ", ($expire_time-time()), ", nuses: $num_uses_left)\n";
  $num_uses_left--;
  $last_access = time;
  _set_header(@_, $data, $last_access, $expire_time, $num_uses_left);
  $data;
}

sub EXISTS {
  $DEBUG and print STDERR " >> Exists $_[1]\n";
  unless (exists $_[0]{C}{$_[1]}) {
    $DEBUG and print STDERR "    Not in underlying hash at all.\n";
    return 0;
  }
  my $item = $_[0]{C}{$_[1]};
  my ($last_access, $expire_time, $num_uses_left) = _get_header($item);
  my $ttl = $expire_time - time;
  if ($DEBUG) {
    $_[0]{LIFETIME} and print STDERR "    Time to live for this item: $ttl\n";
    $_[0]{NUM_USES} and print STDERR "    Uses remaining: $num_uses_left\n";
  }
  if (   (! $_[0]{LIFETIME} || $expire_time > time)
      && (! $_[0]{NUM_USES} || $num_uses_left > 0 )) {
	    $DEBUG and print STDERR "    (Still good)\n";
    return 1;
  } else {
    $DEBUG and print STDERR "    (Expired)\n";
    return 0;
  }
}

# Arguments: last access time, expire time, number of uses remaining
sub _make_header {
  pack "N N n", @_;
}

sub _strip_header {
  substr($_[0], 10);
}

# Arguments: last access time, expire time, number of uses remaining
sub _set_header {
  my ($self, $key, $data, @header) = @_;
  $self->{C}{$key} = _make_header(@header) . $data;
}

sub _get_item {
  my $data = substr($_[0], 10);
  my @header = unpack "N N n", substr($_[0], 0, 10);
#  print STDERR " >> _get_item: $data => $data @header\n";
  ($data, @header);
}

# Return last access time, expire time, number of uses remaining
sub _get_header  {
  unpack "N N n", substr($_[0], 0, 10);
}

1;

=head1 NAME 

Memoize::Expire - Plug-in module for automatic expiration of memoized values

=head1 SYNOPSIS

  use Memoize;
  use Memoize::Expire;
  tie my %cache => 'Memoize::Expire',
	  	     LIFETIME => $lifetime,    # In seconds
		     NUM_USES => $n_uses;

  memoize 'function', SCALAR_CACHE => [HASH => \%cache ];

=head1 DESCRIPTION

Memoize::Expire is a plug-in module for Memoize.  It allows the cached
values for memoized functions to expire automatically.  This manual
assumes you are already familiar with the Memoize module.  If not, you
should study that manual carefully first, paying particular attention
to the HASH feature.

Memoize::Expire is a layer of software that you can insert in between
Memoize itself and whatever underlying package implements the cache.
The layer presents a hash variable whose values expire whenever they
get too old, have been used too often, or both. You tell C<Memoize> to
use this forgetful hash as its cache instead of the default, which is
an ordinary hash.

To specify a real-time timeout, supply the C<LIFETIME> option with a
numeric value.  Cached data will expire after this many seconds, and
will be looked up afresh when it expires.  When a data item is looked
up afresh, its lifetime is reset.

If you specify C<NUM_USES> with an argument of I<n>, then each cached
data item will be discarded and looked up afresh after the I<n>th time
you access it.  When a data item is looked up afresh, its number of
uses is reset.

If you specify both arguments, data will be discarded from the cache
when either expiration condition holds.

Memoize::Expire uses a real hash internally to store the cached data.
You can use the C<HASH> option to Memoize::Expire to supply a tied
hash in place of the ordinary hash that Memoize::Expire will normally
use.  You can use this feature to add Memoize::Expire as a layer in
between a persistent disk hash and Memoize.  If you do this, you get a
persistent disk cache whose entries expire automatically.  For
example:

  #   Memoize
  #      |
  #   Memoize::Expire  enforces data expiration policy
  #      |
  #   DB_File  implements persistence of data in a disk file
  #      |
  #   Disk file

  use Memoize;
  use Memoize::Expire;
  use DB_File;

  # Set up persistence
  tie my %disk_cache => 'DB_File', $filename, O_CREAT|O_RDWR, 0666];

  # Set up expiration policy, supplying persistent hash as a target
  tie my %cache => 'Memoize::Expire', 
	  	     LIFETIME => $lifetime,    # In seconds
		     NUM_USES => $n_uses,
                     HASH => \%disk_cache; 

  # Set up memoization, supplying expiring persistent hash for cache
  memoize 'function', SCALAR_CACHE => [ HASH => \%cache ];

=head1 INTERFACE

There is nothing special about Memoize::Expire.  It is just an
example.  If you don't like the policy that it implements, you are
free to write your own expiration policy module that implements
whatever policy you desire.  Here is how to do that.  Let us suppose
that your module will be named MyExpirePolicy.

Short summary: You need to create a package that defines four methods:

=over 4

=item 
TIEHASH

Construct and return cache object.

=item 
EXISTS

Given a function argument, is the corresponding function value in the
cache, and if so, is it fresh enough to use?

=item
FETCH

Given a function argument, look up the corresponding function value in
the cache and return it.

=item 
STORE

Given a function argument and the corresponding function value, store
them into the cache.

=item
CLEAR

(Optional.)  Flush the cache completely.

=back

The user who wants the memoization cache to be expired according to
your policy will say so by writing

  tie my %cache => 'MyExpirePolicy', args...;
  memoize 'function', SCALAR_CACHE => [HASH => \%cache];

This will invoke C<< MyExpirePolicy->TIEHASH(args) >>.
MyExpirePolicy::TIEHASH should do whatever is appropriate to set up
the cache, and it should return the cache object to the caller.

For example, MyExpirePolicy::TIEHASH might create an object that
contains a regular Perl hash (which it will to store the cached
values) and some extra information about the arguments and how old the
data is and things like that.  Let us call this object `C'.

When Memoize needs to check to see if an entry is in the cache
already, it will invoke C<< C->EXISTS(key) >>.  C<key> is the normalized
function argument.  MyExpirePolicy::EXISTS should return 0 if the key
is not in the cache, or if it has expired, and 1 if an unexpired value
is in the cache.  It should I<not> return C<undef>, because there is a
bug in some versions of Perl that will cause a spurious FETCH if the
EXISTS method returns C<undef>.

If your EXISTS function returns true, Memoize will try to fetch the
cached value by invoking C<< C->FETCH(key) >>.  MyExpirePolicy::FETCH should
return the cached value.  Otherwise, Memoize will call the memoized
function to compute the appropriate value, and will store it into the
cache by calling C<< C->STORE(key, value) >>.

Here is a very brief example of a policy module that expires each
cache item after ten seconds.

	package Memoize::TenSecondExpire;

	sub TIEHASH {
	  my ($package, %args) = @_;
          my $cache = $args{HASH} || {};
	  bless $cache => $package;
	}

	sub EXISTS {
	  my ($cache, $key) = @_;
	  if (exists $cache->{$key} && 
              $cache->{$key}{EXPIRE_TIME} > time) {
	    return 1
	  } else {
	    return 0;  # Do NOT return `undef' here.
	  }
	}

	sub FETCH {
	  my ($cache, $key) = @_;
	  return $cache->{$key}{VALUE};
	}

	sub STORE {
	  my ($cache, $key, $newvalue) = @_;
	  $cache->{$key}{VALUE} = $newvalue;
	  $cache->{$key}{EXPIRE_TIME} = time + 10;
	}

To use this expiration policy, the user would say

	use Memoize;
        tie my %cache10sec => 'Memoize::TenSecondExpire';
	memoize 'function', SCALAR_CACHE => [HASH => \%cache10sec];

Memoize would then call C<function> whenever a cached value was
entirely absent or was older than ten seconds.

You should always support a C<HASH> argument to C<TIEHASH> that ties
the underlying cache so that the user can specify that the cache is
also persistent or that it has some other interesting semantics.  The
example above demonstrates how to do this, as does C<Memoize::Expire>.

=head1 ALTERNATIVES

Brent Powers has a C<Memoize::ExpireLRU> module that was designed to
work with Memoize and provides expiration of least-recently-used data.
The cache is held at a fixed number of entries, and when new data
comes in, the least-recently used data is expired.  See
L<http://search.cpan.org/search?mode=module&query=ExpireLRU>.

Joshua Chamas's Tie::Cache module may be useful as an expiration
manager.  (If you try this, let me know how it works out.)

If you develop any useful expiration managers that you think should be
distributed with Memoize, please let me know.

=head1 CAVEATS

This module is experimental, and may contain bugs.  Please report bugs
to the address below.

Number-of-uses is stored as a 16-bit unsigned integer, so can't exceed
65535.

Because of clock granularity, expiration times may occur up to one
second sooner than you expect.  For example, suppose you store a value
with a lifetime of ten seconds, and you store it at 12:00:00.998 on a
certain day.  Memoize will look at the clock and see 12:00:00.  Then
9.01 seconds later, at 12:00:10.008 you try to read it back.  Memoize
will look at the clock and see 12:00:10 and conclude that the value
has expired.  This will probably not occur if you have
C<Time::HiRes> installed.

=head1 AUTHOR

Mark-Jason Dominus (mjd-perl-memoize+ at plover.com)

Mike Cariaso provided valuable insight into the best way to solve this
problem.

=head1 SEE ALSO

perl(1)

The Memoize man page.

http://www.plover.com/~mjd/perl/Memoize/  (for news and updates)

I maintain a mailing list on which I occasionally announce new
versions of Memoize.  The list is for announcements only, not
discussion.  To join, send an empty message to
mjd-perl-memoize-request at Plover.com.

=cut

--- NEW FILE: README ---

Name:		Memoize
What:		Transparently speed up functions by caching return values.
Version:	1.00
Author:		Mark-Jason Dominus (mjd-perl-memoize+ at plover.com)

################################################################

How to build me:

	perl Makefile.PL
	make
	make test

There's a very small chance that the tests in speed.t and
expire_module_t.t might fail because of clock skew or bizarre system
load conditions.  If the tests there fail, rerun them and see if the
problem persists.

If the tests work,

	make install

If not, please send me a report that mentions which tests failed.
The address is: mjd-perl-memoize+ at plover.com.

################################################################
What's new since 0.66:

Minor documentation and test changes only.

################################################################
What's new since 0.65:

Test changes only.  

        0.62 was the fist version that would be distributed with Perl.
        I got so absorbed in integrating it that I wrote some tests
        that used Time::HiRes.  I knew this was safe because
        Time::HiRes is also distributed with the same versions of
        Perl.  I totally forgot that some people will get the module
        off of CPAN without Perl and they may not have TIme::HiRes.
        Sorry!

################################################################
What's new since 0.62:


                            N O T I C E !

    ****************************************************************
    **                                                            **
    **   The TIE option is now strongly deprecated.  It will be   **
    **   permanently removed in the NEXT release of Memoize.      **
    **   Please convert all extant software to use HASH instead.  **
    **                                                            **
    **   See the manual for details.                              **
    **                                                            **
    ****************************************************************

I'm sorry about this.  I hate making incompatible changes.  But as of
v0.65, Memoize is included in the Perl core.  It is about to become
much more difficult to make incompatible interface changes; if I don't
get rid of TIE now, I may not get another chance.

TIE presented serious problems.  First, it had a bizarre syntax.  But
the big problem was that it was difficult and complicated for
expiration manager authors to support; evern expiration manager had to
duplicate the logic for handling TIE.  HASH is much simpler to use,
more powerful, and is trivial for expiration managers to support.

Many long-awaited cleanups and bug fixes. 

Memoize now works under threaded perl

Slow tests speeded up.  More test file improvements.

Long-standing LIST_CACHE bug cleared up---it turns out that there
never was a bug.  I put in tests for it anyway.

Manual increased.


--- NEW FILE: Storable.pm ---
package Memoize::Storable;

=head1 NAME

Memoize::Storable - store Memoized data in Storable database

=head1 DESCRIPTION

See L<Memoize>.

=cut

use Storable ();
$VERSION = 0.65;
$Verbose = 0;

sub TIEHASH {
  require Carp if $Verbose;
  my $package = shift;
  my $filename = shift;
  my $truehash = (-e $filename) ? Storable::retrieve($filename) : {};
  my %options;
  print STDERR "Memoize::Storable::TIEHASH($filename, @_)\n" if $Verbose;
  @options{@_} = ();
  my $self = 
    {FILENAME => $filename, 
     H => $truehash, 
     OPTIONS => \%options
    };
  bless $self => $package;
}

sub STORE {
  require Carp if $Verbose;
  my $self = shift;
  print STDERR "Memoize::Storable::STORE(@_)\n" if $Verbose;
  $self->{H}{$_[0]} = $_[1];
}

sub FETCH {
  require Carp if $Verbose;
  my $self = shift;
  print STDERR "Memoize::Storable::FETCH(@_)\n" if $Verbose;
  $self->{H}{$_[0]};
}

sub EXISTS {
  require Carp if $Verbose;
  my $self = shift;
  print STDERR "Memoize::Storable::EXISTS(@_)\n" if $Verbose;
  exists $self->{H}{$_[0]};
}

sub DESTROY {
  require Carp if $Verbose;
  my $self= shift;
  print STDERR "Memoize::Storable::DESTROY(@_)\n" if $Verbose;
  if ($self->{OPTIONS}{'nstore'}) {
    Storable::nstore($self->{H}, $self->{FILENAME});
  } else {
    Storable::store($self->{H}, $self->{FILENAME});
  }
}

sub FIRSTKEY {
  'Fake hash from Memoize::Storable';
}

sub NEXTKEY {
  undef;
}
1;

--- NEW FILE: AnyDBM_File.pm ---
package Memoize::AnyDBM_File;

=head1 NAME

Memoize::AnyDBM_File - glue to provide EXISTS for AnyDBM_File for Storable use

=head1 DESCRIPTION

See L<Memoize>.

=cut

use vars qw(@ISA $VERSION);
$VERSION = 0.65;
@ISA = qw(DB_File GDBM_File Memoize::NDBM_File Memoize::SDBM_File ODBM_File) unless @ISA;

my $verbose = 1;

my $mod;
for $mod (@ISA) {
#  (my $truemod = $mod) =~ s/^Memoize:://;
#  my $file = "$mod.pm";
#  $file =~ s{::}{/}g;
  if (eval "require $mod") {
    print STDERR "AnyDBM_File => Selected $mod.\n" if $Verbose;
    @ISA = ($mod);	# if we leave @ISA alone, warnings abound
    return 1;
  }
}

die "No DBM package was successfully found or installed";

--- NEW FILE: TODO ---
# Version 0.05 alpha $Revision: 1.1 $ $Date: 2006-12-05 04:27:12 $

=head1 TO DO

=over 4

=item * 

LIST_CACHE doesn't work with ties to most DBM implementations, because
Memouze tries to save a listref, and DB_File etc. can only store
strings.  This should at least be documented.  Maybe Memoize could
detect the problem at TIE time and throw a fatal error.

20010623 This was added sometime prior to 20001025.

Try out MLDBM here and document it if it works.

=item *

We should extend the benchmarking module to allow

	timethis(main, { MEMOIZED => [ suba, subb ] })

What would this do?  It would time C<main> three times, once with
C<suba> and C<subb> unmemoized, twice with them memoized.

Why would you want to do this?  By the third set of runs, the memo
tables would be fully populated, so all calls by C<main> to C<suba>
and C<subb> would return immediately.  You would be able to see how
much of C<main>'s running time was due to time spent computing in
C<suba> and C<subb>.  If that was just a little time, you would know
that optimizing or improving C<suba> and C<subb> would not have a
large effect on the performance of C<main>.  But if there was a big
difference, you would know that C<suba> or C<subb> was a good
candidate for optimization if you needed to make C<main> go faster.

Done.

=item * 

Perhaps C<memoize> should return a reference to the original function
as well as one to the memoized version?  But the programmer could
always construct such a reference themselves, so perhaps it's not
necessary.  We save such a reference anyway, so a new package method
could return it on demand even if it wasn't provided by C<memoize>.
We could even bless the new function reference so that it could have
accessor methods for getting to the original function, the options,
the memo table, etc.

Naah.

=item *

The TODISK feature is not ready yet.  It will have to be rather
complicated, providing options for which disk method to use (GDBM?
DB_File?  Flat file?  Storable?  User-supplied?) and which stringizing
method to use (FreezeThaw?  Marshal?  User-supplied?)

Done!

=item *

Maybe an option for automatic expiration of cache values?  (`After one
day,' `After five uses,' etc.)  Also possibly an option to limit the
number of active entries with automatic LRU expiration.

You have a long note to Mike Cariaso that outlines a good approach
that you sent on 9 April 1999.

What's the timeout stuff going to look like?

	EXPIRE_TIME => time_in_sec
	EXPIRE_USES => num_uses
	MAXENTRIES => n

perhaps?  Is EXPIRE_USES actually useful?

19990916: Memoize::Expire does EXPIRE_TIME and EXPIRE_USES.
MAXENTRIES can come later as a separate module.

=item *

Put in a better example than C<fibo>.  Show an example of a
nonrecursive function that simply takes a long time to run.
C<getpwuid> for example?  But this exposes the bug that you can't say
C<memoize('getpwuid')>, so perhaps it's not a very good example.

Well, I did add the ColorToRGB example, but it's still not so good.
These examples need a lot of work.  C<factorial> might be a better
example than C<fibo>.  

=item *

Add more regression tests for normalizers.

=item *

Maybe resolve normalizer function to code-ref at memoize time instead
of at function call time for efficiency?  I think there was some
reason not to do this, but I can't remember what it was.

=item *

Add more array value tests to the test suite.

Does it need more now?

=item *

Fix that `Subroutine u redefined ... line 484' message.

Fixed, I think.

=item *

Get rid of any remaining *{$ref}{CODE} or similar magic hashes. 

=item *

There should be an option to dump out the memoized values or to
otherwise traverse them.

What for?

Maybe the tied hash interface taskes care of this anyway?

=item *

Include an example that caches DNS lookups.

=item *

Make tie for Storable (Memoize::Storable)

A prototype of Memoize::Storable is finished.  Test it and add to the
test suite.

Done.

=item *

Make tie for DBI  (Memoize::DBI)

=item *

I think there's a bug.  See `###BUG'.

=item * 

Storable probably can't be done, because it doesn't allow updating.
Maybe a different interface that supports readonly caches fronted by a
writable in-memory cache?  A generic tied hash maybe?

	FETCH {
	  if (it's in the memory hash) {
	    return it
	  } elsif (it's in the readonly disk hash) {
	    return it
	  } else { 
	    not-there
	  }
	}

	STORE {
	  put it into the in-memory hash
	}

Maybe `save' and `restore' methods?

It isn't working right because the destructor doesn't get called at
the right time.

This is fixed.  `use strict vars' would have caught it immediately.  Duh.

=item *

Don't forget about generic interface to Storable-like packages

20010627 It would appear that you put this into 0.51.

=item * 

Maybe add in TODISK after all, with TODISK => 'filename' equivalent to

	SCALAR_CACHE => [TIE, Memoize::SDBM_File, $filename, O_RDWR|O_CREAT, 0666],
	LIST_CACHE => MERGE

=item *

Maybe the default for LIST_CACHE should be MERGE anyway.

=item *

There's some terrible bug probably related to use under threaded perl,
possibly connected with line 56:

    my $wrapper = eval "sub { unshift \@_, qq{$cref}; goto &_memoizer; }";

I think becayse C<@_> is lexically scoped in threadperl, the effect of
C<unshift> never makes it into C<_memoizer>.  That's probably a bug in
Perl, but maybe I should work around it.   Can anyone provide more
information here, or lend me a machine with threaded Perl where I can
test this theory?  Line 59, currently commented out, may fix the
problem.  

20010623 Working around this in 0.65, but it still blows.

=item * 

Maybe if the original function has a prototype, the module can use
that to select the most appropriate default normalizer.  For example,
if the prototype was C<($)>, there's no reason to use `join'.  If it's
C<(\@)> then it can use C<join $;,@$_[0];> instead of C<join $;, at _;>.

=item *

Ariel Scolnikov suggests using the change counting problem as an
example.  (How many ways to make change of a dollar?)

=item * 

Jonathan Roy found a use for `unmemoize'.  If you're using the
Storable glue, and your program gets SIGINT, you find that the cache
data is not in the cache, because Perl normally writes it all out at
once from a DESTROY method, and signals skip DESTROY processing.  So
you could add

	$sig{INT} = sub { unmemoize ... };


=item *

This means it would be useful to have a method to return references to
all the currently-memoized functions so that you could say

	$sig{INT} = sub { for $f (Memoize->all_memoized) {
	                    unmemoize $f;
	                  }
	                }


=item *

19990917 There should be a call you can make to get back the cache
itself.  If there were, then you could delete stuff from it to
manually expire data items.

=item *

19990925 Randal says that the docs for Memoize;:Expire should make it
clear that the expired entries are never flushed all at once.  He
asked if you would need to do that manually.  I said:

  Right, if that's what you want.  If you have EXISTS return false,
  it'll throw away the old cached item and replace it in the cache
  with a new item.  But if you want the cache to actually get smaller,
  you have to do that yourself.

  I was planning to build an Expire module that implemented an LRU
  queue and kept the cache at a constant fixed size, but I didn't get
  to it yet.  It's not clear to me that the automatic exptynig-out
  behavior is very useful anyway.  The whole point of a cache is to
  trade space for time, so why bother going through the cache to throw
  away old items before you need to?

Randal then pointed out that it could discard expired items at DESTRoY
or TIEHASH time, which seemed like a good idea, because if the cache
is on disk you might like to keep it as small as possible.

=item *

19991219 Philip Gwyn suggests this technique:  You have a load_file
function that memoizes the file contexts.  But then if the file
changes you get the old contents.  So add a normalizer that does

	return join $;, (stat($_[0])[9]), $_[0];

Now when the modification date changes, the true key returned by the
normalizer is different, so you get a cache miss and it loads the new
contents.   Disadvantage:  The old contents are still in the cache.  I
think it makes more sense to have a special expiration manager for
this.  Make one up and bundle it.

19991220 I have one written: Memoize::ExpireFile.  But how can you
make this work when the function might have several arguments, of
which some are filenames and some aren't?

=item *

19991219 There should be an inheritable TIEHASH method that does the
argument processing properly.

19991220 Philip Gwyn contributed a patch for this.

20001231 You should really put this in.  Jonathan Roy uncovered a
problem that it will be needed to solve.  Here's the problem:  He has:

        memoize "get_items",
        LIST_CACHE => ["TIE", "Memoize::Expire",
                LIFETIME => 86400,
                TIE => ["DB_File", "debug.db", O_CREAT|O_RDWR, 0666]
        ];

This won't work, because memoize is trying to store listrefs in a
DB_File.    He owuld have gotten a fatal error if he had done this:

        memoize "get_items",
          LIST_CACHE => ["TIE", "DB_File", "debug.db", O_CREAT|O_RDWR, 0666]'


But in this case, he tied the cache to Memoize::Expire, which is *not*
scalar-only, and the check for scalar-only ties is missing from
Memoize::Expire.  The inheritable method can take care of this.

20010623 I decided not to put it in.  Instead, we avoid the problem by
getting rid of TIE.  The HASH option does the same thing, and HASH is
so simple to support that a module is superfluous.

=item *

20001130 Custom cache manager that checks to make sure the function
return values actually match the memoized values.

=item *

20001231 Expiration manager that watches cache performance and
accumulates statistics.  Variation:  Have it automatically unmemoize
the function if performance is bad.

=item *

20010517 Option to have normalizer *modify* @_ for use by memoized
function.  This would save code and time in cases like the one in the
manual under 'NORMALIZER', where both f() and normalize_f() do the
same analysis and make the same adjustments to the hash.  If the
normalizer could make the adjustments and save the changes in @_, you
wouldn't have to do it twice. 

=item *
20010623 Add CLEAR methods to tied hash modules.

=item *
20010623 You get a warning if you try to use DB_File as LIST_CACHE,
because it won't store lists.  But if you use it as the underlying
cache with an expiration manager in the middle, no warning---the
expiration manager doesn't know it's managing a list cache, and
memoize doesn't know that DB_File is underlying.  Is this fixable?
Probably not, but think about it.

=item *
There was probably some other stuff that I forgot.



=back




More information about the dslinux-commit mailing list