dslinux/user/perl/lib/Search Dict.pm Dict.t
cayenne
dslinux_cayenne at user.in-berlin.de
Mon Dec 4 18:01:01 CET 2006
Update of /cvsroot/dslinux/dslinux/user/perl/lib/Search
In directory antilope:/tmp/cvs-serv17422/lib/Search
Added Files:
Dict.pm Dict.t
Log Message:
Adding fresh perl source to HEAD to branch from
--- NEW FILE: Dict.t ---
#!./perl
BEGIN {
chdir 't' if -d 't';
@INC = '../lib';
}
print "1..4\n";
$DICT = <<EOT;
Aarhus
Aaron
Ababa
aback
abaft
abandon
abandoned
abandoning
abandonment
abandons
abase
abased
abasement
abasements
abases
abash
abashed
abashes
abashing
abasing
abate
abated
abatement
abatements
abater
abates
abating
Abba
EOT
use Search::Dict;
open(DICT, "+>dict-$$") or die "Can't create dict-$$: $!";
binmode DICT; # To make length expected one.
print DICT $DICT;
my $pos = look *DICT, "Ababa";
chomp($word = <DICT>);
print "not " if $pos < 0 || $word ne "Ababa";
print "ok 1\n";
if (ord('a') > ord('A') ) { # ASCII
$pos = look *DICT, "foo";
chomp($word = <DICT>);
print "not " if $pos != length($DICT); # will search to end of file
print "ok 2\n";
my $pos = look *DICT, "abash";
chomp($word = <DICT>);
print "not " if $pos < 0 || $word ne "abash";
print "ok 3\n";
}
else { # EBCDIC systems e.g. os390
$pos = look *DICT, "FOO";
chomp($word = <DICT>);
print "not " if $pos != length($DICT); # will search to end of file
print "ok 2\n";
my $pos = look *DICT, "Abba";
chomp($word = <DICT>);
print "not " if $pos < 0 || $word ne "Abba";
print "ok 3\n";
}
$pos = look *DICT, "aarhus", 1, 1;
chomp($word = <DICT>);
print "not " if $pos < 0 || $word ne "Aarhus";
print "ok 4\n";
close DICT or die "cannot close";
unlink "dict-$$";
--- NEW FILE: Dict.pm ---
package Search::Dict;
require 5.000;
require Exporter;
use strict;
our $VERSION = '1.02';
our @ISA = qw(Exporter);
our @EXPORT = qw(look);
=head1 NAME
Search::Dict, look - search for key in dictionary file
=head1 SYNOPSIS
use Search::Dict;
look *FILEHANDLE, $key, $dict, $fold;
use Search::Dict;
look *FILEHANDLE, $params;
=head1 DESCRIPTION
Sets file position in FILEHANDLE to be first line greater than or equal
(stringwise) to I<$key>. Returns the new file position, or -1 if an error
occurs.
The flags specify dictionary order and case folding:
If I<$dict> is true, search by dictionary order (ignore anything but word
characters and whitespace). The default is honour all characters.
If I<$fold> is true, ignore case. The default is to honour case.
If there are only three arguments and the third argument is a hash
reference, the keys of that hash can have values C<dict>, C<fold>, and
C<comp> or C<xfrm> (see below), and their correponding values will be
used as the parameters.
If a comparison subroutine (comp) is defined, it must return less than zero,
zero, or greater than zero, if the first comparand is less than,
equal, or greater than the second comparand.
If a transformation subroutine (xfrm) is defined, its value is used to
transform the lines read from the filehandle before their comparison.
=cut
sub look {
my($fh,$key,$dict,$fold) = @_;
my ($comp, $xfrm);
if (@_ == 3 && ref $dict eq 'HASH') {
my $params = $dict;
$dict = 0;
$dict = $params->{dict} if exists $params->{dict};
$fold = $params->{fold} if exists $params->{fold};
$comp = $params->{comp} if exists $params->{comp};
$xfrm = $params->{xfrm} if exists $params->{xfrm};
}
$comp = sub { $_[0] cmp $_[1] } unless defined $comp;
local($_);
my(@stat) = stat($fh)
or return -1;
my($size, $blksize) = @stat[7,11];
$blksize ||= 8192;
$key =~ s/[^\w\s]//g if $dict;
$key = lc $key if $fold;
# find the right block
my($min, $max) = (0, int($size / $blksize));
my $mid;
while ($max - $min > 1) {
$mid = int(($max + $min) / 2);
seek($fh, $mid * $blksize, 0)
or return -1;
<$fh> if $mid; # probably a partial line
$_ = <$fh>;
$_ = $xfrm->($_) if defined $xfrm;
chomp;
s/[^\w\s]//g if $dict;
$_ = lc $_ if $fold;
if (defined($_) && $comp->($_, $key) < 0) {
$min = $mid;
}
else {
$max = $mid;
}
}
# find the right line
$min *= $blksize;
seek($fh,$min,0)
or return -1;
<$fh> if $min;
for (;;) {
$min = tell($fh);
defined($_ = <$fh>)
or last;
$_ = $xfrm->($_) if defined $xfrm;
chomp;
s/[^\w\s]//g if $dict;
$_ = lc $_ if $fold;
last if $comp->($_, $key) >= 0;
}
seek($fh,$min,0);
$min;
}
1;
More information about the dslinux-commit
mailing list