dslinux/user/perl/ext/SDBM_File/sdbm CHANGES COMPARE Makefile.PL README README.too biblio dba.c dbd.c dbe.1 dbe.c dbu.c grind hash.c linux.patches makefile.sdbm pair.c pair.h readme.ms sdbm.3 sdbm.c sdbm.h tune.h util.c
cayenne
dslinux_cayenne at user.in-berlin.de
Mon Dec 4 17:59:41 CET 2006
Update of /cvsroot/dslinux/dslinux/user/perl/ext/SDBM_File/sdbm
In directory antilope:/tmp/cvs-serv17422/ext/SDBM_File/sdbm
Added Files:
CHANGES COMPARE Makefile.PL README README.too biblio dba.c
dbd.c dbe.1 dbe.c dbu.c grind hash.c linux.patches
makefile.sdbm pair.c pair.h readme.ms sdbm.3 sdbm.c sdbm.h
tune.h util.c
Log Message:
Adding fresh perl source to HEAD to branch from
--- NEW FILE: README.too ---
This version of sdbm merely has all the dbm_* names translated to sdbm_*
so that we can link ndbm and sdbm into the same executable. (It also has
the bad() macro redefined to allow a zero-length key.)
Fri Apr 15 10:15:30 EDT 1994.
Additional portability/configuration changes for libsdbm by Andy Dougherty
doughera at lafayette.edu.
Mon Mar 22 03:24:47 PST 1999.
sdbm_exists added to the library by Russ Allbery <rra at stanford.edu>.
--- NEW FILE: tune.h ---
/*
* sdbm - ndbm work-alike hashed database library
* tuning and portability constructs [not nearly enough]
* author: oz at nexus.yorku.ca
*/
#define BYTESIZ 8
/*
* important tuning parms (hah)
*/
#define SEEDUPS /* always detect duplicates */
#define BADMESS /* generate a message for worst case:
cannot make room after SPLTMAX splits */
/*
* misc
*/
#ifdef DEBUG
#define debug(x) printf x
#else
#define debug(x)
#endif
--- NEW FILE: util.c ---
#include <stdio.h>
#ifdef SDBM
#include "sdbm.h"
#else
#include "ndbm.h"
#endif
void
oops(register char *s1, register char *s2)
{
extern int errno, sys_nerr;
extern char *sys_errlist[];
extern char *progname;
if (progname)
fprintf(stderr, "%s: ", progname);
fprintf(stderr, s1, s2);
if (errno > 0 && errno < sys_nerr)
fprintf(stderr, " (%s)", sys_errlist[errno]);
fprintf(stderr, "\n");
exit(1);
}
int
okpage(char *pag)
{
register unsigned n;
register off;
register short *ino = (short *) pag;
if ((n = ino[0]) > PBLKSIZ / sizeof(short))
return 0;
if (!n)
return 1;
off = PBLKSIZ;
for (ino++; n; ino += 2) {
if (ino[0] > off || ino[1] > off ||
ino[1] > ino[0])
return 0;
off = ino[1];
n -= 2;
}
return 1;
}
--- NEW FILE: makefile.sdbm ---
#
# makefile for public domain ndbm-clone: sdbm
# DUFF: use duff's device (loop unroll) in parts of the code
#
CFLAGS = -O -DSDBM -DDUFF -DBSD42 -pic
#LDFLAGS = -p
OBJS = sdbm.o pair.o hash.o
SRCS = sdbm.c pair.c hash.c dbu.c dba.c dbd.c util.c
HDRS = tune.h sdbm.h pair.h
MISC = README CHANGES COMPARE sdbm.3 dbe.c dbe.1 dbm.c dbm.h biblio \
readme.ms readme.ps
all: dbu dba dbd dbe
dbu: dbu.o sdbm util.o
cc $(LDFLAGS) -o dbu dbu.o util.o libsdbm.a
dba: dba.o util.o
cc $(LDFLAGS) -o dba dba.o util.o
dbd: dbd.o util.o
cc $(LDFLAGS) -o dbd dbd.o util.o
dbe: dbe.o sdbm
cc $(LDFLAGS) -o dbe dbe.o libsdbm.a
sdbm: $(OBJS)
ar cr libsdbm.a $(OBJS)
ranlib libsdbm.a
### cp libsdbm.a /usr/lib/libsdbm.a
dba.o: sdbm.h
dbu.o: sdbm.h
util.o:sdbm.h
$(OBJS): sdbm.h tune.h pair.h
#
# dbu using berkelezoid ndbm routines [if you have them] for testing
#
#x-dbu: dbu.o util.o
# cc $(CFLAGS) -o x-dbu dbu.o util.o
lint:
lint -abchx $(SRCS)
clean:
rm -f *.o mon.out core
purge: clean
rm -f dbu libsdbm.a dbd dba dbe x-dbu *.dir *.pag
shar:
shar $(MISC) makefile $(SRCS) $(HDRS) >SDBM.SHAR
readme:
nroff -ms readme.ms | col -b >README
--- NEW FILE: dbe.c ---
#include <stdio.h>
#ifndef VMS
#include <sys/file.h>
#include <ndbm.h>
#else
#include "file.h"
#include "ndbm.h"
#endif
#include <ctype.h>
/***************************************************************************\
** **
** Function name: getopt() **
** Author: Henry Spencer, UofT **
** Coding date: 84/04/28 **
** **
** Description: **
** **
** Parses argv[] for arguments. **
** Works with Whitesmith's C compiler. **
** **
** Inputs - The number of arguments **
** - The base address of the array of arguments **
** - A string listing the valid options (':' indicates an **
** argument to the preceding option is required, a ';' **
** indicates an argument to the preceding option is optional) **
** **
** Outputs - Returns the next option character, **
** '?' for non '-' arguments **
** or ':' when there is no more arguments. **
** **
** Side Effects + The argument to an option is pointed to by 'optarg' **
** **
*****************************************************************************
** **
** REVISION HISTORY: **
** **
** DATE NAME DESCRIPTION **
** YY/MM/DD ------------------ ------------------------------------ **
** 88/10/20 Janick Bergeron Returns '?' on unamed arguments **
** returns '!' on unknown options **
** and 'EOF' only when exhausted. **
** 88/11/18 Janick Bergeron Return ':' when no more arguments **
** 89/08/11 Janick Bergeron Optional optarg when ';' in optstring **
** **
\***************************************************************************/
char *optarg; /* Global argument pointer. */
#ifdef VMS
#define index strchr
#endif
char
getopt(int argc, char **argv, char *optstring)
{
register int c;
register char *place;
extern char *index();
static int optind = 0;
static char *scan = NULL;
optarg = NULL;
if (scan == NULL || *scan == '\0') {
if (optind == 0)
optind++;
if (optind >= argc)
return ':';
optarg = place = argv[optind++];
if (place[0] != '-' || place[1] == '\0')
return '?';
if (place[1] == '-' && place[2] == '\0')
return '?';
scan = place + 1;
}
c = *scan++;
place = index(optstring, c);
if (place == NULL || c == ':' || c == ';') {
(void) fprintf(stderr, "%s: unknown option %c\n", argv[0], c);
scan = NULL;
return '!';
}
if (*++place == ':') {
if (*scan != '\0') {
optarg = scan;
scan = NULL;
}
else {
if (optind >= argc) {
(void) fprintf(stderr, "%s: %c requires an argument\n",
argv[0], c);
return '!';
}
optarg = argv[optind];
optind++;
}
}
else if (*place == ';') {
if (*scan != '\0') {
optarg = scan;
scan = NULL;
}
else {
if (optind >= argc || *argv[optind] == '-')
optarg = NULL;
else {
optarg = argv[optind];
optind++;
}
}
}
return c;
}
void
print_datum(datum db)
{
int i;
putchar('"');
for (i = 0; i < db.dsize; i++) {
if (isprint((unsigned char)db.dptr[i]))
putchar(db.dptr[i]);
else {
putchar('\\');
putchar('0' + ((db.dptr[i] >> 6) & 0x07));
putchar('0' + ((db.dptr[i] >> 3) & 0x07));
putchar('0' + (db.dptr[i] & 0x07));
}
}
putchar('"');
}
datum
read_datum(char *s)
{
datum db;
char *p;
int i;
db.dsize = 0;
db.dptr = (char *) malloc(strlen(s) * sizeof(char));
if (!db.dptr)
oops("cannot get memory");
for (p = db.dptr; *s != '\0'; p++, db.dsize++, s++) {
if (*s == '\\') {
if (*++s == 'n')
*p = '\n';
else if (*s == 'r')
*p = '\r';
else if (*s == 'f')
*p = '\f';
else if (*s == 't')
*p = '\t';
else if (isdigit((unsigned char)*s)
&& isdigit((unsigned char)*(s + 1))
&& isdigit((unsigned char)*(s + 2)))
{
i = (*s++ - '0') << 6;
i |= (*s++ - '0') << 3;
i |= *s - '0';
*p = i;
}
else if (*s == '0')
*p = '\0';
else
*p = *s;
}
else
*p = *s;
}
return db;
}
char *
key2s(datum db)
{
char *buf;
char *p1, *p2;
buf = (char *) malloc((db.dsize + 1) * sizeof(char));
if (!buf)
oops("cannot get memory");
for (p1 = buf, p2 = db.dptr; *p2 != '\0'; *p1++ = *p2++);
*p1 = '\0';
return buf;
}
int
main(int argc, char **argv)
{
typedef enum {
YOW, FETCH, STORE, DELETE, SCAN, REGEXP
} commands;
char opt;
int flags;
int giveusage = 0;
int verbose = 0;
commands what = YOW;
char *comarg[3];
int st_flag = DBM_INSERT;
int argn;
DBM *db;
datum key;
datum content;
flags = O_RDWR;
argn = 0;
while ((opt = getopt(argc, argv, "acdfFm:rstvx")) != ':') {
switch (opt) {
case 'a':
what = SCAN;
break;
case 'c':
flags |= O_CREAT;
break;
case 'd':
what = DELETE;
break;
case 'f':
what = FETCH;
break;
case 'F':
what = REGEXP;
break;
case 'm':
flags &= ~(000007);
if (strcmp(optarg, "r") == 0)
flags |= O_RDONLY;
else if (strcmp(optarg, "w") == 0)
flags |= O_WRONLY;
else if (strcmp(optarg, "rw") == 0)
flags |= O_RDWR;
else {
fprintf(stderr, "Invalid mode: \"%s\"\n", optarg);
giveusage = 1;
}
break;
case 'r':
st_flag = DBM_REPLACE;
break;
case 's':
what = STORE;
break;
case 't':
flags |= O_TRUNC;
break;
case 'v':
verbose = 1;
break;
case 'x':
flags |= O_EXCL;
break;
case '!':
giveusage = 1;
break;
case '?':
if (argn < 3)
comarg[argn++] = optarg;
else {
fprintf(stderr, "Too many arguments.\n");
giveusage = 1;
}
break;
}
}
if (giveusage || what == YOW || argn < 1) {
fprintf(stderr, "Usage: %s databse [-m r|w|rw] [-crtx] -a|-d|-f|-F|-s [key [content]]\n", argv[0]);
exit(-1);
}
if ((db = dbm_open(comarg[0], flags, 0777)) == NULL) {
fprintf(stderr, "Error opening database \"%s\"\n", comarg[0]);
exit(-1);
}
if (argn > 1)
key = read_datum(comarg[1]);
if (argn > 2)
content = read_datum(comarg[2]);
switch (what) {
case SCAN:
key = dbm_firstkey(db);
if (dbm_error(db)) {
fprintf(stderr, "Error when fetching first key\n");
goto db_exit;
}
while (key.dptr != NULL) {
content = dbm_fetch(db, key);
if (dbm_error(db)) {
fprintf(stderr, "Error when fetching ");
print_datum(key);
printf("\n");
goto db_exit;
}
print_datum(key);
printf(": ");
print_datum(content);
printf("\n");
if (dbm_error(db)) {
fprintf(stderr, "Error when fetching next key\n");
goto db_exit;
}
key = dbm_nextkey(db);
}
break;
case REGEXP:
if (argn < 2) {
fprintf(stderr, "Missing regular expression.\n");
goto db_exit;
}
if (re_comp(comarg[1])) {
fprintf(stderr, "Invalid regular expression\n");
goto db_exit;
}
key = dbm_firstkey(db);
if (dbm_error(db)) {
fprintf(stderr, "Error when fetching first key\n");
goto db_exit;
}
while (key.dptr != NULL) {
if (re_exec(key2s(key))) {
content = dbm_fetch(db, key);
if (dbm_error(db)) {
fprintf(stderr, "Error when fetching ");
print_datum(key);
printf("\n");
goto db_exit;
}
print_datum(key);
printf(": ");
print_datum(content);
printf("\n");
if (dbm_error(db)) {
fprintf(stderr, "Error when fetching next key\n");
goto db_exit;
}
}
key = dbm_nextkey(db);
}
break;
case FETCH:
if (argn < 2) {
fprintf(stderr, "Missing fetch key.\n");
goto db_exit;
}
content = dbm_fetch(db, key);
if (dbm_error(db)) {
fprintf(stderr, "Error when fetching ");
print_datum(key);
printf("\n");
goto db_exit;
}
if (content.dptr == NULL) {
fprintf(stderr, "Cannot find ");
print_datum(key);
printf("\n");
goto db_exit;
}
print_datum(key);
printf(": ");
print_datum(content);
printf("\n");
break;
case DELETE:
if (argn < 2) {
fprintf(stderr, "Missing delete key.\n");
goto db_exit;
}
if (dbm_delete(db, key) || dbm_error(db)) {
fprintf(stderr, "Error when deleting ");
print_datum(key);
printf("\n");
goto db_exit;
}
if (verbose) {
print_datum(key);
printf(": DELETED\n");
}
break;
case STORE:
if (argn < 3) {
fprintf(stderr, "Missing key and/or content.\n");
goto db_exit;
}
if (dbm_store(db, key, content, st_flag) || dbm_error(db)) {
fprintf(stderr, "Error when storing ");
print_datum(key);
printf("\n");
goto db_exit;
}
if (verbose) {
print_datum(key);
printf(": ");
print_datum(content);
printf(" STORED\n");
}
break;
}
db_exit:
dbm_clearerr(db);
dbm_close(db);
if (dbm_error(db)) {
fprintf(stderr, "Error closing database \"%s\"\n", comarg[0]);
exit(-1);
}
}
--- NEW FILE: CHANGES ---
Changes from the earlier BETA releases.
o dbm_prep does everything now, so dbm_open is just a simple
wrapper that builds the default filenames. dbm_prep no longer
requires a (DBM *) db parameter: it allocates one itself. It
returns (DBM *) db or (DBM *) NULL.
o makroom is now reliable. In the common-case optimization of the page
split, the page into which the incoming key/value pair is to be inserted
is write-deferred (if the split is successful), thereby saving a cosly
write. BUT, if the split does not make enough room (unsuccessful), the
deferred page is written out, as the failure-window is now dependent on
the number of split attempts.
o if -DDUFF is defined, hash function will also use the DUFF construct.
This may look like a micro-performance tweak (maybe it is), but in fact,
the hash function is the third most-heavily used function, after read
and write.
--- NEW FILE: sdbm.3 ---
.\" $Id: sdbm.3,v 1.2 2006-12-04 16:59:39 dslinux_cayenne Exp $
.TH SDBM 3 "1 March 1990"
.SH NAME
sdbm, sdbm_open, sdbm_prep, sdbm_close, sdbm_fetch, sdbm_store, sdbm_delete, sdbm_exists, sdbm_firstkey, sdbm_nextkey, sdbm_hash, sdbm_rdonly, sdbm_error, sdbm_clearerr, sdbm_dirfno, sdbm_pagfno \- data base subroutines
.SH SYNOPSIS
.nf
.ft B
#include <sdbm.h>
.sp
typedef struct {
char *dptr;
int dsize;
} datum;
.sp
datum nullitem = { NULL, 0 };
.sp
\s-1DBM\s0 *sdbm_open(char *file, int flags, int mode)
.sp
\s-1DBM\s0 *sdbm_prep(char *dirname, char *pagname, int flags, int mode)
.sp
void sdbm_close(\s-1DBM\s0 *db)
.sp
datum sdbm_fetch(\s-1DBM\s0 *db, key)
.sp
int sdbm_store(\s-1DBM\s0 *db, datum key, datum val, int flags)
.sp
int sdbm_delete(\s-1DBM\s0 *db, datum key)
.sp
int sdbm_exists(\s-1DBM\s0 *db, datum key)
.sp
datum sdbm_firstkey(\s-1DBM\s0 *db)
.sp
datum sdbm_nextkey(\s-1DBM\s0 *db)
.sp
long sdbm_hash(char *string, int len)
.sp
int sdbm_rdonly(\s-1DBM\s0 *db)
int sdbm_error(\s-1DBM\s0 *db)
sdbm_clearerr(\s-1DBM\s0 *db)
int sdbm_dirfno(\s-1DBM\s0 *db)
int sdbm_pagfno(\s-1DBM\s0 *db)
.ft R
.fi
.SH DESCRIPTION
.IX "database library" sdbm "" "\fLsdbm\fR"
.IX sdbm_open "" "\fLsdbm_open\fR \(em open \fLsdbm\fR database"
.IX sdbm_prep "" "\fLsdbm_prep\fR \(em prepare \fLsdbm\fR database"
.IX sdbm_close "" "\fLsdbm_close\fR \(em close \fLsdbm\fR routine"
.IX sdbm_fetch "" "\fLsdbm_fetch\fR \(em fetch \fLsdbm\fR database data"
.IX sdbm_store "" "\fLsdbm_store\fR \(em add data to \fLsdbm\fR database"
.IX sdbm_delete "" "\fLsdbm_delete\fR \(em remove data from \fLsdbm\fR database"
.IX sdbm_exists "" "\fLsdbm_exists\fR \(em test \fLsdbm\fR key existence"
.IX sdbm_firstkey "" "\fLsdbm_firstkey\fR \(em access \fLsdbm\fR database"
.IX sdbm_nextkey "" "\fLsdbm_nextkey\fR \(em access \fLsdbm\fR database"
.IX sdbm_hash "" "\fLsdbm_hash\fR \(em string hash for \fLsdbm\fR database"
.IX sdbm_rdonly "" "\fLsdbm_rdonly\fR \(em return \fLsdbm\fR database read-only mode"
.IX sdbm_error "" "\fLsdbm_error\fR \(em return \fLsdbm\fR database error condition"
.IX sdbm_clearerr "" "\fLsdbm_clearerr\fR \(em clear \fLsdbm\fR database error condition"
.IX sdbm_dirfno "" "\fLsdbm_dirfno\fR \(em return \fLsdbm\fR database bitmap file descriptor"
.IX sdbm_pagfno "" "\fLsdbm_pagfno\fR \(em return \fLsdbm\fR database data file descriptor"
.IX "database functions \(em \fLsdbm\fR" sdbm_open "" \fLsdbm_open\fP
.IX "database functions \(em \fLsdbm\fR" sdbm_prep "" \fLsdbm_prep\fP
.IX "database functions \(em \fLsdbm\fR" sdbm_close "" \fLsdbm_close\fP
.IX "database functions \(em \fLsdbm\fR" sdbm_fetch "" \fLsdbm_fetch\fP
.IX "database functions \(em \fLsdbm\fR" sdbm_store "" \fLsdbm_store\fP
.IX "database functions \(em \fLsdbm\fR" sdbm_delete "" \fLsdbm_delete\fP
.IX "database functions \(em \fLsdbm\fR" sdbm_firstkey "" \fLsdbm_firstkey\fP
.IX "database functions \(em \fLsdbm\fR" sdbm_nextkey "" \fLsdbm_nextkey\fP
.IX "database functions \(em \fLsdbm\fR" sdbm_rdonly "" \fLsdbm_rdonly\fP
.IX "database functions \(em \fLsdbm\fR" sdbm_error "" \fLsdbm_error\fP
.IX "database functions \(em \fLsdbm\fR" sdbm_clearerr "" \fLsdbm_clearerr\fP
.IX "database functions \(em \fLsdbm\fR" sdbm_dirfno "" \fLsdbm_dirfno\fP
.IX "database functions \(em \fLsdbm\fR" sdbm_pagfno "" \fLsdbm_pagfno\fP
.LP
This package allows an application to maintain a mapping of <key,value> pairs
in disk files. This is not to be considered a real database system, but is
still useful in many simple applications built around fast retrieval of a data
value from a key. This implementation uses an external hashing scheme,
called Dynamic Hashing, as described by Per-Aake Larson in BIT 18 (1978) pp.
184-201. Retrieval of any item usually requires a single disk access.
The application interface is compatible with the
.IR ndbm (3)
library.
.LP
An
.B sdbm
database is kept in two files usually given the extensions
.B \.dir
and
.BR \.pag .
The
.B \.dir
file contains a bitmap representing a forest of binary hash trees, the leaves
of which indicate data pages in the
.B \.pag
file.
.LP
The application interface uses the
.B datum
structure to describe both
.I keys
and
.IR value s.
A
.B datum
specifies a byte sequence of
.I dsize
size pointed to by
.IR dptr .
If you use
.SM ASCII
strings as
.IR key s
or
.IR value s,
then you must decide whether or not to include the terminating
.SM NUL
byte which sometimes defines strings. Including it will require larger
database files, but it will be possible to get sensible output from a
.IR strings (1)
command applied to the data file.
.LP
In order to allow a process using this package to manipulate multiple
databases, the applications interface always requires a
.IR handle ,
a
.BR "DBM *" ,
to identify the database to be manipulated. Such a handle can be obtained
from the only routines that do not require it, namely
.BR sdbm_open (\|)
or
.BR sdbm_prep (\|).
Either of these will open or create the two necessary files. The
difference is that the latter allows explicitly naming the bitmap and data
files whereas
.BR sdbm_open (\|)
will take a base file name and call
.BR sdbm_prep (\|)
with the default extensions.
The
.I flags
and
.I mode
parameters are the same as for
.BR open (2).
.LP
To free the resources occupied while a database handle is active, call
.BR sdbm_close (\|).
.LP
Given a handle, one can retrieve data associated with a key by using the
.BR sdbm_fetch (\|)
routine, and associate data with a key by using the
.BR sdbm_store (\|)
routine.
.BR sdbm_exists (\|)
will say whether a given key exists in the database.
.LP
The values of the
.I flags
parameter for
.BR sdbm_store (\|)
can be either
.BR \s-1DBM_INSERT\s0 ,
which will not change an existing entry with the same key, or
.BR \s-1DBM_REPLACE\s0 ,
which will replace an existing entry with the same key.
Keys are unique within the database.
.LP
To delete a key and its associated value use the
.BR sdbm_delete (\|)
routine.
.LP
To retrieve every key in the database, use a loop like:
.sp
.nf
.ft B
for (key = sdbm_firstkey(db); key.dptr != NULL; key = sdbm_nextkey(db))
;
.ft R
.fi
.LP
The order of retrieval is unspecified.
.LP
If you determine that the performance of the database is inadequate or
you notice clustering or other effects that may be due to the hashing
algorithm used by this package, you can override it by supplying your
own
.BR sdbm_hash (\|)
routine. Doing so will make the database unintelligable to any other
applications that do not use your specialized hash function.
.sp
.LP
The following macros are defined in the header file:
.IP
.BR sdbm_rdonly (\|)
returns true if the database has been opened read\-only.
.IP
.BR sdbm_error (\|)
returns true if an I/O error has occurred.
.IP
.BR sdbm_clearerr (\|)
allows you to clear the error flag if you think you know what the error
was and insist on ignoring it.
.IP
.BR sdbm_dirfno (\|)
returns the file descriptor associated with the bitmap file.
.IP
.BR sdbm_pagfno (\|)
returns the file descriptor associated with the data file.
.SH SEE ALSO
.IR open (2).
.SH DIAGNOSTICS
Functions that return a
.B "DBM *"
handle will use
.SM NULL
to indicate an error.
Functions that return an
.B int
will use \-1 to indicate an error. The normal return value in that case is 0.
Functions that return a
.B datum
will return
.B nullitem
to indicate an error.
.LP
As a special case of
.BR sdbm_store (\|),
if it is called with the
.B \s-1DBM_INSERT\s0
flag and the key already exists in the database, the return value will be 1.
.LP
In general, if a function parameter is invalid,
.B errno
will be set to
.BR \s-1EINVAL\s0 .
If a write operation is requested on a read-only database,
.B errno
will be set to
.BR \s-1ENOPERM\s0 .
If a memory allocation (using
.IR malloc (3))
failed,
.B errno
will be set to
.BR \s-1ENOMEM\s0 .
For I/O operation failures
.B errno
will contain the value set by the relevant failed system call, either
.IR read (2),
.IR write (2),
or
.IR lseek (2).
.SH AUTHOR
.IP "Ozan S. Yigit" (oz at nexus.yorku.ca)
.SH BUGS
The sum of key and value data sizes must not exceed
.B \s-1PAIRMAX\s0
(1008 bytes).
.LP
The sum of the key and value data sizes where several keys hash to the
same value must fit within one bitmap page.
.LP
The
.B \.pag
file will contain holes, so its apparent size is larger than its contents.
When copied through the filesystem the holes will be filled.
.LP
The contents of
.B datum
values returned are in volatile storage. If you want to retain the values
pointed to, you must copy them immediately before another call to this package.
.LP
The only safe way for multiple processes to (read and) update a database at
the same time, is to implement a private locking scheme outside this package
and open and close the database between lock acquisitions. It is safe for
multiple processes to concurrently access a database read-only.
.SH APPLICATIONS PORTABILITY
For complete source code compatibility with the Berkeley Unix
.IR ndbm (3)
library, the
.B sdbm.h
header file should be installed in
.BR /usr/include/ndbm.h .
.LP
The
.B nullitem
data item, and the
.BR sdbm_prep (\|),
.BR sdbm_hash (\|),
.BR sdbm_rdonly (\|),
.BR sdbm_dirfno (\|),
and
.BR sdbm_pagfno (\|)
functions are unique to this package.
--- NEW FILE: hash.c ---
/*
* sdbm - ndbm work-alike hashed database library
* based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
* author: oz at nexus.yorku.ca
* status: public domain. keep it that way.
*
* hashing routine
*/
#include "config.h"
#include "EXTERN.h"
#include "sdbm.h"
/*
* polynomial conversion ignoring overflows
* [this seems to work remarkably well, in fact better
* then the ndbm hash function. Replace at your own risk]
* use: 65599 nice.
* 65587 even better.
*/
long
sdbm_hash(register char *str, register int len)
{
register unsigned long n = 0;
#ifdef DUFF
#define HASHC n = *str++ + 65599 * n
if (len > 0) {
register int loop = (len + 8 - 1) >> 3;
switch(len & (8 - 1)) {
case 0: do {
HASHC; case 7: HASHC;
case 6: HASHC; case 5: HASHC;
case 4: HASHC; case 3: HASHC;
case 2: HASHC; case 1: HASHC;
} while (--loop);
}
}
#else
while (len--)
n = *str++ + 65599 * n;
#endif
return n;
}
--- NEW FILE: linux.patches ---
*** sdbm.dist/./dbu.c Mon Feb 17 21:18:52 1992
--- sdbm/./dbu.c Mon Feb 17 21:11:20 1992
***************
*** 12,18 ****
#endif
extern int getopt();
! extern char *strchr();
extern void oops();
char *progname;
--- 12,18 ----
#endif
extern int getopt();
! /* extern char *strchr(); */
extern void oops();
char *progname;
*** sdbm.dist/./makefile Mon Feb 17 21:18:56 1992
--- sdbm/./makefile Mon Feb 17 21:10:46 1992
***************
*** 2,8 ****
# makefile for public domain ndbm-clone: sdbm
# DUFF: use duff's device (loop unroll) in parts of the code
#
! CFLAGS = -O -DSDBM -DDUFF -DBSD42
#LDFLAGS = -p
OBJS = sdbm.o pair.o hash.o
--- 2,8 ----
# makefile for public domain ndbm-clone: sdbm
# DUFF: use duff's device (loop unroll) in parts of the code
#
! CFLAGS = -O -DSDBM -DDUFF
#LDFLAGS = -p
OBJS = sdbm.o pair.o hash.o
*** sdbm.dist/./sdbm.c Mon Feb 17 21:19:17 1992
--- sdbm/./sdbm.c Mon Feb 17 21:12:59 1992
***************
*** 25,30 ****
--- 25,31 ----
#endif
#include <errno.h>
#include <string.h>
+ #include <unistd.h>
#ifdef __STDC__
#include <stddef.h>
***************
*** 43,49 ****
extern char *malloc proto((unsigned int));
extern void free proto((void *));
! extern long lseek();
/*
* forward
--- 44,50 ----
extern char *malloc proto((unsigned int));
extern void free proto((void *));
! /* extern long lseek(); */
/*
* forward
--- NEW FILE: README ---
sdbm - Substitute DBM
or
Berkeley ndbm for Every UN*X[1] Made Simple
Ozan (oz) Yigit
The Guild of PD Software Toolmakers
Toronto - Canada
oz at nexus.yorku.ca
Implementation is the sincerest form of flattery. - L. Peter
Deutsch
A The Clone of the ndbm library
The sources accompanying this notice - sdbm - consti-
tute the first public release (Dec. 1990) of a complete
clone of the Berkeley UN*X ndbm library. The sdbm library is
meant to clone the proven functionality of ndbm as closely
as possible, including a few improvements. It is practical,
easy to understand, and compatible. The sdbm library is not
derived from any licensed, proprietary or copyrighted
software.
The sdbm implementation is based on a 1978 algorithm
[Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''.
In the course of searching for a substitute for ndbm, I pro-
totyped three different external-hashing algorithms [Lar78,
Fag79, Lit80] and ultimately chose Larson's algorithm as a
basis of the sdbm implementation. The Bell Labs dbm (and
therefore ndbm) is based on an algorithm invented by Ken
Thompson, [Tho90, Tor87] and predates Larson's work.
The sdbm programming interface is totally compatible
with ndbm and includes a slight improvement in database ini-
tialization. It is also expected to be binary-compatible
under most UN*X versions that support the ndbm library.
The sdbm implementation shares the shortcomings of the
ndbm library, as a side effect of various simplifications to
the original Larson algorithm. It does produce holes in the
page file as it writes pages past the end of file. (Larson's
paper include a clever solution to this problem that is a
result of using the hash value directly as a block address.)
On the other hand, extensive tests seem to indicate that
sdbm creates fewer holes in general, and the resulting page-
files are smaller. The sdbm implementation is also faster
than ndbm in database creation. Unlike the ndbm, the sdbm
_________________________
[1] UN*X is not a trademark of any (dis)organization.
- 2 -
store operation will not ``wander away'' trying to split its
data pages to insert a datum that cannot (due to elaborate
worst-case situations) be inserted. (It will fail after a
pre-defined number of attempts.)
Important Compatibility Warning
The sdbm and ndbm libraries cannot share databases: one
cannot read the (dir/pag) database created by the other.
This is due to the differences between the ndbm and sdbm
algorithms[2], and the hash functions used. It is easy to
convert between the dbm/ndbm databases and sdbm by ignoring
the index completely: see dbd, dbu etc.
Notice of Intellectual Property
The entire sdbm library package, as authored by me, Ozan S.
Yigit, is hereby placed in the public domain. As such, the
author is not responsible for the consequences of use of
this software, no matter how awful, even if they arise from
defects in it. There is no expressed or implied warranty for
the sdbm library.
Since the sdbm library package is in the public domain,
this original release or any additional public-domain
releases of the modified original cannot possibly (by defin-
ition) be withheld from you. Also by definition, You (singu-
lar) have all the rights to this code (including the right
to sell without permission, the right to hoard[3] and the
right to do other icky things as you see fit) but those
rights are also granted to everyone else.
Please note that all previous distributions of this
software contained a copyright (which is now dropped) to
protect its origins and its current public domain status
against any possible claims and/or challenges.
Acknowledgments
Many people have been very helpful and supportive. A
partial list would necessarily include Rayan Zacherissen
(who contributed the man page, and also hacked a MMAP
_________________________
[2] Torek's discussion [Tor87] indicates that
dbm/ndbm implementations use the hash value to traverse
the radix trie differently than sdbm and as a result,
the page indexes are generated in different order. For
more information, send e-mail to the author.
[3] You cannot really hoard something that is avail-
able to the public at large, but try if it makes you
feel any better.
- 3 -
version of sdbm), Arnold Robbins, Chris Lewis, Bill David-
sen, Henry Spencer, Geoff Collyer, Rich Salz (who got me
started in the first place), Johannes Ruschein (who did the
minix port) and David Tilbrook. I thank you all.
Distribution Manifest and Notes
This distribution of sdbm includes (at least) the following:
CHANGES change log
README this file.
biblio a small bibliography on external hashing
dba.c a crude (n/s)dbm page file analyzer
dbd.c a crude (n/s)dbm page file dumper (for conversion)
dbe.1 man page for dbe.c
dbe.c Janick's database editor
dbm.c a dbm library emulation wrapper for ndbm/sdbm
dbm.h header file for the above
dbu.c a crude db management utility
hash.c hashing function
makefile guess.
pair.c page-level routines (posted earlier)
pair.h header file for the above
readme.ms troff source for the README file
sdbm.3 man page
sdbm.c the real thing
sdbm.h header file for the above
tune.h place for tuning & portability thingies
util.c miscellaneous
dbu is a simple database manipulation program[4] that
tries to look like Bell Labs' cbt utility. It is currently
incomplete in functionality. I use dbu to test out the rou-
tines: it takes (from stdin) tab separated key/value pairs
for commands like build or insert or takes keys for commands
like delete or look.
dbu <build|creat|look|insert|cat|delete> dbmfile
dba is a crude analyzer of dbm/sdbm/ndbm page files. It
scans the entire page file, reporting page level statistics,
and totals at the end.
dbd is a crude dump program for dbm/ndbm/sdbm data-
bases. It ignores the bitmap, and dumps the data pages in
sequence. It can be used to create input for the dbu util-
ity. Note that dbd will skip any NULLs in the key and data
fields, thus is unsuitable to convert some peculiar
_________________________
[4] The dbd, dba, dbu utilities are quick hacks and
are not fit for production use. They were developed
late one night, just to test out sdbm, and convert some
databases.
- 4 -
databases that insist in including the terminating null.
I have also included a copy of the dbe (ndbm DataBase
Editor) by Janick Bergeron [janick at bnr.ca] for your pleas-
ure. You may find it more useful than the little dbu util-
ity.
dbm.[ch] is a dbm library emulation on top of ndbm (and
hence suitable for sdbm). Written by Robert Elz.
The sdbm library has been around in beta test for quite
a long time, and from whatever little feedback I received
(maybe no news is good news), I believe it has been func-
tioning without any significant problems. I would, of
course, appreciate all fixes and/or improvements. Portabil-
ity enhancements would especially be useful.
Implementation Issues
Hash functions: The algorithm behind sdbm implementa-
tion needs a good bit-scrambling hash function to be effec-
tive. I ran into a set of constants for a simple hash func-
tion that seem to help sdbm perform better than ndbm for
various inputs:
/*
* polynomial conversion ignoring overflows
* 65599 nice. 65587 even better.
*/
long
dbm_hash(char *str, int len) {
register unsigned long n = 0;
while (len--)
n = n * 65599 + *str++;
return n;
}
There may be better hash functions for the purposes of
dynamic hashing. Try your favorite, and check the pagefile.
If it contains too many pages with too many holes, (in rela-
tion to this one for example) or if sdbm simply stops work-
ing (fails after SPLTMAX attempts to split) when you feed
your NEWS history file to it, you probably do not have a
good hashing function. If you do better (for different
types of input), I would like to know about the function you
use.
Block sizes: It seems (from various tests on a few
machines) that a page file block size PBLKSIZ of 1024 is by
far the best for performance, but this also happens to limit
the size of a key/value pair. Depending on your needs, you
may wish to increase the page size, and also adjust PAIRMAX
(the maximum size of a key/value pair allowed: should always
- 5 -
be at least three words smaller than PBLKSIZ.) accordingly.
The system-wide version of the library should probably be
configured with 1024 (distribution default), as this appears
to be sufficient for most common uses of sdbm.
Portability
This package has been tested in many different UN*Xes
even including minix, and appears to be reasonably portable.
This does not mean it will port easily to non-UN*X systems.
Notes and Miscellaneous
The sdbm is not a very complicated package, at least
not after you familiarize yourself with the literature on
external hashing. There are other interesting algorithms in
existence that ensure (approximately) single-read access to
a data value associated with any key. These are directory-
less schemes such as linear hashing [Lit80] (+ Larson varia-
tions), spiral storage [Mar79] or directory schemes such as
extensible hashing [Fag79] by Fagin et al. I do hope these
sources provide a reasonable playground for experimentation
with other algorithms. See the June 1988 issue of ACM Com-
puting Surveys [Enb88] for an excellent overview of the
field.
References
[Lar78]
P.-A. Larson, ``Dynamic Hashing'', BIT, vol. 18, pp.
184-201, 1978.
[Tho90]
Ken Thompson, private communication, Nov. 1990
[Lit80]
W. Litwin, `` Linear Hashing: A new tool for file and
table addressing'', Proceedings of the 6th Conference on
Very Large Dabatases (Montreal), pp. 212-223, Very
Large Database Foundation, Saratoga, Calif., 1980.
[Fag79]
R. Fagin, J. Nievergelt, N. Pippinger, and H. R.
Strong, ``Extendible Hashing - A Fast Access Method for
Dynamic Files'', ACM Trans. Database Syst., vol. 4,
no.3, pp. 315-344, Sept. 1979.
[Wal84]
Rich Wales, ``Discussion of "dbm" data base system'',
USENET newsgroup unix.wizards, Jan. 1984.
[Tor87]
Chris Torek, ``Re: dbm.a and ndbm.a archives'',
- 6 -
USENET newsgroup comp.unix, 1987.
[Mar79]
G. N. Martin, ``Spiral Storage: Incrementally Augment-
able Hash Addressed Storage'', Technical Report #27,
University of Varwick, Coventry, U.K., 1979.
[Enb88]
R. J. Enbody and H. C. Du, ``Dynamic Hashing
Schemes'',ACM Computing Surveys, vol. 20, no. 2, pp.
85-113, June 1988.
--- NEW FILE: dbe.1 ---
.TH dbe 1 "ndbm(3) EDITOR"
.SH NAME
dbe \- Edit a ndbm(3) database
.SH USAGE
dbe <database> [-m r|w|rw] [-crtvx] -a|-d|-f|-F|-s [<key> [<content>]]
.SH DESCRIPTION
\fIdbme\fP operates on ndbm(3) databases.
It can be used to create them, look at them or change them.
When specifying the value of a key or the content of its associated entry,
\\nnn, \\0, \\n, \\t, \\f and \\r are interpreted as usual.
When displaying key/content pairs, non-printable characters are displayed
using the \\nnn notation.
.SH OPTIONS
.IP -a
List all entries in the database.
.IP -c
Create the database if it does not exist.
.IP -d
Delete the entry associated with the specified key.
.IP -f
Fetch and display the entry associated with the specified key.
.IP -F
Fetch and display all the entries whose key match the specified
regular-expression
.IP "-m r|w|rw"
Open the database in read-only, write-only or read-write mode
.IP -r
Replace the entry associated with the specified key if it already exists.
See option -s.
.IP -s
Store an entry under a specific key.
An error occurs if the key already exists and the option -r was not specified.
.IP -t
Re-initialize the database before executing the command.
.IP -v
Verbose mode.
Confirm stores and deletions.
.IP -x
If option -x is used with option -c, then if the database already exists,
an error occurs.
This can be used to implement a simple exclusive access locking mechanism.
.SH SEE ALSO
ndbm(3)
.SH AUTHOR
janick at bnr.ca
--- NEW FILE: dbu.c ---
#include <stdio.h>
#include <sys/file.h>
#ifdef SDBM
#include "EXTERN.h"
#include "sdbm.h"
#else
#include <ndbm.h>
#endif
#include <string.h>
#ifdef BSD42
#define strchr index
#endif
extern int getopt();
extern char *strchr();
extern void oops();
char *progname;
static int rflag;
static char *usage = "%s [-R] cat | look |... dbmname";
#define DERROR 0
#define DLOOK 1
#define DINSERT 2
#define DDELETE 3
#define DCAT 4
#define DBUILD 5
#define DPRESS 6
#define DCREAT 7
#define LINEMAX 8192
typedef struct {
char *sname;
int scode;
int flags;
} cmd;
static cmd cmds[] = {
"fetch", DLOOK, O_RDONLY,
"get", DLOOK, O_RDONLY,
"look", DLOOK, O_RDONLY,
"add", DINSERT, O_RDWR,
"insert", DINSERT, O_RDWR,
"store", DINSERT, O_RDWR,
"delete", DDELETE, O_RDWR,
"remove", DDELETE, O_RDWR,
"dump", DCAT, O_RDONLY,
"list", DCAT, O_RDONLY,
"cat", DCAT, O_RDONLY,
"creat", DCREAT, O_RDWR | O_CREAT | O_TRUNC,
"new", DCREAT, O_RDWR | O_CREAT | O_TRUNC,
"build", DBUILD, O_RDWR | O_CREAT,
"squash", DPRESS, O_RDWR,
"compact", DPRESS, O_RDWR,
"compress", DPRESS, O_RDWR
};
#define CTABSIZ (sizeof (cmds)/sizeof (cmd))
static cmd *parse();
static void badk(), doit(), prdatum();
int
main(int argc, char **argv)
{
int c;
register cmd *act;
extern int optind;
extern char *optarg;
progname = argv[0];
while ((c = getopt(argc, argv, "R")) != EOF)
switch (c) {
case 'R': /* raw processing */
rflag++;
break;
default:
oops("usage: %s", usage);
break;
}
if ((argc -= optind) < 2)
oops("usage: %s", usage);
if ((act = parse(argv[optind])) == NULL)
badk(argv[optind]);
optind++;
doit(act, argv[optind]);
return 0;
}
static void
doit(register cmd *act, char *file)
{
datum key;
datum val;
register DBM *db;
register char *op;
register int n;
char *line;
#ifdef TIME
long start;
extern long time();
#endif
if ((db = dbm_open(file, act->flags, 0644)) == NULL)
oops("cannot open: %s", file);
if ((line = (char *) malloc(LINEMAX)) == NULL)
oops("%s: cannot get memory", "line alloc");
switch (act->scode) {
case DLOOK:
while (fgets(line, LINEMAX, stdin) != NULL) {
n = strlen(line) - 1;
line[n] = 0;
key.dptr = line;
key.dsize = n;
val = dbm_fetch(db, key);
if (val.dptr != NULL) {
prdatum(stdout, val);
putchar('\n');
continue;
}
prdatum(stderr, key);
fprintf(stderr, ": not found.\n");
}
break;
case DINSERT:
break;
case DDELETE:
while (fgets(line, LINEMAX, stdin) != NULL) {
n = strlen(line) - 1;
line[n] = 0;
key.dptr = line;
key.dsize = n;
if (dbm_delete(db, key) == -1) {
prdatum(stderr, key);
fprintf(stderr, ": not found.\n");
}
}
break;
case DCAT:
for (key = dbm_firstkey(db); key.dptr != 0;
key = dbm_nextkey(db)) {
prdatum(stdout, key);
putchar('\t');
prdatum(stdout, dbm_fetch(db, key));
putchar('\n');
}
break;
case DBUILD:
#ifdef TIME
start = time(0);
#endif
while (fgets(line, LINEMAX, stdin) != NULL) {
n = strlen(line) - 1;
line[n] = 0;
key.dptr = line;
if ((op = strchr(line, '\t')) != 0) {
key.dsize = op - line;
*op++ = 0;
val.dptr = op;
val.dsize = line + n - op;
}
else
oops("bad input; %s", line);
if (dbm_store(db, key, val, DBM_REPLACE) < 0) {
prdatum(stderr, key);
fprintf(stderr, ": ");
oops("store: %s", "failed");
}
}
#ifdef TIME
printf("done: %d seconds.\n", time(0) - start);
#endif
break;
case DPRESS:
break;
case DCREAT:
break;
}
dbm_close(db);
}
static void
badk(char *word)
{
register int i;
if (progname)
fprintf(stderr, "%s: ", progname);
fprintf(stderr, "bad keywd %s. use one of\n", word);
for (i = 0; i < (int)CTABSIZ; i++)
fprintf(stderr, "%-8s%c", cmds[i].sname,
((i + 1) % 6 == 0) ? '\n' : ' ');
fprintf(stderr, "\n");
exit(1);
/*NOTREACHED*/
}
static cmd *
parse(register char *str)
{
register int i = CTABSIZ;
register cmd *p;
for (p = cmds; i--; p++)
if (strcmp(p->sname, str) == 0)
return p;
return NULL;
}
static void
prdatum(FILE *stream, datum d)
{
register int c;
register char *p = d.dptr;
register int n = d.dsize;
while (n--) {
c = *p++ & 0377;
if (c & 0200) {
fprintf(stream, "M-");
c &= 0177;
}
if (c == 0177 || c < ' ')
fprintf(stream, "^%c", (c == 0177) ? '?' : c + '@');
else
putc(c, stream);
}
}
--- NEW FILE: pair.h ---
/* Mini EMBED (pair.c) */
#define chkpage sdbm__chkpage
#define delpair sdbm__delpair
#define duppair sdbm__duppair
#define exipair sdbm__exipair
#define fitpair sdbm__fitpair
#define getnkey sdbm__getnkey
#define getpair sdbm__getpair
#define putpair sdbm__putpair
#define splpage sdbm__splpage
extern int fitpair proto((char *, int));
extern void putpair proto((char *, datum, datum));
extern datum getpair proto((char *, datum));
extern int exipair proto((char *, datum));
extern int delpair proto((char *, datum));
extern int chkpage proto((char *));
extern datum getnkey proto((char *, int));
extern void splpage proto((char *, char *, long));
#ifdef SEEDUPS
extern int duppair proto((char *, datum));
#endif
--- NEW FILE: pair.c ---
/*
* sdbm - ndbm work-alike hashed database library
* based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
* author: oz at nexus.yorku.ca
* status: public domain.
*
* page-level routines
*/
#include "config.h"
#ifdef __CYGWIN__
# define EXTCONST extern const
#else
# include "EXTERN.h"
#endif
#include "sdbm.h"
#include "tune.h"
#include "pair.h"
#define exhash(item) sdbm_hash((item).dptr, (item).dsize)
/*
* forward
*/
static int seepair proto((char *, int, char *, int));
/*
* page format:
* +------------------------------+
* ino | n | keyoff | datoff | keyoff |
* +------------+--------+--------+
* | datoff | - - - ----> |
* +--------+---------------------+
* | F R E E A R E A |
* +--------------+---------------+
* | <---- - - - | data |
* +--------+-----+----+----------+
* | key | data | key |
* +--------+----------+----------+
*
* calculating the offsets for free area: if the number
* of entries (ino[0]) is zero, the offset to the END of
* the free area is the block size. Otherwise, it is the
* nth (ino[ino[0]]) entry's offset.
*/
int
fitpair(char *pag, int need)
{
register int n;
register int off;
register int free;
register short *ino = (short *) pag;
off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ;
free = off - (n + 1) * sizeof(short);
need += 2 * sizeof(short);
debug(("free %d need %d\n", free, need));
return need <= free;
}
void
putpair(char *pag, datum key, datum val)
{
register int n;
register int off;
register short *ino = (short *) pag;
off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ;
/*
* enter the key first
*/
off -= key.dsize;
(void) memcpy(pag + off, key.dptr, key.dsize);
ino[n + 1] = off;
/*
* now the data
*/
off -= val.dsize;
(void) memcpy(pag + off, val.dptr, val.dsize);
ino[n + 2] = off;
/*
* adjust item count
*/
ino[0] += 2;
}
datum
getpair(char *pag, datum key)
{
register int i;
register int n;
datum val;
register short *ino = (short *) pag;
if ((n = ino[0]) == 0)
return nullitem;
if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
return nullitem;
val.dptr = pag + ino[i + 1];
val.dsize = ino[i] - ino[i + 1];
return val;
}
int
exipair(char *pag, datum key)
{
register short *ino = (short *) pag;
if (ino[0] == 0)
return 0;
return (seepair(pag, ino[0], key.dptr, key.dsize) != 0);
}
#ifdef SEEDUPS
int
duppair(char *pag, datum key)
{
register short *ino = (short *) pag;
return ino[0] > 0 && seepair(pag, ino[0], key.dptr, key.dsize) > 0;
}
#endif
datum
getnkey(char *pag, int num)
{
datum key;
register int off;
register short *ino = (short *) pag;
num = num * 2 - 1;
if (ino[0] == 0 || num > ino[0])
return nullitem;
off = (num > 1) ? ino[num - 1] : PBLKSIZ;
key.dptr = pag + ino[num];
key.dsize = off - ino[num];
return key;
}
int
delpair(char *pag, datum key)
{
register int n;
register int i;
register short *ino = (short *) pag;
if ((n = ino[0]) == 0)
return 0;
if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
return 0;
/*
* found the key. if it is the last entry
* [i.e. i == n - 1] we just adjust the entry count.
* hard case: move all data down onto the deleted pair,
* shift offsets onto deleted offsets, and adjust them.
* [note: 0 < i < n]
*/
if (i < n - 1) {
register int m;
register char *dst = pag + (i == 1 ? PBLKSIZ : ino[i - 1]);
register char *src = pag + ino[i + 1];
register int zoo = dst - src;
debug(("free-up %d ", zoo));
/*
* shift data/keys down
*/
m = ino[i + 1] - ino[n];
#ifdef DUFF
#define MOVB *--dst = *--src
if (m > 0) {
register int loop = (m + 8 - 1) >> 3;
switch (m & (8 - 1)) {
case 0: do {
MOVB; case 7: MOVB;
case 6: MOVB; case 5: MOVB;
case 4: MOVB; case 3: MOVB;
case 2: MOVB; case 1: MOVB;
} while (--loop);
}
}
#else
#ifdef HAS_MEMMOVE
dst -= m;
src -= m;
memmove(dst, src, m);
#else
while (m--)
*--dst = *--src;
#endif
#endif
/*
* adjust offset index up
*/
while (i < n - 1) {
ino[i] = ino[i + 2] + zoo;
i++;
}
}
ino[0] -= 2;
return 1;
}
/*
* search for the key in the page.
* return offset index in the range 0 < i < n.
* return 0 if not found.
*/
static int
seepair(char *pag, register int n, register char *key, register int siz)
{
register int i;
register int off = PBLKSIZ;
register short *ino = (short *) pag;
for (i = 1; i < n; i += 2) {
if (siz == off - ino[i] &&
memEQ(key, pag + ino[i], siz))
return i;
off = ino[i + 1];
}
return 0;
}
void
splpage(char *pag, char *New, long int sbit)
{
datum key;
datum val;
register int n;
register int off = PBLKSIZ;
char cur[PBLKSIZ];
register short *ino = (short *) cur;
(void) memcpy(cur, pag, PBLKSIZ);
(void) memset(pag, 0, PBLKSIZ);
(void) memset(New, 0, PBLKSIZ);
n = ino[0];
for (ino++; n > 0; ino += 2) {
key.dptr = cur + ino[0];
key.dsize = off - ino[0];
val.dptr = cur + ino[1];
val.dsize = ino[0] - ino[1];
/*
* select the page pointer (by looking at sbit) and insert
*/
(void) putpair((exhash(key) & sbit) ? New : pag, key, val);
off = ino[1];
n -= 2;
}
debug(("%d split %d/%d\n", ((short *) cur)[0] / 2,
((short *) New)[0] / 2,
((short *) pag)[0] / 2));
}
/*
* check page sanity:
* number of entries should be something
* reasonable, and all offsets in the index should be in order.
* this could be made more rigorous.
*/
int
chkpage(char *pag)
{
register int n;
register int off;
register short *ino = (short *) pag;
if ((n = ino[0]) < 0 || n > PBLKSIZ / sizeof(short))
return 0;
if (n > 0) {
off = PBLKSIZ;
for (ino++; n > 0; ino += 2) {
if (ino[0] > off || ino[1] > off ||
ino[1] > ino[0])
return 0;
off = ino[1];
n -= 2;
}
}
return 1;
}
--- NEW FILE: readme.ms ---
.\" tbl | readme.ms | [tn]roff -ms | ...
.\" note the "C" (courier) and "CB" fonts: you will probably have to
.\" change these.
.\" $Id: readme.ms,v 1.2 2006-12-04 16:59:39 dslinux_cayenne Exp $
.de P1
.br
.nr dT 4
.nf
.ft C
.sp .5
.nr t \\n(dT*\\w'x'u
.ta 1u*\\ntu 2u*\\ntu 3u*\\ntu 4u*\\ntu 5u*\\ntu 6u*\\ntu 7u*\\ntu 8u*\\ntu 9u*\\ntu 10u*\\ntu 11u*\\ntu 12u*\\ntu 13u*\\ntu 14u*\\ntu
..
.de P2
.br
.ft 1
.br
.sp .5
.br
.fi
..
.\" CW uses the typewriter/courier font.
.de CW
\fC\\$1\\fP\\$2
..
.\" Footnote numbering [by Henry Spencer]
.\" <text>\*f for a footnote number..
.\" .FS
.\" \*F <footnote text>
.\" .FE
.\"
.ds f \\u\\s-2\\n+f\\s+2\\d
.nr f 0 1
.ds F \\n+F.
.nr F 0 1
.ND
.LP
.TL
\fIsdbm\fP \(em Substitute DBM
.br
or
.br
Berkeley \fIndbm\fP for Every UN*X\** Made Simple
.AU
Ozan (oz) Yigit
.AI
The Guild of PD Software Toolmakers
Toronto - Canada
.sp
oz at nexus.yorku.ca
.LP
.FS
UN*X is not a trademark of any (dis)organization.
.FE
.sp 2
\fIImplementation is the sincerest form of flattery. \(em L. Peter Deutsch\fP
.SH
A The Clone of the \fIndbm\fP library
.PP
The sources accompanying this notice \(em \fIsdbm\fP \(em constitute
the first public release (Dec. 1990) of a complete clone of
the Berkeley UN*X \fIndbm\fP library. The \fIsdbm\fP library is meant to
clone the proven functionality of \fIndbm\fP as closely as possible,
including a few improvements. It is practical, easy to understand, and
compatible.
The \fIsdbm\fP library is not derived from any licensed, proprietary or
copyrighted software.
.PP
The \fIsdbm\fP implementation is based on a 1978 algorithm
[Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''.
In the course of searching for a substitute for \fIndbm\fP, I
prototyped three different external-hashing algorithms [Lar78, Fag79, Lit80]
and ultimately chose Larson's algorithm as a basis of the \fIsdbm\fP
implementation. The Bell Labs
\fIdbm\fP (and therefore \fIndbm\fP) is based on an algorithm invented by
Ken Thompson, [Tho90, Tor87] and predates Larson's work.
.PP
The \fIsdbm\fR programming interface is totally compatible
with \fIndbm\fP and includes a slight improvement in database initialization.
It is also expected to be binary-compatible under most UN*X versions that
support the \fIndbm\fP library.
.PP
The \fIsdbm\fP implementation shares the shortcomings of the \fIndbm\fP
library, as a side effect of various simplifications to the original Larson
algorithm. It does produce \fIholes\fP in the page file as it writes
pages past the end of file. (Larson's paper include a clever solution to
this problem that is a result of using the hash value directly as a block
address.) On the other hand, extensive tests seem to indicate that \fIsdbm\fP
creates fewer holes in general, and the resulting pagefiles are
smaller. The \fIsdbm\fP implementation is also faster than \fIndbm\fP
in database creation.
Unlike the \fIndbm\fP, the \fIsdbm\fP
.CW store
operation will not ``wander away'' trying to split its
data pages to insert a datum that \fIcannot\fP (due to elaborate worst-case
situations) be inserted. (It will fail after a pre-defined number of attempts.)
.SH
Important Compatibility Warning
.PP
The \fIsdbm\fP and \fIndbm\fP
libraries \fIcannot\fP share databases: one cannot read the (dir/pag)
database created by the other. This is due to the differences
between the \fIndbm\fP and \fIsdbm\fP algorithms\**,
.FS
Torek's discussion [Tor87]
indicates that \fIdbm/ndbm\fP implementations use the hash
value to traverse the radix trie differently than \fIsdbm\fP
and as a result, the page indexes are generated in \fIdifferent\fP order.
For more information, send e-mail to the author.
.FE
and the hash functions
used.
It is easy to convert between the \fIdbm/ndbm\fP databases and \fIsdbm\fP
by ignoring the index completely: see
.CW dbd ,
.CW dbu
etc.
.R
.LP
.SH
Notice of Intellectual Property
.LP
\fIThe entire\fP sdbm \fIlibrary package, as authored by me,\fP Ozan S. Yigit,
\fIis hereby placed in the public domain.\fP As such, the author is not
responsible for the consequences of use of this software, no matter how
awful, even if they arise from defects in it. There is no expressed or
implied warranty for the \fIsdbm\fP library.
.PP
Since the \fIsdbm\fP
library package is in the public domain, this \fIoriginal\fP
release or any additional public-domain releases of the modified original
cannot possibly (by definition) be withheld from you. Also by definition,
You (singular) have all the rights to this code (including the right to
sell without permission, the right to hoard\**
.FS
You cannot really hoard something that is available to the public at
large, but try if it makes you feel any better.
.FE
and the right to do other icky things as
you see fit) but those rights are also granted to everyone else.
.PP
Please note that all previous distributions of this software contained
a copyright (which is now dropped) to protect its
origins and its current public domain status against any possible claims
and/or challenges.
.SH
Acknowledgments
.PP
Many people have been very helpful and supportive. A partial list would
necessarily include Rayan Zacherissen (who contributed the man page,
and also hacked a MMAP version of \fIsdbm\fP),
Arnold Robbins, Chris Lewis,
Bill Davidsen, Henry Spencer, Geoff Collyer, Rich Salz (who got me started
in the first place), Johannes Ruschein
(who did the minix port) and David Tilbrook. I thank you all.
.SH
Distribution Manifest and Notes
.LP
This distribution of \fIsdbm\fP includes (at least) the following:
.P1
CHANGES change log
README this file.
biblio a small bibliography on external hashing
dba.c a crude (n/s)dbm page file analyzer
dbd.c a crude (n/s)dbm page file dumper (for conversion)
dbe.1 man page for dbe.c
dbe.c Janick's database editor
dbm.c a dbm library emulation wrapper for ndbm/sdbm
dbm.h header file for the above
dbu.c a crude db management utility
hash.c hashing function
makefile guess.
pair.c page-level routines (posted earlier)
pair.h header file for the above
readme.ms troff source for the README file
sdbm.3 man page
sdbm.c the real thing
sdbm.h header file for the above
tune.h place for tuning & portability thingies
util.c miscellaneous
.P2
.PP
.CW dbu
is a simple database manipulation program\** that tries to look
.FS
The
.CW dbd ,
.CW dba ,
.CW dbu
utilities are quick hacks and are not fit for production use. They were
developed late one night, just to test out \fIsdbm\fP, and convert some
databases.
.FE
like Bell Labs'
.CW cbt
utility. It is currently incomplete in functionality.
I use
.CW dbu
to test out the routines: it takes (from stdin) tab separated
key/value pairs for commands like
.CW build
or
.CW insert
or takes keys for
commands like
.CW delete
or
.CW look .
.P1
dbu <build|creat|look|insert|cat|delete> dbmfile
.P2
.PP
.CW dba
is a crude analyzer of \fIdbm/sdbm/ndbm\fP
page files. It scans the entire
page file, reporting page level statistics, and totals at the end.
.PP
.CW dbd
is a crude dump program for \fIdbm/ndbm/sdbm\fP
databases. It ignores the
bitmap, and dumps the data pages in sequence. It can be used to create
input for the
.CW dbu
utility.
Note that
.CW dbd
will skip any NULLs in the key and data
fields, thus is unsuitable to convert some peculiar databases that
insist in including the terminating null.
.PP
I have also included a copy of the
.CW dbe
(\fIndbm\fP DataBase Editor) by Janick Bergeron [janick at bnr.ca] for
your pleasure. You may find it more useful than the little
.CW dbu
utility.
.PP
.CW dbm.[ch]
is a \fIdbm\fP library emulation on top of \fIndbm\fP
(and hence suitable for \fIsdbm\fP). Written by Robert Elz.
.PP
The \fIsdbm\fP
library has been around in beta test for quite a long time, and from whatever
little feedback I received (maybe no news is good news), I believe it has been
functioning without any significant problems. I would, of course, appreciate
all fixes and/or improvements. Portability enhancements would especially be
useful.
.SH
Implementation Issues
.PP
Hash functions:
The algorithm behind \fIsdbm\fP implementation needs a good bit-scrambling
hash function to be effective. I ran into a set of constants for a simple
hash function that seem to help \fIsdbm\fP perform better than \fIndbm\fP
for various inputs:
.P1
/*
* polynomial conversion ignoring overflows
* 65599 nice. 65587 even better.
*/
long
dbm_hash(char *str, int len) {
register unsigned long n = 0;
while (len--)
n = n * 65599 + *str++;
return n;
}
.P2
.PP
There may be better hash functions for the purposes of dynamic hashing.
Try your favorite, and check the pagefile. If it contains too many pages
with too many holes, (in relation to this one for example) or if
\fIsdbm\fP
simply stops working (fails after
.CW SPLTMAX
attempts to split) when you feed your
NEWS
.CW history
file to it, you probably do not have a good hashing function.
If you do better (for different types of input), I would like to know
about the function you use.
.PP
Block sizes: It seems (from various tests on a few machines) that a page
file block size
.CW PBLKSIZ
of 1024 is by far the best for performance, but
this also happens to limit the size of a key/value pair. Depending on your
needs, you may wish to increase the page size, and also adjust
.CW PAIRMAX
(the maximum size of a key/value pair allowed: should always be at least
three words smaller than
.CW PBLKSIZ .)
accordingly. The system-wide version of the library
should probably be
configured with 1024 (distribution default), as this appears to be sufficient
for most common uses of \fIsdbm\fP.
.SH
Portability
.PP
This package has been tested in many different UN*Xes even including minix,
and appears to be reasonably portable. This does not mean it will port
easily to non-UN*X systems.
.SH
Notes and Miscellaneous
.PP
The \fIsdbm\fP is not a very complicated package, at least not after you
familiarize yourself with the literature on external hashing. There are
other interesting algorithms in existence that ensure (approximately)
single-read access to a data value associated with any key. These are
directory-less schemes such as \fIlinear hashing\fP [Lit80] (+ Larson
variations), \fIspiral storage\fP [Mar79] or directory schemes such as
\fIextensible hashing\fP [Fag79] by Fagin et al. I do hope these sources
provide a reasonable playground for experimentation with other algorithms.
See the June 1988 issue of ACM Computing Surveys [Enb88] for an
excellent overview of the field.
.PG
.SH
References
.LP
.IP [Lar78] 4m
P.-A. Larson,
``Dynamic Hashing'', \fIBIT\fP, vol. 18, pp. 184-201, 1978.
.IP [Tho90] 4m
Ken Thompson, \fIprivate communication\fP, Nov. 1990
.IP [Lit80] 4m
W. Litwin,
`` Linear Hashing: A new tool for file and table addressing'',
\fIProceedings of the 6th Conference on Very Large Dabatases (Montreal)\fP,
pp. 212-223, Very Large Database Foundation, Saratoga, Calif., 1980.
.IP [Fag79] 4m
R. Fagin, J. Nievergelt, N. Pippinger, and H. R. Strong,
``Extendible Hashing - A Fast Access Method for Dynamic Files'',
\fIACM Trans. Database Syst.\fP, vol. 4, no.3, pp. 315-344, Sept. 1979.
.IP [Wal84] 4m
Rich Wales,
``Discussion of "dbm" data base system'', \fIUSENET newsgroup unix.wizards\fP,
Jan. 1984.
.IP [Tor87] 4m
Chris Torek,
``Re: dbm.a and ndbm.a archives'', \fIUSENET newsgroup comp.unix\fP,
1987.
.IP [Mar79] 4m
G. N. Martin,
``Spiral Storage: Incrementally Augmentable Hash Addressed Storage'',
\fITechnical Report #27\fP, University of Varwick, Coventry, U.K., 1979.
.IP [Enb88] 4m
R. J. Enbody and H. C. Du,
``Dynamic Hashing Schemes'',\fIACM Computing Surveys\fP,
vol. 20, no. 2, pp. 85-113, June 1988.
--- NEW FILE: dba.c ---
/*
* dba dbm analysis/recovery
*/
#include <stdio.h>
#include <sys/file.h>
#include "EXTERN.h"
#include "sdbm.h"
char *progname;
extern void oops();
int
main(int argc, char **argv)
{
int n;
char *p;
char *name;
int pagf;
progname = argv[0];
if (p = argv[1]) {
name = (char *) malloc((n = strlen(p)) + 5);
if (!name)
oops("cannot get memory");
strcpy(name, p);
strcpy(name + n, ".pag");
if ((pagf = open(name, O_RDONLY)) < 0)
oops("cannot open %s.", name);
sdump(pagf);
}
else
oops("usage: %s dbname", progname);
return 0;
}
void
sdump(int pagf)
{
register b;
register n = 0;
register t = 0;
register o = 0;
register e;
char pag[PBLKSIZ];
while ((b = read(pagf, pag, PBLKSIZ)) > 0) {
printf("#%d: ", n);
if (!okpage(pag))
printf("bad\n");
else {
printf("ok. ");
if (!(e = pagestat(pag)))
o++;
else
t += e;
}
n++;
}
if (b == 0)
printf("%d pages (%d holes): %d entries\n", n, o, t);
else
oops("read failed: block %d", n);
}
int
pagestat(char *pag)
{
register n;
register free;
register short *ino = (short *) pag;
if (!(n = ino[0]))
printf("no entries.\n");
else {
free = ino[n] - (n + 1) * sizeof(short);
printf("%3d entries %2d%% used free %d.\n",
n / 2, ((PBLKSIZ - free) * 100) / PBLKSIZ, free);
}
return n / 2;
}
--- NEW FILE: biblio ---
%A R. J. Enbody
%A H. C. Du
%T Dynamic Hashing Schemes
%J ACM Computing Surveys
%V 20
%N 2
%D June 1988
%P 85-113
%K surveys
%A P.-A. Larson
%T Dynamic Hashing
%J BIT
%V 18
%P 184-201
%D 1978
%K dynamic
%A W. Litwin
%T Linear Hashing: A new tool for file and table addressing
%J Proceedings of the 6th Conference on Very Large Dabatases (Montreal)
%I Very Large Database Foundation
%C Saratoga, Calif.
%P 212-223
%D 1980
%K linear
%A R. Fagin
%A J. Nievergelt
%A N. Pippinger
%A H. R. Strong
%T Extendible Hashing - A Fast Access Method for Dynamic Files
%J ACM Trans. Database Syst.
%V 4
%N 3
%D Sept. 1979
%P 315-344
%K extend
%A G. N. Martin
%T Spiral Storage: Incrementally Augmentable Hash Addressed Storage
%J Technical Report #27
%I University of Varwick
%C Coventry, U.K.
%D 1979
%K spiral
%A Chris Torek
%T Re: dbm.a and ndbm.a archives
%B USENET newsgroup comp.unix
%D 1987
%K torek
%A Rich Wales
%T Discusson of "dbm" data base system
%B USENET newsgroup unix.wizards
%D Jan. 1984
%K rich
--- NEW FILE: COMPARE ---
Script started on Thu Sep 28 15:41:06 1989
% uname -a
titan titan 4_0 UMIPS mips
% make all x-dbm
cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dbm.c
cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c sdbm.c
cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c pair.c
cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c hash.c
ar cr libsdbm.a sdbm.o pair.o hash.o
ranlib libsdbm.a
cc -o dbm dbm.o libsdbm.a
cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dba.c
cc -o dba dba.o
cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dbd.c
cc -o dbd dbd.o
cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -o x-dbm dbm.o
%
%
% wc history
65110 218344 3204883 history
%
% /bin/time dbm build foo <history
real 5:56.9
user 13.3
sys 26.3
% ls -s
total 14251
5 README 2 dbd.c 1 hash.c 1 pair.h
0 SCRIPT 5 dbd.o 1 hash.o 5 pair.o
1 WISHLIST 62 dbm 3130 history 1 port.h
46 dba 5 dbm.c 11 howtodbm.txt 11 sdbm.c
3 dba.c 8 dbm.o 14 libsdbm.a 2 sdbm.h
6 dba.o 4 foo.dir 1 makefile 8 sdbm.o
46 dbd 10810 foo.pag 6 pair.c 60 x-dbm
% ls -l foo.*
-rw-r--r-- 1 oz 4096 Sep 28 15:48 foo.dir
-rw-r--r-- 1 oz 11069440 Sep 28 15:48 foo.pag
%
% /bin/time x-dbm build bar <history
real 5:59.4
user 24.7
sys 29.1
%
% ls -s
total 27612
5 README 46 dbd 1 hash.c 5 pair.o
1 SCRIPT 2 dbd.c 1 hash.o 1 port.h
1 WISHLIST 5 dbd.o 3130 history 11 sdbm.c
4 bar.dir 62 dbm 11 howtodbm.txt 2 sdbm.h
13356 bar.pag 5 dbm.c 14 libsdbm.a 8 sdbm.o
46 dba 8 dbm.o 1 makefile 60 x-dbm
3 dba.c 4 foo.dir 6 pair.c
6 dba.o 10810 foo.pag 1 pair.h
%
% ls -l bar.*
-rw-r--r-- 1 oz 4096 Sep 28 15:54 bar.dir
-rw-r--r-- 1 oz 13676544 Sep 28 15:54 bar.pag
%
% dba foo | tail
#10801: ok. no entries.
#10802: ok. no entries.
#10803: ok. no entries.
#10804: ok. no entries.
#10805: ok. no entries.
#10806: ok. no entries.
#10807: ok. no entries.
#10808: ok. no entries.
#10809: ok. 11 entries 67% used free 337.
10810 pages (6036 holes): 65073 entries
%
% dba bar | tail
#13347: ok. no entries.
#13348: ok. no entries.
#13349: ok. no entries.
#13350: ok. no entries.
#13351: ok. no entries.
#13352: ok. no entries.
#13353: ok. no entries.
#13354: ok. no entries.
#13355: ok. 7 entries 33% used free 676.
13356 pages (8643 holes): 65073 entries
%
% exit
script done on Thu Sep 28 16:08:45 1989
--- NEW FILE: Makefile.PL ---
use ExtUtils::MakeMaker;
my $define = '-DSDBM -DDUFF';
$define .= ' -DWIN32 -DPERL_STATIC_SYMS' if ($^O eq 'MSWin32');
if ($^O eq 'VMS') { # Old VAXC compiler can't handle Duff's device
require Config;
$define =~ s/\s+-DDUFF// if $Config::Config{'vms_cc_type'} eq 'vaxc';
}
WriteMakefile(
NAME => 'sdbm', # (doesn't matter what the name is here) oh yes it does
# LINKTYPE => 'static',
DEFINE => $define,
INC => '-I$(PERL_INC)', # force PERL_INC dir ahead of system -I's
SKIP => [qw(dynamic dynamic_lib dlsyms)],
OBJECT => '$(O_FILES)',
clean => {'FILES' => 'dbu libsdbm.a dbd dba dbe x-dbu *.dir *.pag'},
H => [qw(tune.h sdbm.h pair.h $(PERL_INC)/config.h)],
C => [qw(sdbm.c pair.c hash.c)]
);
sub MY::constants {
package MY;
my $self = shift;
$self->{INST_STATIC} = 'libsdbm$(LIB_EXT)';
return $self->SUPER::constants();
}
sub MY::top_targets {
my $r = '
all :: static
$(NOECHO) $(NOOP)
config ::
$(NOECHO) $(NOOP)
lint:
lint -abchx $(LIBSRCS)
';
$r .= '
# This is a workaround, the problem is that our old GNU make exports
# variables into the environment so $(MYEXTLIB) is set in here to this
# value which can not be built.
sdbm/libsdbm.a:
$(NOECHO) $(NOOP)
' unless $^O eq 'VMS';
return $r;
}
--- NEW FILE: sdbm.h ---
/*
* sdbm - ndbm work-alike hashed database library
* based on Per-Ake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
* author: oz at nexus.yorku.ca
* status: public domain.
*/
#define DBLKSIZ 4096
#define PBLKSIZ 1024
#define PAIRMAX 1008 /* arbitrary on PBLKSIZ-N */
#define SPLTMAX 10 /* maximum allowed splits */
/* for a single insertion */
#ifdef VMS
#define DIRFEXT ".sdbm_dir"
#else
#define DIRFEXT ".dir"
#endif
#define PAGFEXT ".pag"
typedef struct {
int dirf; /* directory file descriptor */
int pagf; /* page file descriptor */
int flags; /* status/error flags, see below */
long maxbno; /* size of dirfile in bits */
long curbit; /* current bit number */
long hmask; /* current hash mask */
long blkptr; /* current block for nextkey */
int keyptr; /* current key for nextkey */
long blkno; /* current page to read/write */
long pagbno; /* current page in pagbuf */
char pagbuf[PBLKSIZ]; /* page file block buffer */
long dirbno; /* current block in dirbuf */
char dirbuf[DBLKSIZ]; /* directory file block buffer */
} DBM;
#define DBM_RDONLY 0x1 /* data base open read-only */
#define DBM_IOERR 0x2 /* data base I/O error */
/*
* utility macros
*/
#define sdbm_rdonly(db) ((db)->flags & DBM_RDONLY)
#define sdbm_error(db) ((db)->flags & DBM_IOERR)
#define sdbm_clearerr(db) ((db)->flags &= ~DBM_IOERR) /* ouch */
#define sdbm_dirfno(db) ((db)->dirf)
#define sdbm_pagfno(db) ((db)->pagf)
typedef struct {
char *dptr;
int dsize;
} datum;
EXTCONST datum nullitem
#ifdef DOINIT
= {0, 0}
#endif
;
#if defined(__STDC__) || defined(__cplusplus) || defined(CAN_PROTOTYPE)
#define proto(p) p
#else
#define proto(p) ()
#endif
/*
* flags to sdbm_store
*/
#define DBM_INSERT 0
#define DBM_REPLACE 1
/*
* ndbm interface
*/
extern DBM *sdbm_open proto((char *, int, int));
extern void sdbm_close proto((DBM *));
extern datum sdbm_fetch proto((DBM *, datum));
extern int sdbm_delete proto((DBM *, datum));
extern int sdbm_store proto((DBM *, datum, datum, int));
extern datum sdbm_firstkey proto((DBM *));
extern datum sdbm_nextkey proto((DBM *));
extern int sdbm_exists proto((DBM *, datum));
/*
* other
*/
extern DBM *sdbm_prep proto((char *, char *, int, int));
extern long sdbm_hash proto((char *, int));
#ifndef SDBM_ONLY
#define dbm_open sdbm_open
#define dbm_close sdbm_close
#define dbm_fetch sdbm_fetch
#define dbm_store sdbm_store
#define dbm_delete sdbm_delete
#define dbm_firstkey sdbm_firstkey
#define dbm_nextkey sdbm_nextkey
#define dbm_error sdbm_error
#define dbm_clearerr sdbm_clearerr
#endif
/* Most of the following is stolen from perl.h. We don't include
perl.h here because we just want the portability parts of perl.h,
not everything else.
*/
#ifndef H_PERL /* Include guard */
#include "embed.h" /* Follow all the global renamings. */
/*
* The following contortions are brought to you on behalf of all the
* standards, semi-standards, de facto standards, not-so-de-facto standards
* of the world, as well as all the other botches anyone ever thought of.
* The basic theory is that if we work hard enough here, the rest of the
* code can be a lot prettier. Well, so much for theory. Sorry, Henry...
*/
#include <errno.h>
#ifdef HAS_SOCKET
# ifdef I_NET_ERRNO
# include <net/errno.h>
# endif
#endif
#if defined(__STDC__) || defined(_AIX) || defined(__stdc__) || defined(__cplusplus)
# define STANDARD_C 1
#endif
#include <stdio.h>
#include <ctype.h>
#include <setjmp.h>
#if defined(I_UNISTD)
#include <unistd.h>
#endif
#ifdef VMS
# include <file.h>
# include <unixio.h>
#endif
#ifdef I_SYS_PARAM
# if !defined(MSDOS) && !defined(WIN32) && !defined(VMS)
# ifdef PARAM_NEEDS_TYPES
# include <sys/types.h>
# endif
# include <sys/param.h>
# endif
#endif
#ifndef _TYPES_ /* If types.h defines this it's easy. */
# ifndef major /* Does everyone's types.h define this? */
# include <sys/types.h>
# endif
#endif
#include <sys/stat.h>
#ifndef SEEK_SET
# ifdef L_SET
# define SEEK_SET L_SET
# else
# define SEEK_SET 0 /* Wild guess. */
# endif
#endif
/* Use all the "standard" definitions? */
#if defined(STANDARD_C) && defined(I_STDLIB)
# include <stdlib.h>
#endif /* STANDARD_C */
#define MEM_SIZE Size_t
/* This comes after <stdlib.h> so we don't try to change the standard
* library prototypes; we'll use our own instead. */
#if defined(MYMALLOC) && !defined(PERL_POLLUTE_MALLOC)
# define malloc Perl_malloc
# define calloc Perl_calloc
# define realloc Perl_realloc
# define free Perl_mfree
Malloc_t Perl_malloc proto((MEM_SIZE nbytes));
Malloc_t Perl_calloc proto((MEM_SIZE elements, MEM_SIZE size));
Malloc_t Perl_realloc proto((Malloc_t where, MEM_SIZE nbytes));
Free_t Perl_mfree proto((Malloc_t where));
#endif /* MYMALLOC */
#ifdef I_STRING
# ifndef __ultrix__
# include <string.h>
# endif
#else
# include <strings.h>
#endif
#ifdef I_MEMORY
#include <memory.h>
#endif
#ifdef __cplusplus
#define HAS_MEMCPY
#endif
#ifdef HAS_MEMCPY
# if !defined(STANDARD_C) && !defined(I_STRING) && !defined(I_MEMORY)
# ifndef memcpy
extern char * memcpy proto((char*, char*, int));
# endif
# endif
#else
# ifndef memcpy
# ifdef HAS_BCOPY
# define memcpy(d,s,l) bcopy(s,d,l)
# else
# define memcpy(d,s,l) my_bcopy(s,d,l)
# endif
# endif
#endif /* HAS_MEMCPY */
#ifdef HAS_MEMSET
# if !defined(STANDARD_C) && !defined(I_STRING) && !defined(I_MEMORY)
# ifndef memset
extern char *memset proto((char*, int, int));
# endif
# endif
# define memzero(d,l) memset(d,0,l)
#else
# ifndef memzero
# ifdef HAS_BZERO
# define memzero(d,l) bzero(d,l)
# else
# define memzero(d,l) my_bzero(d,l)
# endif
# endif
#endif /* HAS_MEMSET */
#if defined(mips) && defined(ultrix) && !defined(__STDC__)
# undef HAS_MEMCMP
#endif
#if defined(HAS_MEMCMP) && defined(HAS_SANE_MEMCMP)
# if !defined(STANDARD_C) && !defined(I_STRING) && !defined(I_MEMORY)
# ifndef memcmp
extern int memcmp proto((char*, char*, int));
# endif
# endif
# ifdef BUGGY_MSC
# pragma function(memcmp)
# endif
#else
# ifndef memcmp
/* maybe we should have included the full embedding header... */
# ifdef NO_EMBED
# define memcmp my_memcmp
# else
# define memcmp Perl_my_memcmp
# endif
#ifndef __cplusplus
extern int memcmp proto((char*, char*, int));
#endif
# endif
#endif /* HAS_MEMCMP */
#ifndef HAS_BCMP
# ifndef bcmp
# define bcmp(s1,s2,l) memcmp(s1,s2,l)
# endif
#endif /* !HAS_BCMP */
#ifdef HAS_MEMCMP
# define memNE(s1,s2,l) (memcmp(s1,s2,l))
# define memEQ(s1,s2,l) (!memcmp(s1,s2,l))
#else
# define memNE(s1,s2,l) (bcmp(s1,s2,l))
# define memEQ(s1,s2,l) (!bcmp(s1,s2,l))
#endif
#ifdef I_NETINET_IN
# ifdef VMS
# include <in.h>
# else
# include <netinet/in.h>
# endif
#endif
#endif /* Include guard */
--- NEW FILE: dbd.c ---
/*
* dbd - dump a dbm data file
*/
#include <stdio.h>
#include <sys/file.h>
#include "EXTERN.h"
#include "sdbm.h"
char *progname;
extern void oops();
#define empty(page) (((short *) page)[0] == 0)
int
main(int argc, char **argv)
{
int n;
char *p;
char *name;
int pagf;
progname = argv[0];
if (p = argv[1]) {
name = (char *) malloc((n = strlen(p)) + 5);
if (!name)
oops("cannot get memory");
strcpy(name, p);
strcpy(name + n, ".pag");
if ((pagf = open(name, O_RDONLY)) < 0)
oops("cannot open %s.", name);
sdump(pagf);
}
else
oops("usage: %s dbname", progname);
return 0;
}
void
sdump(int pagf)
{
register r;
register n = 0;
register o = 0;
char pag[PBLKSIZ];
while ((r = read(pagf, pag, PBLKSIZ)) > 0) {
if (!okpage(pag))
fprintf(stderr, "%d: bad page.\n", n);
else if (empty(pag))
o++;
else
dispage(pag);
n++;
}
if (r == 0)
fprintf(stderr, "%d pages (%d holes).\n", n, o);
else
oops("read failed: block %d", n);
}
#ifdef OLD
int
dispage(char *pag)
{
register i, n;
register off;
register short *ino = (short *) pag;
off = PBLKSIZ;
for (i = 1; i < ino[0]; i += 2) {
printf("\t[%d]: ", ino[i]);
for (n = ino[i]; n < off; n++)
putchar(pag[n]);
putchar(' ');
off = ino[i];
printf("[%d]: ", ino[i + 1]);
for (n = ino[i + 1]; n < off; n++)
putchar(pag[n]);
off = ino[i + 1];
putchar('\n');
}
}
#else
void
dispage(char *pag)
{
register i, n;
register off;
register short *ino = (short *) pag;
off = PBLKSIZ;
for (i = 1; i < ino[0]; i += 2) {
for (n = ino[i]; n < off; n++)
if (pag[n] != 0)
putchar(pag[n]);
putchar('\t');
off = ino[i];
for (n = ino[i + 1]; n < off; n++)
if (pag[n] != 0)
putchar(pag[n]);
putchar('\n');
off = ino[i + 1];
}
}
#endif
--- NEW FILE: sdbm.c ---
/*
* sdbm - ndbm work-alike hashed database library
* based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
* author: oz at nexus.yorku.ca
* status: public domain.
*
* core routines
*/
#include "INTERN.h"
#include "config.h"
#ifdef WIN32
#include "io.h"
#endif
#include "sdbm.h"
#include "tune.h"
#include "pair.h"
#ifdef I_FCNTL
# include <fcntl.h>
#endif
#ifdef I_SYS_FILE
# include <sys/file.h>
#endif
#ifdef I_STRING
# ifndef __ultrix__
# include <string.h>
# endif
#else
# include <strings.h>
#endif
/*
* externals
*/
#include <errno.h> /* See notes in perl.h about avoiding
extern int errno; */
extern Malloc_t malloc proto((MEM_SIZE));
extern Free_t free proto((Malloc_t));
/*
* forward
*/
static int getdbit proto((DBM *, long));
static int setdbit proto((DBM *, long));
static int getpage proto((DBM *, long));
static datum getnext proto((DBM *));
static int makroom proto((DBM *, long, int));
/*
* useful macros
*/
#define bad(x) ((x).dptr == NULL || (x).dsize < 0)
#define exhash(item) sdbm_hash((item).dptr, (item).dsize)
#define ioerr(db) ((db)->flags |= DBM_IOERR)
#define OFF_PAG(off) (long) (off) * PBLKSIZ
#define OFF_DIR(off) (long) (off) * DBLKSIZ
static long masks[] = {
000000000000, 000000000001, 000000000003, 000000000007,
000000000017, 000000000037, 000000000077, 000000000177,
000000000377, 000000000777, 000000001777, 000000003777,
000000007777, 000000017777, 000000037777, 000000077777,
000000177777, 000000377777, 000000777777, 000001777777,
000003777777, 000007777777, 000017777777, 000037777777,
000077777777, 000177777777, 000377777777, 000777777777,
001777777777, 003777777777, 007777777777, 017777777777
};
DBM *
sdbm_open(register char *file, register int flags, register int mode)
{
register DBM *db;
register char *dirname;
register char *pagname;
register int n;
if (file == NULL || !*file)
return errno = EINVAL, (DBM *) NULL;
/*
* need space for two seperate filenames
*/
n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;
if ((dirname = (char *) malloc((unsigned) n)) == NULL)
return errno = ENOMEM, (DBM *) NULL;
/*
* build the file names
*/
dirname = strcat(strcpy(dirname, file), DIRFEXT);
pagname = strcpy(dirname + strlen(dirname) + 1, file);
pagname = strcat(pagname, PAGFEXT);
db = sdbm_prep(dirname, pagname, flags, mode);
free((char *) dirname);
return db;
}
DBM *
sdbm_prep(char *dirname, char *pagname, int flags, int mode)
{
register DBM *db;
struct stat dstat;
if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
return errno = ENOMEM, (DBM *) NULL;
db->flags = 0;
db->hmask = 0;
db->blkptr = 0;
db->keyptr = 0;
/*
* adjust user flags so that WRONLY becomes RDWR,
* as required by this package. Also set our internal
* flag for RDONLY if needed.
*/
if (flags & O_WRONLY)
flags = (flags & ~O_WRONLY) | O_RDWR;
else if ((flags & 03) == O_RDONLY)
db->flags = DBM_RDONLY;
/*
* open the files in sequence, and stat the dirfile.
* If we fail anywhere, undo everything, return NULL.
*/
#if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__)
flags |= O_BINARY;
# endif
if ((db->pagf = open(pagname, flags, mode)) > -1) {
if ((db->dirf = open(dirname, flags, mode)) > -1) {
/*
* need the dirfile size to establish max bit number.
*/
if (fstat(db->dirf, &dstat) == 0) {
/*
* zero size: either a fresh database, or one with a single,
* unsplit data page: dirpage is all zeros.
*/
db->dirbno = (!dstat.st_size) ? 0 : -1;
db->pagbno = -1;
db->maxbno = dstat.st_size * BYTESIZ;
(void) memset(db->pagbuf, 0, PBLKSIZ);
(void) memset(db->dirbuf, 0, DBLKSIZ);
/*
* success
*/
return db;
}
(void) close(db->dirf);
}
(void) close(db->pagf);
}
free((char *) db);
return (DBM *) NULL;
}
void
sdbm_close(register DBM *db)
{
if (db == NULL)
errno = EINVAL;
else {
(void) close(db->dirf);
(void) close(db->pagf);
free((char *) db);
}
}
datum
sdbm_fetch(register DBM *db, datum key)
{
if (db == NULL || bad(key))
return errno = EINVAL, nullitem;
if (getpage(db, exhash(key)))
return getpair(db->pagbuf, key);
return ioerr(db), nullitem;
}
int
sdbm_exists(register DBM *db, datum key)
{
if (db == NULL || bad(key))
return errno = EINVAL, -1;
if (getpage(db, exhash(key)))
return exipair(db->pagbuf, key);
return ioerr(db), -1;
}
int
sdbm_delete(register DBM *db, datum key)
{
if (db == NULL || bad(key))
return errno = EINVAL, -1;
if (sdbm_rdonly(db))
return errno = EPERM, -1;
if (getpage(db, exhash(key))) {
if (!delpair(db->pagbuf, key))
return -1;
/*
* update the page file
*/
if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
|| write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
return ioerr(db), -1;
return 0;
}
return ioerr(db), -1;
}
int
sdbm_store(register DBM *db, datum key, datum val, int flags)
{
int need;
register long hash;
if (db == NULL || bad(key))
return errno = EINVAL, -1;
if (sdbm_rdonly(db))
return errno = EPERM, -1;
need = key.dsize + val.dsize;
/*
* is the pair too big (or too small) for this database ??
*/
if (need < 0 || need > PAIRMAX)
return errno = EINVAL, -1;
if (getpage(db, (hash = exhash(key)))) {
/*
* if we need to replace, delete the key/data pair
* first. If it is not there, ignore.
*/
if (flags == DBM_REPLACE)
(void) delpair(db->pagbuf, key);
#ifdef SEEDUPS
else if (duppair(db->pagbuf, key))
return 1;
#endif
/*
* if we do not have enough room, we have to split.
*/
if (!fitpair(db->pagbuf, need))
if (!makroom(db, hash, need))
return ioerr(db), -1;
/*
* we have enough room or split is successful. insert the key,
* and update the page file.
*/
(void) putpair(db->pagbuf, key, val);
if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
|| write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
return ioerr(db), -1;
/*
* success
*/
return 0;
}
return ioerr(db), -1;
}
/*
* makroom - make room by splitting the overfull page
* this routine will attempt to make room for SPLTMAX times before
* giving up.
*/
static int
makroom(register DBM *db, long int hash, int need)
{
long newp;
char twin[PBLKSIZ];
#if defined(DOSISH) || defined(WIN32)
char zer[PBLKSIZ];
long oldtail;
#endif
char *pag = db->pagbuf;
char *New = twin;
register int smax = SPLTMAX;
do {
/*
* split the current page
*/
(void) splpage(pag, New, db->hmask + 1);
/*
* address of the new page
*/
newp = (hash & db->hmask) | (db->hmask + 1);
/*
* write delay, read avoidence/cache shuffle:
* select the page for incoming pair: if key is to go to the new page,
* write out the previous one, and copy the new one over, thus making
* it the current page. If not, simply write the new page, and we are
* still looking at the page of interest. current page is not updated
* here, as sdbm_store will do so, after it inserts the incoming pair.
*/
#if defined(DOSISH) || defined(WIN32)
/*
* Fill hole with 0 if made it.
* (hole is NOT read as 0)
*/
oldtail = lseek(db->pagf, 0L, SEEK_END);
memset(zer, 0, PBLKSIZ);
while (OFF_PAG(newp) > oldtail) {
if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
write(db->pagf, zer, PBLKSIZ) < 0) {
return 0;
}
oldtail += PBLKSIZ;
}
#endif
if (hash & (db->hmask + 1)) {
if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
|| write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
return 0;
db->pagbno = newp;
(void) memcpy(pag, New, PBLKSIZ);
}
else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
|| write(db->pagf, New, PBLKSIZ) < 0)
return 0;
if (!setdbit(db, db->curbit))
return 0;
/*
* see if we have enough room now
*/
if (fitpair(pag, need))
return 1;
/*
* try again... update curbit and hmask as getpage would have
* done. because of our update of the current page, we do not
* need to read in anything. BUT we have to write the current
* [deferred] page out, as the window of failure is too great.
*/
db->curbit = 2 * db->curbit +
((hash & (db->hmask + 1)) ? 2 : 1);
db->hmask |= db->hmask + 1;
if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
|| write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
return 0;
} while (--smax);
/*
* if we are here, this is real bad news. After SPLTMAX splits,
* we still cannot fit the key. say goodnight.
*/
#ifdef BADMESS
(void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
#endif
return 0;
}
/*
* the following two routines will break if
* deletions aren't taken into account. (ndbm bug)
*/
datum
sdbm_firstkey(register DBM *db)
{
if (db == NULL)
return errno = EINVAL, nullitem;
/*
* start at page 0
*/
if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
|| read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
return ioerr(db), nullitem;
db->pagbno = 0;
db->blkptr = 0;
db->keyptr = 0;
return getnext(db);
}
datum
sdbm_nextkey(register DBM *db)
{
if (db == NULL)
return errno = EINVAL, nullitem;
return getnext(db);
}
/*
* all important binary trie traversal
*/
static int
getpage(register DBM *db, register long int hash)
{
register int hbit;
register long dbit;
register long pagb;
dbit = 0;
hbit = 0;
while (dbit < db->maxbno && getdbit(db, dbit))
dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
debug(("dbit: %d...", dbit));
db->curbit = dbit;
db->hmask = masks[hbit];
pagb = hash & db->hmask;
/*
* see if the block we need is already in memory.
* note: this lookaside cache has about 10% hit rate.
*/
if (pagb != db->pagbno) {
/*
* note: here, we assume a "hole" is read as 0s.
* if not, must zero pagbuf first.
*/
if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
|| read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
return 0;
if (!chkpage(db->pagbuf))
return 0;
db->pagbno = pagb;
debug(("pag read: %d\n", pagb));
}
return 1;
}
static int
getdbit(register DBM *db, register long int dbit)
{
register long c;
register long dirb;
c = dbit / BYTESIZ;
dirb = c / DBLKSIZ;
if (dirb != db->dirbno) {
int got;
if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
|| (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
return 0;
if (got==0)
memset(db->dirbuf,0,DBLKSIZ);
db->dirbno = dirb;
debug(("dir read: %d\n", dirb));
}
return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
}
static int
setdbit(register DBM *db, register long int dbit)
{
register long c;
register long dirb;
c = dbit / BYTESIZ;
dirb = c / DBLKSIZ;
if (dirb != db->dirbno) {
int got;
if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
|| (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
return 0;
if (got==0)
memset(db->dirbuf,0,DBLKSIZ);
db->dirbno = dirb;
debug(("dir read: %d\n", dirb));
}
db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
#if 0
if (dbit >= db->maxbno)
db->maxbno += DBLKSIZ * BYTESIZ;
#else
if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno)
db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
#endif
if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
|| write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
return 0;
return 1;
}
/*
* getnext - get the next key in the page, and if done with
* the page, try the next page in sequence
*/
static datum
getnext(register DBM *db)
{
datum key;
for (;;) {
db->keyptr++;
key = getnkey(db->pagbuf, db->keyptr);
if (key.dptr != NULL)
return key;
/*
* we either run out, or there is nothing on this page..
* try the next one... If we lost our position on the
* file, we will have to seek.
*/
db->keyptr = 0;
if (db->pagbno != db->blkptr++)
if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
break;
db->pagbno = db->blkptr;
if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
break;
if (!chkpage(db->pagbuf))
break;
}
return ioerr(db), nullitem;
}
--- NEW FILE: grind ---
#!/bin/sh
rm -f /tmp/*.dir /tmp/*.pag
awk -e '{
printf "%s\t", $0
for (i = 0; i < 40; i++)
printf "%s.", $0
printf "\n"
}' < /usr/dict/words | $1 build /tmp/$2
More information about the dslinux-commit
mailing list