dslinux/user/perl/ext/SDBM_File/sdbm CHANGES COMPARE Makefile.PL README README.too biblio dba.c dbd.c dbe.1 dbe.c dbu.c grind hash.c linux.patches makefile.sdbm pair.c pair.h readme.ms sdbm.3 sdbm.c sdbm.h tune.h util.c

Mon Dec 4 17:59:41 CET 2006

Update of /cvsroot/dslinux/dslinux/user/perl/ext/SDBM_File/sdbm
In directory antilope:/tmp/cvs-serv17422/ext/SDBM_File/sdbm

Added Files:
	CHANGES COMPARE Makefile.PL README README.too biblio dba.c 
	dbd.c dbe.1 dbe.c dbu.c grind hash.c linux.patches 
	makefile.sdbm pair.c pair.h readme.ms sdbm.3 sdbm.c sdbm.h 
	tune.h util.c 
Log Message:
Adding fresh perl source to HEAD to branch from

--- NEW FILE: README.too ---
This version of sdbm merely has all the dbm_* names translated to sdbm_*
so that we can link ndbm and sdbm into the same executable.  (It also has
the bad() macro redefined to allow a zero-length key.)

Fri Apr 15 10:15:30 EDT 1994.

Additional portability/configuration changes for libsdbm by Andy Dougherty
doughera at lafayette.edu.

Mon Mar 22 03:24:47 PST 1999.

sdbm_exists added to the library by Russ Allbery <rra at stanford.edu>.

--- NEW FILE: tune.h ---
/*
 * sdbm - ndbm work-alike hashed database library
 * tuning and portability constructs [not nearly enough]
 * author: oz at nexus.yorku.ca
 */

#define BYTESIZ		8

/*
 * important tuning parms (hah)
 */

#define SEEDUPS			/* always detect duplicates */
#define BADMESS			/* generate a message for worst case:
				   cannot make room after SPLTMAX splits */
/*
 * misc
 */
#ifdef DEBUG
#define debug(x)	printf x
#else
#define debug(x)
#endif

--- NEW FILE: util.c ---
#include <stdio.h>
#ifdef SDBM
#include "sdbm.h"
#else
#include "ndbm.h"
#endif

void
oops(register char *s1, register char *s2)
{
	extern int errno, sys_nerr;
	extern char *sys_errlist[];
	extern char *progname;

	if (progname)
		fprintf(stderr, "%s: ", progname);
	fprintf(stderr, s1, s2);
	if (errno > 0 && errno < sys_nerr)
		fprintf(stderr, " (%s)", sys_errlist[errno]);
	fprintf(stderr, "\n");
	exit(1);
}

int
okpage(char *pag)
{
	register unsigned n;
	register off;
	register short *ino = (short *) pag;

	if ((n = ino[0]) > PBLKSIZ / sizeof(short))
		return 0;

	if (!n)
		return 1;

	off = PBLKSIZ;
	for (ino++; n; ino += 2) {
		if (ino[0] > off || ino[1] > off ||
		    ino[1] > ino[0])
			return 0;
		off = ino[1];
		n -= 2;
	}

	return 1;
}

--- NEW FILE: makefile.sdbm ---
#
# makefile for public domain ndbm-clone: sdbm
# DUFF: use duff's device (loop unroll) in parts of the code
#
CFLAGS = -O -DSDBM -DDUFF -DBSD42 -pic
#LDFLAGS = -p

OBJS = sdbm.o pair.o hash.o
SRCS = sdbm.c pair.c hash.c dbu.c dba.c dbd.c util.c
HDRS = tune.h sdbm.h pair.h
MISC = README CHANGES COMPARE sdbm.3 dbe.c dbe.1 dbm.c dbm.h biblio \
       readme.ms readme.ps

all: dbu dba dbd dbe

dbu: dbu.o sdbm util.o
	cc $(LDFLAGS) -o dbu dbu.o util.o libsdbm.a

dba: dba.o util.o
	cc $(LDFLAGS) -o dba dba.o util.o
dbd: dbd.o util.o
	cc $(LDFLAGS) -o dbd dbd.o util.o
dbe: dbe.o sdbm
	cc $(LDFLAGS) -o dbe dbe.o libsdbm.a

sdbm: $(OBJS)
	ar cr libsdbm.a $(OBJS)
	ranlib libsdbm.a
###	cp libsdbm.a /usr/lib/libsdbm.a

dba.o: sdbm.h
dbu.o: sdbm.h
util.o:sdbm.h

$(OBJS): sdbm.h tune.h pair.h

#
# dbu using berkelezoid ndbm routines [if you have them] for testing
#
#x-dbu: dbu.o util.o
#	cc $(CFLAGS) -o x-dbu dbu.o util.o
lint:
	lint -abchx $(SRCS)

clean:
	rm -f *.o mon.out core

purge: 	clean
	rm -f dbu libsdbm.a dbd dba dbe x-dbu *.dir *.pag

shar:
	shar $(MISC) makefile $(SRCS) $(HDRS) >SDBM.SHAR

readme:
	nroff -ms readme.ms | col -b >README

--- NEW FILE: dbe.c ---
#include <stdio.h>
#ifndef VMS
#include <sys/file.h>
#include <ndbm.h>
#else
#include "file.h"
#include "ndbm.h"
#endif
#include <ctype.h>

/***************************************************************************\
**                                                                         **
**   Function name: getopt()                                               **
**   Author:        Henry Spencer, UofT                                    **
**   Coding date:   84/04/28                                               **
**                                                                         **
**   Description:                                                          **
**                                                                         **
**   Parses argv[] for arguments.                                          **
**   Works with Whitesmith's C compiler.                                   **
**                                                                         **
**   Inputs   - The number of arguments                                    **
**            - The base address of the array of arguments                 **
**            - A string listing the valid options (':' indicates an       **
**              argument to the preceding option is required, a ';'        **
**              indicates an argument to the preceding option is optional) **
**                                                                         **
**   Outputs  - Returns the next option character,                         **
**              '?' for non '-' arguments                                  **
**              or ':' when there is no more arguments.                    **
**                                                                         **
**   Side Effects + The argument to an option is pointed to by 'optarg'    **
**                                                                         **
*****************************************************************************
**                                                                         **
**   REVISION HISTORY:                                                     **
**                                                                         **
**     DATE           NAME                        DESCRIPTION              **
**   YY/MM/DD  ------------------   ------------------------------------   **
**   88/10/20  Janick Bergeron      Returns '?' on unamed arguments        **
**                                  returns '!' on unknown options         **
**                                  and 'EOF' only when exhausted.         **
**   88/11/18  Janick Bergeron      Return ':' when no more arguments      **
**   89/08/11  Janick Bergeron      Optional optarg when ';' in optstring  **
**                                                                         **
\***************************************************************************/

char *optarg;			       /* Global argument pointer. */

#ifdef VMS
#define index  strchr
#endif

char
getopt(int argc, char **argv, char *optstring)
{
	register int c;
	register char *place;
	extern char *index();
	static int optind = 0;
	static char *scan = NULL;

	optarg = NULL;

	if (scan == NULL || *scan == '\0') {

		if (optind == 0)
			optind++;
		if (optind >= argc)
			return ':';

		optarg = place = argv[optind++];
		if (place[0] != '-' || place[1] == '\0')
			return '?';
		if (place[1] == '-' && place[2] == '\0')
			return '?';
		scan = place + 1;
	}

	c = *scan++;
	place = index(optstring, c);
	if (place == NULL || c == ':' || c == ';') {

		(void) fprintf(stderr, "%s: unknown option %c\n", argv[0], c);
		scan = NULL;
		return '!';
	}
	if (*++place == ':') {

		if (*scan != '\0') {

			optarg = scan;
			scan = NULL;

		}
		else {

			if (optind >= argc) {

				(void) fprintf(stderr, "%s: %c requires an argument\n",
					       argv[0], c);
				return '!';
			}
			optarg = argv[optind];
			optind++;
		}
	}
	else if (*place == ';') {

		if (*scan != '\0') {

			optarg = scan;
			scan = NULL;

		}
		else {

			if (optind >= argc || *argv[optind] == '-')
				optarg = NULL;
			else {
				optarg = argv[optind];
				optind++;
			}
		}
	}
	return c;
}

void
print_datum(datum db)
{
	int i;

	putchar('"');
	for (i = 0; i < db.dsize; i++) {
		if (isprint((unsigned char)db.dptr[i]))
			putchar(db.dptr[i]);
		else {
			putchar('\\');
			putchar('0' + ((db.dptr[i] >> 6) & 0x07));
			putchar('0' + ((db.dptr[i] >> 3) & 0x07));
			putchar('0' + (db.dptr[i] & 0x07));
		}
	}
	putchar('"');
}

datum
read_datum(char *s)
{
	datum db;
	char *p;
	int i;

	db.dsize = 0;
	db.dptr = (char *) malloc(strlen(s) * sizeof(char));
	if (!db.dptr)
	    oops("cannot get memory");

	for (p = db.dptr; *s != '\0'; p++, db.dsize++, s++) {
		if (*s == '\\') {
			if (*++s == 'n')
				*p = '\n';
			else if (*s == 'r')
				*p = '\r';
			else if (*s == 'f')
				*p = '\f';
			else if (*s == 't')
				*p = '\t';
			else if (isdigit((unsigned char)*s)
				 && isdigit((unsigned char)*(s + 1))
				 && isdigit((unsigned char)*(s + 2)))
			{
				i = (*s++ - '0') << 6;
				i |= (*s++ - '0') << 3;
				i |= *s - '0';
				*p = i;
			}
			else if (*s == '0')
				*p = '\0';
			else
				*p = *s;
		}
		else
			*p = *s;
	}

	return db;
}

char *
key2s(datum db)
{
	char *buf;
	char *p1, *p2;

	buf = (char *) malloc((db.dsize + 1) * sizeof(char));
	if (!buf)
	    oops("cannot get memory");
	for (p1 = buf, p2 = db.dptr; *p2 != '\0'; *p1++ = *p2++);
	*p1 = '\0';
	return buf;
}

int
main(int argc, char **argv)
{
	typedef enum {
		YOW, FETCH, STORE, DELETE, SCAN, REGEXP
	} commands;
	char opt;
	int flags;
	int giveusage = 0;
	int verbose = 0;
	commands what = YOW;
	char *comarg[3];
	int st_flag = DBM_INSERT;
	int argn;
	DBM *db;
	datum key;
	datum content;

	flags = O_RDWR;
	argn = 0;

	while ((opt = getopt(argc, argv, "acdfFm:rstvx")) != ':') {
		switch (opt) {
		case 'a':
			what = SCAN;
			break;
		case 'c':
			flags |= O_CREAT;
			break;
		case 'd':
			what = DELETE;
			break;
		case 'f':
			what = FETCH;
			break;
		case 'F':
			what = REGEXP;
			break;
		case 'm':
			flags &= ~(000007);
			if (strcmp(optarg, "r") == 0)
				flags |= O_RDONLY;
			else if (strcmp(optarg, "w") == 0)
				flags |= O_WRONLY;
			else if (strcmp(optarg, "rw") == 0)
				flags |= O_RDWR;
			else {
				fprintf(stderr, "Invalid mode: \"%s\"\n", optarg);
				giveusage = 1;
			}
			break;
		case 'r':
			st_flag = DBM_REPLACE;
			break;
		case 's':
			what = STORE;
			break;
		case 't':
			flags |= O_TRUNC;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'x':
			flags |= O_EXCL;
			break;
		case '!':
			giveusage = 1;
			break;
		case '?':
			if (argn < 3)
				comarg[argn++] = optarg;
			else {
				fprintf(stderr, "Too many arguments.\n");
				giveusage = 1;
			}
			break;
		}
	}

	if (giveusage || what == YOW || argn < 1) {
		fprintf(stderr, "Usage: %s databse [-m r|w|rw] [-crtx] -a|-d|-f|-F|-s [key [content]]\n", argv[0]);
		exit(-1);
	}

	if ((db = dbm_open(comarg[0], flags, 0777)) == NULL) {
		fprintf(stderr, "Error opening database \"%s\"\n", comarg[0]);
		exit(-1);
	}

	if (argn > 1)
		key = read_datum(comarg[1]);
	if (argn > 2)
		content = read_datum(comarg[2]);

	switch (what) {

	case SCAN:
		key = dbm_firstkey(db);
		if (dbm_error(db)) {
			fprintf(stderr, "Error when fetching first key\n");
			goto db_exit;
		}
		while (key.dptr != NULL) {
			content = dbm_fetch(db, key);
			if (dbm_error(db)) {
				fprintf(stderr, "Error when fetching ");
				print_datum(key);
				printf("\n");
				goto db_exit;
			}
			print_datum(key);
			printf(": ");
			print_datum(content);
			printf("\n");
			if (dbm_error(db)) {
				fprintf(stderr, "Error when fetching next key\n");
				goto db_exit;
			}
			key = dbm_nextkey(db);
		}
		break;

	case REGEXP:
		if (argn < 2) {
			fprintf(stderr, "Missing regular expression.\n");
			goto db_exit;
		}
		if (re_comp(comarg[1])) {
			fprintf(stderr, "Invalid regular expression\n");
			goto db_exit;
		}
		key = dbm_firstkey(db);
		if (dbm_error(db)) {
			fprintf(stderr, "Error when fetching first key\n");
			goto db_exit;
		}
		while (key.dptr != NULL) {
			if (re_exec(key2s(key))) {
				content = dbm_fetch(db, key);
				if (dbm_error(db)) {
					fprintf(stderr, "Error when fetching ");
					print_datum(key);
					printf("\n");
					goto db_exit;
				}
				print_datum(key);
				printf(": ");
				print_datum(content);
				printf("\n");
				if (dbm_error(db)) {
					fprintf(stderr, "Error when fetching next key\n");
					goto db_exit;
				}
			}
			key = dbm_nextkey(db);
		}
		break;

	case FETCH:
		if (argn < 2) {
			fprintf(stderr, "Missing fetch key.\n");
			goto db_exit;
		}
		content = dbm_fetch(db, key);
		if (dbm_error(db)) {
			fprintf(stderr, "Error when fetching ");
			print_datum(key);
			printf("\n");
			goto db_exit;
		}
		if (content.dptr == NULL) {
			fprintf(stderr, "Cannot find ");
			print_datum(key);
			printf("\n");
			goto db_exit;
		}
		print_datum(key);
		printf(": ");
		print_datum(content);
		printf("\n");
		break;

	case DELETE:
		if (argn < 2) {
			fprintf(stderr, "Missing delete key.\n");
			goto db_exit;
		}
		if (dbm_delete(db, key) || dbm_error(db)) {
			fprintf(stderr, "Error when deleting ");
			print_datum(key);
			printf("\n");
			goto db_exit;
		}
		if (verbose) {
			print_datum(key);
			printf(": DELETED\n");
		}
		break;

	case STORE:
		if (argn < 3) {
			fprintf(stderr, "Missing key and/or content.\n");
			goto db_exit;
		}
		if (dbm_store(db, key, content, st_flag) || dbm_error(db)) {
			fprintf(stderr, "Error when storing ");
			print_datum(key);
			printf("\n");
			goto db_exit;
		}
		if (verbose) {
			print_datum(key);
			printf(": ");
			print_datum(content);
			printf(" STORED\n");
		}
		break;
	}

db_exit:
	dbm_clearerr(db);
	dbm_close(db);
	if (dbm_error(db)) {
		fprintf(stderr, "Error closing database \"%s\"\n", comarg[0]);
		exit(-1);
	}
}

--- NEW FILE: CHANGES ---
Changes from the earlier BETA releases.

o dbm_prep does everything now, so dbm_open is just a simple
  wrapper that builds the default filenames. dbm_prep no longer
  requires a (DBM *) db parameter: it allocates one itself. It
  returns (DBM *) db or (DBM *) NULL.

o makroom is now reliable. In the common-case optimization of the page
  split, the page into which the incoming key/value pair is to be inserted
  is write-deferred (if the split is successful), thereby saving a cosly
  write.  BUT, if the split does not make enough room (unsuccessful), the
  deferred page is written out, as the failure-window is now dependent on
  the number of split attempts.

o if -DDUFF is defined, hash function will also use the DUFF construct.
  This may look like a micro-performance tweak (maybe it is), but in fact,
  the hash function is the third most-heavily used function, after read
  and write.

--- NEW FILE: sdbm.3 ---
.\" $Id: sdbm.3,v 1.2 2006-12-04 16:59:39 dslinux_cayenne Exp $
.TH SDBM 3 "1 March 1990"
.SH NAME
sdbm, sdbm_open, sdbm_prep, sdbm_close, sdbm_fetch, sdbm_store, sdbm_delete, sdbm_exists, sdbm_firstkey, sdbm_nextkey, sdbm_hash, sdbm_rdonly, sdbm_error, sdbm_clearerr, sdbm_dirfno, sdbm_pagfno \- data base subroutines
.SH SYNOPSIS
.nf
.ft B
#include <sdbm.h>
.sp
typedef struct {
	char *dptr;
	int dsize;
} datum;
.sp
datum nullitem = { NULL, 0 };
.sp
\s-1DBM\s0 *sdbm_open(char *file, int flags, int mode)
.sp
\s-1DBM\s0 *sdbm_prep(char *dirname, char *pagname, int flags, int mode)
.sp
void sdbm_close(\s-1DBM\s0 *db)
.sp
datum sdbm_fetch(\s-1DBM\s0 *db, key)
.sp
int sdbm_store(\s-1DBM\s0 *db, datum key, datum val, int flags)
.sp
int sdbm_delete(\s-1DBM\s0 *db, datum key)
.sp
int sdbm_exists(\s-1DBM\s0 *db, datum key)
.sp
datum sdbm_firstkey(\s-1DBM\s0 *db)
.sp
datum sdbm_nextkey(\s-1DBM\s0 *db)
.sp
long sdbm_hash(char *string, int len)
.sp
int sdbm_rdonly(\s-1DBM\s0 *db)
int sdbm_error(\s-1DBM\s0 *db)
sdbm_clearerr(\s-1DBM\s0 *db)
int sdbm_dirfno(\s-1DBM\s0 *db)
int sdbm_pagfno(\s-1DBM\s0 *db)
.ft R
.fi
.SH DESCRIPTION
.IX "database library" sdbm "" "\fLsdbm\fR"
.IX sdbm_open "" "\fLsdbm_open\fR \(em open \fLsdbm\fR database"
.IX sdbm_prep "" "\fLsdbm_prep\fR \(em prepare \fLsdbm\fR database"
.IX sdbm_close "" "\fLsdbm_close\fR \(em close \fLsdbm\fR routine"
.IX sdbm_fetch "" "\fLsdbm_fetch\fR \(em fetch \fLsdbm\fR database data"
.IX sdbm_store "" "\fLsdbm_store\fR \(em add data to \fLsdbm\fR database"
.IX sdbm_delete "" "\fLsdbm_delete\fR \(em remove data from \fLsdbm\fR database"
.IX sdbm_exists "" "\fLsdbm_exists\fR \(em test \fLsdbm\fR key existence"
.IX sdbm_firstkey "" "\fLsdbm_firstkey\fR \(em access \fLsdbm\fR database"
.IX sdbm_nextkey "" "\fLsdbm_nextkey\fR \(em access \fLsdbm\fR database"
.IX sdbm_hash "" "\fLsdbm_hash\fR \(em string hash for \fLsdbm\fR database"
.IX sdbm_rdonly "" "\fLsdbm_rdonly\fR \(em return \fLsdbm\fR database read-only mode"
.IX sdbm_error "" "\fLsdbm_error\fR \(em return \fLsdbm\fR database error condition"
.IX sdbm_clearerr "" "\fLsdbm_clearerr\fR \(em clear \fLsdbm\fR database error condition"
.IX sdbm_dirfno "" "\fLsdbm_dirfno\fR \(em return \fLsdbm\fR database bitmap file descriptor"
.IX sdbm_pagfno "" "\fLsdbm_pagfno\fR \(em return \fLsdbm\fR database data file descriptor"
.IX "database functions \(em \fLsdbm\fR"  sdbm_open  ""  \fLsdbm_open\fP
.IX "database functions \(em \fLsdbm\fR"  sdbm_prep  ""  \fLsdbm_prep\fP
.IX "database functions \(em \fLsdbm\fR"  sdbm_close  ""  \fLsdbm_close\fP
.IX "database functions \(em \fLsdbm\fR"  sdbm_fetch  ""  \fLsdbm_fetch\fP
.IX "database functions \(em \fLsdbm\fR"  sdbm_store  ""  \fLsdbm_store\fP
.IX "database functions \(em \fLsdbm\fR"  sdbm_delete  ""  \fLsdbm_delete\fP
.IX "database functions \(em \fLsdbm\fR"  sdbm_firstkey  ""  \fLsdbm_firstkey\fP
.IX "database functions \(em \fLsdbm\fR"  sdbm_nextkey  ""  \fLsdbm_nextkey\fP
.IX "database functions \(em \fLsdbm\fR"  sdbm_rdonly  ""  \fLsdbm_rdonly\fP
.IX "database functions \(em \fLsdbm\fR"  sdbm_error  ""  \fLsdbm_error\fP
.IX "database functions \(em \fLsdbm\fR"  sdbm_clearerr  ""  \fLsdbm_clearerr\fP
.IX "database functions \(em \fLsdbm\fR"  sdbm_dirfno  ""  \fLsdbm_dirfno\fP
.IX "database functions \(em \fLsdbm\fR"  sdbm_pagfno  ""  \fLsdbm_pagfno\fP
.LP
This package allows an application to maintain a mapping of <key,value> pairs
in disk files.  This is not to be considered a real database system, but is
still useful in many simple applications built around fast retrieval of a data
value from a key.  This implementation uses an external hashing scheme,
called Dynamic Hashing, as described by Per-Aake Larson in BIT 18 (1978) pp.
184-201.  Retrieval of any item usually requires a single disk access.
The application interface is compatible with the
.IR ndbm (3)
library.
.LP
An
.B sdbm
database is kept in two files usually given the extensions
.B \.dir
and
.BR \.pag .
The
.B \.dir
file contains a bitmap representing a forest of binary hash trees, the leaves
of which indicate data pages in the
.B \.pag
file.
.LP
The application interface uses the
.B datum
structure to describe both
.I keys
and
.IR value s.
A
.B datum
specifies a byte sequence of
.I dsize
size pointed to by
.IR dptr .
If you use
.SM ASCII
strings as
.IR key s
or
.IR value s,
then you must decide whether or not to include the terminating
.SM NUL
byte which sometimes defines strings.  Including it will require larger
database files, but it will be possible to get sensible output from a
.IR strings (1)
command applied to the data file.
.LP
In order to allow a process using this package to manipulate multiple
databases, the applications interface always requires a
.IR handle ,
a
.BR "DBM *" ,
to identify the database to be manipulated.  Such a handle can be obtained
from the only routines that do not require it, namely
.BR sdbm_open (\|)
or
.BR sdbm_prep (\|).
Either of these will open or create the two necessary files.  The
difference is that the latter allows explicitly naming the bitmap and data
files whereas
.BR sdbm_open (\|)
will take a base file name and call
.BR sdbm_prep (\|)
with the default extensions.
The
.I flags
and
.I mode
parameters are the same as for
.BR open (2).
.LP
To free the resources occupied while a database handle is active, call
.BR sdbm_close (\|).
.LP
Given a handle, one can retrieve data associated with a key by using the
.BR sdbm_fetch (\|)
routine, and associate data with a key by using the
.BR sdbm_store (\|)
routine.
.BR sdbm_exists (\|)
will say whether a given key exists in the database.
.LP
The values of the
.I flags
parameter for
.BR sdbm_store (\|)
can be either
.BR \s-1DBM_INSERT\s0 ,
which will not change an existing entry with the same key, or
.BR \s-1DBM_REPLACE\s0 ,
which will replace an existing entry with the same key.
Keys are unique within the database.
.LP
To delete a key and its associated value use the
.BR sdbm_delete (\|)
routine.
.LP
To retrieve every key in the database, use a loop like:
.sp
.nf
.ft B
for (key = sdbm_firstkey(db); key.dptr != NULL; key = sdbm_nextkey(db))
        ;
.ft R
.fi
.LP
The order of retrieval is unspecified.
.LP
If you determine that the performance of the database is inadequate or
you notice clustering or other effects that may be due to the hashing
algorithm used by this package, you can override it by supplying your
own
.BR sdbm_hash (\|)
routine.  Doing so will make the database unintelligable to any other
applications that do not use your specialized hash function.
.sp
.LP
The following macros are defined in the header file:
.IP
.BR sdbm_rdonly (\|)
returns true if the database has been opened read\-only.
.IP
.BR sdbm_error (\|)
returns true if an I/O error has occurred.
.IP
.BR sdbm_clearerr (\|)
allows you to clear the error flag if you think you know what the error
was and insist on ignoring it.
.IP
.BR sdbm_dirfno (\|)
returns the file descriptor associated with the bitmap file.
.IP
.BR sdbm_pagfno (\|)
returns the file descriptor associated with the data file.
.SH SEE ALSO
.IR open (2).
.SH DIAGNOSTICS
Functions that return a
.B "DBM *"
handle will use
.SM NULL
to indicate an error.
Functions that return an
.B int
will use \-1 to indicate an error.  The normal return value in that case is 0.
Functions that return a
.B datum
will return
.B nullitem
to indicate an error.
.LP
As a special case of
.BR sdbm_store (\|),
if it is called with the
.B \s-1DBM_INSERT\s0
flag and the key already exists in the database, the return value will be 1.
.LP
In general, if a function parameter is invalid,
.B errno
will be set to
.BR \s-1EINVAL\s0 .
If a write operation is requested on a read-only database,
.B errno
will be set to
.BR \s-1ENOPERM\s0 .
If a memory allocation (using
.IR malloc (3))
failed,
.B errno
will be set to
.BR \s-1ENOMEM\s0 .
For I/O operation failures
.B errno
will contain the value set by the relevant failed system call, either
.IR read (2),
.IR write (2),
or
.IR lseek (2).
.SH AUTHOR
.IP "Ozan S. Yigit" (oz at nexus.yorku.ca)
.SH BUGS
The sum of key and value data sizes must not exceed
.B \s-1PAIRMAX\s0
(1008 bytes).
.LP
The sum of the key and value data sizes where several keys hash to the
same value must fit within one bitmap page.
.LP
The
.B \.pag
file will contain holes, so its apparent size is larger than its contents.
When copied through the filesystem the holes will be filled.
.LP
The contents of
.B datum
values returned are in volatile storage.  If you want to retain the values
pointed to, you must copy them immediately before another call to this package.
.LP
The only safe way for multiple processes to (read and) update a database at
the same time, is to implement a private locking scheme outside this package
and open and close the database between lock acquisitions.  It is safe for
multiple processes to concurrently access a database read-only.
.SH APPLICATIONS PORTABILITY
For complete source code compatibility with the Berkeley Unix
.IR ndbm (3)
library, the 
.B sdbm.h
header file should be installed in
.BR /usr/include/ndbm.h .
.LP
The
.B nullitem
data item, and the
.BR sdbm_prep (\|),
.BR sdbm_hash (\|),
.BR sdbm_rdonly (\|),
.BR sdbm_dirfno (\|),
and
.BR sdbm_pagfno (\|)
functions are unique to this package.

--- NEW FILE: hash.c ---
/*
 * sdbm - ndbm work-alike hashed database library
 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
 * author: oz at nexus.yorku.ca
 * status: public domain. keep it that way.
 *
 * hashing routine
 */

#include "config.h"
#include "EXTERN.h"
#include "sdbm.h"
/*
 * polynomial conversion ignoring overflows
 * [this seems to work remarkably well, in fact better
 * then the ndbm hash function. Replace at your own risk]
 * use: 65599	nice.
 *      65587   even better. 
 */
long
sdbm_hash(register char *str, register int len)
{
	register unsigned long n = 0;

#ifdef DUFF

#define HASHC	n = *str++ + 65599 * n

	if (len > 0) {
		register int loop = (len + 8 - 1) >> 3;

		switch(len & (8 - 1)) {
		case 0:	do {
			HASHC;	case 7:	HASHC;
		case 6:	HASHC;	case 5:	HASHC;
		case 4:	HASHC;	case 3:	HASHC;
		case 2:	HASHC;	case 1:	HASHC;
			} while (--loop);
		}

	}
#else
	while (len--)
		n = *str++ + 65599 * n;
#endif
	return n;
}

--- NEW FILE: linux.patches ---
*** sdbm.dist/./dbu.c	Mon Feb 17 21:18:52 1992
--- sdbm/./dbu.c	Mon Feb 17 21:11:20 1992
***************
*** 12,18 ****
  #endif

  extern int	getopt();
! extern char	*strchr();
  extern void	oops();

  char *progname;
--- 12,18 ----
  #endif

  extern int	getopt();
! /* extern char	*strchr(); */
  extern void	oops();

  char *progname;
*** sdbm.dist/./makefile	Mon Feb 17 21:18:56 1992
--- sdbm/./makefile	Mon Feb 17 21:10:46 1992
***************
*** 2,8 ****
  # makefile for public domain ndbm-clone: sdbm
  # DUFF: use duff's device (loop unroll) in parts of the code
  #
! CFLAGS = -O -DSDBM -DDUFF -DBSD42
  #LDFLAGS = -p

  OBJS = sdbm.o pair.o hash.o
--- 2,8 ----
  # makefile for public domain ndbm-clone: sdbm
  # DUFF: use duff's device (loop unroll) in parts of the code
  #
! CFLAGS = -O -DSDBM -DDUFF
  #LDFLAGS = -p

  OBJS = sdbm.o pair.o hash.o
*** sdbm.dist/./sdbm.c	Mon Feb 17 21:19:17 1992
--- sdbm/./sdbm.c	Mon Feb 17 21:12:59 1992
***************
*** 25,30 ****
--- 25,31 ----
  #endif
  #include <errno.h>
  #include <string.h>
+ #include <unistd.h>

  #ifdef __STDC__
  #include <stddef.h>
***************
*** 43,49 ****

  extern char *malloc proto((unsigned int));
  extern void free proto((void *));
! extern long lseek();

  /*
   * forward
--- 44,50 ----

  extern char *malloc proto((unsigned int));
  extern void free proto((void *));
! /* extern long lseek(); */

  /*
   * forward

--- NEW FILE: README ---

                   sdbm - Substitute DBM
                             or
        Berkeley ndbm for Every UN*X[1] Made Simple

                      Ozan (oz) Yigit

            The Guild of PD Software Toolmakers
                      Toronto - Canada

                     oz at nexus.yorku.ca

Implementation is the sincerest form of flattery. - L. Peter
Deutsch

A The Clone of the ndbm library

     The sources accompanying this notice - sdbm  -  consti-
tute  the  first  public  release  (Dec. 1990) of a complete
clone of the Berkeley UN*X ndbm library. The sdbm library is
meant  to  clone the proven functionality of ndbm as closely
as possible, including a few improvements. It is  practical,
easy to understand, and compatible.  The sdbm library is not
derived  from  any  licensed,  proprietary  or   copyrighted
software.

     The sdbm implementation is based on  a  1978  algorithm
[Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''.
In the course of searching for a substitute for ndbm, I pro-
totyped  three different external-hashing algorithms [Lar78,
Fag79, Lit80] and ultimately chose Larson's algorithm  as  a
basis  of  the  sdbm  implementation. The Bell Labs dbm (and
therefore ndbm) is based on an  algorithm  invented  by  Ken
Thompson, [Tho90, Tor87] and predates Larson's work.

     The sdbm programming interface  is  totally  compatible
with ndbm and includes a slight improvement in database ini-
tialization.  It is also expected  to  be  binary-compatible
under most UN*X versions that support the ndbm library.

     The sdbm implementation shares the shortcomings of  the
ndbm library, as a side effect of various simplifications to
the original Larson algorithm. It does produce holes in  the
page file as it writes pages past the end of file. (Larson's
paper include a clever solution to this problem  that  is  a
result of using the hash value directly as a block address.)
On the other hand, extensive tests  seem  to  indicate  that
sdbm creates fewer holes in general, and the resulting page-
files are smaller. The sdbm implementation  is  also  faster
than  ndbm  in database creation.  Unlike the ndbm, the sdbm
_________________________

  [1] UN*X is not a trademark of any (dis)organization.

                           - 2 -

store operation will not ``wander away'' trying to split its
data  pages  to insert a datum that cannot (due to elaborate
worst-case situations) be inserted. (It will  fail  after  a
pre-defined number of attempts.)

Important Compatibility Warning

     The sdbm and ndbm libraries cannot share databases: one
cannot  read  the  (dir/pag)  database created by the other.
This is due to the differences between  the  ndbm  and  sdbm
algorithms[2], and the hash functions used.  It is  easy  to
convert  between the dbm/ndbm databases and sdbm by ignoring
the index completely: see dbd, dbu etc.

Notice of Intellectual Property

The entire sdbm  library package, as authored by me, Ozan S.
Yigit,  is  hereby placed in the public domain. As such, the
author is not responsible for the  consequences  of  use  of
this  software, no matter how awful, even if they arise from
defects in it. There is no expressed or implied warranty for
the sdbm library.

     Since the sdbm library package is in the public domain,
this   original  release  or  any  additional  public-domain
releases of the modified original cannot possibly (by defin-
ition) be withheld from you. Also by definition, You (singu-
lar) have all the rights to this code (including  the  right
to sell without permission, the right to  hoard[3]  and  the
right  to  do  other  icky  things as you see fit) but those
rights are also granted to everyone else.

     Please note that all  previous  distributions  of  this
software  contained  a  copyright  (which is now dropped) to
protect its origins and its  current  public  domain  status
against any possible claims and/or challenges.

Acknowledgments

     Many people have been very helpful and  supportive.   A
partial  list  would  necessarily  include Rayan Zacherissen
(who contributed the  man  page,  and  also  hacked  a  MMAP
_________________________

  [2] Torek's   discussion   [Tor87]   indicates   that
dbm/ndbm implementations use the hash value to traverse
the radix trie differently than sdbm and as  a  result,
the page indexes are generated in different order.  For
more information, send e-mail to the author.
  [3] You  cannot really hoard something that is avail-
able to the public at large, but try if  it  makes  you
feel any better.

                           - 3 -

version of sdbm), Arnold Robbins, Chris Lewis,  Bill  David-
sen,  Henry  Spencer,  Geoff  Collyer, Rich Salz (who got me
started in the first place), Johannes Ruschein (who did  the
minix port) and David Tilbrook. I thank you all.

Distribution Manifest and Notes

This distribution of sdbm includes (at least) the following:

    CHANGES     change log
    README      this file.
    biblio      a small bibliography on external hashing
    dba.c       a crude (n/s)dbm page file analyzer
    dbd.c       a crude (n/s)dbm page file dumper (for conversion)
    dbe.1       man page for dbe.c
    dbe.c       Janick's database editor
    dbm.c       a dbm library emulation wrapper for ndbm/sdbm
    dbm.h       header file for the above
    dbu.c       a crude db management utility
    hash.c      hashing function
    makefile    guess.
    pair.c      page-level routines (posted earlier)
    pair.h      header file for the above
    readme.ms   troff source for the README file
    sdbm.3      man page
    sdbm.c      the real thing
    sdbm.h      header file for the above
    tune.h      place for tuning & portability thingies
    util.c      miscellaneous

     dbu is a simple database manipulation  program[4]  that
tries to look like Bell Labs' cbt utility. It  is  currently
incomplete in functionality.  I use dbu to test out the rou-
tines: it takes (from stdin) tab separated  key/value  pairs
for commands like build or insert or takes keys for commands
like delete or look.

    dbu <build|creat|look|insert|cat|delete> dbmfile

     dba is a crude analyzer of dbm/sdbm/ndbm page files. It
scans the entire page file, reporting page level statistics,
and totals at the end.

     dbd is a crude dump  program  for  dbm/ndbm/sdbm  data-
bases.  It  ignores  the bitmap, and dumps the data pages in
sequence. It can be used to create input for the  dbu  util-
ity.   Note that dbd will skip any NULLs in the key and data
fields,  thus  is  unsuitable  to  convert   some   peculiar
_________________________

  [4] The dbd, dba, dbu utilities are quick  hacks  and
are  not  fit  for  production use. They were developed
late one night, just to test out sdbm, and convert some
databases.

                           - 4 -

databases that insist in including the terminating null.

     I have also included a copy of the dbe  (ndbm  DataBase
Editor)  by  Janick Bergeron [janick at bnr.ca] for your pleas-
ure. You may find it more useful than the little  dbu  util-
ity.

     dbm.[ch] is a dbm library emulation on top of ndbm (and
hence suitable for sdbm). Written by Robert Elz.

     The sdbm library has been around in beta test for quite
a  long  time,  and from whatever little feedback I received
(maybe no news is good news), I believe it  has  been  func-
tioning  without  any  significant  problems.  I  would,  of
course, appreciate all fixes and/or improvements.  Portabil-
ity enhancements would especially be useful.

Implementation Issues

     Hash functions: The algorithm behind  sdbm  implementa-
tion  needs a good bit-scrambling hash function to be effec-
tive. I ran into a set of constants for a simple hash  func-
tion  that  seem  to  help sdbm perform better than ndbm for
various inputs:

    /*
     * polynomial conversion ignoring overflows
     * 65599 nice. 65587 even better.
     */
    long
    dbm_hash(char *str, int len) {
        register unsigned long n = 0;

        while (len--)
            n = n * 65599 + *str++;
        return n;
    }

     There may be better hash functions for the purposes  of
dynamic hashing.  Try your favorite, and check the pagefile.
If it contains too many pages with too many holes, (in rela-
tion  to this one for example) or if sdbm simply stops work-
ing (fails after SPLTMAX attempts to split)  when  you  feed
your  NEWS  history  file  to it, you probably do not have a
good hashing function.  If  you  do  better  (for  different
types of input), I would like to know about the function you
use.

     Block sizes: It seems (from  various  tests  on  a  few
machines)  that a page file block size PBLKSIZ of 1024 is by
far the best for performance, but this also happens to limit
the  size  of a key/value pair. Depending on your needs, you
may wish to increase the page size, and also adjust  PAIRMAX
(the maximum size of a key/value pair allowed: should always

                           - 5 -

be at least three words smaller than PBLKSIZ.)  accordingly.
The  system-wide  version  of the library should probably be
configured with 1024 (distribution default), as this appears
to be sufficient for most common uses of sdbm.

Portability

     This package has been tested in many  different  UN*Xes
even including minix, and appears to be reasonably portable.
This does not mean it will port easily to non-UN*X systems.

Notes and Miscellaneous

     The sdbm is not a very complicated  package,  at  least
not  after  you  familiarize yourself with the literature on
external hashing. There are other interesting algorithms  in
existence  that ensure (approximately) single-read access to
a data value associated with any key. These  are  directory-
less schemes such as linear hashing [Lit80] (+ Larson varia-
tions), spiral storage [Mar79] or directory schemes such  as
extensible  hashing  [Fag79] by Fagin et al. I do hope these
sources provide a reasonable playground for  experimentation
with  other algorithms.  See the June 1988 issue of ACM Com-
puting Surveys [Enb88] for  an  excellent  overview  of  the
field.

References

[Lar78]
    P.-A. Larson, ``Dynamic Hashing'', BIT, vol.   18,   pp.
    184-201, 1978.

[Tho90]
    Ken Thompson, private communication, Nov. 1990

[Lit80]
    W. Litwin, `` Linear Hashing: A new tool  for  file  and
    table addressing'', Proceedings of the 6th Conference on
    Very Large  Dabatases  (Montreal), pp.   212-223,   Very
    Large Database Foundation, Saratoga, Calif., 1980.

[Fag79]
    R. Fagin, J.  Nievergelt,  N.  Pippinger,  and   H.   R.
    Strong,  ``Extendible Hashing - A Fast Access Method for
    Dynamic Files'', ACM  Trans.  Database  Syst.,  vol.  4,
    no.3, pp. 315-344, Sept. 1979.

[Wal84]
    Rich Wales, ``Discussion of "dbm"  data  base  system'',
    USENET newsgroup unix.wizards, Jan. 1984.

[Tor87]
    Chris Torek,  ``Re:   dbm.a   and   ndbm.a   archives'',

                           - 6 -

    USENET newsgroup comp.unix, 1987.

[Mar79]
    G. N. Martin, ``Spiral Storage: Incrementally   Augment-
    able   Hash  Addressed  Storage'', Technical Report #27,
    University of Varwick, Coventry, U.K., 1979.

[Enb88]
    R.  J.  Enbody  and  H.   C.   Du,   ``Dynamic   Hashing
    Schemes'',ACM  Computing  Surveys,  vol.  20, no. 2, pp.
    85-113, June 1988.

--- NEW FILE: dbe.1 ---
.TH dbe 1 "ndbm(3) EDITOR"
.SH NAME
dbe \- Edit a ndbm(3) database
.SH USAGE
dbe <database> [-m r|w|rw] [-crtvx] -a|-d|-f|-F|-s [<key> [<content>]]
.SH DESCRIPTION
\fIdbme\fP operates on ndbm(3) databases.
It can be used to create them, look at them or change them.
When specifying the value of a key or the content of its associated entry,
\\nnn, \\0, \\n, \\t, \\f and \\r are interpreted as usual.
When displaying key/content pairs, non-printable characters are displayed
using the \\nnn notation.
.SH OPTIONS
.IP -a
List all entries in the database.
.IP -c
Create the database if it does not exist.
.IP -d
Delete the entry associated with the specified key.
.IP -f
Fetch and display the entry associated with the specified key.
.IP -F
Fetch and display all the entries whose key match the specified
regular-expression
.IP "-m r|w|rw"
Open the database in read-only, write-only or read-write mode
.IP -r
Replace the entry associated with the specified key if it already exists.
See option -s.
.IP -s
Store an entry under a specific key.
An error occurs if the key already exists and the option -r was not specified.
.IP -t
Re-initialize the database before executing the command.
.IP -v
Verbose mode.
Confirm stores and deletions.
.IP -x
If option -x is used with option -c, then if the database already exists,
an error occurs.
This can be used to implement a simple exclusive access locking mechanism.
.SH SEE ALSO
ndbm(3)
.SH AUTHOR
janick at bnr.ca

--- NEW FILE: dbu.c ---
#include <stdio.h>
#include <sys/file.h>
#ifdef SDBM
#include "EXTERN.h"
#include "sdbm.h"
#else
#include <ndbm.h>
#endif
#include <string.h>

#ifdef BSD42
#define strchr	index
#endif

extern int	getopt();
extern char	*strchr();
extern void	oops();

char *progname;

static int rflag;
static char *usage = "%s [-R] cat | look |... dbmname";

#define DERROR	0
#define DLOOK	1
#define DINSERT	2
#define DDELETE 3
#define	DCAT	4
#define DBUILD	5
#define DPRESS	6
#define DCREAT	7

#define LINEMAX	8192

typedef struct {
	char *sname;
	int scode;
	int flags;
} cmd;

static cmd cmds[] = {

	"fetch", DLOOK, 	O_RDONLY,
	"get", DLOOK,		O_RDONLY,
	"look", DLOOK,		O_RDONLY,
	"add", DINSERT,		O_RDWR,
	"insert", DINSERT,	O_RDWR,
	"store", DINSERT,	O_RDWR,
	"delete", DDELETE,	O_RDWR,
	"remove", DDELETE,	O_RDWR,
	"dump", DCAT,		O_RDONLY,
	"list", DCAT, 		O_RDONLY,
	"cat", DCAT,		O_RDONLY,
	"creat", DCREAT,	O_RDWR | O_CREAT | O_TRUNC,
	"new", DCREAT,		O_RDWR | O_CREAT | O_TRUNC,
	"build", DBUILD,	O_RDWR | O_CREAT,
	"squash", DPRESS,	O_RDWR,
	"compact", DPRESS,	O_RDWR,
	"compress", DPRESS,	O_RDWR
};

#define CTABSIZ (sizeof (cmds)/sizeof (cmd))

static cmd *parse();
static void badk(), doit(), prdatum();

int
main(int argc, char **argv)
{
	int c;
	register cmd *act;
	extern int optind;
	extern char *optarg;

	progname = argv[0];

	while ((c = getopt(argc, argv, "R")) != EOF)
		switch (c) {
		case 'R':	       /* raw processing  */
			rflag++;
			break;

		default:
			oops("usage: %s", usage);
			break;
		}

	if ((argc -= optind) < 2)
		oops("usage: %s", usage);

	if ((act = parse(argv[optind])) == NULL)
		badk(argv[optind]);
	optind++;
	doit(act, argv[optind]);
	return 0;
}

static void
doit(register cmd *act, char *file)
{
	datum key;
	datum val;
	register DBM *db;
	register char *op;
	register int n;
	char *line;
#ifdef TIME
	long start;
	extern long time();
#endif

	if ((db = dbm_open(file, act->flags, 0644)) == NULL)
		oops("cannot open: %s", file);

	if ((line = (char *) malloc(LINEMAX)) == NULL)
		oops("%s: cannot get memory", "line alloc");

	switch (act->scode) {

	case DLOOK:
		while (fgets(line, LINEMAX, stdin) != NULL) {
			n = strlen(line) - 1;
			line[n] = 0;
			key.dptr = line;
			key.dsize = n;
			val = dbm_fetch(db, key);
			if (val.dptr != NULL) {
				prdatum(stdout, val);
				putchar('\n');
				continue;
			}
			prdatum(stderr, key);
			fprintf(stderr, ": not found.\n");
		}
		break;
	case DINSERT:
		break;
	case DDELETE:
		while (fgets(line, LINEMAX, stdin) != NULL) {
			n = strlen(line) - 1;
			line[n] = 0;
			key.dptr = line;
			key.dsize = n;
			if (dbm_delete(db, key) == -1) {
				prdatum(stderr, key);
				fprintf(stderr, ": not found.\n");
			}
		}
		break;
	case DCAT:
		for (key = dbm_firstkey(db); key.dptr != 0; 
		     key = dbm_nextkey(db)) {
			prdatum(stdout, key);
			putchar('\t');
			prdatum(stdout, dbm_fetch(db, key));
			putchar('\n');
		}
		break;
	case DBUILD:
#ifdef TIME
		start = time(0);
#endif
		while (fgets(line, LINEMAX, stdin) != NULL) {
			n = strlen(line) - 1;
			line[n] = 0;
			key.dptr = line;
			if ((op = strchr(line, '\t')) != 0) {
				key.dsize = op - line;
				*op++ = 0;
				val.dptr = op;
				val.dsize = line + n - op;
			}
			else
				oops("bad input; %s", line);

			if (dbm_store(db, key, val, DBM_REPLACE) < 0) {
				prdatum(stderr, key);
				fprintf(stderr, ": ");
				oops("store: %s", "failed");
			}
		}
#ifdef TIME
		printf("done: %d seconds.\n", time(0) - start);
#endif
		break;
	case DPRESS:
		break;
	case DCREAT:
		break;
	}

	dbm_close(db);
}

static void
badk(char *word)
{
	register int i;

	if (progname)
		fprintf(stderr, "%s: ", progname);
	fprintf(stderr, "bad keywd %s. use one of\n", word);
	for (i = 0; i < (int)CTABSIZ; i++)
		fprintf(stderr, "%-8s%c", cmds[i].sname,
			((i + 1) % 6 == 0) ? '\n' : ' ');
	fprintf(stderr, "\n");
	exit(1);
	/*NOTREACHED*/
}

static cmd *
parse(register char *str)
{
	register int i = CTABSIZ;
	register cmd *p;

	for (p = cmds; i--; p++)
		if (strcmp(p->sname, str) == 0)
			return p;
	return NULL;
}

static void
prdatum(FILE *stream, datum d)
{
	register int c;
	register char *p = d.dptr;
	register int n = d.dsize;

	while (n--) {
		c = *p++ & 0377;
		if (c & 0200) {
			fprintf(stream, "M-");
			c &= 0177;
		}
		if (c == 0177 || c < ' ') 
			fprintf(stream, "^%c", (c == 0177) ? '?' : c + '@');
		else
			putc(c, stream);
	}
}

--- NEW FILE: pair.h ---
/* Mini EMBED (pair.c) */
#define chkpage sdbm__chkpage
#define delpair sdbm__delpair
#define duppair sdbm__duppair
#define exipair sdbm__exipair
#define fitpair sdbm__fitpair
#define getnkey sdbm__getnkey
#define getpair sdbm__getpair
#define putpair sdbm__putpair
#define splpage sdbm__splpage

extern int fitpair proto((char *, int));
extern void  putpair proto((char *, datum, datum));
extern datum	getpair proto((char *, datum));
extern int  exipair proto((char *, datum));
extern int  delpair proto((char *, datum));
extern int  chkpage proto((char *));
extern datum getnkey proto((char *, int));
extern void splpage proto((char *, char *, long));
#ifdef SEEDUPS
extern int duppair proto((char *, datum));
#endif

--- NEW FILE: pair.c ---
/*
 * sdbm - ndbm work-alike hashed database library
 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
 * author: oz at nexus.yorku.ca
 * status: public domain.
 *
 * page-level routines
 */

#include "config.h"
#ifdef __CYGWIN__
# define EXTCONST extern const
#else
# include "EXTERN.h"
#endif
#include "sdbm.h"
#include "tune.h"
#include "pair.h"

#define exhash(item)	sdbm_hash((item).dptr, (item).dsize)

/* 
 * forward 
 */
static int seepair proto((char *, int, char *, int));

/*
 * page format:
 *	+------------------------------+
 * ino	| n | keyoff | datoff | keyoff |
 * 	+------------+--------+--------+
 *	| datoff | - - - ---->	       |
 *	+--------+---------------------+
 *	|	 F R E E A R E A       |
 *	+--------------+---------------+
 *	|  <---- - - - | data          |
 *	+--------+-----+----+----------+
 *	|  key   | data     | key      |
 *	+--------+----------+----------+
 *
 * calculating the offsets for free area:  if the number
 * of entries (ino[0]) is zero, the offset to the END of
 * the free area is the block size. Otherwise, it is the
 * nth (ino[ino[0]]) entry's offset.
 */

int
fitpair(char *pag, int need)
{
	register int n;
	register int off;
	register int free;
	register short *ino = (short *) pag;

	off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ;
	free = off - (n + 1) * sizeof(short);
	need += 2 * sizeof(short);

	debug(("free %d need %d\n", free, need));

	return need <= free;
}

void
putpair(char *pag, datum key, datum val)
{
	register int n;
	register int off;
	register short *ino = (short *) pag;

	off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ;
/*
 * enter the key first
 */
	off -= key.dsize;
	(void) memcpy(pag + off, key.dptr, key.dsize);
	ino[n + 1] = off;
/*
 * now the data
 */
	off -= val.dsize;
	(void) memcpy(pag + off, val.dptr, val.dsize);
	ino[n + 2] = off;
/*
 * adjust item count
 */
	ino[0] += 2;
}

datum
getpair(char *pag, datum key)
{
	register int i;
	register int n;
	datum val;
	register short *ino = (short *) pag;

	if ((n = ino[0]) == 0)
		return nullitem;

	if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
		return nullitem;

	val.dptr = pag + ino[i + 1];
	val.dsize = ino[i] - ino[i + 1];
	return val;
}

int
exipair(char *pag, datum key)
{
	register short *ino = (short *) pag;

	if (ino[0] == 0)
		return 0;

	return (seepair(pag, ino[0], key.dptr, key.dsize) != 0);
}

#ifdef SEEDUPS
int
duppair(char *pag, datum key)
{
	register short *ino = (short *) pag;
	return ino[0] > 0 && seepair(pag, ino[0], key.dptr, key.dsize) > 0;
}
#endif

datum
getnkey(char *pag, int num)
{
	datum key;
	register int off;
	register short *ino = (short *) pag;

	num = num * 2 - 1;
	if (ino[0] == 0 || num > ino[0])
		return nullitem;

	off = (num > 1) ? ino[num - 1] : PBLKSIZ;

	key.dptr = pag + ino[num];
	key.dsize = off - ino[num];

	return key;
}

int
delpair(char *pag, datum key)
{
	register int n;
	register int i;
	register short *ino = (short *) pag;

	if ((n = ino[0]) == 0)
		return 0;

	if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
		return 0;
/*
 * found the key. if it is the last entry
 * [i.e. i == n - 1] we just adjust the entry count.
 * hard case: move all data down onto the deleted pair,
 * shift offsets onto deleted offsets, and adjust them.
 * [note: 0 < i < n]
 */
	if (i < n - 1) {
		register int m;
		register char *dst = pag + (i == 1 ? PBLKSIZ : ino[i - 1]);
		register char *src = pag + ino[i + 1];
		register int   zoo = dst - src;

		debug(("free-up %d ", zoo));
/*
 * shift data/keys down
 */
		m = ino[i + 1] - ino[n];
#ifdef DUFF
#define MOVB 	*--dst = *--src

		if (m > 0) {
			register int loop = (m + 8 - 1) >> 3;

			switch (m & (8 - 1)) {
			case 0:	do {
				MOVB;	case 7:	MOVB;
			case 6:	MOVB;	case 5:	MOVB;
			case 4:	MOVB;	case 3:	MOVB;
			case 2:	MOVB;	case 1:	MOVB;
				} while (--loop);
			}
		}
#else
#ifdef HAS_MEMMOVE
		dst -= m;
		src -= m;
		memmove(dst, src, m);
#else
		while (m--)
			*--dst = *--src;
#endif
#endif
/*
 * adjust offset index up
 */
		while (i < n - 1) {
			ino[i] = ino[i + 2] + zoo;
			i++;
		}
	}
	ino[0] -= 2;
	return 1;
}

/*
 * search for the key in the page.
 * return offset index in the range 0 < i < n.
 * return 0 if not found.
 */
static int
seepair(char *pag, register int n, register char *key, register int siz)
{
	register int i;
	register int off = PBLKSIZ;
	register short *ino = (short *) pag;

	for (i = 1; i < n; i += 2) {
		if (siz == off - ino[i] &&
		    memEQ(key, pag + ino[i], siz))
			return i;
		off = ino[i + 1];
	}
	return 0;
}

void
splpage(char *pag, char *New, long int sbit)
{
	datum key;
	datum val;

	register int n;
	register int off = PBLKSIZ;
	char cur[PBLKSIZ];
	register short *ino = (short *) cur;

	(void) memcpy(cur, pag, PBLKSIZ);
	(void) memset(pag, 0, PBLKSIZ);
	(void) memset(New, 0, PBLKSIZ);

	n = ino[0];
	for (ino++; n > 0; ino += 2) {
		key.dptr = cur + ino[0]; 
		key.dsize = off - ino[0];
		val.dptr = cur + ino[1];
		val.dsize = ino[0] - ino[1];
/*
 * select the page pointer (by looking at sbit) and insert
 */
		(void) putpair((exhash(key) & sbit) ? New : pag, key, val);

		off = ino[1];
		n -= 2;
	}

	debug(("%d split %d/%d\n", ((short *) cur)[0] / 2, 
	       ((short *) New)[0] / 2,
	       ((short *) pag)[0] / 2));
}

/*
 * check page sanity: 
 * number of entries should be something
 * reasonable, and all offsets in the index should be in order.
 * this could be made more rigorous.
 */
int
chkpage(char *pag)
{
	register int n;
	register int off;
	register short *ino = (short *) pag;

	if ((n = ino[0]) < 0 || n > PBLKSIZ / sizeof(short))
		return 0;

	if (n > 0) {
		off = PBLKSIZ;
		for (ino++; n > 0; ino += 2) {
			if (ino[0] > off || ino[1] > off ||
			    ino[1] > ino[0])
				return 0;
			off = ino[1];
			n -= 2;
		}
	}
	return 1;
}

--- NEW FILE: readme.ms ---
.\" tbl | readme.ms | [tn]roff -ms | ...
.\" note the "C" (courier) and "CB" fonts: you will probably have to
.\" change these.
.\" $Id: readme.ms,v 1.2 2006-12-04 16:59:39 dslinux_cayenne Exp $

.de P1
.br
.nr dT 4
.nf
.ft C
.sp .5
.nr t \\n(dT*\\w'x'u
.ta 1u*\\ntu 2u*\\ntu 3u*\\ntu 4u*\\ntu 5u*\\ntu 6u*\\ntu 7u*\\ntu 8u*\\ntu 9u*\\ntu 10u*\\ntu 11u*\\ntu 12u*\\ntu 13u*\\ntu 14u*\\ntu
..
.de P2
.br
.ft 1
.br
.sp .5
.br
.fi
..
.\" CW uses the typewriter/courier font.
.de CW
\fC\\$1\\fP\\$2
..

.\" Footnote numbering [by Henry Spencer]
.\" <text>\*f for a footnote number..
.\" .FS
.\" \*F <footnote text>
.\" .FE
.\"
.ds f \\u\\s-2\\n+f\\s+2\\d
.nr f 0 1
.ds F \\n+F.
.nr F 0 1

.ND
.LP
.TL
\fIsdbm\fP \(em Substitute DBM
.br
or
.br
Berkeley \fIndbm\fP for Every UN*X\** Made Simple
.AU
Ozan (oz) Yigit
.AI
The Guild of PD Software Toolmakers
Toronto - Canada
.sp
oz at nexus.yorku.ca
.LP
.FS
UN*X is not a trademark of any (dis)organization.
.FE
.sp 2
\fIImplementation is the sincerest form of flattery. \(em L. Peter Deutsch\fP
.SH
A The Clone of the \fIndbm\fP library
.PP
The sources accompanying this notice \(em \fIsdbm\fP \(em constitute
the first public release (Dec. 1990) of a complete clone of
the Berkeley UN*X \fIndbm\fP library. The \fIsdbm\fP library is meant to
clone the proven functionality of \fIndbm\fP as closely as possible,
including a few improvements. It is practical, easy to understand, and
compatible.
The \fIsdbm\fP library is not derived from any licensed, proprietary or
copyrighted software.
.PP
The \fIsdbm\fP implementation is based on a 1978 algorithm
[Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''.
In the course of searching for a substitute for \fIndbm\fP, I
prototyped three different external-hashing algorithms [Lar78, Fag79, Lit80]
and ultimately chose Larson's algorithm as a basis of the \fIsdbm\fP
implementation. The Bell Labs
\fIdbm\fP (and therefore \fIndbm\fP) is based on an algorithm invented by
Ken Thompson, [Tho90, Tor87] and predates Larson's work.
.PP
The \fIsdbm\fR programming interface is totally compatible
with \fIndbm\fP and includes a slight improvement in database initialization.
It is also expected to be binary-compatible under most UN*X versions that
support the \fIndbm\fP library.
.PP
The \fIsdbm\fP implementation shares the shortcomings of the \fIndbm\fP
library, as a side effect of various simplifications to the original Larson
algorithm. It does produce \fIholes\fP in the page file as it writes
pages past the end of file. (Larson's paper include a clever solution to
this problem that is a result of using the hash value directly as a block
address.) On the other hand, extensive tests seem to indicate that \fIsdbm\fP
creates fewer holes in general, and the resulting pagefiles are
smaller. The \fIsdbm\fP implementation is also faster than \fIndbm\fP
in database creation.
Unlike the \fIndbm\fP, the \fIsdbm\fP
.CW store
operation will not ``wander away'' trying to split its
data pages to insert a datum that \fIcannot\fP (due to elaborate worst-case
situations) be inserted. (It will fail after a pre-defined number of attempts.)
.SH
Important Compatibility Warning
.PP
The \fIsdbm\fP and \fIndbm\fP
libraries \fIcannot\fP share databases: one cannot read the (dir/pag)
database created by the other. This is due to the differences
between the \fIndbm\fP and \fIsdbm\fP algorithms\**, 
.FS
Torek's discussion [Tor87]
indicates that \fIdbm/ndbm\fP implementations use the hash
value to traverse the radix trie differently than \fIsdbm\fP
and as a result, the page indexes are generated in \fIdifferent\fP order.
For more information, send e-mail to the author.
.FE
and the hash functions
used.
It is easy to convert between the \fIdbm/ndbm\fP databases and \fIsdbm\fP
by ignoring the index completely: see
.CW dbd ,
.CW dbu
etc.
.R
.LP
.SH
Notice of Intellectual Property
.LP
\fIThe entire\fP sdbm  \fIlibrary package, as authored by me,\fP Ozan S. Yigit,
\fIis hereby placed in the public domain.\fP As such, the author is not
responsible for the consequences of use of this software, no matter how
awful, even if they arise from defects in it. There is no expressed or
implied warranty for the \fIsdbm\fP library.
.PP
Since the \fIsdbm\fP
library package is in the public domain, this \fIoriginal\fP
release or any additional public-domain releases of the modified original
cannot possibly (by definition) be withheld from you. Also by definition,
You (singular) have all the rights to this code (including the right to
sell without permission, the right to hoard\**
.FS
You cannot really hoard something that is available to the public at
large, but try if it makes you feel any better.
.FE
and the right to do other icky things as
you see fit) but those rights are also granted to everyone else.
.PP
Please note that all previous distributions of this software contained
a copyright (which is now dropped) to protect its
origins and its current public domain status against any possible claims
and/or challenges.
.SH
Acknowledgments
.PP
Many people have been very helpful and supportive.  A partial list would
necessarily include Rayan Zacherissen (who contributed the man page,
and also hacked a MMAP version of \fIsdbm\fP),
Arnold Robbins, Chris Lewis,
Bill Davidsen, Henry Spencer, Geoff Collyer, Rich Salz (who got me started
in the first place), Johannes Ruschein
(who did the minix port) and David Tilbrook. I thank you all.
.SH
Distribution Manifest and Notes
.LP
This distribution of \fIsdbm\fP includes (at least) the following:
.P1
	CHANGES		change log
	README		this file.
	biblio		a small bibliography on external hashing
	dba.c		a crude (n/s)dbm page file analyzer
	dbd.c		a crude (n/s)dbm page file dumper (for conversion)
	dbe.1		man page for dbe.c
	dbe.c		Janick's database editor
	dbm.c		a dbm library emulation wrapper for ndbm/sdbm
	dbm.h		header file for the above
	dbu.c		a crude db management utility
	hash.c		hashing function
	makefile	guess.
	pair.c		page-level routines (posted earlier)
	pair.h		header file for the above
	readme.ms	troff source for the README file
	sdbm.3		man page
	sdbm.c		the real thing
	sdbm.h		header file for the above
	tune.h		place for tuning & portability thingies
	util.c		miscellaneous
.P2
.PP
.CW dbu
is a simple database manipulation program\** that tries to look
.FS
The 
.CW dbd ,
.CW dba ,
.CW dbu
utilities are quick hacks and are not fit for production use. They were
developed late one night, just to test out \fIsdbm\fP, and convert some
databases.
.FE
like Bell Labs'
.CW cbt
utility. It is currently incomplete in functionality.
I use
.CW dbu
to test out the routines: it takes (from stdin) tab separated
key/value pairs for commands like
.CW build
or
.CW insert
or takes keys for
commands like
.CW delete
or
.CW look .
.P1
	dbu <build|creat|look|insert|cat|delete> dbmfile
.P2
.PP
.CW dba
is a crude analyzer of \fIdbm/sdbm/ndbm\fP
page files. It scans the entire
page file, reporting page level statistics, and totals at the end.
.PP
.CW dbd
is a crude dump program for \fIdbm/ndbm/sdbm\fP
databases. It ignores the
bitmap, and dumps the data pages in sequence. It can be used to create
input for the
.CW dbu 
utility.
Note that
.CW dbd
will skip any NULLs in the key and data
fields, thus is unsuitable to convert some peculiar databases that
insist in including the terminating null.
.PP
I have also included a copy of the
.CW dbe
(\fIndbm\fP DataBase Editor) by Janick Bergeron [janick at bnr.ca] for
your pleasure. You may find it more useful than the little
.CW dbu
utility.
.PP
.CW dbm.[ch]
is a \fIdbm\fP library emulation on top of \fIndbm\fP
(and hence suitable for \fIsdbm\fP). Written by Robert Elz.
.PP
The \fIsdbm\fP
library has been around in beta test for quite a long time, and from whatever
little feedback I received (maybe no news is good news), I believe it has been
functioning without any significant problems. I would, of course, appreciate
all fixes and/or improvements. Portability enhancements would especially be
useful.
.SH
Implementation Issues
.PP
Hash functions:
The algorithm behind \fIsdbm\fP implementation needs a good bit-scrambling
hash function to be effective. I ran into a set of constants for a simple
hash function that seem to help \fIsdbm\fP perform better than \fIndbm\fP
for various inputs:
.P1
	/*
	 * polynomial conversion ignoring overflows
	 * 65599 nice. 65587 even better.
	 */
	long
	dbm_hash(char *str, int len) {
		register unsigned long n = 0;

		while (len--)
			n = n * 65599 + *str++;
		return n;
	}
.P2
.PP
There may be better hash functions for the purposes of dynamic hashing.
Try your favorite, and check the pagefile. If it contains too many pages
with too many holes, (in relation to this one for example) or if
\fIsdbm\fP
simply stops working (fails after 
.CW SPLTMAX
attempts to split) when you feed your
NEWS 
.CW history
file to it, you probably do not have a good hashing function.
If you do better (for different types of input), I would like to know
about the function you use.
.PP
Block sizes: It seems (from various tests on a few machines) that a page
file block size
.CW PBLKSIZ
of 1024 is by far the best for performance, but
this also happens to limit the size of a key/value pair. Depending on your
needs, you may wish to increase the page size, and also adjust
.CW PAIRMAX
(the maximum size of a key/value pair allowed: should always be at least
three words smaller than
.CW PBLKSIZ .)
accordingly. The system-wide version of the library
should probably be
configured with 1024 (distribution default), as this appears to be sufficient
for most common uses of \fIsdbm\fP.
.SH
Portability
.PP
This package has been tested in many different UN*Xes even including minix,
and appears to be reasonably portable. This does not mean it will port
easily to non-UN*X systems.
.SH
Notes and Miscellaneous
.PP
The \fIsdbm\fP is not a very complicated package, at least not after you
familiarize yourself with the literature on external hashing. There are
other interesting algorithms in existence that ensure (approximately)
single-read access to a data value associated with any key. These are
directory-less schemes such as \fIlinear hashing\fP [Lit80] (+ Larson
variations), \fIspiral storage\fP [Mar79] or directory schemes such as
\fIextensible hashing\fP [Fag79] by Fagin et al. I do hope these sources
provide a reasonable playground for experimentation with other algorithms.
See the June 1988 issue of ACM Computing Surveys [Enb88] for an
excellent overview of the field. 
.PG
.SH
References
.LP
.IP [Lar78] 4m
P.-A. Larson,
``Dynamic Hashing'', \fIBIT\fP, vol.  18,  pp. 184-201, 1978.
.IP [Tho90] 4m
Ken Thompson, \fIprivate communication\fP, Nov. 1990
.IP [Lit80] 4m
W. Litwin,
`` Linear Hashing: A new tool  for  file  and table addressing'',
\fIProceedings of the 6th Conference on Very Large  Dabatases  (Montreal)\fP,
pp.  212-223,  Very Large Database Foundation, Saratoga, Calif., 1980.
.IP [Fag79] 4m
R. Fagin, J.  Nievergelt,  N.  Pippinger,  and  H.  R. Strong,
``Extendible Hashing - A Fast Access Method for Dynamic Files'',
\fIACM Trans. Database Syst.\fP, vol. 4,  no.3, pp. 315-344, Sept. 1979.
.IP [Wal84] 4m
Rich Wales,
``Discussion of "dbm" data base system'', \fIUSENET newsgroup unix.wizards\fP,
Jan. 1984.
.IP [Tor87] 4m
Chris Torek,
``Re:  dbm.a  and  ndbm.a  archives'', \fIUSENET newsgroup comp.unix\fP,
1987.
.IP [Mar79] 4m
G. N. Martin,
``Spiral Storage: Incrementally  Augmentable  Hash  Addressed  Storage'',
\fITechnical Report #27\fP, University of Varwick, Coventry, U.K., 1979.
.IP [Enb88] 4m
R. J. Enbody and H. C. Du,
``Dynamic Hashing  Schemes'',\fIACM Computing Surveys\fP,
vol. 20, no. 2, pp. 85-113, June 1988.

--- NEW FILE: dba.c ---
/*
 * dba	dbm analysis/recovery
 */

#include <stdio.h>
#include <sys/file.h>
#include "EXTERN.h"
#include "sdbm.h"

char *progname;
extern void oops();

int
main(int argc, char **argv)
{
	int n;
	char *p;
	char *name;
	int pagf;

	progname = argv[0];

	if (p = argv[1]) {
		name = (char *) malloc((n = strlen(p)) + 5);
		if (!name)
		    oops("cannot get memory");

		strcpy(name, p);
		strcpy(name + n, ".pag");

		if ((pagf = open(name, O_RDONLY)) < 0)
			oops("cannot open %s.", name);

		sdump(pagf);
	}
	else
		oops("usage: %s dbname", progname);

	return 0;
}

void
sdump(int pagf)
{
	register b;
	register n = 0;
	register t = 0;
	register o = 0;
	register e;
	char pag[PBLKSIZ];

	while ((b = read(pagf, pag, PBLKSIZ)) > 0) {
		printf("#%d: ", n);
		if (!okpage(pag))
			printf("bad\n");
		else {
			printf("ok. ");
			if (!(e = pagestat(pag)))
			    o++;
			else
			    t += e;
		}
		n++;
	}

	if (b == 0)
		printf("%d pages (%d holes):  %d entries\n", n, o, t);
	else
		oops("read failed: block %d", n);
}

int
pagestat(char *pag)
{
	register n;
	register free;
	register short *ino = (short *) pag;

	if (!(n = ino[0]))
		printf("no entries.\n");
	else {
		free = ino[n] - (n + 1) * sizeof(short);
		printf("%3d entries %2d%% used free %d.\n",
		       n / 2, ((PBLKSIZ - free) * 100) / PBLKSIZ, free);
	}
	return n / 2;
}

--- NEW FILE: biblio ---
%A R. J. Enbody
%A H. C. Du
%T Dynamic Hashing Schemes
%J ACM Computing Surveys
%V 20
%N 2
%D June 1988
%P 85-113
%K surveys

%A P.-A. Larson
%T Dynamic Hashing
%J BIT
%V 18
%P 184-201
%D 1978
%K dynamic

%A W. Litwin
%T Linear Hashing: A new tool for file and table addressing
%J Proceedings of the 6th Conference on Very Large Dabatases (Montreal)
%I Very Large Database Foundation
%C Saratoga, Calif.
%P 212-223
%D 1980
%K linear

%A R. Fagin
%A J. Nievergelt
%A N. Pippinger
%A H. R. Strong
%T Extendible Hashing - A Fast Access Method for Dynamic Files
%J ACM Trans. Database Syst.
%V 4
%N 3
%D Sept. 1979
%P 315-344
%K extend

%A G. N. Martin
%T Spiral Storage: Incrementally Augmentable Hash Addressed Storage
%J Technical Report #27
%I University of Varwick
%C Coventry, U.K.
%D 1979
%K spiral

%A Chris Torek
%T Re: dbm.a and ndbm.a archives
%B USENET newsgroup comp.unix
%D 1987
%K torek

%A Rich Wales
%T Discusson of "dbm" data base system
%B USENET newsgroup unix.wizards
%D Jan. 1984
%K rich

--- NEW FILE: COMPARE ---

Script started on Thu Sep 28 15:41:06 1989
% uname -a
titan titan 4_0 UMIPS mips
% make all x-dbm
        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dbm.c
        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c sdbm.c
        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c pair.c
        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c hash.c
        ar cr libsdbm.a sdbm.o pair.o hash.o
        ranlib libsdbm.a
        cc  -o dbm dbm.o libsdbm.a
        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dba.c
        cc  -o dba dba.o
        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dbd.c
        cc  -o dbd dbd.o
        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -o x-dbm dbm.o
% 
% 
% wc history
  65110 218344 3204883 history
% 
% /bin/time dbm build foo <history

real     5:56.9
user       13.3
sys        26.3
% ls -s
total 14251
   5 README           2 dbd.c            1 hash.c           1 pair.h
   0 SCRIPT           5 dbd.o            1 hash.o           5 pair.o
   1 WISHLIST        62 dbm           3130 history          1 port.h
  46 dba              5 dbm.c           11 howtodbm.txt    11 sdbm.c
   3 dba.c            8 dbm.o           14 libsdbm.a        2 sdbm.h
   6 dba.o            4 foo.dir          1 makefile         8 sdbm.o
  46 dbd           10810 foo.pag         6 pair.c          60 x-dbm
% ls -l foo.*
-rw-r--r--  1 oz           4096 Sep 28 15:48 foo.dir
-rw-r--r--  1 oz       11069440 Sep 28 15:48 foo.pag
% 
% /bin/time x-dbm build bar <history

real     5:59.4
user       24.7
sys        29.1
% 
% ls -s
total 27612
   5 README          46 dbd              1 hash.c           5 pair.o
   1 SCRIPT           2 dbd.c            1 hash.o           1 port.h
   1 WISHLIST         5 dbd.o         3130 history         11 sdbm.c
   4 bar.dir         62 dbm             11 howtodbm.txt     2 sdbm.h
13356 bar.pag         5 dbm.c           14 libsdbm.a        8 sdbm.o
  46 dba              8 dbm.o            1 makefile        60 x-dbm
   3 dba.c            4 foo.dir          6 pair.c
   6 dba.o         10810 foo.pag         1 pair.h
% 
% ls -l bar.*
-rw-r--r--  1 oz           4096 Sep 28 15:54 bar.dir
-rw-r--r--  1 oz       13676544 Sep 28 15:54 bar.pag
% 
% dba foo | tail
#10801: ok. no entries.
#10802: ok. no entries.
#10803: ok. no entries.
#10804: ok. no entries.
#10805: ok. no entries.
#10806: ok. no entries.
#10807: ok. no entries.
#10808: ok. no entries.
#10809: ok.  11 entries 67% used free 337.
10810 pages (6036 holes):  65073 entries
% 
% dba bar | tail
#13347: ok. no entries.
#13348: ok. no entries.
#13349: ok. no entries.
#13350: ok. no entries.
#13351: ok. no entries.
#13352: ok. no entries.
#13353: ok. no entries.
#13354: ok. no entries.
#13355: ok.   7 entries 33% used free 676.
13356 pages (8643 holes):  65073 entries
%
% exit
script done on Thu Sep 28 16:08:45 1989

--- NEW FILE: Makefile.PL ---
use ExtUtils::MakeMaker;

my $define = '-DSDBM -DDUFF';
$define .= ' -DWIN32 -DPERL_STATIC_SYMS' if ($^O eq 'MSWin32');

if ($^O eq 'VMS') {  # Old VAXC compiler can't handle Duff's device
    require Config;
    $define =~ s/\s+-DDUFF// if $Config::Config{'vms_cc_type'} eq 'vaxc';
}

WriteMakefile(
    NAME      => 'sdbm', # (doesn't matter what the name is here) oh yes it does
#    LINKTYPE  => 'static',
    DEFINE    => $define,
    INC       => '-I$(PERL_INC)', # force PERL_INC dir ahead of system -I's
    SKIP      => [qw(dynamic dynamic_lib dlsyms)],
    OBJECT    => '$(O_FILES)',
    clean     => {'FILES' => 'dbu libsdbm.a dbd dba dbe x-dbu *.dir *.pag'},
    H         => [qw(tune.h sdbm.h pair.h $(PERL_INC)/config.h)],
    C         => [qw(sdbm.c pair.c hash.c)]
);

sub MY::constants {
    package MY;
    my $self = shift;

    $self->{INST_STATIC} = 'libsdbm$(LIB_EXT)';

    return $self->SUPER::constants();
}

sub MY::top_targets {
    my $r = '
all :: static
	$(NOECHO) $(NOOP)

config ::
	$(NOECHO) $(NOOP)

lint:
	lint -abchx $(LIBSRCS)

';
    $r .= '
# This is a workaround, the problem is that our old GNU make exports
# variables into the environment so $(MYEXTLIB) is set in here to this
# value which can not be built.
sdbm/libsdbm.a:
	$(NOECHO) $(NOOP)
' unless $^O eq 'VMS';

    return $r;
}

--- NEW FILE: sdbm.h ---
/*
 * sdbm - ndbm work-alike hashed database library
 * based on Per-Ake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
 * author: oz at nexus.yorku.ca
 * status: public domain. 
 */
#define DBLKSIZ 4096
#define PBLKSIZ 1024
#define PAIRMAX 1008			/* arbitrary on PBLKSIZ-N */
#define SPLTMAX	10			/* maximum allowed splits */
					/* for a single insertion */
#ifdef VMS
#define DIRFEXT	".sdbm_dir"
#else
#define DIRFEXT	".dir"
#endif
#define PAGFEXT	".pag"

typedef struct {
	int dirf;		       /* directory file descriptor */
	int pagf;		       /* page file descriptor */
	int flags;		       /* status/error flags, see below */
	long maxbno;		       /* size of dirfile in bits */
	long curbit;		       /* current bit number */
	long hmask;		       /* current hash mask */
	long blkptr;		       /* current block for nextkey */
	int keyptr;		       /* current key for nextkey */
	long blkno;		       /* current page to read/write */
	long pagbno;		       /* current page in pagbuf */
	char pagbuf[PBLKSIZ];	       /* page file block buffer */
	long dirbno;		       /* current block in dirbuf */
	char dirbuf[DBLKSIZ];	       /* directory file block buffer */
} DBM;

#define DBM_RDONLY	0x1	       /* data base open read-only */
#define DBM_IOERR	0x2	       /* data base I/O error */

/*
 * utility macros
 */
#define sdbm_rdonly(db)		((db)->flags & DBM_RDONLY)
#define sdbm_error(db)		((db)->flags & DBM_IOERR)

#define sdbm_clearerr(db)	((db)->flags &= ~DBM_IOERR)  /* ouch */

#define sdbm_dirfno(db)	((db)->dirf)
#define sdbm_pagfno(db)	((db)->pagf)

typedef struct {
	char *dptr;
	int dsize;
} datum;

EXTCONST datum nullitem
#ifdef DOINIT
                        = {0, 0}
#endif
                                   ;

#if defined(__STDC__) || defined(__cplusplus) || defined(CAN_PROTOTYPE)
#define proto(p) p
#else
#define proto(p) ()
#endif

/*
 * flags to sdbm_store
 */
#define DBM_INSERT	0
#define DBM_REPLACE	1

/*
 * ndbm interface
 */
extern DBM *sdbm_open proto((char *, int, int));
extern void sdbm_close proto((DBM *));
extern datum sdbm_fetch proto((DBM *, datum));
extern int sdbm_delete proto((DBM *, datum));
extern int sdbm_store proto((DBM *, datum, datum, int));
extern datum sdbm_firstkey proto((DBM *));
extern datum sdbm_nextkey proto((DBM *));
extern int sdbm_exists proto((DBM *, datum));

/*
 * other
 */
extern DBM *sdbm_prep proto((char *, char *, int, int));
extern long sdbm_hash proto((char *, int));

#ifndef SDBM_ONLY
#define dbm_open sdbm_open
#define dbm_close sdbm_close
#define dbm_fetch sdbm_fetch
#define dbm_store sdbm_store
#define dbm_delete sdbm_delete
#define dbm_firstkey sdbm_firstkey
#define dbm_nextkey sdbm_nextkey
#define dbm_error sdbm_error
#define dbm_clearerr sdbm_clearerr
#endif

/* Most of the following is stolen from perl.h.  We don't include
   perl.h here because we just want the portability parts of perl.h,
   not everything else.
*/
#ifndef H_PERL  /* Include guard */
#include "embed.h"  /* Follow all the global renamings. */

/*
 * The following contortions are brought to you on behalf of all the
 * standards, semi-standards, de facto standards, not-so-de-facto standards
 * of the world, as well as all the other botches anyone ever thought of.
 * The basic theory is that if we work hard enough here, the rest of the
 * code can be a lot prettier.  Well, so much for theory.  Sorry, Henry...
 */

#include <errno.h>
#ifdef HAS_SOCKET
#   ifdef I_NET_ERRNO
#     include <net/errno.h>
#   endif
#endif

#if defined(__STDC__) || defined(_AIX) || defined(__stdc__) || defined(__cplusplus)
# define STANDARD_C 1
#endif

#include <stdio.h>
#include <ctype.h>
#include <setjmp.h>

#if defined(I_UNISTD)
#include <unistd.h>
#endif

#ifdef VMS
#  include <file.h>
#  include <unixio.h>
#endif

#ifdef I_SYS_PARAM
#   if !defined(MSDOS) && !defined(WIN32) && !defined(VMS)
#       ifdef PARAM_NEEDS_TYPES
#	    include <sys/types.h>
#       endif
#       include <sys/param.h>
#   endif
#endif

#ifndef _TYPES_		/* If types.h defines this it's easy. */
#   ifndef major		/* Does everyone's types.h define this? */
#	include <sys/types.h>
#   endif
#endif

#include <sys/stat.h>

#ifndef SEEK_SET
# ifdef L_SET
#  define SEEK_SET	L_SET
# else
#  define SEEK_SET	0  /* Wild guess. */
# endif
#endif

/* Use all the "standard" definitions? */
#if defined(STANDARD_C) && defined(I_STDLIB)
#   include <stdlib.h>
#endif /* STANDARD_C */

#define MEM_SIZE Size_t

/* This comes after <stdlib.h> so we don't try to change the standard
 * library prototypes; we'll use our own instead. */

#if defined(MYMALLOC) && !defined(PERL_POLLUTE_MALLOC)
#  define malloc  Perl_malloc
#  define calloc  Perl_calloc
#  define realloc Perl_realloc
#  define free    Perl_mfree

Malloc_t Perl_malloc proto((MEM_SIZE nbytes));
Malloc_t Perl_calloc proto((MEM_SIZE elements, MEM_SIZE size));
Malloc_t Perl_realloc proto((Malloc_t where, MEM_SIZE nbytes));
Free_t   Perl_mfree proto((Malloc_t where));
#endif /* MYMALLOC */

#ifdef I_STRING
# ifndef __ultrix__
#  include <string.h>
# endif
#else
# include <strings.h>
#endif

#ifdef I_MEMORY
#include <memory.h>
#endif      

#ifdef __cplusplus
#define HAS_MEMCPY
#endif

#ifdef HAS_MEMCPY
#  if !defined(STANDARD_C) && !defined(I_STRING) && !defined(I_MEMORY)
#    ifndef memcpy
        extern char * memcpy proto((char*, char*, int));
#    endif
#  endif
#else
#   ifndef memcpy
#	ifdef HAS_BCOPY
#	    define memcpy(d,s,l) bcopy(s,d,l)
#	else
#	    define memcpy(d,s,l) my_bcopy(s,d,l)
#	endif
#   endif
#endif /* HAS_MEMCPY */

#ifdef HAS_MEMSET
#  if !defined(STANDARD_C) && !defined(I_STRING) && !defined(I_MEMORY)
#    ifndef memset
	extern char *memset proto((char*, int, int));
#    endif
#  endif
#  define memzero(d,l) memset(d,0,l)
#else
#   ifndef memzero
#	ifdef HAS_BZERO
#	    define memzero(d,l) bzero(d,l)
#	else
#	    define memzero(d,l) my_bzero(d,l)
#	endif
#   endif
#endif /* HAS_MEMSET */

#if defined(mips) && defined(ultrix) && !defined(__STDC__)
#   undef HAS_MEMCMP
#endif

#if defined(HAS_MEMCMP) && defined(HAS_SANE_MEMCMP)
#  if !defined(STANDARD_C) && !defined(I_STRING) && !defined(I_MEMORY)
#    ifndef memcmp
	extern int memcmp proto((char*, char*, int));
#    endif
#  endif
#  ifdef BUGGY_MSC
#    pragma function(memcmp)
#  endif
#else
#   ifndef memcmp
	/* maybe we should have included the full embedding header... */
#	ifdef NO_EMBED
#	    define memcmp my_memcmp
#	else
#	    define memcmp Perl_my_memcmp
#	endif
#ifndef __cplusplus
	extern int memcmp proto((char*, char*, int));
#endif
#   endif
#endif /* HAS_MEMCMP */

#ifndef HAS_BCMP
#   ifndef bcmp
#	define bcmp(s1,s2,l) memcmp(s1,s2,l)
#   endif
#endif /* !HAS_BCMP */

#ifdef HAS_MEMCMP
#  define memNE(s1,s2,l) (memcmp(s1,s2,l))
#  define memEQ(s1,s2,l) (!memcmp(s1,s2,l))
#else
#  define memNE(s1,s2,l) (bcmp(s1,s2,l))
#  define memEQ(s1,s2,l) (!bcmp(s1,s2,l))
#endif

#ifdef I_NETINET_IN
#  ifdef VMS
#    include <in.h>
#  else
#    include <netinet/in.h>
#  endif
#endif

#endif /* Include guard */

--- NEW FILE: dbd.c ---
/*
 * dbd - dump a dbm data file
 */

#include <stdio.h>
#include <sys/file.h>
#include "EXTERN.h"
#include "sdbm.h"

char *progname;
extern void oops();

#define empty(page)	(((short *) page)[0] == 0)

int
main(int argc, char **argv)
{
	int n;
	char *p;
	char *name;
	int pagf;

	progname = argv[0];

	if (p = argv[1]) {
		name = (char *) malloc((n = strlen(p)) + 5);
		if (!name)
		    oops("cannot get memory");

		strcpy(name, p);
		strcpy(name + n, ".pag");

		if ((pagf = open(name, O_RDONLY)) < 0)
			oops("cannot open %s.", name);

		sdump(pagf);
	}
	else
		oops("usage: %s dbname", progname);
	return 0;
}

void
sdump(int pagf)
{
	register r;
	register n = 0;
	register o = 0;
	char pag[PBLKSIZ];

	while ((r = read(pagf, pag, PBLKSIZ)) > 0) {
		if (!okpage(pag))
			fprintf(stderr, "%d: bad page.\n", n);
		else if (empty(pag))
			o++;
		else
			dispage(pag);
		n++;
	}

	if (r == 0)
		fprintf(stderr, "%d pages (%d holes).\n", n, o);
	else
		oops("read failed: block %d", n);
}

#ifdef OLD
int
dispage(char *pag)
{
	register i, n;
	register off;
	register short *ino = (short *) pag;

	off = PBLKSIZ;
	for (i = 1; i < ino[0]; i += 2) {
		printf("\t[%d]: ", ino[i]);
		for (n = ino[i]; n < off; n++)
			putchar(pag[n]);
		putchar(' ');
		off = ino[i];
		printf("[%d]: ", ino[i + 1]);
		for (n = ino[i + 1]; n < off; n++)
			putchar(pag[n]);
		off = ino[i + 1];
		putchar('\n');
	}
}
#else
void
dispage(char *pag)
{
	register i, n;
	register off;
	register short *ino = (short *) pag;

	off = PBLKSIZ;
	for (i = 1; i < ino[0]; i += 2) {
		for (n = ino[i]; n < off; n++)
			if (pag[n] != 0)
				putchar(pag[n]);
		putchar('\t');
		off = ino[i];
		for (n = ino[i + 1]; n < off; n++)
			if (pag[n] != 0)
				putchar(pag[n]);
		putchar('\n');
		off = ino[i + 1];
	}
}
#endif

--- NEW FILE: sdbm.c ---
/*
 * sdbm - ndbm work-alike hashed database library
 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
 * author: oz at nexus.yorku.ca
 * status: public domain.
 *
 * core routines
 */

#include "INTERN.h"
#include "config.h"
#ifdef WIN32
#include "io.h"
#endif
#include "sdbm.h"
#include "tune.h"
#include "pair.h"

#ifdef I_FCNTL
# include <fcntl.h>
#endif
#ifdef I_SYS_FILE
# include <sys/file.h>
#endif

#ifdef I_STRING
# ifndef __ultrix__
#  include <string.h>
# endif
#else
# include <strings.h>
#endif

/*
 * externals
 */

#include <errno.h> /* See notes in perl.h about avoiding
			extern int errno; */

extern Malloc_t malloc proto((MEM_SIZE));
extern Free_t free proto((Malloc_t));

/*
 * forward
 */
static int getdbit proto((DBM *, long));
static int setdbit proto((DBM *, long));
static int getpage proto((DBM *, long));
static datum getnext proto((DBM *));
static int makroom proto((DBM *, long, int));

/*
 * useful macros
 */
#define bad(x)		((x).dptr == NULL || (x).dsize < 0)
#define exhash(item)	sdbm_hash((item).dptr, (item).dsize)
#define ioerr(db)	((db)->flags |= DBM_IOERR)

#define OFF_PAG(off)	(long) (off) * PBLKSIZ
#define OFF_DIR(off)	(long) (off) * DBLKSIZ

static long masks[] = {
	000000000000, 000000000001, 000000000003, 000000000007,
	000000000017, 000000000037, 000000000077, 000000000177,
	000000000377, 000000000777, 000000001777, 000000003777,
	000000007777, 000000017777, 000000037777, 000000077777,
	000000177777, 000000377777, 000000777777, 000001777777,
	000003777777, 000007777777, 000017777777, 000037777777,
	000077777777, 000177777777, 000377777777, 000777777777,
	001777777777, 003777777777, 007777777777, 017777777777
};

DBM *
sdbm_open(register char *file, register int flags, register int mode)
{
	register DBM *db;
	register char *dirname;
	register char *pagname;
	register int n;

	if (file == NULL || !*file)
		return errno = EINVAL, (DBM *) NULL;
/*
 * need space for two seperate filenames
 */
	n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;

	if ((dirname = (char *) malloc((unsigned) n)) == NULL)
		return errno = ENOMEM, (DBM *) NULL;
/*
 * build the file names
 */
	dirname = strcat(strcpy(dirname, file), DIRFEXT);
	pagname = strcpy(dirname + strlen(dirname) + 1, file);
	pagname = strcat(pagname, PAGFEXT);

	db = sdbm_prep(dirname, pagname, flags, mode);
	free((char *) dirname);
	return db;
}

DBM *
sdbm_prep(char *dirname, char *pagname, int flags, int mode)
{
	register DBM *db;
	struct stat dstat;

	if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
		return errno = ENOMEM, (DBM *) NULL;

        db->flags = 0;
        db->hmask = 0;
        db->blkptr = 0;
        db->keyptr = 0;
/*
 * adjust user flags so that WRONLY becomes RDWR, 
 * as required by this package. Also set our internal
 * flag for RDONLY if needed.
 */
	if (flags & O_WRONLY)
		flags = (flags & ~O_WRONLY) | O_RDWR;

	else if ((flags & 03) == O_RDONLY)
		db->flags = DBM_RDONLY;
/*
 * open the files in sequence, and stat the dirfile.
 * If we fail anywhere, undo everything, return NULL.
 */
#if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__)
	flags |= O_BINARY;
#	endif
	if ((db->pagf = open(pagname, flags, mode)) > -1) {
		if ((db->dirf = open(dirname, flags, mode)) > -1) {
/*
 * need the dirfile size to establish max bit number.
 */
			if (fstat(db->dirf, &dstat) == 0) {
/*
 * zero size: either a fresh database, or one with a single,
 * unsplit data page: dirpage is all zeros.
 */
				db->dirbno = (!dstat.st_size) ? 0 : -1;
				db->pagbno = -1;
				db->maxbno = dstat.st_size * BYTESIZ;

				(void) memset(db->pagbuf, 0, PBLKSIZ);
				(void) memset(db->dirbuf, 0, DBLKSIZ);
			/*
			 * success
			 */
				return db;
			}
			(void) close(db->dirf);
		}
		(void) close(db->pagf);
	}
	free((char *) db);
	return (DBM *) NULL;
}

void
sdbm_close(register DBM *db)
{
	if (db == NULL)
		errno = EINVAL;
	else {
		(void) close(db->dirf);
		(void) close(db->pagf);
		free((char *) db);
	}
}

datum
sdbm_fetch(register DBM *db, datum key)
{
	if (db == NULL || bad(key))
		return errno = EINVAL, nullitem;

	if (getpage(db, exhash(key)))
		return getpair(db->pagbuf, key);

	return ioerr(db), nullitem;
}

int
sdbm_exists(register DBM *db, datum key)
{
	if (db == NULL || bad(key))
		return errno = EINVAL, -1;

	if (getpage(db, exhash(key)))
		return exipair(db->pagbuf, key);

	return ioerr(db), -1;
}

int
sdbm_delete(register DBM *db, datum key)
{
	if (db == NULL || bad(key))
		return errno = EINVAL, -1;
	if (sdbm_rdonly(db))
		return errno = EPERM, -1;

	if (getpage(db, exhash(key))) {
		if (!delpair(db->pagbuf, key))
			return -1;
/*
 * update the page file
 */
		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
			return ioerr(db), -1;

		return 0;
	}

	return ioerr(db), -1;
}

int
sdbm_store(register DBM *db, datum key, datum val, int flags)
{
	int need;
	register long hash;

	if (db == NULL || bad(key))
		return errno = EINVAL, -1;
	if (sdbm_rdonly(db))
		return errno = EPERM, -1;

	need = key.dsize + val.dsize;
/*
 * is the pair too big (or too small) for this database ??
 */
	if (need < 0 || need > PAIRMAX)
		return errno = EINVAL, -1;

	if (getpage(db, (hash = exhash(key)))) {
/*
 * if we need to replace, delete the key/data pair
 * first. If it is not there, ignore.
 */
		if (flags == DBM_REPLACE)
			(void) delpair(db->pagbuf, key);
#ifdef SEEDUPS
		else if (duppair(db->pagbuf, key))
			return 1;
#endif
/*
 * if we do not have enough room, we have to split.
 */
		if (!fitpair(db->pagbuf, need))
			if (!makroom(db, hash, need))
				return ioerr(db), -1;
/*
 * we have enough room or split is successful. insert the key,
 * and update the page file.
 */
		(void) putpair(db->pagbuf, key, val);

		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
			return ioerr(db), -1;
	/*
	 * success
	 */
		return 0;
	}

	return ioerr(db), -1;
}

/*
 * makroom - make room by splitting the overfull page
 * this routine will attempt to make room for SPLTMAX times before
 * giving up.
 */
static int
makroom(register DBM *db, long int hash, int need)
{
	long newp;
	char twin[PBLKSIZ];
#if defined(DOSISH) || defined(WIN32)
	char zer[PBLKSIZ];
	long oldtail;
#endif
	char *pag = db->pagbuf;
	char *New = twin;
	register int smax = SPLTMAX;

	do {
/*
 * split the current page
 */
		(void) splpage(pag, New, db->hmask + 1);
/*
 * address of the new page
 */
		newp = (hash & db->hmask) | (db->hmask + 1);

/*
 * write delay, read avoidence/cache shuffle:
 * select the page for incoming pair: if key is to go to the new page,
 * write out the previous one, and copy the new one over, thus making
 * it the current page. If not, simply write the new page, and we are
 * still looking at the page of interest. current page is not updated
 * here, as sdbm_store will do so, after it inserts the incoming pair.
 */

#if defined(DOSISH) || defined(WIN32)
		/*
		 * Fill hole with 0 if made it.
		 * (hole is NOT read as 0)
		 */
		oldtail = lseek(db->pagf, 0L, SEEK_END);
		memset(zer, 0, PBLKSIZ);
		while (OFF_PAG(newp) > oldtail) {
			if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
			    write(db->pagf, zer, PBLKSIZ) < 0) {

				return 0;
			}
			oldtail += PBLKSIZ;
		}
#endif
		if (hash & (db->hmask + 1)) {
			if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
			    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
				return 0;
			db->pagbno = newp;
			(void) memcpy(pag, New, PBLKSIZ);
		}
		else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
			 || write(db->pagf, New, PBLKSIZ) < 0)
			return 0;

		if (!setdbit(db, db->curbit))
			return 0;
/*
 * see if we have enough room now
 */
		if (fitpair(pag, need))
			return 1;
/*
 * try again... update curbit and hmask as getpage would have
 * done. because of our update of the current page, we do not
 * need to read in anything. BUT we have to write the current
 * [deferred] page out, as the window of failure is too great.
 */
		db->curbit = 2 * db->curbit +
			((hash & (db->hmask + 1)) ? 2 : 1);
		db->hmask |= db->hmask + 1;

		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
			return 0;

	} while (--smax);
/*
 * if we are here, this is real bad news. After SPLTMAX splits,
 * we still cannot fit the key. say goodnight.
 */
#ifdef BADMESS
	(void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
#endif
	return 0;

}

/*
 * the following two routines will break if
 * deletions aren't taken into account. (ndbm bug)
 */
datum
sdbm_firstkey(register DBM *db)
{
	if (db == NULL)
		return errno = EINVAL, nullitem;
/*
 * start at page 0
 */
	if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
	    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
		return ioerr(db), nullitem;
	db->pagbno = 0;
	db->blkptr = 0;
	db->keyptr = 0;

	return getnext(db);
}

datum
sdbm_nextkey(register DBM *db)
{
	if (db == NULL)
		return errno = EINVAL, nullitem;
	return getnext(db);
}

/*
 * all important binary trie traversal
 */
static int
getpage(register DBM *db, register long int hash)
{
	register int hbit;
	register long dbit;
	register long pagb;

	dbit = 0;
	hbit = 0;
	while (dbit < db->maxbno && getdbit(db, dbit))
		dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);

	debug(("dbit: %d...", dbit));

	db->curbit = dbit;
	db->hmask = masks[hbit];

	pagb = hash & db->hmask;
/*
 * see if the block we need is already in memory.
 * note: this lookaside cache has about 10% hit rate.
 */
	if (pagb != db->pagbno) { 
/*
 * note: here, we assume a "hole" is read as 0s.
 * if not, must zero pagbuf first.
 */
		if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
		    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
			return 0;
		if (!chkpage(db->pagbuf))
			return 0;
		db->pagbno = pagb;

		debug(("pag read: %d\n", pagb));
	}
	return 1;
}

static int
getdbit(register DBM *db, register long int dbit)
{
	register long c;
	register long dirb;

	c = dbit / BYTESIZ;
	dirb = c / DBLKSIZ;

	if (dirb != db->dirbno) {
		int got;
		if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
		    || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
			return 0;
		if (got==0) 
			memset(db->dirbuf,0,DBLKSIZ);
		db->dirbno = dirb;

		debug(("dir read: %d\n", dirb));
	}

	return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
}

static int
setdbit(register DBM *db, register long int dbit)
{
	register long c;
	register long dirb;

	c = dbit / BYTESIZ;
	dirb = c / DBLKSIZ;

	if (dirb != db->dirbno) {
		int got;
		if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
		    || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
			return 0;
		if (got==0) 
			memset(db->dirbuf,0,DBLKSIZ);
		db->dirbno = dirb;

		debug(("dir read: %d\n", dirb));
	}

	db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);

#if 0
	if (dbit >= db->maxbno)
		db->maxbno += DBLKSIZ * BYTESIZ;
#else
	if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno) 
		db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
#endif

	if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
	    || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
		return 0;

	return 1;
}

/*
 * getnext - get the next key in the page, and if done with
 * the page, try the next page in sequence
 */
static datum
getnext(register DBM *db)
{
	datum key;

	for (;;) {
		db->keyptr++;
		key = getnkey(db->pagbuf, db->keyptr);
		if (key.dptr != NULL)
			return key;
/*
 * we either run out, or there is nothing on this page..
 * try the next one... If we lost our position on the
 * file, we will have to seek.
 */
		db->keyptr = 0;
		if (db->pagbno != db->blkptr++)
			if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
				break;
		db->pagbno = db->blkptr;
		if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
			break;
		if (!chkpage(db->pagbuf))
			break;
	}

	return ioerr(db), nullitem;
}

--- NEW FILE: grind ---
#!/bin/sh
rm -f /tmp/*.dir /tmp/*.pag
awk -e '{
        printf "%s\t", $0
        for (i = 0; i < 40; i++)
                printf "%s.", $0
        printf "\n"
}' < /usr/dict/words | $1 build /tmp/$2