dslinux/user/perl/lib/Text Abbrev.pm Abbrev.t Balanced.pm ParseWords.pm ParseWords.t Soundex.pm Soundex.t Tabs.pm Wrap.pm

cayenne dslinux_cayenne at user.in-berlin.de
Mon Dec 4 18:01:11 CET 2006


Update of /cvsroot/dslinux/dslinux/user/perl/lib/Text
In directory antilope:/tmp/cvs-serv17422/lib/Text

Added Files:
	Abbrev.pm Abbrev.t Balanced.pm ParseWords.pm ParseWords.t 
	Soundex.pm Soundex.t Tabs.pm Wrap.pm 
Log Message:
Adding fresh perl source to HEAD to branch from

--- NEW FILE: Tabs.pm ---

package Text::Tabs;

require Exporter;

@ISA = (Exporter);
@EXPORT = qw(expand unexpand $tabstop);

use vars qw($VERSION $tabstop $debug);
$VERSION = 2005.0824;

use strict;

BEGIN	{
	$tabstop = 8;
	$debug = 0;
}

sub expand {
	my @l;
	my $pad;
	for ( @_ ) {
		my $s = '';
		for (split(/^/m, $_, -1)) {
			my $offs = 0;
			s{\t}{
				$pad = $tabstop - (pos() + $offs) % $tabstop;
				$offs += $pad - 1;
				" " x $pad;
			}eg;
			$s .= $_;
		}
		push(@l, $s);
	}
	return @l if wantarray;
	return $l[0];
}

sub unexpand
{
	my (@l) = @_;
	my @e;
	my $x;
	my $line;
	my @lines;
	my $lastbit;
	for $x (@l) {
		@lines = split("\n", $x, -1);
		for $line (@lines) {
			$line = expand($line);
			@e = split(/(.{$tabstop})/,$line,-1);
			$lastbit = pop(@e);
			$lastbit = '' unless defined $lastbit;
			$lastbit = "\t"
				if $lastbit eq " "x$tabstop;
			for $_ (@e) {
				if ($debug) {
					my $x = $_;
					$x =~ s/\t/^I\t/gs;
					print "sub on '$x'\n";
				}
				s/  +$/\t/;
			}
			$line = join('', at e, $lastbit);
		}
		$x = join("\n", @lines);
	}
	return @l if wantarray;
	return $l[0];
}

1;
__END__

sub expand
{
	my (@l) = @_;
	for $_ (@l) {
		1 while s/(^|\n)([^\t\n]*)(\t+)/
			$1. $2 . (" " x 
				($tabstop * length($3)
				- (length($2) % $tabstop)))
			/sex;
	}
	return @l if wantarray;
	return $l[0];
}


=head1 NAME

Text::Tabs -- expand and unexpand tabs per the unix expand(1) and unexpand(1)

=head1 SYNOPSIS

  use Text::Tabs;

  $tabstop = 4;
  @lines_without_tabs = expand(@lines_with_tabs);
  @lines_with_tabs = unexpand(@lines_without_tabs);

=head1 DESCRIPTION

Text::Tabs does about what the unix utilities expand(1) and unexpand(1)
do.  Given a line with tabs in it, expand will replace the tabs with
the appropriate number of spaces.  Given a line with or without tabs in
it, unexpand will add tabs when it can save bytes by doing so.  Invisible
compression with plain ascii!

=head1 BUGS

expand doesn't handle newlines very quickly -- do not feed it an
entire document in one string.  Instead feed it an array of lines.

=head1 LICENSE

Copyright (C) 1996-2002,2005 David Muir Sharnoff.  
Copyright (C) 2005 Aristotle Pagaltzis 
This module may be modified, used, copied, and redistributed at your own risk.
Publicly redistributed modified versions must use a different name.


--- NEW FILE: ParseWords.t ---
#!./perl

BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
}

use warnings;
use Text::ParseWords;

print "1..22\n";

@words = shellwords(qq(foo "bar quiz" zoo));
print "not " if $words[0] ne 'foo';
print "ok 1\n";
print "not " if $words[1] ne 'bar quiz';
print "ok 2\n";
print "not " if $words[2] ne 'zoo';
print "ok 3\n";

{
  # Gonna get some undefined things back
  no warnings 'uninitialized' ;

  # Test quotewords() with other parameters and null last field
  @words = quotewords(':+', 1, 'foo:::"bar:foo":zoo zoo:');
  print "not " unless join(";", @words) eq qq(foo;"bar:foo";zoo zoo;);
  print "ok 4\n";
}

# Test $keep eq 'delimiters' and last field zero
@words = quotewords('\s+', 'delimiters', '4 3 2 1 0');
print "not " unless join(";", @words) eq qq(4; ;3; ;2; ;1; ;0);
print "ok 5\n";

# Big ol' nasty test (thanks, Joerk!)
$string = 'aaaa"bbbbb" cc\\ cc \\\\\\"dddd" eee\\\\\\"ffff" "gg"';

# First with $keep == 1
$result = join('|', parse_line('\s+', 1, $string));
print "not " unless $result eq 'aaaa"bbbbb"|cc\\ cc|\\\\\\"dddd" eee\\\\\\"ffff"|"gg"';
print "ok 6\n";

# Now, $keep == 0
$result = join('|', parse_line('\s+', 0, $string));
print "not " unless $result eq 'aaaabbbbb|cc cc|\\"dddd eee\\"ffff|gg';
print "ok 7\n";

# Now test single quote behavior
$string = 'aaaa"bbbbb" cc\\ cc \\\\\\"dddd\' eee\\\\\\"ffff\' gg';
$result = join('|', parse_line('\s+', 0, $string));
print "not " unless $result eq 'aaaabbbbb|cc cc|\\"dddd eee\\\\\\"ffff|gg';
print "ok 8\n";

# Make sure @nested_quotewords does the right thing
@lists = nested_quotewords('\s+', 0, 'a b c', '1 2 3', 'x y z');
print "not " unless (@lists == 3 && @{$lists[0]} == 3 && @{$lists[1]} == 3 && @{$lists[2]} == 3);
print "ok 9\n";

# Now test error return
$string = 'foo bar baz"bach blech boop';

@words = shellwords($string);
print "not " if (@words);
print "ok 10\n";

@words = parse_line('s+', 0, $string);
print "not " if (@words);
print "ok 11\n";

@words = quotewords('s+', 0, $string);
print "not " if (@words);
print "ok 12\n";

{
  # Gonna get some more undefined things back
  no warnings 'uninitialized' ;

  @words = nested_quotewords('s+', 0, $string);
  print "not " if (@words);
  print "ok 13\n";

  # Now test empty fields
  $result = join('|', parse_line(':', 0, 'foo::0:"":::'));
  print "not " unless ($result eq 'foo||0||||');
  print "ok 14\n";

  # Test for 0 in quotes without $keep
  $result = join('|', parse_line(':', 0, ':"0":'));
  print "not " unless ($result eq '|0|');
  print "ok 15\n";

  # Test for \001 in quoted string
  $result = join('|', parse_line(':', 0, ':"' . "\001" . '":'));
  print "not " unless ($result eq "|\1|");
  print "ok 16\n";

}

# Now test perlish single quote behavior
$Text::ParseWords::PERL_SINGLE_QUOTE = 1;
$string = 'aaaa"bbbbb" cc\ cc \\\\\"dddd\' eee\\\\\"\\\'ffff\' gg';
$result = join('|', parse_line('\s+', 0, $string));
print "not " unless $result eq 'aaaabbbbb|cc cc|\"dddd eee\\\\"\'ffff|gg';
print "ok 17\n";

# test whitespace in the delimiters
@words = quotewords(' ', 1, '4 3 2 1 0');
print "not " unless join(";", @words) eq qq(4;3;2;1;0);
print "ok 18\n";

# [perl #30442] Text::ParseWords does not handle backslashed newline inside quoted text
$string = qq{"field1"	"field2\\\nstill field2"	"field3"};

$result = join('|', parse_line("\t", 1, $string));
print "not " unless $result eq qq{"field1"|"field2\\\nstill field2"|"field3"};
print "ok 19\n";

$result = join('|', parse_line("\t", 0, $string));
print "not " unless $result eq "field1|field2\nstill field2|field3";
print "ok 20\n";

# unicode
$string = qq{"field1"\x{1234}"field2\\\x{1234}still field2"\x{1234}"field3"};
$result = join('|', parse_line("\x{1234}", 0, $string));
print "not " unless $result eq "field1|field2\x{1234}still field2|field3";
print "ok 21\n";

# missing quote after matching regex used to hang after change #22997
"1234" =~ /(1)(2)(3)(4)/;
$string = qq{"missing quote};
$result = join('|', shellwords($string));
print "not " unless $result eq "";
print "ok 22\n";

--- NEW FILE: Soundex.t ---
#!./perl
#
# $Id: Soundex.t,v 1.1 2006-12-04 17:01:09 dslinux_cayenne Exp $
#
# test module for soundex.pl
#
# $Log: Soundex.t,v $
# Revision 1.1  2006-12-04 17:01:09  dslinux_cayenne
# Adding fresh perl source to HEAD to branch from
#
# Revision 1.2  1994/03/24  00:30:27  mike
# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder at hsc.usc.edu>
# in the way I handles leasing characters which were different but had
# the same soundex code.  This showed up comparing it with Oracle's
# soundex output.
#
# Revision 1.1  1994/03/02  13:03:02  mike
# Initial revision
#
#

BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
}

use Text::Soundex;

$test = 0;
print "1..13\n";

while (<DATA>)
{
  chop;
  next if /^\s*;?#/;
  next if /^\s*$/;

  ++$test;
  $bad = 0;

  if (/^eval\s+/)
  {
    ($try = $_) =~ s/^eval\s+//;

    eval ($try);
    if ($@)
    {
      $bad++;
      print "not ok $test\n";
      print "# eval '$try' returned $@";
    }
  }
  elsif (/^\(/)
  {
    ($in, $out) = split (':');

    $try = "\@expect = $out; \@got = &soundex $in;";
    eval ($try);

    if (@expect != @got)
    {
      $bad++;
      print "not ok $test\n";
      print "# expected ", scalar @expect, " results, got ", scalar @got, "\n";
      print "# expected (", join (', ', @expect),
	    ") got (", join (', ', @got), ")\n";
    }
    else
    {
      while (@got)
      {
	$expect = shift @expect;
	$got = shift @got;

	if ($expect ne $got)
	{
	  $bad++;
	  print "not ok $test\n";
	  print "# expected $expect, got $got\n";
	}
      }
    }
  }
  else
  {
    ($in, $out) = split (':');

    $try = "\$expect = $out; \$got = &soundex ($in);";
    eval ($try);

    if ($expect ne $got)
    {
      $bad++;
      print "not ok $test\n";
      print "# expected $expect, got $got\n";
    }
  }

  print "ok $test\n" unless $bad;
}

__END__
#
# 1..6
#
# Knuth's test cases, scalar in, scalar out
#
'Euler':'E460'
'Gauss':'G200'
'Hilbert':'H416'
'Knuth':'K530'
'Lloyd':'L300'
'Lukasiewicz':'L222'
#
# 7..8
#
# check default bad code
#
'2 + 2 = 4':undef
undef:undef
#
# 9
#
# check array in, array out
#
('Ellery', 'Ghosh', 'Heilbronn', 'Kant', 'Ladd', 'Lissajous'):('E460', 'G200', 'H416', 'K530', 'L300', 'L222')
#
# 10
#
# check array with explicit undef
#
('Mike', undef, 'Stok'):('M200', undef, 'S320')
#
# 11..12
#
# check setting $Text::Soundex::noCode
#
eval $soundex_nocode = 'Z000';
('Mike', undef, 'Stok'):('M200', 'Z000', 'S320')
#
# 13
#
# a subtle difference between me & oracle, spotted by Rich Pinder
# <rpinder at hsc.usc.edu>
#
CZARKOWSKA:C622

--- NEW FILE: Abbrev.t ---
#!./perl

print "1..8\n";

BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
}

use Text::Abbrev;

print "ok 1\n";

# old style as reference
local(%x);
my @z = qw(list edit send abort gripe listen);
abbrev(*x, @z);
my $r = join ':', sort keys %x; 
print "not " if exists $x{'l'}   ||
                exists $x{'li'}  ||
                exists $x{'lis'};
print "ok 2\n";

print "not " unless $x{'list'}   eq 'list' &&
                    $x{'liste'}  eq 'listen' &&
                    $x{'listen'} eq 'listen';
print "ok 3\n";

print "not " unless $x{'a'}     eq 'abort' &&
                    $x{'ab'}    eq 'abort' &&
                    $x{'abo'}   eq 'abort' &&
                    $x{'abor'}  eq 'abort' &&
                    $x{'abort'} eq 'abort';
print "ok 4\n";

my $test = 5;

# wantarray
my %y = abbrev @z;
my $s = join ':', sort keys %y;
print (($r eq $s)?"ok $test\n":"not ok $test\n"); $test++;

my $y = abbrev @z;
$s = join ':', sort keys %$y;
print (($r eq $s)?"ok $test\n":"not ok $test\n"); $test++;

%y = ();
abbrev \%y, @z;

$s = join ':', sort keys %y;
print (($r eq $s)?"ok $test\n":"not ok $test\n"); $test++;


# warnings safe with zero arguments
my $notok;
$^W = 1;
$SIG{__WARN__} = sub { $notok++ };
abbrev();
print ($notok ? "not ok $test\n" : "ok $test\n"); $test++;

--- NEW FILE: Soundex.pm ---
package Text::Soundex;
require 5.000;
require Exporter;

@ISA = qw(Exporter);
@EXPORT = qw(&soundex $soundex_nocode);

$VERSION = '1.01';

# $Id: Soundex.pm,v 1.2 2006-12-04 17:01:09 dslinux_cayenne Exp $
#
# Implementation of soundex algorithm as described by Knuth in volume
# 3 of The Art of Computer Programming, with ideas stolen from Ian
# Phillipps <ian at pipex.net>.
#
# Mike Stok <Mike.Stok at meiko.concord.ma.us>, 2 March 1994.
#
# Knuth's test cases are:
# 
# Euler, Ellery -> E460
# Gauss, Ghosh -> G200
# Hilbert, Heilbronn -> H416
# Knuth, Kant -> K530
# Lloyd, Ladd -> L300
# Lukasiewicz, Lissajous -> L222
#
# $Log: Soundex.pm,v $
# Revision 1.2  2006-12-04 17:01:09  dslinux_cayenne
# Adding fresh perl source to HEAD to branch from
#
# Revision 1.2  1994/03/24  00:30:27  mike
# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder at hsc.usc.edu>
# in the way I handles leasing characters which were different but had
# the same soundex code.  This showed up comparing it with Oracle's
# soundex output.
#
# Revision 1.1  1994/03/02  13:01:30  mike
# Initial revision
#
#
##############################################################################

# $soundex_nocode is used to indicate a string doesn't have a soundex
# code, I like undef other people may want to set it to 'Z000'.

$soundex_nocode = undef;

sub soundex
{
  local (@s, $f, $fc, $_) = @_;

  push @s, '' unless @s;	# handle no args as a single empty string

  foreach (@s)
  {
    $_ = uc $_;
    tr/A-Z//cd;

    if ($_ eq '')
    {
      $_ = $soundex_nocode;
    }
    else
    {
      ($f) = /^(.)/;
      tr/AEHIOUWYBFPVCGJKQSXZDTLMNR/00000000111122222222334556/;
      ($fc) = /^(.)/;
      s/^$fc+//;
      tr///cs;
      tr/0//d;
      $_ = $f . $_ . '000';
      s/^(.{4}).*/$1/;
    }
  }

  wantarray ? @s : shift @s;
}

1;

__END__

=head1 NAME

Text::Soundex - Implementation of the Soundex Algorithm as Described by Knuth

=head1 SYNOPSIS

  use Text::Soundex;

  $code = soundex $string;            # get soundex code for a string
  @codes = soundex @list;             # get list of codes for list of strings

  # set value to be returned for strings without soundex code

  $soundex_nocode = 'Z000';

=head1 DESCRIPTION

This module implements the soundex algorithm as described by Donald Knuth
in Volume 3 of B<The Art of Computer Programming>.  The algorithm is
intended to hash words (in particular surnames) into a small space using a
simple model which approximates the sound of the word when spoken by an English
speaker.  Each word is reduced to a four character string, the first
character being an upper case letter and the remaining three being digits.

If there is no soundex code representation for a string then the value of
C<$soundex_nocode> is returned.  This is initially set to C<undef>, but
many people seem to prefer an I<unlikely> value like C<Z000>
(how unlikely this is depends on the data set being dealt with.)  Any value
can be assigned to C<$soundex_nocode>.

In scalar context C<soundex> returns the soundex code of its first
argument, and in list context a list is returned in which each element is the 
soundex code for the corresponding argument passed to C<soundex> e.g.

  @codes = soundex qw(Mike Stok);

leaves C<@codes> containing C<('M200', 'S320')>.

=head1 EXAMPLES

Knuth's examples of various names and the soundex codes they map to
are listed below:

  Euler, Ellery -> E460
  Gauss, Ghosh -> G200
  Hilbert, Heilbronn -> H416
  Knuth, Kant -> K530
  Lloyd, Ladd -> L300
  Lukasiewicz, Lissajous -> L222

so:

  $code = soundex 'Knuth';              # $code contains 'K530'
  @list = soundex qw(Lloyd Gauss);	# @list contains 'L300', 'G200'

=head1 LIMITATIONS

As the soundex algorithm was originally used a B<long> time ago in the US
it considers only the English alphabet and pronunciation.

As it is mapping a large space (arbitrary length strings) onto a small
space (single letter plus 3 digits) no inference can be made about the
similarity of two strings which end up with the same soundex code.  For 
example, both C<Hilbert> and C<Heilbronn> end up with a soundex code
of C<H416>.

=head1 AUTHOR

This code was implemented by Mike Stok (C<stok at cybercom.net>) from the 
description given by Knuth.  Ian Phillipps (C<ian at pipex.net>) and Rich Pinder 
(C<rpinder at hsc.usc.edu>) supplied ideas and spotted mistakes.

--- NEW FILE: Wrap.pm ---
package Text::Wrap;

require Exporter;

@ISA = qw(Exporter);
@EXPORT = qw(wrap fill);
@EXPORT_OK = qw($columns $break $huge);

$VERSION = 2005.0824_01;

use vars qw($VERSION $columns $debug $break $huge $unexpand $tabstop
	$separator $separator2);
use strict;

BEGIN	{
	$columns = 76;  # <= screen width
	$debug = 0;
	$break = '\s';
	$huge = 'wrap'; # alternatively: 'die' or 'overflow'
	$unexpand = 1;
	$tabstop = 8;
	$separator = "\n";
	$separator2 = undef;
}

use Text::Tabs qw(expand unexpand);

sub wrap
{
	my ($ip, $xp, @t) = @_;

	local($Text::Tabs::tabstop) = $tabstop;
	my $r = "";
	my $tail = pop(@t);
	my $t = expand(join("", (map { /\s+\z/ ? ( $_ ) : ($_, ' ') } @t), $tail));
	my $lead = $ip;
	my $ll = $columns - length(expand($ip)) - 1;
	$ll = 0 if $ll < 0;
	my $nll = $columns - length(expand($xp)) - 1;
	my $nl = "";
	my $remainder = "";

	use re 'taint';

	pos($t) = 0;
	while ($t !~ /\G\s*\Z/gc) {
		if ($t =~ /\G([^\n]{0,$ll})($break|\n*\z)/xmgc) {
			$r .= $unexpand 
				? unexpand($nl . $lead . $1)
				: $nl . $lead . $1;
			$remainder = $2;
		} elsif ($huge eq 'wrap' && $t =~ /\G([^\n]{$ll})/gc) {
			$r .= $unexpand 
				? unexpand($nl . $lead . $1)
				: $nl . $lead . $1;
			$remainder = defined($separator2) ? $separator2 : $separator;
		} elsif ($huge eq 'overflow' && $t =~ /\G([^\n]*?)($break|\z)/xmgc) {
			$r .= $unexpand 
				? unexpand($nl . $lead . $1)
				: $nl . $lead . $1;
			$remainder = $2;
		} elsif ($huge eq 'die') {
			die "couldn't wrap '$t'";
		} else {
			die "This shouldn't happen";
		}
			
		$lead = $xp;
		$ll = $nll;
		$nl = defined($separator2)
			? ($remainder eq "\n"
				? "\n"
				: $separator2)
			: $separator;
	}
	$r .= $remainder;

	print "-----------$r---------\n" if $debug;

	print "Finish up with '$lead'\n" if $debug;

	$r .= $lead . substr($t, pos($t), length($t)-pos($t))
		if pos($t) ne length($t);

	print "-----------$r---------\n" if $debug;;

	return $r;
}

sub fill 
{
	my ($ip, $xp, @raw) = @_;
	my @para;
	my $pp;

	for $pp (split(/\n\s+/, join("\n", at raw))) {
		$pp =~ s/\s+/ /g;
		my $x = wrap($ip, $xp, $pp);
		push(@para, $x);
	}

	# if paragraph_indent is the same as line_indent, 
	# separate paragraphs with blank lines

	my $ps = ($ip eq $xp) ? "\n\n" : "\n";
	return join ($ps, @para);
}

1;
__END__

=head1 NAME

Text::Wrap - line wrapping to form simple paragraphs

=head1 SYNOPSIS 

B<Example 1>

	use Text::Wrap

	$initial_tab = "\t";	# Tab before first line
	$subsequent_tab = "";	# All other lines flush left

	print wrap($initial_tab, $subsequent_tab, @text);
	print fill($initial_tab, $subsequent_tab, @text);

	$lines = wrap($initial_tab, $subsequent_tab, @text);

	@paragraphs = fill($initial_tab, $subsequent_tab, @text);

B<Example 2>

	use Text::Wrap qw(wrap $columns $huge);

	$columns = 132;		# Wrap at 132 characters
	$huge = 'die';
	$huge = 'wrap';
	$huge = 'overflow';

B<Example 3>

	use Text::Wrap

	$Text::Wrap::columns = 72;
	print wrap('', '', @text);

=head1 DESCRIPTION

C<Text::Wrap::wrap()> is a very simple paragraph formatter.  It formats a
single paragraph at a time by breaking lines at word boundries.
Indentation is controlled for the first line (C<$initial_tab>) and
all subsequent lines (C<$subsequent_tab>) independently.  Please note: 
C<$initial_tab> and C<$subsequent_tab> are the literal strings that will
be used: it is unlikley you would want to pass in a number.

Text::Wrap::fill() is a simple multi-paragraph formatter.  It formats
each paragraph separately and then joins them together when it's done.  It
will destroy any whitespace in the original text.  It breaks text into
paragraphs by looking for whitespace after a newline.  In other respects
it acts like wrap().

=head1 OVERRIDES

C<Text::Wrap::wrap()> has a number of variables that control its behavior.
Because other modules might be using C<Text::Wrap::wrap()> it is suggested
that you leave these variables alone!  If you can't do that, then 
use C<local($Text::Wrap::VARIABLE) = YOURVALUE> when you change the
values so that the original value is restored.  This C<local()> trick
will not work if you import the variable into your own namespace.

Lines are wrapped at C<$Text::Wrap::columns> columns.  C<$Text::Wrap::columns>
should be set to the full width of your output device.  In fact,
every resulting line will have length of no more than C<$columns - 1>.  

It is possible to control which characters terminate words by
modifying C<$Text::Wrap::break>. Set this to a string such as
C<'[\s:]'> (to break before spaces or colons) or a pre-compiled regexp
such as C<qr/[\s']/> (to break before spaces or apostrophes). The
default is simply C<'\s'>; that is, words are terminated by spaces.
(This means, among other things, that trailing punctuation  such as
full stops or commas stay with the word they are "attached" to.)

Beginner note: In example 2, above C<$columns> is imported into
the local namespace, and set locally.  In example 3,
C<$Text::Wrap::columns> is set in its own namespace without importing it.

C<Text::Wrap::wrap()> starts its work by expanding all the tabs in its
input into spaces.  The last thing it does it to turn spaces back
into tabs.  If you do not want tabs in your results, set 
C<$Text::Wrap::unexpand> to a false value.  Likewise if you do not
want to use 8-character tabstops, set C<$Text::Wrap::tabstop> to
the number of characters you do want for your tabstops.

If you want to separate your lines with something other than C<\n>
then set C<$Text::Wrap::separator> to your preference.  This replaces
all newlines with C<$Text::Wrap::separator>.  If you just to preserve
existing newlines but add new breaks with something else, set 
C<$Text::Wrap::separator2> instead.

When words that are longer than C<$columns> are encountered, they
are broken up.  C<wrap()> adds a C<"\n"> at column C<$columns>.
This behavior can be overridden by setting C<$huge> to
'die' or to 'overflow'.  When set to 'die', large words will cause
C<die()> to be called.  When set to 'overflow', large words will be
left intact.  

Historical notes: 'die' used to be the default value of
C<$huge>.  Now, 'wrap' is the default value.

=head1 EXAMPLE

	print wrap("\t","","This is a bit of text that forms 
		a normal book-style paragraph");

=head1 LICENSE

David Muir Sharnoff <muir at idiom.com> with help from Tim Pierce and
many many others.  Copyright (C) 1996-2002 David Muir Sharnoff.  
This module may be modified, used, copied, and redistributed at
your own risk.  Publicly redistributed modified versions must use 
a different name.


--- NEW FILE: Abbrev.pm ---
package Text::Abbrev;
require 5.005;		# Probably works on earlier versions too.
require Exporter;

our $VERSION = '1.01';

=head1 NAME

abbrev - create an abbreviation table from a list

=head1 SYNOPSIS

    use Text::Abbrev;
    abbrev $hashref, LIST


=head1 DESCRIPTION

Stores all unambiguous truncations of each element of LIST
as keys in the associative array referenced by C<$hashref>.
The values are the original list elements.

=head1 EXAMPLE

    $hashref = abbrev qw(list edit send abort gripe);

    %hash = abbrev qw(list edit send abort gripe);

    abbrev $hashref, qw(list edit send abort gripe);

    abbrev(*hash, qw(list edit send abort gripe));

=cut

@ISA = qw(Exporter);
@EXPORT = qw(abbrev);

# Usage:
#	abbrev \%foo, LIST;
#	...
#	$long = $foo{$short};

sub abbrev {
    my ($word, $hashref, $glob, %table, $returnvoid);

    @_ or return;   # So we don't autovivify onto @_ and trigger warning
    if (ref($_[0])) {           # hash reference preferably
      $hashref = shift;
      $returnvoid = 1;
    } elsif (ref \$_[0] eq 'GLOB') {  # is actually a glob (deprecated)
      $hashref = \%{shift()};
      $returnvoid = 1;
    }
    %{$hashref} = ();

    WORD: foreach $word (@_) {
        for (my $len = (length $word) - 1; $len > 0; --$len) {
	    my $abbrev = substr($word,0,$len);
	    my $seen = ++$table{$abbrev};
	    if ($seen == 1) {	    # We're the first word so far to have
	    			    # this abbreviation.
	        $hashref->{$abbrev} = $word;
	    } elsif ($seen == 2) {  # We're the second word to have this
	    			    # abbreviation, so we can't use it.
	        delete $hashref->{$abbrev};
	    } else {		    # We're the third word to have this
	    			    # abbreviation, so skip to the next word.
	        next WORD;
	    }
	}
    }
    # Non-abbreviations always get entered, even if they aren't unique
    foreach $word (@_) {
        $hashref->{$word} = $word;
    }
    return if $returnvoid;
    if (wantarray) {
      %{$hashref};
    } else {
      $hashref;
    }
}

1;

--- NEW FILE: Balanced.pm ---
# EXTRACT VARIOUSLY DELIMITED TEXT SEQUENCES FROM STRINGS.
# FOR FULL DOCUMENTATION SEE Balanced.pod

use 5.005;
use strict;

package Text::Balanced;

use Exporter;
use SelfLoader;
use vars qw { $VERSION @ISA %EXPORT_TAGS };

$VERSION = '1.95';
@ISA		= qw ( Exporter );
		     
%EXPORT_TAGS	= ( ALL => [ qw(
				&extract_delimited
				&extract_bracketed
				&extract_quotelike
[...2263 lines suppressed...]

=head1 AUTHOR

Damian Conway (damian at conway.org)


=head1 BUGS AND IRRITATIONS

There are undoubtedly serious bugs lurking somewhere in this code, if
only because parts of it give the impression of understanding a great deal
more about Perl than they really do. 

Bug reports and other feedback are most welcome.


=head1 COPYRIGHT

 Copyright (c) 1997-2001, Damian Conway. All Rights Reserved.
 This module is free software. It may be used, redistributed
     and/or modified under the same terms as Perl itself.

--- NEW FILE: ParseWords.pm ---
package Text::ParseWords;

use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE);
$VERSION = "3.24";

require 5.000;

use Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(shellwords quotewords nested_quotewords parse_line);
@EXPORT_OK = qw(old_shellwords);


sub shellwords {
    my(@lines) = @_;
    $lines[$#lines] =~ s/\s+$//;
    return(quotewords('\s+', 0, @lines));
}



sub quotewords {
    my($delim, $keep, @lines) = @_;
    my($line, @words, @allwords);

    foreach $line (@lines) {
	@words = parse_line($delim, $keep, $line);
	return() unless (@words || !length($line));
	push(@allwords, @words);
    }
    return(@allwords);
}



sub nested_quotewords {
    my($delim, $keep, @lines) = @_;
    my($i, @allwords);

    for ($i = 0; $i < @lines; $i++) {
	@{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]);
	return() unless (@{$allwords[$i]} || !length($lines[$i]));
    }
    return(@allwords);
}



sub parse_line {
    my($delimiter, $keep, $line) = @_;
    my($word, @pieces);

    no warnings 'uninitialized';	# we will be testing undef strings

    while (length($line)) {
	$line =~ s/^(["'])			# a $quote
        	    ((?:\\.|(?!\1)[^\\])*)	# and $quoted text
		    \1				# followed by the same quote
		   |				# --OR--
		   ^((?:\\.|[^\\"'])*?)		# an $unquoted text
		    (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["']))  
		    				# plus EOL, delimiter, or quote
		  //xs or return;		# extended layout
	my($quote, $quoted, $unquoted, $delim) = ($1, $2, $3, $4);
	return() unless( defined($quote) || length($unquoted) || length($delim));

        if ($keep) {
	    $quoted = "$quote$quoted$quote";
	}
        else {
	    $unquoted =~ s/\\(.)/$1/sg;
	    if (defined $quote) {
		$quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
		$quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
            }
	}
        $word .= substr($line, 0, 0);	# leave results tainted
        $word .= defined $quote ? $quoted : $unquoted;
 
        if (length($delim)) {
            push(@pieces, $word);
            push(@pieces, $delim) if ($keep eq 'delimiters');
            undef $word;
        }
        if (!length($line)) {
            push(@pieces, $word);
	}
    }
    return(@pieces);
}



sub old_shellwords {

    # Usage:
    #	use ParseWords;
    #	@words = old_shellwords($line);
    #	or
    #	@words = old_shellwords(@lines);
    #	or
    #	@words = old_shellwords();	# defaults to $_ (and clobbers it)

    no warnings 'uninitialized';	# we will be testing undef strings
    local *_ = \join('', @_) if @_;
    my (@words, $snippet);

    s/\A\s+//;
    while ($_ ne '') {
	my $field = substr($_, 0, 0);	# leave results tainted
	for (;;) {
	    if (s/\A"(([^"\\]|\\.)*)"//s) {
		($snippet = $1) =~ s#\\(.)#$1#sg;
	    }
	    elsif (/\A"/) {
		require Carp;
		Carp::carp("Unmatched double quote: $_");
		return();
	    }
	    elsif (s/\A'(([^'\\]|\\.)*)'//s) {
		($snippet = $1) =~ s#\\(.)#$1#sg;
	    }
	    elsif (/\A'/) {
		require Carp;
		Carp::carp("Unmatched single quote: $_");
		return();
	    }
	    elsif (s/\A\\(.)//s) {
		$snippet = $1;
	    }
	    elsif (s/\A([^\s\\'"]+)//) {
		$snippet = $1;
	    }
	    else {
		s/\A\s+//;
		last;
	    }
	    $field .= $snippet;
	}
	push(@words, $field);
    }
    return @words;
}

1;

__END__

=head1 NAME

Text::ParseWords - parse text into an array of tokens or array of arrays

=head1 SYNOPSIS

  use Text::ParseWords;
  @lists = &nested_quotewords($delim, $keep, @lines);
  @words = &quotewords($delim, $keep, @lines);
  @words = &shellwords(@lines);
  @words = &parse_line($delim, $keep, $line);
  @words = &old_shellwords(@lines); # DEPRECATED!

=head1 DESCRIPTION

The &nested_quotewords() and &quotewords() functions accept a delimiter 
(which can be a regular expression)
and a list of lines and then breaks those lines up into a list of
words ignoring delimiters that appear inside quotes.  &quotewords()
returns all of the tokens in a single long list, while &nested_quotewords()
returns a list of token lists corresponding to the elements of @lines.
&parse_line() does tokenizing on a single string.  The &*quotewords()
functions simply call &parse_line(), so if you're only splitting
one line you can call &parse_line() directly and save a function
call.

The $keep argument is a boolean flag.  If true, then the tokens are
split on the specified delimiter, but all other characters (quotes,
backslashes, etc.) are kept in the tokens.  If $keep is false then the
&*quotewords() functions remove all quotes and backslashes that are
not themselves backslash-escaped or inside of single quotes (i.e.,
&quotewords() tries to interpret these characters just like the Bourne
shell).  NB: these semantics are significantly different from the
original version of this module shipped with Perl 5.000 through 5.004.
As an additional feature, $keep may be the keyword "delimiters" which
causes the functions to preserve the delimiters in each string as
tokens in the token lists, in addition to preserving quote and
backslash characters.

&shellwords() is written as a special case of &quotewords(), and it
does token parsing with whitespace as a delimiter-- similar to most
Unix shells.

=head1 EXAMPLES

The sample program:

  use Text::ParseWords;
  @words = &quotewords('\s+', 0, q{this   is "a test" of\ quotewords \"for you});
  $i = 0;
  foreach (@words) {
      print "$i: <$_>\n";
      $i++;
  }

produces:

  0: <this>
  1: <is>
  2: <a test>
  3: <of quotewords>
  4: <"for>
  5: <you>

demonstrating:

=over 4

=item 0

a simple word

=item 1

multiple spaces are skipped because of our $delim

=item 2

use of quotes to include a space in a word

=item 3

use of a backslash to include a space in a word

=item 4

use of a backslash to remove the special meaning of a double-quote

=item 5

another simple word (note the lack of effect of the
backslashed double-quote)

=back

Replacing C<&quotewords('\s+', 0, q{this   is...})>
with C<&shellwords(q{this   is...})>
is a simpler way to accomplish the same thing.

=head1 AUTHORS

Maintainer is Hal Pomeranz <pomeranz at netcom.com>, 1994-1997 (Original
author unknown).  Much of the code for &parse_line() (including the
primary regexp) from Joerk Behrends <jbehrends at multimediaproduzenten.de>.

Examples section another documentation provided by John Heidemann 
<johnh at ISI.EDU>

Bug reports, patches, and nagging provided by lots of folks-- thanks
everybody!  Special thanks to Michael Schwern <schwern at envirolink.org>
for assuring me that a &nested_quotewords() would be useful, and to 
Jeff Friedl <jfriedl at yahoo-inc.com> for telling me not to worry about
error-checking (sort of-- you had to be there).

=cut




More information about the dslinux-commit mailing list