dslinux/user/perl/t/uni case.pl chomp.t class.t fold.t lower.t sprintf.t title.t tr_7jis.t tr_eucjp.t tr_sjis.t tr_utf8.t upper.t write.t

cayenne dslinux_cayenne at user.in-berlin.de
Tue Dec 5 05:27:28 CET 2006


Update of /cvsroot/dslinux/dslinux/user/perl/t/uni
In directory antilope:/tmp/cvs-serv7729/t/uni

Added Files:
	case.pl chomp.t class.t fold.t lower.t sprintf.t title.t 
	tr_7jis.t tr_eucjp.t tr_sjis.t tr_utf8.t upper.t write.t 
Log Message:
Adding fresh perl source to HEAD to branch from

--- NEW FILE: write.t ---
#!./perl -w
use strict;

BEGIN {
    chdir 't' if -d 't';
    @INC = qw(../lib .);
    require "test.pl";
    unless (PerlIO::Layer->find('perlio')){
        print "1..0 # Skip: PerlIO required\n";
        exit 0;
    }
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC porting needed\n";
        exit 0;
    }
}

plan tests => 6;

# Some tests for UTF8 and format/write

our ($bitem1, $uitem1) = ("\x{ff}", "\x{100}");
our ($bitem2, $uitem2) = ("\x{fe}", "\x{101}");
our ($blite1, $ulite1) = ("\x{fd}", "\x{102}");
our ($blite2, $ulite2) = ("\x{fc}", "\x{103}");
our ($bmulti, $umulti) = ("\x{fb}\n\x{fa}\n\x{f9}\n",
			  "\x{104}\n\x{105}\n\x{106}\n");

sub fmwrtest {
  no strict 'refs';
  my ($out, $format, $expect, $name) = @_;
  eval "format $out =\n$format.\n"; die $@ if $@;
  open $out, '>:utf8', 'Uni_write.tmp' or die "Can't create Uni_write.tmp";
  write $out;
  close $out or die "Could not close $out: $!";

  open UIN, '<:utf8', 'Uni_write.tmp' or die "Can't open Uni_write.tmp";;
  my $result = do { local $/; <UIN>; };
  close UIN;

  is($result, $expect, $name);
}

fmwrtest OUT1 => <<EOFORMAT, <<EOEXPECT, "non-UTF8 literal / UTF8 item (1)";
$blite1 @<<
\$uitem1
$blite2 @<<
\$bitem2
EOFORMAT
$blite1 $uitem1
$blite2 $bitem2
EOEXPECT

fmwrtest OUT2 => <<EOFORMAT, <<EOEXPECT, "non-UTF8 literal / UTF8 item (2)";
$blite1 @<<
\$bitem1
$blite2 @<<
\$uitem2
EOFORMAT
$blite1 $bitem1
$blite2 $uitem2
EOEXPECT

fmwrtest OUT3 => <<EOFORMAT, <<EOEXPECT, "UTF8 literal / non-UTF8 item (1)";
$ulite1 @<<
\$bitem1
$blite2 @<<
\$bitem2
EOFORMAT
$ulite1 $bitem1
$blite2 $bitem2
EOEXPECT

fmwrtest OUT4 => <<EOFORMAT, <<EOEXPECT, "UTF8 literal / non-UTF8 item (2)";
$blite1 @<<
\$bitem1
$ulite2 @<<
\$bitem2
EOFORMAT
$blite1 $bitem1
$ulite2 $bitem2
EOEXPECT

fmwrtest OUT5 => <<EOFORMAT, <<EOEXPECT, "non-UTF8 literal / UTF8 multiline";
$blite1
@*
\$umulti
$blite2
EOFORMAT
$blite1
$umulti$blite2
EOEXPECT

fmwrtest OUT6 => <<EOFORMAT, <<EOEXPECT, "UTF8 literal / non-UTF8 multiline";
$ulite1
@*
\$bmulti
$blite2
EOFORMAT
$ulite1
$bmulti$blite2
EOEXPECT

1 while unlink 'Uni_write.tmp';

--- NEW FILE: fold.t ---
BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
}

use File::Spec;

my $CF = File::Spec->catfile(File::Spec->catdir(File::Spec->updir,
					       "lib", "unicore"),
			    "CaseFolding.txt");

use constant EBCDIC => ord 'A' == 193;

if (open(CF, $CF)) {
    my @CF;

    while (<CF>) {
	# Skip S since we are going for 'F'ull case folding
        if (/^([0-9A-F]+); ([CFI]); ((?:[0-9A-F]+)(?: [0-9A-F]+)*); \# (.+)/) {
	    next if EBCDIC && hex $1 < 0x100;
	    push @CF, [$1, $2, $3, $4];
	}
    }

    close(CF);

    die qq[$0: failed to find casefoldings from "$CF"\n] unless @CF;

    print "1..", scalar @CF, "\n";

    my $i = 0;
    for my $cf (@CF) {
	my ($code, $status, $mapping, $name) = @$cf;
	$i++;
	my $a = pack("U0U*", hex $code);
	my $b = pack("U0U*", map { hex } split " ", $mapping);
	my $t0 = ":$a:" =~ /:$a:/    ? 1 : 0;
	my $t1 = ":$a:" =~ /:$a:/i   ? 1 : 0;
	my $t2 = ":$a:" =~ /:[$a]:/  ? 1 : 0;
	my $t3 = ":$a:" =~ /:[$a]:/i ? 1 : 0;
	my $t4 = ":$a:" =~ /:$b:/i   ? 1 : 0;
	my $t5 = ":$a:" =~ /:[$b]:/i ? 1 : 0;
	my $t6 = ":$b:" =~ /:$a:/i   ? 1 : 0;
	my $t7 = ":$b:" =~ /:[$a]:/i ? 1 : 0;
	print $t0 && $t1 && $t2 && $t3 && $t4 && $t5 && $t6 && $t7 ?
	    "ok $i \# - $code - $name - $mapping - $status\n" :
	    "not ok $i \# - $code - $name - $mapping - $status - $t0 $t1 $t2 $t3 $t4 $t5 $t6 $t7\n";
    }
} else {
    die qq[$0: failed to open "$CF": $!\n];
}

--- NEW FILE: lower.t ---
BEGIN {
    chdir 't' if -d 't';
    @INC = qw(../lib uni .);
    require "case.pl";
}

casetest("Lower", \%utf8::ToSpecLower,
	 sub { lc $_[0] }, sub { my $a = ""; lc ($_[0] . $a) },
	 sub { lcfirst $_[0] }, sub { my $a = ""; lcfirst ($_[0] . $a) });

--- NEW FILE: tr_utf8.t ---
#
# $Id: tr_utf8.t,v 1.1 2006-12-05 04:27:26 dslinux_cayenne Exp $
#
# This script is written intentionally in UTF-8
# Requires Encode 1.83 or better
# -- dankogai

BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        @INC = '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }
    unless (PerlIO::Layer->find('perlio')){
        print "1..0 # Skip: PerlIO required\n";
        exit 0;
    }
    if ($ENV{PERL_CORE_MINITEST}) {
        print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
        exit 0;
    }
    $| = 1;
}

use strict;
use Test::More tests => 7;

use encoding 'utf8';

my @hiragana =  map {chr} ord("ぁ")..ord("ん");
my @katakana =  map {chr} ord("ァ")..ord("ン");
my $hiragana = join('' => @hiragana);
my $katakana = join('' => @katakana);
my %h2k; @h2k{@hiragana} = @katakana;
my %k2h; @k2h{@katakana} = @hiragana;

# print @hiragana, "\n";

my $str;

$str = $hiragana; $str =~ tr/ぁ-ん/ァ-ン/;
is($str, $katakana, "tr// # hiragana -> katakana");
$str = $katakana; $str =~ tr/ァ-ン/ぁ-ん/;
is($str, $hiragana, "tr// # hiragana -> katakana");

$str = $hiragana; eval qq(\$str =~ tr/ぁ-ん/ァ-ン/);
is($str, $katakana, "eval qq(tr//) # hiragana -> katakana");
$str = $katakana; eval qq(\$str =~ tr/ァ-ン/ぁ-ん/);
is($str, $hiragana, "eval qq(tr//) # hiragana -> katakana");

$str = $hiragana; $str =~ s/([ぁ-ん])/$h2k{$1}/go;
is($str, $katakana, "s/// # hiragana -> katakana");
$str = $katakana; $str =~ s/([ァ-ン])/$k2h{$1}/go;
is($str, $hiragana, "s/// # hiragana -> katakana");

{
  # [perl 16843]
  my $line = 'abcdefghijklmnopqrstuvwxyz$0123456789';
  $line =~ tr/bcdeghijklmnprstvwxyz$02578/בצדעגהיײקלמנפּרסטװשכיזשױתײחא/;
  is($line, "aבצדעfגהיײקלמנoפqּרסuטװשכיזש1ױ34ת6ײח9", "[perl #16843]");
}
__END__

--- NEW FILE: case.pl ---
use File::Spec;

require "test.pl";

sub unidump {
    join " ", map { sprintf "%04X", $_ } unpack "U*", $_[0];
}

sub casetest {
    my ($base, $spec, @funcs) = @_;
    # For each provided function run it, and run a version with some extra
    # characters afterwards. Use a recylcing symbol, as it doesn't change case.
    my $ballast = chr (0x2672) x 3;
    @funcs = map {my $f = $_;
		  ($f,
		   sub {my $r = $f->($_[0] . $ballast); # Add it before
			$r =~ s/$ballast\z//so # Remove it afterwards
			    or die "'$_[0]' to '$r' mangled";
			$r; # Result with $ballast removed.
		    },
		   )} @funcs;

    my $file = File::Spec->catfile(File::Spec->catdir(File::Spec->updir,
						      "lib", "unicore", "To"),
				   "$base.pl");
    my $simple = do $file;
    my %simple;
    for my $i (split(/\n/, $simple)) {
	my ($k, $v) = split(' ', $i);
	$simple{$k} = $v;
    }
    my %seen;

    for my $i (sort keys %simple) {
	$seen{$i}++;
    }
    print "# ", scalar keys %simple, " simple mappings\n";

    my $both;

    for my $i (sort keys %$spec) {
	if (++$seen{$i} == 2) {
	    warn sprintf "$base: $i seen twice\n";
	    $both++;
	}
    }
    print "# ", scalar keys %$spec, " special mappings\n";

    exit(1) if $both;

    my %none;
    for my $i (map { ord } split //,
	       "\e !\"#\$%&'()+,-./0123456789:;<=>?\@[\\]^_{|}~\b") {
	next if pack("U0U", $i) =~ /\w/;
	$none{$i}++ unless $seen{$i};
    }
    print "# ", scalar keys %none, " noncase mappings\n";

    my $tests = 
	((scalar keys %simple) +
	 (scalar keys %$spec) +
	 (scalar keys %none)) * @funcs;
    print "1..$tests\n";

    my $test = 1;

    for my $i (sort keys %simple) {
	my $w = $simple{$i};
	my $c = pack "U0U", hex $i;
	foreach my $func (@funcs) {
	    my $d = $func->($c);
	    my $e = unidump($d);
	    print $d eq pack("U0U", hex $simple{$i}) ?
		"ok $test # $i -> $w\n" : "not ok $test # $i -> $e ($w)\n";
		$test++;
	}
    }

    for my $i (sort keys %$spec) {
	my $w = unidump($spec->{$i});
	my $u = unpack "U0U", $i;
	my $h = sprintf "%04X", $u;
	my $c = chr($u); $c .= chr(0x100); chop $c;
	foreach my $func (@funcs) {
	    my $d = $func->($c);
	    my $e = unidump($d);
	    if (ord "A" == 193) { # EBCDIC
		# We need to a little bit of remapping.
		#
		# For example, in titlecase (ucfirst) mapping
		# of U+0149 the Unicode mapping is U+02BC U+004E.
		# The 4E is N, which in EBCDIC is 2B--
		# and the ucfirst() does that right.
		# The problem is that our reference
		# data is in Unicode code points.
		#
		# The Right Way here would be to use, say,
		# Encode, to remap the less-than 0x100 code points,
		# but let's try to be Encode-independent here. 
		#
		# These are the titlecase exceptions:
		#
		#         Unicode   Unicode+EBCDIC  
		#
		# 0149 -> 02BC 004E (02BC 002B)
		# 01F0 -> 004A 030C (00A2 030C)
		# 1E96 -> 0048 0331 (00E7 0331)
		# 1E97 -> 0054 0308 (00E8 0308)
		# 1E98 -> 0057 030A (00EF 030A)
		# 1E99 -> 0059 030A (00DF 030A)
		# 1E9A -> 0041 02BE (00A0 02BE)
		#
		# The uppercase exceptions are identical.
		#
		# The lowercase has one more:
		#
		#         Unicode   Unicode+EBCDIC  
		#
		# 0130 -> 0069 0307 (00D1 0307)
		#
		if ($i =~ /^(0130|0149|01F0|1E96|1E97|1E98|1E99|1E9A)$/) {
		    $e =~ s/004E/002B/; # N
		    $e =~ s/004A/00A2/; # J
		    $e =~ s/0048/00E7/; # H
		    $e =~ s/0054/00E8/; # T
		    $e =~ s/0057/00EF/; # W
		    $e =~ s/0059/00DF/; # Y
		    $e =~ s/0041/00A0/; # A
		    $e =~ s/0069/00D1/; # i
		}
		# We have to map the output, not the input, because
		# pack/unpack U has been EBCDICified, too, it would
		# just undo our remapping.
	    }
	    print $w eq $e ?
		"ok $test # $i -> $w\n" : "not ok $test # $h -> $e ($w)\n";
		$test++;
	}
    }

    for my $i (sort { $a <=> $b } keys %none) {
	my $w = $i = sprintf "%04X", $i;
	my $c = pack "U0U", hex $i;
	foreach my $func (@funcs) {
	    my $d = $func->($c);
	    my $e = unidump($d);
	    print $d eq $c ?
		"ok $test # $i -> $w\n" : "not ok $test # $i -> $e ($w)\n";
		$test++;
	}
    }
}

1;

--- NEW FILE: title.t ---
BEGIN {
    chdir 't' if -d 't';
    @INC = qw(../lib uni .);
    require "case.pl";
}

casetest("Title", \%utf8::ToSpecTitle, sub { ucfirst $_[0] },
	 sub { my $a = ""; ucfirst ($_[0] . $a) });

--- NEW FILE: tr_7jis.t ---
#
# $Id: tr_7jis.t,v 1.1 2006-12-05 04:27:26 dslinux_cayenne Exp $
#
# This script is written intentionally in ISO-2022-JP
# requires Encode 1.83 or better to work
# -- dankogai

BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        @INC = '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }
    unless (PerlIO::Layer->find('perlio')){
        print "1..0 # Skip: PerlIO required\n";
        exit 0;
    }
    if ($ENV{PERL_CORE_MINITEST}) {
        print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
        exit 0;
    }
    $| = 1;
}

use strict;
use Test::More tests => 6;
use Encode;
use encoding 'iso-2022-jp';

my @hiragana =  map {chr} ord("ぁ")..ord("ん");
my @katakana =  map {chr} ord("ァ")..ord("ン");
my $hiragana = join('' => @hiragana);
my $katakana = join('' => @katakana);
my %h2k; @h2k{@hiragana} = @katakana;
my %k2h; @k2h{@katakana} = @hiragana;

# print @hiragana, "\n";

my $str;

$str = $hiragana; $str =~ tr/ぁ-ん/ァ-ン/;
is($str, $katakana, "tr// # hiragana -> katakana");
$str = $katakana; $str =~ tr/ァ-ン/ぁ-ん/;
is($str, $hiragana, "tr// # hiragana -> katakana");

$str = $hiragana; eval qq(\$str =~ tr/ぁ-ん/ァ-ン/);
is($str, $katakana, "eval qq(tr//) # hiragana -> katakana");
$str = $katakana; eval qq(\$str =~ tr/ァ-ン/ぁ-ん/);
is($str, $hiragana, "eval qq(tr//) # hiragana -> katakana");

$str = $hiragana; $str =~ s/([ぁ-ん])/$h2k{$1}/go;
is($str, $katakana, "s/// # hiragana -> katakana");
$str = $katakana; $str =~ s/([ァ-ン])/$k2h{$1}/go;
is($str, $hiragana, "s/// # hiragana -> katakana");
__END__

--- NEW FILE: class.t ---
BEGIN {
    chdir 't' if -d 't';
    @INC = qw(../lib .);
    require "test.pl";
}

plan tests => 4670;

sub MyUniClass {
  <<END;
0030	004F
END
}

sub Other::Class {
  <<END;
0040	005F
END
}

sub A::B::Intersection {
  <<END;
+main::MyUniClass
&Other::Class
END
}

sub test_regexp ($$) {
  # test that given string consists of N-1 chars matching $qr1, and 1
  # char matching $qr2
  my ($str, $blk) = @_;

  # constructing these objects here makes the last test loop go much faster
  my $qr1 = qr/(\p{$blk}+)/;
  if ($str =~ $qr1) {
    is($1, substr($str, 0, -1));		# all except last char
  }
  else {
    fail('first N-1 chars did not match');
  }

  my $qr2 = qr/(\P{$blk}+)/;
  if ($str =~ $qr2) {
    is($1, substr($str, -1));			# only last char
  }
  else {
    fail('last char did not match');
  }
}

use strict;

my $str = join "", map chr($_), 0x20 .. 0x6F;

# make sure it finds built-in class
is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');

# make sure it finds user-defined class
is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');

# make sure it finds class in other package
is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');

# make sure it finds class in other OTHER package
is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');

# all of these should look in lib/unicore/bc/AL.pl
$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");

# make sure InGreek works
$str = "[\x{038B}\x{038C}\x{038D}]";

is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");

use File::Spec;
my $updir = File::Spec->updir;

# the %utf8::... hashes are already in existence
# because utf8_pva.pl was run by utf8_heavy.pl

*utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning

no warnings 'utf8'; # we do not want warnings about surrogates etc

# non-General Category and non-Script
while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
  my $prop_name = $utf8::PropertyAlias{$abbrev};
  next unless $prop_name;
  next if $abbrev eq "gc_sc";

  for (sort keys %$files) {
    my $filename = File::Spec->catfile(
      $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
    );

    next unless -e $filename;
    my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
    my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

    for my $p ($prop_name, $abbrev) {
      for my $c ($files->{$_}, $_) {
        is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
        is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
      }
    }
  }
}

# General Category and Script
for my $p ('gc', 'sc') {
  while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
    my $filename = File::Spec->catfile(
      $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
    );

    next unless -e $filename;
    my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
    my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

    for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
      for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
        is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
        is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
	test_regexp ($str, $y);
      }
    }
  }
}

# test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
SKIP:
{
  skip "Can't reliably derive class names from file names", 592 if $^O eq 'VMS';

  # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
  # return true. Try to work around this by reading the filenames explicitly
  # to get a case sensitive test.  N.B.  This will fail if filename case is
  # not preserved because you might go looking for a class name of CF or cf
  # when you really want Cf.  Storing case sensitive data in filenames is 
  # simply not portable.

  my %files;

  my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc');
  opendir D, $dirname or die $!;
  @files{readdir(D)} = ();
  closedir D;

  for (keys %utf8::PA_reverse) {
    my $leafname = "$utf8::PA_reverse{$_}.pl";
    next unless exists $files{$leafname};

    my $filename = File::Spec->catfile($dirname, $leafname);

    my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
    my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

    for my $x ('gc', 'General Category') {
      print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
      for my $y ($_, $utf8::PA_reverse{$_}) {
	is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
	is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
	test_regexp ($str, $y);
      }
    }
  }
}

# test the blocks (InFoobar)
for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
  my $filename = File::Spec->catfile(
    $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
  );

  next unless -e $filename;

  print "# In$_ $filename\n";

  my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
  my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

  my $blk = $_;

  test_regexp ($str, $blk);
  $blk =~ s/^In/Block:/;
  test_regexp ($str, $blk);
}


--- NEW FILE: upper.t ---
BEGIN {
    chdir 't' if -d 't';
    @INC = qw(../lib uni .);
    require "case.pl";
}

casetest("Upper", \%utf8::ToSpecUpper, sub { uc $_[0] },
	 sub { my $a = ""; uc ($_[0] . $a) });

--- NEW FILE: tr_eucjp.t ---
#
# $Id: tr_eucjp.t,v 1.1 2006-12-05 04:27:26 dslinux_cayenne Exp $
#
# This script is written intentionally in EUC-JP
# -- dankogai

BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        @INC = '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }
    unless (PerlIO::Layer->find('perlio')){
        print "1..0 # Skip: PerlIO required\n";
        exit 0;
    }
    if ($ENV{PERL_CORE_MINITEST}) {
        print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
        exit 0;
    }
    $| = 1;
}

use strict;
use Test::More tests => 6;
use Encode;
use encoding 'euc-jp';

my @hiragana =  map {chr} ord("¤¡")..ord("¤ó");
my @katakana =  map {chr} ord("¥¡")..ord("¥ó");
my $hiragana = join('' => @hiragana);
my $katakana = join('' => @katakana);
my %h2k; @h2k{@hiragana} = @katakana;
my %k2h; @k2h{@katakana} = @hiragana;

# print @hiragana, "\n";

my $str;

$str = $hiragana; $str =~ tr/¤¡-¤ó/¥¡-¥ó/;
is($str, $katakana, "tr// # hiragana -> katakana");
$str = $katakana; $str =~ tr/¥¡-¥ó/¤¡-¤ó/;
is($str, $hiragana, "tr// # hiragana -> katakana");

$str = $hiragana; eval qq(\$str =~ tr/¤¡-¤ó/¥¡-¥ó/);
is($str, $katakana, "eval qq(tr//) # hiragana -> katakana");
$str = $katakana; eval qq(\$str =~ tr/¥¡-¥ó/¤¡-¤ó/);
is($str, $hiragana, "eval qq(tr//) # hiragana -> katakana");

$str = $hiragana; $str =~ s/([¤¡-¤ó])/$h2k{$1}/go;
is($str, $katakana, "s/// # hiragana -> katakana");
$str = $katakana; $str =~ s/([¥¡-¥ó])/$k2h{$1}/go;
is($str, $hiragana, "s/// # hiragana -> katakana");
__END__

--- NEW FILE: tr_sjis.t ---
#
# $Id: tr_sjis.t,v 1.1 2006-12-05 04:27:26 dslinux_cayenne Exp $
#
# This script is written intentionally in Shift JIS
# -- dankogai

BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        @INC = '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }
    unless (PerlIO::Layer->find('perlio')){
        print "1..0 # Skip: PerlIO required\n";
        exit 0;
    }
    if ($ENV{PERL_CORE_MINITEST}) {
        print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
        exit 0;
    }
    $| = 1;
}

use strict;
use Test::More tests => 6;
use Encode;
use encoding 'shiftjis';

my @hiragana =  map {chr} ord("‚Ÿ")..ord("‚ñ");
my @katakana =  map {chr} ord("ƒ@")..ord("ƒ“");
my $hiragana = join('' => @hiragana);
my $katakana = join('' => @katakana);
my %h2k; @h2k{@hiragana} = @katakana;
my %k2h; @k2h{@katakana} = @hiragana;

# print @hiragana, "\n";

my $str;

$str = $hiragana; $str =~ tr/‚Ÿ-‚ñ/ƒ@-ƒ“/;
is($str, $katakana, "tr// # hiragana -> katakana");
$str = $katakana; $str =~ tr/ƒ@-ƒ“/‚Ÿ-‚ñ/;
is($str, $hiragana, "tr// # hiragana -> katakana");

$str = $hiragana; eval qq(\$str =~ tr/‚Ÿ-‚ñ/ƒ@-ƒ“/);
is($str, $katakana, "eval qq(tr//) # hiragana -> katakana");
$str = $katakana; eval qq(\$str =~ tr/ƒ@-ƒ“/‚Ÿ-‚ñ/);
is($str, $hiragana, "eval qq(tr//) # hiragana -> katakana");

$str = $hiragana; $str =~ s/([‚Ÿ-‚ñ])/$h2k{$1}/go;
is($str, $katakana, "s/// # hiragana -> katakana");
$str = $katakana; $str =~ s/([ƒ@-ƒ“])/$k2h{$1}/go;
is($str, $hiragana, "s/// # hiragana -> katakana");
__END__

--- NEW FILE: chomp.t ---
#!./perl -w

BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        @INC = '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }
    unless (PerlIO::Layer->find('perlio')){
        print "1..0 # Skip: PerlIO required\n";
        exit 0;
    }
    if ($ENV{PERL_CORE_MINITEST}) {
        print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
        exit 0;
    }
}

use Encode;
use strict;
use Test::More;

# %mbchars = (encoding => { bytes => utf8, ... }, ...);
# * pack('C*') is expected to return bytes even if ${^ENCODING} is true.
our %mbchars = (
    'big-5' => {
	pack('C*', 0x40)       => pack('U*', 0x40), # COMMERCIAL AT
	pack('C*', 0xA4, 0x40) => "\x{4E00}",       # CJK-4E00
    },
    'euc-jp' => {
	pack('C*', 0xB0, 0xA1)       => "\x{4E9C}", # CJK-4E9C
	pack('C*', 0x8F, 0xB0, 0xA1) => "\x{4E02}", # CJK-4E02
    },
    'shift-jis' => {
	pack('C*', 0xA9)       => "\x{FF69}", # halfwidth katakana small U
	pack('C*', 0x82, 0xA9) => "\x{304B}", # hiragana KA
    },
);

# 4 == @char; paired tests inside 3 nested loops,
# plus extra pair of tests in a loop, plus extra pair of tests.
plan tests => 2 * (4 ** 3 + 4 + 1) * (keys %mbchars);

for my $enc (sort keys %mbchars) {
    local ${^ENCODING} = find_encoding($enc);
    my @char = (sort(keys   %{ $mbchars{$enc} }),
		sort(values %{ $mbchars{$enc} }));

    for my $rs (@char) {
	local $/ = $rs;
	for my $start (@char) {
	    for my $end (@char) {
		my $string = $start.$end;
		my ($expect, $return);
		if ($end eq $rs) {
		    $expect = $start;
		    # The answer will always be a length in utf8, even if the
		    # scalar was encoded with a different length
		    $return = length ($end . "\x{100}") - 1;
		} else {
		    $expect = $string;
		    $return = 0;
		}
		is (chomp ($string), $return);
		is ($string, $expect); # "$enc \$/=$rs $start $end"
	    }
	}
	# chomp should not stringify references unless it decides to modify
	# them
	$_ = [];
	my $got = chomp();
	is ($got, 0);
	is (ref($_), "ARRAY", "chomp ref (no modify)");
    }

    $/ = ")";  # the last char of something like "ARRAY(0x80ff6e4)"
    my $got = chomp();
    is ($got, 1);
    ok (!ref($_), "chomp ref (modify)");
}

--- NEW FILE: sprintf.t ---
#!./perl -w

BEGIN {
    chdir 't' if -d 't';
    @INC = qw(../lib .);
    require "test.pl";
}

plan tests => 52;

$a = "B\x{fc}f";
$b = "G\x{100}r";
$c = 0x200;

{
    my $s = sprintf "%s", $a;
    is($s, $a, "%s a");
}

{
    my $s = sprintf "%s", $b;
    is($s, $b, "%s b");
}

{
    my $s = sprintf "%s%s", $a, $b;
    is($s, $a.$b, "%s%s a b");
}

{
    my $s = sprintf "%s%s", $b, $a;
    is($s, $b.$a, "%s%s b a");
}

{
    my $s = sprintf "%s%s", $b, $b;
    is($s, $b.$b, "%s%s b b");
}

{
    my $s = sprintf "%s$b", $a;
    is($s, $a.$b, "%sb a");
}

{
    my $s = sprintf "$b%s", $a;
    is($s, $b.$a, "b%s a");
}

{
    my $s = sprintf "%s$a", $b;
    is($s, $b.$a, "%sa b");
}

{
    my $s = sprintf "$a%s", $b;
    is($s, $a.$b, "a%s b");
}

{
    my $s = sprintf "$a%s", $a;
    is($s, $a.$a, "a%s a");
}

{
    my $s = sprintf "$b%s", $b;
    is($s, $b.$b, "a%s b");
}

{
    my $s = sprintf "%c", $c;
    is($s, chr($c), "%c c");
}

{
    my $s = sprintf "%s%c", $a, $c;
    is($s, $a.chr($c), "%s%c a c");
}

{
    my $s = sprintf "%c%s", $c, $a;
    is($s, chr($c).$a, "%c%s c a");
}

{
    my $s = sprintf "%c$b", $c;
    is($s, chr($c).$b, "%cb c");
}

{
    my $s = sprintf "%s%c$b", $a, $c;
    is($s, $a.chr($c).$b, "%s%cb a c");
}

{
    my $s = sprintf "%c%s$b", $c, $a;
    is($s, chr($c).$a.$b, "%c%sb c a");
}

{
    my $s = sprintf "$b%c", $c;
    is($s, $b.chr($c), "b%c c");
}

{
    my $s = sprintf "$b%s%c", $a, $c;
    is($s, $b.$a.chr($c), "b%s%c a c");
}

{
    my $s = sprintf "$b%c%s", $c, $a;
    is($s, $b.chr($c).$a, "b%c%s c a");
}

{
    # 20010407.008 sprintf removes utf8-ness
    $a = sprintf "\x{1234}";
    is((sprintf "%x %d", unpack("U*", $a), length($a)),    "1234 1",
       '\x{1234}');
    $a = sprintf "%s", "\x{5678}";
    is((sprintf "%x %d", unpack("U*", $a), length($a)),    "5678 1",
       '%s \x{5678}');
    $a = sprintf "\x{1234}%s", "\x{5678}";
    is((sprintf "%x %x %d", unpack("U*", $a), length($a)), "1234 5678 2",
       '\x{1234}%s \x{5678}');
}

{
    # check that utf8ness doesn't "accumulate"

    my $w = "w\x{fc}";
    my $sprintf;

    $sprintf = sprintf "%s%s", $w, "$w\x{100}";
    is(substr($sprintf,0,2), $w, "utf8 echo");

    $sprintf = sprintf "%s%s", $w, "$w\x{100}";    
    is(substr($sprintf,0,2), $w, "utf8 echo echo");
}

my @values =(chr 110, chr 255, chr 256);

foreach my $prefix (@values) {
    foreach my $vector (map {$_ . $_} @values) {

	my $format = "$prefix%*vd";

	foreach my $dot (@values) {
	    my $result = sprintf $format, $dot, $vector;
	    is (length $result, 8)
		or print "# ", join (',', map {ord $_} $prefix, $dot, $vector),
		  "\n";
	}
    }
}




More information about the dslinux-commit mailing list