dslinux/user/perl/t/uni case.pl chomp.t class.t fold.t lower.t sprintf.t title.t tr_7jis.t tr_eucjp.t tr_sjis.t tr_utf8.t upper.t write.t
cayenne
dslinux_cayenne at user.in-berlin.de
Tue Dec 5 05:27:28 CET 2006
Update of /cvsroot/dslinux/dslinux/user/perl/t/uni
In directory antilope:/tmp/cvs-serv7729/t/uni
Added Files:
case.pl chomp.t class.t fold.t lower.t sprintf.t title.t
tr_7jis.t tr_eucjp.t tr_sjis.t tr_utf8.t upper.t write.t
Log Message:
Adding fresh perl source to HEAD to branch from
--- NEW FILE: write.t ---
#!./perl -w
use strict;
BEGIN {
chdir 't' if -d 't';
@INC = qw(../lib .);
require "test.pl";
unless (PerlIO::Layer->find('perlio')){
print "1..0 # Skip: PerlIO required\n";
exit 0;
}
if (ord("A") == 193) {
print "1..0 # Skip: EBCDIC porting needed\n";
exit 0;
}
}
plan tests => 6;
# Some tests for UTF8 and format/write
our ($bitem1, $uitem1) = ("\x{ff}", "\x{100}");
our ($bitem2, $uitem2) = ("\x{fe}", "\x{101}");
our ($blite1, $ulite1) = ("\x{fd}", "\x{102}");
our ($blite2, $ulite2) = ("\x{fc}", "\x{103}");
our ($bmulti, $umulti) = ("\x{fb}\n\x{fa}\n\x{f9}\n",
"\x{104}\n\x{105}\n\x{106}\n");
sub fmwrtest {
no strict 'refs';
my ($out, $format, $expect, $name) = @_;
eval "format $out =\n$format.\n"; die $@ if $@;
open $out, '>:utf8', 'Uni_write.tmp' or die "Can't create Uni_write.tmp";
write $out;
close $out or die "Could not close $out: $!";
open UIN, '<:utf8', 'Uni_write.tmp' or die "Can't open Uni_write.tmp";;
my $result = do { local $/; <UIN>; };
close UIN;
is($result, $expect, $name);
}
fmwrtest OUT1 => <<EOFORMAT, <<EOEXPECT, "non-UTF8 literal / UTF8 item (1)";
$blite1 @<<
\$uitem1
$blite2 @<<
\$bitem2
EOFORMAT
$blite1 $uitem1
$blite2 $bitem2
EOEXPECT
fmwrtest OUT2 => <<EOFORMAT, <<EOEXPECT, "non-UTF8 literal / UTF8 item (2)";
$blite1 @<<
\$bitem1
$blite2 @<<
\$uitem2
EOFORMAT
$blite1 $bitem1
$blite2 $uitem2
EOEXPECT
fmwrtest OUT3 => <<EOFORMAT, <<EOEXPECT, "UTF8 literal / non-UTF8 item (1)";
$ulite1 @<<
\$bitem1
$blite2 @<<
\$bitem2
EOFORMAT
$ulite1 $bitem1
$blite2 $bitem2
EOEXPECT
fmwrtest OUT4 => <<EOFORMAT, <<EOEXPECT, "UTF8 literal / non-UTF8 item (2)";
$blite1 @<<
\$bitem1
$ulite2 @<<
\$bitem2
EOFORMAT
$blite1 $bitem1
$ulite2 $bitem2
EOEXPECT
fmwrtest OUT5 => <<EOFORMAT, <<EOEXPECT, "non-UTF8 literal / UTF8 multiline";
$blite1
@*
\$umulti
$blite2
EOFORMAT
$blite1
$umulti$blite2
EOEXPECT
fmwrtest OUT6 => <<EOFORMAT, <<EOEXPECT, "UTF8 literal / non-UTF8 multiline";
$ulite1
@*
\$bmulti
$blite2
EOFORMAT
$ulite1
$bmulti$blite2
EOEXPECT
1 while unlink 'Uni_write.tmp';
--- NEW FILE: fold.t ---
BEGIN {
chdir 't' if -d 't';
@INC = '../lib';
}
use File::Spec;
my $CF = File::Spec->catfile(File::Spec->catdir(File::Spec->updir,
"lib", "unicore"),
"CaseFolding.txt");
use constant EBCDIC => ord 'A' == 193;
if (open(CF, $CF)) {
my @CF;
while (<CF>) {
# Skip S since we are going for 'F'ull case folding
if (/^([0-9A-F]+); ([CFI]); ((?:[0-9A-F]+)(?: [0-9A-F]+)*); \# (.+)/) {
next if EBCDIC && hex $1 < 0x100;
push @CF, [$1, $2, $3, $4];
}
}
close(CF);
die qq[$0: failed to find casefoldings from "$CF"\n] unless @CF;
print "1..", scalar @CF, "\n";
my $i = 0;
for my $cf (@CF) {
my ($code, $status, $mapping, $name) = @$cf;
$i++;
my $a = pack("U0U*", hex $code);
my $b = pack("U0U*", map { hex } split " ", $mapping);
my $t0 = ":$a:" =~ /:$a:/ ? 1 : 0;
my $t1 = ":$a:" =~ /:$a:/i ? 1 : 0;
my $t2 = ":$a:" =~ /:[$a]:/ ? 1 : 0;
my $t3 = ":$a:" =~ /:[$a]:/i ? 1 : 0;
my $t4 = ":$a:" =~ /:$b:/i ? 1 : 0;
my $t5 = ":$a:" =~ /:[$b]:/i ? 1 : 0;
my $t6 = ":$b:" =~ /:$a:/i ? 1 : 0;
my $t7 = ":$b:" =~ /:[$a]:/i ? 1 : 0;
print $t0 && $t1 && $t2 && $t3 && $t4 && $t5 && $t6 && $t7 ?
"ok $i \# - $code - $name - $mapping - $status\n" :
"not ok $i \# - $code - $name - $mapping - $status - $t0 $t1 $t2 $t3 $t4 $t5 $t6 $t7\n";
}
} else {
die qq[$0: failed to open "$CF": $!\n];
}
--- NEW FILE: lower.t ---
BEGIN {
chdir 't' if -d 't';
@INC = qw(../lib uni .);
require "case.pl";
}
casetest("Lower", \%utf8::ToSpecLower,
sub { lc $_[0] }, sub { my $a = ""; lc ($_[0] . $a) },
sub { lcfirst $_[0] }, sub { my $a = ""; lcfirst ($_[0] . $a) });
--- NEW FILE: tr_utf8.t ---
#
# $Id: tr_utf8.t,v 1.1 2006-12-05 04:27:26 dslinux_cayenne Exp $
#
# This script is written intentionally in UTF-8
# Requires Encode 1.83 or better
# -- dankogai
BEGIN {
if ($ENV{'PERL_CORE'}){
chdir 't';
@INC = '../lib';
}
require Config; import Config;
if ($Config{'extensions'} !~ /\bEncode\b/) {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
if (ord("A") == 193) {
print "1..0 # Skip: EBCDIC\n";
exit 0;
}
unless (PerlIO::Layer->find('perlio')){
print "1..0 # Skip: PerlIO required\n";
exit 0;
}
if ($ENV{PERL_CORE_MINITEST}) {
print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
exit 0;
}
$| = 1;
}
use strict;
use Test::More tests => 7;
use encoding 'utf8';
my @hiragana = map {chr} ord("ã")..ord("ã");
my @katakana = map {chr} ord("ã¡")..ord("ã³");
my $hiragana = join('' => @hiragana);
my $katakana = join('' => @katakana);
my %h2k; @h2k{@hiragana} = @katakana;
my %k2h; @k2h{@katakana} = @hiragana;
# print @hiragana, "\n";
my $str;
$str = $hiragana; $str =~ tr/ã-ã/ã¡-ã³/;
is($str, $katakana, "tr// # hiragana -> katakana");
$str = $katakana; $str =~ tr/ã¡-ã³/ã-ã/;
is($str, $hiragana, "tr// # hiragana -> katakana");
$str = $hiragana; eval qq(\$str =~ tr/ã-ã/ã¡-ã³/);
is($str, $katakana, "eval qq(tr//) # hiragana -> katakana");
$str = $katakana; eval qq(\$str =~ tr/ã¡-ã³/ã-ã/);
is($str, $hiragana, "eval qq(tr//) # hiragana -> katakana");
$str = $hiragana; $str =~ s/([ã-ã])/$h2k{$1}/go;
is($str, $katakana, "s/// # hiragana -> katakana");
$str = $katakana; $str =~ s/([ã¡-ã³])/$k2h{$1}/go;
is($str, $hiragana, "s/// # hiragana -> katakana");
{
# [perl 16843]
my $line = 'abcdefghijklmnopqrstuvwxyz$0123456789';
$line =~ tr/bcdeghijklmnprstvwxyz$02578/×צ××¢×××ײק××× ×¤Ö¼×¨×¡×װש×××שױתײ××/;
is($line, "a×צ××¢f×××ײק××× oפqּרסu×װש×××ש1×±34ת6ײ×9", "[perl #16843]");
}
__END__
--- NEW FILE: case.pl ---
use File::Spec;
require "test.pl";
sub unidump {
join " ", map { sprintf "%04X", $_ } unpack "U*", $_[0];
}
sub casetest {
my ($base, $spec, @funcs) = @_;
# For each provided function run it, and run a version with some extra
# characters afterwards. Use a recylcing symbol, as it doesn't change case.
my $ballast = chr (0x2672) x 3;
@funcs = map {my $f = $_;
($f,
sub {my $r = $f->($_[0] . $ballast); # Add it before
$r =~ s/$ballast\z//so # Remove it afterwards
or die "'$_[0]' to '$r' mangled";
$r; # Result with $ballast removed.
},
)} @funcs;
my $file = File::Spec->catfile(File::Spec->catdir(File::Spec->updir,
"lib", "unicore", "To"),
"$base.pl");
my $simple = do $file;
my %simple;
for my $i (split(/\n/, $simple)) {
my ($k, $v) = split(' ', $i);
$simple{$k} = $v;
}
my %seen;
for my $i (sort keys %simple) {
$seen{$i}++;
}
print "# ", scalar keys %simple, " simple mappings\n";
my $both;
for my $i (sort keys %$spec) {
if (++$seen{$i} == 2) {
warn sprintf "$base: $i seen twice\n";
$both++;
}
}
print "# ", scalar keys %$spec, " special mappings\n";
exit(1) if $both;
my %none;
for my $i (map { ord } split //,
"\e !\"#\$%&'()+,-./0123456789:;<=>?\@[\\]^_{|}~\b") {
next if pack("U0U", $i) =~ /\w/;
$none{$i}++ unless $seen{$i};
}
print "# ", scalar keys %none, " noncase mappings\n";
my $tests =
((scalar keys %simple) +
(scalar keys %$spec) +
(scalar keys %none)) * @funcs;
print "1..$tests\n";
my $test = 1;
for my $i (sort keys %simple) {
my $w = $simple{$i};
my $c = pack "U0U", hex $i;
foreach my $func (@funcs) {
my $d = $func->($c);
my $e = unidump($d);
print $d eq pack("U0U", hex $simple{$i}) ?
"ok $test # $i -> $w\n" : "not ok $test # $i -> $e ($w)\n";
$test++;
}
}
for my $i (sort keys %$spec) {
my $w = unidump($spec->{$i});
my $u = unpack "U0U", $i;
my $h = sprintf "%04X", $u;
my $c = chr($u); $c .= chr(0x100); chop $c;
foreach my $func (@funcs) {
my $d = $func->($c);
my $e = unidump($d);
if (ord "A" == 193) { # EBCDIC
# We need to a little bit of remapping.
#
# For example, in titlecase (ucfirst) mapping
# of U+0149 the Unicode mapping is U+02BC U+004E.
# The 4E is N, which in EBCDIC is 2B--
# and the ucfirst() does that right.
# The problem is that our reference
# data is in Unicode code points.
#
# The Right Way here would be to use, say,
# Encode, to remap the less-than 0x100 code points,
# but let's try to be Encode-independent here.
#
# These are the titlecase exceptions:
#
# Unicode Unicode+EBCDIC
#
# 0149 -> 02BC 004E (02BC 002B)
# 01F0 -> 004A 030C (00A2 030C)
# 1E96 -> 0048 0331 (00E7 0331)
# 1E97 -> 0054 0308 (00E8 0308)
# 1E98 -> 0057 030A (00EF 030A)
# 1E99 -> 0059 030A (00DF 030A)
# 1E9A -> 0041 02BE (00A0 02BE)
#
# The uppercase exceptions are identical.
#
# The lowercase has one more:
#
# Unicode Unicode+EBCDIC
#
# 0130 -> 0069 0307 (00D1 0307)
#
if ($i =~ /^(0130|0149|01F0|1E96|1E97|1E98|1E99|1E9A)$/) {
$e =~ s/004E/002B/; # N
$e =~ s/004A/00A2/; # J
$e =~ s/0048/00E7/; # H
$e =~ s/0054/00E8/; # T
$e =~ s/0057/00EF/; # W
$e =~ s/0059/00DF/; # Y
$e =~ s/0041/00A0/; # A
$e =~ s/0069/00D1/; # i
}
# We have to map the output, not the input, because
# pack/unpack U has been EBCDICified, too, it would
# just undo our remapping.
}
print $w eq $e ?
"ok $test # $i -> $w\n" : "not ok $test # $h -> $e ($w)\n";
$test++;
}
}
for my $i (sort { $a <=> $b } keys %none) {
my $w = $i = sprintf "%04X", $i;
my $c = pack "U0U", hex $i;
foreach my $func (@funcs) {
my $d = $func->($c);
my $e = unidump($d);
print $d eq $c ?
"ok $test # $i -> $w\n" : "not ok $test # $i -> $e ($w)\n";
$test++;
}
}
}
1;
--- NEW FILE: title.t ---
BEGIN {
chdir 't' if -d 't';
@INC = qw(../lib uni .);
require "case.pl";
}
casetest("Title", \%utf8::ToSpecTitle, sub { ucfirst $_[0] },
sub { my $a = ""; ucfirst ($_[0] . $a) });
--- NEW FILE: tr_7jis.t ---
#
# $Id: tr_7jis.t,v 1.1 2006-12-05 04:27:26 dslinux_cayenne Exp $
#
# This script is written intentionally in ISO-2022-JP
# requires Encode 1.83 or better to work
# -- dankogai
BEGIN {
if ($ENV{'PERL_CORE'}){
chdir 't';
@INC = '../lib';
}
require Config; import Config;
if ($Config{'extensions'} !~ /\bEncode\b/) {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
if (ord("A") == 193) {
print "1..0 # Skip: EBCDIC\n";
exit 0;
}
unless (PerlIO::Layer->find('perlio')){
print "1..0 # Skip: PerlIO required\n";
exit 0;
}
if ($ENV{PERL_CORE_MINITEST}) {
print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
exit 0;
}
$| = 1;
}
use strict;
use Test::More tests => 6;
use Encode;
use encoding 'iso-2022-jp';
my @hiragana = map {chr} ord("ぁ")..ord("ん");
my @katakana = map {chr} ord("ァ")..ord("ン");
my $hiragana = join('' => @hiragana);
my $katakana = join('' => @katakana);
my %h2k; @h2k{@hiragana} = @katakana;
my %k2h; @k2h{@katakana} = @hiragana;
# print @hiragana, "\n";
my $str;
$str = $hiragana; $str =~ tr/ぁ-ん/ァ-ン/;
is($str, $katakana, "tr// # hiragana -> katakana");
$str = $katakana; $str =~ tr/ァ-ン/ぁ-ん/;
is($str, $hiragana, "tr// # hiragana -> katakana");
$str = $hiragana; eval qq(\$str =~ tr/ぁ-ん/ァ-ン/);
is($str, $katakana, "eval qq(tr//) # hiragana -> katakana");
$str = $katakana; eval qq(\$str =~ tr/ァ-ン/ぁ-ん/);
is($str, $hiragana, "eval qq(tr//) # hiragana -> katakana");
$str = $hiragana; $str =~ s/([ぁ-ん])/$h2k{$1}/go;
is($str, $katakana, "s/// # hiragana -> katakana");
$str = $katakana; $str =~ s/([ァ-ン])/$k2h{$1}/go;
is($str, $hiragana, "s/// # hiragana -> katakana");
__END__
--- NEW FILE: class.t ---
BEGIN {
chdir 't' if -d 't';
@INC = qw(../lib .);
require "test.pl";
}
plan tests => 4670;
sub MyUniClass {
<<END;
0030 004F
END
}
sub Other::Class {
<<END;
0040 005F
END
}
sub A::B::Intersection {
<<END;
+main::MyUniClass
&Other::Class
END
}
sub test_regexp ($$) {
# test that given string consists of N-1 chars matching $qr1, and 1
# char matching $qr2
my ($str, $blk) = @_;
# constructing these objects here makes the last test loop go much faster
my $qr1 = qr/(\p{$blk}+)/;
if ($str =~ $qr1) {
is($1, substr($str, 0, -1)); # all except last char
}
else {
fail('first N-1 chars did not match');
}
my $qr2 = qr/(\P{$blk}+)/;
if ($str =~ $qr2) {
is($1, substr($str, -1)); # only last char
}
else {
fail('last char did not match');
}
}
use strict;
my $str = join "", map chr($_), 0x20 .. 0x6F;
# make sure it finds built-in class
is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
# make sure it finds user-defined class
is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
# make sure it finds class in other package
is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
# make sure it finds class in other OTHER package
is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');
# all of these should look in lib/unicore/bc/AL.pl
$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");
# make sure InGreek works
$str = "[\x{038B}\x{038C}\x{038D}]";
is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
use File::Spec;
my $updir = File::Spec->updir;
# the %utf8::... hashes are already in existence
# because utf8_pva.pl was run by utf8_heavy.pl
*utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning
no warnings 'utf8'; # we do not want warnings about surrogates etc
# non-General Category and non-Script
while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
my $prop_name = $utf8::PropertyAlias{$abbrev};
next unless $prop_name;
next if $abbrev eq "gc_sc";
for (sort keys %$files) {
my $filename = File::Spec->catfile(
$updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
);
next unless -e $filename;
my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
for my $p ($prop_name, $abbrev) {
for my $c ($files->{$_}, $_) {
is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
}
}
}
}
# General Category and Script
for my $p ('gc', 'sc') {
while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
my $filename = File::Spec->catfile(
$updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
);
next unless -e $filename;
my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
test_regexp ($str, $y);
}
}
}
}
# test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
SKIP:
{
skip "Can't reliably derive class names from file names", 592 if $^O eq 'VMS';
# On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
# return true. Try to work around this by reading the filenames explicitly
# to get a case sensitive test. N.B. This will fail if filename case is
# not preserved because you might go looking for a class name of CF or cf
# when you really want Cf. Storing case sensitive data in filenames is
# simply not portable.
my %files;
my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc');
opendir D, $dirname or die $!;
@files{readdir(D)} = ();
closedir D;
for (keys %utf8::PA_reverse) {
my $leafname = "$utf8::PA_reverse{$_}.pl";
next unless exists $files{$leafname};
my $filename = File::Spec->catfile($dirname, $leafname);
my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
for my $x ('gc', 'General Category') {
print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
for my $y ($_, $utf8::PA_reverse{$_}) {
is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
test_regexp ($str, $y);
}
}
}
}
# test the blocks (InFoobar)
for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
my $filename = File::Spec->catfile(
$updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
);
next unless -e $filename;
print "# In$_ $filename\n";
my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
my $blk = $_;
test_regexp ($str, $blk);
$blk =~ s/^In/Block:/;
test_regexp ($str, $blk);
}
--- NEW FILE: upper.t ---
BEGIN {
chdir 't' if -d 't';
@INC = qw(../lib uni .);
require "case.pl";
}
casetest("Upper", \%utf8::ToSpecUpper, sub { uc $_[0] },
sub { my $a = ""; uc ($_[0] . $a) });
--- NEW FILE: tr_eucjp.t ---
#
# $Id: tr_eucjp.t,v 1.1 2006-12-05 04:27:26 dslinux_cayenne Exp $
#
# This script is written intentionally in EUC-JP
# -- dankogai
BEGIN {
if ($ENV{'PERL_CORE'}){
chdir 't';
@INC = '../lib';
}
require Config; import Config;
if ($Config{'extensions'} !~ /\bEncode\b/) {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
if (ord("A") == 193) {
print "1..0 # Skip: EBCDIC\n";
exit 0;
}
unless (PerlIO::Layer->find('perlio')){
print "1..0 # Skip: PerlIO required\n";
exit 0;
}
if ($ENV{PERL_CORE_MINITEST}) {
print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
exit 0;
}
$| = 1;
}
use strict;
use Test::More tests => 6;
use Encode;
use encoding 'euc-jp';
my @hiragana = map {chr} ord("¤¡")..ord("¤ó");
my @katakana = map {chr} ord("¥¡")..ord("¥ó");
my $hiragana = join('' => @hiragana);
my $katakana = join('' => @katakana);
my %h2k; @h2k{@hiragana} = @katakana;
my %k2h; @k2h{@katakana} = @hiragana;
# print @hiragana, "\n";
my $str;
$str = $hiragana; $str =~ tr/¤¡-¤ó/¥¡-¥ó/;
is($str, $katakana, "tr// # hiragana -> katakana");
$str = $katakana; $str =~ tr/¥¡-¥ó/¤¡-¤ó/;
is($str, $hiragana, "tr// # hiragana -> katakana");
$str = $hiragana; eval qq(\$str =~ tr/¤¡-¤ó/¥¡-¥ó/);
is($str, $katakana, "eval qq(tr//) # hiragana -> katakana");
$str = $katakana; eval qq(\$str =~ tr/¥¡-¥ó/¤¡-¤ó/);
is($str, $hiragana, "eval qq(tr//) # hiragana -> katakana");
$str = $hiragana; $str =~ s/([¤¡-¤ó])/$h2k{$1}/go;
is($str, $katakana, "s/// # hiragana -> katakana");
$str = $katakana; $str =~ s/([¥¡-¥ó])/$k2h{$1}/go;
is($str, $hiragana, "s/// # hiragana -> katakana");
__END__
--- NEW FILE: tr_sjis.t ---
#
# $Id: tr_sjis.t,v 1.1 2006-12-05 04:27:26 dslinux_cayenne Exp $
#
# This script is written intentionally in Shift JIS
# -- dankogai
BEGIN {
if ($ENV{'PERL_CORE'}){
chdir 't';
@INC = '../lib';
}
require Config; import Config;
if ($Config{'extensions'} !~ /\bEncode\b/) {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
if (ord("A") == 193) {
print "1..0 # Skip: EBCDIC\n";
exit 0;
}
unless (PerlIO::Layer->find('perlio')){
print "1..0 # Skip: PerlIO required\n";
exit 0;
}
if ($ENV{PERL_CORE_MINITEST}) {
print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
exit 0;
}
$| = 1;
}
use strict;
use Test::More tests => 6;
use Encode;
use encoding 'shiftjis';
my @hiragana = map {chr} ord("")..ord("ñ");
my @katakana = map {chr} ord("@")..ord("");
my $hiragana = join('' => @hiragana);
my $katakana = join('' => @katakana);
my %h2k; @h2k{@hiragana} = @katakana;
my %k2h; @k2h{@katakana} = @hiragana;
# print @hiragana, "\n";
my $str;
$str = $hiragana; $str =~ tr/-ñ/@-/;
is($str, $katakana, "tr// # hiragana -> katakana");
$str = $katakana; $str =~ tr/@-/-ñ/;
is($str, $hiragana, "tr// # hiragana -> katakana");
$str = $hiragana; eval qq(\$str =~ tr/-ñ/@-/);
is($str, $katakana, "eval qq(tr//) # hiragana -> katakana");
$str = $katakana; eval qq(\$str =~ tr/@-/-ñ/);
is($str, $hiragana, "eval qq(tr//) # hiragana -> katakana");
$str = $hiragana; $str =~ s/([-ñ])/$h2k{$1}/go;
is($str, $katakana, "s/// # hiragana -> katakana");
$str = $katakana; $str =~ s/([@-])/$k2h{$1}/go;
is($str, $hiragana, "s/// # hiragana -> katakana");
__END__
--- NEW FILE: chomp.t ---
#!./perl -w
BEGIN {
if ($ENV{'PERL_CORE'}){
chdir 't';
@INC = '../lib';
}
require Config; import Config;
if ($Config{'extensions'} !~ /\bEncode\b/) {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
if (ord("A") == 193) {
print "1..0 # Skip: EBCDIC\n";
exit 0;
}
unless (PerlIO::Layer->find('perlio')){
print "1..0 # Skip: PerlIO required\n";
exit 0;
}
if ($ENV{PERL_CORE_MINITEST}) {
print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
exit 0;
}
}
use Encode;
use strict;
use Test::More;
# %mbchars = (encoding => { bytes => utf8, ... }, ...);
# * pack('C*') is expected to return bytes even if ${^ENCODING} is true.
our %mbchars = (
'big-5' => {
pack('C*', 0x40) => pack('U*', 0x40), # COMMERCIAL AT
pack('C*', 0xA4, 0x40) => "\x{4E00}", # CJK-4E00
},
'euc-jp' => {
pack('C*', 0xB0, 0xA1) => "\x{4E9C}", # CJK-4E9C
pack('C*', 0x8F, 0xB0, 0xA1) => "\x{4E02}", # CJK-4E02
},
'shift-jis' => {
pack('C*', 0xA9) => "\x{FF69}", # halfwidth katakana small U
pack('C*', 0x82, 0xA9) => "\x{304B}", # hiragana KA
},
);
# 4 == @char; paired tests inside 3 nested loops,
# plus extra pair of tests in a loop, plus extra pair of tests.
plan tests => 2 * (4 ** 3 + 4 + 1) * (keys %mbchars);
for my $enc (sort keys %mbchars) {
local ${^ENCODING} = find_encoding($enc);
my @char = (sort(keys %{ $mbchars{$enc} }),
sort(values %{ $mbchars{$enc} }));
for my $rs (@char) {
local $/ = $rs;
for my $start (@char) {
for my $end (@char) {
my $string = $start.$end;
my ($expect, $return);
if ($end eq $rs) {
$expect = $start;
# The answer will always be a length in utf8, even if the
# scalar was encoded with a different length
$return = length ($end . "\x{100}") - 1;
} else {
$expect = $string;
$return = 0;
}
is (chomp ($string), $return);
is ($string, $expect); # "$enc \$/=$rs $start $end"
}
}
# chomp should not stringify references unless it decides to modify
# them
$_ = [];
my $got = chomp();
is ($got, 0);
is (ref($_), "ARRAY", "chomp ref (no modify)");
}
$/ = ")"; # the last char of something like "ARRAY(0x80ff6e4)"
my $got = chomp();
is ($got, 1);
ok (!ref($_), "chomp ref (modify)");
}
--- NEW FILE: sprintf.t ---
#!./perl -w
BEGIN {
chdir 't' if -d 't';
@INC = qw(../lib .);
require "test.pl";
}
plan tests => 52;
$a = "B\x{fc}f";
$b = "G\x{100}r";
$c = 0x200;
{
my $s = sprintf "%s", $a;
is($s, $a, "%s a");
}
{
my $s = sprintf "%s", $b;
is($s, $b, "%s b");
}
{
my $s = sprintf "%s%s", $a, $b;
is($s, $a.$b, "%s%s a b");
}
{
my $s = sprintf "%s%s", $b, $a;
is($s, $b.$a, "%s%s b a");
}
{
my $s = sprintf "%s%s", $b, $b;
is($s, $b.$b, "%s%s b b");
}
{
my $s = sprintf "%s$b", $a;
is($s, $a.$b, "%sb a");
}
{
my $s = sprintf "$b%s", $a;
is($s, $b.$a, "b%s a");
}
{
my $s = sprintf "%s$a", $b;
is($s, $b.$a, "%sa b");
}
{
my $s = sprintf "$a%s", $b;
is($s, $a.$b, "a%s b");
}
{
my $s = sprintf "$a%s", $a;
is($s, $a.$a, "a%s a");
}
{
my $s = sprintf "$b%s", $b;
is($s, $b.$b, "a%s b");
}
{
my $s = sprintf "%c", $c;
is($s, chr($c), "%c c");
}
{
my $s = sprintf "%s%c", $a, $c;
is($s, $a.chr($c), "%s%c a c");
}
{
my $s = sprintf "%c%s", $c, $a;
is($s, chr($c).$a, "%c%s c a");
}
{
my $s = sprintf "%c$b", $c;
is($s, chr($c).$b, "%cb c");
}
{
my $s = sprintf "%s%c$b", $a, $c;
is($s, $a.chr($c).$b, "%s%cb a c");
}
{
my $s = sprintf "%c%s$b", $c, $a;
is($s, chr($c).$a.$b, "%c%sb c a");
}
{
my $s = sprintf "$b%c", $c;
is($s, $b.chr($c), "b%c c");
}
{
my $s = sprintf "$b%s%c", $a, $c;
is($s, $b.$a.chr($c), "b%s%c a c");
}
{
my $s = sprintf "$b%c%s", $c, $a;
is($s, $b.chr($c).$a, "b%c%s c a");
}
{
# 20010407.008 sprintf removes utf8-ness
$a = sprintf "\x{1234}";
is((sprintf "%x %d", unpack("U*", $a), length($a)), "1234 1",
'\x{1234}');
$a = sprintf "%s", "\x{5678}";
is((sprintf "%x %d", unpack("U*", $a), length($a)), "5678 1",
'%s \x{5678}');
$a = sprintf "\x{1234}%s", "\x{5678}";
is((sprintf "%x %x %d", unpack("U*", $a), length($a)), "1234 5678 2",
'\x{1234}%s \x{5678}');
}
{
# check that utf8ness doesn't "accumulate"
my $w = "w\x{fc}";
my $sprintf;
$sprintf = sprintf "%s%s", $w, "$w\x{100}";
is(substr($sprintf,0,2), $w, "utf8 echo");
$sprintf = sprintf "%s%s", $w, "$w\x{100}";
is(substr($sprintf,0,2), $w, "utf8 echo echo");
}
my @values =(chr 110, chr 255, chr 256);
foreach my $prefix (@values) {
foreach my $vector (map {$_ . $_} @values) {
my $format = "$prefix%*vd";
foreach my $dot (@values) {
my $result = sprintf $format, $dot, $vector;
is (length $result, 8)
or print "# ", join (',', map {ord $_} $prefix, $dot, $vector),
"\n";
}
}
}
More information about the dslinux-commit
mailing list