dslinux/user/perl/ext/Encode/t Aliases.t CJKT.t Encode.t Encoder.t Mod_EUCJP.pm Unicode.t at-cn.t at-tw.t big5-eten.enc big5-eten.utf big5-hkscs.enc big5-hkscs.utf enc_data.t enc_eucjp.t enc_module.enc enc_module.t enc_utf8.t encoding.t fallback.t gb2312.enc gb2312.utf grow.t gsm0338.t guess.t jisx0201.enc jisx0201.utf jisx0208.enc jisx0208.utf jisx0212.enc jisx0212.utf jperl.t ksc5601.enc ksc5601.utf mime-header.t mime_header_iso2022jp.t perlio.t rt.pl unibench.pl utf8strict.t

cayenne dslinux_cayenne at user.in-berlin.de
Tue Dec 5 05:26:40 CET 2006


Update of /cvsroot/dslinux/dslinux/user/perl/ext/Encode/t
In directory antilope:/tmp/cvs-serv7729/ext/Encode/t

Added Files:
	Aliases.t CJKT.t Encode.t Encoder.t Mod_EUCJP.pm Unicode.t 
	at-cn.t at-tw.t big5-eten.enc big5-eten.utf big5-hkscs.enc 
	big5-hkscs.utf enc_data.t enc_eucjp.t enc_module.enc 
	enc_module.t enc_utf8.t encoding.t fallback.t gb2312.enc 
	gb2312.utf grow.t gsm0338.t guess.t jisx0201.enc jisx0201.utf 
	jisx0208.enc jisx0208.utf jisx0212.enc jisx0212.utf jperl.t 
	ksc5601.enc ksc5601.utf mime-header.t mime_header_iso2022jp.t 
	perlio.t rt.pl unibench.pl utf8strict.t 
Log Message:
Adding fresh perl source to HEAD to branch from

--- NEW FILE: big5-hkscs.utf ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: jperl.t ---
#
# $Id: jperl.t,v 1.1 2006-12-05 04:26:38 dslinux_cayenne Exp $
#
# This script is written in euc-jp

BEGIN {
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    unless (find PerlIO::Layer 'perlio') {
	print "1..0 # Skip: PerlIO was not built\n";
	exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # Skip: EBCDIC\n";
	exit 0;
    }
    $| = 1;
}

no utf8; # we have raw Japanese encodings here

use strict;
#use Test::More tests => 18;
use Test::More tests => 15; # black magic tests commented out
my $Debug = shift;

no encoding; # ensure
my $Enamae = "\xbe\xae\xbb\xf4\x20\xc3\xc6"; # euc-jp, with \x escapes
use encoding "euc-jp";

my $Namae  = "¾®»ô ÃÆ";   # in Japanese, in euc-jp
my $Name   = "Dan Kogai"; # in English
# euc-jp in \x format but after the pragma.  But this one will be converted!
my $Ynamae = "\xbe\xae\xbb\xf4\x20\xc3\xc6"; 


my $str = $Namae; $str =~ s/¾®»ô ÃÆ/Dan Kogai/o;
is($str, $Name, q{regex});
$str = $Namae; $str =~ s/$Namae/Dan Kogai/o;
is($str, $Name, q{regex - with variable});
is(length($Namae), 4, q{utf8:length});
{
    use bytes;
    # converted to UTF-8 so 3*3+1
    is(length($Namae),   10, q{bytes:length}); 
    # 
    is(length($Enamae),   7, q{euc:length}); # 2*3+1
    is ($Namae, $Ynamae,     q{literal conversions});
    isnt($Enamae, $Ynamae,   q{before and after}); 
    is($Enamae, Encode::encode('euc-jp', $Namae)); 
}
# let's test the scope as well.  Must be in utf8 realm
is(length($Namae), 4, q{utf8:length});

{
    no encoding;
    ok(! defined(${^ENCODING}), q{no encoding;});
}
# should've been isnt() but no scoping is suported -- yet
ok(! defined(${^ENCODING}), q{not scoped yet});

#
# The following tests are commented out to accomodate
# Inaba-San's patch to make tr/// work w/o eval qq{}
#{
#    # now let's try some real black magic!
#    local(${^ENCODING}) = Encode::find_encoding("euc-jp");
#    my $str = "\xbe\xae\xbb\xf4\x20\xc3\xc6";
#   is (length($str), 4, q{black magic:length});
#   is ($str, $Enamae,   q{black magic:eq});
#}
#ok(! defined(${^ENCODING}), q{out of black magic});
use bytes;
is (length($Namae), 10);

#
# now something completely different!
#
{
    use encoding "euc-jp", Filter=>1;
    ok(1, "Filter on");
    use utf8;
    no strict 'vars'; # fools
    # doesn't work w/ "my" as of this writing.
    # because of  buggy strict.pm and utf8.pm
    our $¿Í = 2; 
    #   ^^U+4eba, "human" in CJK ideograph
    $¿Í++; # a child is born
    *people = \$¿Í;
    is ($people, 3, "Filter:utf8 identifier");
    no encoding;
    ok(1, "Filter off");
}

1;
__END__



--- NEW FILE: enc_module.t ---
# $Id: enc_module.t,v 1.1 2006-12-05 04:26:37 dslinux_cayenne Exp $
# This file is in euc-jp
BEGIN {
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    unless (find PerlIO::Layer 'perlio') {
	print "1..0 # Skip: PerlIO was not built\n";
	exit 0;
    }
    if (defined ${^UNICODE} and ${^UNICODE} != 0){
	print "1..0 # Skip: \${^UNICODE} == ${^UNICODE}\n";
	exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # encoding pragma does not support EBCDIC platforms\n";
	exit(0);
    }
}
use lib qw(t ext/Encode/t ../ext/Encode/t); # latter 2 for perl core
use Mod_EUCJP;
use encoding "euc-jp";
use Test::More tests => 3;
use File::Basename;
use File::Spec;
use File::Compare qw(compare_text);

my $DEBUG = shift || 0;
my $dir = dirname(__FILE__);
my $file0 = File::Spec->catfile($dir,"enc_module.enc");
my $file1 = File::Spec->catfile($dir,"$$.enc");

my $obj = Mod_EUCJP->new;
local $SIG{__WARN__} = sub{ $DEBUG and print STDERR @_ };
# to silence reopening STD(IN|OUT) w/o closing unless $DEBUG

open STDOUT, ">", $file1 or die "$file1:$!";
print $obj->str, "\n";
$obj->set("¥Æ¥¹¥Èʸ»úÎó");
print $obj->str, "\n";

# Please do not move this to a point after the comparison -- Craig Berry
# and "unless $^O eq 'freebsd'" is needed for FreeBSD (toy-)?thread
# -- dankogai
close STDOUT unless $^O eq 'freebsd';

my $cmp = compare_text($file0, $file1);
is($cmp, 0, "encoding vs. STDOUT");

my @cmp = qw/½é´üʸ»úÎó ¥Æ¥¹¥Èʸ»úÎó/;
open STDIN, "<", $file0 or die "$file0:$!";
$obj = Mod_EUCJP->new;
my $i = 0;
while(<STDIN>){
    s/\r?\n\z//;
    is ($cmp[$i++], $_, "encoding vs. STDIN - $i");
}

unlink $file1 unless $cmp;
__END__


--- NEW FILE: at-cn.t ---
BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
	unshift @INC, '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # Skip: EBCDIC\n";
	exit 0;
    }
    $| = 1;
}

use strict;
use Test::More tests => 29;
use Encode;

no utf8; # we have raw Chinese encodings here

use_ok('Encode::CN');

# Since JP.t already tests basic file IO, we will just focus on
# internal encode / decode test here. Unfortunately, to test
# against all the UniHan characters will take a huge disk space,
# not to mention the time it will take, and the fact that Perl
# did not bundle UniHan.txt anyway.

# So, here we just test a typical snippet spanning multiple Unicode
# blocks, and hope it can point out obvious errors.

run_tests('Simplified Chinese only', {
    'utf'	=> (
12298.26131.32463.12299.31532.19968.21350.
24406.26352.65306.
22823.21705.20094.20803.65292.19975.29289.36164.22987.65292.
20035.32479.22825.12290.
20113.34892.38632.26045.65292.21697.29289.27969.24418.12290.
22823.26126.22987.32456.65292.20845.20301.26102.25104.65292.
26102.20056.20845.40857.20197.24481.22825.12290.
20094.36947.21464.21270.65292.21508.27491.24615.21629.65292.
20445.21512.22823.21644.65292.20035.21033.36126.12290.
39318.20986.24246.29289.65292.19975.22269.21688.23425.12290
    ),

    'euc-cn'	=> join('',
'¡¶Ò×¾­¡·µÚÒ»ØÔ',
'åèÔ»£º',
'´óÔÕǬԪ£¬ÍòÎï×Êʼ£¬',
'ÄËͳÌì¡£',
'ÔÆÐÐÓêÊ©£¬Æ·ÎïÁ÷ÐΡ£',
'´óÃ÷ʼÖÕ£¬Áùλʱ³É£¬',
'ʱ³ËÁùÁúÒÔÓùÌì¡£',
'ǬµÀ±ä»¯£¬¸÷ÕýÐÔÃü£¬',
'±£ºÏ´óºÍ£¬ÄËÀûÕê¡£',
'Ê׳öÊüÎÍò¹úÏÌÄþ¡£',
    ),

    'gb2312-raw'	=> join('',
'!6RW>-!75ZR;XT',
'ehT;#:',
'4sTUG,T*#,MrNoWJJ<#,',
'DKM3Ll!#',
'TFPPSjJ)#,F7NoAwPN!#',
'4sCwJ<VU#,AyN;J13I#,',
'J13KAyAzRTSyLl!#',
'G,5 at 1d;/#,8wU}PTC|#,',
'1#:O4s:M#,DK@{Uj!#',
'JW3vJ|No#,Mr9zOLD~!#'
    ), 

    'iso-ir-165'=> join('',
'!6RW>-!75ZR;XT',
'ehT;#:',
'4sTUG,T*#,MrNoWJJ<#,',
'DKM3Ll!#',
'TFPPSjJ)#,F7NoAwPN!#',
'4sCwJ<VU#,AyN;J13I#,',
'J13KAyAzRTSyLl!#',
'G,5 at 1d;/#,8wU}PTC|#,',
'1#:O4s:M#,DK@{Uj!#',
'JW3vJ|No#,Mr9zOLD~!#'
    ), 
});

run_tests('Simplified Chinese + ASCII', {
    'utf'	=> (
35937.26352.65306.10.
22825.34892.20581.65292.21531.23376.20197.33258.24378.19981.24687.12290.10.
28508.40857.21247.29992.65292.38451.22312.19979.20063.12290.32.
35265.40857.22312.30000.65292.24503.26045.26222.20063.12290.32.
32456.26085.20094.20094.65292.21453.22797.36947.20063.12290.10.
25110.36291.22312.28170.65292.36827.26080.21646.20063.12290.39134.
40857.22312.22825.65292.22823.20154.36896.20063.12290.32.
20130.40857.26377.24724.65292.30408.19981.21487.20037.20063.12290.10.
29992.20061.65292.22825.24503.19981.21487.20026.39318.20063.12290
    ),

    'cp936'	=> join(chr(10),
'ÏóÔ»£º',
'ÌìÐн¡£¬¾ý×ÓÒÔ×ÔÇ¿²»Ï¢¡£',
'DZÁúÎðÓã¬ÑôÔÚÏÂÒ²¡£ ¼ûÁúÔÚÌµÂÊ©ÆÕÒ²¡£ ÖÕÈÕǬǬ£¬·´¸´µÀÒ²¡£',
'»òÔ¾ÔÚÔ¨£¬½øÎÞ¾ÌÒ²¡£·ÉÁúÔÚÌ죬´óÈËÔìÒ²¡£ ¿ºÁúÓлڣ¬Ó¯²»¿É¾ÃÒ²¡£',
'Óþţ¬ÌìµÂ²»¿ÉΪÊ×Ò²¡£',
    ),

    'hz'	=> join(chr(10),
'~{OsT;#:~}',
'~{LlPP=!#,>}WSRTWTG?2;O"!#~}',
'~{G1AzNpSC#,QtTZOBR2!#~} ~{<{AzTZLo#,5BJ)FUR2!#~} ~{VUHUG,G,#,74845 at R2!#~}',
'~{;rT>TZT(#,=xN^>LR2!#7IAzTZLl#,4sHKTlR2!#~} ~{?:AzSP;Z#,S/2;?I>CR2!#~}',
'~{SC>E#,Ll5B2;?IN*JWR2!#~}',
    ),
});

run_tests('Traditional Chinese', {
    'utf',	=> 20094.65306.20803.12289.20136.12289.21033.12289.35998,
    'gb12345-raw'	=> 'G,#:T*!":`!"@{!"Uj',
    'gbk'	=> 'Ǭ£ºÔª¡¢ºà¡¢Àû¡¢Ø‘',
});

sub run_tests {
    my ($title, $tests) = @_;
    my $utf = delete $tests->{'utf'};

    # $enc = encoding, $str = content
    foreach my $enc (sort keys %{$tests}) {
	my $str = $tests->{$enc};

	is(Encode::decode($enc, $str), $utf, "[$enc] decode - $title");
	is(Encode::encode($enc, $utf), $str, "[$enc] encode - $title");

	my $str2 = $str;
	my $utf8 = Encode::encode('utf-8', $utf);

	Encode::from_to($str2, $enc, 'utf-8');
	is($str2, $utf8, "[$enc] from_to => utf8 - $title");

	Encode::from_to($utf8, 'utf-8', $enc); # convert $utf8 as $enc
	is($utf8, $str,  "[$enc] utf8 => from_to - $title");
    }
}

--- NEW FILE: Mod_EUCJP.pm ---
# $Id: Mod_EUCJP.pm,v 1.1 2006-12-05 04:26:37 dslinux_cayenne Exp $
# This file is in euc-jp
package Mod_EUCJP;
use encoding "euc-jp";
sub new {
  my $class = shift;
  my $str = shift || qw/½é´üʸ»úÎó/;
  my $self = bless { 
      str => '',
  }, $class;
  $self->set($str);
  $self;
}
sub set {
  my ($self,$str) = @_;
  $self->{str} = $str;
  $self;
}
sub str { shift->{str}; }
sub put { print shift->{str}; }
1;
__END__

--- NEW FILE: rt.pl ---
#!/usr/local/bin/perl
#
# $Id: rt.pl,v 1.1 2006-12-05 04:26:38 dslinux_cayenne Exp $
#

BEGIN {
    my $ucmdir  = "ucm";
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        unshift @INC, '../lib';
        $ucmdir = "../ext/Encode/ucm";
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # Skip: EBCDIC\n";
	exit 0;
    }
    use strict;
    require Test::More;
    our $DEBUG;
    our @ucm;
    unless(@ARGV){
	use File::Spec;
	Test::More->import(tests => 103);
	opendir my $dh, $ucmdir or die "$ucmdir:$!";
	@ucm = 
	    map {File::Spec->catfile($ucmdir, $_) } 
		sort grep {/\.ucm$/o} readdir($dh);
	closedir $dh;
    }else{
	Test::More->import("no_plan");
	$DEBUG = 1;
	@ucm = @ARGV;
    }
}

use strict;
use Encode qw/encode decode/;
our $DEBUG;
our @ucm;

for my $ucm (@ucm){
    my ($name, $nchar, $nrt, $nok) = rttest($ucm);
    $nok += 0;
    ok($nok == 0, "$ucm => $name ($nchar, $nrt, $nok)");
}

sub rttest{
    my $ucm = shift;
    my ($name, $nchar, $nrt, $nok);
    open my $rfh, "<$ucm" or die "$ucm:$!";
    # <U0000> \x00 |0 # <control>
    while(<$rfh>){
	s/#.*//o; /^$/ and next;
	unless ($name){
	    /^<code_set_name>\s+"([^\"]+)"/io or next;
	    $name = $1 and next;
	}else{
	    /^<U([0-9a-f]+)>\s+(\S+)\s+\|(\d)/io or next;
	    $nchar++;
	    $3 == 0 or next;
	    $nrt++;
	    my $uni = chr(hex($1));
	    my $enc = eval qq{ "$2" };
	    decode($name, $enc) eq $uni or $nok++;
	    encode($name, $uni) eq $enc or $nok++;
	}
    }
    return($name, $nchar, $nrt, $nok);
}
__END__

--- NEW FILE: fallback.t ---
BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        unshift @INC, '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # Skip: EBCDIC\n";
	exit 0;
    }
    $| = 1;
}

use strict;
#use Test::More qw(no_plan);
use Test::More tests => 44;
use Encode q(:all);

my $uo = '';
my $nf  = '';
my ($af, $aq, $ap, $ah, $ax, $uf, $uq, $up, $uh, $ux, $ac, $uc);
for my $i (0x20..0x7e){
    $uo .= chr($i);
}
$af = $aq = $ap = $ah = $ax = $ac =
$uf = $uq = $up = $uh = $ux = $uc =
$nf = $uo;

my $residue = '';
for my $i (0x80..0xff){
    $uo   .= chr($i);
    $residue    .= chr($i);
    $af .= '?';
    $uf .= "\x{FFFD}";
    $ap .= sprintf("\\x{%04x}", $i);
    $up .= sprintf("\\x%02X", $i);
    $ah .= sprintf("&#%d;", $i);
    $uh .= sprintf("\\x%02X", $i);
    $ax .= sprintf("&#x%x;", $i);
    $ux .= sprintf("\\x%02X", $i);
    $ac .= sprintf("<U+%04X>", $i);
    $uc .= sprintf("[%02X]", $i);
}

my $ao = $uo;
utf8::upgrade($uo);

my $ascii  = find_encoding('ascii');
my $utf8   = find_encoding('utf8');

my $src = $uo;
my $dst = $ascii->encode($src, FB_DEFAULT);
is($dst, $af, "FB_DEFAULT ascii");
is($src, $uo, "FB_DEFAULT residue ascii");

$src = $ao;
$dst = $utf8->decode($src, FB_DEFAULT);
is($dst, $uf, "FB_DEFAULT utf8");
is($src, $ao, "FB_DEFAULT residue utf8");

$src = $uo;
eval{ $dst = $ascii->encode($src, FB_CROAK) };
like($@, qr/does not map to ascii/o, "FB_CROAK ascii");
is($src, $uo, "FB_CROAK residue ascii");

$src = $ao;
eval{ $dst = $utf8->decode($src, FB_CROAK) };
like($@, qr/does not map to Unicode/o, "FB_CROAK utf8");
is($src, $ao, "FB_CROAK residue utf8");

$src = $nf;
eval{ $dst = $ascii->encode($src, FB_CROAK) };
is($@, '', "FB_CROAK on success ascii");
is($src, '', "FB_CROAK on success residue ascii");

$src = $nf;
eval{ $dst = $utf8->decode($src, FB_CROAK) };
is($@, '', "FB_CROAK on success utf8");
is($src, '', "FB_CROAK on success residue utf8");

$src = $uo;
$dst = $ascii->encode($src, FB_QUIET);
is($dst, $aq,   "FB_QUIET ascii");
is($src, $residue, "FB_QUIET residue ascii");

$src = $ao;
$dst = $utf8->decode($src, FB_QUIET);
is($dst, $uq,   "FB_QUIET utf8");
is($src, $residue, "FB_QUIET residue utf8");

{
    my $message = '';
    local $SIG{__WARN__} = sub { $message = $_[0] };

    $src = $uo;
    $dst = $ascii->encode($src, FB_WARN);
    is($dst, $aq,   "FB_WARN ascii");
    is($src, $residue, "FB_WARN residue ascii");
    like($message, qr/does not map to ascii/o, "FB_WARN message ascii");

    $message = '';
    $src = $ao;
    $dst = $utf8->decode($src, FB_WARN);
    is($dst, $uq,   "FB_WARN utf8");
    is($src, $residue, "FB_WARN residue utf8");
    like($message, qr/does not map to Unicode/o, "FB_WARN message utf8");

    $message = '';
    $src = $uo;
    $dst = $ascii->encode($src, WARN_ON_ERR);
    is($dst, $af, "WARN_ON_ERR ascii");
    is($src, '',  "WARN_ON_ERR residue ascii");
    like($message, qr/does not map to ascii/o, "WARN_ON_ERR message ascii");

    $message = '';
    $src = $ao;
    $dst = $utf8->decode($src, WARN_ON_ERR);
    is($dst, $uf, "WARN_ON_ERR utf8");
    is($src, '',  "WARN_ON_ERR residue utf8");
    like($message, qr/does not map to Unicode/o, "WARN_ON_ERR message ascii");
}

$src = $uo;
$dst = $ascii->encode($src, FB_PERLQQ);
is($dst, $ap, "FB_PERLQQ encode");
is($src, $uo, "FB_PERLQQ residue encode");

$src = $ao;
$dst = $ascii->decode($src, FB_PERLQQ);
is($dst, $up, "FB_PERLQQ decode");
is($src, $ao, "FB_PERLQQ residue decode");

$src = $uo;
$dst = $ascii->encode($src, FB_HTMLCREF);
is($dst, $ah, "FB_HTMLCREF encode");
is($src, $uo, "FB_HTMLCREF residue encode");

$src = $ao;
$dst = $ascii->decode($src, FB_HTMLCREF);
is($dst, $uh, "FB_HTMLCREF decode");
is($src, $ao, "FB_HTMLCREF residue decode");

$src = $uo;
$dst = $ascii->encode($src, FB_XMLCREF);
is($dst, $ax, "FB_XMLCREF encode");
is($src, $uo, "FB_XMLCREF residue encode");

$src = $ao;
$dst = $ascii->decode($src, FB_XMLCREF);
is($dst, $ux, "FB_XMLCREF decode");
is($src, $ao, "FB_XMLCREF residue decode");

$src = $uo;
$dst = $ascii->encode($src, sub{ sprintf "<U+%04X>", shift });
is($dst, $ac, "coderef encode");
is($src, $uo, "coderef residue encode");

$src = $ao;
$dst = $ascii->decode($src, sub{ sprintf "[%02X]", shift });
is($dst, $uc, "coderef decode");
is($src, $ao, "coderef residue decode");

--- NEW FILE: enc_eucjp.t ---
# $Id: enc_eucjp.t,v 1.1 2006-12-05 04:26:37 dslinux_cayenne Exp $
# This is the twin of enc_utf8.t .

BEGIN {
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    unless (find PerlIO::Layer 'perlio') {
	print "1..0 # Skip: PerlIO was not built\n";
	exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # encoding pragma does not support EBCDIC platforms\n";
	exit(0);
    }
    if ($] <= 5.008 and !$Config{perl_patchlevel}){
	print "1..0 # Skip: Perl 5.8.1 or later required\n";
	exit 0;
    }
}

use encoding 'euc-jp';

my @c = (127, 128, 255, 256);

print "1.." . (scalar @c + 1) . "\n";

my @f;

for my $i (0..$#c) {
  no warnings 'pack';
  my $file = filename("f$i");
  push @f, $file;
  open(F, ">$file") or die "$0: failed to open '$file' for writing: $!";
  binmode(F, ":utf8");
  print F chr($c[$i]);
  print F pack("C" => $c[$i]);
  close F;
}

my $t = 1;

for my $i (0..$#c) {
  my $file = filename("f$i");
  open(F, "<$file") or die "$0: failed to open '$file' for reading: $!";
  binmode(F, ":utf8");
  my $c = <F>;
  my $o = ord($c);
  print $o == $c[$i] ? "ok $t - utf8 I/O $c[$i]\n" : "not ok $t - utf8 I/O $c[$i]: $o != $c[$i]\n";
  $t++;
}

my $f = filename("f" . @f);

push @f, $f;
open(F, ">$f") or die "$0: failed to open '$f' for writing: $!";
binmode(F, ":raw"); # Output raw bytes.
print F chr(128); # Output illegal UTF-8.
close F;
open(F, $f) or die "$0: failed to open '$f' for reading: $!";
binmode(F, ":encoding(utf-8)");
{
	local $^W = 1;
	local $SIG{__WARN__} = sub { $a = shift };
	eval { <F> }; # This should get caught.
}
close F;
print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ?
  "ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " . unpack("H*", $a) . "\n";

# On VMS temporary file names like "f0." may be more readable than "f0" since
# "f0" could be a logical name pointing elsewhere.
sub filename {
    my $name = shift;
    $name .= '.' if $^O eq 'VMS';
    return $name;
}

END {
  1 while unlink @f;
}

--- NEW FILE: mime-header.t ---
#
# $Id: mime-header.t,v 1.1 2006-12-05 04:26:38 dslinux_cayenne Exp $
# This script is written in utf8
#
BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        unshift @INC, '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # Skip: EBCDIC\n";
	exit 0;
    }
    $| = 1;
}

no utf8;

use strict;
#use Test::More qw(no_plan);
use Test::More tests => 10;
use_ok("Encode::MIME::Header");

my $eheader =<<'EOS';
From: =?US-ASCII?Q?Keith_Moore?= <moore at cs.utk.edu>
To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld at dkuug.dk>
CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD at vm1.ulg.ac.be>
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
 =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
EOS

my $dheader=<<"EOS";
From: Keith Moore <moore\@cs.utk.edu>
To: Keld J\xF8rn Simonsen <keld\@dkuug.dk>
CC: Andr\xE9 Pirard <PIRARD\@vm1.ulg.ac.be>
Subject: If you can read this you understand the example.
EOS

is(Encode::decode('MIME-Header', $eheader), $dheader, "decode ASCII (RFC2047)");

use utf8;

my $uheader =<<'EOS';
From: =?US-ASCII?Q?Keith_Moore?= <moore at cs.utk.edu>
To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld at dkuug.dk>
CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD at vm1.ulg.ac.be>
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
 =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
EOS

is(Encode::decode('MIME-Header', $uheader), $dheader, "decode UTF-8 (RFC2047)");


$dheader=<<'EOS';
From: 小飼 弾 <dankogai at dan.co.jp>
To: dankogai at dan.co.jp (小飼=Kogai, 弾=Dan)
Subject: 漢字、カタカナ、ひらがなを含む、非常に長いタイトル行が一体全体どのようにしてEncodeされるのか?
EOS

my $bheader =<<'EOS';
From:=?UTF-8?B?IOWwj+mjvCDlvL4g?=<dankogai at dan.co.jp>
To: dankogai at dan.co.jp (=?UTF-8?B?5bCP6aO8?==Kogai,=?UTF-8?B?IOW8vg==?==Dan
 )
Subject:
 =?UTF-8?B?IOa8ouWtl+OAgeOCq+OCv+OCq+ODiuOAgeOBsuOCieOBjOOBquOCkuWQq+OCgA==?=
 =?UTF-8?B?44CB6Z2e5bi444Gr6ZW344GE44K/44Kk44OI44Or6KGM44GM5LiA5L2T5YWo?=
 =?UTF-8?B?5L2T44Gp44Gu44KI44GG44Gr44GX44GmRW5jb2Rl44GV44KM44KL44Gu44GL?=
 =?UTF-8?B?77yf?=
EOS

my $qheader=<<'EOS';
From:=?UTF-8?Q?=20=E5=B0=8F=E9=A3=BC=20=E5=BC=BE=20?=<dankogai at dan.co.jp>
To: dankogai at dan.co.jp (=?UTF-8?Q?=E5=B0=8F=E9=A3=BC?==Kogai,
 =?UTF-8?Q?=20=E5=BC=BE?==Dan)
Subject:
 =?UTF-8?Q?=20=E6=BC=A2=E5=AD=97=E3=80=81=E3=82=AB=E3=82=BF=E3=82=AB?=
 =?UTF-8?Q?=E3=83=8A=E3=80=81=E3=81=B2=E3=82=89=E3=81=8C=E3=81=AA=E3=82=92?=
 =?UTF-8?Q?=E5=90=AB=E3=82=80=E3=80=81=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7?=
 =?UTF-8?Q?=E3=81=84=E3=82=BF=E3=82=A4=E3=83=88=E3=83=AB=E8=A1=8C=E3=81=8C?=
 =?UTF-8?Q?=E4=B8=80=E4=BD=93=E5=85=A8=E4=BD=93=E3=81=A9=E3=81=AE=E3=82=88?=
 =?UTF-8?Q?=E3=81=86=E3=81=AB=E3=81=97=E3=81=A6Encode=E3=81=95?=
 =?UTF-8?Q?=E3=82=8C=E3=82=8B=E3=81=AE=E3=81=8B=EF=BC=9F?=
EOS

is(Encode::decode('MIME-Header', $bheader), $dheader, "decode B");
is(Encode::decode('MIME-Header', $qheader), $dheader, "decode Q");
is(Encode::encode('MIME-B', $dheader)."\n", $bheader, "encode B");
is(Encode::encode('MIME-Q', $dheader)."\n", $qheader, "encode Q");

$dheader = "What is =?UTF-8?B?w4RwZmVs?= ?";
$bheader = "What is =?UTF-8?B?PT9VVEYtOD9CP3c0UndabVZzPz0=?= ?";
$qheader = "What is =?UTF-8?Q?=3D=3FUTF=2D8=3FB=3Fw4RwZmVs=3F=3D?= ?";
is(Encode::encode('MIME-B', $dheader), $bheader, "Double decode B");
is(Encode::encode('MIME-Q', $dheader), $qheader, "Double decode Q");
{
    # From: Dave Evans <dave at rudolf.org.uk>
    # Subject: Bug in Encode::MIME::Header
    # Message-Id: <3F43440B.7060606 at rudolf.org.uk>
    use charnames ":full";
    my $pound_1024 = "\N{POUND SIGN}1024";
    is(Encode::encode('MIME-Q' => $pound_1024), '=?UTF-8?Q?=C2=A31024?=',
       'pound 1024');
}
__END__;

--- NEW FILE: enc_module.enc ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: grow.t ---
#!../perl
our $POWER;
BEGIN {
     if ($ENV{'PERL_CORE'}){
         chdir 't';
         unshift @INC, '../lib';
     }
     require Config; import Config;
     if ($Config{'extensions'} !~ /\bEncode\b/) {
         print "1..0 # Skip: Encode was not built\n";
             exit 0;
     }
     $POWER = 12; # up to 1 MB.  You may adjust the figure here
}

use strict;
use Encode;

my $seed = "";
for my $i (0x00..0xff){
     my $c = chr($i);
     $seed .= ($c =~ /^\p{IsPrint}/o) ? $c : " ";
}

use Test::More tests => $POWER*2;
my $octs = $seed;
use bytes ();
for my $i (1..$POWER){
     $octs .= $octs;
     my $len = bytes::length($octs);
     my $utf8 = Encode::decode('latin1', $octs);
     ok(1, "decode $len bytes");
     is($octs,
        Encode::encode('latin1', $utf8),
        "encode $len bytes");
}
__END__



--- NEW FILE: Encode.t ---
BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        unshift @INC, '../lib';
    }
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
}
use strict;
use Test;
use Encode qw(from_to encode decode
	      encode_utf8 decode_utf8
	      find_encoding is_utf8);
use charnames qw(greek);
my @encodings = grep(/iso-?8859/,Encode::encodings());
my $n = 2;
my @character_set = ('0'..'9', 'A'..'Z', 'a'..'z');
my @source = qw(ascii iso8859-1 cp1250);
my @destiny = qw(cp1047 cp37 posix-bc);
my @ebcdic_sets = qw(cp1047 cp37 posix-bc);
plan test => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256 + 6 + 2;
my $str = join('',map(chr($_),0x20..0x7E));
my $cpy = $str;
ok(length($str),from_to($cpy,'iso8859-1','Unicode'),"Length Wrong");
ok($cpy,$str,"ASCII mangled by translating from iso8859-1 to Unicode");
$cpy = $str;
ok(from_to($cpy,'Unicode','iso8859-1'),length($str),"Length wrong");
ok($cpy,$str,"ASCII mangled by translating from Unicode to iso8859-1");

$str = join('',map(chr($_),0xa0..0xff));
$cpy = $str;
ok(length($str),from_to($cpy,'iso8859-1','Unicode'),"Length Wrong");

my $sym = Encode->getEncoding('symbol');
my $uni = $sym->decode(encode(ascii => 'a'));
ok("\N{alpha}",substr($uni,0,1),"alpha does not map to symbol 'a'");
$str = $sym->encode("\N{Beta}");
ok("B",decode(ascii => substr($str,0,1)),"Symbol 'B' does not map to Beta");

foreach my $enc (qw(symbol dingbats ascii), at encodings)
 {
  my $tab = Encode->getEncoding($enc);
  ok(1,defined($tab),"Could not load $enc");
  $str = join('',map(chr($_),0x20..0x7E));
  $uni = $tab->decode($str);
  $cpy = $tab->encode($uni);
  ok($cpy,$str,"$enc mangled translating to Unicode and back");
 }

# On ASCII based machines see if we can map several codepoints from
# three distinct ASCII sets to three distinct EBCDIC coded character sets.
# On EBCDIC machines see if we can map from three EBCDIC sets to three
# distinct ASCII sets.

my @expectation = (240..249, 193..201,209..217,226..233, 129..137,145..153,162..169);
if (ord('A') != 65) {
    my @temp = @destiny;
    @destiny = @source;
    @source = @temp;
    undef(@temp);
    @expectation = (48..57, 65..90, 97..122);
}

foreach my $to (@destiny)
 {
  foreach my $from (@source)
   {
    my @expected = @expectation;
    foreach my $chr (@character_set)
     {
      my $native_chr = $chr;
      my $cpy = $chr;
      my $rc = from_to($cpy,$from,$to);
      ok(1,$rc,"Could not translate from $from to $to");
      ok(ord($cpy),shift(@expected),"mangled translating $native_chr from $from to $to");
     }
   }
 }

# On either ASCII or EBCDIC machines ensure we can take the full one
# byte repetoire to EBCDIC sets and back.

my $enc_as = 'iso8859-1';
foreach my $enc_eb (@ebcdic_sets)
 {
  foreach my $ord (0..255)
   {
    $str = chr($ord);
    my $rc = from_to($str,$enc_as,$enc_eb);
    $rc += from_to($str,$enc_eb,$enc_as);
    ok($rc,2,"return code for $ord $enc_eb -> $enc_as -> $enc_eb was not obtained");
    ok($ord,ord($str),"$enc_as mangled translating $ord to $enc_eb and back");
   }
 }

my $mime = find_encoding('iso-8859-2');
ok(defined($mime),1,"Cannot find MIME-ish'iso-8859-2'");
my $x11 = find_encoding('iso8859-2');
ok(defined($x11),1,"Cannot find X11-ish 'iso8859-2'");
ok($mime,$x11,"iso8598-2 and iso-8859-2 not same");
my $spc = find_encoding('iso 8859-2');
ok(defined($spc),1,"Cannot find 'iso 8859-2'");
ok($spc,$mime,"iso 8859-2 and iso-8859-2 not same");

for my $i (256,128,129,256)
 {
  my $c = chr($i);
  my $s = "$c\n".sprintf("%02X",$i);
  ok(utf8::valid($s),1,"concat of $i botched");
  utf8::upgrade($s);
  ok(utf8::valid($s),1,"concat of $i botched");
 }

# Spot check a few points in/out of utf8
for my $i (ord('A'),128,256,0x20AC)
 {
  my $c = chr($i);
  my $o = encode_utf8($c);
  ok(decode_utf8($o),$c,"decode_utf8 not inverse of encode_utf8 for $i");
  ok(encode('utf8',$c),$o,"utf8 encode by name broken for $i");
  ok(decode('utf8',$o),$c,"utf8 decode by name broken for $i");
 }


# is_utf8

ok(  is_utf8("\x{100}"));
ok(! is_utf8("a"));
ok(! is_utf8(""));
"\x{100}" =~ /(.)/;
ok(  is_utf8($1)); # ID 20011127.151
$a = $1;
ok(  is_utf8($a));
$a = "\x{100}";
chop $a;
ok(  is_utf8($a)); # weird but true: an empty UTF-8 string

# non-string arguments
package Encode::Dummy;
use overload q("") => sub { $_[0]->[0] };
sub new { my $class = shift; bless [ @_  ] => $class }
package main;
ok(decode(latin1 => Encode::Dummy->new("foobar")), "foobar");
ok(encode(utf8   => Encode::Dummy->new("foobar")), "foobar");

--- NEW FILE: mime_header_iso2022jp.t ---

use Test::More tests => 14;

use strict;
use Encode;

BEGIN{
	use_ok('Encode::MIME::Header::ISO_2022_JP');
}

require_ok('Encode::MIME::Header::ISO_2022_JP');

#  below codes are from mime.t in Jcode

my %mime = (
	"´Á»ú¡¢¥«¥¿¥«¥Ê¡¢¤Ò¤é¤¬¤Ê"
	 => "=?ISO-2022-JP?B?GyRCNEE7eiEiJSslPyUrJUohIiRSJGkkLCRKGyhC?=",
	"foo bar"
	 => "foo bar",
	"´Á»ú¡¢¥«¥¿¥«¥Ê¡¢¤Ò¤é¤¬¤Ê¤Îº®¤¸¤Ã¤¿Subject Header."
	 => "=?ISO-2022-JP?B?GyRCNEE7eiEiJSslPyUrJUohIiRSJGkkLCRKJE46LiQ4JEMkPxsoQlN1?=\n =?ISO-2022-JP?B?YmplY3Q=?= Header.",
);


for my $k (keys %mime){
    $mime{"$k\n"} = $mime{$k} . "\n";
}


for my $decoded (sort keys %mime){
	my $encoded = $mime{$decoded};

	my $header = Encode::encode('MIME-Header-ISO_2022_JP', decode('euc-jp', $decoded));
	my $utf8   = Encode::decode('MIME-Header', $header);

	is(encode('euc-jp', $utf8), $decoded);
	is($header, $encoded);
}

__END__

--- NEW FILE: ksc5601.enc ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: gb2312.enc ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: jisx0201.enc ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: jisx0208.utf ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: jisx0212.utf ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: CJKT.t ---
BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        unshift @INC, '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # Skip: EBCDIC\n";
	exit 0;
    }
# should work w/o PerlIO now!
#    unless (PerlIO::Layer->find('perlio')){
#	print "1..0 # Skip: PerlIO required\n";
#	exit 0;
#   }
    $| = 1;
}
use strict;
use Test::More tests => 60;
use Encode;
use File::Basename;
use File::Spec;
use File::Compare qw(compare_text);
our $DEBUG = shift || 0;

my %Charset =
    (
     'big5-eten'  => [qw(big5-eten)],
     'big5-hkscs' => [qw(big5-hkscs)],
     gb2312       => [qw(euc-cn hz)],
     jisx0201     => [qw(euc-jp shiftjis 7bit-jis)],
     jisx0208     => [qw(euc-jp shiftjis 7bit-jis iso-2022-jp iso-2022-jp-1)],
     jisx0212     => [qw(euc-jp 7bit-jis iso-2022-jp-1)],
     ksc5601      => [qw(euc-kr iso-2022-kr johab)],
    );


my $dir = dirname(__FILE__);
my $seq = 1;

for my $charset (sort keys %Charset){
    my ($src, $uni, $dst, $txt);

    my $transcoder = find_encoding($Charset{$charset}[0]) or die;

    my $src_enc = File::Spec->catfile($dir,"$charset.enc");
    my $src_utf = File::Spec->catfile($dir,"$charset.utf");
    my $dst_enc = File::Spec->catfile($dir,"$$.enc");
    my $dst_utf = File::Spec->catfile($dir,"$$.utf");

    open $src, "<$src_enc" or die "$src_enc : $!";
    
    if (PerlIO::Layer->find('perlio')){
	binmode($src, ":bytes"); # needed when :utf8 in default open layer
    }

    $txt = join('',<$src>);
    close($src);
    
    eval{ $uni = $transcoder->decode($txt, 1) }; 
    $@ and print $@;
    ok(defined($uni),  "decode $charset"); $seq++;
    is(length($txt),0, "decode $charset completely"); $seq++;
    
    open $dst, ">$dst_utf" or die "$dst_utf : $!";
    if (PerlIO::Layer->find('perlio')){
	binmode($dst, ":utf8");
	print $dst $uni;
    }else{ # ugh!
	binmode($dst);
	my $raw = $uni; Encode::_utf8_off($raw);
	print $dst $raw;
    }

    close($dst); 
    is(compare_text($dst_utf, $src_utf), 0, "$dst_utf eq $src_utf")
	or ($DEBUG and rename $dst_utf, "$dst_utf.$seq");
    $seq++;
    
    open $src, "<$src_utf" or die "$src_utf : $!";
    if (PerlIO::Layer->find('perlio')){
	binmode($src, ":utf8");
	$uni = join('', <$src>);
    }else{ # ugh!
	binmode($src);
	$uni = join('', <$src>);
	Encode::_utf8_on($uni);
    }
    close $src;

    my $unisave = $uni;
    eval{ $txt = $transcoder->encode($uni,1) };    
    $@ and print $@;
    ok(defined($txt),   "encode $charset"); $seq++;
    is(length($uni), 0, "encode $charset completely");  $seq++;
    $uni = $unisave;

    open $dst,">$dst_enc" or die "$dst_utf : $!";
    binmode($dst);
    print $dst $txt;
    close($dst); 
    is(compare_text($src_enc, $dst_enc), 0 => "$dst_enc eq $src_enc")
	or ($DEBUG and rename $dst_enc, "$dst_enc.$seq");
    $seq++;
    
    unlink($dst_utf, $dst_enc);

    for my $encoding (@{$Charset{$charset}}){
	my $rt = decode($encoding, encode($encoding, $uni));
	is ($rt, $uni, "RT $encoding");
    }
}

--- NEW FILE: perlio.t ---
BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        unshift @INC, '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # Skip: EBCDIC\n";
	exit 0;
    }
    unless (PerlIO::Layer->find('perlio')){
        print "1..0 # Skip: PerlIO required\n";
        exit 0;
    }
    $| = 1;
}

use strict;
use File::Basename;
use File::Spec;
use File::Compare qw(compare_text);
use File::Copy;
use FileHandle;

#use Test::More qw(no_plan);
use Test::More tests => 38;

our $DEBUG = 0;

use Encode (":all");
{
    no warnings;
    @ARGV and $DEBUG = shift;
    #require Encode::JP::JIS7;
    #require Encode::KR::2022_KR;
    #$Encode::JP::JIS7::DEBUG = $DEBUG;
}

my $seq = 0;
my $dir = dirname(__FILE__);

my %e = 
    (
     jisx0208 => [ qw/euc-jp shiftjis 7bit-jis iso-2022-jp iso-2022-jp-1/],
     ksc5601  => [ qw/euc-kr/],
     gb2312   => [ qw/euc-cn hz/],
    );

$/ = "\x0a"; # may fix VMS problem for test #28 and #29

for my $src (sort keys %e) {
    my $ufile = File::Spec->catfile($dir,"$src.utf");
    open my $fh, "<:utf8", $ufile or die "$ufile : $!";
    my @uline = <$fh>;
    my $utext = join('' => @uline);
    close $fh;

    for my $e (@{$e{$src}}){
	my $sfile = File::Spec->catfile($dir,"$$.sio");
	my $pfile = File::Spec->catfile($dir,"$$.pio");
    
	# first create a file without perlio
	dump2file($sfile, &encode($e, $utext, 0));
    
	# then create a file via perlio without autoflush

    SKIP:{
	    skip "$e: !perlio_ok", 4 unless (perlio_ok($e) or $DEBUG);
	    no warnings 'uninitialized';
	    open $fh, ">:encoding($e)", $pfile or die "$sfile : $!";
	    $fh->autoflush(0);
	    print $fh $utext;
	    close $fh;
	    $seq++;
	    is(compare_text($sfile, $pfile), 0 => ">:encoding($e)");
	    if ($DEBUG){
		copy $sfile, "$sfile.$seq";
		copy $pfile, "$pfile.$seq";
	    }
	    
	    # this time print line by line.
	    # works even for ISO-2022 but not ISO-2022-KR
	    open $fh, ">:encoding($e)", $pfile or die "$sfile : $!";
	    $fh->autoflush(1);
	    for my $l (@uline) {
		print $fh $l;
	    }
	    close $fh;
	    $seq++;
	    is(compare_text($sfile, $pfile), 0 => ">:encoding($e) by lines");
	    if ($DEBUG){
		copy $sfile, "$sfile.$seq";
		copy $pfile, "$pfile.$seq";
	    }
	    my $dtext;
	    open $fh, "<:encoding($e)", $pfile or die "$pfile : $!";
	    $fh->autoflush(0);
	    $dtext = join('' => <$fh>);
	    close $fh;
	    $seq++;
	    ok($utext eq $dtext, "<:encoding($e)");
	    if ($DEBUG){
		dump2file("$sfile.$seq", $utext);
		dump2file("$pfile.$seq", $dtext);
	    }
	    if (perlio_ok($e) or $DEBUG){
		$dtext = '';
		open $fh, "<:encoding($e)", $pfile or die "$pfile : $!";
		while(defined(my $l = <$fh>)) {
		    $dtext .= $l;
		}
		close $fh;
	    }
	    $seq++;
	    ok($utext eq $dtext,  "<:encoding($e) by lines");
	    if ($DEBUG){
		dump2file("$sfile.$seq", $utext);
		dump2file("$pfile.$seq", $dtext);
	    }
	}
     if ( ! $DEBUG ) {
            1 while unlink ($sfile);
            1 while unlink ($pfile);
        }
    }
}

# BOM Test

SKIP:{
    my $pev = PerlIO::encoding->VERSION;
    skip "PerlIO::encoding->VERSION = $pev <= 0.07 ", 6
	unless ($pev >= 0.07 or $DEBUG);

    my $file = File::Spec->catfile($dir,"jisx0208.utf");
    open my $fh, "<:utf8", $file or die "$file : $!";
    my $str = join('' => <$fh>);
    close $fh;
    my %bom = (
	       'UTF-16BE' => pack('n', 0xFeFF),
	       'UTF-16LE' => pack('v', 0xFeFF),
	       'UTF-32BE' => pack('N', 0xFeFF),
	       'UTF-32LE' => pack('V', 0xFeFF),
	      );
    # reading
    for my $utf (sort keys %bom){
	my $bomed = $bom{$utf} . encode($utf, $str);
	my $sfile = File::Spec->catfile($dir,".${utf}_${seq}_$$");
	dump2file($sfile, $bomed);
	my $utf_nobom = $utf; $utf_nobom =~ s/(LE|BE)$//o;
	# reading
	open $fh, "<:encoding($utf_nobom)", $sfile or die "$sfile : $!";
	my $cmp = join '' => <$fh>;
	close $fh;
	is($str, $cmp, "<:encoding($utf_nobom) eq $utf");
	unlink $sfile;  $seq++;
    }
    # writing
    for my $utf_nobom (qw/UTF-16 UTF-32/){
	my $utf = $utf_nobom . 'BE';
	my $sfile = File::Spec->catfile($dir,".${utf_nobom}_${seq}_$$");
	my $bomed = $bom{$utf} . encode($utf, $str);
	open  $fh, ">:encoding($utf_nobom)", $sfile or die "$sfile : $!";
	print $fh $str;
	close $fh;
	open my $fh, "<:bytes", $sfile or die "$sfile : $!";
	read $fh, my $cmp, -s $sfile;
	close $fh;
	use bytes ();
	ok($bomed eq $cmp, ">:encoding($utf_nobom) eq $utf");
	unlink $sfile; $seq++;
    }
}
sub dump2file{
    no warnings;
    open my $fh, ">", $_[0] or die "$_[0]: $!";
    binmode $fh;
    print $fh $_[1];
    close $fh;
}

--- NEW FILE: at-tw.t ---
BEGIN {
    if (! -d 'blib' and -d 't'){ chdir 't' };
    unshift @INC,  '../lib';
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    unless (find PerlIO::Layer 'perlio') {
	print "1..0 # Skip: PerlIO was not built\n";
	exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # Skip: EBCDIC\n";
	exit 0;
    }
    $| = 1;
}

use strict;
use Test::More tests => 17;
use Encode;

no utf8; # we have raw Chinese encodings here

use_ok('Encode::TW');

# Since JP.t already tests basic file IO, we will just focus on
# internal encode / decode test here. Unfortunately, to test
# against all the UniHan characters will take a huge disk space,
# not to mention the time it will take, and the fact that Perl
# did not bundle UniHan.txt anyway.

# So, here we just test a typical snippet spanning multiple Unicode
# blocks, and hope it can point out obvious errors.

run_tests('Basic Big5 range', {
    'utf'	=> (
24093.39640.38525.20043.33495.35028.20846.65292.
26389.30343.32771.26352.20271.24248.65108.
25885.25552.35998.20110.23391.38508.20846.65292.
24799.24218.23493.21566.20197.38477.65108
    ),

    'big5'	=> (join('',
'«Ò°ª¶§¤§­]¸Ç¤¼¡A®Ó¬Ó¦Ò¤ê§B±e¡Q',
'Äá´£­s¤_©s³µ¤¼¡A±©©°±G§^¥H­°¡Q',
    )),

    'big5-hkscs'=> (join('',
'«Ò°ª¶§¤§­]¸Ç¤¼¡A®Ó¬Ó¦Ò¤ê§B±e¡Q',
'Äá´£­s¤_©s³µ¤¼¡A±©©°±G§^¥H­°¡Q',
    )),

    'cp950'	=> (join('',
'«Ò°ª¶§¤§­]¸Ç¤¼¡A®Ó¬Ó¦Ò¤ê§B±e¡Q',
'Äá´£­s¤_©s³µ¤¼¡A±©©°±G§^¥H­°¡Q',
    )),
});

run_tests('Hong Kong Extensions', {
    'utf'	=> (
24863.35613.25152.26377.20351.29992.32.80.101.114.108.32.
22021.26379.21451.65292.32102.25105.21707.22021.
25903.25345.12289.24847.35211.21644.40723.21237.
22914.26524.32232.30908.26377.20219.20309.37679.28431.
65292.35531.21578.35380.25105.21707.12290
    ),

    'big5-hkscs'	=> join('',
'·PÁ©Ҧ³¨Ï¥Î Perl ïªB¤Í¡Aµ¹§Ú’]ï¤ä«ù¡B·N¨£©M¹ªÀy',
'¦pªG½s½X¦³¥ô¦ó¿ùº|¡A½Ð§i¶D§Ú’]¡C'
    ),
});

sub run_tests {
    my ($title, $tests) = @_;
    my $utf = delete $tests->{'utf'};

    # $enc = encoding, $str = content
    foreach my $enc (sort keys %{$tests}) {
	my $str = $tests->{$enc};

	is(Encode::decode($enc, $str), $utf, "[$enc] decode - $title");
	is(Encode::encode($enc, $utf), $str, "[$enc] encode - $title");

	my $str2 = $str;
	my $utf8 = Encode::encode('utf-8', $utf);

	Encode::from_to($str2, $enc, 'utf-8');
	is($str2, $utf8, "[$enc] from_to => utf8 - $title");

	Encode::from_to($utf8, 'utf-8', $enc); # convert $utf8 as $enc
	is($utf8, $str,  "[$enc] utf8 => from_to - $title");
    }
}

--- NEW FILE: jisx0208.enc ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: enc_data.t ---
# $Id: enc_data.t,v 1.1 2006-12-05 04:26:37 dslinux_cayenne Exp $

BEGIN {
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    unless (find PerlIO::Layer 'perlio') {
	print "1..0 # Skip: PerlIO was not built\n";
	exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # encoding pragma does not support EBCDIC platforms\n";
	exit(0);
    }
    if ($] <= 5.008 and !$Config{perl_patchlevel}){
	print "1..0 # Skip: Perl 5.8.1 or later required\n";
	exit 0;
    }
}


use strict;
use encoding 'euc-jp';
use Test::More tests => 4;

my @a;

while (<DATA>) {
  chomp;
  tr/¤¡-¤ó¥¡-¥ó/¥¡-¥ó¤¡-¤ó/;
  push @a, $_;
}

is(scalar @a, 3);
is($a[0], "¥³¥ì¥ÏDATA¤Õ¤¡¤¤¤ë¤Ï¤ó¤É¤ë¥Î¤Æ¤¹¤È¥Ç¥¹¡£");
is($a[1], "ÆüËܸ쥬¥Á¥ã¥ó¥ÈÊÑ´¹¥Ç¥­¥ë¥«");
is($a[2], "¥É¥¦¥«¥Î¤Æ¤¹¤È¥ò¥·¥Æ¥¤¥Þ¥¹¡£");

__DATA__
¤³¤ì¤ÏDATA¥Õ¥¡¥¤¥ë¥Ï¥ó¥É¥ë¤Î¥Æ¥¹¥È¤Ç¤¹¡£
ÆüËܸ줬¤Á¤ã¤ó¤ÈÊÑ´¹¤Ç¤­¤ë¤«
¤É¤¦¤«¤Î¥Æ¥¹¥È¤ò¤·¤Æ¤¤¤Þ¤¹¡£

--- NEW FILE: big5-hkscs.enc ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: utf8strict.t ---
#!../perl
our $DEBUG = @ARGV;
our (%ORD, %SEQ, $NTESTS);
BEGIN {
     if ($ENV{'PERL_CORE'}){
         chdir 't';
         unshift @INC, '../lib';
     }
     require Config; import Config;
     if ($Config{'extensions'} !~ /\bEncode\b/) {
         print "1..0 # Skip: Encode was not built\n";
	 exit 0;
     }
     if ($] <= 5.008 and !$Config{perl_patchlevel}){
	 print "1..0 # Skip: Perl 5.8.1 or later required\n";
	 exit 0;
     }
     # http://smontagu.damowmow.com/utf8test.html
     %ORD = (
	     0x00000080 => 0, # 2.1.2
	     0x00000800 => 0, # 2.1.3
	     0x00010000 => 0, # 2.1.4
	     0x00200000 => 1, # 2.1.5
	     0x00400000 => 1, # 2.1.6
	     0x0000007F => 0, # 2.2.1 -- unmapped okay
	     0x000007FF => 0, # 2.2.2
	     0x0000FFFF => 1, # 2.2.3
	     0x001FFFFF => 1, # 2.2.4
	     0x03FFFFFF => 1, # 2.2.5
	     0x7FFFFFFF => 1, # 2.2.6
	     0x0000D800 => 1, # 5.1.1
	     0x0000DB7F => 1, # 5.1.2
	     0x0000D880 => 1, # 5.1.3
	     0x0000DBFF => 1, # 5.1.4
	     0x0000DC00 => 1, # 5.1.5
	     0x0000DF80 => 1, # 5.1.6
	     0x0000DFFF => 1, # 5.1.7
	     # 5.2 "Paird UTF-16 surrogates skipped
	     # because utf-8-strict raises exception at the first one
	     0x0000FFFF => 1, # 5.3.1
	    );
     $NTESTS +=  scalar keys %ORD;
     %SEQ = (
	     qq/ed 9f bf/    => 0, # 2.3.1
	     qq/ee 80 80/    => 0, # 2.3.2
	     qq/f4 8f bf bf/ => 0, # 2.3.3
	     qq/f4 90 80 80/ => 1, # 2.3.4 -- out of range so NG
	     # "3 Malformed sequences" are checked by perl.
	     # "4 Overlong sequences"  are checked by perl.
	    );
     $NTESTS +=  scalar keys %SEQ;
}
use strict;
use Encode;
use utf8;
use Test::More tests => $NTESTS;

local($SIG{__WARN__}) = sub { $DEBUG and $@ and print STDERR $@ };

my $d = find_encoding("utf-8-strict");
for my $u (sort keys %ORD){
    my $c = chr($u);
    eval { $d->encode($c,1) };
    $DEBUG and $@ and warn $@;
    my $t = $@ ? 1 : 0;
    is($t, $ORD{$u}, sprintf "U+%04X", $u);
}
for my $s (sort keys %SEQ){
    my $o = pack "C*" => map {hex} split /\s+/, $s;
    eval { $d->decode($o,1) };
    $DEBUG and $@ and warn $@;
    my $t = $@ ? 1 : 0;
    is($t, $SEQ{$s}, $s);
}

__END__



--- NEW FILE: enc_utf8.t ---
# $Id: enc_utf8.t,v 1.1 2006-12-05 04:26:37 dslinux_cayenne Exp $
# This is the twin of enc_eucjp.t .

BEGIN {
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    unless (find PerlIO::Layer 'perlio') {
	print "1..0 # Skip: PerlIO was not built\n";
	exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # encoding pragma does not support EBCDIC platforms\n";
	exit(0);
    }
}

use encoding 'utf8';

my @c = (127, 128, 255, 256);

print "1.." . (scalar @c + 1) . "\n";

my @f;

for my $i (0..$#c) {
  my $file = filename("f$i");
  push @f, $file;
  open(F, ">$file") or die "$0: failed to open '$file' for writing: $!";
  binmode(F, ":utf8");
  print F chr($c[$i]);
  close F;
}

my $t = 1;

for my $i (0..$#c) {
  my $file = filename("f$i");
  open(F, "<$file") or die "$0: failed to open '$file' for reading: $!";
  binmode(F, ":utf8");
  my $c = <F>;
  my $o = ord($c);
  print $o == $c[$i] ? "ok $t - utf8 I/O $c[$i]\n" : "not ok $t - utf8 I/O $c[$i]: $o != $c[$i]\n";
  $t++;
}

my $f = filename("f" . @f);

push @f, $f;
open(F, ">$f") or die "$0: failed to open '$f' for writing: $!";
binmode(F, ":raw"); # Output raw bytes.
print F chr(128); # Output illegal UTF-8.
close F;
open(F, $f) or die "$0: failed to open '$f' for reading: $!";
binmode(F, ":encoding(utf-8)");
{
	local $^W = 1;
	local $SIG{__WARN__} = sub { $a = shift };
	eval { <F> }; # This should get caught.
}
close F;
print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ?
  "ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " . unpack("H*", $a) . "\n";

# On VMS temporary file names like "f0." may be more readable than "f0" since
# "f0" could be a logical name pointing elsewhere.
sub filename {
    my $name = shift;
    $name .= '.' if $^O eq 'VMS';
    return $name;
}

END {
  1 while unlink @f;
}

--- NEW FILE: gsm0338.t ---
BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        unshift @INC, '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    $| = 1;
}

use strict;
use Test::More tests => 21;
use Encode;

# The specification of GSM 03.38 is not awfully clear.
# (http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT)
# The various combinations of 0x00 and 0x1B as leading bytes
# are unclear, as is the semantics of those bytes as standalone
# or as final single bytes.

sub t { is(decode("gsm0338", my $t = $_[0]), $_[1]) }

# t("\x00",     "\x00"); # ???

# "Round-trip".
t("\x41",     "\x41");

t("\x01",     "\xA3");
t("\x02",     "\x24");
t("\x03",     "\xA5");
t("\x09",     "\xE7");

t("\x00\x00", "\x00\x00"); # Maybe?
t("\x00\x1B", "\x40\xA0"); # Maybe?
t("\x00\x41", "\x40\x41");

# t("\x1B",     "\x1B"); # ???

# Escape with no special second byte is just a NBSP.
t("\x1B\x41", "\xA0\x41");

t("\x1B\x00", "\xA0\x40"); # Maybe?

# Special escape characters.
t("\x1B\x0A", "\x0C");
t("\x1B\x14", "\x5E");
t("\x1B\x28", "\x7B");
t("\x1B\x29", "\x7D");
t("\x1B\x2F", "\x5C");
t("\x1B\x3C", "\x5B");
t("\x1B\x3D", "\x7E");
t("\x1B\x3E", "\x5D");
t("\x1B\x40", "\x7C");
t("\x1B\x40", "\x7C");
t("\x1B\x65", "\x{20AC}");





--- NEW FILE: big5-eten.enc ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: guess.t ---
BEGIN {
    if ($ENV{'PERL_CORE'}){
        chdir 't';
        unshift @INC, '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # Skip: EBCDIC\n";
	exit 0;
    }
    $| = 1;
}

use strict;
use File::Basename;
use File::Spec;
use Encode qw(decode encode find_encoding _utf8_off);

#use Test::More qw(no_plan);
use Test::More tests => 29;
use_ok("Encode::Guess");
{
    no warnings;
    $Encode::Guess::DEBUG = shift || 0;
}

my $ascii  = join('' => map {chr($_)}(0x21..0x7e));
my $latin1 = join('' => map {chr($_)}(0xa1..0xfe));
my $utf8on  = join('' => map {chr($_)}(0x3000..0x30fe));
my $utf8off = $utf8on; _utf8_off($utf8off);
my $utf16 = encode('UTF-16', $utf8on);
my $utf32 = encode('UTF-32', $utf8on);

is(guess_encoding($ascii)->name, 'ascii', 'ascii');
like(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii');
is(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1');
is(guess_encoding($utf8on)->name, 'utf8', 'utf8 w/ flag');
is(guess_encoding($utf8off)->name, 'utf8', 'utf8 w/o flag');
is(guess_encoding($utf16)->name, 'UTF-16', 'UTF-16');
is(guess_encoding($utf32)->name, 'UTF-32', 'UTF-32');

my $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf');
my $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf');
my $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf');

open my $fh, $jisx0208 or die "$jisx0208: $!";
binmode($fh);
$utf8off = join('' => <$fh>);
close $fh;
$utf8on = decode('utf8', $utf8off);

my @jp = qw(7bit-jis shiftjis euc-jp);

Encode::Guess->set_suspects(@jp);

for my $jp (@jp){
    my $test = encode($jp, $utf8on);
    is(guess_encoding($test)->name, $jp, "JP:$jp");
}

is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')");
eval{ encode('Guess', $utf8on) };
like($@, qr/not defined/io, "no encode()");

my %CJKT = 
    (
     'euc-cn'    => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'),
     'euc-jp'    => File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'),
     'euc-kr'    => File::Spec->catfile(dirname(__FILE__), 'ksc5601.utf'),
     'big5-eten' => File::Spec->catfile(dirname(__FILE__), 'big5-eten.utf'),
);

Encode::Guess->set_suspects(keys %CJKT);

for my $name (keys %CJKT){
    open my $fh, $CJKT{$name} or die "$CJKT{$name}: $!";
    binmode($fh);
    $utf8off = join('' => <$fh>);
    close $fh;

    my $test = encode($name, decode('utf8', $utf8off));
    is(guess_encoding($test)->name, $name, "CJKT:$name");
}

my $ambiguous =  "\x{5c0f}\x{98fc}\x{5f3e}";
my $english   = "The quick brown fox jumps over the black lazy dog.";
for my $utf (qw/UTF-16 UTF-32/){
    for my $bl (qw/BE LE/){
	my $test = encode("$utf$bl" => $english);
	is(guess_encoding($test)->name, "$utf$bl", "$utf$bl");
    }
}
for my $bl (qw/BE LE/){
    my $test = encode("UTF-16$bl" => $ambiguous);
    my $result = guess_encoding($test);
    ok(! ref($result), "UTF-16$bl:$result");
}



Encode::Guess->set_suspects();
for my $jp (@jp){
    # intentionally set $1 a priori -- see Changes
    my $test = "English";
    '$1' =~ m/^(.*)/o;
    is(guess_encoding($test, ($jp))->name, 'ascii', 
       "ascii vs $jp (\$1 messed)");
    $test = encode($jp, $test . "\n\x{65e5}\x{672c}\x{8a9e}");
    is(guess_encoding($test, ($jp))->name, 
       $jp, "$jp vs ascii (\$1 messed)");
}

__END__;

--- NEW FILE: unibench.pl ---
#!./perl

use strict;
use Encode;
use Benchmark qw(:all);

my $Count = shift @ARGV;
$Count ||= 16;
my @sizes = @ARGV || (1, 4, 16);

my %utf8_seed;
for my $i (0x00..0xff){
    my $c = chr($i);
    $utf8_seed{BMP} .= ($c =~ /^\p{IsPrint}/o) ? $c : " ";
}
utf8::upgrade($utf8_seed{BMP});

for my $i (0x00..0xff){
    my $c = chr(0x10000+$i);
    $utf8_seed{HIGH} .= ($c =~ /^\p{IsPrint}/o) ? $c : " ";
}
utf8::upgrade($utf8_seed{HIGH});

my %S;
for my $i (@sizes){
    my $sz = 256 * $i;
    for my $cp (qw(BMP HIGH)){
	$S{utf8}{$sz}{$cp}  = $utf8_seed{$cp} x $i;
	$S{utf16}{$sz}{$cp} = encode('UTF-16BE', $S{utf8}{$sz}{$cp});
    }
}

for my $i (@sizes){
    my $sz = $i * 256;
    my $count = $Count * int(256/$i);
    for my $cp (qw(BMP HIGH)){
	for my $op (qw(encode decode)){
	    my ($meth, $from, $to) = ($op eq 'encode') ?
		(\&encode, 'utf8', 'utf16') : (\&decode, 'utf16', 'utf8');
	    my $XS = sub {
		Encode::Unicode::set_transcoder("xs");  
		$meth->('UTF-16BE', $S{$from}{$sz}{$cp})
		     eq $S{$to}{$sz}{$cp} 
			 or die "$op,$from,$to,$sz,$cp";
	    };
	    my $modern = sub {
		Encode::Unicode::set_transcoder("modern");  
		$meth->('UTF-16BE', $S{$from}{$sz}{$cp})
		     eq $S{$to}{$sz}{$cp} 
			 or die "$op,$from,$to,$sz,$cp";
	    };
	    my $classic = sub {
		Encode::Unicode::set_transcoder("classic");  
		$meth->('UTF-16BE', $S{$from}{$sz}{$cp})
		     eq $S{$to}{$sz}{$cp} or 
			 die "$op,$from,$to,$sz,$cp";
	    };
	    print "---- $op length=$sz/range=$cp ----\n";
	    my $r = timethese($count,
		     {
		      "XS"      => $XS,
		      "Modern"  => $modern,
		      "Classic" => $classic,
		     },
		     'none',
		    );
	    cmpthese($r);
	}
    }
}

--- NEW FILE: jisx0212.enc ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: ksc5601.utf ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: jisx0201.utf ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: Aliases.t ---
#!../perl

BEGIN {
    if ($ENV{'PERL_CORE'}){
	chdir 't';
	unshift @INC, '../lib';
    }
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
	print "1..0 # Skip: Encode was not built\n";
	    exit 0;
    }
}

use strict;
use Encode;
use Encode::Alias;
my %a2c;
my $ON_EBCDIC;

sub init_a2c{
    %a2c = (
	    'US-ascii' => 'ascii',
	    'ISO-646-US' => 'ascii',
	    'UTF-8'    => 'utf-8-strict',
	    'UCS-2'    => 'UCS-2BE',
	    'UCS2'     => 'UCS-2BE',
	    'iso-10646-1' => 'UCS-2BE',
	    'ucs2-le'  => 'UCS-2LE',
	    'ucs2-be'  => 'UCS-2BE',
	    'utf16'    => 'UTF-16',
	    'utf32'    => 'UTF-32',
	    'utf16-be'  => 'UTF-16BE',
	    'utf32-be'  => 'UTF-32BE',
	    'utf16-le'  => 'UTF-16LE',
	    'utf32-le'  => 'UTF-32LE',
	    'UCS4-BE'   => 'UTF-32BE',
	    'UCS-4-LE'  => 'UTF-32LE',
	    'cyrillic' => 'iso-8859-5',
	    'arabic'   => 'iso-8859-6',
	    'greek'    => 'iso-8859-7',
	    'hebrew'   => 'iso-8859-8',
	    'thai'     => 'iso-8859-11',
	    'tis620'   => 'iso-8859-11',
	    'WinLatin1'     => 'cp1252',
	    'WinLatin2'     => 'cp1250',
	    'WinCyrillic'   => 'cp1251',
	    'WinGreek'      => 'cp1253',
	    'WinTurkish'    => 'cp1254',
	    'WinHebrew'     => 'cp1255',
	    'WinArabic'     => 'cp1256',
	    'WinBaltic'     => 'cp1257',
	    'WinVietnamese' => 'cp1258',
	    'koi8r'         => 'koi8-r',
	    'koi8u'         => 'koi8-u',
	    'ja_JP.euc'	    => $ON_EBCDIC ? '' : 'euc-jp',
	    'x-euc-jp'	    => $ON_EBCDIC ? '' : 'euc-jp',
	    'zh_CN.euc'	    => $ON_EBCDIC ? '' : 'euc-cn',
	    'x-euc-cn'	    => $ON_EBCDIC ? '' : 'euc-cn',
	    'ko_KR.euc'	    => $ON_EBCDIC ? '' : 'euc-kr',
	    'x-euc-kr'	    => $ON_EBCDIC ? '' : 'euc-kr',
	    'ujis'	    => $ON_EBCDIC ? '' : 'euc-jp',
	    'Shift_JIS'	    => $ON_EBCDIC ? '' : 'shiftjis',
	    'x-sjis'	    => $ON_EBCDIC ? '' : 'shiftjis',
	    'jis'	    => $ON_EBCDIC ? '' : '7bit-jis',
	    'big-5'	    => $ON_EBCDIC ? '' : 'big5-eten',
	    'zh_TW.Big5'    => $ON_EBCDIC ? '' : 'big5-eten',
	    'tca-big5'	    => $ON_EBCDIC ? '' : 'big5-eten',
	    'big5-hk'	    => $ON_EBCDIC ? '' : 'big5-hkscs',
	    'hkscs-big5'    => $ON_EBCDIC ? '' : 'big5-hkscs',
	    'GB_2312-80'    => $ON_EBCDIC ? '' : 'euc-cn',
	    'KS_C_5601-1987'    => $ON_EBCDIC ? '' : 'cp949',
	    #
	    'gb12345-raw'   => $ON_EBCDIC ? '' : 'gb12345-raw',
	    'gb2312-raw'    => $ON_EBCDIC ? '' : 'gb2312-raw',
	    'jis0201-raw'   => $ON_EBCDIC ? '' : 'jis0201-raw',
	    'jis0208-raw'   => $ON_EBCDIC ? '' : 'jis0208-raw',
	    'jis0212-raw'   => $ON_EBCDIC ? '' : 'jis0212-raw',
	    'ksc5601-raw'   => $ON_EBCDIC ? '' : 'ksc5601-raw',
	   );

    for my $i (1..11,13..16){
	$a2c{"ISO 8859 $i"} = "iso-8859-$i";
    }
    for my $i (1..10){
	$a2c{"ISO Latin $i"} = "iso-8859-$Encode::Alias::Latin2iso[$i]";
    }
    for my $k (keys %Encode::Alias::Winlatin2cp){
	my $v = $Encode::Alias::Winlatin2cp{$k};
	$a2c{"Win" . ucfirst($k)} = "cp" . $v;
	$a2c{"IBM-$v"} = $a2c{"MS-$v"} = "cp" . $v;
	$a2c{"cp-" . $v} = "cp" . $v;
    }
    my @a2c = keys %a2c;
    for my $k (@a2c){
	$a2c{uc($k)} = $a2c{$k};
	$a2c{lc($k)} = $a2c{$k};
	$a2c{lcfirst($k)} = $a2c{$k};
	$a2c{ucfirst($k)} = $a2c{$k};
    }
}

BEGIN{
    $ON_EBCDIC = ord("A") == 193;
    @ARGV and $ON_EBCDIC = $ARGV[0] eq 'EBCDIC';
    $Encode::ON_EBCDIC = $ON_EBCDIC;
    init_a2c();
}

if ($ON_EBCDIC){
    delete @Encode::ExtModule{
	qw(euc-cn gb2312 gb12345 gbk cp936 iso-ir-165 MacChineseSimp
	   euc-jp iso-2022-jp 7bit-jis shiftjis MacJapanese cp932
	   euc-kr ksc5601 cp949 MacKorean
	   big5	big5-hkscs cp950 MacChineseTrad
	   gb18030 big5plus euc-tw)
	};
}

use Test::More tests => (scalar keys %a2c) * 4;

print "# alias test;  \$ON_EBCDIC == $ON_EBCDIC\n";

foreach my $a (keys %a2c){	
    my $e = Encode::find_encoding($a);
    is((defined($e) and $e->name), $a2c{$a},$a)
	or warn "alias was $a";;
}

# now we override some of the aliases and see if it works fine

define_alias(
	     qr/ascii/i    => 'WinLatin1',
	     qr/cyrillic/i => 'WinCyrillic',
	     qr/arabic/i   => 'WinArabic',
	     qr/greek/i    => 'WinGreek',
	     qr/hebrew/i   => 'WinHebrew'
	    );

print "# alias test with alias overrides\n";

foreach my $a (keys %a2c){	
    my $e = Encode::find_encoding($a);
    is((defined($e) and $e->name), $a2c{$a}, "Override $a")
	or warn "alias was $a";
}

print "# alias undef test\n";

Encode::Alias->undef_aliases;
foreach my $a (keys %a2c){	
    my $e = Encode::find_encoding($a);
    ok(!defined($e) || $e->name =~ /-raw$/o,"Undef $a")
	or warn "alias was $a";
}

print "# alias reinit test\n";

Encode::Alias->init_aliases;
init_a2c();
foreach my $a (keys %a2c){	
    my $e = Encode::find_encoding($a);
    is((defined($e) and $e->name), $a2c{$a}, "Reinit $a")
	or warn "alias was $a";
}
__END__
for my $k (keys %a2c){
    $k =~ /[A-Z]/ and next;
    print "$k => $a2c{$k}\n";
}




--- NEW FILE: big5-eten.utf ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: encoding.t ---
BEGIN {
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    unless (find PerlIO::Layer 'perlio') {
	print "1..0 # Skip: PerlIO was not built\n";
	exit 0;
    }
    if (ord("A") == 193) {
	print "1..0 # encoding pragma does not support EBCDIC platforms\n";
	exit(0);
    }
}

print "1..31\n";

use encoding "latin1"; # ignored (overwritten by the next line)
use encoding "greek";  # iso 8859-7 (no "latin" alias, surprise...)

# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)

$a = "\xDF";
$b = "\x{100}";

print "not " unless ord($a) == 0x3af;
print "ok 1\n";

print "not " unless ord($b) == 0x100;
print "ok 2\n";

my $c;

$c = $a . $b;

print "not " unless ord($c) == 0x3af;
print "ok 3\n";

print "not " unless length($c) == 2;
print "ok 4\n";

print "not " unless ord(substr($c, 1, 1)) == 0x100;
print "ok 5\n";

print "not " unless ord(chr(0xdf)) == 0x3af; # spooky
print "ok 6\n";

print "not " unless ord(pack("C", 0xdf)) == 0x3af;
print "ok 7\n";

# we didn't break pack/unpack, I hope

print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf;
print "ok 8\n";

# the first octet of UTF-8 encoded 0x3af 
print "not " unless unpack("C", chr(0xdf)) == 0xce;
print "ok 9\n";

print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
print "ok 10\n";

print "not " unless unpack("U", chr(0xdf)) == 0x3af;
print "ok 11\n";

# charnames must still work
use charnames ':full';
print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
print "ok 12\n";

# combine

$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);

print "not " unless ord($c) == 0x3af;
print "ok 13\n";

print "not " unless ord(substr($c, 1, 1)) == 0xdf;
print "ok 14\n";

print "not " unless ord(substr($c, 2, 1)) == 0x3af;
print "ok 15\n";

# regex literals

print "not " unless "\xDF"    =~ /\x{3AF}/;
print "ok 16\n";

print "not " unless "\x{3AF}" =~ /\xDF/;
print "ok 17\n";

print "not " unless "\xDF"    =~ /\xDF/;
print "ok 18\n";

print "not " unless "\x{3AF}" =~ /\x{3AF}/;
print "ok 19\n";

# eq, cmp

my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( 
    pack("C*", 0xDF ),       # byte
    pack("C*", 0xDF, 0x20),  # ($bytes2 cmp $U) > 0
    pack("U*", 0x3AF),       # $U eq $byte
    pack("U*", 0xDF ),       # $Ub would eq $bytev w/o use encoding
    pack("U*", 0x3B1),       # ($g1 cmp $byte) > 0; === chr(0xe1)
    pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0;
    pack("U*", 0x3AB),       # ($l  cmp $byte) < 0; === chr(0xdb)
);

# all the tests in this section that compare a byte encoded string 
# ato UTF-8 encoded are run in all possible vairants 
# all of the eq, ne, cmp operations tested,
# $v z $u tested as well as $u z $v

sub alleq($$){
    my ($a,$b)    =    (shift, shift);
     $a  eq  $b        &&     $b  eq  $a         && 
  !( $a  ne  $b )      &&  !( $b  ne  $a )       &&
   ( $a  cmp $b ) == 0 &&   ( $b  cmp $a ) == 0;
}
   
sub anyeq($$){
    my ($a,$b)    =    (shift, shift);
     $a  eq  $b        ||     $b  eq  $a         ||
  !( $a  ne  $b )      ||  !( $b  ne  $a )       ||
   ( $a  cmp $b ) == 0 ||   ( $b  cmp $a ) == 0;
}

sub allgt($$){
    my ($a,$b)    =    (shift, shift);
    ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1;
}
#match the correct UTF-8 string
print "not " unless  alleq($byte, $U);
print "ok 20\n";

#do not match a wrong UTF-8 string
print "not " if anyeq($byte, $Ub);
print "ok 21\n";

#string ordering
print "not " unless allgt ( $g1,    $byte  )  &&
                    allgt ( $g2,    $byte  )  &&
                    allgt ( $byte,  $l     )  &&
                    allgt ( $bytes, $U     );
print "ok 22\n";

# upgrade, downgrade

my ($u,$v,$v2);
$u = $v = $v2 = pack("C*", 0xDF);
utf8::upgrade($v);                   #explicit upgrade
$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade

# implicit upgrade === explicit upgrade
print "not "  if do{{use bytes; $v ne $v2}} || $v ne $v2;
print "ok 23\n";

# utf8::upgrade is transparent and does not break equality
print "not " unless alleq( $u, $v );
print "ok 24\n";

$u = $v = pack("C*", 0xDF);
utf8::upgrade($v);
#test for a roundtrip, we should get back from where we left
eval {utf8::downgrade( $v )};
print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v;
print "ok 25\n";

# some more eq, cmp

$byte=pack("C*", 0xDF);

print "not " unless pack("U*", 0x3AF) eq $byte;
print "ok 26\n";

print "not " if chr(0xDF) cmp $byte;
print "ok 27\n";

print "not " unless ((pack("U*", 0x3B0)       cmp $byte) ==  1) &&
                    ((pack("U*", 0x3AE)       cmp $byte) == -1) &&
                    ((pack("U*", 0x3AF, 0x20) cmp $byte) ==  1) &&
	            ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
print "ok 28\n";


{
    # Used to core dump in 5.7.3
    no warnings; # so test goes noiselessly
    print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n";
}

{
	my %h1;
	my %h2;
	$h1{"\xdf"}    = 41;
	$h2{"\x{3af}"} = 42;
	print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n";
	print $h2{"\xdf"}    == 42 ? "ok 31\n" : "not ok 31\n";
}

--- NEW FILE: gb2312.utf ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: Unicode.t ---
#
# $Id: Unicode.t,v 1.1 2006-12-05 04:26:37 dslinux_cayenne Exp $
#
# This script is written entirely in ASCII, even though quoted literals
# do include non-BMP unicode characters -- Are you happy, jhi?
#

BEGIN {
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
	exit 0;
    }
    $| = 1;
}

use strict;
#use Test::More 'no_plan';
use Test::More tests => 37;
use Encode qw(encode decode);

#
# see
# http://www.unicode.org/unicode/reports/tr19/
#

my $dankogai   = "\x{5c0f}\x{98fc}\x{3000}\x{5f3e}";
my $nasty      = "$dankogai\x{1abcd}";
my $fallback   = "$dankogai\x{fffd}";

#hi: (0x1abcd - 0x10000) / 0x400 + 0xD800 = 0xd82a
#lo: (0x1abcd - 0x10000) % 0x400 + 0xDC00 = 0xdfcd

my $n_16be = 
    pack("C*", map {hex($_)} qw<5c 0f 98 fc 30 00 5f 3e  d8 2a df cd>);
my $n_16le =
    pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f  2a d8 cd df>);
my $f_16be = 
    pack("C*", map {hex($_)} qw<5c 0f 98 fc 30 00 5f 3e  ff fd>);
my $f_16le =
    pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f  fd ff>);
my $n_32be =
    pack("C*", map {hex($_)} 
	 qw<00 00 5c 0f 00 00 98 fc 00 00 30 00 00 00 5f 3e  00 01 ab cd>);
my $n_32le = 
    pack("C*", map {hex($_)} 
	 qw<0f 5c 00 00 fc 98 00 00 00 30 00 00 3e 5f 00 00  cd ab 01 00>);

my $n_16bb = pack('n', 0xFeFF) . $n_16be;
my $n_16lb = pack('v', 0xFeFF) . $n_16le;
my $n_32bb = pack('N', 0xFeFF) . $n_32be;
my $n_32lb = pack('V', 0xFeFF) . $n_32le;

is($n_16be, encode('UTF-16BE', $nasty),  qq{encode UTF-16BE});
is($n_16le, encode('UTF-16LE', $nasty),  qq{encode UTF-16LE});
is($n_32be, encode('UTF-32BE', $nasty),  qq{encode UTF-32BE});
is($n_32le, encode('UTF-32LE', $nasty),  qq{encode UTF-16LE});

is($nasty,  decode('UTF-16BE', $n_16be), qq{decode UTF-16BE});
is($nasty,  decode('UTF-16LE', $n_16le), qq{decode UTF-16LE});
is($nasty,  decode('UTF-32BE', $n_32be), qq{decode UTF-32BE});
is($nasty,  decode('UTF-32LE', $n_32le), qq{decode UTF-32LE});

is($n_16bb, encode('UTF-16',   $nasty),  qq{encode UTF-16});
is($n_32bb, encode('UTF-32',   $nasty),  qq{encode UTF-32});
is($nasty,  decode('UTF-16',   $n_16bb), qq{decode UTF-16, bom=be});
is($nasty,  decode('UTF-16',   $n_16lb), qq{decode UTF-16, bom=le});
is($nasty,  decode('UTF-32',   $n_32bb), qq{decode UTF-32, bom=be});
is($nasty,  decode('UTF-32',   $n_32lb), qq{decode UTF-32, bom=le});

is(decode('UCS-2BE', $n_16be), $fallback, "decode UCS-2BE: fallback");
is(decode('UCS-2LE', $n_16le), $fallback, "decode UCS-2LE: fallback");
eval { decode('UCS-2BE', $n_16be, 1) }; 
is (index($@,'UCS-2BE:'), 0, "decode UCS-2BE: exception");
eval { decode('UCS-2LE', $n_16le, 1) };
is (index($@,'UCS-2LE:'), 0, "decode UCS-2LE: exception");
is(encode('UCS-2BE', $nasty), $f_16be, "encode UCS-2BE: fallback");
is(encode('UCS-2LE', $nasty), $f_16le, "encode UCS-2LE: fallback");
eval { encode('UCS-2BE', $nasty, 1) }; 
is(index($@, 'UCS-2BE'), 0, "encode UCS-2BE: exception");
eval { encode('UCS-2LE', $nasty, 1) }; 
is(index($@, 'UCS-2LE'), 0, "encode UCS-2LE: exception");

#
# SvGROW test for (en|de)code_xs
#
SKIP: {
    my $utf8 = '';
    for my $j (0,0x10){
	for my $i (0..0xffff){
	    $j == 0 and (0xD800 <= $i && $i <= 0xDFFF) and next;
	    $utf8 .= ord($j+$i);
	}
	for my $major ('UTF-16', 'UTF-32'){
	    for my $minor ('BE', 'LE'){
		my $enc = $major.$minor;
		is(decode($enc, encode($enc, $utf8)), $utf8, "$enc RT");
	    }
	}
    }
};

#
# CJKT vs. UTF-7
#

use File::Spec;
use File::Basename;

my $dir =  dirname(__FILE__);
opendir my $dh, $dir or die "$dir:$!";
my @file = sort grep {/\.utf$/o} readdir $dh;
closedir $dh;
for my $file (@file){
    my $path = File::Spec->catfile($dir, $file);
    open my $fh, '<', $path or die "$path:$!";
    my $content;
    if (PerlIO::Layer->find('perlio')){
	binmode $fh => ':utf8';
	$content = join('' => <$fh>);
    }else{ # ugh!
	binmode $fh;
	$content = join('' => <$fh>);
	Encode::_utf8_on($content)
    }
    close $fh;
    is(decode("UTF-7", encode("UTF-7", $content)), $content, 
       "UTF-7 RT:$file");
}
1;
__END__

--- NEW FILE: Encoder.t ---
#
# $Id: Encoder.t,v 1.1 2006-12-05 04:26:37 dslinux_cayenne Exp $
#

BEGIN {
    require Config; import Config;
    if ($Config{'extensions'} !~ /\bEncode\b/) {
      print "1..0 # Skip: Encode was not built\n";
      exit 0;
    }
    $| = 1;
}

use strict;
#use Test::More 'no_plan';
use Test::More tests => 516;
use Encode::Encoder qw(encoder);
use MIME::Base64;
package Encode::Base64;
use base 'Encode::Encoding';
__PACKAGE__->Define('base64');
use MIME::Base64;
sub encode{
    my ($obj, $data) = @_;
    return encode_base64($data);
}
sub decode{
    my ($obj, $data) = @_;
    return decode_base64($data);
}

package main;

my $e = encoder("foo", "ascii");
ok ($e->data("bar"));
is ($e->data, "bar");
ok ($e->encoding("latin1"));
is ($e->encoding, "iso-8859-1");

my $data = '';
for my $i (0..255){
    no warnings;
    $data .= chr($i);
    my $base64 = encode_base64($data);
    is(encoder($data)->base64, $base64, "encode");
    is(encoder($base64)->bytes('base64'), $data, "decode");
}

1;
__END__




More information about the dslinux-commit mailing list