dslinux/user/perl/ext/Unicode/Normalize/t fcdc.t form.t func.t illegal.t norm.t null.t proto.t short.t split.t test.t
cayenne
dslinux_cayenne at user.in-berlin.de
Tue Dec 5 05:27:00 CET 2006
Update of /cvsroot/dslinux/dslinux/user/perl/ext/Unicode/Normalize/t
In directory antilope:/tmp/cvs-serv7729/ext/Unicode/Normalize/t
Added Files:
fcdc.t form.t func.t illegal.t norm.t null.t proto.t short.t
split.t test.t
Log Message:
Adding fresh perl source to HEAD to branch from
--- NEW FILE: null.t ---
# Skip the entire file (TAP plan "1..0") unless pack('U', 0x41) yields "A".
BEGIN {
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Normalize " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# Under PERL_CORE: change into t/ when that directory exists, and replace
# @INC with a single relative lib path (::lib on MacOS, ../lib otherwise).
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}
#########################
use strict;
use warnings;
use Unicode::Normalize qw(:all);
# TAP plan: eight tests. Test 1 passes as soon as the module has loaded.
print "1..8\n";
print "ok 1\n";

# Each function is handed the plain string 'abc'; its result is stored in $_
# and then matched against /c$/.
# if $_ is not NULL-terminated, test may fail.
my @null_cases = (
    [ 2, \&compose   ],
    [ 3, \&decompose ],
    [ 4, \&reorder   ],
    [ 5, \&NFD       ],
    [ 6, \&NFC       ],
    [ 7, \&NFKD      ],
    [ 8, \&NFKC      ],
);
for my $case (@null_cases) {
    my ($number, $function) = @{$case};
    $_ = $function->('abc');
    my $verdict = /c$/ ? "ok" : "not ok";
    print $verdict, " $number\n";
}
--- NEW FILE: fcdc.t ---
# Skip the entire file (TAP plan "1..0") unless pack('U', 0x41) yields "A".
BEGIN {
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Normalize " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# Under PERL_CORE: change into t/ when that directory exists, and replace
# @INC with a single relative lib path (::lib on MacOS, ../lib otherwise).
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}
#########################
use Test;
use strict;
use warnings;
BEGIN { plan(tests => 35) }
use Unicode::Normalize qw(:all);
ok(1); # If we made it this far, we're ok.

# Local shorthands for the module's code point <-> string helpers.
sub _pack_U {
    return Unicode::Normalize::pack_U(@_);
}

sub _unpack_U {
    return Unicode::Normalize::unpack_U(@_);
}

# Map a tri-state result to a label: undef => "MAYBE", true => "YES",
# defined-but-false => "NO".
sub answer {
    my ($verdict) = @_;
    return "MAYBE" if !defined $verdict;
    return $verdict ? "YES" : "NO";
}
#########################
# Inputs that are reused by the assertions below.
my $ring           = "\x{030A}";                  # 030A;COMBINING RING ABOVE
my $cedilla        = "\x{0327}";                  # 0327;COMBINING CEDILLA
my $a_ring         = _pack_U(0x00C5);             # A with ring above
my $a_then_ring    = _pack_U(0x41, 0x30A);        # A+ring
my $a_cedilla_ring = _pack_U(0x41, 0x327, 0x30A); # A+cedilla+ring
my $a_ring_cedilla = _pack_U(0x41, 0x30A, 0x327); # A+ring+cedilla
my $aring_cedilla  = _pack_U(0xC5, 0x0327);       # A-ring+cedilla
my $hangul         = "\x{AC01}\x{1100}\x{1161}";  # hangul
my $compat         = "\x{212B}\x{F900}";          # compat
my $mixed_marks    = _pack_U(0x1EA7, 0x05AE, 0x0315, 0x0062);

# checkFCD verdicts, as [ input, expected label ].
my @fcd_verdicts = (
    [ '',              'YES' ],
    [ 'A',             'YES' ],
    [ $ring,           'YES' ],
    [ $cedilla,        'YES' ],
    [ $a_ring,         'YES' ],
    [ $a_then_ring,    'YES' ],
    [ $a_cedilla_ring, 'YES' ],
    [ $a_ring_cedilla, 'NO'  ],
    [ $aring_cedilla,  'NO'  ],
);
for my $pair (@fcd_verdicts) {
    ok(answer(checkFCD($pair->[0])), $pair->[1]);
}

# The same A-ring+cedilla string through checkNFC and through check()
# with an explicit form name.
ok(answer(checkNFC($aring_cedilla)), 'MAYBE'); # NFC: A-ring+cedilla
ok(answer(check("FCD", $aring_cedilla)), 'NO');
ok(answer(check("NFC", $aring_cedilla)), 'MAYBE');

ok(answer(checkFCD($hangul)), 'YES');
ok(answer(checkFCD($compat)), 'YES');

# The empty string and a single ASCII letter are expected back unchanged.
for my $plain ('', 'A') {
    ok(FCD($plain), $plain);
    ok(FCC($plain), $plain);
}

ok(answer(checkFCD($mixed_marks)), "NO");
ok(answer(checkFCC($mixed_marks)), "NO");

# Expected FCC output for three specific inputs.
ok(FCC($aring_cedilla), $a_cedilla_ring);
ok(FCC(_pack_U(0x45, 0x304, 0x300)), _pack_U(0x1E14));
ok(FCC("\x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}"), "\x{AC00}\x{AE00}");

# checkFCC verdicts, as [ input, expected label ].
my @fcc_verdicts = (
    [ '',              'YES'   ],
    [ 'A',             'YES'   ],
    [ $ring,           'MAYBE' ],
    [ $cedilla,        'MAYBE' ],
    [ $a_ring,         'YES'   ],
    [ $a_then_ring,    'MAYBE' ],
    [ $a_cedilla_ring, 'MAYBE' ],
    [ $a_ring_cedilla, 'NO'    ],
    [ $aring_cedilla,  'NO'    ],
    [ $hangul,         'MAYBE' ],
    [ $compat,         'NO'    ],
);
for my $pair (@fcc_verdicts) {
    ok(answer(checkFCC($pair->[0])), $pair->[1]);
}
--- NEW FILE: short.t ---
# Skip the entire file (TAP plan "1..0") unless pack('U', 0x41) yields "A".
BEGIN {
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Normalize " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# Under PERL_CORE: change into t/ when that directory exists, and replace
# @INC with a single relative lib path (::lib on MacOS, ../lib otherwise).
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}

# Skip the entire file on perls older than 5.6.1.
BEGIN {
    if ($] < 5.006001) {
        print "1..0 # skipped: Perl 5.6.1 or later" .
              " needed for this test\n";
        exit;
    }
}
#########################
use strict;
use Unicode::Normalize qw(:all);
# TAP plan: eight tests. Test 1 passes as soon as the module has loaded.
print "1..8\n";
print "ok 1\n";
#########################
no warnings qw(utf8);

# U+3042 is 3-byte length (in UTF-8/UTF-EBCDIC)
our $a = pack('U0C', unpack('C', "\x{3042}"));

# Every function below is expected to return the one-character string "\0"
# for this input; tests are numbered 2..8 in list order.
my @functions = (\&NFD, \&NFKD, \&NFC, \&NFKC, \&decompose, \&reorder, \&compose);
my $test_number = 2;
for my $function (@functions) {
    my $verdict = $function->($a) eq "\0" ? "ok" : "not ok";
    print $verdict, " $test_number\n";
    $test_number++;
}
--- NEW FILE: split.t ---
# Skip the entire file (TAP plan "1..0") unless pack('U', 0x41) yields "A".
BEGIN {
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Normalize " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# Under PERL_CORE: change into t/ when that directory exists, and replace
# @INC with a single relative lib path (::lib on MacOS, ../lib otherwise).
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}

# Skip the entire file on perls older than 5.6.1.
BEGIN {
    if ($] < 5.006001) {
        print "1..0 # skipped: Perl 5.6.1 or later" .
              " needed for this test\n";
        exit;
    }
}
#########################
use Test;
use strict;
use warnings;
BEGIN { plan(tests => 14) }
use Unicode::Normalize qw(:all);
ok(1); # If we made it this far, we're ok.

# Local shorthands for the module's code point <-> string helpers.
sub _pack_U {
    return Unicode::Normalize::pack_U(@_);
}

sub _unpack_U {
    return Unicode::Normalize::unpack_U(@_);
}
#########################
our $proc;   # before the last starter
our $unproc; # the last starter and after

# If the string has no starter, the entire string ends up in $unproc.
#
# Given an already-normalized string followed by an unnormalized one,
# plain concatenation
#   $concat = $normalized . normalize($form, $unnormalized)
# is wrong.  Split the normalized part first and re-normalize its tail
# together with the new text:
#
#   ($processed, $unprocessed) = splitOnLastStarter($normalized);
#   $concat = $processed . normalize($form, $unprocessed.$unnormalized);

# Rows are [ input, expected $proc, expected $unproc ].
my @split_cases = (
    [ "",  "", ""  ],
    [ "A", "", "A" ],
    [ _pack_U(0x41, 0x300, 0x327, 0x42), _pack_U(0x41, 0x300, 0x327), "B" ],
    [ _pack_U(0x4E00, 0x41, 0x301),      _pack_U(0x4E00), _pack_U(0x41, 0x301) ],
    [ _pack_U(0x302, 0x301, 0x300),      "", _pack_U(0x302, 0x301, 0x300) ],
);
for my $case (@split_cases) {
    my ($input, $want_proc, $want_unproc) = @{$case};
    ($proc, $unproc) = splitOnLastStarter($input);
    ok($proc, $want_proc);
    ok($unproc, $want_unproc);
}

# Worked example of the concatenation rule described above.
our $ka_grave = _pack_U(0x41, 0, 0x42, 0x304B, 0x300);
our $dakuten  = _pack_U(0x3099);
our $ga_grave = _pack_U(0x41, 0, 0x42, 0x304C, 0x300);
our ($p, $u)  = splitOnLastStarter($ka_grave);
our $concat   = $p . NFC($u . $dakuten);

# Normalizing the joined string gives the expected result ...
my $joined_then_normalized = NFC($ka_grave . $dakuten);
ok($joined_then_normalized eq $ga_grave);

# ... normalizing each part separately does not ...
my $normalized_then_joined = NFC($ka_grave) . NFC($dakuten);
ok($normalized_then_joined ne $ga_grave);

# ... and the split-based concatenation does.
ok($concat eq $ga_grave);
--- NEW FILE: form.t ---
# Skip the entire file (TAP plan "1..0") unless pack('U', 0x41) yields "A".
BEGIN {
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Normalize " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# Under PERL_CORE: change into t/ when that directory exists, and replace
# @INC with a single relative lib path (::lib on MacOS, ../lib otherwise).
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}
#########################
use Test;
use strict;
use warnings;
BEGIN { plan(tests => 37) }
use Unicode::Normalize qw(:all);
ok(1); # If we made it this far, we're ok.

# Map a tri-state result to a label: undef => "MAYBE", true => "YES",
# defined-but-false => "NO".
sub answer {
    my ($verdict) = @_;
    return "MAYBE" if !defined $verdict;
    return $verdict ? "YES" : "NO";
}
#########################
# Inputs: one two-character string for the normalization calls, and each of
# its characters on its own for the quick checks.
my $pair      = "\x{304C}\x{FF76}";
my $char_304c = "\x{304C}";
my $char_ff76 = "\x{FF76}";

# Expected results, keyed by the short form name.
my @forms = qw(D C KD KC);
my %normalized = (
    D  => "\x{304B}\x{3099}\x{FF76}",
    C  => "\x{304C}\x{FF76}",
    KD => "\x{304B}\x{3099}\x{30AB}",
    KC => "\x{304C}\x{30AB}",
);
my %verdict_304c = (D => "NO",  C => "YES", KD => "NO", KC => "YES");
my %verdict_ff76 = (D => "YES", C => "YES", KD => "NO", KC => "NO");

# The dedicated per-form functions.
ok(NFD($pair),  $normalized{D});
ok(NFC($pair),  $normalized{C});
ok(NFKD($pair), $normalized{KD});
ok(NFKC($pair), $normalized{KC});

ok(answer(checkNFD($char_304c)),  $verdict_304c{D});
ok(answer(checkNFC($char_304c)),  $verdict_304c{C});
ok(answer(checkNFKD($char_304c)), $verdict_304c{KD});
ok(answer(checkNFKC($char_304c)), $verdict_304c{KC});

ok(answer(checkNFD($char_ff76)),  $verdict_ff76{D});
ok(answer(checkNFC($char_ff76)),  $verdict_ff76{C});
ok(answer(checkNFKD($char_ff76)), $verdict_ff76{KD});
ok(answer(checkNFKC($char_ff76)), $verdict_ff76{KC});

# normalize() and check() by form name: first the short names ('D', 'C',
# 'KD', 'KC'), then the same names with an 'NF' prefix.
for my $prefix ('', 'NF') {
    for my $form (@forms) {
        ok(normalize($prefix . $form, $pair), $normalized{$form});
    }
    for my $form (@forms) {
        ok(answer(check($prefix . $form, $char_304c)), $verdict_304c{$form});
    }
    for my $form (@forms) {
        ok(answer(check($prefix . $form, $char_ff76)), $verdict_ff76{$form});
    }
}
--- NEW FILE: proto.t ---
# Skip the entire file (TAP plan "1..0") unless pack('U', 0x41) yields "A".
BEGIN {
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Normalize " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# Under PERL_CORE: change into t/ when that directory exists, and replace
# @INC with a single relative lib path (::lib on MacOS, ../lib otherwise).
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}
#########################
use Test;
use strict;
use warnings;
BEGIN { plan(tests => 42) }
use Unicode::Normalize qw(:all);
ok(1); # If we made it this far, we're ok.
#########################
# unary op. RING-CEDILLA
# The calls below are deliberately written without parentheses: each passes
# only if the function name binds more tightly than `eq`.
my $ring_cedilla = "\x{30A}\x{327}";
my $cedilla_ring = "\x{327}\x{30A}";
ok($ring_cedilla ne $cedilla_ring);
ok(NFD $ring_cedilla eq $cedilla_ring);
ok(NFC $ring_cedilla eq $cedilla_ring);
ok(NFKD $ring_cedilla eq $cedilla_ring);
ok(NFKC $ring_cedilla eq $cedilla_ring);
ok(FCD $ring_cedilla eq $cedilla_ring);
ok(FCC $ring_cedilla eq $cedilla_ring);
ok(reorder $ring_cedilla eq $cedilla_ring);

# Declared prototype of every exported function, as [ code ref, prototype ].
my @expected_prototypes = (
    [ \&normalize,         '$$'  ],
    [ \&NFD,               '$'   ],
    [ \&NFC,               '$'   ],
    [ \&NFKD,              '$'   ],
    [ \&NFKC,              '$'   ],
    [ \&FCD,               '$'   ],
    [ \&FCC,               '$'   ],
    [ \&check,             '$$'  ],
    [ \&checkNFD,          '$'   ],
    [ \&checkNFC,          '$'   ],
    [ \&checkNFKD,         '$'   ],
    [ \&checkNFKC,         '$'   ],
    [ \&checkFCD,          '$'   ],
    [ \&checkFCC,          '$'   ],
    [ \&decompose,         '$;$' ],
    [ \&reorder,           '$'   ],
    [ \&compose,           '$'   ],
    [ \&composeContiguous, '$'   ],
    [ \&getCanon,          '$'   ],
    [ \&getCompat,         '$'   ],
    [ \&getComposite,      '$$'  ],
    [ \&getCombinClass,    '$'   ],
    [ \&isExclusion,       '$'   ],
    [ \&isSingleton,       '$'   ],
    [ \&isNonStDecomp,     '$'   ],
    [ \&isComp2nd,         '$'   ],
    [ \&isComp_Ex,         '$'   ],
    [ \&isNFD_NO,          '$'   ],
    [ \&isNFC_NO,          '$'   ],
    [ \&isNFC_MAYBE,       '$'   ],
    [ \&isNFKD_NO,         '$'   ],
    [ \&isNFKC_NO,         '$'   ],
    [ \&isNFKC_MAYBE,      '$'   ],
);
for my $entry (@expected_prototypes) {
    my ($code, $expected) = @{$entry};
    ok(prototype($code), $expected);
}
--- NEW FILE: illegal.t ---
# Skip the entire file (TAP plan "1..0") unless pack('U', 0x41) yields "A".
BEGIN {
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Normalize " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# Under PERL_CORE: change into t/ when that directory exists, and replace
# @INC with a single relative lib path (::lib on MacOS, ../lib otherwise).
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}

# Skip the entire file on perls older than 5.6.1.
BEGIN {
    if ($] < 5.006001) {
        print "1..0 # skipped: Perl 5.6.1 or later" .
              " needed for this test\n";
        exit;
    }
}
#########################
use Test;
use strict;
use warnings;
# Load the module, then skip the whole file unless either the sub
# Unicode::Normalize::bootstrap exists or perl is at least 5.8.0.
BEGIN {
    use Unicode::Normalize qw(:all);
    my $has_bootstrap = exists &Unicode::Normalize::bootstrap;
    my $perl_is_5_8   = 5.008 <= $];
    if (!$has_bootstrap && !$perl_is_5_8) {
        print "1..0 # skipped: XSUB, or Perl 5.8.0 or later" .
              " needed for this test\n";
        # Also emit whatever is currently in $@ (may be empty).
        print $@;
        exit;
    }
}
BEGIN { plan(tests => 112) }
#########################
no warnings qw(utf8);
# To avoid warning in Test.pm, EXPR in ok(EXPR) must be boolean.

# Code points exercised by this file; each one is expected to pass through
# every operation below unchanged.
my @code_points = (
    0xD800, 0xDFFF, 0xFDD0, 0xFDEF, 0xFEFF, 0xFFFE, 0xFFFF,
    0x1FFFF, 0x10FFFF, 0x110000, 0x7FFFFFFF,
);

# Ten operations per code point, in test order.
my @operations = (
    \&NFD,                          # 1
    \&NFC,                          # 2
    \&NFKD,                         # 3
    \&NFKC,                         # 4
    \&FCD,                          # 5
    \&FCC,                          # 6
    \&decompose,                    # 7
    sub { decompose($_[0], 1) },    # 8
    \&reorder,                      # 9
    \&compose,                      # 10
);

for my $code_point (@code_points) {
    my $char = chr $code_point;
    for my $operation (@operations) {
        ok($char eq $operation->($char));
    }
}

our $proc;   # before the last starter
our $unproc; # the last starter and after

# Local shorthand for the module's code points -> string helper.
sub _pack_U {
    return Unicode::Normalize::pack_U(@_);
}

# U+FFFF at the end is expected to be split off like a starter.
my $with_ffff = _pack_U(0x41, 0x300, 0x327, 0xFFFF);
($proc, $unproc) = splitOnLastStarter($with_ffff);
ok($proc eq _pack_U(0x41, 0x300, 0x327));
ok($unproc eq "\x{FFFF}");
--- NEW FILE: norm.t ---
# Skip the entire file (TAP plan "1..0") unless pack('U', 0x41) yields "A".
BEGIN {
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Normalize " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# Under PERL_CORE: change into t/ when that directory exists, and replace
# @INC with a single relative lib path (::lib on MacOS, ../lib otherwise).
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}
#########################
use Test;
use strict;
use warnings;
BEGIN { plan(tests => 29) }
use Unicode::Normalize qw(normalize);
ok(1); # If we made it this far, we're ok.

# Local shorthands for the module's code point <-> string helpers.
sub _pack_U {
    return Unicode::Normalize::pack_U(@_);
}

sub _unpack_U {
    return Unicode::Normalize::unpack_U(@_);
}
#########################
ok(normalize('C', ""), "");
ok(normalize('D', ""), "");

# Take a space-separated list of hex code points, run the corresponding
# string through normalize('C', ...), and return the result as
# space-separated, four-digit (minimum) uppercase hex code points.
sub hexNFC {
    my ($hex_string) = @_;
    my @code_points  = map { hex($_) } split ' ', $hex_string;
    my $normalized   = normalize('C', _pack_U(@code_points));
    return join " ", map { sprintf("%04X", $_) } _unpack_U($normalized);
}

# Same as hexNFC, but through normalize('D', ...).
sub hexNFD {
    my ($hex_string) = @_;
    my @code_points  = map { hex($_) } split ' ', $hex_string;
    my $normalized   = normalize('D', _pack_U(@code_points));
    return join " ", map { sprintf("%04X", $_) } _unpack_U($normalized);
}
# Rows are [ helper, hex input, expected hex output ], in test order.
my @hex_cases = (
    [ \&hexNFC, "0061 0315 0300 05AE 05C4 0062", "00E0 05AE 05C4 0315 0062" ],
    [ \&hexNFC, "00E0 05AE 05C4 0315 0062",      "00E0 05AE 05C4 0315 0062" ],
    [ \&hexNFC, "0061 05AE 0300 05C4 0315 0062", "00E0 05AE 05C4 0315 0062" ],
    [ \&hexNFC, "0045 0304 0300 AC00 11A8",      "1E14 AC01" ],
    [ \&hexNFC, "1100 1161 1100 1173 11AF",      "AC00 AE00" ],
    [ \&hexNFC, "1100 0300 1161 1173 11AF",      "1100 0300 1161 1173 11AF" ],

    [ \&hexNFD, "0061 0315 0300 05AE 05C4 0062", "0061 05AE 0300 05C4 0315 0062" ],
    [ \&hexNFD, "00E0 05AE 05C4 0315 0062",      "0061 05AE 0300 05C4 0315 0062" ],
    [ \&hexNFD, "0061 05AE 0300 05C4 0315 0062", "0061 05AE 0300 05C4 0315 0062" ],

    [ \&hexNFC, "0061 05C4 0315 0300 05AE 0062", "0061 05AE 05C4 0300 0315 0062" ],
    [ \&hexNFC, "0061 05AE 05C4 0300 0315 0062", "0061 05AE 05C4 0300 0315 0062" ],
    [ \&hexNFD, "0061 05C4 0315 0300 05AE 0062", "0061 05AE 05C4 0300 0315 0062" ],
    [ \&hexNFD, "0061 05AE 05C4 0300 0315 0062", "0061 05AE 05C4 0300 0315 0062" ],

    [ \&hexNFC, "0000 0041 0000 0000", "0000 0041 0000 0000" ],
    [ \&hexNFD, "0000 0041 0000 0000", "0000 0041 0000 0000" ],

    [ \&hexNFC, "AC00 11A7", "AC00 11A7" ],
    [ \&hexNFC, "AC00 11A8", "AC01" ],
    [ \&hexNFC, "AC00 11A9", "AC02" ],
    [ \&hexNFC, "AC00 11C2", "AC1B" ],
    [ \&hexNFC, "AC00 11C3", "AC00 11C3" ],

    # Test Cases from Public Review Issue #29: Normalization Issue
    # cf. http://www.unicode.org/review/pr-29.html
    [ \&hexNFC, "0B47 0300 0B3E",      "0B47 0300 0B3E" ],
    [ \&hexNFC, "1100 0300 1161",      "1100 0300 1161" ],
    [ \&hexNFC, "0B47 0B3E 0300",      "0B4B 0300" ],
    [ \&hexNFC, "1100 1161 0300",      "AC00 0300" ],
    [ \&hexNFC, "0B47 0300 0B3E 0327", "0B47 0300 0B3E 0327" ],
    [ \&hexNFC, "1100 0300 1161 0327", "1100 0300 1161 0327" ],
);
for my $case (@hex_cases) {
    my ($helper, $input, $expected) = @{$case};
    ok($helper->($input), $expected);
}
--- NEW FILE: func.t ---
# Skip the entire file (TAP plan "1..0") unless pack('U', 0x41) yields "A".
BEGIN {
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Normalize " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# Under PERL_CORE: change into t/ when that directory exists, and replace
# @INC with a single relative lib path (::lib on MacOS, ../lib otherwise).
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}
#########################
use Test;
use strict;
use warnings;
BEGIN { plan(tests => 13) }
use Unicode::Normalize qw(:all);
ok(1); # If we made it this far, we're ok.

# Local shorthands for the module's code point <-> string helpers.
sub _pack_U {
    return Unicode::Normalize::pack_U(@_);
}

sub _unpack_U {
    return Unicode::Normalize::unpack_U(@_);
}
#########################
# Test 2: getCombinClass() for four code points.  The last one is only
# checked on perl 5.7.3 or later.
my $combining_class_ok =
       getCombinClass(0x0000) == 0
    && getCombinClass(0x0300) == 230
    && getCombinClass(0x0711) == 36
    && ($] < 5.007003 || getCombinClass(0x1D167) == 1); # Unicode 3.1
print(($combining_class_ok ? "ok" : "not ok"), " 2\n");
# Decompositions expected from both getCanon() and getCompat(), keyed by
# the code point being decomposed.
my %shared_decomposition = (
    0x00C0 => _pack_U(0x0041, 0x0300),
    0x00EF => _pack_U(0x0069, 0x0308),
    0x304C => _pack_U(0x304B, 0x3099),
    0x1EA4 => _pack_U(0x0041, 0x0302, 0x0301),
    0x1F82 => _pack_U(0x03B1, 0x0313, 0x0300, 0x0345),
    0x1FAF => _pack_U(0x03A9, 0x0314, 0x0342, 0x0345),
    0xAC00 => _pack_U(0x1100, 0x1161),
    0xAE00 => _pack_U(0x1100, 0x1173, 0x11AF),
    0xFA2D => _pack_U(0x9DB4),
);

# Test 3: getCanon().  0x212C and 0x3243 are expected to be undefined here.
my $canon_ok =
       !defined getCanon(0)
    && !defined getCanon(41)
    && getCanon(0x00C0) eq $shared_decomposition{0x00C0}
    && getCanon(0x00EF) eq $shared_decomposition{0x00EF}
    && getCanon(0x304C) eq $shared_decomposition{0x304C}
    && getCanon(0x1EA4) eq $shared_decomposition{0x1EA4}
    && getCanon(0x1F82) eq $shared_decomposition{0x1F82}
    && getCanon(0x1FAF) eq $shared_decomposition{0x1FAF}
    && getCanon(0xAC00) eq $shared_decomposition{0xAC00}
    && getCanon(0xAE00) eq $shared_decomposition{0xAE00}
    && !defined getCanon(0x212C)
    && !defined getCanon(0x3243)
    && getCanon(0xFA2D) eq $shared_decomposition{0xFA2D};
print(($canon_ok ? "ok" : "not ok"), " 3\n");

# Test 4: getCompat().  Same expectations as above, except that 0x212C and
# 0x3243 are expected to have a decomposition.
my $compat_ok =
       !defined getCompat(0)
    && !defined getCompat(41)
    && getCompat(0x00C0) eq $shared_decomposition{0x00C0}
    && getCompat(0x00EF) eq $shared_decomposition{0x00EF}
    && getCompat(0x304C) eq $shared_decomposition{0x304C}
    && getCompat(0x1EA4) eq $shared_decomposition{0x1EA4}
    && getCompat(0x1F82) eq $shared_decomposition{0x1F82}
    && getCompat(0x1FAF) eq $shared_decomposition{0x1FAF}
    && getCompat(0x212C) eq _pack_U(0x0042)
    && getCompat(0x3243) eq _pack_U(0x0028, 0x81F3, 0x0029)
    && getCompat(0xAC00) eq $shared_decomposition{0xAC00}
    && getCompat(0xAE00) eq $shared_decomposition{0xAE00}
    && getCompat(0xFA2D) eq $shared_decomposition{0xFA2D};
print(($compat_ok ? "ok" : "not ok"), " 4\n");
# Test 5: getComposite() returns undef for pairs that are expected not to
# compose, and the composite code point for pairs that are.
my $composite_ok =
       !defined getComposite( 0,  0)
    && !defined getComposite( 0, 41)
    && !defined getComposite(41,  0)
    && !defined getComposite(41, 41)
    && !defined getComposite(12, 0x0300)
    && !defined getComposite(0x0055, 0xFF00)
    && getComposite(0x0041, 0x0300) == 0x00C0
    && getComposite(0x0055, 0x0300) == 0x00D9
    && getComposite(0x0112, 0x0300) == 0x1E14
    && getComposite(0x1100, 0x1161) == 0xAC00
    && getComposite(0x1100, 0x1173) == 0xADF8
    && !defined getComposite(0x1100, 0x11AF)
    && !defined getComposite(0x1173, 0x11AF)
    && !defined getComposite(0xAC00, 0x11A7)
    && getComposite(0xAC00, 0x11A8) == 0xAC01
    && getComposite(0xADF8, 0x11AF) == 0xAE00;
print(($composite_ok ? "ok" : "not ok"), " 5\n");
# Test 6: isExclusion().
my $exclusion_ok =
       !isExclusion(0)
    && !isExclusion(41)
    && isExclusion(0x0958)  # DEVANAGARI LETTER QA
    && isExclusion(0x0F43)  # TIBETAN LETTER GHA
    && isExclusion(0xFB4E); # HEBREW LETTER PE WITH RAFE
print(($exclusion_ok ? "ok" : "not ok"), " 6\n");

# Test 7: isSingleton().
my $singleton_ok =
       !isSingleton(0)
    && isSingleton(0x212B); # ANGSTROM SIGN
print(($singleton_ok ? "ok" : "not ok"), " 7\n");

# Test 8: reorder().  The same four code points follow two different base
# characters; both runs are expected to come back in the same new order,
# and the trailing 0x0061, 0x309A, 0x3099 are expected to stay as they are.
my @marks_before = (0x0300, 0x0315, 0x0313, 0x031b);
my @marks_after  = (0x031b, 0x0300, 0x0313, 0x0315);
my $reorder_ok =
       reorder("") eq ""
    && reorder(_pack_U(0x0041, @marks_before, 0x0061))
           eq  _pack_U(0x0041, @marks_after,  0x0061)
    && reorder(_pack_U(0x00C1, @marks_before, 0x0061, 0x309A, 0x3099))
           eq  _pack_U(0x00C1, @marks_after,  0x0061, 0x309A, 0x3099);
print(($reorder_ok ? "ok" : "not ok"), " 8\n");
# Map a tri-state result to a label: undef => "MAYBE", true => "YES",
# defined-but-false => "NO".
sub answer {
    my ($verdict) = @_;
    return "MAYBE" if !defined $verdict;
    return $verdict ? "YES" : "NO";
}
# Test 9: the empty string and a plain ASCII string must be reported as
# "YES" by every quick check, both through the dedicated functions and
# through check() with a form name.
my $empty = "";
my $ascii = "AZaz\t12!#`";
my $prenormalized_ok =
       answer(checkNFD($empty))       eq "YES"
    && answer(checkNFC($empty))       eq "YES"
    && answer(checkNFKD($empty))      eq "YES"
    && answer(checkNFKC($empty))      eq "YES"
    && answer(check("NFD",  $empty))  eq "YES"
    && answer(check("NFC",  $empty))  eq "YES"
    && answer(check("NFKD", $empty))  eq "YES"
    && answer(check("NFKC", $empty))  eq "YES"
    # U+0000 to U+007F are prenormalized in all the normalization forms.
    && answer(checkNFD($ascii))       eq "YES"
    && answer(checkNFC($ascii))       eq "YES"
    && answer(checkNFKD($ascii))      eq "YES"
    && answer(checkNFKC($ascii))      eq "YES"
    && answer(check("D",  $ascii))    eq "YES"
    && answer(check("C",  $ascii))    eq "YES"
    && answer(check("KD", $ascii))    eq "YES"
    && answer(check("KC", $ascii))    eq "YES";
print(($prenormalized_ok ? "ok" : "not ok"), " 9\n");
# Test 10: quick-check verdicts for a handful of mixed strings.
my $mixed_ok =
       answer(checkNFD(NFD(_pack_U(0xC1, 0x1100, 0x1173, 0x11AF))))   eq "YES"
    && answer(checkNFD(_pack_U(0x20, 0xC1, 0x1100, 0x1173, 0x11AF)))  eq "NO"
    && answer(checkNFC(_pack_U(0x20, 0xC1, 0x1173, 0x11AF)))          eq "MAYBE"
    && answer(checkNFC(_pack_U(0x20, 0xC1, 0xAE00, 0x1100)))          eq "YES"
    && answer(checkNFC(_pack_U(0x20, 0xC1, 0xAE00, 0x1100, 0x300)))   eq "MAYBE"
    && answer(checkNFC(_pack_U(0x20, 0xC1, 0xFF71, 0x2025)))          eq "YES"
    && answer(check("NFC", _pack_U(0x20, 0xC1, 0x212B, 0x300)))       eq "NO"
    && answer(checkNFKD(_pack_U(0x20, 0xC1, 0xFF71, 0x2025)))         eq "NO"
    && answer(checkNFKC(_pack_U(0x20, 0xC1, 0xAE00, 0x2025)))         eq "NO";
print(($mixed_ok ? "ok" : "not ok"), " 10\n");

# Tests 11-13: pass the capture variables $1 and $2 straight into the
# normalization functions.
# s/^NF// in normalize() must not prevent using $1, $&, etc.
"012ABC" =~ /(\d+)(\w+)/;

my $nfc_captures_ok = "012" eq NFC($1) && "ABC" eq NFC($2);
print(($nfc_captures_ok ? "ok" : "not ok"), " 11\n");

my $short_name_ok = "012" eq normalize('C', $1) && "ABC" eq normalize('C', $2);
print(($short_name_ok ? "ok" : "not ok"), " 12\n");

my $long_name_ok = "012" eq normalize('NFC', $1) && "ABC" eq normalize('NFC', $2);
print(($long_name_ok ? "ok" : "not ok"), " 13\n");
--- NEW FILE: test.t ---
# Skip the entire file (TAP plan "1..0") unless pack('U', 0x41) yields "A".
BEGIN {
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Normalize " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# Under PERL_CORE: change into t/ when that directory exists, and replace
# @INC with a single relative lib path (::lib on MacOS, ../lib otherwise).
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}
#########################
use Test;
use strict;
use warnings;
BEGIN { plan(tests => 31) }
use Unicode::Normalize;
ok(1); # If we made it this far, we're ok.

# Local shorthands for the module's code point <-> string helpers.
sub _pack_U {
    return Unicode::Normalize::pack_U(@_);
}

sub _unpack_U {
    return Unicode::Normalize::unpack_U(@_);
}
#########################
ok(NFC(""), "");
ok(NFD(""), "");

# Take a space-separated list of hex code points, run the corresponding
# string through NFC(), and return the result as space-separated,
# four-digit (minimum) uppercase hex code points.
sub hexNFC {
    my ($hex_string) = @_;
    my @code_points  = map { hex($_) } split ' ', $hex_string;
    my $normalized   = NFC(_pack_U(@code_points));
    return join " ", map { sprintf("%04X", $_) } _unpack_U($normalized);
}

# Same as hexNFC, but through NFD().
sub hexNFD {
    my ($hex_string) = @_;
    my @code_points  = map { hex($_) } split ' ', $hex_string;
    my $normalized   = NFD(_pack_U(@code_points));
    return join " ", map { sprintf("%04X", $_) } _unpack_U($normalized);
}
# Dispatch from a form tag to the matching hex helper.
my %hex_helper_for = (
    C => \&hexNFC,
    D => \&hexNFD,
);

# Rows are [ form tag, hex input, expected hex output ], in test order.
my @hex_cases = (
    [ C => "0061 0315 0300 05AE 05C4 0062", "00E0 05AE 05C4 0315 0062" ],
    [ C => "00E0 05AE 05C4 0315 0062",      "00E0 05AE 05C4 0315 0062" ],
    [ C => "0061 05AE 0300 05C4 0315 0062", "00E0 05AE 05C4 0315 0062" ],
    [ C => "0045 0304 0300 AC00 11A8",      "1E14 AC01" ],
    [ C => "1100 1161 1100 1173 11AF",      "AC00 AE00" ],
    [ C => "1100 0300 1161 1173 11AF",      "1100 0300 1161 1173 11AF" ],

    [ D => "0061 0315 0300 05AE 05C4 0062", "0061 05AE 0300 05C4 0315 0062" ],
    [ D => "00E0 05AE 05C4 0315 0062",      "0061 05AE 0300 05C4 0315 0062" ],
    [ D => "0061 05AE 0300 05C4 0315 0062", "0061 05AE 0300 05C4 0315 0062" ],

    [ C => "0061 05C4 0315 0300 05AE 0062", "0061 05AE 05C4 0300 0315 0062" ],
    [ C => "0061 05AE 05C4 0300 0315 0062", "0061 05AE 05C4 0300 0315 0062" ],
    [ D => "0061 05C4 0315 0300 05AE 0062", "0061 05AE 05C4 0300 0315 0062" ],
    [ D => "0061 05AE 05C4 0300 0315 0062", "0061 05AE 05C4 0300 0315 0062" ],

    [ C => "0000 0041 0000 0000", "0000 0041 0000 0000" ],
    [ D => "0000 0041 0000 0000", "0000 0041 0000 0000" ],

    [ C => "AC00 11A7", "AC00 11A7" ],
    [ C => "AC00 11A8", "AC01" ],
    [ C => "AC00 11A9", "AC02" ],
    [ C => "AC00 11C2", "AC1B" ],
    [ C => "AC00 11C3", "AC00 11C3" ],

    # Test Cases from Public Review Issue #29: Normalization Issue
    # cf. http://www.unicode.org/review/pr-29.html
    [ C => "0B47 0300 0B3E",      "0B47 0300 0B3E" ],
    [ C => "1100 0300 1161",      "1100 0300 1161" ],
    [ C => "0B47 0B3E 0300",      "0B4B 0300" ],
    [ C => "1100 1161 0300",      "AC00 0300" ],
    [ C => "0B47 0300 0B3E 0327", "0B47 0300 0B3E 0327" ],
    [ C => "1100 0300 1161 0327", "1100 0300 1161 0327" ],
);
for my $case (@hex_cases) {
    my ($form, $input, $expected) = @{$case};
    ok($hex_helper_for{$form}->($input), $expected);
}

# The two calls below are deliberately written without parentheses: each
# passes only if the function name binds more tightly than `eq`.

# NFC() should be unary.
my $nfc_input    = _pack_U(0x41, 0x0302, 0x0301, 0x62);
my $nfc_expected = _pack_U(0x1EA4, 0x62);
ok(NFC $nfc_input eq $nfc_expected);

# NFD() should be unary.
my $nfd_input    = _pack_U(0xE0, 0xAC00);
my $nfd_expected = _pack_U(0x61, 0x0300, 0x1100, 0x1161);
ok(NFD $nfd_input eq $nfd_expected);
More information about the dslinux-commit
mailing list