dslinux/user/perl/ext/Unicode/Normalize/t fcdc.t form.t func.t illegal.t norm.t null.t proto.t short.t split.t test.t

cayenne dslinux_cayenne at user.in-berlin.de
Tue Dec 5 05:27:00 CET 2006


Update of /cvsroot/dslinux/dslinux/user/perl/ext/Unicode/Normalize/t
In directory antilope:/tmp/cvs-serv7729/ext/Unicode/Normalize/t

Added Files:
	fcdc.t form.t func.t illegal.t norm.t null.t proto.t short.t 
	split.t test.t 
Log Message:
Adding fresh perl source to HEAD to branch from

--- NEW FILE: null.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Normalize " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
}

BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

#########################

use strict;
use warnings;

use Unicode::Normalize qw(:all);
print "1..8\n";

print "ok 1\n";

# if $_ is not NULL-terminated, test may fail.

$_ = compose('abc');
print /c$/ ? "ok" : "not ok", " 2\n";

$_ = decompose('abc');
print /c$/ ? "ok" : "not ok", " 3\n";

$_ = reorder('abc');
print /c$/ ? "ok" : "not ok", " 4\n";

$_ = NFD('abc');
print /c$/ ? "ok" : "not ok", " 5\n";

$_ = NFC('abc');
print /c$/ ? "ok" : "not ok", " 6\n";

$_ = NFKD('abc');
print /c$/ ? "ok" : "not ok", " 7\n";

$_ = NFKC('abc');
print /c$/ ? "ok" : "not ok", " 8\n";


--- NEW FILE: fcdc.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Normalize " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
}

BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

#########################

use Test;
use strict;
use warnings;
BEGIN { plan tests => 35 };
use Unicode::Normalize qw(:all);
ok(1); # If we made it this far, we're ok.

sub _pack_U   { Unicode::Normalize::pack_U(@_) }
sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }

#########################

ok(answer(checkFCD('')), 'YES');
ok(answer(checkFCD('A')), 'YES');
ok(answer(checkFCD("\x{030A}")), 'YES');  # 030A;COMBINING RING ABOVE
ok(answer(checkFCD("\x{0327}")), 'YES'); # 0327;COMBINING CEDILLA
ok(answer(checkFCD(_pack_U(0x00C5))), 'YES'); # A with ring above
ok(answer(checkFCD(_pack_U(0x41, 0x30A))), 'YES'); # A+ring
ok(answer(checkFCD(_pack_U(0x41, 0x327, 0x30A))), 'YES'); # A+cedilla+ring
ok(answer(checkFCD(_pack_U(0x41, 0x30A, 0x327))), 'NO');  # A+ring+cedilla
ok(answer(checkFCD(_pack_U(0xC5, 0x0327))), 'NO'); # A-ring+cedilla
ok(answer(checkNFC(_pack_U(0xC5, 0x0327))), 'MAYBE'); # NFC: A-ring+cedilla
ok(answer(check("FCD", _pack_U(0xC5, 0x0327))), 'NO');
ok(answer(check("NFC", _pack_U(0xC5, 0x0327))), 'MAYBE');
ok(answer(checkFCD("\x{AC01}\x{1100}\x{1161}")), 'YES'); # hangul
ok(answer(checkFCD("\x{212B}\x{F900}")), 'YES'); # compat

ok(FCD(''), "");
ok(FCC(''), "");

ok(FCD('A'), "A");
ok(FCC('A'), "A");

ok(answer(checkFCD(_pack_U(0x1EA7, 0x05AE, 0x0315, 0x0062))), "NO");
ok(answer(checkFCC(_pack_U(0x1EA7, 0x05AE, 0x0315, 0x0062))), "NO");

ok(FCC(_pack_U(0xC5, 0x327)), _pack_U(0x41, 0x327, 0x30A));
ok(FCC(_pack_U(0x45, 0x304, 0x300)), _pack_U(0x1E14));
ok(FCC("\x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}"), "\x{AC00}\x{AE00}");

ok(answer(checkFCC('')), 'YES');
ok(answer(checkFCC('A')), 'YES');
ok(answer(checkFCC("\x{030A}")), 'MAYBE');  # 030A;COMBINING RING ABOVE
ok(answer(checkFCC("\x{0327}")), 'MAYBE'); # 0327;COMBINING CEDILLA
ok(answer(checkFCC(_pack_U(0x00C5))), 'YES'); # A with ring above
ok(answer(checkFCC(_pack_U(0x41, 0x30A))), 'MAYBE'); # A+ring
ok(answer(checkFCC(_pack_U(0x41, 0x327, 0x30A))), 'MAYBE'); # A+cedilla+ring
ok(answer(checkFCC(_pack_U(0x41, 0x30A, 0x327))), 'NO');  # A+ring+cedilla
ok(answer(checkFCC(_pack_U(0xC5, 0x0327))), 'NO'); # A-ring+cedilla
ok(answer(checkFCC("\x{AC01}\x{1100}\x{1161}")), 'MAYBE'); # hangul
ok(answer(checkFCC("\x{212B}\x{F900}")), 'NO'); # compat


--- NEW FILE: short.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Normalize " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
}

BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

BEGIN {
    unless (5.006001 <= $]) {
	print "1..0 # skipped: Perl 5.6.1 or later".
		" needed for this test\n";
	exit;
    }
}

#########################

use strict;
use Unicode::Normalize qw(:all);

print "1..8\n";
print "ok 1\n";

#########################

no warnings qw(utf8);

# U+3042 is 3-byte length (in UTF-8/UTF-EBCDIC)
our $a = pack 'U0C', unpack 'C', "\x{3042}";

print NFD($a) eq "\0"
   ? "ok" : "not ok", " 2\n";

print NFKD($a) eq "\0"
   ? "ok" : "not ok", " 3\n";

print NFC($a) eq "\0"
   ? "ok" : "not ok", " 4\n";

print NFKC($a) eq "\0"
   ? "ok" : "not ok", " 5\n";

print decompose($a) eq "\0"
   ? "ok" : "not ok", " 6\n";

print reorder($a) eq "\0"
   ? "ok" : "not ok", " 7\n";

print compose($a) eq "\0"
   ? "ok" : "not ok", " 8\n";


--- NEW FILE: split.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Normalize " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
}

BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

BEGIN {
    unless (5.006001 <= $]) {
	print "1..0 # skipped: Perl 5.6.1 or later".
		" needed for this test\n";
	exit;
    }
}

#########################

use Test;
use strict;
use warnings;
BEGIN { plan tests => 14 };
use Unicode::Normalize qw(:all);
ok(1); # If we made it this far, we're ok.

sub _pack_U   { Unicode::Normalize::pack_U(@_) }
sub _unpack_U { Unicode::Normalize::unpack_U(@_) }

#########################

our $proc;    # before the last starter
our $unproc;  # the last starter and after
# If string has no starter, entire string is set to $unproc.

# When you have $normalized string and $unnormalized string following,
# a simple concatenation
#   C<$concat = $normalized . normalize($form, $unnormalized)>
# is wrong. Instead of it, like this:
#
#       ($processed, $unprocessed) = splitOnLastStarter($normalized);
#       $concat = $processed . normalize($form, $unprocessed.$unnormalized);

($proc, $unproc) = splitOnLastStarter("");
ok($proc,   "");
ok($unproc, "");

($proc, $unproc) = splitOnLastStarter("A");
ok($proc,   "");
ok($unproc, "A");

($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0x42));
ok($proc,   _pack_U(0x41, 0x300, 0x327));
ok($unproc, "B");

($proc, $unproc) = splitOnLastStarter(_pack_U(0x4E00, 0x41, 0x301));
ok($proc,   _pack_U(0x4E00));
ok($unproc, _pack_U(0x41, 0x301));

($proc, $unproc) = splitOnLastStarter(_pack_U(0x302, 0x301, 0x300));
ok($proc,   "");
ok($unproc, _pack_U(0x302, 0x301, 0x300));

our $ka_grave = _pack_U(0x41, 0, 0x42, 0x304B, 0x300);
our $dakuten  = _pack_U(0x3099);
our $ga_grave = _pack_U(0x41, 0, 0x42, 0x304C, 0x300);

our ($p, $u) = splitOnLastStarter($ka_grave);
our $concat = $p . NFC($u.$dakuten);

ok(NFC($ka_grave.$dakuten) eq $ga_grave);
ok(NFC($ka_grave).NFC($dakuten) ne $ga_grave);
ok($concat eq $ga_grave);


--- NEW FILE: form.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Normalize " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
}

BEGIN {
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

#########################

use Test;
use strict;
use warnings;
BEGIN { plan tests => 37 };
use Unicode::Normalize qw(:all);
ok(1); # If we made it this far, we're ok.

sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }

#########################

ok(NFD ("\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}");
ok(NFC ("\x{304C}\x{FF76}"), "\x{304C}\x{FF76}");
ok(NFKD("\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}");
ok(NFKC("\x{304C}\x{FF76}"), "\x{304C}\x{30AB}");

ok(answer(checkNFD ("\x{304C}")), "NO");
ok(answer(checkNFC ("\x{304C}")), "YES");
ok(answer(checkNFKD("\x{304C}")), "NO");
ok(answer(checkNFKC("\x{304C}")), "YES");
ok(answer(checkNFD ("\x{FF76}")), "YES");
ok(answer(checkNFC ("\x{FF76}")), "YES");
ok(answer(checkNFKD("\x{FF76}")), "NO");
ok(answer(checkNFKC("\x{FF76}")), "NO");

ok(normalize('D', "\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}");
ok(normalize('C', "\x{304C}\x{FF76}"), "\x{304C}\x{FF76}");
ok(normalize('KD',"\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}");
ok(normalize('KC',"\x{304C}\x{FF76}"), "\x{304C}\x{30AB}");

ok(answer(check('D', "\x{304C}")), "NO");
ok(answer(check('C', "\x{304C}")), "YES");
ok(answer(check('KD',"\x{304C}")), "NO");
ok(answer(check('KC',"\x{304C}")), "YES");
ok(answer(check('D' ,"\x{FF76}")), "YES");
ok(answer(check('C' ,"\x{FF76}")), "YES");
ok(answer(check('KD',"\x{FF76}")), "NO");
ok(answer(check('KC',"\x{FF76}")), "NO");

ok(normalize('NFD', "\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}");
ok(normalize('NFC', "\x{304C}\x{FF76}"), "\x{304C}\x{FF76}");
ok(normalize('NFKD',"\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}");
ok(normalize('NFKC',"\x{304C}\x{FF76}"), "\x{304C}\x{30AB}");

ok(answer(check('NFD', "\x{304C}")), "NO");
ok(answer(check('NFC', "\x{304C}")), "YES");
ok(answer(check('NFKD',"\x{304C}")), "NO");
ok(answer(check('NFKC',"\x{304C}")), "YES");
ok(answer(check('NFD' ,"\x{FF76}")), "YES");
ok(answer(check('NFC' ,"\x{FF76}")), "YES");
ok(answer(check('NFKD',"\x{FF76}")), "NO");
ok(answer(check('NFKC',"\x{FF76}")), "NO");


--- NEW FILE: proto.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Normalize " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
}

BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

#########################

use Test;
use strict;
use warnings;
BEGIN { plan tests => 42 };
use Unicode::Normalize qw(:all);
ok(1); # If we made it this far, we're ok.

#########################

# unary op. RING-CEDILLA
ok(        "\x{30A}\x{327}" ne "\x{327}\x{30A}");
ok(NFD     "\x{30A}\x{327}" eq "\x{327}\x{30A}");
ok(NFC     "\x{30A}\x{327}" eq "\x{327}\x{30A}");
ok(NFKD    "\x{30A}\x{327}" eq "\x{327}\x{30A}");
ok(NFKC    "\x{30A}\x{327}" eq "\x{327}\x{30A}");
ok(FCD     "\x{30A}\x{327}" eq "\x{327}\x{30A}");
ok(FCC     "\x{30A}\x{327}" eq "\x{327}\x{30A}");
ok(reorder "\x{30A}\x{327}" eq "\x{327}\x{30A}");

ok(prototype \&normalize,'$$');
ok(prototype \&NFD,  '$');
ok(prototype \&NFC,  '$');
ok(prototype \&NFKD, '$');
ok(prototype \&NFKC, '$');
ok(prototype \&FCD,  '$');
ok(prototype \&FCC,  '$');

ok(prototype \&check,    '$$');
ok(prototype \&checkNFD, '$');
ok(prototype \&checkNFC, '$');
ok(prototype \&checkNFKD,'$');
ok(prototype \&checkNFKC,'$');
ok(prototype \&checkFCD, '$');
ok(prototype \&checkFCC, '$');

ok(prototype \&decompose, '$;$');
ok(prototype \&reorder,   '$');
ok(prototype \&compose,   '$');
ok(prototype \&composeContiguous, '$');

ok(prototype \&getCanon,      '$');
ok(prototype \&getCompat,     '$');
ok(prototype \&getComposite,  '$$');
ok(prototype \&getCombinClass,'$');
ok(prototype \&isExclusion,   '$');
ok(prototype \&isSingleton,   '$');
ok(prototype \&isNonStDecomp, '$');
ok(prototype \&isComp2nd,     '$');
ok(prototype \&isComp_Ex,     '$');

ok(prototype \&isNFD_NO,      '$');
ok(prototype \&isNFC_NO,      '$');
ok(prototype \&isNFC_MAYBE,   '$');
ok(prototype \&isNFKD_NO,     '$');
ok(prototype \&isNFKC_NO,     '$');
ok(prototype \&isNFKC_MAYBE,  '$');


--- NEW FILE: illegal.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Normalize " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
}

BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

BEGIN {
    unless (5.006001 <= $]) {
	print "1..0 # skipped: Perl 5.6.1 or later".
		" needed for this test\n";
	exit;
    }
}

#########################

use Test;
use strict;
use warnings;

BEGIN {
    use Unicode::Normalize qw(:all);

    unless (exists &Unicode::Normalize::bootstrap or 5.008 <= $]) {
	print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
		" needed for this test\n";
	print $@;
	exit;
    }
}

BEGIN { plan tests => 112 };

#########################

no warnings qw(utf8);
# To avoid warning in Test.pm, EXPR in ok(EXPR) must be boolean.

for my $u (0xD800, 0xDFFF, 0xFDD0, 0xFDEF, 0xFEFF, 0xFFFE, 0xFFFF,
	   0x1FFFF, 0x10FFFF, 0x110000, 0x7FFFFFFF)
{
    my $c = chr $u;
    ok($c eq NFD($c));  # 1
    ok($c eq NFC($c));  # 2
    ok($c eq NFKD($c)); # 3
    ok($c eq NFKC($c)); # 4
    ok($c eq FCD($c));  # 5
    ok($c eq FCC($c));  # 6
    ok($c eq decompose($c));   # 7
    ok($c eq decompose($c,1)); # 8
    ok($c eq reorder($c));     # 9
    ok($c eq compose($c));     # 10
}

our $proc;    # before the last starter
our $unproc;  # the last starter and after

sub _pack_U   { Unicode::Normalize::pack_U(@_) }

($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0xFFFF));
ok($proc   eq _pack_U(0x41, 0x300, 0x327));
ok($unproc eq "\x{FFFF}");


--- NEW FILE: norm.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Normalize " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
}

BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

#########################

use Test;
use strict;
use warnings;
BEGIN { plan tests => 29 };
use Unicode::Normalize qw(normalize);
ok(1); # If we made it this far, we're ok.

sub _pack_U   { Unicode::Normalize::pack_U(@_) }
sub _unpack_U { Unicode::Normalize::unpack_U(@_) }

#########################

ok(normalize('C', ""), "");
ok(normalize('D', ""), "");

sub hexNFC {
  join " ", map sprintf("%04X", $_),
  _unpack_U normalize 'C', _pack_U map hex, split ' ', shift;
}
sub hexNFD {
  join " ", map sprintf("%04X", $_),
  _unpack_U normalize 'D', _pack_U map hex, split ' ', shift;
}

ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062");
ok(hexNFC("00E0 05AE 05C4 0315 0062"),      "00E0 05AE 05C4 0315 0062");
ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01");
ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00");
ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF");

ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFD("00E0 05AE 05C4 0315 0062"),      "0061 05AE 0300 05C4 0315 0062");
ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000");
ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000");

ok(hexNFC("AC00 11A7"), "AC00 11A7");
ok(hexNFC("AC00 11A8"), "AC01");
ok(hexNFC("AC00 11A9"), "AC02");
ok(hexNFC("AC00 11C2"), "AC1B");
ok(hexNFC("AC00 11C3"), "AC00 11C3");

# Test Cases from Public Review Issue #29: Normalization Issue
# cf. http://www.unicode.org/review/pr-29.html
ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E");
ok(hexNFC("1100 0300 1161"), "1100 0300 1161");

ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300");
ok(hexNFC("1100 1161 0300"), "AC00 0300");

ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327");
ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327");

--- NEW FILE: func.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Normalize " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
}

BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

#########################

use Test;
use strict;
use warnings;
BEGIN { plan tests => 13 };
use Unicode::Normalize qw(:all);
ok(1); # If we made it this far, we're ok.

sub _pack_U   { Unicode::Normalize::pack_U(@_) }
sub _unpack_U { Unicode::Normalize::unpack_U(@_) }

#########################

print getCombinClass(   0) == 0
   && getCombinClass( 768) == 230
   && getCombinClass(1809) == 36
   && ($] < 5.007003 || getCombinClass(0x1D167) == 1) # Unicode 3.1
  ? "ok" : "not ok", " 2\n";

print ! defined getCanon( 0)
   && ! defined getCanon(41)
   && getCanon(0x00C0) eq _pack_U(0x0041, 0x0300)
   && getCanon(0x00EF) eq _pack_U(0x0069, 0x0308)
   && getCanon(0x304C) eq _pack_U(0x304B, 0x3099)
   && getCanon(0x1EA4) eq _pack_U(0x0041, 0x0302, 0x0301)
   && getCanon(0x1F82) eq _pack_U(0x03B1, 0x0313, 0x0300, 0x0345)
   && getCanon(0x1FAF) eq _pack_U(0x03A9, 0x0314, 0x0342, 0x0345)
   && getCanon(0xAC00) eq _pack_U(0x1100, 0x1161)
   && getCanon(0xAE00) eq _pack_U(0x1100, 0x1173, 0x11AF)
   && ! defined getCanon(0x212C)
   && ! defined getCanon(0x3243)
   && getCanon(0xFA2D) eq _pack_U(0x9DB4)
  ? "ok" : "not ok", " 3\n";

print ! defined getCompat( 0)
   && ! defined getCompat(41)
   && getCompat(0x00C0) eq _pack_U(0x0041, 0x0300)
   && getCompat(0x00EF) eq _pack_U(0x0069, 0x0308)
   && getCompat(0x304C) eq _pack_U(0x304B, 0x3099)
   && getCompat(0x1EA4) eq _pack_U(0x0041, 0x0302, 0x0301)
   && getCompat(0x1F82) eq _pack_U(0x03B1, 0x0313, 0x0300, 0x0345)
   && getCompat(0x1FAF) eq _pack_U(0x03A9, 0x0314, 0x0342, 0x0345)
   && getCompat(0x212C) eq _pack_U(0x0042)
   && getCompat(0x3243) eq _pack_U(0x0028, 0x81F3, 0x0029)
   && getCompat(0xAC00) eq _pack_U(0x1100, 0x1161)
   && getCompat(0xAE00) eq _pack_U(0x1100, 0x1173, 0x11AF)
   && getCompat(0xFA2D) eq _pack_U(0x9DB4)
  ? "ok" : "not ok", " 4\n";

print ! defined getComposite( 0,  0)
   && ! defined getComposite( 0, 41)
   && ! defined getComposite(41,  0)
   && ! defined getComposite(41, 41)
   && ! defined getComposite(12, 0x0300)
   && ! defined getComposite(0x0055, 0xFF00)
   && 0x00C0 == getComposite(0x0041, 0x0300)
   && 0x00D9 == getComposite(0x0055, 0x0300)
   && 0x1E14 == getComposite(0x0112, 0x0300)
   && 0xAC00 == getComposite(0x1100, 0x1161)
   && 0xADF8 == getComposite(0x1100, 0x1173)
   && ! defined getComposite(0x1100, 0x11AF)
   && ! defined getComposite(0x1173, 0x11AF)
   && ! defined getComposite(0xAC00, 0x11A7)
   && 0xAC01 == getComposite(0xAC00, 0x11A8)
   && 0xAE00 == getComposite(0xADF8, 0x11AF)
  ? "ok" : "not ok", " 5\n";

print ! isExclusion( 0)
   && ! isExclusion(41)
   && isExclusion(2392)  # DEVANAGARI LETTER QA
   && isExclusion(3907)  # TIBETAN LETTER GHA
   && isExclusion(64334) # HEBREW LETTER PE WITH RAFE
  ? "ok" : "not ok", " 6\n";

print ! isSingleton( 0)
   && isSingleton(0x212B) # ANGSTROM SIGN
  ? "ok" : "not ok", " 7\n";

print reorder("") eq ""
   && reorder(_pack_U(0x0041, 0x0300, 0x0315, 0x0313, 0x031b, 0x0061))
      eq _pack_U(0x0041, 0x031b, 0x0300, 0x0313, 0x0315, 0x0061)
   && reorder(_pack_U(0x00C1, 0x0300, 0x0315, 0x0313, 0x031b,
	0x0061, 0x309A, 0x3099))
      eq _pack_U(0x00C1, 0x031b, 0x0300, 0x0313, 0x0315,
	0x0061, 0x309A, 0x3099)
  ? "ok" : "not ok", " 8\n";

sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }

print answer(checkNFD(""))  eq "YES"
  &&  answer(checkNFC(""))  eq "YES"
  &&  answer(checkNFKD("")) eq "YES"
  &&  answer(checkNFKC("")) eq "YES"
  &&  answer(check("NFD", "")) eq "YES"
  &&  answer(check("NFC", "")) eq "YES"
  &&  answer(check("NFKD","")) eq "YES"
  &&  answer(check("NFKC","")) eq "YES"
# U+0000 to U+007F are prenormalized in all the normalization forms.
  && answer(checkNFD("AZaz\t12!#`"))  eq "YES"
  && answer(checkNFC("AZaz\t12!#`"))  eq "YES"
  && answer(checkNFKD("AZaz\t12!#`")) eq "YES"
  && answer(checkNFKC("AZaz\t12!#`")) eq "YES"
  && answer(check("D", "AZaz\t12!#`")) eq "YES"
  && answer(check("C", "AZaz\t12!#`")) eq "YES"
  && answer(check("KD","AZaz\t12!#`")) eq "YES"
  && answer(check("KC","AZaz\t12!#`")) eq "YES"
  ? "ok" : "not ok", " 9\n";

print 1
  && answer(checkNFD(NFD(_pack_U(0xC1, 0x1100, 0x1173, 0x11AF)))) eq "YES"
  && answer(checkNFD(_pack_U(0x20, 0xC1, 0x1100, 0x1173, 0x11AF))) eq "NO"
  && answer(checkNFC(_pack_U(0x20, 0xC1, 0x1173, 0x11AF))) eq "MAYBE"
  && answer(checkNFC(_pack_U(0x20, 0xC1, 0xAE00, 0x1100))) eq "YES"
  && answer(checkNFC(_pack_U(0x20, 0xC1, 0xAE00, 0x1100, 0x300))) eq "MAYBE"
  && answer(checkNFC(_pack_U(0x20, 0xC1, 0xFF71, 0x2025))) eq "YES"
  && answer(check("NFC", _pack_U(0x20, 0xC1, 0x212B, 0x300))) eq "NO"
  && answer(checkNFKD(_pack_U(0x20, 0xC1, 0xFF71, 0x2025))) eq "NO"
  && answer(checkNFKC(_pack_U(0x20, 0xC1, 0xAE00, 0x2025))) eq "NO"
  ? "ok" : "not ok", " 10\n";

"012ABC" =~ /(\d+)(\w+)/;
print "012" eq NFC $1 && "ABC" eq NFC $2
  ? "ok" : "not ok", " 11\n";

print "012" eq normalize('C', $1) && "ABC" eq normalize('C', $2)
  ? "ok" : "not ok", " 12\n";

print "012" eq normalize('NFC', $1) && "ABC" eq normalize('NFC', $2)
  ? "ok" : "not ok", " 13\n";
 # s/^NF// in normalize() must not prevent using $1, $&, etc.


--- NEW FILE: test.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Normalize " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
}

BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

#########################

use Test;
use strict;
use warnings;
BEGIN { plan tests => 31 };
use Unicode::Normalize;
ok(1); # If we made it this far, we're ok.

sub _pack_U   { Unicode::Normalize::pack_U(@_) }
sub _unpack_U { Unicode::Normalize::unpack_U(@_) }

#########################

ok(NFC(""), "");
ok(NFD(""), "");

sub hexNFC {
  join " ", map sprintf("%04X", $_),
  _unpack_U NFC _pack_U map hex, split ' ', shift;
}
sub hexNFD {
  join " ", map sprintf("%04X", $_),
  _unpack_U NFD _pack_U map hex, split ' ', shift;
}

ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062");
ok(hexNFC("00E0 05AE 05C4 0315 0062"),      "00E0 05AE 05C4 0315 0062");
ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01");
ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00");
ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF");

ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFD("00E0 05AE 05C4 0315 0062"),      "0061 05AE 0300 05C4 0315 0062");
ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000");
ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000");

ok(hexNFC("AC00 11A7"), "AC00 11A7");
ok(hexNFC("AC00 11A8"), "AC01");
ok(hexNFC("AC00 11A9"), "AC02");
ok(hexNFC("AC00 11C2"), "AC1B");
ok(hexNFC("AC00 11C3"), "AC00 11C3");

# Test Cases from Public Review Issue #29: Normalization Issue
# cf. http://www.unicode.org/review/pr-29.html
ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E");
ok(hexNFC("1100 0300 1161"), "1100 0300 1161");

ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300");
ok(hexNFC("1100 1161 0300"), "AC00 0300");

ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327");
ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327");

# NFC() should be unary.
my $str11 = _pack_U(0x41, 0x0302, 0x0301, 0x62);
my $str12 = _pack_U(0x1EA4, 0x62);
ok(NFC $str11 eq $str12);

# NFD() should be unary.
my $str21 = _pack_U(0xE0, 0xAC00);
my $str22 = _pack_U(0x61, 0x0300, 0x1100, 0x1161);
ok(NFD $str21 eq $str22);





More information about the dslinux-commit mailing list