dslinux/user/perl/lib/Unicode/Collate/t altern.t cjkrange.t contract.t hangtype.t hangul.t ignor.t illegal.t illegalp.t index.t normal.t override.t rearrang.t test.t trailwt.t variable.t version.t view.t
cayenne
dslinux_cayenne at user.in-berlin.de
Tue Dec 5 05:27:22 CET 2006
Update of /cvsroot/dslinux/dslinux/user/perl/lib/Unicode/Collate/t
In directory antilope:/tmp/cvs-serv7729/lib/Unicode/Collate/t
Added Files:
altern.t cjkrange.t contract.t hangtype.t hangul.t ignor.t
illegal.t illegalp.t index.t normal.t override.t rearrang.t
test.t trailwt.t variable.t version.t view.t
Log Message:
Adding fresh perl source to HEAD to branch from
--- NEW FILE: variable.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 37 };
use strict;
use warnings;
use Unicode::Collate;
ok(1);
#########################
sub _pack_U { Unicode::Collate::pack_U(@_) }
sub _unpack_U { Unicode::Collate::unpack_U(@_) }
my $A_acute = _pack_U(0xC1);
my $acute = _pack_U(0x0301);
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
);
my %origVar = $Collator->change(variable => 'Blanked');
ok($Collator->lt("death", "de luge"));
ok($Collator->lt("de luge", "de-luge"));
ok($Collator->lt("de-luge", "deluge"));
ok($Collator->lt("deluge", "de\x{2010}luge"));
ok($Collator->lt("deluge", "de Luge"));
$Collator->change(variable => 'Non-ignorable');
ok($Collator->lt("de luge", "de Luge"));
ok($Collator->lt("de Luge", "de-luge"));
ok($Collator->lt("de-Luge", "de\x{2010}luge"));
ok($Collator->lt("de-luge", "death"));
ok($Collator->lt("death", "deluge"));
$Collator->change(variable => 'Shifted');
ok($Collator->lt("death", "de luge"));
ok($Collator->lt("de luge", "de-luge"));
ok($Collator->lt("de-luge", "deluge"));
ok($Collator->lt("deluge", "de Luge"));
ok($Collator->lt("de Luge", "deLuge"));
$Collator->change(variable => 'Shift-Trimmed');
ok($Collator->lt("death", "deluge"));
ok($Collator->lt("deluge", "de luge"));
ok($Collator->lt("de luge", "de-luge"));
ok($Collator->lt("de-luge", "deLuge"));
ok($Collator->lt("deLuge", "de Luge"));
$Collator->change(%origVar);
ok($Collator->{variable}, 'shifted');
##############
# ignorable after variable
# Shifted;
ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
ok($Collator->eq("?\x{300}", "?"));
ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.
$Collator->change(level => 3);
ok($Collator->eq("\cA", "?"));
$Collator->change(variable => 'blanked', level => 4);
ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
ok($Collator->eq("?\x{300}", "?"));
ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.
$Collator->change(level => 3);
ok($Collator->eq("\cA", "?"));
$Collator->change(variable => 'Non-ignorable', level => 4);
ok($Collator->lt("?\x{300}", "?!"));
ok($Collator->gt("?\x{300}A$acute", "?$A_acute"));
ok($Collator->gt("?\x{300}", "?"));
ok($Collator->gt("?\x{344}", "?"));
$Collator->change(level => 3);
ok($Collator->lt("\cA", "?"));
$Collator->change(variable => 'Shifted', level => 4);
--- NEW FILE: hangul.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 72 };
use strict;
use warnings;
use Unicode::Collate;
#########################
ok(1);
# a standard collator (3.1.1)
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
);
# a collator for hangul sorting,
# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html
# http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf
my $hangul = Unicode::Collate->new(
level => 3,
table => undef,
normalization => undef,
entry => <<'ENTRIES',
0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A
0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
#1161 ; [.1800.0020.0002] # <comment> initial jungseong A
#1163 ; [.1801.0020.0002] # <comment> initial jungseong YA
1100 ; [.1831.0020.0002] # choseong KIYEOK
1100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A
1100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA
1101 ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK
1101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A
1101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA
1102 ; [.1833.0020.0002] # choseong NIEUN
1102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A
1102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA
3042 ; [.1921.0020.000E] # HIRAGANA LETTER A
11A8 ; [.FE10.0020.0002] # jongseong KIYEOK
11A9 ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK
1161 ; [.FE20.0020.0002] # jungseong A <non-initial>
1163 ; [.FE21.0020.0002] # jungseong YA <non-initial>
ENTRIES
);
ok(ref $hangul, "Unicode::Collate");
my $trailwt = Unicode::Collate->new(
level => 3,
table => undef,
normalization => undef,
hangul_terminator => 16,
entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong
0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A
0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
11A8 ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK
11A9 ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
1161 ; [.1831.0020.0002] # HANGUL JUNGSEONG A
1163 ; [.1832.0020.0002] # HANGUL JUNGSEONG YA
1100 ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK
1101 ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1102 ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN
3042 ; [.1921.0020.000E] # HIRAGANA LETTER A
ENTRIES
);
#########################
# L(simp)L(simp) vs L(comp): /GGA/
ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
ok($hangul ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/
ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
# T(simp)T(simp) vs T(comp): /AGG/
ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
ok($hangul ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/
ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
ok($hangul ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
# LV vs LLV: /GA/ vs /GNA/
ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
# LVX vs LVV: /GAA/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
# LVX vs LVV: /GAA/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
# LVX vs LVV: /GAA/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
# LVL vs LVT: /GA/./G/ vs /GAG/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
# LVT vs LVX: /GAG/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
# LVT vs LVX: /GAG/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
# LVT vs LVV: /GAG/ vs /GAA/
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
ok($hangul ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
# LVL vs LVV: /GA/./G/ vs /GAA/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
# LV vs Syl(LV): /GA/ vs /[GA]/
ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($hangul ->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}"));
# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/
ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/
ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
ok($hangul ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
#########################
# checks contraction in LVT:
# weights of these contractions may be non-sense.
my $hangcont = Unicode::Collate->new(
level => 3,
table => undef,
normalization => undef,
entry => <<'ENTRIES',
1100 ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK
1101 ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1161 ; [.188D.0020.0002] # HANGUL JUNGSEONG A
1162 ; [.188E.0020.0002] # HANGUL JUNGSEONG AE
1163 ; [.188F.0020.0002] # HANGUL JUNGSEONG YA
11A8 ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK
11A9 ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
1161 11A9 ; [.0000.0000.0000] # A-GG <contraction>
1100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39
ENTRIES
);
# contracted into VT
ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
# not contracted into LVT but into VT
ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
# contracted into LVT
ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/
ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
1;
__END__
--- NEW FILE: rearrang.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 23 };
use strict;
use warnings;
use Unicode::Collate;
ok(1);
#########################
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
UCA_Version => 9,
);
# rearrange : 0x0E40..0x0E44, 0x0EC0..0x0EC4 (default)
##### 2..9
my %old_rearrange = $Collator->change(rearrange => undef);
ok($Collator->gt("\x{0E41}A", "\x{0E40}B"));
ok($Collator->gt("A\x{0E41}A", "A\x{0E40}B"));
$Collator->change(rearrange => [ 0x61 ]);
# U+0061, 'a': This is a Unicode value, never a native value.
ok($Collator->gt("ab", "AB")); # as 'ba' > 'AB'
$Collator->change(%old_rearrange);
ok($Collator->lt("ab", "AB"));
ok($Collator->lt("\x{0E40}", "\x{0E41}"));
ok($Collator->lt("\x{0E40}A", "\x{0E41}B"));
ok($Collator->lt("\x{0E41}A", "\x{0E40}B"));
ok($Collator->lt("A\x{0E41}A", "A\x{0E40}B"));
##### 10..13
my $all_undef_8 = Unicode::Collate->new(
table => undef,
normalization => undef,
overrideCJK => undef,
overrideHangul => undef,
UCA_Version => 8,
);
ok($all_undef_8->lt("\x{0E40}", "\x{0E41}"));
ok($all_undef_8->lt("\x{0E40}A", "\x{0E41}B"));
ok($all_undef_8->lt("\x{0E41}A", "\x{0E40}B"));
ok($all_undef_8->lt("A\x{0E41}A", "A\x{0E40}B"));
##### 14..18
my $no_rearrange = Unicode::Collate->new(
table => undef,
normalization => undef,
rearrange => [],
UCA_Version => 9,
);
ok($no_rearrange->lt("A", "B"));
ok($no_rearrange->lt("\x{0E40}", "\x{0E41}"));
ok($no_rearrange->lt("\x{0E40}A", "\x{0E41}B"));
ok($no_rearrange->gt("\x{0E41}A", "\x{0E40}B"));
ok($no_rearrange->gt("A\x{0E41}A", "A\x{0E40}B"));
##### 19..23
my $undef_rearrange = Unicode::Collate->new(
table => undef,
normalization => undef,
rearrange => undef,
UCA_Version => 9,
);
ok($undef_rearrange->lt("A", "B"));
ok($undef_rearrange->lt("\x{0E40}", "\x{0E41}"));
ok($undef_rearrange->lt("\x{0E40}A", "\x{0E41}B"));
ok($undef_rearrange->gt("\x{0E41}A", "\x{0E40}B"));
ok($undef_rearrange->gt("A\x{0E41}A", "A\x{0E40}B"));
--- NEW FILE: hangtype.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 33 };
use strict;
use warnings;
use Unicode::Collate;
ok(1);
#########################
ok(Unicode::Collate::getHST(0x0000), '');
ok(Unicode::Collate::getHST(0x0100), '');
ok(Unicode::Collate::getHST(0x1000), '');
ok(Unicode::Collate::getHST(0x10FF), '');
ok(Unicode::Collate::getHST(0x1100), 'L');
ok(Unicode::Collate::getHST(0x1101), 'L');
ok(Unicode::Collate::getHST(0x1159), 'L');
ok(Unicode::Collate::getHST(0x115A), '');
ok(Unicode::Collate::getHST(0x115E), '');
ok(Unicode::Collate::getHST(0x115F), 'L');
ok(Unicode::Collate::getHST(0x1160), 'V');
ok(Unicode::Collate::getHST(0x1161), 'V');
ok(Unicode::Collate::getHST(0x11A0), 'V');
ok(Unicode::Collate::getHST(0x11A2), 'V');
ok(Unicode::Collate::getHST(0x11A3), '');
ok(Unicode::Collate::getHST(0x11A7), '');
ok(Unicode::Collate::getHST(0x11A8), 'T');
ok(Unicode::Collate::getHST(0x11AF), 'T');
ok(Unicode::Collate::getHST(0x11E0), 'T');
ok(Unicode::Collate::getHST(0x11F9), 'T');
ok(Unicode::Collate::getHST(0x11FA), '');
ok(Unicode::Collate::getHST(0x11FF), '');
ok(Unicode::Collate::getHST(0x3011), '');
ok(Unicode::Collate::getHST(0x11A7), '');
ok(Unicode::Collate::getHST(0xABFF), '');
ok(Unicode::Collate::getHST(0xAC00), 'LV');
ok(Unicode::Collate::getHST(0xAC01), 'LVT');
ok(Unicode::Collate::getHST(0xAC1B), 'LVT');
ok(Unicode::Collate::getHST(0xAC1C), 'LV');
ok(Unicode::Collate::getHST(0xD7A3), 'LVT');
ok(Unicode::Collate::getHST(0xD7A4), '');
ok(Unicode::Collate::getHST(0xFFFF), '');
--- NEW FILE: contract.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 40 };
use strict;
use warnings;
use Unicode::Collate;
our $kjeEntry = <<'ENTRIES';
0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
043A ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA
041A ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA
045C ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
040C ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
ENTRIES
our $aaEntry = <<'ENTRIES';
0304 ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230)
030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230)
0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202)
031A ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232)
0061 ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A
0041 ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A
007A ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z
005A ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z
00E5 ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM
00C5 ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM
0061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE
0041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE
ENTRIES
#########################
ok(1);
my $kjeNoN = Unicode::Collate->new(
level => 1,
table => undef,
normalization => undef,
entry => $kjeEntry,
);
ok($kjeNoN->lt("\x{043A}", "\x{043A}\x{0301}"));
ok($kjeNoN->gt("\x{045C}", "\x{043A}\x{0334}\x{0301}"));
ok($kjeNoN->eq("\x{043A}", "\x{043A}\x{0334}\x{0301}"));
ok($kjeNoN->eq("\x{045C}", "\x{043A}\x{0301}\x{0334}"));
our %sortkeys;
$sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{043A}\x{0301}");
$sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{043A}\x{0334}\x{0301}");
$sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{043A}\x{0301}\x{0334}");
eval { require Unicode::Normalize };
if (!$@) {
my $kjeNFD = Unicode::Collate->new(
level => 1,
table => undef,
entry => $kjeEntry,
);
ok($kjeNFD->lt("\x{043A}", "\x{043A}\x{0301}"));
ok($kjeNFD->eq("\x{045C}", "\x{043A}\x{0334}\x{0301}"));
ok($kjeNFD->lt("\x{043A}", "\x{043A}\x{0334}\x{0301}"));
ok($kjeNFD->eq("\x{045C}", "\x{043A}\x{0301}\x{0334}"));
my $aaNFD = Unicode::Collate->new(
level => 1,
table => undef,
entry => $aaEntry,
);
ok($aaNFD->lt("Z", "A\x{30A}\x{304}"));
ok($aaNFD->eq("A", "A\x{304}\x{30A}"));
ok($aaNFD->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}"));
ok($aaNFD->lt("Z", "A\x{327}\x{30A}"));
ok($aaNFD->lt("Z", "A\x{30A}\x{327}"));
ok($aaNFD->lt("Z", "A\x{31A}\x{30A}"));
ok($aaNFD->lt("Z", "A\x{30A}\x{31A}"));
my $aaPre = Unicode::Collate->new(
level => 1,
normalization => "prenormalized",
table => undef,
entry => $aaEntry,
);
ok($aaPre->lt("Z", "A\x{30A}\x{304}"));
ok($aaPre->eq("A", "A\x{304}\x{30A}"));
ok($aaPre->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}"));
ok($aaPre->lt("Z", "A\x{327}\x{30A}"));
ok($aaPre->lt("Z", "A\x{30A}\x{327}"));
ok($aaPre->lt("Z", "A\x{31A}\x{30A}"));
ok($aaPre->lt("Z", "A\x{30A}\x{31A}"));
}
else {
ok(1) for 1..20;
}
# again: loading Unicode::Normalize should not affect $kjeNoN.
ok($kjeNoN->lt("\x{043A}", "\x{043A}\x{0301}"));
ok($kjeNoN->gt("\x{045C}", "\x{043A}\x{0334}\x{0301}"));
ok($kjeNoN->eq("\x{043A}", "\x{043A}\x{0334}\x{0301}"));
ok($kjeNoN->eq("\x{045C}", "\x{043A}\x{0301}\x{0334}"));
ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{043A}\x{0301}"));
ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{043A}\x{0334}\x{0301}"));
ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{043A}\x{0301}\x{0334}"));
my $aaNoN = Unicode::Collate->new(
level => 1,
table => undef,
entry => $aaEntry,
normalization => undef,
);
ok($aaNoN->lt("Z", "A\x{30A}\x{304}"));
ok($aaNoN->eq("A", "A\x{304}\x{30A}"));
ok($aaNoN->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}"));
ok($aaNoN->eq("A", "A\x{327}\x{30A}"));
ok($aaNoN->lt("Z", "A\x{30A}\x{327}"));
ok($aaNoN->eq("A", "A\x{31A}\x{30A}"));
ok($aaNoN->lt("Z", "A\x{30A}\x{31A}"));
--- NEW FILE: version.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 17 };
use strict;
use warnings;
use Unicode::Collate;
ok(1);
#########################
# Fix me when UCA and/or keys.txt is upgraded.
my $UCA_Version = "14";
my $Base_Unicode_Version = "4.1.0";
my $Key_Version = "3.1.1";
ok(Unicode::Collate::UCA_Version, $UCA_Version);
ok(Unicode::Collate->UCA_Version, $UCA_Version);
ok(Unicode::Collate::Base_Unicode_Version, $Base_Unicode_Version);
ok(Unicode::Collate->Base_Unicode_Version, $Base_Unicode_Version);
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
);
ok($Collator->UCA_Version, $UCA_Version);
ok($Collator->UCA_Version(), $UCA_Version);
ok($Collator->Base_Unicode_Version, $Base_Unicode_Version);
ok($Collator->Base_Unicode_Version(), $Base_Unicode_Version);
ok($Collator->version, $Key_Version);
ok($Collator->version(), $Key_Version);
my $UndefTable = Unicode::Collate->new(
table => undef,
normalization => undef,
);
ok($UndefTable->UCA_Version, $UCA_Version);
ok($UndefTable->UCA_Version(), $UCA_Version);
ok($UndefTable->Base_Unicode_Version, $Base_Unicode_Version);
ok($UndefTable->Base_Unicode_Version(), $Base_Unicode_Version);
ok($UndefTable->version, "unknown");
ok($UndefTable->version(), "unknown");
--- NEW FILE: trailwt.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 58 };
use strict;
use warnings;
use Unicode::Collate;
#########################
ok(1);
# a standard collator (3.1.1)
my $Collator = Unicode::Collate->new(
level => 1,
table => 'keys.txt',
normalization => undef,
entry => <<'ENTRIES',
326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA
326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA
3270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA
3271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA
3272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA
3273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA
3274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA
3275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A
3276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA
3277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA
3278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA
3279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA
327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA
327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA
ENTRIES
);
my $hangul = Unicode::Collate->new(
level => 1,
table => 'keys.txt',
normalization => undef,
hangul_terminator => 16,
entry => <<'ENTRIES',
326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA
326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA
3270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA
3271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA
3272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA
3273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA
3274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA
3275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A
3276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA
3277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA
3278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA
3279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA
327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA
327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA
ENTRIES
);
ok(ref $hangul, "Unicode::Collate");
#########################
# LVX vs LVV: /GAA/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
# LVX vs LVV: /GAA/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
# LVX vs LVV: /GAA/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
# LVL vs LVT: /GA/./G/ vs /GAG/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
# LVT vs LVX: /GAG/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
# LVT vs LVX: /GAG/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
# LV vs Syl(LV): /GA/ vs /[GA]/
ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($hangul ->eq("\x{1100}\x{1161}", "\x{AC00}"));
# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
# Syl(LVT) vs : /GAG/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
#########################
my $hangcirc = Unicode::Collate->new(
level => 1,
table => 'keys.txt',
normalization => undef,
hangul_terminator => 16,
entry => <<'ENTRIES',
326E ; [.1831.0020.0006.326E][.188D.0020.0006.326E][.10.0.0.0] # c.h.s. GA
326F ; [.1833.0020.0006.326F][.188D.0020.0006.326F][.10.0.0.0] # c.h.s. NA
3270 ; [.1834.0020.0006.3270][.188D.0020.0006.3270][.10.0.0.0] # c.h.s. DA
3271 ; [.1836.0020.0006.3271][.188D.0020.0006.3271][.10.0.0.0] # c.h.s. RA
3272 ; [.1837.0020.0006.3272][.188D.0020.0006.3272][.10.0.0.0] # c.h.s. MA
3273 ; [.1838.0020.0006.3273][.188D.0020.0006.3273][.10.0.0.0] # c.h.s. BA
3274 ; [.183A.0020.0006.3274][.188D.0020.0006.3274][.10.0.0.0] # c.h.s. SA
3275 ; [.183C.0020.0006.3275][.188D.0020.0006.3275][.10.0.0.0] # c.h.s. A
3276 ; [.183D.0020.0006.3276][.188D.0020.0006.3276][.10.0.0.0] # c.h.s. JA
3277 ; [.183F.0020.0006.3277][.188D.0020.0006.3277][.10.0.0.0] # c.h.s. CA
3278 ; [.1840.0020.0006.3278][.188D.0020.0006.3278][.10.0.0.0] # c.h.s. KA
3279 ; [.1841.0020.0006.3279][.188D.0020.0006.3279][.10.0.0.0] # c.h.s. TA
327A ; [.1842.0020.0006.327A][.188D.0020.0006.327A][.10.0.0.0] # c.h.s. PA
327B ; [.1843.0020.0006.327B][.188D.0020.0006.327B][.10.0.0.0] # c.h.s. HA
ENTRIES
);
# LV vs Circled Syl(LV): /GA/ vs /(GA)/
ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}"));
ok($hangul ->gt("\x{1100}\x{1161}", "\x{326E}"));
ok($hangcirc->eq("\x{1100}\x{1161}", "\x{326E}"));
# LV vs Circled Syl(LV): followed by latin A
ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A"));
ok($hangul ->lt("\x{1100}\x{1161}A", "\x{326E}A"));
ok($hangcirc->eq("\x{1100}\x{1161}A", "\x{326E}A"));
# LV vs Circled Syl(LV): followed by hiragana A
ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
ok($hangul ->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
ok($hangcirc->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
# LVT vs LVX: /GAG/ vs /GA/.hanja
ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
ok($hangul ->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
ok($hangcirc->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
#########################
# checks contraction in LVT:
# weights of these contractions may be non-sense.
my $hangcont = Unicode::Collate->new(
level => 1,
table => 'keys.txt',
normalization => undef,
hangul_terminator => 16,
entry => <<'ENTRIES',
1100 1161 ; [.1831.0020.0002.1100][.188D.0020.0002.1161] # KIYEOK+A
1161 11A8 ; [.188D.0020.0002.1161][.18CF.0020.0002.11A8] # A+KIYEOK
ENTRIES
);
# cont<LV> vs Syl(LV): /<GA>/ vs /[GA]/
ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($hangcont->eq("\x{1100}\x{1161}", "\x{AC00}"));
# cont<LV>.T vs Syl(LV).T: /<GA>G/ vs /[GA]G/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
# cont<LV>.T vs Syl(LVT): /<GA>G/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
# L.cont<VT> vs Syl(LV).T: /D<AG>/ vs /[DA]G/
ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}"));
ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}"));
# L.cont<VT> vs Syl(LVT): /D<AG>/ vs /[DAG]/
ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}"));
ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}"));
#####
$Collator->change(hangul_terminator => 16);
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($Collator->gt("\x{1100}\x{1161}", "\x{326E}"));
ok($Collator->lt("\x{1100}\x{1161}A", "\x{326E}A"));
ok($Collator->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
ok($Collator->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
$Collator->change(hangul_terminator => 0);
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}"));
ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A"));
ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
1;
__END__
--- NEW FILE: view.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 53 };
use strict;
use warnings;
use Unicode::Collate;
#########################
ok(1);
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
);
##############
ok($Collator->viewSortKey(""), "[| | |]");
ok($Collator->viewSortKey("A"), "[0A15 | 0020 | 0008 | FFFF]");
ok($Collator->viewSortKey("ABC"),
"[0A15 0A29 0A3D | 0020 0020 0020 | 0008 0008 0008 | FFFF FFFF FFFF]");
ok($Collator->viewSortKey("(12)"),
"[0A0C 0A0D | 0020 0020 | 0002 0002 | 027A FFFF FFFF 027B]");
ok($Collator->viewSortKey("!\x{300}"), "[| | | 024B]");
ok($Collator->viewSortKey("\x{300}"), "[| 0035 | 0002 | FFFF]");
$Collator->change(level => 3);
ok($Collator->viewSortKey("A"), "[0A15 | 0020 | 0008 |]");
$Collator->change(level => 2);
ok($Collator->viewSortKey("A"), "[0A15 | 0020 | |]");
$Collator->change(level => 1);
ok($Collator->viewSortKey("A"), "[0A15 | | |]");
### Version 8
$Collator->change(level => 4, UCA_Version => 8);
ok($Collator->viewSortKey(""), "[|||]");
ok($Collator->viewSortKey("A"), "[0A15|0020|0008|FFFF]");
ok($Collator->viewSortKey("ABC"),
"[0A15 0A29 0A3D|0020 0020 0020|0008 0008 0008|FFFF FFFF FFFF]");
ok($Collator->viewSortKey("(12)"),
"[0A0C 0A0D|0020 0020|0002 0002|027A FFFF FFFF 027B]");
ok($Collator->viewSortKey("!\x{300}"), "[|0035|0002|024B FFFF]");
ok($Collator->viewSortKey("\x{300}"), "[|0035|0002|FFFF]");
$Collator->change(level => 3);
ok($Collator->viewSortKey("A"), "[0A15|0020|0008|]");
$Collator->change(level => 2);
ok($Collator->viewSortKey("A"), "[0A15|0020||]");
$Collator->change(level => 1);
ok($Collator->viewSortKey("A"), "[0A15|||]");
# Version 9
$Collator->change(level => 3, UCA_Version => 9);
ok($Collator->viewSortKey("A\x{300}z\x{301}"),
"[0A15 0C13 | 0020 0035 0020 0032 | 0008 0002 0002 0002 |]");
$Collator->change(backwards => 1);
ok($Collator->viewSortKey("A\x{300}z\x{301}"),
"[0C13 0A15 | 0020 0035 0020 0032 | 0008 0002 0002 0002 |]");
$Collator->change(backwards => 2);
ok($Collator->viewSortKey("A\x{300}z\x{301}"),
"[0A15 0C13 | 0032 0020 0035 0020 | 0008 0002 0002 0002 |]");
$Collator->change(backwards => [1,3]);
ok($Collator->viewSortKey("A\x{300}z\x{301}"),
"[0C13 0A15 | 0020 0035 0020 0032 | 0002 0002 0002 0008 |]");
$Collator->change(backwards => [2]);
ok($Collator->viewSortKey("\x{300}\x{301}\x{302}\x{303}"),
"[| 004E 003C 0032 0035 | 0002 0002 0002 0002 |]");
$Collator->change(backwards => []);
ok($Collator->viewSortKey("A\x{300}z\x{301}"),
"[0A15 0C13 | 0020 0035 0020 0032 | 0008 0002 0002 0002 |]");
$Collator->change(level => 4);
# Variable
our %origVar = $Collator->change(variable => 'Blanked');
ok($Collator->viewSortKey("1+2"),
'[0A0C 0A0D | 0020 0020 | 0002 0002 | 0031 002B 0032]');
ok($Collator->viewSortKey("?\x{300}!\x{301}\x{315}."),
'[| | | 003F 0021 002E]');
ok($Collator->viewSortKey("?!."), '[| | | 003F 0021 002E]');
$Collator->change(variable => 'Non-ignorable');
ok($Collator->viewSortKey("1+2"),
'[0A0C 039F 0A0D | 0020 0020 0020 | 0002 0002 0002 | 0031 002B 0032]');
ok($Collator->viewSortKey("?\x{300}!"),
'[024E 024B | 0020 0035 0020 | 0002 0002 0002 | 003F 0300 0021]');
ok($Collator->viewSortKey("?!."),
'[024E 024B 0255 | 0020 0020 0020 | 0002 0002 0002 | 003F 0021 002E]');
$Collator->change(variable => 'Shifted');
ok($Collator->viewSortKey("1+2"),
'[0A0C 0A0D | 0020 0020 | 0002 0002 | FFFF 039F FFFF]');
ok($Collator->viewSortKey("?\x{300}!\x{301}\x{315}."),
'[| | | 024E 024B 0255]');
ok($Collator->viewSortKey("?!."), '[| | | 024E 024B 0255]');
$Collator->change(variable => 'Shift-Trimmed');
ok($Collator->viewSortKey("1+2"),
'[0A0C 0A0D | 0020 0020 | 0002 0002 | 039F]');
ok($Collator->viewSortKey("?\x{300}!\x{301}\x{315}."),
'[| | | 024E 024B 0255]');
ok($Collator->viewSortKey("?!."), '[| | | 024E 024B 0255]');
$Collator->change(%origVar);
#####
# Level 3 weight
ok($Collator->viewSortKey("a\x{3042}"),
'[0A15 1921 | 0020 0020 | 0002 000E | FFFF FFFF]');
ok($Collator->viewSortKey("A\x{30A2}"),
'[0A15 1921 | 0020 0020 | 0008 0011 | FFFF FFFF]');
$Collator->change(upper_before_lower => 1);
ok($Collator->viewSortKey("a\x{3042}"),
'[0A15 1921 | 0020 0020 | 0008 000E | FFFF FFFF]');
ok($Collator->viewSortKey("A\x{30A2}"),
'[0A15 1921 | 0020 0020 | 0002 0011 | FFFF FFFF]');
$Collator->change(katakana_before_hiragana => 1);
ok($Collator->viewSortKey("a\x{3042}"),
'[0A15 1921 | 0020 0020 | 0008 0013 | FFFF FFFF]');
ok($Collator->viewSortKey("A\x{30A2}"),
'[0A15 1921 | 0020 0020 | 0002 000F | FFFF FFFF]');
$Collator->change(upper_before_lower => 0);
ok($Collator->viewSortKey("a\x{3042}"),
'[0A15 1921 | 0020 0020 | 0002 0013 | FFFF FFFF]');
ok($Collator->viewSortKey("A\x{30A2}"),
'[0A15 1921 | 0020 0020 | 0008 000F | FFFF FFFF]');
$Collator->change(katakana_before_hiragana => 0);
ok($Collator->viewSortKey("a\x{3042}"),
'[0A15 1921 | 0020 0020 | 0002 000E | FFFF FFFF]');
ok($Collator->viewSortKey("A\x{30A2}"),
'[0A15 1921 | 0020 0020 | 0008 0011 | FFFF FFFF]');
#####
our $el = Unicode::Collate->new(
entry => <<'ENTRY',
006C ; [.0B03.0020.0002.006C] # LATIN SMALL LETTER L
FF4C ; [.0B03.0020.0003.FF4C] # FULLWIDTH LATIN SMALL LETTER L; QQK
217C ; [.0B03.0020.0004.217C] # SMALL ROMAN NUMERAL FIFTY; QQK
2113 ; [.0B03.0020.0005.2113] # SCRIPT SMALL L; QQK
24DB ; [.0B03.0020.0006.24DB] # CIRCLED LATIN SMALL LETTER L; QQK
004C ; [.0B03.0020.0008.004C] # LATIN CAPITAL LETTER L
FF2C ; [.0B03.0020.0009.FF2C] # FULLWIDTH LATIN CAPITAL LETTER L; QQK
216C ; [.0B03.0020.000A.216C] # ROMAN NUMERAL FIFTY; QQK
2112 ; [.0B03.0020.000B.2112] # SCRIPT CAPITAL L; QQK
24C1 ; [.0B03.0020.000C.24C1] # CIRCLED LATIN CAPITAL LETTER L; QQK
ENTRY
table => undef,
normalization => undef,
);
our $el12 = '0B03 0B03 0B03 0B03 0B03 | 0020 0020 0020 0020 0020';
ok($el->viewSortKey("l\x{FF4C}\x{217C}\x{2113}\x{24DB}"),
"[$el12 | 0002 0003 0004 0005 0006 | FFFF FFFF FFFF FFFF FFFF]");
ok($el->viewSortKey("L\x{FF2C}\x{216C}\x{2112}\x{24C1}"),
"[$el12 | 0008 0009 000A 000B 000C | FFFF FFFF FFFF FFFF FFFF]");
$el->change(upper_before_lower => 1);
ok($el->viewSortKey("l\x{FF4C}\x{217C}\x{2113}\x{24DB}"),
"[$el12 | 0008 0009 000A 000B 000C | FFFF FFFF FFFF FFFF FFFF]");
ok($el->viewSortKey("L\x{FF2C}\x{216C}\x{2112}\x{24C1}"),
"[$el12 | 0002 0003 0004 0005 0006 | FFFF FFFF FFFF FFFF FFFF]");
$el->change(upper_before_lower => 0);
ok($el->viewSortKey("l\x{FF4C}\x{217C}\x{2113}\x{24DB}"),
"[$el12 | 0002 0003 0004 0005 0006 | FFFF FFFF FFFF FFFF FFFF]");
ok($el->viewSortKey("L\x{FF2C}\x{216C}\x{2112}\x{24C1}"),
"[$el12 | 0008 0009 000A 000B 000C | FFFF FFFF FFFF FFFF FFFF]");
#####
--- NEW FILE: illegal.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
use strict;
use warnings;
BEGIN {
use Unicode::Collate;
unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) {
print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
" needed for this test\n";
print $@;
exit;
}
}
BEGIN { plan tests => 40 };
ok(1);
#########################
no warnings 'utf8';
# NULL is tailorable but illegal code points are not.
# illegal code points should be always ingored
# (cf. UCA, 7.1.1 Illegal code points).
my $illeg = Unicode::Collate->new(
entry => <<'ENTRIES',
0000 ; [.0020.0000.0000.0000] # [0000] NULL
0001 ; [.0021.0000.0000.0001] # [0001] START OF HEADING
FFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
FFFF ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
D800 ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
DFFF ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
FDD0 ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
FDEF ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
0002 ; [.0030.0000.0000.0002] # [0002] START OF TEXT
10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
0041 ; [.1000.0020.0008.0041] # latin A
0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
ENTRIES
level => 1,
table => undef,
normalization => undef,
);
# 2..12
ok($illeg->lt("", "\x00"));
ok($illeg->lt("", "\x01"));
ok($illeg->eq("", "\x{FFFE}"));
ok($illeg->eq("", "\x{FFFF}"));
ok($illeg->eq("", "\x{D800}"));
ok($illeg->eq("", "\x{DFFF}"));
ok($illeg->eq("", "\x{FDD0}"));
ok($illeg->eq("", "\x{FDEF}"));
ok($illeg->lt("", "\x02"));
ok($illeg->eq("", "\x{10FFFF}"));
ok($illeg->eq("", "\x{110000}"));
# 13..22
ok($illeg->lt("\x00", "\x01"));
ok($illeg->lt("\x01", "\x02"));
ok($illeg->ne("\0", "\x{D800}"));
ok($illeg->ne("\0", "\x{DFFF}"));
ok($illeg->ne("\0", "\x{FDD0}"));
ok($illeg->ne("\0", "\x{FDEF}"));
ok($illeg->ne("\0", "\x{FFFE}"));
ok($illeg->ne("\0", "\x{FFFF}"));
ok($illeg->ne("\0", "\x{10FFFF}"));
ok($illeg->ne("\0", "\x{110000}"));
# 23..26
ok($illeg->eq("A", "A\x{FFFF}"));
ok($illeg->gt("A\0", "A\x{FFFF}"));
ok($illeg->lt("A", "A\0"));
ok($illeg->lt("AA", "A\0"));
##################
my($match, $str, $sub, $ret);
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
level => 1,
normalization => undef,
);
$sub = "pe";
$str = "Pe\x{300}\x{301}rl";
$ret = "Pe\x{300}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{300}\0\0\x{301}rl";
$ret = "Pe\x{300}\0\0\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{DA00}\x{301}\x{DFFF}rl";
$ret = "Pe\x{DA00}\x{301}\x{DFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{FFFF}\x{301}rl";
$ret = "Pe\x{FFFF}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{110000}\x{301}rl";
$ret = "Pe\x{110000}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{300}\x{d801}\x{301}rl";
$ret = "Pe\x{300}\x{d801}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{300}\x{ffff}\x{301}rl";
$ret = "Pe\x{300}\x{ffff}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{300}\x{110000}\x{301}rl";
$ret = "Pe\x{300}\x{110000}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{D9ab}\x{DFFF}rl";
$ret = "Pe\x{D9ab}\x{DFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{FFFF}rl";
$ret = "Pe\x{FFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{110000}rl";
$ret = "Pe\x{110000}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{300}\x{D800}\x{DFFF}rl";
$ret = "Pe\x{300}\x{D800}\x{DFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{300}\x{FFFF}rl";
$ret = "Pe\x{300}\x{FFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
$str = "Pe\x{300}\x{110000}rl";
$ret = "Pe\x{300}\x{110000}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
--- NEW FILE: index.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 65 };
use strict;
use warnings;
use Unicode::Collate;
our $IsEBCDIC = ord("A") != 0x41;
#########################
ok(1);
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
);
##############
my %old_level = $Collator->change(level => 2);
my $str;
my $orig = "This is a Perl book.";
my $sub = "PERL";
my $rep = "camel";
my $ret = "This is a camel book.";
$str = $orig;
if (my($pos,$len) = $Collator->index($str, $sub)) {
substr($str, $pos, $len, $rep);
}
ok($str, $ret);
$Collator->change(%old_level);
$str = $orig;
if (my($pos,$len) = $Collator->index($str, $sub)) {
substr($str, $pos, $len, $rep);
}
ok($str, $orig);
##############
my $match;
$Collator->change(level => 1);
$str = "Pe\x{300}rl";
$sub = "pe";
$ret = "Pe\x{300}";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
$match = substr($str, $pos, $len);
}
ok($match, $ret);
$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
$sub = "pE";
$ret = "P\x{300}e\x{300}\x{301}\x{303}";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
$match = substr($str, $pos, $len);
}
ok($match, $ret);
$Collator->change(level => 2);
$str = "Pe\x{300}rl";
$sub = "pe";
$ret = undef;
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
$match = substr($str, $pos, $len);
}
ok($match, $ret);
$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
$sub = "pE";
$ret = undef;
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
$match = substr($str, $pos, $len);
}
ok($match, $ret);
$str = "Pe\x{300}rl";
$sub = "pe\x{300}";
$ret = "Pe\x{300}";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
$match = substr($str, $pos, $len);
}
ok($match, $ret);
$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
$sub = "p\x{300}E\x{300}\x{301}\x{303}";
$ret = "P\x{300}e\x{300}\x{301}\x{303}";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
$match = substr($str, $pos, $len);
}
ok($match, $ret);
##############
$Collator->change(level => 1);
$str = $IsEBCDIC
? "Ich mu\x{0059} studieren Perl."
: "Ich mu\x{00DF} studieren Perl.";
$sub = $IsEBCDIC
? "m\x{00DC}ss"
: "m\x{00FC}ss";
$ret = $IsEBCDIC
? "mu\x{0059}"
: "mu\x{00DF}";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
$match = substr($str, $pos, $len);
}
ok($match, $ret);
$Collator->change(%old_level);
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
$match = substr($str, $pos, $len);
}
ok($match, undef);
$match = undef;
if (my($pos,$len) = $Collator->index("", "")) {
$match = substr("", $pos, $len);
}
ok($match, "");
$match = undef;
if (my($pos,$len) = $Collator->index("", "abc")) {
$match = substr("", $pos, $len);
}
ok($match, undef);
##############
$Collator->change(level => 1);
$str = "\0\cA\0\cAe\0\x{300}\cA\x{301}\cB\x{302}\0 \0\cA";
$sub = "e";
$ret = "e\0\x{300}\cA\x{301}\cB\x{302}\0";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
$match = substr($str, $pos, $len);
}
ok($match, $ret);
$Collator->change(level => 1);
$str = "\0\cA\0\cAe\0\cA\x{300}\0\cAe";
$sub = "e";
$ret = "e\0\cA\x{300}\0\cA";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
$match = substr($str, $pos, $len);
}
ok($match, $ret);
$Collator->change(%old_level);
$str = "e\x{300}";
$sub = "e";
$ret = undef;
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
$match = substr($str, $pos, $len);
}
ok($match, $ret);
##############
$Collator->change(level => 1);
$str = "The Perl is a language, and the perl is an interpreter.";
$sub = "PERL";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub, -40)) {
$match = substr($str, $pos, $len);
}
ok($match, "Perl");
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub, 4)) {
$match = substr($str, $pos, $len);
}
ok($match, "Perl");
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub, 5)) {
$match = substr($str, $pos, $len);
}
ok($match, "perl");
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub, 32)) {
$match = substr($str, $pos, $len);
}
ok($match, "perl");
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub, 33)) {
$match = substr($str, $pos, $len);
}
ok($match, undef);
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub, 100)) {
$match = substr($str, $pos, $len);
}
ok($match, undef);
$Collator->change(%old_level);
##############
my @ret;
$Collator->change(level => 1);
$ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
ok($ret);
ok($$ret eq "P\cBe\x{300}\cB");
@ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
ok($ret[0], "P\cBe\x{300}\cB");
$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
$sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss";
($ret) = $Collator->match($str, $sub);
ok($ret, $str);
$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
$sub = $IsEBCDIC ? "m\x{00DC}s" : "m\x{00FC}s";
($ret) = $Collator->match($str, $sub);
ok($ret, undef);
$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
ok($ret eq "P\cBe\x{300}\cB:pe:PE");
$ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
ok($ret == 3);
$str = "ABCDEF";
$sub = "cde";
$ret = $Collator->match($str, $sub);
$str = "01234567";
ok($ret && $$ret, "CDE");
$str = "ABCDEF";
$sub = "cde";
($ret) = $Collator->match($str, $sub);
$str = "01234567";
ok($ret, "CDE");
$Collator->change(level => 3);
$ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
ok($ret, undef);
@ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
ok(@ret == 0);
$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe");
ok($ret eq "");
$ret = $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe");
ok($ret == 0);
$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
ok($ret eq "pe");
$ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
ok($ret == 1);
$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
$sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss";
($ret) = $Collator->match($str, $sub);
ok($ret, undef);
$Collator->change(%old_level);
##############
$Collator->change(level => 1);
sub strreverse { scalar reverse shift }
$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->subst($str, "perl", 'Camel');
ok($ret, 1);
ok($str, "Camel and PERL.");
$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->subst($str, "perl", \&strreverse);
ok($ret, 1);
ok($str, "lr\cB\x{300}e\cBP and PERL.");
$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->gsubst($str, "perl", 'Camel');
ok($ret, 2);
ok($str, "Camel and Camel.");
$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->gsubst($str, "perl", \&strreverse);
ok($ret, 2);
ok($str, "lr\cB\x{300}e\cBP and LREP.");
$str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L...";
$Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" });
ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> "
. "<b>CAMEL</b> horse <b>cAm\0E\0L</b>...");
$Collator->change(level => 3);
$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->subst($str, "perl", "Camel");
ok(! $ret);
ok($str, "P\cBe\x{300}\cBrl and PERL.");
$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->subst($str, "perl", \&strreverse);
ok(! $ret);
ok($str, "P\cBe\x{300}\cBrl and PERL.");
$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->gsubst($str, "perl", "Camel");
ok($ret, 0);
ok($str, "P\cBe\x{300}\cBrl and PERL.");
$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->gsubst($str, "perl", \&strreverse);
ok($ret, 0);
ok($str, "P\cBe\x{300}\cBrl and PERL.");
$Collator->change(%old_level);
##############
$str = "Perl and Camel";
$ret = $Collator->gsubst($str, "\cA\cA\0", "AB");
ok($ret, 15);
ok($str, "ABPABeABrABlAB ABaABnABdAB ABCABaABmABeABlAB");
$str = '';
$ret = $Collator->subst($str, "", "ABC");
ok($ret, 1);
ok($str, "ABC");
$str = '';
$ret = $Collator->gsubst($str, "", "ABC");
ok($ret, 1);
ok($str, "ABC");
$str = 'PPPPP';
$ret = $Collator->gsubst($str, 'PP', "ABC");
ok($ret, 2);
ok($str, "ABCABCP");
##############
# Shifted; ignorable after variable
($ret) = $Collator->match("A?\x{300}!\x{301}\x{344}B\x{315}", "?!");
ok($ret, "?\x{300}!\x{301}\x{344}");
$Collator->change(alternate => 'Non-ignorable');
($ret) = $Collator->match("A?\x{300}!\x{301}B\x{315}", "?!");
ok($ret, undef);
--- NEW FILE: cjkrange.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 51 };
use strict;
use warnings;
use Unicode::Collate;
ok(1);
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
);
# U+9FA6..U+9FBB are CJK UI since Unicode 4.1.0.
# U+3400 is CJK UI ExtA, then greater than any CJK UI.
##### 2..11
ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
ok($Collator->lt("\x{9FA6}", "\x{3400}")); # new UI < ExtA
ok($Collator->lt("\x{9FBB}", "\x{3400}")); # new UI < ExtA
ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
##### 12..21
$Collator->change(UCA_Version => 11);
ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # Unassigned > Unassigned
ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
##### 22..31
$Collator->change(UCA_Version => 9);
ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # Unassigned > Unassigned
ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
##### 32..41
$Collator->change(UCA_Version => 8);
ok($Collator->gt("\x{9FA5}", "\x{3400}")); # UI > ExtA
ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < Unassigned(ExtB)
ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < Unassigned(ExtB)
ok($Collator->lt("\x{9FFF}","\x{20000}")); # Unassigned < Unassigned(ExtB)
ok($Collator->lt("\x{9FFF}","\x{2A6D6}")); # Unassigned < Unassigned(ExtB)
##### 42..51
$Collator->change(UCA_Version => 14);
ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
ok($Collator->lt("\x{9FA6}", "\x{3400}")); # new UI < ExtA
ok($Collator->lt("\x{9FBB}", "\x{3400}")); # new UI < ExtA
ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
--- NEW FILE: override.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 76 };
use strict;
use warnings;
use Unicode::Collate;
ok(1);
##### 2..6
my $all_undef_8 = Unicode::Collate->new(
table => undef,
normalization => undef,
overrideCJK => undef,
overrideHangul => undef,
UCA_Version => 8,
);
# All in the Unicode code point order.
# No hangul decomposition.
ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));
##### 7..11
my $all_undef_9 = Unicode::Collate->new(
table => undef,
normalization => undef,
overrideCJK => undef,
overrideHangul => undef,
UCA_Version => 9,
);
# CJK Ideo. < CJK ext A/B < Others.
# No hangul decomposition.
ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned
##### 12..16
my $ignoreHangul = Unicode::Collate->new(
table => undef,
normalization => undef,
overrideHangul => sub {()},
entry => <<'ENTRIES',
AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL
ENTRIES
);
# All Hangul Syllables except U+AE00 are ignored.
ok($ignoreHangul->eq("\x{AC00}", ""));
ok($ignoreHangul->lt("\x{AC00}", "\0"));
ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
my $ignoreCJK = Unicode::Collate->new(
table => undef,
normalization => undef,
overrideCJK => sub {()},
entry => <<'ENTRIES',
5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
ENTRIES
);
# All CJK Unified Ideographs except U+5B57 are ignored.
##### 17..21
ok($ignoreCJK->eq("\x{4E00}", ""));
ok($ignoreCJK->lt("\x{4E00}", "\0"));
ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK.
ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK.
ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned.
##### 22..29
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));
##### 30..37
$ignoreCJK->change(UCA_Version => 9);
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));
##### 38..45
$ignoreCJK->change(UCA_Version => 8);
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));
##### 46..53
$ignoreCJK->change(UCA_Version => 14);
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));
##### 54..76
my $overCJK = Unicode::Collate->new(
table => undef,
normalization => undef,
entry => <<'ENTRIES',
0061 ; [.0101.0020.0002.0061] # latin a
0041 ; [.0101.0020.0008.0041] # LATIN A
4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
ENTRIES
overrideCJK => sub {
my $u = 0xFFFF - $_[0]; # reversed
[$u, 0x20, 0x2, $u];
},
);
ok($overCJK->lt("a", "A")); # diff. at level 3.
ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # diff. at level 2.
ok($overCJK->lt("A\x{4E03}", "A\x{4E00}"));
ok($overCJK->lt("A\x{4E03}", "a\x{4E00}"));
ok($overCJK->lt("a\x{4E03}", "A\x{4E00}"));
ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}"));
ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}"));
ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));
$overCJK->change(UCA_Version => 9);
ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
ok($overCJK->lt("a\x{9FA5}", "A\x{9FA6}"));
ok($overCJK->lt("a\x{9FA6}", "A\x{9FBB}"));
ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));
$overCJK->change(UCA_Version => 14);
ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}"));
ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}"));
ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));
--- NEW FILE: ignor.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 41 };
use strict;
use warnings;
use Unicode::Collate;
ok(1);
my $trad = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
ignoreName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
level => 3,
entry => << 'ENTRIES',
0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish
0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish
ENTRIES
);
# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D
##### 2..3
ok(
join(':', $trad->sort( qw/ acha aca ada acia acka / ) ),
join(':', qw/ aca acia acka acha ada / ),
);
ok(
join(':', $trad->sort( qw/ ACHA ACA ADA ACIA ACKA / ) ),
join(':', qw/ ACA ACIA ACKA ACHA ADA / ),
);
##### 4..7
ok($trad->gt("ocho", "oc\cAho")); # UCA v14
ok($trad->gt("ocho", "oc\0\cA\0\cBho")); # UCA v14
ok($trad->eq("-", ""));
ok($trad->gt("ocho", "oc-ho"));
##### 8..11
$trad->change(UCA_Version => 9);
ok($trad->eq("ocho", "oc\cAho")); # UCA v9
ok($trad->eq("ocho", "oc\0\cA\0\cBho")); # UCA v9
ok($trad->eq("-", ""));
ok($trad->gt("ocho", "oc-ho"));
##### 12..15
$trad->change(UCA_Version => 8);
ok($trad->gt("ocho", "oc\cAho"));
ok($trad->gt("ocho", "oc\0\cA\0\cBho"));
ok($trad->eq("-", ""));
ok($trad->gt("ocho", "oc-ho"));
##### 16..19
$trad->change(UCA_Version => 9);
my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";
# HIRAGANA and KATAKANA are ignorable via ignoreName
ok($trad->eq($hiragana, ""));
ok($trad->eq("", $katakana));
ok($trad->eq($hiragana, $katakana));
ok($trad->eq($katakana, $hiragana));
##### 20..31
# According to Conformance Test (UCA_Version == 9 or 11),
# a L3-ignorable is treated as a completely ignorable.
my $L3ignorable = Unicode::Collate->new(
alternate => 'Non-ignorable',
level => 3,
table => undef,
normalization => undef,
UCA_Version => 9,
entry => <<'ENTRIES',
0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
0001 ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429)
0591 ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA
1D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM
0021 ; [*024B.0020.0002.0021] # EXCLAMATION MARK
09BE ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA
09C7 ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E
09CB ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
09C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
1D1B9 ; [*098A.0020.0002.1D1B9] # MUSICAL SYMBOL SEMIBREVIS WHITE
1D1BA ; [*098B.0020.0002.1D1BA] # MUSICAL SYMBOL SEMIBREVIS BLACK
1D1BB ; [*098A.0020.0002.1D1B9][.0000.0000.0000.1D165] # M.S. MINIMA
1D1BC ; [*098B.0020.0002.1D1BA][.0000.0000.0000.1D165] # M.S. MINIMA BLACK
ENTRIES
);
ok($L3ignorable->lt("\cA", "!"));
ok($L3ignorable->lt("\x{591}", "!"));
ok($L3ignorable->eq("\cA", "\x{591}"));
ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\cA\x{09BE}A"));
ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{0591}\x{09BE}A"));
ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{1D165}\x{09BE}A"));
ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09CB}A"));
ok($L3ignorable->lt("\x{1D1BB}", "\x{1D1BC}"));
ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}"));
ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}"));
ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}\x{1D165}"));
ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}\x{1D165}"));
##### 32..41
my $c = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
level => 1,
UCA_Version => 14,
entry => << 'ENTRIES',
034F ; [.0000.0000.0000.034F] # COMBINING GRAPHEME JOINER
0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish
0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish
ENTRIES
);
# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D
ok($c->gt("ocho", "oc\x00\x00ho"));
ok($c->gt("ocho", "oc\cAho"));
ok($c->gt("ocho", "oc\x{034F}ho"));
ok($c->gt("ocio", "oc\x{034F}ho"));
ok($c->lt("ocgo", "oc\x{034F}ho"));
ok($c->lt("oceo", "oc\x{034F}ho"));
ok($c->viewSortKey("ocho"), "[0B4B 0A3F 0B4B | | |]");
ok($c->viewSortKey("oc\x00\x00ho"), "[0B4B 0A3D 0AB9 0B4B | | |]");
ok($c->viewSortKey("oc\cAho"), "[0B4B 0A3D 0AB9 0B4B | | |]");
ok($c->viewSortKey("oc\x{034F}ho"), "[0B4B 0A3D 0AB9 0B4B | | |]");
--- NEW FILE: normal.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
BEGIN {
eval { require Unicode::Normalize; };
if ($@) {
print "1..0 # skipped: Unicode::Normalize needed for this test\n";
print $@;
exit;
}
}
use Test;
BEGIN { plan tests => 100 };
use strict;
use warnings;
use Unicode::Collate;
our $Aring = pack('U', 0xC5);
our $aring = pack('U', 0xE5);
our $entry = <<'ENTRIES';
030A; [.0000.030A.0002] # COMBINING RING ABOVE
212B; [.002B.0020.0008] # ANGSTROM SIGN
0061; [.0A41.0020.0002] # LATIN SMALL LETTER A
0041; [.0A41.0020.0008] # LATIN CAPITAL LETTER A
007A; [.0A5A.0020.0002] # LATIN SMALL LETTER Z
005A; [.0A5A.0020.0008] # LATIN CAPITAL LETTER Z
FF41; [.0A87.0020.0002] # LATIN SMALL LETTER A
FF21; [.0A87.0020.0008] # LATIN CAPITAL LETTER A
00E5; [.0AC5.0020.0002] # LATIN SMALL LETTER A WITH RING ABOVE
00C5; [.0AC5.0020.0008] # LATIN CAPITAL LETTER A WITH RING ABOVE
ENTRIES
# Aong < A+ring < Z < fullA+ring < A-ring
#########################
our $noN = Unicode::Collate->new(
level => 1,
table => undef,
normalization => undef,
entry => $entry,
);
our $nfc = Unicode::Collate->new(
level => 1,
table => undef,
normalization => 'NFC',
entry => $entry,
);
our $nfd = Unicode::Collate->new(
level => 1,
table => undef,
normalization => 'NFD',
entry => $entry,
);
our $nfkc = Unicode::Collate->new(
level => 1,
table => undef,
normalization => 'NFKC',
entry => $entry,
);
our $nfkd = Unicode::Collate->new(
level => 1,
table => undef,
normalization => 'NFKD',
entry => $entry,
);
ok($noN->lt("\x{212B}", "A"));
ok($noN->lt("\x{212B}", $Aring));
ok($noN->lt("A\x{30A}", $Aring));
ok($noN->lt("A", "\x{FF21}"));
ok($noN->lt("Z", "\x{FF21}"));
ok($noN->lt("Z", $Aring));
ok($noN->lt("\x{212B}", $aring));
ok($noN->lt("A\x{30A}", $aring));
ok($noN->lt("Z", $aring));
ok($noN->lt("a\x{30A}", "Z"));
ok($nfd->eq("\x{212B}", "A"));
ok($nfd->eq("\x{212B}", $Aring));
ok($nfd->eq("A\x{30A}", $Aring));
ok($nfd->lt("A", "\x{FF21}"));
ok($nfd->lt("Z", "\x{FF21}"));
ok($nfd->gt("Z", $Aring));
ok($nfd->eq("\x{212B}", $aring));
ok($nfd->eq("A\x{30A}", $aring));
ok($nfd->gt("Z", $aring));
ok($nfd->lt("a\x{30A}", "Z"));
ok($nfc->gt("\x{212B}", "A"));
ok($nfc->eq("\x{212B}", $Aring));
ok($nfc->eq("A\x{30A}", $Aring));
ok($nfc->lt("A", "\x{FF21}"));
ok($nfc->lt("Z", "\x{FF21}"));
ok($nfc->lt("Z", $Aring));
ok($nfc->eq("\x{212B}", $aring));
ok($nfc->eq("A\x{30A}", $aring));
ok($nfc->lt("Z", $aring));
ok($nfc->gt("a\x{30A}", "Z"));
ok($nfkd->eq("\x{212B}", "A"));
ok($nfkd->eq("\x{212B}", $Aring));
ok($nfkd->eq("A\x{30A}", $Aring));
ok($nfkd->eq("A", "\x{FF21}"));
ok($nfkd->gt("Z", "\x{FF21}"));
ok($nfkd->gt("Z", $Aring));
ok($nfkd->eq("\x{212B}", $aring));
ok($nfkd->eq("A\x{30A}", $aring));
ok($nfkd->gt("Z", $aring));
ok($nfkd->lt("a\x{30A}", "Z"));
ok($nfkc->gt("\x{212B}", "A"));
ok($nfkc->eq("\x{212B}", $Aring));
ok($nfkc->eq("A\x{30A}", $Aring));
ok($nfkc->eq("A", "\x{FF21}"));
ok($nfkc->gt("Z", "\x{FF21}"));
ok($nfkc->lt("Z", $Aring));
ok($nfkc->eq("\x{212B}", $aring));
ok($nfkc->eq("A\x{30A}", $aring));
ok($nfkc->lt("Z", $aring));
ok($nfkc->gt("a\x{30A}", "Z"));
$nfd->change(normalization => undef);
ok($nfd->lt("\x{212B}", "A"));
ok($nfd->lt("\x{212B}", $Aring));
ok($nfd->lt("A\x{30A}", $Aring));
ok($nfd->lt("A", "\x{FF21}"));
ok($nfd->lt("Z", "\x{FF21}"));
ok($nfd->lt("Z", $Aring));
ok($nfd->lt("\x{212B}", $aring));
ok($nfd->lt("A\x{30A}", $aring));
ok($nfd->lt("Z", $aring));
ok($nfd->lt("a\x{30A}", "Z"));
$nfd->change(normalization => 'C');
ok($nfd->gt("\x{212B}", "A"));
ok($nfd->eq("\x{212B}", $Aring));
ok($nfd->eq("A\x{30A}", $Aring));
ok($nfd->lt("A", "\x{FF21}"));
ok($nfd->lt("Z", "\x{FF21}"));
ok($nfd->lt("Z", $Aring));
ok($nfd->eq("\x{212B}", $aring));
ok($nfd->eq("A\x{30A}", $aring));
ok($nfd->lt("Z", $aring));
ok($nfd->gt("a\x{30A}", "Z"));
$nfd->change(normalization => 'D');
ok($nfd->eq("\x{212B}", "A"));
ok($nfd->eq("\x{212B}", $Aring));
ok($nfd->eq("A\x{30A}", $Aring));
ok($nfd->lt("A", "\x{FF21}"));
ok($nfd->lt("Z", "\x{FF21}"));
ok($nfd->gt("Z", $Aring));
ok($nfd->eq("\x{212B}", $aring));
ok($nfd->eq("A\x{30A}", $aring));
ok($nfd->gt("Z", $aring));
ok($nfd->lt("a\x{30A}", "Z"));
$nfd->change(normalization => 'KD');
ok($nfd->eq("\x{212B}", "A"));
ok($nfd->eq("\x{212B}", $Aring));
ok($nfd->eq("A\x{30A}", $Aring));
ok($nfd->eq("A", "\x{FF21}"));
ok($nfd->gt("Z", "\x{FF21}"));
ok($nfd->gt("Z", $Aring));
ok($nfd->eq("\x{212B}", $aring));
ok($nfd->eq("A\x{30A}", $aring));
ok($nfd->gt("Z", $aring));
ok($nfd->lt("a\x{30A}", "Z"));
$nfd->change(normalization => 'KC');
ok($nfd->gt("\x{212B}", "A"));
ok($nfd->eq("\x{212B}", $Aring));
ok($nfd->eq("A\x{30A}", $Aring));
ok($nfd->eq("A", "\x{FF21}"));
ok($nfd->gt("Z", "\x{FF21}"));
ok($nfd->lt("Z", $Aring));
ok($nfd->eq("\x{212B}", $aring));
ok($nfd->eq("A\x{30A}", $aring));
ok($nfd->lt("Z", $aring));
ok($nfd->gt("a\x{30A}", "Z"));
--- NEW FILE: test.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 113 };
use strict;
use warnings;
use Unicode::Collate;
ok(1);
sub _pack_U { Unicode::Collate::pack_U(@_) }
sub _unpack_U { Unicode::Collate::unpack_U(@_) }
my $A_acute = _pack_U(0xC1);
my $a_acute = _pack_U(0xE1);
my $acute = _pack_U(0x0301);
my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";
##### 2..7
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
);
ok(ref $Collator, "Unicode::Collate");
ok($Collator->cmp("", ""), 0);
ok($Collator->eq("", ""));
ok($Collator->cmp("", "perl"), -1);
ok(
join(':', $Collator->sort( qw/ acha aca ada acia acka / ) ),
join(':', qw/ aca acha acia acka ada / ),
);
ok(
join(':', $Collator->sort( qw/ ACHA ACA ADA ACIA ACKA / ) ),
join(':', qw/ ACA ACHA ACIA ACKA ADA / ),
);
##### 8..18
ok($Collator->cmp("A$acute", $A_acute), 0); # @version 3.1.1 (prev: -1)
ok($Collator->cmp($a_acute, $A_acute), -1);
ok($Collator->eq("A\cA$acute", $A_acute)); # UCA v9. \cA is invariant.
my %old_level = $Collator->change(level => 1);
ok($Collator->eq("A$acute", $A_acute));
ok($Collator->eq("A", $A_acute));
ok($Collator->change(level => 2)->eq($a_acute, $A_acute));
ok($Collator->lt("A", $A_acute));
ok($Collator->change(%old_level)->lt("A", $A_acute));
ok($Collator->lt("A", $A_acute));
ok($Collator->lt("A", $a_acute));
ok($Collator->lt($a_acute, $A_acute));
##### 19..25
$Collator->change(level => 2);
ok($Collator->{level}, 2);
ok( $Collator->cmp("ABC","abc"), 0);
ok( $Collator->eq("ABC","abc") );
ok( $Collator->le("ABC","abc") );
ok( $Collator->cmp($hiragana, $katakana), 0);
ok( $Collator->eq($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );
##### 26..31
# hangul
ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") );
ok( $Collator->eq("a\x{AE00}b", "a\x{1100}\x{1173}\x{11AF}b") );
ok( $Collator->gt("a\x{AE00}b", "a\x{1100}\x{1173}b\x{11AF}") );
ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") );
ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") );
ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < hiragana
##### 32..40
$Collator->change(%old_level, katakana_before_hiragana => 1);
ok($Collator->{level}, 4);
ok( $Collator->cmp("abc", "ABC"), -1);
ok( $Collator->ne("abc", "ABC") );
ok( $Collator->lt("abc", "ABC") );
ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ne($hiragana, $katakana) );
ok( $Collator->gt($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );
##### 41..46
$Collator->change(upper_before_lower => 1);
ok( $Collator->cmp("abc", "ABC"), 1);
ok( $Collator->ge("abc", "ABC"), 1);
ok( $Collator->gt("abc", "ABC"), 1);
ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ge($hiragana, $katakana), 1);
ok( $Collator->gt($hiragana, $katakana), 1);
##### 47..48
$Collator->change(katakana_before_hiragana => 0);
ok( $Collator->cmp("abc", "ABC"), 1);
ok( $Collator->cmp($hiragana, $katakana), -1);
##### 49..52
$Collator->change(upper_before_lower => 0);
ok( $Collator->cmp("abc", "ABC"), -1);
ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), -1);
ok( $Collator->lt($hiragana, $katakana) );
##### 53..54
my $ignoreAE = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
ignoreChar => qr/^[aAeE]$/,
);
ok($ignoreAE->eq("element","lament"));
ok($ignoreAE->eq("Perl","ePrl"));
##### 55
my $onlyABC = Unicode::Collate->new(
table => undef,
normalization => undef,
entry => << 'ENTRIES',
0061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A
0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
0062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B
0042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B
0063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C
0043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C
ENTRIES
);
ok(
join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ),
join(':', qw/ A aB Ab ABA BAC cAc cc / ),
);
##### 56..59
my $undefAE = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
undefChar => qr/^[aAeE]$/,
);
ok($undefAE ->gt("edge","fog"));
ok($Collator->lt("edge","fog"));
ok($undefAE ->gt("lake","like"));
ok($Collator->lt("lake","like"));
##### 60..69
# Table is undefined, then no entry is defined.
my $undef_table = Unicode::Collate->new(
table => undef,
normalization => undef,
level => 1,
);
# in the Unicode code point order
ok($undef_table->lt('', 'A'));
ok($undef_table->lt('ABC', 'B'));
# Hangul should be decomposed (even w/o Unicode::Normalize).
ok($undef_table->lt("Perl", "\x{AC00}"));
ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}"));
ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}"));
ok($undef_table->lt("\x{AE00}", "\x{3042}"));
# U+AC00: Hangul GA
# U+AE00: Hangul GEUL
# U+3042: Hiragana A
# Weight for CJK Ideographs is defined, though.
ok($undef_table->lt("", "\x{4E00}"));
ok($undef_table->lt("\x{4E8C}","ABC"));
ok($undef_table->lt("\x{4E00}","\x{3042}"));
ok($undef_table->lt("\x{4E00}","\x{4E8C}"));
# U+4E00: Ideograph "ONE"
# U+4E8C: Ideograph "TWO"
##### 70..74
my $few_entries = Unicode::Collate->new(
entry => <<'ENTRIES',
0050 ; [.0101.0020.0002.0050] # P
0045 ; [.0102.0020.0002.0045] # E
0052 ; [.0103.0020.0002.0052] # R
004C ; [.0104.0020.0002.004C] # L
1100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G
1175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I
5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
ENTRIES
table => undef,
normalization => undef,
);
# defined before undefined
my $sortABC = join '',
$few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ ");
ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ");
ok($few_entries->lt('E', 'D'));
ok($few_entries->lt("\x{5B57}", "\x{4E00}"));
ok($few_entries->lt("\x{AE30}", "\x{AC00}"));
# Hangul must be decomposed.
ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}"));
##### 75..79
my $dropArticles = Unicode::Collate->new(
table => "keys.txt",
normalization => undef,
preprocess => sub {
my $string = shift;
$string =~ s/\b(?:an?|the)\s+//ig;
$string;
},
);
ok($dropArticles->eq("camel", "a camel"));
ok($dropArticles->eq("Perl", "The Perl"));
ok($dropArticles->lt("the pen", "a pencil"));
ok($Collator->lt("Perl", "The Perl"));
ok($Collator->gt("the pen", "a pencil"));
##### 80..81
my $backLevel1 = Unicode::Collate->new(
table => undef,
normalization => undef,
backwards => [ 1 ],
);
# all strings are reversed at level 1.
ok($backLevel1->gt("AB", "BA"));
ok($backLevel1->gt("\x{3042}\x{3044}", "\x{3044}\x{3042}"));
##### 82..89
my $backLevel2 = Unicode::Collate->new(
table => "keys.txt",
normalization => undef,
undefName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
backwards => 2,
);
ok($backLevel2->gt("Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}"));
ok($backLevel2->gt("ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}"));
ok($Collator ->lt("Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}"));
ok($Collator ->lt("ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}"));
# HIRAGANA and KATAKANA are made undefined via undefName.
# So they are after CJK Unified Ideographs.
ok($backLevel2->lt("\x{4E00}", $hiragana));
ok($backLevel2->lt("\x{4E03}", $katakana));
ok($Collator ->gt("\x{4E00}", $hiragana));
ok($Collator ->gt("\x{4E03}", $katakana));
##### 90..96
my $O_str = Unicode::Collate->new(
table => "keys.txt",
normalization => undef,
entry => <<'ENTRIES',
0008 ; [*0008.0000.0000.0000] # BACKSPACE (need to be non-ignorable)
004F 0337 ; [.0B53.0020.0008.004F] # capital O WITH SHORT SOLIDUS OVERLAY
006F 0008 002F ; [.0B53.0020.0002.006F] # LATIN SMALL LETTER O WITH STROKE
004F 0008 002F ; [.0B53.0020.0008.004F] # LATIN CAPITAL LETTER O WITH STROKE
006F 0337 ; [.0B53.0020.0002.004F] # small O WITH SHORT SOLIDUS OVERLAY
200B ; [.2000.0000.0000.0000] # ZERO WIDTH SPACE (may be non-sense but ...)
#00F8 ; [.0B53.0020.0002.00F8] # LATIN SMALL LETTER O WITH STROKE
#00D8 ; [.0B53.0020.0008.00D8] # LATIN CAPITAL LETTER O WITH STROKE
ENTRIES
);
my $o_BS_slash = _pack_U(0x006F, 0x0008, 0x002F);
my $O_BS_slash = _pack_U(0x004F, 0x0008, 0x002F);
my $o_sol = _pack_U(0x006F, 0x0337);
my $O_sol = _pack_U(0x004F, 0x0337);
my $o_stroke = _pack_U(0x00F8);
my $O_stroke = _pack_U(0x00D8);
ok($O_str->eq($o_stroke, $o_BS_slash));
ok($O_str->eq($O_stroke, $O_BS_slash));
ok($O_str->eq($o_stroke, $o_sol));
ok($O_str->eq($O_stroke, $O_sol));
ok($Collator->eq("\x{200B}", "\0"));
ok($O_str ->gt("\x{200B}", "\0"));
ok($O_str ->gt("\x{200B}", "A"));
##### 97..107
my %origVer = $Collator->change(UCA_Version => 8);
$Collator->change(level => 3);
ok($Collator->gt("!\x{300}", ""));
ok($Collator->gt("!\x{300}", "!"));
ok($Collator->eq("!\x{300}", "\x{300}"));
$Collator->change(level => 2);
ok($Collator->eq("!\x{300}", "\x{300}"));
$Collator->change(level => 4);
ok($Collator->gt("!\x{300}", "!"));
ok($Collator->lt("!\x{300}", "\x{300}"));
$Collator->change(%origVer, level => 3);
ok($Collator->eq("!\x{300}", ""));
ok($Collator->eq("!\x{300}", "!"));
ok($Collator->lt("!\x{300}", "\x{300}"));
$Collator->change(level => 4);
ok($Collator->gt("!\x{300}", ""));
ok($Collator->eq("!\x{300}", "!"));
##### 108..113
$_ = 'Foo';
my $c = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
upper_before_lower => 1,
);
ok($_, 'Foo'); # fixed at v. 0.52; no longer clobber $_
my($temp, @temp); # Not the result but the side effect matters.
$_ = 'Foo';
$temp = $c->getSortKey("abc");
ok($_, 'Foo');
$_ = 'Foo';
$temp = $c->viewSortKey("abc");
ok($_, 'Foo');
$_ = 'Foo';
@temp = $c->sort("abc", "xyz", "def");
ok($_, 'Foo');
$_ = 'Foo';
@temp = $c->index("perl5", "RL");
ok($_, 'Foo');
$_ = 'Foo';
@temp = $c->index("perl5", "LR");
ok($_, 'Foo');
#####
--- NEW FILE: altern.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 37 };
use strict;
use warnings;
use Unicode::Collate;
ok(1);
#########################
sub _pack_U { Unicode::Collate::pack_U(@_) }
sub _unpack_U { Unicode::Collate::unpack_U(@_) }
my $A_acute = _pack_U(0xC1);
my $acute = _pack_U(0x0301);
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
normalization => undef,
);
my %origAlt = $Collator->change(alternate => 'Blanked');
ok($Collator->lt("death", "de luge"));
ok($Collator->lt("de luge", "de-luge"));
ok($Collator->lt("de-luge", "deluge"));
ok($Collator->lt("deluge", "de\x{2010}luge"));
ok($Collator->lt("deluge", "de Luge"));
$Collator->change(alternate => 'Non-ignorable');
ok($Collator->lt("de luge", "de Luge"));
ok($Collator->lt("de Luge", "de-luge"));
ok($Collator->lt("de-Luge", "de\x{2010}luge"));
ok($Collator->lt("de-luge", "death"));
ok($Collator->lt("death", "deluge"));
$Collator->change(alternate => 'Shifted');
ok($Collator->lt("death", "de luge"));
ok($Collator->lt("de luge", "de-luge"));
ok($Collator->lt("de-luge", "deluge"));
ok($Collator->lt("deluge", "de Luge"));
ok($Collator->lt("de Luge", "deLuge"));
$Collator->change(alternate => 'Shift-Trimmed');
ok($Collator->lt("death", "deluge"));
ok($Collator->lt("deluge", "de luge"));
ok($Collator->lt("de luge", "de-luge"));
ok($Collator->lt("de-luge", "deLuge"));
ok($Collator->lt("deLuge", "de Luge"));
$Collator->change(%origAlt);
ok($Collator->{alternate}, 'shifted');
##############
# ignorable after alternate
# Shifted;
ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
ok($Collator->eq("?\x{300}", "?"));
ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.
$Collator->change(level => 3);
ok($Collator->eq("\cA", "?"));
$Collator->change(alternate => 'blanked', level => 4);
ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
ok($Collator->eq("?\x{300}", "?"));
ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.
$Collator->change(level => 3);
ok($Collator->eq("\cA", "?"));
$Collator->change(alternate => 'Non-ignorable', level => 4);
ok($Collator->lt("?\x{300}", "?!"));
ok($Collator->gt("?\x{300}A$acute", "?$A_acute"));
ok($Collator->gt("?\x{300}", "?"));
ok($Collator->gt("?\x{344}", "?"));
$Collator->change(level => 3);
ok($Collator->lt("\cA", "?"));
$Collator->change(alternate => 'Shifted', level => 4);
--- NEW FILE: illegalp.t ---
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
"cannot stringify a Unicode code point\n";
exit 0;
}
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
use Test;
BEGIN { plan tests => 17 };
use strict;
use warnings;
ok(1);
#
# No test for Unicode::Collate is included in this .t file.
#
# UCA conformance test requires completely ignorable characters
# (including noncharacters) must be able to be sorted in code point order.
# If not so, Unicode::Collate must not be compliant with UCA.
#
# ~~~ CollationTest_SHIFTED.txt in CollationTest-4.0.0
#
# 206F 0021; # ! NOMINAL DIGIT SHAPES [| | | 0251]
# D800 0021; # ! <surrogate-D800> [| | | 0251]
# DFFF 0021; # ! <surrogate-DFFF> [| | | 0251]
# FDD0 0021; # ! <noncharacter-FDD0> [| | | 0251]
# FFFB 0021; # ! INTERLINEAR ANNOTATION TERMINATOR [| | | 0251]
# FFFE 0021; # ! <noncharacter-FFFE> [| | | 0251]
# FFFF 0021; # ! <noncharacter-FFFF> [| | | 0251]
# 1D165 0021; # ! MS. Cm. STEM [| | | 0251]
#
# ~~~ CollationTest_NON_IGNORABLE.txt in CollationTest-4.0.0
#
# 206F 0021; # ! NOMINAL DIGIT SHAPES [0251 | 0020 | 0002 |]
# D800 0021; # ! <surrogate-D800> [0251 | 0020 | 0002 |]
# DFFF 0021; # ! <surrogate-DFFF> [0251 | 0020 | 0002 |]
# FDD0 0021; # ! <noncharacter-FDD0> [0251 | 0020 | 0002 |]
# FFFB 0021; # ! INTERLINEAR ANNOTATION TERMINATOR [0251 | 0020 | 0002 |]
# FFFE 0021; # ! <noncharacter-FFFE> [0251 | 0020 | 0002 |]
# FFFF 0021; # ! <noncharacter-FFFF> [0251 | 0020 | 0002 |]
# 1D165 0021; # ! MS. Cm. STEM [0251 | 0020 | 0002 |]
#
no warnings 'utf8';
ok("\x{206F}!" lt "\x{D800}!");
ok(pack('U*', 0x206F, 0x21) lt pack('U*', 0xD800, 0x21));
ok("\x{D800}!" lt "\x{DFFF}!");
ok(pack('U*', 0xD800, 0x21) lt pack('U*', 0xDFFF, 0x21));
ok("\x{DFFF}!" lt "\x{FDD0}!");
ok(pack('U*', 0xDFFF, 0x21) lt pack('U*', 0xFDD0, 0x21) );
ok("\x{FDD0}!" lt "\x{FFFB}!");
ok(pack('U*', 0xFDD0, 0x21) lt pack('U*', 0xFFFB, 0x21));
ok("\x{FFFB}!" lt "\x{FFFE}!");
ok(pack('U*', 0xFFFB, 0x21) lt pack('U*', 0xFFFE, 0x21));
ok("\x{FFFE}!" lt "\x{FFFF}!");
ok(pack('U*', 0xFFFE, 0x21) lt pack('U*', 0xFFFF, 0x21));
ok("\x{FFFF}!" lt "\x{1D165}!");
ok(pack('U*', 0xFFFF, 0x21) lt pack('U*', 0x1D165, 0x21));
ok("\000!" lt "\x{FFFF}!");
ok(pack('U*', 0, 0x21) lt pack('U*', 0xFFFF, 0x21));
More information about the dslinux-commit
mailing list