dslinux/user/perl/lib/Unicode/Collate/t altern.t cjkrange.t contract.t hangtype.t hangul.t ignor.t illegal.t illegalp.t index.t normal.t override.t rearrang.t test.t trailwt.t variable.t version.t view.t

cayenne dslinux_cayenne at user.in-berlin.de
Tue Dec 5 05:27:22 CET 2006


Update of /cvsroot/dslinux/dslinux/user/perl/lib/Unicode/Collate/t
In directory antilope:/tmp/cvs-serv7729/lib/Unicode/Collate/t

Added Files:
	altern.t cjkrange.t contract.t hangtype.t hangul.t ignor.t 
	illegal.t illegalp.t index.t normal.t override.t rearrang.t 
	test.t trailwt.t variable.t version.t view.t 
Log Message:
Adding fresh perl source to HEAD to branch from

--- NEW FILE: variable.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 37 };

use strict;
use warnings;
use Unicode::Collate;

ok(1);

#########################

sub _pack_U   { Unicode::Collate::pack_U(@_) }
sub _unpack_U { Unicode::Collate::unpack_U(@_) }

my $A_acute = _pack_U(0xC1);
my $acute   = _pack_U(0x0301);

my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
);

my %origVar = $Collator->change(variable => 'Blanked');

ok($Collator->lt("death", "de luge"));
ok($Collator->lt("de luge", "de-luge"));
ok($Collator->lt("de-luge", "deluge"));
ok($Collator->lt("deluge", "de\x{2010}luge"));
ok($Collator->lt("deluge", "de Luge"));

$Collator->change(variable => 'Non-ignorable');

ok($Collator->lt("de luge", "de Luge"));
ok($Collator->lt("de Luge", "de-luge"));
ok($Collator->lt("de-Luge", "de\x{2010}luge"));
ok($Collator->lt("de-luge", "death"));
ok($Collator->lt("death", "deluge"));

$Collator->change(variable => 'Shifted');

ok($Collator->lt("death", "de luge"));
ok($Collator->lt("de luge", "de-luge"));
ok($Collator->lt("de-luge", "deluge"));
ok($Collator->lt("deluge", "de Luge"));
ok($Collator->lt("de Luge", "deLuge"));

$Collator->change(variable => 'Shift-Trimmed');

ok($Collator->lt("death", "deluge"));
ok($Collator->lt("deluge", "de luge"));
ok($Collator->lt("de luge", "de-luge"));
ok($Collator->lt("de-luge", "deLuge"));
ok($Collator->lt("deLuge", "de Luge"));

$Collator->change(%origVar);

ok($Collator->{variable}, 'shifted');

##############

# ignorable after variable

# Shifted;
ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
ok($Collator->eq("?\x{300}", "?"));
ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.

$Collator->change(level => 3);
ok($Collator->eq("\cA", "?"));

$Collator->change(variable => 'blanked', level => 4);
ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
ok($Collator->eq("?\x{300}", "?"));
ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.

$Collator->change(level => 3);
ok($Collator->eq("\cA", "?"));

$Collator->change(variable => 'Non-ignorable', level => 4);

ok($Collator->lt("?\x{300}", "?!"));
ok($Collator->gt("?\x{300}A$acute", "?$A_acute"));
ok($Collator->gt("?\x{300}", "?"));
ok($Collator->gt("?\x{344}", "?"));

$Collator->change(level => 3);
ok($Collator->lt("\cA", "?"));

$Collator->change(variable => 'Shifted', level => 4);


--- NEW FILE: hangul.t ---
BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 72 };

use strict;
use warnings;
use Unicode::Collate;

#########################

ok(1);

# a standard collator (3.1.1)
my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
);


# a collator for hangul sorting,
# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html
#     http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf
my $hangul = Unicode::Collate->new(
  level => 3,
  table => undef,
  normalization => undef,

  entry => <<'ENTRIES',
0061      ; [.0A15.0020.0002] # LATIN SMALL LETTER A
0041      ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
#1161     ; [.1800.0020.0002] # <comment> initial jungseong A
#1163     ; [.1801.0020.0002] # <comment> initial jungseong YA
1100      ; [.1831.0020.0002] # choseong KIYEOK
1100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A
1100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA
1101      ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK
1101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A
1101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA
1102      ; [.1833.0020.0002] # choseong NIEUN
1102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A
1102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA
3042      ; [.1921.0020.000E] # HIRAGANA LETTER A
11A8      ; [.FE10.0020.0002] # jongseong KIYEOK
11A9      ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK
1161      ; [.FE20.0020.0002] # jungseong A <non-initial>
1163      ; [.FE21.0020.0002] # jungseong YA <non-initial>
ENTRIES
);

ok(ref $hangul, "Unicode::Collate");

my $trailwt = Unicode::Collate->new(
  level => 3,
  table => undef,
  normalization => undef,
  hangul_terminator => 16,

  entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong
0061  ; [.0A15.0020.0002] # LATIN SMALL LETTER A
0041  ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
11A8  ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK
11A9  ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
1161  ; [.1831.0020.0002] # HANGUL JUNGSEONG A
1163  ; [.1832.0020.0002] # HANGUL JUNGSEONG YA
1100  ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK
1101  ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1102  ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN
3042  ; [.1921.0020.000E] # HIRAGANA LETTER A
ENTRIES
);

#########################

# L(simp)L(simp) vs L(comp): /GGA/
ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
ok($hangul  ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));

# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/
ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));

# T(simp)T(simp) vs T(comp): /AGG/
ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
ok($hangul  ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));

# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/
ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
ok($hangul  ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));

# LV vs LLV: /GA/ vs /GNA/
ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));

# LVX vs LVV: /GAA/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));

# LVX vs LVV: /GAA/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));

# LVX vs LVV: /GAA/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));

# LVL vs LVT: /GA/./G/ vs /GAG/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));

# LVT vs LVX: /GAG/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));

# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));

# LVT vs LVX: /GAG/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));

# LVT vs LVV: /GAG/ vs /GAA/
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));

# LVL vs LVV: /GA/./G/ vs /GAA/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));

# LV vs Syl(LV): /GA/ vs /[GA]/
ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($hangul  ->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}"));

# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));

# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));

# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));

# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/
ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));

# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/
ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
ok($hangul  ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));

#########################

# checks contraction in LVT:
# weights of these contractions may be non-sense.

my $hangcont = Unicode::Collate->new(
  level => 3,
  table => undef,
  normalization => undef,
  entry => <<'ENTRIES',
1100  ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK
1101  ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1161  ; [.188D.0020.0002] # HANGUL JUNGSEONG A
1162  ; [.188E.0020.0002] # HANGUL JUNGSEONG AE
1163  ; [.188F.0020.0002] # HANGUL JUNGSEONG YA
11A8  ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK
11A9  ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
1161 11A9 ; [.0000.0000.0000] # A-GG <contraction>
1100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39
ENTRIES
);

# contracted into VT
ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));

# not contracted into LVT but into VT
ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));

# contracted into LVT
ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));

# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));

# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/
ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));

1;
__END__

--- NEW FILE: rearrang.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 23 };

use strict;
use warnings;
use Unicode::Collate;

ok(1);

#########################

my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
  UCA_Version => 9,
);

# rearrange : 0x0E40..0x0E44, 0x0EC0..0x0EC4 (default)

##### 2..9

my %old_rearrange = $Collator->change(rearrange => undef);

ok($Collator->gt("\x{0E41}A", "\x{0E40}B"));
ok($Collator->gt("A\x{0E41}A", "A\x{0E40}B"));

$Collator->change(rearrange => [ 0x61 ]);
 # U+0061, 'a': This is a Unicode value, never a native value.

ok($Collator->gt("ab", "AB")); # as 'ba' > 'AB'

$Collator->change(%old_rearrange);

ok($Collator->lt("ab", "AB"));
ok($Collator->lt("\x{0E40}", "\x{0E41}"));
ok($Collator->lt("\x{0E40}A", "\x{0E41}B"));
ok($Collator->lt("\x{0E41}A", "\x{0E40}B"));
ok($Collator->lt("A\x{0E41}A", "A\x{0E40}B"));

##### 10..13

my $all_undef_8 = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideCJK => undef,
  overrideHangul => undef,
  UCA_Version => 8,
);

ok($all_undef_8->lt("\x{0E40}", "\x{0E41}"));
ok($all_undef_8->lt("\x{0E40}A", "\x{0E41}B"));
ok($all_undef_8->lt("\x{0E41}A", "\x{0E40}B"));
ok($all_undef_8->lt("A\x{0E41}A", "A\x{0E40}B"));

##### 14..18

my $no_rearrange = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  rearrange => [],
  UCA_Version => 9,
);

ok($no_rearrange->lt("A", "B"));
ok($no_rearrange->lt("\x{0E40}", "\x{0E41}"));
ok($no_rearrange->lt("\x{0E40}A", "\x{0E41}B"));
ok($no_rearrange->gt("\x{0E41}A", "\x{0E40}B"));
ok($no_rearrange->gt("A\x{0E41}A", "A\x{0E40}B"));

##### 19..23

my $undef_rearrange = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  rearrange => undef,
  UCA_Version => 9,
);

ok($undef_rearrange->lt("A", "B"));
ok($undef_rearrange->lt("\x{0E40}", "\x{0E41}"));
ok($undef_rearrange->lt("\x{0E40}A", "\x{0E41}B"));
ok($undef_rearrange->gt("\x{0E41}A", "\x{0E40}B"));
ok($undef_rearrange->gt("A\x{0E41}A", "A\x{0E40}B"));


--- NEW FILE: hangtype.t ---
BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 33 };

use strict;
use warnings;
use Unicode::Collate;

ok(1);

#########################

ok(Unicode::Collate::getHST(0x0000), '');
ok(Unicode::Collate::getHST(0x0100), '');
ok(Unicode::Collate::getHST(0x1000), '');
ok(Unicode::Collate::getHST(0x10FF), '');
ok(Unicode::Collate::getHST(0x1100), 'L');
ok(Unicode::Collate::getHST(0x1101), 'L');
ok(Unicode::Collate::getHST(0x1159), 'L');
ok(Unicode::Collate::getHST(0x115A), '');
ok(Unicode::Collate::getHST(0x115E), '');
ok(Unicode::Collate::getHST(0x115F), 'L');
ok(Unicode::Collate::getHST(0x1160), 'V');
ok(Unicode::Collate::getHST(0x1161), 'V');
ok(Unicode::Collate::getHST(0x11A0), 'V');
ok(Unicode::Collate::getHST(0x11A2), 'V');
ok(Unicode::Collate::getHST(0x11A3), '');
ok(Unicode::Collate::getHST(0x11A7), '');
ok(Unicode::Collate::getHST(0x11A8), 'T');
ok(Unicode::Collate::getHST(0x11AF), 'T');
ok(Unicode::Collate::getHST(0x11E0), 'T');
ok(Unicode::Collate::getHST(0x11F9), 'T');
ok(Unicode::Collate::getHST(0x11FA), '');
ok(Unicode::Collate::getHST(0x11FF), '');
ok(Unicode::Collate::getHST(0x3011), '');
ok(Unicode::Collate::getHST(0x11A7), '');
ok(Unicode::Collate::getHST(0xABFF), '');
ok(Unicode::Collate::getHST(0xAC00), 'LV');
ok(Unicode::Collate::getHST(0xAC01), 'LVT');
ok(Unicode::Collate::getHST(0xAC1B), 'LVT');
ok(Unicode::Collate::getHST(0xAC1C), 'LV');
ok(Unicode::Collate::getHST(0xD7A3), 'LVT');
ok(Unicode::Collate::getHST(0xD7A4), '');
ok(Unicode::Collate::getHST(0xFFFF), '');


--- NEW FILE: contract.t ---
BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 40 };

use strict;
use warnings;
use Unicode::Collate;

our $kjeEntry = <<'ENTRIES';
0301  ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
0334  ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
043A  ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA
041A  ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA
045C  ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
040C  ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
ENTRIES

our $aaEntry = <<'ENTRIES';
0304  ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230)
030A  ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230)
0327  ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202)
031A  ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232)
0061  ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A
0041  ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A
007A  ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z
005A  ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z
00E5  ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM
00C5  ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM
0061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE
0041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE
ENTRIES

#########################

ok(1);

my $kjeNoN = Unicode::Collate->new(
    level => 1,
    table => undef,
    normalization => undef,
    entry => $kjeEntry,
);

ok($kjeNoN->lt("\x{043A}", "\x{043A}\x{0301}"));
ok($kjeNoN->gt("\x{045C}", "\x{043A}\x{0334}\x{0301}"));
ok($kjeNoN->eq("\x{043A}", "\x{043A}\x{0334}\x{0301}"));
ok($kjeNoN->eq("\x{045C}", "\x{043A}\x{0301}\x{0334}"));

our %sortkeys;

$sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{043A}\x{0301}");
$sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{043A}\x{0334}\x{0301}");
$sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{043A}\x{0301}\x{0334}");

eval { require Unicode::Normalize };
if (!$@) {
    my $kjeNFD = Unicode::Collate->new(
	level => 1,
	table => undef,
	entry => $kjeEntry,
    );
ok($kjeNFD->lt("\x{043A}", "\x{043A}\x{0301}"));
ok($kjeNFD->eq("\x{045C}", "\x{043A}\x{0334}\x{0301}"));
ok($kjeNFD->lt("\x{043A}", "\x{043A}\x{0334}\x{0301}"));
ok($kjeNFD->eq("\x{045C}", "\x{043A}\x{0301}\x{0334}"));

    my $aaNFD = Unicode::Collate->new(
	level => 1,
	table => undef,
	entry => $aaEntry,
    );

ok($aaNFD->lt("Z", "A\x{30A}\x{304}"));
ok($aaNFD->eq("A", "A\x{304}\x{30A}"));
ok($aaNFD->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}"));
ok($aaNFD->lt("Z", "A\x{327}\x{30A}"));
ok($aaNFD->lt("Z", "A\x{30A}\x{327}"));
ok($aaNFD->lt("Z", "A\x{31A}\x{30A}"));
ok($aaNFD->lt("Z", "A\x{30A}\x{31A}"));

    my $aaPre = Unicode::Collate->new(
	level => 1,
	normalization => "prenormalized",
	table => undef,
	entry => $aaEntry,
    );

ok($aaPre->lt("Z", "A\x{30A}\x{304}"));
ok($aaPre->eq("A", "A\x{304}\x{30A}"));
ok($aaPre->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}"));
ok($aaPre->lt("Z", "A\x{327}\x{30A}"));
ok($aaPre->lt("Z", "A\x{30A}\x{327}"));
ok($aaPre->lt("Z", "A\x{31A}\x{30A}"));
ok($aaPre->lt("Z", "A\x{30A}\x{31A}"));
}
else {
  ok(1) for 1..20;
}

# again: loading Unicode::Normalize should not affect $kjeNoN.
ok($kjeNoN->lt("\x{043A}", "\x{043A}\x{0301}"));
ok($kjeNoN->gt("\x{045C}", "\x{043A}\x{0334}\x{0301}"));
ok($kjeNoN->eq("\x{043A}", "\x{043A}\x{0334}\x{0301}"));
ok($kjeNoN->eq("\x{045C}", "\x{043A}\x{0301}\x{0334}"));

ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{043A}\x{0301}"));
ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{043A}\x{0334}\x{0301}"));
ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{043A}\x{0301}\x{0334}"));

my $aaNoN = Unicode::Collate->new(
    level => 1,
    table => undef,
    entry => $aaEntry,
    normalization => undef,
);

ok($aaNoN->lt("Z", "A\x{30A}\x{304}"));
ok($aaNoN->eq("A", "A\x{304}\x{30A}"));
ok($aaNoN->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}"));
ok($aaNoN->eq("A", "A\x{327}\x{30A}"));
ok($aaNoN->lt("Z", "A\x{30A}\x{327}"));
ok($aaNoN->eq("A", "A\x{31A}\x{30A}"));
ok($aaNoN->lt("Z", "A\x{30A}\x{31A}"));


--- NEW FILE: version.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 17 };

use strict;
use warnings;
use Unicode::Collate;

ok(1);

#########################

# Fix me when UCA and/or keys.txt is upgraded.
my $UCA_Version = "14";
my $Base_Unicode_Version = "4.1.0";
my $Key_Version = "3.1.1";

ok(Unicode::Collate::UCA_Version, $UCA_Version);
ok(Unicode::Collate->UCA_Version, $UCA_Version);
ok(Unicode::Collate::Base_Unicode_Version, $Base_Unicode_Version);
ok(Unicode::Collate->Base_Unicode_Version, $Base_Unicode_Version);

my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
);

ok($Collator->UCA_Version,   $UCA_Version);
ok($Collator->UCA_Version(), $UCA_Version);
ok($Collator->Base_Unicode_Version,   $Base_Unicode_Version);
ok($Collator->Base_Unicode_Version(), $Base_Unicode_Version);
ok($Collator->version,   $Key_Version);
ok($Collator->version(), $Key_Version);

my $UndefTable = Unicode::Collate->new(
  table => undef,
  normalization => undef,
);

ok($UndefTable->UCA_Version,   $UCA_Version);
ok($UndefTable->UCA_Version(), $UCA_Version);
ok($UndefTable->Base_Unicode_Version,   $Base_Unicode_Version);
ok($UndefTable->Base_Unicode_Version(), $Base_Unicode_Version);
ok($UndefTable->version,   "unknown");
ok($UndefTable->version(), "unknown");


--- NEW FILE: trailwt.t ---
BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 58 };

use strict;
use warnings;
use Unicode::Collate;

#########################

ok(1);

# a standard collator (3.1.1)
my $Collator = Unicode::Collate->new(
  level => 1,
  table => 'keys.txt',
  normalization => undef,

  entry => <<'ENTRIES',
326E  ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA
326F  ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA
3270  ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA
3271  ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA
3272  ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA
3273  ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA
3274  ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA
3275  ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A
3276  ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA
3277  ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA
3278  ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA
3279  ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA
327A  ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA
327B  ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA
ENTRIES
);

my $hangul = Unicode::Collate->new(
  level => 1,
  table => 'keys.txt',
  normalization => undef,
  hangul_terminator => 16,

  entry => <<'ENTRIES',
326E  ; [.1831.0020.0006.326E][.188D.0020.0006.326E] # c.h.s. GA
326F  ; [.1833.0020.0006.326F][.188D.0020.0006.326F] # c.h.s. NA
3270  ; [.1834.0020.0006.3270][.188D.0020.0006.3270] # c.h.s. DA
3271  ; [.1836.0020.0006.3271][.188D.0020.0006.3271] # c.h.s. RA
3272  ; [.1837.0020.0006.3272][.188D.0020.0006.3272] # c.h.s. MA
3273  ; [.1838.0020.0006.3273][.188D.0020.0006.3273] # c.h.s. BA
3274  ; [.183A.0020.0006.3274][.188D.0020.0006.3274] # c.h.s. SA
3275  ; [.183C.0020.0006.3275][.188D.0020.0006.3275] # c.h.s. A
3276  ; [.183D.0020.0006.3276][.188D.0020.0006.3276] # c.h.s. JA
3277  ; [.183F.0020.0006.3277][.188D.0020.0006.3277] # c.h.s. CA
3278  ; [.1840.0020.0006.3278][.188D.0020.0006.3278] # c.h.s. KA
3279  ; [.1841.0020.0006.3279][.188D.0020.0006.3279] # c.h.s. TA
327A  ; [.1842.0020.0006.327A][.188D.0020.0006.327A] # c.h.s. PA
327B  ; [.1843.0020.0006.327B][.188D.0020.0006.327B] # c.h.s. HA
ENTRIES
);

ok(ref $hangul, "Unicode::Collate");

#########################

# LVX vs LVV: /GAA/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));

# LVX vs LVV: /GAA/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));

# LVX vs LVV: /GAA/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));

# LVL vs LVT: /GA/./G/ vs /GAG/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));

# LVT vs LVX: /GAG/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));

# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));

# LVT vs LVX: /GAG/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));

# LV vs Syl(LV): /GA/ vs /[GA]/
ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($hangul  ->eq("\x{1100}\x{1161}", "\x{AC00}"));

# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));

# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));

# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));

# Syl(LVT) vs : /GAG/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));

#########################

my $hangcirc = Unicode::Collate->new(
  level => 1,
  table => 'keys.txt',
  normalization => undef,
  hangul_terminator => 16,

  entry => <<'ENTRIES',
326E  ; [.1831.0020.0006.326E][.188D.0020.0006.326E][.10.0.0.0] # c.h.s. GA
326F  ; [.1833.0020.0006.326F][.188D.0020.0006.326F][.10.0.0.0] # c.h.s. NA
3270  ; [.1834.0020.0006.3270][.188D.0020.0006.3270][.10.0.0.0] # c.h.s. DA
3271  ; [.1836.0020.0006.3271][.188D.0020.0006.3271][.10.0.0.0] # c.h.s. RA
3272  ; [.1837.0020.0006.3272][.188D.0020.0006.3272][.10.0.0.0] # c.h.s. MA
3273  ; [.1838.0020.0006.3273][.188D.0020.0006.3273][.10.0.0.0] # c.h.s. BA
3274  ; [.183A.0020.0006.3274][.188D.0020.0006.3274][.10.0.0.0] # c.h.s. SA
3275  ; [.183C.0020.0006.3275][.188D.0020.0006.3275][.10.0.0.0] # c.h.s. A
3276  ; [.183D.0020.0006.3276][.188D.0020.0006.3276][.10.0.0.0] # c.h.s. JA
3277  ; [.183F.0020.0006.3277][.188D.0020.0006.3277][.10.0.0.0] # c.h.s. CA
3278  ; [.1840.0020.0006.3278][.188D.0020.0006.3278][.10.0.0.0] # c.h.s. KA
3279  ; [.1841.0020.0006.3279][.188D.0020.0006.3279][.10.0.0.0] # c.h.s. TA
327A  ; [.1842.0020.0006.327A][.188D.0020.0006.327A][.10.0.0.0] # c.h.s. PA
327B  ; [.1843.0020.0006.327B][.188D.0020.0006.327B][.10.0.0.0] # c.h.s. HA
ENTRIES
);

# LV vs Circled Syl(LV): /GA/ vs /(GA)/
ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}"));
ok($hangul  ->gt("\x{1100}\x{1161}", "\x{326E}"));
ok($hangcirc->eq("\x{1100}\x{1161}", "\x{326E}"));

# LV vs Circled Syl(LV): followed by latin A
ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A"));
ok($hangul  ->lt("\x{1100}\x{1161}A", "\x{326E}A"));
ok($hangcirc->eq("\x{1100}\x{1161}A", "\x{326E}A"));

# LV vs Circled Syl(LV): followed by hiragana A
ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
ok($hangcirc->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));

# LVT vs LVX: /GAG/ vs /GA/.hanja
ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));
ok($hangcirc->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));

#########################

# checks contraction in LVT:
# weights of these contractions may be non-sense.

my $hangcont = Unicode::Collate->new(
  level => 1,
  table => 'keys.txt',
  normalization => undef,
  hangul_terminator => 16,

  entry => <<'ENTRIES',
1100 1161 ; [.1831.0020.0002.1100][.188D.0020.0002.1161] # KIYEOK+A
1161 11A8 ; [.188D.0020.0002.1161][.18CF.0020.0002.11A8] # A+KIYEOK
ENTRIES
);

# cont<LV> vs Syl(LV): /<GA>/ vs /[GA]/
ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($hangcont->eq("\x{1100}\x{1161}", "\x{AC00}"));

# cont<LV>.T vs Syl(LV).T: /<GA>G/ vs /[GA]G/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));

# cont<LV>.T vs Syl(LVT): /<GA>G/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangcont->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));

# L.cont<VT> vs Syl(LV).T: /D<AG>/ vs /[DA]G/
ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}"));
ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E4}\x{11A8}"));

# L.cont<VT> vs Syl(LVT): /D<AG>/ vs /[DAG]/
ok($Collator->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}"));
ok($hangcont->eq("\x{1103}\x{1161}\x{11A8}", "\x{B2E5}"));

#####

$Collator->change(hangul_terminator => 16);

ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($Collator->gt("\x{1100}\x{1161}", "\x{326E}"));
ok($Collator->lt("\x{1100}\x{1161}A", "\x{326E}A"));
ok($Collator->lt("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
ok($Collator->lt("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));

$Collator->change(hangul_terminator => 0);

ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($Collator->eq("\x{1100}\x{1161}", "\x{326E}"));
ok($Collator->eq("\x{1100}\x{1161}A", "\x{326E}A"));
ok($Collator->eq("\x{1100}\x{1161}\x{3042}", "\x{326E}\x{3042}"));
ok($Collator->eq("\x{1100}\x{1161}\x{4E00}", "\x{326E}\x{4E00}"));

1;
__END__

--- NEW FILE: view.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 53 };

use strict;
use warnings;
use Unicode::Collate;

#########################

ok(1);

my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
);

##############

ok($Collator->viewSortKey(""), "[| | |]");

ok($Collator->viewSortKey("A"), "[0A15 | 0020 | 0008 | FFFF]");

ok($Collator->viewSortKey("ABC"),
    "[0A15 0A29 0A3D | 0020 0020 0020 | 0008 0008 0008 | FFFF FFFF FFFF]");

ok($Collator->viewSortKey("(12)"),
    "[0A0C 0A0D | 0020 0020 | 0002 0002 | 027A FFFF FFFF 027B]");

ok($Collator->viewSortKey("!\x{300}"), "[| | | 024B]");

ok($Collator->viewSortKey("\x{300}"), "[| 0035 | 0002 | FFFF]");

$Collator->change(level => 3);
ok($Collator->viewSortKey("A"), "[0A15 | 0020 | 0008 |]");

$Collator->change(level => 2);
ok($Collator->viewSortKey("A"), "[0A15 | 0020 | |]");

$Collator->change(level => 1);
ok($Collator->viewSortKey("A"), "[0A15 | | |]");

### Version 8

$Collator->change(level => 4, UCA_Version => 8);

ok($Collator->viewSortKey(""), "[|||]");

ok($Collator->viewSortKey("A"), "[0A15|0020|0008|FFFF]");

ok($Collator->viewSortKey("ABC"),
    "[0A15 0A29 0A3D|0020 0020 0020|0008 0008 0008|FFFF FFFF FFFF]");

ok($Collator->viewSortKey("(12)"),
    "[0A0C 0A0D|0020 0020|0002 0002|027A FFFF FFFF 027B]");

ok($Collator->viewSortKey("!\x{300}"), "[|0035|0002|024B FFFF]");

ok($Collator->viewSortKey("\x{300}"), "[|0035|0002|FFFF]");

$Collator->change(level => 3);
ok($Collator->viewSortKey("A"), "[0A15|0020|0008|]");

$Collator->change(level => 2);
ok($Collator->viewSortKey("A"), "[0A15|0020||]");

$Collator->change(level => 1);
ok($Collator->viewSortKey("A"), "[0A15|||]");

# Version 9

$Collator->change(level => 3, UCA_Version => 9);
ok($Collator->viewSortKey("A\x{300}z\x{301}"),
    "[0A15 0C13 | 0020 0035 0020 0032 | 0008 0002 0002 0002 |]");

$Collator->change(backwards => 1);
ok($Collator->viewSortKey("A\x{300}z\x{301}"),
    "[0C13 0A15 | 0020 0035 0020 0032 | 0008 0002 0002 0002 |]");

$Collator->change(backwards => 2);
ok($Collator->viewSortKey("A\x{300}z\x{301}"),
    "[0A15 0C13 | 0032 0020 0035 0020 | 0008 0002 0002 0002 |]");

$Collator->change(backwards => [1,3]);
ok($Collator->viewSortKey("A\x{300}z\x{301}"),
    "[0C13 0A15 | 0020 0035 0020 0032 | 0002 0002 0002 0008 |]");

$Collator->change(backwards => [2]);
ok($Collator->viewSortKey("\x{300}\x{301}\x{302}\x{303}"),
    "[| 004E 003C 0032 0035 | 0002 0002 0002 0002 |]");

$Collator->change(backwards => []);
ok($Collator->viewSortKey("A\x{300}z\x{301}"),
    "[0A15 0C13 | 0020 0035 0020 0032 | 0008 0002 0002 0002 |]");

$Collator->change(level => 4);

# Variable

our %origVar = $Collator->change(variable => 'Blanked');
ok($Collator->viewSortKey("1+2"),
    '[0A0C 0A0D | 0020 0020 | 0002 0002 | 0031 002B 0032]');

ok($Collator->viewSortKey("?\x{300}!\x{301}\x{315}."),
    '[| | | 003F 0021 002E]');

ok($Collator->viewSortKey("?!."), '[| | | 003F 0021 002E]');

$Collator->change(variable => 'Non-ignorable');
ok($Collator->viewSortKey("1+2"),
    '[0A0C 039F 0A0D | 0020 0020 0020 | 0002 0002 0002 | 0031 002B 0032]');

ok($Collator->viewSortKey("?\x{300}!"),
    '[024E 024B | 0020 0035 0020 | 0002 0002 0002 | 003F 0300 0021]');

ok($Collator->viewSortKey("?!."),
    '[024E 024B 0255 | 0020 0020 0020 | 0002 0002 0002 | 003F 0021 002E]');

$Collator->change(variable => 'Shifted');
ok($Collator->viewSortKey("1+2"),
    '[0A0C 0A0D | 0020 0020 | 0002 0002 | FFFF 039F FFFF]');

ok($Collator->viewSortKey("?\x{300}!\x{301}\x{315}."),
    '[| | | 024E 024B 0255]');

ok($Collator->viewSortKey("?!."), '[| | | 024E 024B 0255]');

$Collator->change(variable => 'Shift-Trimmed');
ok($Collator->viewSortKey("1+2"),
    '[0A0C 0A0D | 0020 0020 | 0002 0002 | 039F]');

ok($Collator->viewSortKey("?\x{300}!\x{301}\x{315}."),
    '[| | | 024E 024B 0255]');

ok($Collator->viewSortKey("?!."), '[| | | 024E 024B 0255]');

$Collator->change(%origVar);

#####

# Level 3 weight

ok($Collator->viewSortKey("a\x{3042}"),
    '[0A15 1921 | 0020 0020 | 0002 000E | FFFF FFFF]');

ok($Collator->viewSortKey("A\x{30A2}"),
    '[0A15 1921 | 0020 0020 | 0008 0011 | FFFF FFFF]');

$Collator->change(upper_before_lower => 1);

ok($Collator->viewSortKey("a\x{3042}"),
    '[0A15 1921 | 0020 0020 | 0008 000E | FFFF FFFF]');

ok($Collator->viewSortKey("A\x{30A2}"),
    '[0A15 1921 | 0020 0020 | 0002 0011 | FFFF FFFF]');

$Collator->change(katakana_before_hiragana => 1);

ok($Collator->viewSortKey("a\x{3042}"),
    '[0A15 1921 | 0020 0020 | 0008 0013 | FFFF FFFF]');
ok($Collator->viewSortKey("A\x{30A2}"),
    '[0A15 1921 | 0020 0020 | 0002 000F | FFFF FFFF]');

$Collator->change(upper_before_lower => 0);

ok($Collator->viewSortKey("a\x{3042}"),
    '[0A15 1921 | 0020 0020 | 0002 0013 | FFFF FFFF]');

ok($Collator->viewSortKey("A\x{30A2}"),
    '[0A15 1921 | 0020 0020 | 0008 000F | FFFF FFFF]');

$Collator->change(katakana_before_hiragana => 0);

ok($Collator->viewSortKey("a\x{3042}"),
    '[0A15 1921 | 0020 0020 | 0002 000E | FFFF FFFF]');

ok($Collator->viewSortKey("A\x{30A2}"),
    '[0A15 1921 | 0020 0020 | 0008 0011 | FFFF FFFF]');

#####

our $el = Unicode::Collate->new(
  entry => <<'ENTRY',
006C ; [.0B03.0020.0002.006C] # LATIN SMALL LETTER L
FF4C ; [.0B03.0020.0003.FF4C] # FULLWIDTH LATIN SMALL LETTER L; QQK
217C ; [.0B03.0020.0004.217C] # SMALL ROMAN NUMERAL FIFTY; QQK
2113 ; [.0B03.0020.0005.2113] # SCRIPT SMALL L; QQK
24DB ; [.0B03.0020.0006.24DB] # CIRCLED LATIN SMALL LETTER L; QQK
004C ; [.0B03.0020.0008.004C] # LATIN CAPITAL LETTER L
FF2C ; [.0B03.0020.0009.FF2C] # FULLWIDTH LATIN CAPITAL LETTER L; QQK
216C ; [.0B03.0020.000A.216C] # ROMAN NUMERAL FIFTY; QQK
2112 ; [.0B03.0020.000B.2112] # SCRIPT CAPITAL L; QQK
24C1 ; [.0B03.0020.000C.24C1] # CIRCLED LATIN CAPITAL LETTER L; QQK
ENTRY
  table => undef,
  normalization => undef,
);

our $el12 = '0B03 0B03 0B03 0B03 0B03 | 0020 0020 0020 0020 0020';

ok($el->viewSortKey("l\x{FF4C}\x{217C}\x{2113}\x{24DB}"),
    "[$el12 | 0002 0003 0004 0005 0006 | FFFF FFFF FFFF FFFF FFFF]");

ok($el->viewSortKey("L\x{FF2C}\x{216C}\x{2112}\x{24C1}"),
    "[$el12 | 0008 0009 000A 000B 000C | FFFF FFFF FFFF FFFF FFFF]");

$el->change(upper_before_lower => 1);

ok($el->viewSortKey("l\x{FF4C}\x{217C}\x{2113}\x{24DB}"),
    "[$el12 | 0008 0009 000A 000B 000C | FFFF FFFF FFFF FFFF FFFF]");

ok($el->viewSortKey("L\x{FF2C}\x{216C}\x{2112}\x{24C1}"),
    "[$el12 | 0002 0003 0004 0005 0006 | FFFF FFFF FFFF FFFF FFFF]");

$el->change(upper_before_lower => 0);

ok($el->viewSortKey("l\x{FF4C}\x{217C}\x{2113}\x{24DB}"),
    "[$el12 | 0002 0003 0004 0005 0006 | FFFF FFFF FFFF FFFF FFFF]");

ok($el->viewSortKey("L\x{FF2C}\x{216C}\x{2112}\x{24C1}"),
    "[$el12 | 0008 0009 000A 000B 000C | FFFF FFFF FFFF FFFF FFFF]");

#####


--- NEW FILE: illegal.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
use strict;
use warnings;

BEGIN {
    use Unicode::Collate;

    unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) {
	print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
		" needed for this test\n";
	print $@;
	exit;
    }
}

BEGIN { plan tests => 40 };

ok(1);

#########################

no warnings 'utf8';

# NULL is tailorable but illegal code points are not.
# illegal code points should be always ingored
# (cf. UCA, 7.1.1 Illegal code points).

my $illeg = Unicode::Collate->new(
  entry => <<'ENTRIES',
0000  ; [.0020.0000.0000.0000] # [0000] NULL
0001  ; [.0021.0000.0000.0001] # [0001] START OF HEADING
FFFE  ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
FFFF  ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
D800  ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
DFFF  ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
FDD0  ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
FDEF  ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
0002  ; [.0030.0000.0000.0002] # [0002] START OF TEXT
10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
0041  ; [.1000.0020.0008.0041] # latin A
0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
ENTRIES
  level => 1,
  table => undef,
  normalization => undef,
);

# 2..12
ok($illeg->lt("", "\x00"));
ok($illeg->lt("", "\x01"));
ok($illeg->eq("", "\x{FFFE}"));
ok($illeg->eq("", "\x{FFFF}"));
ok($illeg->eq("", "\x{D800}"));
ok($illeg->eq("", "\x{DFFF}"));
ok($illeg->eq("", "\x{FDD0}"));
ok($illeg->eq("", "\x{FDEF}"));
ok($illeg->lt("", "\x02"));
ok($illeg->eq("", "\x{10FFFF}"));
ok($illeg->eq("", "\x{110000}"));

# 13..22
ok($illeg->lt("\x00", "\x01"));
ok($illeg->lt("\x01", "\x02"));
ok($illeg->ne("\0", "\x{D800}"));
ok($illeg->ne("\0", "\x{DFFF}"));
ok($illeg->ne("\0", "\x{FDD0}"));
ok($illeg->ne("\0", "\x{FDEF}"));
ok($illeg->ne("\0", "\x{FFFE}"));
ok($illeg->ne("\0", "\x{FFFF}"));
ok($illeg->ne("\0", "\x{10FFFF}"));
ok($illeg->ne("\0", "\x{110000}"));

# 23..26
ok($illeg->eq("A",   "A\x{FFFF}"));
ok($illeg->gt("A\0", "A\x{FFFF}"));
ok($illeg->lt("A",  "A\0"));
ok($illeg->lt("AA", "A\0"));

##################

my($match, $str, $sub, $ret);

my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  level => 1,
  normalization => undef,
);

$sub = "pe";


$str = "Pe\x{300}\x{301}rl";
$ret = "Pe\x{300}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\0\0\x{301}rl";
$ret = "Pe\x{300}\0\0\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{DA00}\x{301}\x{DFFF}rl";
$ret = "Pe\x{DA00}\x{301}\x{DFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{FFFF}\x{301}rl";
$ret = "Pe\x{FFFF}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{110000}\x{301}rl";
$ret = "Pe\x{110000}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\x{d801}\x{301}rl";
$ret = "Pe\x{300}\x{d801}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\x{ffff}\x{301}rl";
$ret = "Pe\x{300}\x{ffff}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\x{110000}\x{301}rl";
$ret = "Pe\x{300}\x{110000}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{D9ab}\x{DFFF}rl";
$ret = "Pe\x{D9ab}\x{DFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{FFFF}rl";
$ret = "Pe\x{FFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{110000}rl";
$ret = "Pe\x{110000}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\x{D800}\x{DFFF}rl";
$ret = "Pe\x{300}\x{D800}\x{DFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\x{FFFF}rl";
$ret = "Pe\x{300}\x{FFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\x{110000}rl";
$ret = "Pe\x{300}\x{110000}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);



--- NEW FILE: index.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 65 };

use strict;
use warnings;
use Unicode::Collate;

our $IsEBCDIC = ord("A") != 0x41;

#########################

ok(1);

my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
);

##############

my %old_level = $Collator->change(level => 2);

my $str;

my $orig = "This is a Perl book.";
my $sub = "PERL";
my $rep = "camel";
my $ret = "This is a camel book.";

$str = $orig;
if (my($pos,$len) = $Collator->index($str, $sub)) {
  substr($str, $pos, $len, $rep);
}

ok($str, $ret);

$Collator->change(%old_level);

$str = $orig;
if (my($pos,$len) = $Collator->index($str, $sub)) {
  substr($str, $pos, $len, $rep);
}

ok($str, $orig);

##############

my $match;

$Collator->change(level => 1);

$str = "Pe\x{300}rl";
$sub = "pe";
$ret = "Pe\x{300}";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, $ret);

$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
$sub = "pE";
$ret = "P\x{300}e\x{300}\x{301}\x{303}";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, $ret);

$Collator->change(level => 2);

$str = "Pe\x{300}rl";
$sub = "pe";
$ret = undef;
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, $ret);

$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
$sub = "pE";
$ret = undef;
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, $ret);

$str = "Pe\x{300}rl";
$sub = "pe\x{300}";
$ret = "Pe\x{300}";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, $ret);

$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
$sub = "p\x{300}E\x{300}\x{301}\x{303}";
$ret = "P\x{300}e\x{300}\x{301}\x{303}";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, $ret);

##############

$Collator->change(level => 1);

$str = $IsEBCDIC
    ? "Ich mu\x{0059} studieren Perl."
    : "Ich mu\x{00DF} studieren Perl.";
$sub = $IsEBCDIC
    ? "m\x{00DC}ss"
    : "m\x{00FC}ss";
$ret = $IsEBCDIC
    ? "mu\x{0059}"
    : "mu\x{00DF}";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, $ret);

$Collator->change(%old_level);

$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, undef);

$match = undef;
if (my($pos,$len) = $Collator->index("", "")) {
    $match = substr("", $pos, $len);
}
ok($match, "");

$match = undef;
if (my($pos,$len) = $Collator->index("", "abc")) {
    $match = substr("", $pos, $len);
}
ok($match, undef);

##############

$Collator->change(level => 1);

$str = "\0\cA\0\cAe\0\x{300}\cA\x{301}\cB\x{302}\0 \0\cA";
$sub = "e";
$ret = "e\0\x{300}\cA\x{301}\cB\x{302}\0";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, $ret);

$Collator->change(level => 1);

$str = "\0\cA\0\cAe\0\cA\x{300}\0\cAe";
$sub = "e";
$ret = "e\0\cA\x{300}\0\cA";
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, $ret);


$Collator->change(%old_level);

$str = "e\x{300}";
$sub = "e";
$ret = undef;
$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, $ret);

##############

$Collator->change(level => 1);

$str = "The Perl is a language, and the perl is an interpreter.";
$sub = "PERL";

$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub, -40)) {
    $match = substr($str, $pos, $len);
}
ok($match, "Perl");

$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub, 4)) {
    $match = substr($str, $pos, $len);
}
ok($match, "Perl");

$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub, 5)) {
    $match = substr($str, $pos, $len);
}
ok($match, "perl");

$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub, 32)) {
    $match = substr($str, $pos, $len);
}
ok($match, "perl");

$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub, 33)) {
    $match = substr($str, $pos, $len);
}
ok($match, undef);

$match = undef;
if (my($pos, $len) = $Collator->index($str, $sub, 100)) {
    $match = substr($str, $pos, $len);
}
ok($match, undef);

$Collator->change(%old_level);

##############

my @ret;

$Collator->change(level => 1);

$ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
ok($ret);
ok($$ret eq "P\cBe\x{300}\cB");

@ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
ok($ret[0], "P\cBe\x{300}\cB");

$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
$sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss";

($ret) = $Collator->match($str, $sub);
ok($ret, $str);

$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
$sub = $IsEBCDIC ? "m\x{00DC}s" : "m\x{00FC}s";

($ret) = $Collator->match($str, $sub);
ok($ret, undef);

$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
ok($ret eq "P\cBe\x{300}\cB:pe:PE");

$ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
ok($ret == 3);

$str = "ABCDEF";
$sub = "cde";
$ret = $Collator->match($str, $sub);
$str = "01234567";
ok($ret && $$ret, "CDE");

$str = "ABCDEF";
$sub = "cde";
($ret) = $Collator->match($str, $sub);
$str = "01234567";
ok($ret, "CDE");


$Collator->change(level => 3);

$ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
ok($ret, undef);

@ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
ok(@ret == 0);

$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe");
ok($ret eq "");

$ret = $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe");
ok($ret == 0);

$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
ok($ret eq "pe");

$ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
ok($ret == 1);

$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
$sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss";

($ret) = $Collator->match($str, $sub);
ok($ret, undef);

$Collator->change(%old_level);

##############

$Collator->change(level => 1);

sub strreverse { scalar reverse shift }

$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->subst($str, "perl", 'Camel');
ok($ret, 1);
ok($str, "Camel and PERL.");

$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->subst($str, "perl", \&strreverse);
ok($ret, 1);
ok($str, "lr\cB\x{300}e\cBP and PERL.");

$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->gsubst($str, "perl", 'Camel');
ok($ret, 2);
ok($str, "Camel and Camel.");

$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->gsubst($str, "perl", \&strreverse);
ok($ret, 2);
ok($str, "lr\cB\x{300}e\cBP and LREP.");

$str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L...";
$Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" });
ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> "
	. "<b>CAMEL</b> horse <b>cAm\0E\0L</b>...");

$Collator->change(level => 3);

$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->subst($str, "perl", "Camel");
ok(! $ret);
ok($str, "P\cBe\x{300}\cBrl and PERL.");

$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->subst($str, "perl", \&strreverse);
ok(! $ret);
ok($str, "P\cBe\x{300}\cBrl and PERL.");

$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->gsubst($str, "perl", "Camel");
ok($ret, 0);
ok($str, "P\cBe\x{300}\cBrl and PERL.");

$str = "P\cBe\x{300}\cBrl and PERL.";
$ret = $Collator->gsubst($str, "perl", \&strreverse);
ok($ret, 0);
ok($str, "P\cBe\x{300}\cBrl and PERL.");

$Collator->change(%old_level);

##############

$str = "Perl and Camel";
$ret = $Collator->gsubst($str, "\cA\cA\0", "AB");
ok($ret, 15);
ok($str, "ABPABeABrABlAB ABaABnABdAB ABCABaABmABeABlAB");

$str = '';
$ret = $Collator->subst($str, "", "ABC");
ok($ret, 1);
ok($str, "ABC");

$str = '';
$ret = $Collator->gsubst($str, "", "ABC");
ok($ret, 1);
ok($str, "ABC");

$str = 'PPPPP';
$ret = $Collator->gsubst($str, 'PP', "ABC");
ok($ret, 2);
ok($str, "ABCABCP");

##############

# Shifted; ignorable after variable

($ret) = $Collator->match("A?\x{300}!\x{301}\x{344}B\x{315}", "?!");
ok($ret, "?\x{300}!\x{301}\x{344}");

$Collator->change(alternate => 'Non-ignorable');

($ret) = $Collator->match("A?\x{300}!\x{301}B\x{315}", "?!");
ok($ret, undef);


--- NEW FILE: cjkrange.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 51 };

use strict;
use warnings;
use Unicode::Collate;

ok(1);

my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
);

# U+9FA6..U+9FBB are CJK UI since Unicode 4.1.0.
# U+3400 is CJK UI ExtA, then greater than any CJK UI.

##### 2..11
ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
ok($Collator->lt("\x{9FA6}", "\x{3400}")); # new UI < ExtA
ok($Collator->lt("\x{9FBB}", "\x{3400}")); # new UI < ExtA
ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB

##### 12..21
$Collator->change(UCA_Version => 11);
ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # Unassigned > Unassigned
ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB

##### 22..31
$Collator->change(UCA_Version => 9);
ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # Unassigned > Unassigned
ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB

##### 32..41
$Collator->change(UCA_Version => 8);
ok($Collator->gt("\x{9FA5}", "\x{3400}")); # UI > ExtA
ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < Unassigned(ExtB)
ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < Unassigned(ExtB)
ok($Collator->lt("\x{9FFF}","\x{20000}")); # Unassigned < Unassigned(ExtB)
ok($Collator->lt("\x{9FFF}","\x{2A6D6}")); # Unassigned < Unassigned(ExtB)

##### 42..51
$Collator->change(UCA_Version => 14);
ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
ok($Collator->lt("\x{9FA6}", "\x{3400}")); # new UI < ExtA
ok($Collator->lt("\x{9FBB}", "\x{3400}")); # new UI < ExtA
ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB


--- NEW FILE: override.t ---
BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 76 };

use strict;
use warnings;
use Unicode::Collate;

ok(1);

##### 2..6

my $all_undef_8 = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideCJK => undef,
  overrideHangul => undef,
  UCA_Version => 8,
);

# All in the Unicode code point order.
# No hangul decomposition.

ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));


##### 7..11

my $all_undef_9 = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideCJK => undef,
  overrideHangul => undef,
  UCA_Version => 9,
);

# CJK Ideo. < CJK ext A/B < Others.
# No hangul decomposition.

ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned

##### 12..16

my $ignoreHangul = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideHangul => sub {()},
  entry => <<'ENTRIES',
AE00 ; [.0100.0020.0002.AE00]  # Hangul GEUL
ENTRIES
);

# All Hangul Syllables except U+AE00 are ignored.

ok($ignoreHangul->eq("\x{AC00}", ""));
ok($ignoreHangul->lt("\x{AC00}", "\0"));
ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.


my $ignoreCJK = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideCJK => sub {()},
  entry => <<'ENTRIES',
5B57 ; [.0107.0020.0002.5B57]  # CJK Ideograph "Letter"
ENTRIES
);

# All CJK Unified Ideographs except U+5B57 are ignored.

##### 17..21
ok($ignoreCJK->eq("\x{4E00}", ""));
ok($ignoreCJK->lt("\x{4E00}", "\0"));
ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK.
ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK.
ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned.

##### 22..29
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));

##### 30..37
$ignoreCJK->change(UCA_Version => 9);
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));

##### 38..45
$ignoreCJK->change(UCA_Version => 8);
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));

##### 46..53
$ignoreCJK->change(UCA_Version => 14);
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));

##### 54..76
my $overCJK = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  entry => <<'ENTRIES',
0061 ; [.0101.0020.0002.0061] # latin a
0041 ; [.0101.0020.0008.0041] # LATIN A
4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
ENTRIES
  overrideCJK => sub {
    my $u = 0xFFFF - $_[0]; # reversed
    [$u, 0x20, 0x2, $u];
  },
);

ok($overCJK->lt("a", "A")); # diff. at level 3.
ok($overCJK->lt( "\x{4E03}",  "\x{4E00}")); # diff. at level 2.
ok($overCJK->lt("A\x{4E03}", "A\x{4E00}"));
ok($overCJK->lt("A\x{4E03}", "a\x{4E00}"));
ok($overCJK->lt("a\x{4E03}", "A\x{4E00}"));

ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}"));
ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}"));
ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));

$overCJK->change(UCA_Version => 9);

ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
ok($overCJK->lt("a\x{9FA5}", "A\x{9FA6}"));
ok($overCJK->lt("a\x{9FA6}", "A\x{9FBB}"));
ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));

$overCJK->change(UCA_Version => 14);

ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}"));
ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}"));
ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));


--- NEW FILE: ignor.t ---
BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 41 };

use strict;
use warnings;
use Unicode::Collate;

ok(1);

my $trad = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
  ignoreName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
  level => 3,
  entry => << 'ENTRIES',
 0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
 0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish
 0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish
ENTRIES
);
# 0063  ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
# 0064  ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D

##### 2..3

ok(
  join(':', $trad->sort( qw/ acha aca ada acia acka / ) ),
  join(':',              qw/ aca acia acka acha ada / ),
);

ok(
  join(':', $trad->sort( qw/ ACHA ACA ADA ACIA ACKA / ) ),
  join(':',              qw/ ACA ACIA ACKA ACHA ADA / ),
);

##### 4..7

ok($trad->gt("ocho", "oc\cAho")); # UCA v14
ok($trad->gt("ocho", "oc\0\cA\0\cBho"));  # UCA v14
ok($trad->eq("-", ""));
ok($trad->gt("ocho", "oc-ho"));

##### 8..11

$trad->change(UCA_Version => 9);

ok($trad->eq("ocho", "oc\cAho")); # UCA v9
ok($trad->eq("ocho", "oc\0\cA\0\cBho")); # UCA v9
ok($trad->eq("-", ""));
ok($trad->gt("ocho", "oc-ho"));

##### 12..15

$trad->change(UCA_Version => 8);

ok($trad->gt("ocho", "oc\cAho"));
ok($trad->gt("ocho", "oc\0\cA\0\cBho"));
ok($trad->eq("-", ""));
ok($trad->gt("ocho", "oc-ho"));


##### 16..19

$trad->change(UCA_Version => 9);

my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";

# HIRAGANA and KATAKANA are ignorable via ignoreName
ok($trad->eq($hiragana, ""));
ok($trad->eq("", $katakana));
ok($trad->eq($hiragana, $katakana));
ok($trad->eq($katakana, $hiragana));


##### 20..31

# According to Conformance Test (UCA_Version == 9 or 11),
# a L3-ignorable is treated as a completely ignorable.

my $L3ignorable = Unicode::Collate->new(
  alternate => 'Non-ignorable',
  level => 3,
  table => undef,
  normalization => undef,
  UCA_Version => 9,
  entry => <<'ENTRIES',
0000  ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
0001  ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429)
0591  ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA
1D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM
0021  ; [*024B.0020.0002.0021] # EXCLAMATION MARK
09BE  ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA
09C7  ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E
09CB  ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
09C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
1D1B9 ; [*098A.0020.0002.1D1B9] # MUSICAL SYMBOL SEMIBREVIS WHITE
1D1BA ; [*098B.0020.0002.1D1BA] # MUSICAL SYMBOL SEMIBREVIS BLACK
1D1BB ; [*098A.0020.0002.1D1B9][.0000.0000.0000.1D165] # M.S. MINIMA
1D1BC ; [*098B.0020.0002.1D1BA][.0000.0000.0000.1D165] # M.S. MINIMA BLACK
ENTRIES
);

ok($L3ignorable->lt("\cA", "!"));
ok($L3ignorable->lt("\x{591}", "!"));
ok($L3ignorable->eq("\cA", "\x{591}"));
ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\cA\x{09BE}A"));
ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{0591}\x{09BE}A"));
ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{1D165}\x{09BE}A"));
ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09CB}A"));
ok($L3ignorable->lt("\x{1D1BB}", "\x{1D1BC}"));
ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}"));
ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}"));
ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}\x{1D165}"));
ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}\x{1D165}"));

##### 32..41

my $c = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
  level => 1,
  UCA_Version => 14,
  entry => << 'ENTRIES',
034F  ; [.0000.0000.0000.034F] # COMBINING GRAPHEME JOINER
0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish
0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish
ENTRIES
);
# 0063  ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
# 0064  ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D

ok($c->gt("ocho", "oc\x00\x00ho"));
ok($c->gt("ocho", "oc\cAho"));
ok($c->gt("ocho", "oc\x{034F}ho"));
ok($c->gt("ocio", "oc\x{034F}ho"));
ok($c->lt("ocgo", "oc\x{034F}ho"));
ok($c->lt("oceo", "oc\x{034F}ho"));

ok($c->viewSortKey("ocho"),         "[0B4B 0A3F 0B4B | | |]");
ok($c->viewSortKey("oc\x00\x00ho"), "[0B4B 0A3D 0AB9 0B4B | | |]");
ok($c->viewSortKey("oc\cAho"),      "[0B4B 0A3D 0AB9 0B4B | | |]");
ok($c->viewSortKey("oc\x{034F}ho"), "[0B4B 0A3D 0AB9 0B4B | | |]");



--- NEW FILE: normal.t ---
BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

BEGIN {
    eval { require Unicode::Normalize; };
    if ($@) {
	print "1..0 # skipped: Unicode::Normalize needed for this test\n";
	print $@;
	exit;
    }
}
use Test;
BEGIN { plan tests => 100 };

use strict;
use warnings;
use Unicode::Collate;

our $Aring = pack('U', 0xC5);
our $aring = pack('U', 0xE5);

our $entry = <<'ENTRIES';
030A; [.0000.030A.0002] # COMBINING RING ABOVE
212B; [.002B.0020.0008] # ANGSTROM SIGN
0061; [.0A41.0020.0002] # LATIN SMALL LETTER A
0041; [.0A41.0020.0008] # LATIN CAPITAL LETTER A
007A; [.0A5A.0020.0002] # LATIN SMALL LETTER Z
005A; [.0A5A.0020.0008] # LATIN CAPITAL LETTER Z
FF41; [.0A87.0020.0002] # LATIN SMALL LETTER A
FF21; [.0A87.0020.0008] # LATIN CAPITAL LETTER A
00E5; [.0AC5.0020.0002] # LATIN SMALL LETTER A WITH RING ABOVE
00C5; [.0AC5.0020.0008] # LATIN CAPITAL LETTER A WITH RING ABOVE
ENTRIES

# Aong < A+ring < Z < fullA+ring < A-ring

#########################

our $noN = Unicode::Collate->new(
    level => 1,
    table => undef,
    normalization => undef,
    entry => $entry,
);

our $nfc = Unicode::Collate->new(
  level => 1,
  table => undef,
  normalization => 'NFC',
  entry => $entry,
);

our $nfd = Unicode::Collate->new(
  level => 1,
  table => undef,
  normalization => 'NFD',
  entry => $entry,
);

our $nfkc = Unicode::Collate->new(
  level => 1,
  table => undef,
  normalization => 'NFKC',
  entry => $entry,
);

our $nfkd = Unicode::Collate->new(
  level => 1,
  table => undef,
  normalization => 'NFKD',
  entry => $entry,
);

ok($noN->lt("\x{212B}", "A"));
ok($noN->lt("\x{212B}", $Aring));
ok($noN->lt("A\x{30A}", $Aring));
ok($noN->lt("A",       "\x{FF21}"));
ok($noN->lt("Z",       "\x{FF21}"));
ok($noN->lt("Z",        $Aring));
ok($noN->lt("\x{212B}", $aring));
ok($noN->lt("A\x{30A}", $aring));
ok($noN->lt("Z",        $aring));
ok($noN->lt("a\x{30A}", "Z"));

ok($nfd->eq("\x{212B}", "A"));
ok($nfd->eq("\x{212B}", $Aring));
ok($nfd->eq("A\x{30A}", $Aring));
ok($nfd->lt("A",       "\x{FF21}"));
ok($nfd->lt("Z",       "\x{FF21}"));
ok($nfd->gt("Z",        $Aring));
ok($nfd->eq("\x{212B}", $aring));
ok($nfd->eq("A\x{30A}", $aring));
ok($nfd->gt("Z",        $aring));
ok($nfd->lt("a\x{30A}", "Z"));

ok($nfc->gt("\x{212B}", "A"));
ok($nfc->eq("\x{212B}", $Aring));
ok($nfc->eq("A\x{30A}", $Aring));
ok($nfc->lt("A",       "\x{FF21}"));
ok($nfc->lt("Z",       "\x{FF21}"));
ok($nfc->lt("Z",        $Aring));
ok($nfc->eq("\x{212B}", $aring));
ok($nfc->eq("A\x{30A}", $aring));
ok($nfc->lt("Z",        $aring));
ok($nfc->gt("a\x{30A}", "Z"));

ok($nfkd->eq("\x{212B}", "A"));
ok($nfkd->eq("\x{212B}", $Aring));
ok($nfkd->eq("A\x{30A}", $Aring));
ok($nfkd->eq("A",       "\x{FF21}"));
ok($nfkd->gt("Z",       "\x{FF21}"));
ok($nfkd->gt("Z",        $Aring));
ok($nfkd->eq("\x{212B}", $aring));
ok($nfkd->eq("A\x{30A}", $aring));
ok($nfkd->gt("Z",        $aring));
ok($nfkd->lt("a\x{30A}", "Z"));

ok($nfkc->gt("\x{212B}", "A"));
ok($nfkc->eq("\x{212B}", $Aring));
ok($nfkc->eq("A\x{30A}", $Aring));
ok($nfkc->eq("A",       "\x{FF21}"));
ok($nfkc->gt("Z",       "\x{FF21}"));
ok($nfkc->lt("Z",        $Aring));
ok($nfkc->eq("\x{212B}", $aring));
ok($nfkc->eq("A\x{30A}", $aring));
ok($nfkc->lt("Z",        $aring));
ok($nfkc->gt("a\x{30A}", "Z"));

$nfd->change(normalization => undef);

ok($nfd->lt("\x{212B}", "A"));
ok($nfd->lt("\x{212B}", $Aring));
ok($nfd->lt("A\x{30A}", $Aring));
ok($nfd->lt("A",       "\x{FF21}"));
ok($nfd->lt("Z",       "\x{FF21}"));
ok($nfd->lt("Z",        $Aring));
ok($nfd->lt("\x{212B}", $aring));
ok($nfd->lt("A\x{30A}", $aring));
ok($nfd->lt("Z",        $aring));
ok($nfd->lt("a\x{30A}", "Z"));

$nfd->change(normalization => 'C');

ok($nfd->gt("\x{212B}", "A"));
ok($nfd->eq("\x{212B}", $Aring));
ok($nfd->eq("A\x{30A}", $Aring));
ok($nfd->lt("A",       "\x{FF21}"));
ok($nfd->lt("Z",       "\x{FF21}"));
ok($nfd->lt("Z",        $Aring));
ok($nfd->eq("\x{212B}", $aring));
ok($nfd->eq("A\x{30A}", $aring));
ok($nfd->lt("Z",        $aring));
ok($nfd->gt("a\x{30A}", "Z"));

$nfd->change(normalization => 'D');

ok($nfd->eq("\x{212B}", "A"));
ok($nfd->eq("\x{212B}", $Aring));
ok($nfd->eq("A\x{30A}", $Aring));
ok($nfd->lt("A",       "\x{FF21}"));
ok($nfd->lt("Z",       "\x{FF21}"));
ok($nfd->gt("Z",        $Aring));
ok($nfd->eq("\x{212B}", $aring));
ok($nfd->eq("A\x{30A}", $aring));
ok($nfd->gt("Z",        $aring));
ok($nfd->lt("a\x{30A}", "Z"));

$nfd->change(normalization => 'KD');

ok($nfd->eq("\x{212B}", "A"));
ok($nfd->eq("\x{212B}", $Aring));
ok($nfd->eq("A\x{30A}", $Aring));
ok($nfd->eq("A",       "\x{FF21}"));
ok($nfd->gt("Z",       "\x{FF21}"));
ok($nfd->gt("Z",        $Aring));
ok($nfd->eq("\x{212B}", $aring));
ok($nfd->eq("A\x{30A}", $aring));
ok($nfd->gt("Z",        $aring));
ok($nfd->lt("a\x{30A}", "Z"));

$nfd->change(normalization => 'KC');

ok($nfd->gt("\x{212B}", "A"));
ok($nfd->eq("\x{212B}", $Aring));
ok($nfd->eq("A\x{30A}", $Aring));
ok($nfd->eq("A",       "\x{FF21}"));
ok($nfd->gt("Z",       "\x{FF21}"));
ok($nfd->lt("Z",        $Aring));
ok($nfd->eq("\x{212B}", $aring));
ok($nfd->eq("A\x{30A}", $aring));
ok($nfd->lt("Z",        $aring));
ok($nfd->gt("a\x{30A}", "Z"));


--- NEW FILE: test.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 113 };

use strict;
use warnings;
use Unicode::Collate;

ok(1);

sub _pack_U   { Unicode::Collate::pack_U(@_) }
sub _unpack_U { Unicode::Collate::unpack_U(@_) }

my $A_acute = _pack_U(0xC1);
my $a_acute = _pack_U(0xE1);
my $acute   = _pack_U(0x0301);

my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";

##### 2..7

my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
);

ok(ref $Collator, "Unicode::Collate");

ok($Collator->cmp("", ""), 0);
ok($Collator->eq("", ""));
ok($Collator->cmp("", "perl"), -1);

ok(
  join(':', $Collator->sort( qw/ acha aca ada acia acka / ) ),
  join(':',                  qw/ aca acha acia acka ada / ),
);

ok(
  join(':', $Collator->sort( qw/ ACHA ACA ADA ACIA ACKA / ) ),
  join(':',                  qw/ ACA ACHA ACIA ACKA ADA / ),
);

##### 8..18

ok($Collator->cmp("A$acute", $A_acute), 0); # @version 3.1.1 (prev: -1)
ok($Collator->cmp($a_acute, $A_acute), -1);
ok($Collator->eq("A\cA$acute", $A_acute)); # UCA v9. \cA is invariant.

my %old_level = $Collator->change(level => 1);
ok($Collator->eq("A$acute", $A_acute));
ok($Collator->eq("A", $A_acute));

ok($Collator->change(level => 2)->eq($a_acute, $A_acute));
ok($Collator->lt("A", $A_acute));

ok($Collator->change(%old_level)->lt("A", $A_acute));
ok($Collator->lt("A", $A_acute));
ok($Collator->lt("A", $a_acute));
ok($Collator->lt($a_acute, $A_acute));

##### 19..25

$Collator->change(level => 2);

ok($Collator->{level}, 2);

ok( $Collator->cmp("ABC","abc"), 0);
ok( $Collator->eq("ABC","abc") );
ok( $Collator->le("ABC","abc") );
ok( $Collator->cmp($hiragana, $katakana), 0);
ok( $Collator->eq($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );

##### 26..31

# hangul
ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") );
ok( $Collator->eq("a\x{AE00}b", "a\x{1100}\x{1173}\x{11AF}b") );
ok( $Collator->gt("a\x{AE00}b", "a\x{1100}\x{1173}b\x{11AF}") );
ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") );
ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") );
ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < hiragana

##### 32..40

$Collator->change(%old_level, katakana_before_hiragana => 1);

ok($Collator->{level}, 4);

ok( $Collator->cmp("abc", "ABC"), -1);
ok( $Collator->ne("abc", "ABC") );
ok( $Collator->lt("abc", "ABC") );
ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ne($hiragana, $katakana) );
ok( $Collator->gt($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );

##### 41..46

$Collator->change(upper_before_lower => 1);

ok( $Collator->cmp("abc", "ABC"), 1);
ok( $Collator->ge("abc", "ABC"), 1);
ok( $Collator->gt("abc", "ABC"), 1);
ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ge($hiragana, $katakana), 1);
ok( $Collator->gt($hiragana, $katakana), 1);

##### 47..48

$Collator->change(katakana_before_hiragana => 0);

ok( $Collator->cmp("abc", "ABC"), 1);
ok( $Collator->cmp($hiragana, $katakana), -1);

##### 49..52

$Collator->change(upper_before_lower => 0);

ok( $Collator->cmp("abc", "ABC"), -1);
ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), -1);
ok( $Collator->lt($hiragana, $katakana) );

##### 53..54

my $ignoreAE = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
  ignoreChar => qr/^[aAeE]$/,
);

ok($ignoreAE->eq("element","lament"));
ok($ignoreAE->eq("Perl","ePrl"));

##### 55

my $onlyABC = Unicode::Collate->new(
    table => undef,
    normalization => undef,
    entry => << 'ENTRIES',
0061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A
0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
0062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B
0042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B
0063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C
0043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C
ENTRIES
);

ok(
  join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ),
  join(':',                 qw/ A aB Ab ABA BAC cAc cc / ),
);

##### 56..59

my $undefAE = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
  undefChar => qr/^[aAeE]$/,
);

ok($undefAE ->gt("edge","fog"));
ok($Collator->lt("edge","fog"));
ok($undefAE ->gt("lake","like"));
ok($Collator->lt("lake","like"));

##### 60..69

# Table is undefined, then no entry is defined.

my $undef_table = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  level => 1,
);

# in the Unicode code point order
ok($undef_table->lt('', 'A'));
ok($undef_table->lt('ABC', 'B'));

# Hangul should be decomposed (even w/o Unicode::Normalize).
ok($undef_table->lt("Perl", "\x{AC00}"));
ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}"));
ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}"));
ok($undef_table->lt("\x{AE00}", "\x{3042}"));
  # U+AC00: Hangul GA
  # U+AE00: Hangul GEUL
  # U+3042: Hiragana A

# Weight for CJK Ideographs is defined, though.
ok($undef_table->lt("", "\x{4E00}"));
ok($undef_table->lt("\x{4E8C}","ABC"));
ok($undef_table->lt("\x{4E00}","\x{3042}"));
ok($undef_table->lt("\x{4E00}","\x{4E8C}"));
  # U+4E00: Ideograph "ONE"
  # U+4E8C: Ideograph "TWO"


##### 70..74

my $few_entries = Unicode::Collate->new(
  entry => <<'ENTRIES',
0050 ; [.0101.0020.0002.0050]  # P
0045 ; [.0102.0020.0002.0045]  # E
0052 ; [.0103.0020.0002.0052]  # R
004C ; [.0104.0020.0002.004C]  # L
1100 ; [.0105.0020.0002.1100]  # Hangul Jamo initial G
1175 ; [.0106.0020.0002.1175]  # Hangul Jamo middle I
5B57 ; [.0107.0020.0002.5B57]  # CJK Ideograph "Letter"
ENTRIES
  table => undef,
  normalization => undef,
);

# defined before undefined

my $sortABC = join '',
    $few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ ");

ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ");

ok($few_entries->lt('E', 'D'));
ok($few_entries->lt("\x{5B57}", "\x{4E00}"));
ok($few_entries->lt("\x{AE30}", "\x{AC00}"));

# Hangul must be decomposed.

ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}"));

##### 75..79

my $dropArticles = Unicode::Collate->new(
  table => "keys.txt",
  normalization => undef,
  preprocess => sub {
    my $string = shift;
    $string =~ s/\b(?:an?|the)\s+//ig;
    $string;
  },
);

ok($dropArticles->eq("camel", "a    camel"));
ok($dropArticles->eq("Perl", "The Perl"));
ok($dropArticles->lt("the pen", "a pencil"));
ok($Collator->lt("Perl", "The Perl"));
ok($Collator->gt("the pen", "a pencil"));

##### 80..81

my $backLevel1 = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  backwards => [ 1 ],
);

# all strings are reversed at level 1.

ok($backLevel1->gt("AB", "BA"));
ok($backLevel1->gt("\x{3042}\x{3044}", "\x{3044}\x{3042}"));

##### 82..89

my $backLevel2 = Unicode::Collate->new(
  table => "keys.txt",
  normalization => undef,
  undefName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
  backwards => 2,
);

ok($backLevel2->gt("Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}"));
ok($backLevel2->gt("ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}"));
ok($Collator  ->lt("Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}"));
ok($Collator  ->lt("ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}"));

# HIRAGANA and KATAKANA are made undefined via undefName.
# So they are after CJK Unified Ideographs.

ok($backLevel2->lt("\x{4E00}", $hiragana));
ok($backLevel2->lt("\x{4E03}", $katakana));
ok($Collator  ->gt("\x{4E00}", $hiragana));
ok($Collator  ->gt("\x{4E03}", $katakana));


##### 90..96

my $O_str = Unicode::Collate->new(
  table => "keys.txt",
  normalization => undef,
  entry => <<'ENTRIES',
0008  ; [*0008.0000.0000.0000] # BACKSPACE (need to be non-ignorable)
004F 0337 ; [.0B53.0020.0008.004F] # capital O WITH SHORT SOLIDUS OVERLAY
006F 0008 002F ; [.0B53.0020.0002.006F] # LATIN SMALL LETTER O WITH STROKE
004F 0008 002F ; [.0B53.0020.0008.004F] # LATIN CAPITAL LETTER O WITH STROKE
006F 0337 ; [.0B53.0020.0002.004F] # small O WITH SHORT SOLIDUS OVERLAY
200B  ; [.2000.0000.0000.0000] # ZERO WIDTH SPACE (may be non-sense but ...)
#00F8 ; [.0B53.0020.0002.00F8] # LATIN SMALL LETTER O WITH STROKE
#00D8 ; [.0B53.0020.0008.00D8] # LATIN CAPITAL LETTER O WITH STROKE
ENTRIES
);

my $o_BS_slash = _pack_U(0x006F, 0x0008, 0x002F);
my $O_BS_slash = _pack_U(0x004F, 0x0008, 0x002F);
my $o_sol    = _pack_U(0x006F, 0x0337);
my $O_sol    = _pack_U(0x004F, 0x0337);
my $o_stroke = _pack_U(0x00F8);
my $O_stroke = _pack_U(0x00D8);

ok($O_str->eq($o_stroke, $o_BS_slash));
ok($O_str->eq($O_stroke, $O_BS_slash));

ok($O_str->eq($o_stroke, $o_sol));
ok($O_str->eq($O_stroke, $O_sol));

ok($Collator->eq("\x{200B}", "\0"));
ok($O_str   ->gt("\x{200B}", "\0"));
ok($O_str   ->gt("\x{200B}", "A"));

##### 97..107

my %origVer = $Collator->change(UCA_Version => 8);

$Collator->change(level => 3);

ok($Collator->gt("!\x{300}", ""));
ok($Collator->gt("!\x{300}", "!"));
ok($Collator->eq("!\x{300}", "\x{300}"));

$Collator->change(level => 2);

ok($Collator->eq("!\x{300}", "\x{300}"));

$Collator->change(level => 4);

ok($Collator->gt("!\x{300}", "!"));
ok($Collator->lt("!\x{300}", "\x{300}"));

$Collator->change(%origVer, level => 3);

ok($Collator->eq("!\x{300}", ""));
ok($Collator->eq("!\x{300}", "!"));
ok($Collator->lt("!\x{300}", "\x{300}"));

$Collator->change(level => 4);

ok($Collator->gt("!\x{300}", ""));
ok($Collator->eq("!\x{300}", "!"));

##### 108..113

$_ = 'Foo';

my $c = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
  upper_before_lower => 1,
);

ok($_, 'Foo'); # fixed at v. 0.52; no longer clobber $_

my($temp, @temp); # Not the result but the side effect matters.

$_ = 'Foo';
$temp = $c->getSortKey("abc");
ok($_, 'Foo');

$_ = 'Foo';
$temp = $c->viewSortKey("abc");
ok($_, 'Foo');

$_ = 'Foo';
@temp = $c->sort("abc", "xyz", "def");
ok($_, 'Foo');

$_ = 'Foo';
@temp = $c->index("perl5", "RL");
ok($_, 'Foo');

$_ = 'Foo';
@temp = $c->index("perl5", "LR");
ok($_, 'Foo');

#####


--- NEW FILE: altern.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 37 };

use strict;
use warnings;
use Unicode::Collate;

ok(1);

#########################

sub _pack_U   { Unicode::Collate::pack_U(@_) }
sub _unpack_U { Unicode::Collate::unpack_U(@_) }

my $A_acute = _pack_U(0xC1);
my $acute   = _pack_U(0x0301);

my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
);

my %origAlt = $Collator->change(alternate => 'Blanked');

ok($Collator->lt("death", "de luge"));
ok($Collator->lt("de luge", "de-luge"));
ok($Collator->lt("de-luge", "deluge"));
ok($Collator->lt("deluge", "de\x{2010}luge"));
ok($Collator->lt("deluge", "de Luge"));

$Collator->change(alternate => 'Non-ignorable');

ok($Collator->lt("de luge", "de Luge"));
ok($Collator->lt("de Luge", "de-luge"));
ok($Collator->lt("de-Luge", "de\x{2010}luge"));
ok($Collator->lt("de-luge", "death"));
ok($Collator->lt("death", "deluge"));

$Collator->change(alternate => 'Shifted');

ok($Collator->lt("death", "de luge"));
ok($Collator->lt("de luge", "de-luge"));
ok($Collator->lt("de-luge", "deluge"));
ok($Collator->lt("deluge", "de Luge"));
ok($Collator->lt("de Luge", "deLuge"));

$Collator->change(alternate => 'Shift-Trimmed');

ok($Collator->lt("death", "deluge"));
ok($Collator->lt("deluge", "de luge"));
ok($Collator->lt("de luge", "de-luge"));
ok($Collator->lt("de-luge", "deLuge"));
ok($Collator->lt("deLuge", "de Luge"));

$Collator->change(%origAlt);

ok($Collator->{alternate}, 'shifted');

##############

# ignorable after alternate

# Shifted;
ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
ok($Collator->eq("?\x{300}", "?"));
ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.

$Collator->change(level => 3);
ok($Collator->eq("\cA", "?"));

$Collator->change(alternate => 'blanked', level => 4);
ok($Collator->eq("?\x{300}!\x{301}\x{315}", "?!"));
ok($Collator->eq("?\x{300}A\x{301}", "?$A_acute"));
ok($Collator->eq("?\x{300}", "?"));
ok($Collator->eq("?\x{344}", "?")); # U+0344 has two CEs.

$Collator->change(level => 3);
ok($Collator->eq("\cA", "?"));

$Collator->change(alternate => 'Non-ignorable', level => 4);

ok($Collator->lt("?\x{300}", "?!"));
ok($Collator->gt("?\x{300}A$acute", "?$A_acute"));
ok($Collator->gt("?\x{300}", "?"));
ok($Collator->gt("?\x{344}", "?"));

$Collator->change(level => 3);
ok($Collator->lt("\cA", "?"));

$Collator->change(alternate => 'Shifted', level => 4);


--- NEW FILE: illegalp.t ---

BEGIN {
    unless ("A" eq pack('U', 0x41)) {
	print "1..0 # Unicode::Collate " .
	    "cannot stringify a Unicode code point\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}) {
	chdir('t') if -d 't';
	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 17 };

use strict;
use warnings;

ok(1);

#
# No test for Unicode::Collate is included in this .t file.
#
# UCA conformance test requires completely ignorable characters
# (including noncharacters) must be able to be sorted in code point order.
# If not so, Unicode::Collate must not be compliant with UCA.
#
# ~~~ CollationTest_SHIFTED.txt in CollationTest-4.0.0
#
# 206F 0021;	# ! NOMINAL DIGIT SHAPES	[| | | 0251]
# D800 0021;	# ! <surrogate-D800>	[| | | 0251]
# DFFF 0021;	# ! <surrogate-DFFF>	[| | | 0251]
# FDD0 0021;	# ! <noncharacter-FDD0>	[| | | 0251]
# FFFB 0021;	# ! INTERLINEAR ANNOTATION TERMINATOR	[| | | 0251]
# FFFE 0021;	# ! <noncharacter-FFFE>	[| | | 0251]
# FFFF 0021;	# ! <noncharacter-FFFF>	[| | | 0251]
# 1D165 0021;	# ! MS. Cm. STEM	[| | | 0251]
#
# ~~~ CollationTest_NON_IGNORABLE.txt in CollationTest-4.0.0
#
# 206F 0021;	# ! NOMINAL DIGIT SHAPES	[0251 | 0020 | 0002 |]
# D800 0021;	# ! <surrogate-D800>	[0251 | 0020 | 0002 |]
# DFFF 0021;	# ! <surrogate-DFFF>	[0251 | 0020 | 0002 |]
# FDD0 0021;	# ! <noncharacter-FDD0>	[0251 | 0020 | 0002 |]
# FFFB 0021;	# ! INTERLINEAR ANNOTATION TERMINATOR	[0251 | 0020 | 0002 |]
# FFFE 0021;	# ! <noncharacter-FFFE>	[0251 | 0020 | 0002 |]
# FFFF 0021;	# ! <noncharacter-FFFF>	[0251 | 0020 | 0002 |]
# 1D165 0021;	# ! MS. Cm. STEM	[0251 | 0020 | 0002 |]
#

no warnings 'utf8';

ok("\x{206F}!" lt "\x{D800}!");
ok(pack('U*', 0x206F, 0x21) lt pack('U*', 0xD800, 0x21));

ok("\x{D800}!" lt "\x{DFFF}!");
ok(pack('U*', 0xD800, 0x21) lt pack('U*', 0xDFFF, 0x21));

ok("\x{DFFF}!" lt "\x{FDD0}!");
ok(pack('U*', 0xDFFF, 0x21) lt pack('U*', 0xFDD0, 0x21) );

ok("\x{FDD0}!" lt "\x{FFFB}!");
ok(pack('U*', 0xFDD0, 0x21) lt pack('U*', 0xFFFB, 0x21));

ok("\x{FFFB}!" lt "\x{FFFE}!");
ok(pack('U*', 0xFFFB, 0x21) lt pack('U*', 0xFFFE, 0x21));

ok("\x{FFFE}!" lt "\x{FFFF}!");
ok(pack('U*', 0xFFFE, 0x21) lt pack('U*', 0xFFFF, 0x21));

ok("\x{FFFF}!" lt "\x{1D165}!");
ok(pack('U*', 0xFFFF, 0x21) lt pack('U*', 0x1D165, 0x21));

ok("\000!" lt "\x{FFFF}!");
ok(pack('U*', 0, 0x21) lt pack('U*', 0xFFFF, 0x21));





More information about the dslinux-commit mailing list