Files

177 lines
5.6 KiB
Perl
Raw Permalink Normal View History

#!perl
use strict;
use utf8;
use warnings;
use Test::More;
use Test::Differences;
use Test::Exception;
use Zonemaster::Engine::Normalization qw( normalize_name trim_space );
sub char_to_hex_esc {
my ($char) = @_;
my $ord = ord($char);
if ($ord >= 32 && $ord <= 127) {
return $char;
} elsif ($ord <= 255) {
return sprintf('\x%02X', $ord);
} else {
return sprintf('\x{%04X}', $ord);
}
}
sub to_hex_esc {
my ($str) = @_;
return join('', map({ char_to_hex_esc($_) } (split //, $str)));
}
subtest 'Valid domains' => sub {
my %input_domains = (
# Roots
'.' => '.', # Full stop
'' => '.', # Fullwidth full stop
'。' => '.', # Ideographic full stop
'。' => '.', # Halfwidth ideographic full stop
# Mixed dots with trailing dot
'example。com.' => 'example.com',
'example。com' => 'example.com',
'sub.examplecom。' => 'sub.example.com',
'subexample.com。' => 'sub.example.com',
# Mixed dots without trailing dot
'example。com' => 'example.com',
'example。com' => 'example.com',
'sub.examplecom' => 'sub.example.com',
'subexample.com' => 'sub.example.com',
# Domains with U-Labels
'café.example.com' => 'xn--caf-dma.example.com',
'エグザンプル。example。com' => 'xn--ickqs6k2dyb.example.com',
'αβγδε.example.com' => 'xn--mxacdef.example.com',
# Domains with uppercase unicode
'CafÉ.example.com' => 'xn--caf-dma.example.com',
'ΑβΓΔε.example.com' => 'xn--mxacdef.example.com',
# All ascii domains (lowercase)
'example.com' => 'example.com',
'0/28.2.0.192.example.com' => '0/28.2.0.192.example.com',
'_http._tcp.example.com.' => '_http._tcp.example.com',
'sub-domain.example.com' => 'sub-domain.example.com',
# All ascii domains with uppercase characters
'suB-doMaIN.ExamPlE.cOm' => 'sub-domain.example.com',
# Single label domains
'test' => 'test',
'テスト' => 'xn--zckzah',
# Length limits
"a" x 63 . ".example.com" => "a" x 63 . ".example.com",
# this is 253 characters
("a" x 15 . ".") x 15 . "b" . ".example.com" => ("a" x 15 . ".") x 15 . "b" . ".example.com",
# NFC conversion (for each group first is non-NFC, second is equivalent NFC)
"d\x{006F}\x{0308}d" => 'xn--dd-fka',
'död' => 'xn--dd-fka',
"aq\x{0307}\x{0323}a" => 'xn--aqa-9dc3l',
"aq\x{0323}\x{0307}a" => 'xn--aqa-9dc3l',
"aḋ\x{0323}a" => 'xn--aa-rub587y',
"aḍ\x{0307}a" => 'xn--aa-rub587y',
);
for my $domain ( sort keys %input_domains ) {
my $expected_output = $input_domains{$domain};
my $safe_domain = to_hex_esc($domain);
subtest "Domain: '$safe_domain'" => sub {
my ( $errors, $final_domain );
lives_ok(sub {
($errors, $final_domain) = normalize_name($domain);
}, 'correct domain should live');
my $actual = { domain => $final_domain, errors => $errors };
my $expected = { domain => $expected_output, errors => [] };
eq_or_diff $actual, $expected;
}
}
};
subtest 'Bad domains' => sub {
my %input_domains = (
# Empty labels
'.。.' => ['INITIAL_DOT'],
'example。.com.' => ['REPEATED_DOTS'],
'example。com.。' => ['REPEATED_DOTS'],
'.example。com' => ['INITIAL_DOT'],
# Bad ascii
'bad:%;!$.example.com.' => ['INVALID_ASCII'],
" \x{205F} example.com. \x{0009}" => ['INVALID_ASCII'],
' ' => ['INVALID_ASCII'],
# Label to long
"a" x 64 . ".example.com" => ['LABEL_TOO_LONG'],
# Length too long after idn conversion (libidn fails)
'チョコレート' x 8 . 'a' . '.example.com' => ['INVALID_U_LABEL'],
# Emoji in names are invalid as per IDNA2008
'❤examplecom' => ['INVALID_U_LABEL'],
# Domain to long
# this is 254 characters
( "a" x 15 . "." ) x 15 . "bc" . ".example.com" => ['DOMAIN_NAME_TOO_LONG'],
# Empty domain
'' => ['EMPTY_DOMAIN_NAME'],
# Ambiguous downcasing
'İ.example.com' => ['AMBIGUOUS_DOWNCASING'],
);
for my $domain ( sort keys %input_domains ) {
my $expected_errors = $input_domains{$domain};
my $safe_domain = to_hex_esc( $domain );
subtest "Domain: '$safe_domain'" => sub {
my ( $errors, $final_domain );
lives_ok(sub {
($errors, $final_domain) = normalize_name($domain);
}, 'incorrect domain should live');
my $actual = {
domain => $final_domain,
errors => [ map { $_->tag } @$errors ]
};
my $expected = {
domain => undef,
errors => $expected_errors
};
eq_or_diff $actual, $expected;
}
}
};
subtest 'Trimming space' => sub {
my %cases = (
"example." => 'example.',
"exam ." => 'exam .',
" \x{205F} example. \x{0009}" => 'example.',
);
for my $str ( sort keys %cases ) {
my $expected = $cases{$str};
my $safe_str = to_hex_esc($str);
subtest "Domain: '$safe_str'" => sub {
my $actual = trim_space( $str );
is $actual, $expected, 'Match expected string';
}
}
};
done_testing;