Files
zonemaster.es/zonemaster-engine/t/normalization.t
Malin 8d4eaa1489 feat: add full Zonemaster stack with Docker and Spanish UI
- Clone all 5 Zonemaster component repos (LDNS, Engine, CLI, Backend, GUI)
- Dockerfile.backend: 8-stage multi-stage build LDNS→Engine→CLI→Backend
- Dockerfile.gui: Astro static build served via nginx
- docker-compose.yml: backend (internal) + frontend (port 5353)
- nginx.conf: root redirects to /es/, /api/ proxied to backend
- zonemaster-gui/config.ts: defaultLanguage set to 'es' (Spanish)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 08:19:24 +02:00

177 lines
5.6 KiB
Perl
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!perl
use strict;
use utf8;
use warnings;
use Test::More;
use Test::Differences;
use Test::Exception;
use Zonemaster::Engine::Normalization qw( normalize_name trim_space );
sub char_to_hex_esc {
my ($char) = @_;
my $ord = ord($char);
if ($ord >= 32 && $ord <= 127) {
return $char;
} elsif ($ord <= 255) {
return sprintf('\x%02X', $ord);
} else {
return sprintf('\x{%04X}', $ord);
}
}
sub to_hex_esc {
my ($str) = @_;
return join('', map({ char_to_hex_esc($_) } (split //, $str)));
}
subtest 'Valid domains' => sub {
my %input_domains = (
# Roots
'.' => '.', # Full stop
'' => '.', # Fullwidth full stop
'。' => '.', # Ideographic full stop
'。' => '.', # Halfwidth ideographic full stop
# Mixed dots with trailing dot
'example。com.' => 'example.com',
'example。com' => 'example.com',
'sub.examplecom。' => 'sub.example.com',
'subexample.com。' => 'sub.example.com',
# Mixed dots without trailing dot
'example。com' => 'example.com',
'example。com' => 'example.com',
'sub.examplecom' => 'sub.example.com',
'subexample.com' => 'sub.example.com',
# Domains with U-Labels
'café.example.com' => 'xn--caf-dma.example.com',
'エグザンプル。example。com' => 'xn--ickqs6k2dyb.example.com',
'αβγδε.example.com' => 'xn--mxacdef.example.com',
# Domains with uppercase unicode
'CafÉ.example.com' => 'xn--caf-dma.example.com',
'ΑβΓΔε.example.com' => 'xn--mxacdef.example.com',
# All ascii domains (lowercase)
'example.com' => 'example.com',
'0/28.2.0.192.example.com' => '0/28.2.0.192.example.com',
'_http._tcp.example.com.' => '_http._tcp.example.com',
'sub-domain.example.com' => 'sub-domain.example.com',
# All ascii domains with uppercase characters
'suB-doMaIN.ExamPlE.cOm' => 'sub-domain.example.com',
# Single label domains
'test' => 'test',
'テスト' => 'xn--zckzah',
# Length limits
"a" x 63 . ".example.com" => "a" x 63 . ".example.com",
# this is 253 characters
("a" x 15 . ".") x 15 . "b" . ".example.com" => ("a" x 15 . ".") x 15 . "b" . ".example.com",
# NFC conversion (for each group first is non-NFC, second is equivalent NFC)
"d\x{006F}\x{0308}d" => 'xn--dd-fka',
'död' => 'xn--dd-fka',
"aq\x{0307}\x{0323}a" => 'xn--aqa-9dc3l',
"aq\x{0323}\x{0307}a" => 'xn--aqa-9dc3l',
"aḋ\x{0323}a" => 'xn--aa-rub587y',
"aḍ\x{0307}a" => 'xn--aa-rub587y',
);
for my $domain ( sort keys %input_domains ) {
my $expected_output = $input_domains{$domain};
my $safe_domain = to_hex_esc($domain);
subtest "Domain: '$safe_domain'" => sub {
my ( $errors, $final_domain );
lives_ok(sub {
($errors, $final_domain) = normalize_name($domain);
}, 'correct domain should live');
my $actual = { domain => $final_domain, errors => $errors };
my $expected = { domain => $expected_output, errors => [] };
eq_or_diff $actual, $expected;
}
}
};
subtest 'Bad domains' => sub {
my %input_domains = (
# Empty labels
'.。.' => ['INITIAL_DOT'],
'example。.com.' => ['REPEATED_DOTS'],
'example。com.。' => ['REPEATED_DOTS'],
'.example。com' => ['INITIAL_DOT'],
# Bad ascii
'bad:%;!$.example.com.' => ['INVALID_ASCII'],
" \x{205F} example.com. \x{0009}" => ['INVALID_ASCII'],
' ' => ['INVALID_ASCII'],
# Label to long
"a" x 64 . ".example.com" => ['LABEL_TOO_LONG'],
# Length too long after idn conversion (libidn fails)
'チョコレート' x 8 . 'a' . '.example.com' => ['INVALID_U_LABEL'],
# Emoji in names are invalid as per IDNA2008
'❤examplecom' => ['INVALID_U_LABEL'],
# Domain to long
# this is 254 characters
( "a" x 15 . "." ) x 15 . "bc" . ".example.com" => ['DOMAIN_NAME_TOO_LONG'],
# Empty domain
'' => ['EMPTY_DOMAIN_NAME'],
# Ambiguous downcasing
'İ.example.com' => ['AMBIGUOUS_DOWNCASING'],
);
for my $domain ( sort keys %input_domains ) {
my $expected_errors = $input_domains{$domain};
my $safe_domain = to_hex_esc( $domain );
subtest "Domain: '$safe_domain'" => sub {
my ( $errors, $final_domain );
lives_ok(sub {
($errors, $final_domain) = normalize_name($domain);
}, 'incorrect domain should live');
my $actual = {
domain => $final_domain,
errors => [ map { $_->tag } @$errors ]
};
my $expected = {
domain => undef,
errors => $expected_errors
};
eq_or_diff $actual, $expected;
}
}
};
subtest 'Trimming space' => sub {
my %cases = (
"example." => 'example.',
"exam ." => 'exam .',
" \x{205F} example. \x{0009}" => 'example.',
);
for my $str ( sort keys %cases ) {
my $expected = $cases{$str};
my $safe_str = to_hex_esc($str);
subtest "Domain: '$safe_str'" => sub {
my $actual = trim_space( $str );
is $actual, $expected, 'Match expected string';
}
}
};
done_testing;