Net-IDN-Encode-1.000/000075500000000000000000000000001135223703100140365ustar00rootroot00000000000000Net-IDN-Encode-1.000/Build.PL000064400000000000000000000013031135223703100153270ustar00rootroot00000000000000use 5.006; use strict; use utf8; use Module::Build; my $b = Module::Build->new( 'module_name' => 'Net::IDN::Encode', 'license' => 'perl', 'dist_author' => 'Claus Färber ', 'dist_abstract' => 'Internationalizing Domain Names in Applications (RFC 3490)', 'sign' => 1, 'create_license' => 1, 'create_makefile_pl' => 'traditional', 'requires' => { 'Net::IDN::Nameprep' => 1, }, 'build_requires' => { 'Test::More' => 0, 'Test::NoWarnings' => 0, }, 'no_index' => { 'directory' => 'eg' }, 'resources' => { 'homepage' => '', 'repository' => '', }, ); $b->create_build_script; Net-IDN-Encode-1.000/Changes000064400000000000000000000036371135223703100153420ustar00rootroot00000000000000Revision history for Perl extension Net::IDN::Encode 1.000 (2010-01-13) - clean-up - release 0.999_20090112 (2010-01-10) - add XS for decode_punycode 0.999_20090110 (2010-01-10) - add XS for encode_punycode - include Net::IDN::Nameprep into Net::IDN::Encode *sigh* - drop IDNA::Punycode 0.99_20091231 - depend on perl 5.8.3 - optimise Net::IDN::Punycode 0.99_20091226 Sat Dec 26 00:00:00 2009 - some clean-ups 0.99_20091216 Wed Dec 16 00:00:00 2009 - switch to Module::Build - switch to Github, remove svn:keywords, add .gitignore - use ASCII in POD, fixes FAILs with perl 5.6.x - add examples in eg/ 0.99_20080913 Sun Sep 13 12:00:00 2008 - fixed perl 5.6.x (no warnings 'utf8') 0.99_20080913 Sat Sep 13 12:00:00 2008 - require perl version 5.6.0 instead of 5.6.6 - skip more tests in lower perl versions - removed Encode::Punycode; Encode is only available from perl 5.7.3 - renamed back to Net::IDN::Encode; without Encode::Punycode, the new name does not make sense. 
0.99_20071012 Fri Oct 12 12:00:00 2007 - renamed Net-IDN-Encode distribution to Net-IDN-tools - includes Net::IDN::Punycode (from IDNA::Punycode v0.02) - includes Net::IDN::Nameprep (complete rewrite, uses Unicode::Stringprep) - includes IDNA::Punycode (deprecated, new version based on Net::IDN::Punycode/::Encode) - includes Encode::Punycode (new version based on Net::IDN::Punycode) - uses Unicode::Stringprep - more tests, including test vectors from Internet Draft draft-josefsson-idn-test-vectors-00. - FIXES: #16150: Net::IDN::Encode depends on non-modulelist module IDNA::Punycode - FIXES: #16145: IDNA::Punycode 0.03 - FIXES: #28123: Undeclared dependency on Unicode::String (reported by ANDK) - FIXES WARNING: v-string in use/require non-portable (Net::IDN::Nameprep[::*]) 0.02 Sun Jun 20 00:00:00 2004 - fixed handling of incomplete/empty email addresses 0.01 Sun May 30 00:00:00 2004 - first release Net-IDN-Encode-1.000/LICENSE000064400000000000000000000436101135223703100150470ustar00rootroot00000000000000This software is copyright (c) 2010 by Claus Färber . ($local_part.'@'.($domain || $domain_literal)) : ($local_part); } sub domain_to_ascii { _domain(shift,\&to_ascii) } sub domain_to_unicode { _domain(shift,\&to_unicode) } sub email_to_ascii { _email(shift,\&to_ascii) } sub email_to_unicode { _email(shift,\&to_unicode) } use Unicode::Stringprep; use Unicode::Stringprep::Mapping; use Unicode::Stringprep::Prohibited; ## NB: Do not rely on this function being here. It will go away with IDNA2008. ## If you need a separate nameprep, use Net::IDN::Nameprep (when it's fixed). 
## *_nameprep = Unicode::Stringprep->new( 3.2, [ @Unicode::Stringprep::Mapping::B1, @Unicode::Stringprep::Mapping::B2 ], 'KC', [ @Unicode::Stringprep::Prohibited::C12, @Unicode::Stringprep::Prohibited::C22, @Unicode::Stringprep::Prohibited::C3, @Unicode::Stringprep::Prohibited::C4, @Unicode::Stringprep::Prohibited::C5, @Unicode::Stringprep::Prohibited::C6, @Unicode::Stringprep::Prohibited::C7, @Unicode::Stringprep::Prohibited::C8, @Unicode::Stringprep::Prohibited::C9 ], 1, ); 1; __END__ =encoding utf8 =head1 NAME Net::IDN::Encode - Internationalizing Domain Names in Applications (RFC 3490) =head1 SYNOPSIS use Net::IDN::Encode ':all'; my $a = domain_to_ascii("mü"); my $e = email_to_ascii("POSTMASTER@例。テスト"); my $u = domain_to_unicode('EXAMPLE.XN--11B5BS3A9AJ6G'); =head1 DESCRIPTION This module provides an easy-to-use interface for encoding and decoding Internationalized Domain Names (IDNs). IDNs use characters drawn from a large repertoire (Unicode), but IDNA allows the non-ASCII characters to be represented using only the ASCII characters already allowed in so-called host names today (letter-digit-hyphen, C). =head1 FUNCTIONS By default, this module does not export any subroutines. You may use the C<:all> tag to import everything. You can also use regular expressions such as C or C to select some of the functions, see L for details. The following functions are available: =over =item to_ascii( $label [, 'UseSTD3ASCIIRules' => 1 ] ) Converts a single label C<$label> to ASCII. Will throw an exception on invalid input. This function takes the following parameter: =over =item UseSTD3ASCIIRules (boolean) If set to a true value, checks the label for compliance with STD 3 (RFC 1123) syntax for host name parts. =back This function does not try to handle strings that consist of multiple labels (such as domain names). =item to_unicode( $label ) Converts a single label C<$label> to Unicode. Will throw an exception on invalid input.
This function does not try to handle strings that consist of multiple labels (such as domain names). =item domain_to_unicode( $domain ) Converts all labels of the hostname C<$domain> (with labels separated by dots) to Unicode. Will throw an exception on invalid input. =item domain_to_ascii( $domain ) Converts all labels of the hostname C<$domain> (with labels separated by dots) to ASCII. Will throw an exception on invalid input. The following characters are recognized as dots: U+002E (full stop), U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full stop). =item domain_to_unicode( $domain ) Converts all labels of the hostname C<$domain> (with labels separated by dots) to Unicode. Will throw an exception on invalid input. The following characters are recognized as dots: U+002E (full stop), U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full stop). =item email_to_ascii( $email ) Converts the domain part (right hand side, separated by an at sign) of the RFC 2821/2822 email address to ASCII. May throw an exception on invalid input. This function currently does not handle internationalization of the local-part (left hand side). This may change in future versions. The following characters are recognized as at signs: U+0040 (commercial at), U+FF20 (fullwidth commercial at). =item email_to_unicode( $email ) Converts the domain part (right hand side, separated by an at sign) of the RFC 2821/2822 email address to Unicode. May throw an exception on invalid input. This function currently does not handle internationalization of the local-part (left hand side). This may change in future versions. The following characters are recognized as at signs: U+0040 (commercial at), U+FF20 (fullwidth commercial at). =back =head1 AUTHOR Claus Färber =head1 LICENSE Copyright 2007-2010 Claus Färber. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
=head1 SEE ALSO L, L, S (L) =cut Net-IDN-Encode-1.000/lib/Net/IDN/Punycode.pm000064400000000000000000000042171135223703100200740ustar00rootroot00000000000000package Net::IDN::Punycode; use 5.006; use strict; use utf8; use warnings; use Exporter; our $VERSION = "1.000"; our @ISA = qw(Exporter); our @EXPORT = (); our @EXPORT_OK = qw(encode_punycode decode_punycode); our %EXPORT_TAGS = ( 'all' => \@EXPORT_OK ); eval { require XSLoader; XSLoader::load('Net::IDN::Punycode'); }; if (!defined(&encode_punycode)) { require Net::IDN::Punycode::PP; Net::IDN::Punycode::PP->import(qw(:all)); } 1; __END__ =head1 NAME Net::IDN::Punycode - A Bootstring encoding of Unicode for IDNA (RFC 3492) =head1 SYNOPSIS use Net::IDN::Punycode qw(:all); $punycode = encode_punycode($unicode); $unicode = decode_punycode($punycode); =head1 DESCRIPTION This module implements the Punycode encoding. Punycode is an instance of a more general algorithm called Bootstring, which allows strings composed from a small set of "basic" code points to uniquely represent any string of code points drawn from a larger set. Punycode is Bootstring with particular parameter values appropriate for IDNA. Note that this module does not do any string preparation as specified by I/I. It does not add any prefix or suffix, either. =head1 FUNCTIONS No functions are exported by default. You can use the tag C<:all> or import them individually. The following functions are available: =over 4 =item encode_punycode($input) Encodes C<$input> with Punycode and returns the result. This function will throw an exception on invalid input. =item decode_punycode($input) Decodes C<$input> with Punycode and returns the result. This function will throw an exception on invalid input.
=back =head1 AUTHORS Tatsuhiko Miyagawa Emiyagawa@bulknews.netE (versions 0.01 to 0.02) Claus FErber ECFAERBER@cpan.orgE (from version 1.00) =head1 LICENSE Copyright 2002-2004 Tatsuhiko Miyagawa Emiyagawa@bulknews.netE Copyright 2007-2010 Claus FErber ECFAERBER@cpan.orgE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 SEE ALSO S (L), L, L =cut Net-IDN-Encode-1.000/lib/Net/IDN/Punycode.xs000064400000000000000000000146711135223703100201170ustar00rootroot00000000000000#include "EXTERN.h" #include "perl.h" #include "XSUB.h" #define BASE 36 #define TMIN 1 #define TMAX 26 #define SKEW 38 #define DAMP 700 #define INITIAL_BIAS 72 #define INITIAL_N 128 #define isBASE(x) UTF8_IS_INVARIANT((unsigned char)x) #define DELIM '-' #define TMIN_MAX(t) (((t) < TMIN) ? (TMIN) : ((t) > TMAX) ? (TMAX) : (t)) static char enc_digit[BASE] = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', }; static UV dec_digit[0x80] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00..0F */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10..1F */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20..2F */ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, /* 30..3F */ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 40..4F */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 50..5F */ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 60..6F */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 70..7F */ }; static int adapt(int delta, int numpoints, int first) { int k; delta /= first ? 
DAMP : 2; delta += delta/numpoints; for(k=0; delta > ((BASE-TMIN) * TMAX)/2; k += BASE) delta /= BASE-TMIN; return k + (((BASE-TMIN+1) * delta) / (delta+SKEW)); }; MODULE = Net::IDN::Punycode PACKAGE = Net::IDN::Punycode SV* encode_punycode(input) SV * input PREINIT: UV c, m, n = INITIAL_N; int k, q, t; int bias = INITIAL_BIAS; int delta = 0, skip_delta; char *in_s, *in_p, *in_e, *re_s, *re_p, *re_e, *skip_p; int first = 1; STRLEN h, remain; STRLEN length_guess, u8; PPCODE: if(!SvOK(input)) XSRETURN_UNDEF; length_guess = sv_utf8_upgrade(input); in_s = in_p = SvPV_nolen(input); in_e = SvEND(input); /* copy basic code points */ if(length_guess < 64) length_guess = 64; /* optimise for maximum length of domain names */ length_guess += 2; /* plus DELIM + '\0' */ RETVAL = NEWSV('P',length_guess); sv_2mortal(RETVAL); /* so we can use croak w/o memory leaks */ SvPOK_only(RETVAL); /* UTF8 is off (BASE chars only) */ re_s = re_p = SvPV_nolen(RETVAL); re_e = re_s + SvLEN(RETVAL); while(in_p < in_e) { if( isBASE(*in_p) ) *re_p++ = *in_p; in_p++; } h = re_p - re_s; /* add DELIM if needed */ if(h) *re_p++ = DELIM; for(;;) { /* find smallest code point not yet handled */ m = UV_MAX; q = skip_delta = 0; for(in_p = skip_p = in_s; in_p < in_e;) { c = utf8_to_uvuni(in_p, &u8); if(c >= n && c < m) { m = c; skip_p = in_p; skip_delta = q; } if(c < n) ++q; in_p += u8; } if(m == UV_MAX) break; /* increase delta to the state corresponding to the m code point at the beginning of the string */ delta += (m-n) * (h+1); n = m; /* now find the chars to be encoded in this round */ delta += skip_delta; for(in_p = skip_p; in_p < in_e;) { c = utf8_to_uvuni(in_p, &u8); if(c < n) { ++delta; } else if( c == n ) { q = delta; for(k = BASE;; k += BASE) { if(re_p >= re_e) { length_guess = re_e - re_s + 16; re_e = SvGROW(RETVAL, length_guess); re_p = re_e + (re_p - re_s); re_s = re_e; re_e = re_s + length_guess; } t = TMIN_MAX(k - bias); if(q < t) break; *re_p++ = enc_digit[t + ((q-t) % (BASE-t))]; q = 
(q-t) / (BASE-t); } if(q > BASE) croak("input exceeds punycode limit"); *re_p++ = enc_digit[q]; bias = adapt(delta, h+1, first); delta = first = 0; ++h; --remain; } in_p += u8; } ++delta; ++n; } *re_p = 0; SvCUR_set(RETVAL, re_p - re_s); ST(0) = RETVAL; XSRETURN(1); SV* decode_punycode(input) SV * input PREINIT: UV c, n = INITIAL_N; int i = 0, oldi, j, k, t, w; int bias = INITIAL_BIAS; int delta = 0, skip_delta; char *in_s, *in_p, *in_e, *re_s, *re_p, *re_e, *skip_p; int first = 1; STRLEN length_guess, h, u8; PPCODE: if(!SvOK(input)) XSRETURN_UNDEF; in_s = in_p = SvPV_nolen(input); in_e = SvEND(input); length_guess = SvCUR(input); length_guess *= 2; if(length_guess < 256) length_guess = 256; RETVAL = NEWSV('D',length_guess); sv_2mortal(RETVAL); /* so we can use croak w/o memory leaks */ SvPOK_only(RETVAL); re_s = re_p = SvPV_nolen(RETVAL); re_e = re_s + SvLEN(RETVAL); skip_p = NULL; for(in_p = in_s; in_p < in_e; in_p++) { c = *in_p; /* we don't care whether it's UTF-8 */ if(!isBASE(c)) croak("non-base character in input for decode_punycode"); if(c == DELIM) skip_p = in_p; *re_p++ = c; /* copy it */ } if(skip_p) { h = skip_p - in_s; /* base chars handled */ re_p = re_s + h; /* points to end of base chars */ skip_p++; /* skip over DELIM */ } else { h = 0; /* no base chars */ re_p = re_s; skip_p = in_s; /* read everything */ } for(in_p = skip_p; in_p < in_e; i++) { oldi = i; w = 1; for(k = BASE;; k+= BASE) { if(!(in_p < in_e)) croak("incomplete encoded code point in decode_punycode"); c = dec_digit[*in_p++]; /* we already know it's in 0..127 */ if(((IV)c) < 0) croak("invalid digit in input for decode_punycode"); i += c * w; t = TMIN_MAX(k - bias); if(c < t) break; w *= BASE-t; } h++; bias = adapt(i-oldi, h, first); first = 0; n += i / h; /* code point n to insert */ i = i % h; /* at position i */ u8 = UNISKIP(n); /* how many bytes we need */ if(re_p + u8 >= re_e) { length_guess = re_e - re_p + u8 + 16; re_e = SvGROW(RETVAL, length_guess); re_p = re_e + (re_p - re_s); 
re_s = re_e; re_e = re_s + SvLEN(RETVAL); } j = i; for(skip_p = re_s; j > 0; j--) /* find position in UTF-8 */ skip_p+=UTF8SKIP(skip_p); if(skip_p < re_p) /* move succeeding chars */ Move(skip_p, skip_p + u8, re_p - skip_p, char); re_p += u8; uvuni_to_utf8_flags(skip_p, n, UNICODE_ALLOW_ANY); } if(!first) SvUTF8_on(RETVAL); /* UTF-8 chars have been inserted */ *re_p = 0; SvCUR_set(RETVAL, re_p - re_s); ST(0) = RETVAL; XSRETURN(1); Net-IDN-Encode-1.000/lib/Net/IDN/Punycode/000075500000000000000000000000001135223703100175325ustar00rootroot00000000000000Net-IDN-Encode-1.000/lib/Net/IDN/Punycode/PP.pm000064400000000000000000000106511135223703100204120ustar00rootroot00000000000000package Net::IDN::Punycode::PP; use 5.007_001; use strict; use utf8; use warnings; use Carp; use Exporter; our $VERSION = "1.000"; our @ISA = qw(Exporter); our @EXPORT = (); our @EXPORT_OK = qw(encode_punycode decode_punycode); our %EXPORT_TAGS = ( 'all' => \@EXPORT_OK ); use integer; use constant BASE => 36; use constant TMIN => 1; use constant TMAX => 26; use constant SKEW => 38; use constant DAMP => 700; use constant INITIAL_BIAS => 72; use constant INITIAL_N => 128; my $Delimiter = chr 0x2D; my $BasicRE = "\x00-\x7f"; my $PunyRE = "A-Za-z0-9"; sub _adapt { my($delta, $numpoints, $firsttime) = @_; $delta = $firsttime ? 
$delta / DAMP : $delta / 2;
  $delta += $delta / $numpoints;
  my $k = 0;
  while ($delta > ((BASE - TMIN) * TMAX) / 2) {
    $delta /= BASE - TMIN;
    $k += BASE;
  }
  return $k + (((BASE - TMIN + 1) * $delta) / ($delta + SKEW));
}

## decode_punycode($input)
##
## Decodes the Punycode string $input and returns the decoded string.
## Returns undef for undef input and '' for empty input.  Everything before
## the last delimiter is copied verbatim; the rest is read as a sequence of
## variable-length integers that say which code point to insert where.
##
## Croaks on non-basic characters in front of the delimiter, on characters
## that are not Punycode digits after it, and on input that ends in the
## middle of an encoded code point.
##
sub decode_punycode {
  die("Usage: Net::IDN::Punycode::decode_punycode(input)") unless @_;
  my $input = shift;

  my $n      = INITIAL_N;
  my $i      = 0;
  my $bias   = INITIAL_BIAS;
  my @output;

  return undef unless defined $input;
  return '' unless length $input;

  if($input =~ s/(.*)$Delimiter//os) {
    my $base_chars = $1;
    croak("non-base character in input for decode_punycode")
      if $base_chars =~ m/[^$BasicRE]/os;
    push @output, split //, $base_chars;
  }
  my $code = $input;

  croak('invalid digit in input for decode_punycode') if $code =~ m/[^$PunyRE]/os;

  ## Downgrade the string that is actually consumed below ($input is not
  ## used any more).  This cannot fail: $code has just been checked to
  ## contain Punycode digits only.
  utf8::downgrade($code);

  while(length $code)
  {
    my $oldi = $i;
    my $w    = 1;

  LOOP:
    for (my $k = BASE; 1; $k += BASE) {
      ## substr() on an exhausted string yields '' rather than undef, so
      ## the end of input has to be detected before a digit is consumed.
      croak("incomplete encoded code point in decode_punycode")
        unless length $code;
      my $cp = substr($code, 0, 1, '');
      my $digit = ord $cp;

      ## NB: this depends on the PunyRE catching invalid digit characters
      ## before they turn up here
      ##
      $digit = $digit < 0x40 ? $digit + (26-0x30) : ($digit & 0x1f) -1;

      $i += $digit * $w;
      my $t = $k - $bias;
      $t = $t < TMIN ? TMIN : $t > TMAX ? TMAX : $t;

      last LOOP if $digit < $t;
      $w *= (BASE - $t);
    }
    $bias = _adapt($i - $oldi, @output + 1, $oldi == 0);
    $n += $i / (@output + 1);
    $i = $i % (@output + 1);
    splice(@output, $i, 0, chr($n));
    $i++;
  }
  return join '', @output;
}

## encode_punycode($input)
##
## Encodes the character string $input with Punycode and returns the result.
## Basic (ASCII) characters are copied first, followed by the delimiter if
## there were any; the remaining code points are then handled in ascending
## numerical order, each one contributing a variable-length integer.
##
## Croaks with "input exceeds punycode limit" if a remainder does not fit
## into a single digit.
##
sub encode_punycode {
  die("Usage: Net::IDN::Punycode::encode_punycode(input)") unless @_;
  my $input = shift;
  my $input_length = length $input;

  ## my $output = join '', $input =~ m/([$BasicRE]+)/og; ## slower
  my $output = $input; $output =~ s/[^$BasicRE]+//ogs;

  ## NB: the number of basic code points must not be kept in a lexical
  ## called $b -- that would capture the $b of the sort block below.
  my $h = my $basic_count = length $output;
  $output .= $Delimiter if $basic_count > 0;
  utf8::downgrade($output);	## no unnecessary use of utf8 semantics

  my @input = map ord, split //, $input;

  ## The code points have to be visited in NUMERICAL order.  A plain sort()
  ## compares strings, which puts e.g. 1000 before 999; the smaller code
  ## point would then be skipped by the "next if $m < $n" below and would
  ## silently be missing from the output.
  my @chars = sort { $a <=> $b } grep { $_ >= INITIAL_N } @input;

  my $n = INITIAL_N;
  my $delta = 0;
  my $bias = INITIAL_BIAS;

  foreach my $m (@chars) {
    next if $m < $n;		## duplicate of a code point already handled
    $delta += ($m - $n) * ($h + 1);
    $n = $m;

    for(my $i = 0; $i < $input_length; $i++)
    {
      my $c = $input[$i];
      $delta++ if $c < $n;
      if ($c == $n) {
        my $q = $delta;

      LOOP:
        for (my $k = BASE; 1; $k += BASE) {
          my $t = $k - $bias;
          $t = $t < TMIN ? TMIN : $t > TMAX ? TMAX : $t;

          last LOOP if $q < $t;

          my $o = $t + (($q - $t) % (BASE - $t));
          $output .= chr $o + ($o < 26 ? 0x61 : 0x30-26);

          $q = ($q - $t) / (BASE - $t);
        }
        croak("input exceeds punycode limit") if $q > BASE;
        $output .= chr $q + ($q < 26 ? 0x61 : 0x30-26);

        $bias = _adapt($delta, $h + 1, $h == $basic_count);
        $delta = 0;
        $h++;
      }
    }
    $delta++;
    $n++;
  }
  return $output;
}

1;

__END__

=head1 NAME

Net::IDN::Punycode::PP - pure-perl implementation of Net::IDN::Punycode

=head1 DESCRIPTION

See L<Net::IDN::Punycode>.

=head1 AUTHORS

Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt> (versions 0.01 to 0.02)

Claus FE<auml>rber E<lt>CFAERBER@cpan.orgE<gt> (from version 1.00)

=head1 LICENSE

Copyright 2002-2004 Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt>

Copyright 2007-2010 Claus FE<auml>rber E<lt>CFAERBER@cpan.orgE<gt>

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO S (L), L, L =cut Net-IDN-Encode-1.000/t/000075500000000000000000000000001135223703100143015ustar00rootroot00000000000000Net-IDN-Encode-1.000/t/00use.t000064400000000000000000000002661135223703100154260ustar00rootroot00000000000000use strict; use Test::More tests => 4; use Test::NoWarnings; # use_ok 'Net::IDN::Nameprep'; use_ok 'Net::IDN::Punycode'; use_ok 'Net::IDN::Punycode::PP'; use_ok 'Net::IDN::Encode'; Net-IDN-Encode-1.000/t/encode_bytes.t000064400000000000000000000025331135223703100171340ustar00rootroot00000000000000use bytes; use strict; use Test::More tests => 24; use Test::NoWarnings; use Net::IDN::Encode qw(:all); is(to_ascii('mueller'),'mueller'); is(to_ascii('xn--mller-kva'),'xn--mller-kva'); is(to_ascii('müller'),'xn--mller-kva'); is(to_unicode('mueller'),'mueller'); is(to_unicode('xn--mller-kva'),'müller'); is(to_unicode('müller'),'müller'); is(domain_to_ascii(''),''); is(domain_to_ascii(''),''); is(domain_to_ascii('mü'),''); is(domain_to_unicode(''),''); is(domain_to_unicode(''),'mü'); is(domain_to_unicode('mü'),'mü'); is(email_to_ascii(''),''); is(email_to_ascii(''),''); is(email_to_ascii('hans@mü'),''); is(email_to_ascii(''), ''); is(email_to_ascii(undef), undef); is(email_to_ascii('test'), 'test'); is(email_to_unicode(''),''); is(email_to_unicode(''),'hans@mü'); is(email_to_unicode(''),''); is(email_to_unicode(undef), undef); is(email_to_unicode('test'),'test'); Net-IDN-Encode-1.000/t/encode_utf8.t000064400000000000000000000035661135223703100167030ustar00rootroot00000000000000use utf8; use strict; use Test::More tests => 32; use Test::NoWarnings; use Net::IDN::Encode qw(:all); is(to_ascii('mueller'),'mueller'); is(to_ascii('xn--mller-kva'),'xn--mller-kva'); is(to_ascii('müller'),'xn--mller-kva'); is(to_ascii('中央大学'),'xn--fiq80yua78t'); is(to_unicode('mueller'),'mueller'); is(to_unicode('xn--mller-kva'),'müller'); is(to_unicode('müller'),'müller'); is(to_unicode('xn--fiq80yua78t'),'中央大学'); is(domain_to_ascii(''),''); 
is(domain_to_ascii(''),''); is(domain_to_ascii('mü'),''); is(domain_to_ascii('中央大学.tw'),''); is(domain_to_unicode(''),''); is(domain_to_unicode(''),'mü'); is(domain_to_unicode('mü'),'mü'); is(domain_to_unicode(''),'中央大学.tw'); is(email_to_ascii(''),''); is(email_to_ascii(''),''); is(email_to_ascii('hans@mü'),''); is(email_to_ascii('test@中央大学.tw'),''); is(email_to_ascii(''), ''); is(email_to_ascii(undef), undef); is(email_to_ascii('test'), 'test'); is(email_to_unicode(''),''); is(email_to_unicode('hansï¼'),''); is(email_to_unicode(''),'hans@mü'); is(email_to_unicode('hansï¼'),'hans@mü'); is(email_to_unicode(''),'test@中央大学.tw'); is(email_to_unicode(''),''); is(email_to_unicode(undef), undef); is(email_to_unicode('test'),'test'); Net-IDN-Encode-1.000/t/punycode_vec-pp.t000064400000000000000000000140231135223703100175660ustar00rootroot00000000000000use strict; use utf8; use Test::More; use Test::NoWarnings; use Net::IDN::Punycode::PP ':all'; our @idna = ( ["Arabic (Egyptian)", "\x{0644}\x{064A}\x{0647}\x{0645}\x{0627}\x{0628}\x{062A}\x{0643}". "\x{0644}\x{0645}\x{0648}\x{0634}\x{0639}\x{0631}\x{0628}\x{064A}\x{061F}", "egbpdaj6bu4bxfgehfvwxn", 0, 0, 1, 1 ], ["Chinese (simplified)", "\x{4ED6}\x{4EEC}\x{4E3A}\x{4EC0}\x{4E48}\x{4E0D}\x{8BF4}\x{4E2D}". "\x{6587}", "ihqwcrb4cv8a8dqg056pqjye", 0, 0, 1, 1 ], ["Chinese (traditional)", "\x{4ED6}\x{5011}\x{7232}\x{4EC0}\x{9EBD}\x{4E0D}\x{8AAA}\x{4E2D}". "\x{6587}", "ihqwctvzc91f659drss3x8bo0yb", 0, 0, 1, 1 ], ["Czech", "\x{0050}\x{0072}\x{006F}\x{010D}\x{0070}\x{0072}\x{006F}\x{0073}". "\x{0074}\x{011B}\x{006E}\x{0065}\x{006D}\x{006C}\x{0075}\x{0076}\x{00ED}". "\x{010D}\x{0065}\x{0073}\x{006B}\x{0079}", "Proprostnemluvesky-uyb24dma41a", 0, 0, 1, 1 ], ["Hebrew", "\x{05DC}\x{05DE}\x{05D4}\x{05D4}\x{05DD}\x{05E4}\x{05E9}\x{05D5}". "\x{05D8}\x{05DC}\x{05D0}\x{05DE}\x{05D3}\x{05D1}\x{05E8}\x{05D9}\x{05DD}". 
"\x{05E2}\x{05D1}\x{05E8}\x{05D9}\x{05EA}", "4dbcagdahymbxekheh6e0a7fei0b", 0, 0, 1, 1 ], ["Hindi (Devanagari)", "\x{092F}\x{0939}\x{0932}\x{094B}\x{0917}\x{0939}\x{093F}\x{0928}". "\x{094D}\x{0926}\x{0940}\x{0915}\x{094D}\x{092F}\x{094B}\x{0902}\x{0928}". "\x{0939}\x{0940}\x{0902}\x{092C}\x{094B}\x{0932}\x{0938}\x{0915}\x{0924}". "\x{0947}\x{0939}\x{0948}\x{0902}", "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0, 1 ], ["Japanese (kanji and hiragana)", "\x{306A}\x{305C}\x{307F}\x{3093}\x{306A}\x{65E5}\x{672C}\x{8A9E}". "\x{3092}\x{8A71}\x{3057}\x{3066}\x{304F}\x{308C}\x{306A}\x{3044}\x{306E}". "\x{304B}", "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0, 1 ], ["Russian (Cyrillic)", "\x{043F}\x{043E}\x{0447}\x{0435}\x{043C}\x{0443}\x{0436}\x{0435}". "\x{043E}\x{043D}\x{0438}\x{043D}\x{0435}\x{0433}\x{043E}\x{0432}\x{043E}". "\x{0440}\x{044F}\x{0442}\x{043F}\x{043E}\x{0440}\x{0443}\x{0441}\x{0441}". "\x{043A}\x{0438}", "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, 1, 1 ], ["Spanish", "\x{0050}\x{006F}\x{0072}\x{0071}\x{0075}\x{00E9}\x{006E}\x{006F}". "\x{0070}\x{0075}\x{0065}\x{0064}\x{0065}\x{006E}\x{0073}\x{0069}\x{006D}". "\x{0070}\x{006C}\x{0065}\x{006D}\x{0065}\x{006E}\x{0074}\x{0065}\x{0068}". "\x{0061}\x{0062}\x{006C}\x{0061}\x{0072}\x{0065}\x{006E}\x{0045}\x{0073}". "\x{0070}\x{0061}\x{00F1}\x{006F}\x{006C}", "PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0, 1 ], ["Vietnamese", "\x{0054}\x{1EA1}\x{0069}\x{0073}\x{0061}\x{006F}\x{0068}\x{1ECD}". "\x{006B}\x{0068}\x{00F4}\x{006E}\x{0067}\x{0074}\x{0068}\x{1EC3}\x{0063}". "\x{0068}\x{1EC9}\x{006E}\x{00F3}\x{0069}\x{0074}\x{0069}\x{1EBF}\x{006E}". "\x{0067}\x{0056}\x{0069}\x{1EC7}\x{0074}", "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0, 1 ], ["Japanese", "\x{0033}\x{5E74}\x{0042}\x{7D44}\x{91D1}\x{516B}\x{5148}\x{751F}", "3B-ww4c5e180e575a65lsy2b", 0, 0, 1, 1 ], ["Japanese", "\x{5B89}\x{5BA4}\x{5948}\x{7F8E}\x{6075}\x{002D}\x{0077}\x{0069}". 
"\x{0074}\x{0068}\x{002D}\x{0053}\x{0055}\x{0050}\x{0045}\x{0052}\x{002D}". "\x{004D}\x{004F}\x{004E}\x{004B}\x{0045}\x{0059}\x{0053}", "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0, 1 ], ["Japanese", "\x{0048}\x{0065}\x{006C}\x{006C}\x{006F}\x{002D}\x{0041}\x{006E}". "\x{006F}\x{0074}\x{0068}\x{0065}\x{0072}\x{002D}\x{0057}\x{0061}\x{0079}". "\x{002D}\x{305D}\x{308C}\x{305E}\x{308C}\x{306E}\x{5834}\x{6240}", "Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0, 1 ], ["Japanese", "\x{3072}\x{3068}\x{3064}\x{5C4B}\x{6839}\x{306E}\x{4E0B}\x{0032}", "2-u9tlzr9756bt3uc0v", 0, 0, 1, 1 ], ["Japanese", "\x{004D}\x{0061}\x{006A}\x{0069}\x{3067}\x{004B}\x{006F}\x{0069}". "\x{3059}\x{308B}\x{0035}\x{79D2}\x{524D}", "MajiKoi5-783gue6qz075azm5e", 0, 0, 1, 1 ], ["Japanese", "\x{30D1}\x{30D5}\x{30A3}\x{30FC}\x{0064}\x{0065}\x{30EB}\x{30F3}". "\x{30D0}", "de-jg4avhby1noc0d", 0, 0, 1, 1 ], ["Japanese", "\x{305D}\x{306E}\x{30B9}\x{30D4}\x{30FC}\x{30C9}\x{3067}", "d9juau41awczczp", 0, 0, 1, 1 ], ["Greek", "\x{03b5}\x{03bb}\x{03bb}\x{03b7}\x{03bd}\x{03b9}\x{03ba}\x{03ac}", "hxargifdar", 0, 0, 1, 1 ], ["Maltese (Malti)", "\x{0062}\x{006f}\x{006e}\x{0121}\x{0075}\x{0073}\x{0061}\x{0127}". "\x{0127}\x{0061}", "bonusaa-5bb1da", 0, 0, 1, 1 ], ["Russian (Cyrillic)", "\x{043f}\x{043e}\x{0447}\x{0435}\x{043c}\x{0443}\x{0436}\x{0435}". "\x{043e}\x{043d}\x{0438}\x{043d}\x{0435}\x{0433}\x{043e}\x{0432}\x{043e}". "\x{0440}\x{044f}\x{0442}\x{043f}\x{043e}\x{0440}\x{0443}\x{0441}\x{0441}". "\x{043a}\x{0438}", "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, 1, 1 ], ); plan tests => ($#idna+1)*2 + 1; foreach my $test (@idna) { my ($comment,$in,$out,$allowunassigned,$usestd3asciirules,$toascii,$tounicode) = @{$test}; is(encode_punycode($in), $out, $comment.' (encode_punycode)'); is(decode_punycode($out), $in, $comment.' (decode_punycode)'); } # Test vectors extracted from: # # Nameprep and IDNA Test Vectors # draft-josefsson-idn-test-vectors # # Copyright (C) The Internet Society (2003). 
All Rights Reserved. # # This document and translations of it may be copied and furnished # to others, and derivative works that comment on or otherwise # explain it or assist in its implementation may be prepared, # copied, published and distributed, in whole or in part, without # restriction of any kind, provided that the above copyright # notice and this paragraph are included on all such copies and # derivative works. However, this document itself may not be # modified in any way, such as by removing the copyright notice or # references to the Internet Society or other Internet # organizations, except as needed for the purpose of developing # Internet standards in which case the procedures for copyrights # defined in the Internet Standards process must be followed, or # as required to translate it into languages other than English. Net-IDN-Encode-1.000/t/punycode_vec.t000064400000000000000000000140171135223703100171540ustar00rootroot00000000000000use strict; use utf8; use Test::More; use Test::NoWarnings; use Net::IDN::Punycode ':all'; our @idna = ( ["Arabic (Egyptian)", "\x{0644}\x{064A}\x{0647}\x{0645}\x{0627}\x{0628}\x{062A}\x{0643}". "\x{0644}\x{0645}\x{0648}\x{0634}\x{0639}\x{0631}\x{0628}\x{064A}\x{061F}", "egbpdaj6bu4bxfgehfvwxn", 0, 0, 1, 1 ], ["Chinese (simplified)", "\x{4ED6}\x{4EEC}\x{4E3A}\x{4EC0}\x{4E48}\x{4E0D}\x{8BF4}\x{4E2D}". "\x{6587}", "ihqwcrb4cv8a8dqg056pqjye", 0, 0, 1, 1 ], ["Chinese (traditional)", "\x{4ED6}\x{5011}\x{7232}\x{4EC0}\x{9EBD}\x{4E0D}\x{8AAA}\x{4E2D}". "\x{6587}", "ihqwctvzc91f659drss3x8bo0yb", 0, 0, 1, 1 ], ["Czech", "\x{0050}\x{0072}\x{006F}\x{010D}\x{0070}\x{0072}\x{006F}\x{0073}". "\x{0074}\x{011B}\x{006E}\x{0065}\x{006D}\x{006C}\x{0075}\x{0076}\x{00ED}". "\x{010D}\x{0065}\x{0073}\x{006B}\x{0079}", "Proprostnemluvesky-uyb24dma41a", 0, 0, 1, 1 ], ["Hebrew", "\x{05DC}\x{05DE}\x{05D4}\x{05D4}\x{05DD}\x{05E4}\x{05E9}\x{05D5}". "\x{05D8}\x{05DC}\x{05D0}\x{05DE}\x{05D3}\x{05D1}\x{05E8}\x{05D9}\x{05DD}". 
"\x{05E2}\x{05D1}\x{05E8}\x{05D9}\x{05EA}", "4dbcagdahymbxekheh6e0a7fei0b", 0, 0, 1, 1 ], ["Hindi (Devanagari)", "\x{092F}\x{0939}\x{0932}\x{094B}\x{0917}\x{0939}\x{093F}\x{0928}". "\x{094D}\x{0926}\x{0940}\x{0915}\x{094D}\x{092F}\x{094B}\x{0902}\x{0928}". "\x{0939}\x{0940}\x{0902}\x{092C}\x{094B}\x{0932}\x{0938}\x{0915}\x{0924}". "\x{0947}\x{0939}\x{0948}\x{0902}", "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0, 1 ], ["Japanese (kanji and hiragana)", "\x{306A}\x{305C}\x{307F}\x{3093}\x{306A}\x{65E5}\x{672C}\x{8A9E}". "\x{3092}\x{8A71}\x{3057}\x{3066}\x{304F}\x{308C}\x{306A}\x{3044}\x{306E}". "\x{304B}", "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0, 1 ], ["Russian (Cyrillic)", "\x{043F}\x{043E}\x{0447}\x{0435}\x{043C}\x{0443}\x{0436}\x{0435}". "\x{043E}\x{043D}\x{0438}\x{043D}\x{0435}\x{0433}\x{043E}\x{0432}\x{043E}". "\x{0440}\x{044F}\x{0442}\x{043F}\x{043E}\x{0440}\x{0443}\x{0441}\x{0441}". "\x{043A}\x{0438}", "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, 1, 1 ], ["Spanish", "\x{0050}\x{006F}\x{0072}\x{0071}\x{0075}\x{00E9}\x{006E}\x{006F}". "\x{0070}\x{0075}\x{0065}\x{0064}\x{0065}\x{006E}\x{0073}\x{0069}\x{006D}". "\x{0070}\x{006C}\x{0065}\x{006D}\x{0065}\x{006E}\x{0074}\x{0065}\x{0068}". "\x{0061}\x{0062}\x{006C}\x{0061}\x{0072}\x{0065}\x{006E}\x{0045}\x{0073}". "\x{0070}\x{0061}\x{00F1}\x{006F}\x{006C}", "PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0, 1 ], ["Vietnamese", "\x{0054}\x{1EA1}\x{0069}\x{0073}\x{0061}\x{006F}\x{0068}\x{1ECD}". "\x{006B}\x{0068}\x{00F4}\x{006E}\x{0067}\x{0074}\x{0068}\x{1EC3}\x{0063}". "\x{0068}\x{1EC9}\x{006E}\x{00F3}\x{0069}\x{0074}\x{0069}\x{1EBF}\x{006E}". "\x{0067}\x{0056}\x{0069}\x{1EC7}\x{0074}", "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0, 1 ], ["Japanese", "\x{0033}\x{5E74}\x{0042}\x{7D44}\x{91D1}\x{516B}\x{5148}\x{751F}", "3B-ww4c5e180e575a65lsy2b", 0, 0, 1, 1 ], ["Japanese", "\x{5B89}\x{5BA4}\x{5948}\x{7F8E}\x{6075}\x{002D}\x{0077}\x{0069}". 
"\x{0074}\x{0068}\x{002D}\x{0053}\x{0055}\x{0050}\x{0045}\x{0052}\x{002D}". "\x{004D}\x{004F}\x{004E}\x{004B}\x{0045}\x{0059}\x{0053}", "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0, 1 ], ["Japanese", "\x{0048}\x{0065}\x{006C}\x{006C}\x{006F}\x{002D}\x{0041}\x{006E}". "\x{006F}\x{0074}\x{0068}\x{0065}\x{0072}\x{002D}\x{0057}\x{0061}\x{0079}". "\x{002D}\x{305D}\x{308C}\x{305E}\x{308C}\x{306E}\x{5834}\x{6240}", "Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0, 1 ], ["Japanese", "\x{3072}\x{3068}\x{3064}\x{5C4B}\x{6839}\x{306E}\x{4E0B}\x{0032}", "2-u9tlzr9756bt3uc0v", 0, 0, 1, 1 ], ["Japanese", "\x{004D}\x{0061}\x{006A}\x{0069}\x{3067}\x{004B}\x{006F}\x{0069}". "\x{3059}\x{308B}\x{0035}\x{79D2}\x{524D}", "MajiKoi5-783gue6qz075azm5e", 0, 0, 1, 1 ], ["Japanese", "\x{30D1}\x{30D5}\x{30A3}\x{30FC}\x{0064}\x{0065}\x{30EB}\x{30F3}". "\x{30D0}", "de-jg4avhby1noc0d", 0, 0, 1, 1 ], ["Japanese", "\x{305D}\x{306E}\x{30B9}\x{30D4}\x{30FC}\x{30C9}\x{3067}", "d9juau41awczczp", 0, 0, 1, 1 ], ["Greek", "\x{03b5}\x{03bb}\x{03bb}\x{03b7}\x{03bd}\x{03b9}\x{03ba}\x{03ac}", "hxargifdar", 0, 0, 1, 1 ], ["Maltese (Malti)", "\x{0062}\x{006f}\x{006e}\x{0121}\x{0075}\x{0073}\x{0061}\x{0127}". "\x{0127}\x{0061}", "bonusaa-5bb1da", 0, 0, 1, 1 ], ["Russian (Cyrillic)", "\x{043f}\x{043e}\x{0447}\x{0435}\x{043c}\x{0443}\x{0436}\x{0435}". "\x{043e}\x{043d}\x{0438}\x{043d}\x{0435}\x{0433}\x{043e}\x{0432}\x{043e}". "\x{0440}\x{044f}\x{0442}\x{043f}\x{043e}\x{0440}\x{0443}\x{0441}\x{0441}". "\x{043a}\x{0438}", "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, 1, 1 ], ); plan tests => ($#idna+1)*2 + 1; foreach my $test (@idna) { my ($comment,$in,$out,$allowunassigned,$usestd3asciirules,$toascii,$tounicode) = @{$test}; is(encode_punycode($in), $out, $comment.' (encode_punycode)'); is(decode_punycode($out), $in, $comment.' (decode_punycode)'); } # Test vectors extracted from: # # Nameprep and IDNA Test Vectors # draft-josefsson-idn-test-vectors # # Copyright (C) The Internet Society (2003). 
All Rights Reserved. # # This document and translations of it may be copied and furnished # to others, and derivative works that comment on or otherwise # explain it or assist in its implementation may be prepared, # copied, published and distributed, in whole or in part, without # restriction of any kind, provided that the above copyright # notice and this paragraph are included on all such copies and # derivative works. However, this document itself may not be # modified in any way, such as by removing the copyright notice or # references to the Internet Society or other Internet # organizations, except as needed for the purpose of developing # Internet standards in which case the procedures for copyrights # defined in the Internet Standards process must be followed, or # as required to translate it into languages other than English.