Encode-Escape-0.14/000075500000000000000000000000001112142532100140175ustar00rootroot00000000000000Encode-Escape-0.14/Build.PL000064400000000000000000000006661112142532100153230ustar00rootroot00000000000000use Module::Build; use strict; use warnings; my $build = Module::Build->new ( module_name => 'Encode::Escape', license => 'perl', dist_author => 'You Hyun Jo', dist_version_from => 'lib/Encode/Escape.pm', create_readme => 1, requires => { 'perl' => 5.008, }, build_requires => { 'Test::More' => 0, }, add_to_cleanup => [ 'Encode-Escape-*' ], ); $build->create_build_script(); Encode-Escape-0.14/Changes000064400000000000000000000010241112142532100153070ustar00rootroot00000000000000Revision history for Perl extension Encode::Escape. 0.14 2007/12/05 13:28:03 you - second upload to CPAN. - Documentation - Add more tests 0.10 2007/12/05 06:20:08 you - first upload to CPAN. Encode::Escape::ASCII - implemented basic functions. Encode::Escape::Unicode - implemented basic functions - implemented mode system - python (or java) mode enables the use of "\uxxxx" unicode escape seqs. 0.01 Sun Dec 2 21:45:59 2007 - original version; created by h2xs 1.23 with options -X -n Encode::Escape Encode-Escape-0.14/INSTALL000064400000000000000000000013641112142532100150540ustar00rootroot00000000000000Installation instructions for Encode::Escape To install this module type the following: perl Build.PL ./Build ./Build test ./Build install Or, if you're on a platform (like DOS or Windows) that doesn't require the "./" notation: perl Build.PL Build Build test Build install If you want to use "make": perl Makefile.PL make make test make install DEPENDENCIES This module requires these other modules and libraries: Encode, Encode::Encoding COPYRIGHT AND LICENCE Copyright (C) 2007 by You Hyun Jo This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. 
Encode-Escape-0.14/MANIFEST000064400000000000000000000003171112142532100151510ustar00rootroot00000000000000Changes Makefile.PL MANIFEST README Build.PL INSTALL t/Encode-Escape.t t/Encode-Escape-ASCII.t t/Encode-Escape-Unicode.t lib/Encode/Escape.pm lib/Encode/Escape/ASCII.pm lib/Encode/Escape/Unicode.pm META.yml Encode-Escape-0.14/META.yml000064400000000000000000000011621112142532100152700ustar00rootroot00000000000000--- name: Encode-Escape version: 0.14 author: - You Hyun Jo abstract: Perl extension for Encodings of various escape sequences license: perl requires: perl: 5.008 build_requires: Test::More: 0 generated_by: Module::Build version 0.280801 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.2.html version: 1.2 provides: Encode::Escape: file: lib/Encode/Escape.pm version: 0.14 Encode::Escape::ASCII: file: lib/Encode/Escape/ASCII.pm version: 0.19 Encode::Escape::Unicode: file: lib/Encode/Escape/Unicode.pm version: 0.13 resources: license: http://dev.perl.org/licenses/ Encode-Escape-0.14/Makefile.PL000064400000000000000000000010441112142532100157700ustar00rootroot00000000000000use 5.008008; use ExtUtils::MakeMaker; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. WriteMakefile( NAME => 'Encode::Escape', VERSION_FROM => 'lib/Encode/Escape.pm', # finds $VERSION PREREQ_PM => {}, # e.g., Module::Name => 1.1 ($] >= 5.005 ? 
## Add these new keywords supported since 5.005 (ABSTRACT_FROM => 'lib/Encode/Escape.pm', # retrieve abstract from module AUTHOR => 'you ') : ()), ); Encode-Escape-0.14/README000064400000000000000000000151151112142532100147020ustar00rootroot00000000000000NAME Encode::Escape - Perl extension for Encodings of various escape sequences SYNOPSIS use Encode::Escape; $escaped_ascii = "Perl\\tPathologically Eclectic Rubbish Lister\\n"; $ascii = decode 'ascii-escape', $escaped_ascii; # Now, $ascii is equivalent to # double quote string "Perl\tPathologically Eclectic Rubbish Lister\n" $escaped_unicode = "Perl \\x{041F}\\x{0435}\\x{0440}\\x{043B} \\x{D384}"; $string = decode 'unicode-escape', $escaped_unicode; # Now, $string is equivalent to # double quote string "Perl \x{041F}\x{0435}\x{0440}\x{043B} \x{D384}" It may look like nonsense. Here's another case. If you have a text data file 'ascii-escape.txt'. It contains a line: Perl\tPathologically Eclectic Rubbish Lister\n And you want to use it as if it were a normal double quote string in source code. Try this: open(FILE, 'ascii-escape.txt'); while(<FILE>) { chomp; print decode 'ascii-escape', $_; } DESCRIPTION Encode::Escape module is a wrapper class for encodings of escape sequences. It is NOT for an escape-based encoding (e.g. ISO-2022-JP). It is for encoding/decoding escape sequences, generally used in source code. Many programming languages, markup languages, and typesetting languages provide methods for encoding special (or functional) characters and non-keyboard symbols in the form of escape sequences. That is what this module is concerned with. Yes, you're right. There already exist many modules. See String::Escape, Unicode::Escape, TeX::Encode, HTML::Mason::Escapes, Template::Plugin::XML::Escape, URI::Escape, etc. But for some reason I need to do it in a different way. There is more than one way to do it! After that, I asked myself if this module is useful. Maybe not, except for me. At this time, Zhuangzi reminds me, "The useless has its use". 
ASCII See Encode::Escape::ASCII Unicode See Encode::Escape::Unicode ESCAPE SEQUENCES Character Escape Codes ASCII defines 128 characters: 33 non-printing control characters (0x00 -- 0x1f, 0x7f) and 95 printable characters (0x20 -- 0x7e). Character Escape Codes in the C programming language provide a method to express control characters, using only printable ones. These are accepted by Perl and many other languages. CEC HEX Description --- ---- -------------- \0 00 Null character \a 07 Bell \b 08 Backspace \t 09 Horizontal Tab \n 0a Line feed \v 0b Vertical Tab \f 0c Form feed \r 0d Carriage return Programming languages provide escape sequences for printable characters, which have significant meaning in that language. Otherwise, it would be harder to print them literally. ESC HEX Description --- ---- --------------- \" 22 double quote \\ 5c backslash Refer to the articles on ASCII, Escape character, and Escape sequence for more details. Perl Escape Sequences Perl uses backslash as an escape character. These work in normal strings and regular expressions except \b. ESC Description --- -------------------------- \a Alarm (beep) \b Backspace \e Escape \f Formfeed \n Newline \r Carriage return \t Tab \037 Any octal ASCII value \x7f Any hexadecimal ASCII value \x{263a} A wide hexadecimal value \cx Control-x \N{name} name is a name for the Unicode character (use charnames) The following escape sequences are available in constructs that interpolate. \l Lowercase next character \u Titlecase next character \L Lowercase until \E \U Uppercase until \E \E End case modification In regular expressions: \b An assertion, not backspace, except in a character class \Q Disable pattern metacharacters until \E Unlike C and other languages, Perl has no \v escape sequence for the vertical tab (VT - ASCII 11). For constructs that do interpolate, variables beginning with "$" or "@" are interpolated. 
\$ Dollar Sign \@ At sign \" Print double quotes \ Escape next character if known, otherwise print it See perlreref, perlop Python Escape Sequences \newline Ignored \\ Backslash (\) \' Single quote (') \" Double quote (") \a ASCII Bell (BEL) \b ASCII Backspace (BS) \f ASCII Formfeed (FF) \n ASCII Linefeed (LF) \N{name} Character named 'name' in the Unicode database \r ASCII Carriage Return (CR) \t ASCII Horizontal Tab (TAB) \uxxxx Character with 16-bit hex value xxxx (Unicode only) \Uxxxxxxxx Character with 32-bit hex value xxxxxxxx (Unicode only) \v ASCII Vertical Tab (VT) \ooo Character with octal value ooo \xhh Character with hex value hh See the Python documentation for details. Unicode escape sequences in the form of "\u"*xxxx* are used in Java, Python, C#, and JavaScript. The Unicode::Escape module implements them. LaTeX Escapes TeX::Encode implements encodings of LaTeX escapes. It converts (encodes) a utf8 string to LaTeX escapes. HTML Escapes See HTML::Mason::Escapes SEE ALSO The useless has its use Hui Tzu said to Chuang Tzu, "Your words are useless!" Chuang Tzu said, "A man has to understand the useless before you can talk to him about the useful. The earth is certainly vast and broad, though a man uses no more of it than the area he puts his feet on. If, however, you were to dig away all the earth from around his feet until you reached the Yellow Springs, then would the man still be able to make use of it?" "No, it would be useless," said Hui Tzu. "It is obvious, then," said Chuang Tzu, "that the useless has its use." AUTHOR You Hyun Jo, <you at cpan dot org> ACKNOWLEDGEMENTS Matthew Simon Cavalletto for String::Escape. It served as a good reference when writing the first working version of Encode::Escape::ASCII. COPYRIGHT AND LICENSE Copyright (C) 2007 by You Hyun Jo This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. 
Encode-Escape-0.14/lib/000075500000000000000000000000001112142532100145655ustar00rootroot00000000000000Encode-Escape-0.14/lib/Encode/000075500000000000000000000000001112142532100157625ustar00rootroot00000000000000Encode-Escape-0.14/lib/Encode/Escape.pm000064400000000000000000000167601112142532100175320ustar00rootroot00000000000000
# Encodings based on Escape Sequence
# $Id: Escape.pm,v 1.14 2007-12-05 22:08:33+09 you Exp $

package Encode::Escape;

use 5.008008;
use strict;
use warnings;

our $VERSION = do { q$Revision: 1.14 $ =~ /\d+\.(\d+)/; sprintf "%.2f", $1 / 100 };

# import(@symbols)
#
# Re-exports Encode's symbols (encode, decode, ...) into the caller.
# The pseudo-tag ':modes' additionally exports this package's enmode()
# and demode() helpers.  ':modes' is consumed here and never forwarded
# to Encode, which does not know it.
#
# The tag used to be recognised only as the FIRST import argument;
# "use Encode::Escape qw(encode :modes)" therefore handed ':modes' to
# Encode's exporter.  It is now accepted at any position.
sub import {
    my ($class, @requested) = @_;

    my @for_encode = grep { !defined $_ || $_ ne ':modes' } @requested;

    if (@for_encode != @requested) {
        require Exporter;
        our @ISA       = qw(Exporter);
        our @EXPORT_OK = qw(enmode demode);
        __PACKAGE__->export_to_level(1, $class, 'enmode', 'demode');
    }

    require Encode;
    Encode->export_to_level(1, $class, @for_encode);
}

use Encode::Escape::ASCII;
use Encode::Escape::Unicode;

# _encoding_for($name)
#
# Returns the Encode encoding object registered under $name, or croaks
# with "Unknown encoding '...'" when Encode does not know the name.
sub _encoding_for {
    my ($enc) = @_;

    my $obj = Encode::find_encoding($enc);
    unless (defined $obj) {
        require Carp;
        Carp::croak("Unknown encoding '$enc'");
    }
    return $obj;
}

# enmode($encoding_name, @mode_args)
#
# Forwards @mode_args to the enmode() method of the named encoding and
# returns whatever that method returns.
sub enmode ($@) {
    my $enc = shift;
    return _encoding_for($enc)->enmode(@_);
}

# demode($encoding_name, @mode_args)
#
# Forwards @mode_args to the demode() method of the named encoding and
# returns whatever that method returns.
sub demode ($@) {
    my $enc = shift;
    return _encoding_for($enc)->demode(@_);
}

1;
__END__

=head1 NAME

Encode::Escape - Perl extension for Encodings of various escape sequences

=head1 SYNOPSIS

  use Encode::Escape;

  $escaped_ascii = "Perl\\tPathologically Eclectic Rubbish Lister\\n";
  $ascii = decode 'ascii-escape', $escaped_ascii;

  # Now, $ascii is equivalent to
  # double quote string "Perl\tPathologically Eclectic Rubbish Lister\n"

  $escaped_unicode = "Perl \\x{041F}\\x{0435}\\x{0440}\\x{043B} \\x{D384}";
  $string = decode 'unicode-escape', $escaped_unicode;

  # Now, $string is equivalent to
  # double quote string "Perl \x{041F}\x{0435}\x{0440}\x{043B} \x{D384}"

It may look like nonsense. Here's another case.

If you have a text data file 'ascii-escape.txt'.
It contains a line: Perl\tPathologically Eclectic Rubbish Lister\n And you want to use it as if it were a normal double quote string in source code. Try this: open(FILE, 'ascii-escape.txt'); while(<FILE>) { chomp; print decode 'ascii-escape', $_; } =head1 DESCRIPTION L<Encode::Escape> module is a wrapper class for encodings of escape sequences. It is NOT for an escape-based encoding (e.g. ISO-2022-JP). It is for encoding/decoding escape sequences, generally used in source code. Many programming languages, markup languages, and typesetting languages provide methods for encoding special (or functional) characters and non-keyboard symbols in the form of escape sequences. That is what this module is concerned with. Yes, you're right. There already exist many modules. See L<String::Escape>, L<Unicode::Escape>, L<TeX::Encode>, L<HTML::Mason::Escapes>, L<Template::Plugin::XML::Escape>, L<URI::Escape>, etc. But for some reason I need to do it in a different way. There is more than one way to do it! After that, I asked myself if this module is useful. Maybe not, except for me. At this time, Zhuangzi reminds me, "L</The useless has its use>". =head2 ASCII See L<Encode::Escape::ASCII> =head2 Unicode See L<Encode::Escape::Unicode> =head1 ESCAPE SEQUENCES =head2 Character Escape Codes ASCII defines 128 characters: 33 non-printing control characters (0x00 -- 0x1f, 0x7f) and 95 printable characters (0x20 -- 0x7e). Character Escape Codes in the C programming language provide a method to express control characters, using only printable ones. These are accepted by Perl and many other languages. CEC HEX Description --- ---- -------------- \0 00 Null character \a 07 Bell \b 08 Backspace \t 09 Horizontal Tab \n 0a Line feed \v 0b Vertical Tab \f 0c Form feed \r 0d Carriage return Programming languages provide escape sequences for printable characters, which have significant meaning in that language. Otherwise, it would be harder to print them literally. ESC HEX Description --- ---- --------------- \" 22 double quote \\ 5c backslash Refer to the articles on ASCII, Escape character, and Escape sequence for more details. =head2 Perl Escape Sequences Perl uses backslash as an escape character. 
These work in normal strings and regular expressions except \b. ESC Description --- -------------------------- \a Alarm (beep) \b Backspace \e Escape \f Formfeed \n Newline \r Carriage return \t Tab \037 Any octal ASCII value \x7f Any hexadecimal ASCII value \x{263a} A wide hexadecimal value \cx Control-x \N{name} name is a name for the Unicode character (use charnames) The following escape sequences are available in constructs that interpolate. \l Lowercase next character \u Titlecase next character \L Lowercase until \E \U Uppercase until \E \E End case modification In regular expressions: \b An assertion, not backspace, except in a character class \Q Disable pattern metacharacters until \E Unlike C and other languages, Perl has no \v escape sequence for the vertical tab (VT - ASCII 11). For constructs that do interpolate, variables beginning with "$" or "@" are interpolated. \$ Dollar Sign \@ At sign \" Print double quotes \ Escape next character if known, otherwise print it See L<perlreref>, L<perlop> =head2 Python Escape Sequences \newline Ignored \\ Backslash (\) \' Single quote (') \" Double quote (") \a ASCII Bell (BEL) \b ASCII Backspace (BS) \f ASCII Formfeed (FF) \n ASCII Linefeed (LF) \N{name} Character named 'name' in the Unicode database \r ASCII Carriage Return (CR) \t ASCII Horizontal Tab (TAB) \uxxxx Character with 16-bit hex value xxxx (Unicode only) \Uxxxxxxxx Character with 32-bit hex value xxxxxxxx (Unicode only) \v ASCII Vertical Tab (VT) \ooo Character with octal value ooo \xhh Character with hex value hh See the Python documentation for details. Unicode escape sequences in the form of C<\u>I<xxxx> are used in Java, Python, C#, and JavaScript. The L<Unicode::Escape> module implements them. =head2 LaTeX Escapes L<TeX::Encode> implements encodings of LaTeX escapes. It converts (encodes) a utf8 string to LaTeX escapes. =head2 HTML Escapes See L<HTML::Mason::Escapes> =head1 SEE ALSO =head1 The useless has its use Hui Tzu said to Chuang Tzu, "Your words are useless!" Chuang Tzu said, "A man has to understand the useless before you can talk to him about the useful. 
The earth is certainly vast and broad, though a man uses no more of it than the area he puts his feet on. If, however, you were to dig away all the earth from around his feet until you reached the Yellow Springs, then would the man still be able to make use of it?"

"No, it would be useless," said Hui Tzu.

"It is obvious, then," said Chuang Tzu, "that the useless has its use."

=begin html

--- from External Things, Chuang Tzu translated by Burton Watson

=end html

=head1 AUTHOR

You Hyun Jo, E<lt>you at cpan dot orgE<gt>

=head1 ACKNOWLEDGEMENTS

Matthew Simon Cavalletto for L<String::Escape>. It served as a good reference when
writing the first working version of L<Encode::Escape::ASCII>.

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2007 by You Hyun Jo

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.

=cut

#set ts=4 sts=4 sw=4 et
Encode-Escape-0.14/lib/Encode/Escape/000075500000000000000000000000001112142532100171625ustar00rootroot00000000000000Encode-Escape-0.14/lib/Encode/Escape/ASCII.pm000064400000000000000000000122111112142532100203450ustar00rootroot00000000000000
# Encoding of ASCII Escape Sequences (or Escaped ASCII)
# $Id: ASCII.pm,v 1.19 2007-12-05 22:11:11+09 you Exp $

package Encode::Escape::ASCII;

our $VERSION = do { q$Revision: 1.19 $ =~ /\d+\.(\d+)/; sprintf "%.2f", $1 / 100 };

use 5.008008;
use strict;
use warnings;

use Encode::Encoding;
use base qw(Encode::Encoding);

# Register this codec with Encode under both spellings of its name.
__PACKAGE__->Define(qw/ascii-escape ascii_escape/);

# import(@symbols)
#
# Loads Encode and re-exports its symbols (encode, decode, ...) into the
# package that "use"s this module.
sub import {
    require Encode;
    Encode->export_to_level(1, @_);
}

# enmode($class, $mode) / demode($class, $mode)
#
# Accepted so that Encode::Escape::enmode()/demode() can be called on
# this codec; the arguments are unpacked and otherwise ignored.
sub enmode ($$) {
    my ($class, $mode) = @_;
}

sub demode ($$) {
    my ($class, $mode) = @_;
}

# encode($obj, $str, $chk)
#
# Returns $str with special and non-printing characters replaced by
# escape sequences (see escape()).
#
# When $chk is true the caller's source string is emptied after a
# successful conversion.  Encode::Escape::Unicode already does this and
# it is the in-place convention shown in the Encode::Encoding
# documentation; this codec previously ignored $chk.
sub encode($$;$) {
    my ($obj, $str, $chk) = @_;
    my $escaped = escape($str);
    $_[1] = '' if $chk;    # in-place edit: the whole input was consumed
    return $escaped;
}

# decode($obj, $str, $chk)
#
# Returns $str with escape sequences replaced by the characters they
# denote (see unescape()).  $chk is honoured as in encode().
sub decode($$;$) {
    my ($obj, $str, $chk) = @_;
    my $unescaped = unescape($str);
    $_[1] = '' if $chk;    # in-place edit: the whole input was consumed
    return $unescaped;
}

# Character => letter that follows the backslash in its escape sequence.
my %ESCAPED = (
    "\\" => '\\',
    "\r" => 'r',
    "\n" => 'n',
    "\t" => 't',
    "\a" => 'a',
    "\b" => 'b',
"\e" => 'e', "\f" => 'f', "\"" => '"', "\$" => '$', "\@" => '@',
);

# Letter after the backslash => the character it stands for.
my %UNESCAPED = ( reverse %ESCAPED );

# chr2hex($char)
#
# Returns the "\xHH" escape sequence for a single ASCII character.
# Croaks when the character's ordinal is 128 or above.
sub chr2hex {
    my($c) = @_;
    if ( ord($c) < 128 ) {
        return sprintf("\\x%02x", ord($c));
    }
    else {
        require Carp;
        Carp::croak (
            "'ascii-escape' codec can't encode character: ordinal " . ord($c)
        );
    }
}

# escape($string)
#
# Returns $string ('' for undef) with
#   * the characters listed in %ESCAPED written as backslash + letter, and
#   * every remaining character outside printable ASCII (0x20 - 0x7e)
#     written as "\xHH" through chr2hex().
#
# Croaks (via chr2hex) on any character with ordinal >= 128.  The second
# pass used to match only 0x00-0x1f and 0x7f-0xff, so characters above
# 0xff slipped through unescaped while 0x80-0xff croaked.
sub escape ($) {
    my $str = defined $_[0] ? $_[0] : '';

    $str =~ s/([\a\b\e\f\r\n\t\"\\\$\@])/\\$ESCAPED{$1}/sg;
    $str =~ s/([^\x20-\x7e])/chr2hex($1)/gse;

    return $str;
}

# hex2chr($hex_digits)
#
# Returns the character whose ordinal is given in hexadecimal.
# Croaks when the value is outside 0 .. 127.
sub hex2chr {
    my($hex) = @_;
    if ( hex($hex) >= 0 and hex($hex) < 128) {
        return chr(hex($hex));
    }
    else {
        require Carp;
        Carp::croak(
            "'ascii-escape' codec can't decode escape sequence: "
            . "\\x$hex (ordinal " . hex($hex) . ")"
        );
    }
}

# oct2chr($octal_digits)
#
# Returns the character whose ordinal is given in octal.
# Croaks when the value is outside 0 .. 127.
sub oct2chr {
    my($oct) = @_;
    if ( oct($oct) >= 0 and oct($oct) < 128 ) {
        return chr(oct($oct));
    }
    else {
        require Carp;
        Carp::croak (
            "'ascii-escape' codec can't decode escape sequence: "
            . "\\$oct (ordinal " . oct($oct). ")"
        );
    }
}

# $original_string = unescape( $special_characters_escaped );
#
# Returns $string ('' for undef) with every escape sequence replaced by
# the character it denotes:
#   \x{H} .. \x{HHHH}   hexadecimal ordinal (must be < 128)
#   \xH, \xHH           hexadecimal ordinal (must be < 128)
#   \o, \oo, \ooo       octal ordinal       (must be < 128)
#   \a \b \e \f \n \r \t \\ \" \$ \@  (letters in either case)
#   \<anything else>    that character, backslash dropped
# A lone backslash at the very end of the string is left as it is.
# Croaks (via hex2chr/oct2chr) on ordinals >= 128.
#
# The string is scanned once, left to right, so text produced by one
# escape is never interpreted again.  The former chain of five
# substitutions decoded "\x5cn" twice (backslash + n, then newline) and
# failed to decode an escape that followed an escaped backslash
# ("\\\x01"), which broke round-trips through escape().
sub unescape ($) {
    my $str = defined $_[0] ? $_[0] : '';

    $str =~ s{
        \\
        (?:
            x \{ ([0-9A-Fa-f]{1,4}) \}
          | x    ([0-9A-Fa-f]{1,2})
          |      ([0-7]{1,3})
          |      (.)
        )
    }{
        my ($braced, $hex, $oct, $chr) = ($1, $2, $3, $4);
          defined $braced                ? hex2chr($braced)
        : defined $hex                   ? hex2chr($hex)
        : defined $oct                   ? oct2chr($oct)
        : exists $UNESCAPED{ lc $chr }   ? $UNESCAPED{ lc $chr }
        :                                  $chr
    }gsex;

    return $str;
}

1;
__END__

=head1 NAME

Encode::Escape::ASCII - Perl extension for Encoding of ASCII Escape Sequences (or Escaped ASCII)

=head1 SYNOPSIS

  use Encode::Escape::ASCII;

  $escaped = "Perl\\tPathologically Eclectic Rubbish Lister\\n";
  $string = decode 'ascii-escape', $escaped;

  # Now, $string is equivalent to "Perl\tPathologically Eclectic Rubbish Lister\n";

  $escaped = "\\x50\\x65\\x72\\x6c\\x09\\x50\\x61\\x74\\x68\\x6f\\x6c\\x6f" .
             "\\x67\\x69\\x63\\x61\\x6c\\x6c\\x79\\x20\\x45\\x63\\x6c\\x65" .
             "\\x63\\x74\\x69\\x63\\x20\\x52\\x75\\x62\\x69\\x73\\x68\\x20" .
"\\x4c\\x69\\x73\\x74\\x65\\x72\\x0a"; $string = decode 'ascii-escape', $escaped; # Now, $string is equivalent to "Perl\tPathologically Eclectic Rubish Lister\n"; If you have a text data file 'ascii-escape.txt'. It contains a line: Perl\tPathologically Eclectic Rubbish Lister\n And you want to use it as if it were a normal double quote string in source code. Try this: open(FILE, 'ascii-escape.txt'); while(<FILE>) { chomp; print decode 'ascii-escape', $_; } =head1 DESCRIPTION L<Encode::Escape::ASCII> module implements encoding of ASCII escape sequences. Simply put, it converts (decodes) escape sequences into ASCII characters (0x00 -- 0x7f) and converts (encodes) non-printable control characters into escape sequences. It supports only ASCII characters. ASCII is called low ASCII or 7-bit ASCII when one distinguishes it from various extended ASCII codes, i.e. high ASCII or 8-bit ASCII. =head2 Supported Escape Sequences Escape Sequences Description --------------- -------------------------- \a Alarm (beep) \b Backspace \e Escape \f Formfeed \n Newline \r Carriage return \t Tab \000 - \177 octal ASCII value. \0, \00, and \000 are equivalent. \x00 - \x7f hexadecimal ASCII value. \x0 and \x00 are equivalent. \x{0000} - \x{007f} hexadecimal ASCII value. \x{0}, \x{00}, \x{000}, and \x{0000} are equivalent. \\ Backslash \$ Dollar Sign \@ At sign \" Print double quotes \ Escape next character if known, otherwise print it =head1 AUTHOR you, E<lt>you at cpan dot orgE<gt> =head1 COPYRIGHT AND LICENSE Copyright (C) 2007 by you This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. 
=cut

# vi: set ts=4 sts=4 sw=4 et
Encode-Escape-0.14/lib/Encode/Escape/Unicode.pm000064400000000000000000000206761112142532100211210ustar00rootroot00000000000000
# Encoding of Unicode Escape Sequences (or Escaped Unicode)
# $Id: Unicode.pm,v 1.13 2007-12-05 22:11:11+09 you Exp $

package Encode::Escape::Unicode;

our $VERSION = do { q$Revision: 1.13 $ =~ /\d+\.(\d+)/; sprintf "%.2f", $1 / 100 };

use 5.008008;
use strict;
use warnings;

use Encode::Encoding;
use base qw(Encode::Encoding);

# Register this codec with Encode under both spellings of its name.
__PACKAGE__->Define(qw/unicode-escape unicode_escape/);

# import(@symbols)
#
# Resets both modes to 'default', then loads Encode and re-exports its
# symbols (encode, decode, ...) into the package that "use"s this module.
sub import {
    __PACKAGE__->enmode('default');
    __PACKAGE__->demode('default');
    require Encode;
    Encode->export_to_level(1, @_);
}

# Currently selected encoder/decoder mode names (keys of %encoder and
# %decoder).  Initialised here so that the codec also works, without
# "uninitialized value" warnings, when the module is loaded by require()
# and import() never runs.
our $enmode = 'default';
our $demode = 'default';

sub encoder($);
sub decoder($);

#
# == encoder/decoder modes ==
#
# NOTE: "undef =>" is the string key 'undef' (the fat comma quotes it);
# enmode()/demode() map an undefined mode argument to that string.

our %encoder = (
    undef   => \&perl_encoder,
    ''      => \&perl_encoder,
    default => \&perl_encoder,
    perl    => \&perl_encoder,
    java    => \&python_encoder,
    python  => \&python_encoder,
    csharp  => \&python_encoder,
);

our %decoder = (
    undef   => \&perl_decoder,
    ''      => \&perl_decoder,
    default => \&perl_decoder,
    perl    => \&perl_decoder,
    java    => \&python_decoder,
    python  => \&python_decoder,
    csharp  => \&python_decoder,
);

#
# == encode/decode ==
#

# encode($obj, $str, $chk)
#
# Returns $str converted to escape sequences by the encoder of the
# current enmode.  When $chk is true the caller's source string is
# emptied (in-place edit).
sub encode($$;$) {
    my ($obj, $str, $chk) = @_;
    $_[1] = '' if $chk;
    return encoder $str;
}

# decode($obj, $str, $chk)
#
# Returns $str with escape sequences resolved by the decoder of the
# current demode.  When $chk is true the caller's source string is
# emptied (in-place edit).
sub decode($$;$) {
    my ($obj, $str, $chk) = @_;
    $_[1] = '' if $chk;
    return decoder $str;
}

#
# == enmode/demode ==
#

# _codec_name($class_or_obj)
#
# Name used in error messages.  name() is called only on an encoding
# object; for a class-name invocant such as
# Encode::Escape::Unicode->demode(...), as used by import() and in the
# SYNOPSIS, name() would dereference the string as a hash and die under
# strict refs before the intended croak could be raised.
sub _codec_name {
    my ($invocant) = @_;
    return ref $invocant ? $invocant->name() : 'unicode-escape';
}

# enmode($class_or_obj, $mode)
#
# Selects the encoder mode (a key of %encoder; undef selects the
# default).  Returns the mode name.  Croaks on an unknown mode.
sub enmode ($$) {
    my ($class, $mode) = @_;
    $mode = 'undef' unless defined $mode;
    unless (exists $encoder{$mode}) {
        require Carp;
        Carp::croak(
            "Unknown enmode '$mode' for encoding '" . _codec_name($class) . "'"
        );
    }
    $enmode = $mode;
}

# demode($class_or_obj, $mode)
#
# Selects the decoder mode (a key of %decoder; undef selects the
# default).  Returns the mode name.  Croaks on an unknown mode.
sub demode ($$) {
    my ($class, $mode) = @_;
    $mode = 'undef' unless defined $mode;
    unless (exists $decoder{$mode}) {
        require Carp;
        Carp::croak(
            "Unknown demode '$mode' for encoding '" . _codec_name($class) . "'"
        );
    }
    $demode = $mode;
}

#
# = DATA AND SUBROUTINES FOR INTERNAL USE =
#

#
# == encoder/decoder ==
#

# encoder($str): dispatches to the encoder selected by $enmode.
sub encoder($) {
    local $_ = ( defined $_[0] ?
$_[0] : '' ); return $encoder{ defined $enmode ? $enmode : 'default' }->($_); }

# decoder($str)
#
# Dispatches $str ('' for undef) to the decoder selected by $demode.
# An unset mode means 'default', so the codec works without warnings
# when import() has not run.
sub decoder($) {
    my $str = defined $_[0] ? $_[0] : '';
    return $decoder{ defined $demode ? $demode : 'default' }->($str);
}

#
# == enmode_encoder / demode_decoder ==
#

# default (perl) escape sequences
#

# perl_encoder($str)
#
# Escapes the characters in %ESCAPED as backslash + letter and writes
# every remaining character outside printable ASCII as \x{HHHH}
# (upper-case hex).  Croaks (via chr2hex) on ordinals >= 65536; the
# old character class stopped at \x{ffff}, so such characters were
# passed through raw and the croak in chr2hex could never fire.
sub perl_encoder($) {
    my $str = escape( defined $_[0] ? $_[0] : '' );
    $str =~ s/([^\x20-\x7e])/sprintf('\\x{%s}', uc chr2hex($1))/gse;
    return $str;
}

# perl_decoder($str)
#
# Resolves \x{H..HHHH}, \xH/\xHH, octal and single-character escapes
# in one left-to-right pass.
sub perl_decoder($) {
    my $str = defined $_[0] ? $_[0] : '';
    return _decode_escapes($str, 'perl');
}

# python (or java, c#) escape sequences
#

# python_encoder($str)
#
# Like perl_encoder() but writes \uhhhh (lower-case hex).
# Croaks (via chr2hex) on ordinals >= 65536.
sub python_encoder($) {
    my $str = escape( defined $_[0] ? $_[0] : '' );
    $str =~ s/([^\x20-\x7e])/'\\u' . chr2hex($1)/gse;
    return $str;
}

# python_decoder($str)
#
# Resolves \uHHHH, \xH/\xHH, octal and single-character escapes in one
# left-to-right pass.  \xHH is listed for this mode in the module's
# documentation but was previously not decoded here (the backslash was
# merely dropped).
sub python_decoder {
    my $str = defined $_[0] ? $_[0] : '';
    return _decode_escapes($str, 'python');
}

#
# == common data and subroutines ==
#

# Character => letter that follows the backslash in its escape sequence.
my %ESCAPED = (
    "\\" => '\\',
    "\r" => 'r',
    "\n" => 'n',
    "\t" => 't',
    "\a" => 'a',
    "\b" => 'b',
    "\e" => 'e',
    "\f" => 'f',
    "\"" => '"',
    "\$" => '$',
    "\@" => '@',
);

# Letter after the backslash => the character it stands for.
my %UNESCAPED = ( reverse %ESCAPED );

# What may follow a backslash, per decoder syntax.  Capture groups:
#   1 = wide hex digits (\x{...} or \uHHHH)   2 = \xHH digits
#   3 = octal digits                          4 = any other character
my %ESCAPE_TOKEN = (
    perl   => qr/x\{([0-9A-Fa-f]{1,4})\}|x([0-9A-Fa-f]{1,2})|([0-7]{1,3})|(.)/s,
    python => qr/u([0-9A-Fa-f]{4})|x([0-9A-Fa-f]{1,2})|([0-7]{1,3})|(.)/s,
);

# _unescape_char($char)
#
# Returns the character denoted by backslash + $char: the %UNESCAPED
# entry for the letter (either case), otherwise $char itself.
sub _unescape_char {
    my ($chr) = @_;
    my $key = lc $chr;
    return exists $UNESCAPED{$key} ? $UNESCAPED{$key} : $chr;
}

# _decode_escapes($str, $syntax)
#
# Single-pass decoder shared by perl_decoder() and python_decoder();
# $syntax is a key of %ESCAPE_TOKEN.  Because the string is scanned
# once, text produced by one escape is never interpreted again.  The
# old multi-pass code turned "\x{5c}n" into a newline and failed to
# decode an escape that followed an escaped backslash.  A lone trailing
# backslash is left as it is.  Croaks via hex2chr()/oct2chr().
sub _decode_escapes {
    my ($str, $syntax) = @_;
    my $token = $ESCAPE_TOKEN{$syntax};

    $str =~ s{\\(?:$token)}{
        my ($wide, $hex, $oct, $chr) = ($1, $2, $3, $4);
          defined $wide ? hex2chr($wide)
        : defined $hex  ? hex2chr($hex)
        : defined $oct  ? oct2chr($oct)
        :                 _unescape_char($chr)
    }ge;

    return $str;
}

# escape($str)
#
# Returns $str ('' for undef) with the characters listed in %ESCAPED
# written as backslash + letter.
sub escape ($) {
    my $str = defined $_[0] ? $_[0] : '';
    $str =~ s/([\a\b\e\f\r\n\t\"\\\$\@])/\\$ESCAPED{$1}/sg;
    return $str;
}

# unescape($str)
#
# Resolves octal (\o .. \ooo) and single-character escapes in one
# left-to-right pass; any other escaped character loses its backslash.
sub unescape ($) {
    my $str = defined $_[0] ? $_[0] : '';
    $str =~ s{\\(?:([0-7]{1,3})|(.))}{ defined $1 ? oct2chr($1) : _unescape_char($2) }gse;
    return $str;
}

# chr2hex($char)
#
# Returns the four-digit lower-case hex ordinal of $char.
# Croaks when the ordinal is 65536 or above.
sub chr2hex {
    my($c) = @_;
    if ( ord($c) < 65536 ) {
        return sprintf("%04x", ord($c));
    }
    else {
        require Carp;
        Carp::croak (
            "'unicode-escape' codec can't encode character: ordinal " . ord($c)
        );
    }
}

# hex2chr($hex_digits)
#
# Returns the character whose ordinal is given in hexadecimal.
# Croaks when the value is outside 0 .. 65535.
sub hex2chr {
    my($hex) = @_;
    if ( hex($hex) >= 0 and hex($hex) < 65536) {
        return chr(hex($hex));
    }
    else {
        require Carp;
        Carp::croak(
            "'unicode-escape' codec can't decode escape sequence: "
            . "\\x$hex (ordinal " . hex($hex) .
")" ); } }

# oct2chr($octal_digits)
#
# Returns the character whose ordinal is given in octal.
# Croaks when the value is outside 0 .. 255.
sub oct2chr {
    my($oct) = @_;
    if ( oct($oct) >= 0 and oct($oct) < 256 ) {
        return chr(oct($oct));
    }
    else {
        require Carp;
        Carp::croak (
            "'unicode-escape' codec can't decode escape sequence: "
            . "\\$oct (ordinal " . oct($oct). ")"
        );
    }
}

# The statement  $\ = "\n";  used to stand here.  It changed the global
# output record separator for the whole program as soon as this module
# was loaded, so every print() anywhere gained a trailing newline.
# A codec must not alter global I/O state; the assignment is removed.

1;
__END__

=head1 NAME

Encode::Escape::Unicode - Perl extension for Encoding of Unicode Escape Sequences

=head1 SYNOPSIS

  use Encode::Escape::Unicode;

  $escaped = "What is \\x{D384}? It's Perl!";
  $string = decode 'unicode-escape', $escaped;

  # Now, $string is equivalent to "What is \x{D384}? It's Perl!"

  Encode::Escape::Unicode->demode('python');

  $python_unicode_escape = "And \\u041f\\u0435\\u0440\\u043b? It's Perl, too.";
  $string = decode 'unicode-escape', $python_unicode_escape;

  # Now, $string eq "And \x{041F}\x{0435}\x{0440}\x{043B}? It's Perl, too."

If you have a text data file 'unicode-escape.txt'. It contains the lines:

  What is \x{D384}? It's Perl!\n
  And \x{041F}\x{0435}\x{0440}\x{043B}? It's Perl, too.\n

And you want to use it as if it were a normal double quote string in source
code. Try this:

  use Encode::Escape::Unicode;

  open(FILE, 'unicode-escape.txt');

  while(<FILE>) {
    chomp;
    print encode 'utf8', decode 'unicode-escape', $_;
  }

=head1 DESCRIPTION

L<Encode::Escape::Unicode> module implements encodings of escape sequences.

Simply put, it converts (decodes) escape sequences into Perl internal
strings (\x{0000} -- \x{ffff}) and encodes Perl strings to escape sequences.

=head2 MODES AND SUPPORTED ESCAPE SEQUENCES

=head3 default or perl mode

  Escape Sequences      Description
  ---------------      --------------------------
  \a                   Alarm (beep)
  \b                   Backspace
  \e                   Escape
  \f                   Formfeed
  \n                   Newline
  \r                   Carriage return
  \t                   Tab
  \000     - \377      octal ASCII value. \0, \00, and \000 are equivalent.
  \x00     - \xff      hexadecimal ASCII value. \x0 and \x00 are equivalent.
  \x{0000} - \x{ffff}  hexadecimal Unicode value. \x{0}, \x{00}, \x{000},
                       and \x{0000} are equivalent.

  \\                   Backslash
  \$                   Dollar Sign
  \@                   At sign
  \"                   Print double quotes
  \                    Escape next character if known, otherwise print it

This is the default mode.
You do not need to select it explicitly unless you have previously selected another mode. =head3 python or java mode Python, Java, and C# languages use the C<\u>I<xxxx> escape sequence for Unicode characters. Escape Sequences Description --------------- -------------------------- \a Alarm (beep) \b Backspace \e Escape \f Formfeed \n Newline \r Carriage return \t Tab \000 - \377 octal ASCII value. \0, \00, and \000 are equivalent. \x00 - \xff hexadecimal ASCII value. \x0 and \x00 are equivalent. \u0000 - \uffff hexadecimal Unicode value. \\ Backslash \$ Dollar Sign \@ At sign \" Print double quotes \ Escape next character if known, otherwise print it If you have data which contains C<\u>I<xxxx> escape sequences, this will translate them to utf8-encoded characters: use Encode::Escape; Encode::Escape::demode 'unicode-escape', 'python'; while(<>) { chomp; print encode 'utf8', decode 'unicode-escape', $_; } And this will translate C<\u>I<xxxx> to C<\x{>I<xxxx>C<}>. use Encode::Escape; Encode::Escape::enmode 'unicode-escape', 'perl'; Encode::Escape::demode 'unicode-escape', 'python'; while(<>) { chomp; print encode 'unicode-escape', decode 'unicode-escape', $_; } =head1 SEE ALSO See L<Encode::Escape>. =head1 AUTHOR you, E<lt>you at cpan dot orgE<gt> =head1 COPYRIGHT AND LICENSE Copyright (C) 2007 by you This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. =cut # vi: set ts=4 sts=4 sw=4 et Encode-Escape-0.14/t/000075500000000000000000000000001112142532100142625ustar00rootroot00000000000000Encode-Escape-0.14/t/Encode-Escape-ASCII.t000064400000000000000000000034201112142532100176670ustar00rootroot00000000000000# Before `make install' is performed this script should be runnable with # `make test'. 
# After `make install' it should work as `perl Encode-Escape-ASCII.t'

#########################

# change 'tests => 1' to 'tests => last_test_to_print';

use strict;
use warnings;

use Test::More tests => 9;
BEGIN { use_ok('Encode::Escape::ASCII') };

#########################

# Insert your test code below, the Test::More module is use()ed here so read
# its man page ( perldoc Test::More ) for help writing this test script.

# Loading the codec must also import Encode's encode() and decode().
ok defined &encode, "import Encode's encode() function";
ok defined &decode, "import Encode's decode() function";

# Named character escapes, in both directions.  is() takes the value
# under test first and the expected value second, so that failure
# diagnostics label "got" and "expected" correctly.
my $string  = "\a\b\e\f\n\r\t\\\"\$\@";
my $escaped = "\\a\\b\\e\\f\\n\\r\\t\\\\\\\"\\\$\\\@";

is decode('ascii-escape', $escaped), $string,
    'decoded character escape sequences';
is encode('ascii-escape', $string), $escaped,
    'encoded character escape sequences';

# Octal escapes of one, two and three digits.
my $string_oct  = "\0\00\000\11\011\100";
my $escaped_oct = "\\0\\00\\000\\11\\011\\100";

is decode('ascii-escape', $escaped_oct), $string_oct,
    'decoded octal escape sequences';

# Two-digit hexadecimal escapes.
my $string_hex  = "\x09\x47\x57\x67\x77";
my $escaped_hex = "\\x09\\x47\\x57\\x67\\x77";

is decode('ascii-escape', $escaped_hex), $string_hex,
    'decoded hex escape sequences';

# Control characters: those with a named escape use it, the others are
# written as \xHH.
my $string_non_printing
    = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e"
    . "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e"
    . "\x7f";
my $escaped_non_printing
    = "\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\a\\b\\t\\n\\x0b\\f\\r\\x0e"
    . "\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\e\\x1c\\x1d\\x1e"
    . "\\x7f";

is decode('ascii-escape', $escaped_non_printing), $string_non_printing,
    'decoded non-printing characters';
is encode('ascii-escape', $string_non_printing), $escaped_non_printing,
    'encoded non-printing characters';
Encode-Escape-0.14/t/Encode-Escape-Unicode.t000064400000000000000000000426301112142532100204330ustar00rootroot00000000000000
# Before `make install' is performed this script should be runnable with
# `make test'.
# After `make install' it should work as `perl Encode-Escape-Unicode.t'

#########################

# change 'tests => 1' to 'tests => last_test_to_print';

use strict;
use warnings;

use Test::More tests => 491;
BEGIN { use_ok('Encode::Escape::Unicode') };

#########################

# Insert your test code below, the Test::More module is use()ed here so read
# its man page ( perldoc Test::More ) for help writing this test script.

# Round-trip checks for the 'unicode-escape' encoding.
#
# Every comparison in this part of the script is written as
# is($got, $expected, $name): the value produced by encode()/decode()
# comes first, so that a failing test reports "got" and "expected" the
# right way round.

# These three are assigned further down the script; they are predeclared
# as package variables so that the remainder compiles under "use strict".
our (@escaped_unicode, $string_unicode, $escaped_python);

ok defined &encode, "import Encode's encode() function";
ok defined &decode, "import Encode's decode() function";

# Single-character escapes plus the quoting characters \ " $ @.
my $string  = "\a\b\e\f\n\r\t\\\"\$\@";
my $escaped = "\\a\\b\\e\\f\\n\\r\\t\\\\\\\"\\\$\\\@";

is(decode('unicode-escape', $escaped), $string,
    'decoded character escape sequences');
is(encode('unicode-escape', $string), $escaped,
    'encoded character escape sequences');

# Octal escapes written with one, two and three digits (decode only).
my $string_oct  = "\0\00\000\1\01\001\11\011\100\111";
my $escaped_oct = "\\0\\00\\000\\1\\01\\001\\11\\011\\100\\111";

is(decode('unicode-escape', $escaped_oct), $string_oct,
    'decoded octal escape sequences');

# Two-digit hex escapes, including code points above 0x7f (decode only).
my $string_hex
    = "\x27\x37\x47\x57\x67\x77\x87\x97\xa7\xb7\xc7\xd7\xe7\xf7";
my $escaped_hex
    = "\\x27\\x37\\x47\\x57\\x67\\x77\\x87\\x97\\xa7\\xb7\\xc7\\xd7\\xe7\\xf7";

is(decode('unicode-escape', $escaped_hex), $string_hex,
    'decoded hex escape sequences');

# Control characters and DEL.  In the escaped form the characters that
# have a mnemonic escape (\a \b \t \n \f \r \e) use it; the rest are
# written as \x{HHHH}.
# NOTE(review): 0x0f and 0x1f are absent from both strings -- unclear
# whether that is intentional.
my $string_non_printing
    = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e"
    . "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e"
    . "\x7f";
my $escaped_non_printing
    = "\\x{0000}\\x{0001}\\x{0002}\\x{0003}\\x{0004}\\x{0005}\\x{0006}\\a\\b\\t\\n\\x{000B}\\f\\r\\x{000E}"
    . "\\x{0010}\\x{0011}\\x{0012}\\x{0013}\\x{0014}\\x{0015}\\x{0016}\\x{0017}\\x{0018}\\x{0019}\\x{001A}\\e\\x{001C}\\x{001D}\\x{001E}"
    . "\\x{007F}";

is(decode('unicode-escape', $escaped_non_printing), $string_non_printing,
    'decoded non-printing characters');
is(encode('unicode-escape', $string_non_printing), $escaped_non_printing,
    'encoded non-printing characters');

# Language names used as decoded test data.  Each entry is paired, by
# index, with the entry of @escaped_unicode that holds its escaped form.
# The list continues past the end of this section.
my @string_unicode = (
    "English",
    "Deutsch",
    "Fran\x{00E7}ais",
    "Polski",
    "\x{65E5}\x{672C}\x{8A9E}",
    "Nederlands",
    "Italiano",
    "Portugu\x{00EA}s",
    "Espa\x{00F1}ol",
    "Svenska",
    "\x{0420}\x{0443}\x{0441}\x{0441}\x{043A}\x{0438}\x{0439}",
    "\x{4E2D}\x{6587}",
    "\x{202A}Norsk (bokm\x{00E5}l)\x{202C}",
    "Suomi",
    "Volap\x{00FC}k",
    "Lumbaart",
    "Rom\x{00E2}n\x{0103}",
    "T\x{00FC}rk\x{00E7}e",
    "Esperanto",
    "Catal\x{00E0}",
    "Sloven\x{010D}ina",
    "\x{010C}esky",
    "\x{0423}\x{043A}\x{0440}\x{0430}\x{0457}\x{043D}\x{0441}\x{044C}\x{043A}\x{0430}",
    "Magyar",
    "Dansk",
    "Bahasa Indonesia",
    "\x{05E2}\x{05D1}\x{05E8}\x{05D9}\x{05EA}",
    "Lietuvi\x{0173}",
    "\x{0421}\x{0440}\x{043F}\x{0441}\x{043A}\x{0438} / Srpski",
    "Sloven\x{0161}\x{010D}ina",
    "\x{0411}\x{044A}\x{043B}\x{0433}\x{0430}\x{0440}\x{0441}\x{043A}\x{0438}",
    "\x{D55C}\x{AD6D}\x{C5B4}",
    "\x{0627}\x{0644}\x{0639}\x{0631}\x{0628}\x{064A}\x{0629}",
    "Eesti",
    "\x{0C24}\x{0C46}\x{0C32}\x{0C41}\x{0C17}\x{0C41}",
    "\x{0928}\x{0947}\x{092A}\x{093E}\x{0932} \x{092D}\x{093E}\x{0937}\x{093E}",
    "Hrvatski",
    "Cebuano",
    "Galego",
    "\x{0395}\x{03BB}\x{03BB}\x{03B7}\x{03BD}\x{03B9}\x{03BA}\x{03AC}",
    "\x{0E44}\x{0E17}\x{0E22}",
    "\x{0641}\x{0627}\x{0631}\x{0633}\x{06CC}",
    "\x{202A}Norsk (nynorsk)\x{202C}",
    "Ti\x{1EBF}ng Vi\x{1EC7}t",
    "Bahasa Melayu",
    "\x{0987}\x{09AE}\x{09BE}\x{09B0} \x{09A0}\x{09BE}\x{09B0}/\x{09AC}\x{09BF}\x{09B7}\x{09CD}\x{09A3}\x{09C1}\x{09AA}\x{09CD}\x{09B0}\x{09BF}\x{09AF}\x{09BC}\x{09BE} \x{09AE}\x{09A3}\x{09BF}\x{09AA}\x{09C1}\x{09B0}\x{09C0}",
    "Euskara",
    "Bosanski",
    "Simple English",
    "\x{00CD}slenska",
    "L\x{00EB}tzebuergesch",
    "\x{10E5}\x{10D0}\x{10E0}\x{10D7}\x{10E3}\x{10DA}\x{10D8}",
    "Shqip",
    "Az\x{0259}rbaycan",
    "Brezhoneg",
    "Latina",
    "\x{09AC}\x{09BE}\x{0982}\x{09B2}\x{09BE}",
    "Ido",
"\x{0939}\x{093F}\x{0928}\x{094D}\x{0926}\x{0940}", "Srpskohrvatski / \x{0421}\x{0440}\x{043F}\x{0441}\x{043A}\x{043E}\x{0445}\x{0440}\x{0432}\x{0430}\x{0442}\x{0441}\x{043A}\x{0438}", "\x{092E}\x{0930}\x{093E}\x{0920}\x{0940}", "\x{041C}\x{0430}\x{043A}\x{0435}\x{0434}\x{043E}\x{043D}\x{0441}\x{043A}\x{0438}", "Basa Sunda", "Nnapulitano", "Cymraeg", "Tagalog", "\x{0BA4}\x{0BAE}\x{0BBF}\x{0BB4}\x{0BCD}", "Latvie\x{0161}u", "Piemont\x{00E8}is", "Sicilianu", "Plattd\x{00FC}\x{00FC}tsch", "Basa Jawa", "Occitan", "Kurd\x{00EE} / \x{0643}\x{0648}\x{0631}\x{062F}\x{06CC}", "Asturianu", "Walon", "Afrikaans", "\x{0422}\x{043E}\x{04B7}\x{0438}\x{043A}\x{04E3}", "\x{0411}\x{0435}\x{043B}\x{0430}\x{0440}\x{0443}\x{0441}\x{043A}\x{0430}\x{044F} (\x{0442}\x{0430}\x{0440}\x{0430}\x{0448}\x{043A}\x{0435}\x{0432}\x{0456}\x{0446}\x{0430})", "Aragon\x{00E9}s", "Tarand\x{00ED}ne", "\x{0411}\x{0435}\x{043B}\x{0430}\x{0440}\x{0443}\x{0441}\x{043A}\x{0430}\x{044F}", "Ripoarisch", "Krey\x{00F2}l ayisyen", "\x{0427}\x{0103}\x{0432}\x{0430}\x{0448}\x{043B}\x{0430}", "\x{0627}\x{0631}\x{062F}\x{0648}", "M\x{0101}ori", "Kiswahili", "Frysk", "\x{7CB5}\x{8A9E}", "O'zbek", "V\x{00E8}neto", "Gaeilge", "Runa Simi", "Corsu", "\x{0C95}\x{0CA8}\x{0CCD}\x{0CA8}\x{0CA1}", "\x{0D2E}\x{0D32}\x{0D2F}\x{0D3E}\x{0D33}\x{0D02}", "G\x{00E0}idhlig", "\x{05D9}\x{05D9}\x{05B4}\x{05D3}\x{05D9}\x{05E9}", "Hornjoserbsce", "Yor\x{00F9}b\x{00E1}", "\x{017D}emait\x{0117}\x{0161}ka", "Kapampangan", "Nahuatl", "Tatar\x{00E7}a", "Interlingua", "\x{0938}\x{0902}\x{0938}\x{094D}\x{0915}\x{0943}\x{0924}", "Limburgs", "\x{0540}\x{0561}\x{0575}\x{0565}\x{0580}\x{0565}\x{0576}", "Alemannisch", "Basa Banyumasan", "\x{12A0}\x{121B}\x{122D}\x{129B}", "B\x{00E2}n-l\x{00E2}m-g\x{00FA}", "Pangasinan", "Nouormand", "F\x{00F8}royskt", "S\x{00E1}megiella", "Nedersaksisch", "Winaray", "Furlan", "\x{092D}\x{094B}\x{091C}\x{092A}\x{0941}\x{0930}\x{0940}", "West-Vlams", "Novial", "\x{092A}\x{093E}\x{093F}\x{0934}", "Ilokano", 
"L\x{00ED}guru", "Scots", "\x{0928}\x{0947}\x{092A}\x{093E}\x{0932}\x{0940}", "Arpetan", "\x{078B}\x{07A8}\x{0788}\x{07AC}\x{0780}\x{07A8}\x{0784}\x{07A6}\x{0790}\x{07B0}", "\x{0418}\x{0440}\x{043E}\x{043D}\x{0430}\x{0443}", "\x{53E4}\x{6587} / \x{6587}\x{8A00}\x{6587}", "Zazaki", "\x{049A}\x{0430}\x{0437}\x{0430}\x{049B}\x{0448}\x{0430}", "Malti", "Kasz\x{00EB}bsczi", "Rumantsch", "Deitsch", "Ladino", "V\x{00F5}ro", "Kernewek", "faka-Tonga", "Boarisch", "\x{067E}\x{069A}\x{062A}\x{0648}", "Arm\x{00E3}neashce", "\x{041C}\x{043E}\x{043D}\x{0433}\x{043E}\x{043B}", "Ling\x{00E1}la", "T\x{00FC}rkmen", "Anglo Saxon", "Lojban", "Tok Pisin", "\x{0B13}\x{0B5C}\x{0B3F}\x{0B06}", "Wolof", "Emili\x{00E0}n e rumagn\x{00F2}l", "Reo M\x{0101}`ohi", "\x{041A}\x{044B}\x{0440}\x{0433}\x{044B}\x{0437}\x{0447}\x{0430}", "Interlingue", "\x{0710}\x{072A}\x{0721}\x{071D}\x{0710}", "\x{06AF}\x{06CC}\x{0644}\x{06A9}\x{06CC}", "Igbo", "\x{1797}\x{17B6}\x{179F}\x{17B6}\x{1781}\x{17D2}\x{1798}\x{17C2}\x{179A}", "Ze\x{00EA}uws", "\x{0A97}\x{0AC1}\x{0A9C}\x{0AB0}\x{0ABE}\x{0AA4}\x{0AC0}", "Taqbaylit", "\x{0915}\x{0936}\x{094D}\x{092E}\x{0940}\x{0930}\x{0940} - (\x{0643}\x{0634}\x{0645}\x{064A}\x{0631}\x{064A})", "\x{0DC3}\x{0DD2}\x{0D82}\x{0DC4}\x{0DBD}", "\x{5434}\x{8BED}", "\x{0421}\x{043B}\x{043E}\x{0432}\x{0463}\x{043D}\x{044C}\x{0441}\x{043A}\x{044A}", "Malagasy", "Soomaaliga", "Tetun", "\x{0423}\x{0434}\x{043C}\x{0443}\x{0440}\x{0442}", "Sardu", "Gaelg", "Zamboangue\x{00F1}o", "Kongo", "\x{1403}\x{14C4}\x{1483}\x{144E}\x{1450}\x{1466}/inuktitut", "Aymar", "\x{0A2A}\x{0A70}\x{0A1C}\x{0A3E}\x{0A2C}\x{0A40}", "Hawai`i", "Romani", "Ekakair\x{0169} Naoero", "\x{0F56}\x{0F7C}\x{0F51}\x{0F0B}\x{0F61}\x{0F72}\x{0F42}", "\x{0EA5}\x{0EB2}\x{0EA7}", "\x{0633}\x{0646}\x{068C}\x{064A}", "\x{0411}\x{0430}\x{0448}\x{04A1}\x{043E}\x{0440}\x{0442}", "\x{13E3}\x{13B3}\x{13A9}", "\x{041D}\x{043E}\x{0445}\x{0447}\x{0438}\x{0439}\x{043D}", "Oromoo", "E\x{028B}egbe", "\x{1275}\x{130D}\x{122D}\x{129B}", 
"Gagana Samoa", "\x{0985}\x{09B8}\x{09AE}\x{09C0}\x{09DF}\x{09BE}", "Uyghurche\x{200E} / \x{0626}\x{06C7}\x{064A}\x{063A}\x{06C7}\x{0631}\x{0686}\x{06D5}", "Hak-k\x{00E2}-fa", "\x{0410}\x{0432}\x{0430}\x{0440}", "M\x{00EC}ng-d\x{0115}\x{0324}ng-ng\x{1E73}\x{0304}", "\x{0645}\x{064E}\x{0632}\x{0650}\x{0631}\x{0648}\x{0646}\x{064A}", "Bamanankan", "Din\x{00E9} bizaad", "Papiamentu", "N\x{0113}hiyaw\x{0113}win / \x{14C0}\x{1426}\x{1403}\x{152D}\x{140D}\x{140F}\x{1423}", "isiZulu", "Norfuk / Pitkern", "Kinyarwanda", "Myanmasa", "isiXhosa", "Tshivenda", "I\x{00F1}upiak", "SiSwati", "Ava\x{00F1}e'\x{1EBD}", "\x{0F47}\x{0F7C}\x{0F44}\x{0F0B}\x{0F41}", "\x{0410}\x{04A7}\x{0441}\x{0443}\x{0430}", "Kalaallisut", "\x{041A}\x{043E}\x{043C}\x{0438}", "\x{0425}\x{0430}\x{043B}\x{044C}\x{043C}\x{0433}", "\x{1A05}\x{1A14} \x{1A15}\x{1A18}\x{1A01}\x{1A17}", "Bislama", "Akan", "\x{0411}\x{0443}\x{0440}\x{044F}\x{0430}\x{0434}", "Sesotho", "Setswana", "Twi", "Chi-Chewa", "Na Vosa Vakaviti", "\x{041B}\x{0430}\x{043A}\x{043A}\x{0443}", "Fulfulde", "(Cuengh)", "\x{0647}\x{064E}\x{0648}\x{064F}\x{0633}\x{064E}", "chiShona", "chiTumbuka", "G\x{0129}k\x{0169}y\x{0169}", "S\x{00E4}ng\x{00F6}", "Luganda", "Xitsonga", "Kirundi", "Afar", "Ebon", "Tsets\x{00EA}hest\x{00E2}hese", "Chamoru", "Oshiwambo", ); @escaped_unicode = ( "English", "Deutsch", "Fran\\x{00E7}ais", "Polski", "\\x{65E5}\\x{672C}\\x{8A9E}", "Nederlands", "Italiano", "Portugu\\x{00EA}s", "Espa\\x{00F1}ol", "Svenska", "\\x{0420}\\x{0443}\\x{0441}\\x{0441}\\x{043A}\\x{0438}\\x{0439}", "\\x{4E2D}\\x{6587}", "\\x{202A}Norsk (bokm\\x{00E5}l)\\x{202C}", "Suomi", "Volap\\x{00FC}k", "Lumbaart", "Rom\\x{00E2}n\\x{0103}", "T\\x{00FC}rk\\x{00E7}e", "Esperanto", "Catal\\x{00E0}", "Sloven\\x{010D}ina", "\\x{010C}esky", "\\x{0423}\\x{043A}\\x{0440}\\x{0430}\\x{0457}\\x{043D}\\x{0441}\\x{044C}\\x{043A}\\x{0430}", "Magyar", "Dansk", "Bahasa Indonesia", "\\x{05E2}\\x{05D1}\\x{05E8}\\x{05D9}\\x{05EA}", "Lietuvi\\x{0173}", 
"\\x{0421}\\x{0440}\\x{043F}\\x{0441}\\x{043A}\\x{0438} / Srpski", "Sloven\\x{0161}\\x{010D}ina", "\\x{0411}\\x{044A}\\x{043B}\\x{0433}\\x{0430}\\x{0440}\\x{0441}\\x{043A}\\x{0438}", "\\x{D55C}\\x{AD6D}\\x{C5B4}", "\\x{0627}\\x{0644}\\x{0639}\\x{0631}\\x{0628}\\x{064A}\\x{0629}", "Eesti", "\\x{0C24}\\x{0C46}\\x{0C32}\\x{0C41}\\x{0C17}\\x{0C41}", "\\x{0928}\\x{0947}\\x{092A}\\x{093E}\\x{0932} \\x{092D}\\x{093E}\\x{0937}\\x{093E}", "Hrvatski", "Cebuano", "Galego", "\\x{0395}\\x{03BB}\\x{03BB}\\x{03B7}\\x{03BD}\\x{03B9}\\x{03BA}\\x{03AC}", "\\x{0E44}\\x{0E17}\\x{0E22}", "\\x{0641}\\x{0627}\\x{0631}\\x{0633}\\x{06CC}", "\\x{202A}Norsk (nynorsk)\\x{202C}", "Ti\\x{1EBF}ng Vi\\x{1EC7}t", "Bahasa Melayu", "\\x{0987}\\x{09AE}\\x{09BE}\\x{09B0} \\x{09A0}\\x{09BE}\\x{09B0}/\\x{09AC}\\x{09BF}\\x{09B7}\\x{09CD}\\x{09A3}\\x{09C1}\\x{09AA}\\x{09CD}\\x{09B0}\\x{09BF}\\x{09AF}\\x{09BC}\\x{09BE} \\x{09AE}\\x{09A3}\\x{09BF}\\x{09AA}\\x{09C1}\\x{09B0}\\x{09C0}", "Euskara", "Bosanski", "Simple English", "\\x{00CD}slenska", "L\\x{00EB}tzebuergesch", "\\x{10E5}\\x{10D0}\\x{10E0}\\x{10D7}\\x{10E3}\\x{10DA}\\x{10D8}", "Shqip", "Az\\x{0259}rbaycan", "Brezhoneg", "Latina", "\\x{09AC}\\x{09BE}\\x{0982}\\x{09B2}\\x{09BE}", "Ido", "\\x{0939}\\x{093F}\\x{0928}\\x{094D}\\x{0926}\\x{0940}", "Srpskohrvatski / \\x{0421}\\x{0440}\\x{043F}\\x{0441}\\x{043A}\\x{043E}\\x{0445}\\x{0440}\\x{0432}\\x{0430}\\x{0442}\\x{0441}\\x{043A}\\x{0438}", "\\x{092E}\\x{0930}\\x{093E}\\x{0920}\\x{0940}", "\\x{041C}\\x{0430}\\x{043A}\\x{0435}\\x{0434}\\x{043E}\\x{043D}\\x{0441}\\x{043A}\\x{0438}", "Basa Sunda", "Nnapulitano", "Cymraeg", "Tagalog", "\\x{0BA4}\\x{0BAE}\\x{0BBF}\\x{0BB4}\\x{0BCD}", "Latvie\\x{0161}u", "Piemont\\x{00E8}is", "Sicilianu", "Plattd\\x{00FC}\\x{00FC}tsch", "Basa Jawa", "Occitan", "Kurd\\x{00EE} / \\x{0643}\\x{0648}\\x{0631}\\x{062F}\\x{06CC}", "Asturianu", "Walon", "Afrikaans", "\\x{0422}\\x{043E}\\x{04B7}\\x{0438}\\x{043A}\\x{04E3}", 
"\\x{0411}\\x{0435}\\x{043B}\\x{0430}\\x{0440}\\x{0443}\\x{0441}\\x{043A}\\x{0430}\\x{044F} (\\x{0442}\\x{0430}\\x{0440}\\x{0430}\\x{0448}\\x{043A}\\x{0435}\\x{0432}\\x{0456}\\x{0446}\\x{0430})", "Aragon\\x{00E9}s", "Tarand\\x{00ED}ne", "\\x{0411}\\x{0435}\\x{043B}\\x{0430}\\x{0440}\\x{0443}\\x{0441}\\x{043A}\\x{0430}\\x{044F}", "Ripoarisch", "Krey\\x{00F2}l ayisyen", "\\x{0427}\\x{0103}\\x{0432}\\x{0430}\\x{0448}\\x{043B}\\x{0430}", "\\x{0627}\\x{0631}\\x{062F}\\x{0648}", "M\\x{0101}ori", "Kiswahili", "Frysk", "\\x{7CB5}\\x{8A9E}", "O'zbek", "V\\x{00E8}neto", "Gaeilge", "Runa Simi", "Corsu", "\\x{0C95}\\x{0CA8}\\x{0CCD}\\x{0CA8}\\x{0CA1}", "\\x{0D2E}\\x{0D32}\\x{0D2F}\\x{0D3E}\\x{0D33}\\x{0D02}", "G\\x{00E0}idhlig", "\\x{05D9}\\x{05D9}\\x{05B4}\\x{05D3}\\x{05D9}\\x{05E9}", "Hornjoserbsce", "Yor\\x{00F9}b\\x{00E1}", "\\x{017D}emait\\x{0117}\\x{0161}ka", "Kapampangan", "Nahuatl", "Tatar\\x{00E7}a", "Interlingua", "\\x{0938}\\x{0902}\\x{0938}\\x{094D}\\x{0915}\\x{0943}\\x{0924}", "Limburgs", "\\x{0540}\\x{0561}\\x{0575}\\x{0565}\\x{0580}\\x{0565}\\x{0576}", "Alemannisch", "Basa Banyumasan", "\\x{12A0}\\x{121B}\\x{122D}\\x{129B}", "B\\x{00E2}n-l\\x{00E2}m-g\\x{00FA}", "Pangasinan", "Nouormand", "F\\x{00F8}royskt", "S\\x{00E1}megiella", "Nedersaksisch", "Winaray", "Furlan", "\\x{092D}\\x{094B}\\x{091C}\\x{092A}\\x{0941}\\x{0930}\\x{0940}", "West-Vlams", "Novial", "\\x{092A}\\x{093E}\\x{093F}\\x{0934}", "Ilokano", "L\\x{00ED}guru", "Scots", "\\x{0928}\\x{0947}\\x{092A}\\x{093E}\\x{0932}\\x{0940}", "Arpetan", "\\x{078B}\\x{07A8}\\x{0788}\\x{07AC}\\x{0780}\\x{07A8}\\x{0784}\\x{07A6}\\x{0790}\\x{07B0}", "\\x{0418}\\x{0440}\\x{043E}\\x{043D}\\x{0430}\\x{0443}", "\\x{53E4}\\x{6587} / \\x{6587}\\x{8A00}\\x{6587}", "Zazaki", "\\x{049A}\\x{0430}\\x{0437}\\x{0430}\\x{049B}\\x{0448}\\x{0430}", "Malti", "Kasz\\x{00EB}bsczi", "Rumantsch", "Deitsch", "Ladino", "V\\x{00F5}ro", "Kernewek", "faka-Tonga", "Boarisch", "\\x{067E}\\x{069A}\\x{062A}\\x{0648}", "Arm\\x{00E3}neashce", 
"\\x{041C}\\x{043E}\\x{043D}\\x{0433}\\x{043E}\\x{043B}", "Ling\\x{00E1}la", "T\\x{00FC}rkmen", "Anglo Saxon", "Lojban", "Tok Pisin", "\\x{0B13}\\x{0B5C}\\x{0B3F}\\x{0B06}", "Wolof", "Emili\\x{00E0}n e rumagn\\x{00F2}l", "Reo M\\x{0101}`ohi", "\\x{041A}\\x{044B}\\x{0440}\\x{0433}\\x{044B}\\x{0437}\\x{0447}\\x{0430}", "Interlingue", "\\x{0710}\\x{072A}\\x{0721}\\x{071D}\\x{0710}", "\\x{06AF}\\x{06CC}\\x{0644}\\x{06A9}\\x{06CC}", "Igbo", "\\x{1797}\\x{17B6}\\x{179F}\\x{17B6}\\x{1781}\\x{17D2}\\x{1798}\\x{17C2}\\x{179A}", "Ze\\x{00EA}uws", "\\x{0A97}\\x{0AC1}\\x{0A9C}\\x{0AB0}\\x{0ABE}\\x{0AA4}\\x{0AC0}", "Taqbaylit", "\\x{0915}\\x{0936}\\x{094D}\\x{092E}\\x{0940}\\x{0930}\\x{0940} - (\\x{0643}\\x{0634}\\x{0645}\\x{064A}\\x{0631}\\x{064A})", "\\x{0DC3}\\x{0DD2}\\x{0D82}\\x{0DC4}\\x{0DBD}", "\\x{5434}\\x{8BED}", "\\x{0421}\\x{043B}\\x{043E}\\x{0432}\\x{0463}\\x{043D}\\x{044C}\\x{0441}\\x{043A}\\x{044A}", "Malagasy", "Soomaaliga", "Tetun", "\\x{0423}\\x{0434}\\x{043C}\\x{0443}\\x{0440}\\x{0442}", "Sardu", "Gaelg", "Zamboangue\\x{00F1}o", "Kongo", "\\x{1403}\\x{14C4}\\x{1483}\\x{144E}\\x{1450}\\x{1466}/inuktitut", "Aymar", "\\x{0A2A}\\x{0A70}\\x{0A1C}\\x{0A3E}\\x{0A2C}\\x{0A40}", "Hawai`i", "Romani", "Ekakair\\x{0169} Naoero", "\\x{0F56}\\x{0F7C}\\x{0F51}\\x{0F0B}\\x{0F61}\\x{0F72}\\x{0F42}", "\\x{0EA5}\\x{0EB2}\\x{0EA7}", "\\x{0633}\\x{0646}\\x{068C}\\x{064A}", "\\x{0411}\\x{0430}\\x{0448}\\x{04A1}\\x{043E}\\x{0440}\\x{0442}", "\\x{13E3}\\x{13B3}\\x{13A9}", "\\x{041D}\\x{043E}\\x{0445}\\x{0447}\\x{0438}\\x{0439}\\x{043D}", "Oromoo", "E\\x{028B}egbe", "\\x{1275}\\x{130D}\\x{122D}\\x{129B}", "Gagana Samoa", "\\x{0985}\\x{09B8}\\x{09AE}\\x{09C0}\\x{09DF}\\x{09BE}", "Uyghurche\\x{200E} / \\x{0626}\\x{06C7}\\x{064A}\\x{063A}\\x{06C7}\\x{0631}\\x{0686}\\x{06D5}", "Hak-k\\x{00E2}-fa", "\\x{0410}\\x{0432}\\x{0430}\\x{0440}", "M\\x{00EC}ng-d\\x{0115}\\x{0324}ng-ng\\x{1E73}\\x{0304}", "\\x{0645}\\x{064E}\\x{0632}\\x{0650}\\x{0631}\\x{0648}\\x{0646}\\x{064A}", "Bamanankan", 
"Din\\x{00E9} bizaad",
    "Papiamentu",
    "N\\x{0113}hiyaw\\x{0113}win / \\x{14C0}\\x{1426}\\x{1403}\\x{152D}\\x{140D}\\x{140F}\\x{1423}",
    "isiZulu",
    "Norfuk / Pitkern",
    "Kinyarwanda",
    "Myanmasa",
    "isiXhosa",
    "Tshivenda",
    "I\\x{00F1}upiak",
    "SiSwati",
    "Ava\\x{00F1}e'\\x{1EBD}",
    "\\x{0F47}\\x{0F7C}\\x{0F44}\\x{0F0B}\\x{0F41}",
    "\\x{0410}\\x{04A7}\\x{0441}\\x{0443}\\x{0430}",
    "Kalaallisut",
    "\\x{041A}\\x{043E}\\x{043C}\\x{0438}",
    "\\x{0425}\\x{0430}\\x{043B}\\x{044C}\\x{043C}\\x{0433}",
    "\\x{1A05}\\x{1A14} \\x{1A15}\\x{1A18}\\x{1A01}\\x{1A17}",
    "Bislama",
    "Akan",
    "\\x{0411}\\x{0443}\\x{0440}\\x{044F}\\x{0430}\\x{0434}",
    "Sesotho",
    "Setswana",
    "Twi",
    "Chi-Chewa",
    "Na Vosa Vakaviti",
    "\\x{041B}\\x{0430}\\x{043A}\\x{043A}\\x{0443}",
    "Fulfulde",
    "(Cuengh)",
    "\\x{0647}\\x{064E}\\x{0648}\\x{064F}\\x{0633}\\x{064E}",
    "chiShona",
    "chiTumbuka",
    "G\\x{0129}k\\x{0169}y\\x{0169}",
    "S\\x{00E4}ng\\x{00F6}",
    "Luganda",
    "Xitsonga",
    "Kirundi",
    "Afar",
    "Ebon",
    "Tsets\\x{00EA}hest\\x{00E2}hese",
    "Chamoru",
    "Oshiwambo",
);

# Compare the two lists pairwise, in both directions.  Comparisons are
# written as is($got, $expected, $name) so that a failing test reports
# the value computed by encode()/decode() as "got".
#
# The number of pairs is fixed rather than taken from the lists because
# the script's declared plan (tests => 491) is 9 checks before this loop,
# 2 * 240 checks inside it and 2 checks after it.
# NOTE(review): confirm that both lists hold exactly 240 entries; any
# entry past index 239 is never checked (a manual count suggests there
# may be one such entry -- verify).
my $checked_pairs = 240;

for my $i (0 .. $checked_pairs - 1) {
    is(decode('unicode-escape', $escaped_unicode[$i]), $string_unicode[$i],
        "decoded unicode escape");
    is(encode('unicode-escape', $string_unicode[$i]), $escaped_unicode[$i],
        "encoded unicode string");
}

# Switch both the encoder and the decoder to 'python' mode, in which the
# escaped form uses \uXXXX sequences (lower-case hex digits in the
# expected string below).
Encode::Escape::Unicode->enmode('python');
Encode::Escape::Unicode->demode('python');

$string_unicode = "\x{0420}\x{0443}\x{0441}\x{0441}\x{043A}\x{0438}\x{0439}";
$escaped_python = "\\u0420\\u0443\\u0441\\u0441\\u043a\\u0438\\u0439";

is(decode('unicode-escape', $escaped_python), $string_unicode,
    "decoded python mode");
is(encode('unicode-escape', $string_unicode), $escaped_python,
    "encoded python mode");

# ---- archive framing: ustar header of the next member and the start of
# ---- its header comment, kept verbatim from the original dump ----
# Encode-Escape-0.14/t/Encode-Escape.t000064400000000000000000000007351112142532100170470ustar00rootroot00000000000000# Before `make install' is performed this script should be runnable with # `make test'.
# After `make install' it should work as `perl Encode-Escape.t'

#########################

# change 'tests => 1' to 'tests => last_test_to_print';

use strict;
use warnings;

# Smoke test: the only check is that the top-level module loads.
use Test::More tests => 1;
BEGIN { use_ok('Encode::Escape') };

#########################

# Insert your test code below, the Test::More module is use()ed here so read
# its man page ( perldoc Test::More ) for help writing this test script.