This is a revert of commit 518b72c7273278d24cb79ac1485972e42cf98ed4 Author: Tim Rühsen Date: Thu Jun 29 12:52:57 2017 +0200 Increase speed of stringprep by ~20x diff -up libidn-1.34/lib/Makefile.am.tablesize-revert libidn-1.34/lib/Makefile.am --- libidn-1.34/lib/Makefile.am.tablesize-revert 2017-07-14 20:18:42.000000000 +0200 +++ libidn-1.34/lib/Makefile.am 2018-05-04 12:45:38.293778700 +0200 @@ -28,7 +28,7 @@ lib_LTLIBRARIES = libidn.la idn_int = idn-int.h -BUILT_SOURCES = $(idn_int) gunibreak.h gunicomp.h gunidecomp.h rfc3454.c rfc3454.h tlds.c +BUILT_SOURCES = $(idn_int) gunibreak.h gunicomp.h gunidecomp.h rfc3454.c tlds.c DISTCLEANFILES = $(idn_int) include_HEADERS = stringprep.h idna.h punycode.h idn-free.h pr29.h if TLD @@ -46,7 +46,7 @@ idn-int.h: libidn_la_SOURCES = libidn.map \ gunibreak.h gunicomp.h gunidecomp.h \ nfkc.c toutf8.c version.c \ - stringprep.h stringprep.c rfc3454.c rfc3454.h profiles.c \ + stringprep.h stringprep.c rfc3454.c profiles.c \ punycode.h punycode.c \ idna.h idna.c \ pr29.h pr29.c \ @@ -82,7 +82,7 @@ SPEC = $(top_srcdir)/doc/specifications #gunibreak.h gunicomp.h gunidecomp.h: gen-unicode-tables.pl $(SPEC)/UnicodeData-3.2.0.txt $(SPEC)/LineBreak-3.2.0.txt $(SPEC)/SpecialCasing-3.2.0.txt $(SPEC)/CaseFolding-3.2.0.txt $(SPEC)/CompositionExclusions-3.2.0.txt # $(PERL) $(srcdir)/gen-unicode-tables.pl -decomp 3.2 $(SPEC) -rfc3454.c rfc3454.h: gen-stringprep-tables.pl +rfc3454.c: gen-stringprep-tables.pl $(PERL) $(srcdir)/gen-stringprep-tables.pl $(SPEC)/rfc3454.txt tlds.c: gen-tld-tables.pl diff -up libidn-1.34/lib/gen-stringprep-tables.pl.tablesize-revert libidn-1.34/lib/gen-stringprep-tables.pl --- libidn-1.34/lib/gen-stringprep-tables.pl.tablesize-revert 2017-06-29 21:11:13.000000000 +0200 +++ libidn-1.34/lib/gen-stringprep-tables.pl 2018-05-04 12:44:24.696588209 +0200 @@ -22,13 +22,11 @@ use strict; my ($tab) = 59; my ($intable) = 0; -my ($entries) = 0; my ($tablename); my ($varname); my ($starheader, $header); my ($profile) = "rfc3454"; my ($filename) = "$profile.c"; -my ($headername) = "$profile.h"; my ($line, $start, $end, @map); open(FH, ">$filename") or die "cannot open $filename for writing"; @@ -39,10 +37,6 @@ print FH " Instead, edit gen-stringpre print FH "#include \n"; print FH "#include \"stringprep.h\"\n"; -open(FHH, ">$headername") or die "cannot open $headername for writing"; -print FHH "/* This file is automatically generated. DO NOT EDIT!\n"; -print FHH " Instead, edit gen-stringprep-tables.pl and re-run. */\n\n"; - while(<>) { s/^ (.*)/$1/g; # for rfc $line = $_; @@ -53,11 +47,9 @@ while(<>) { if ($intable && m,^----- End Table (.*) -----,) { die "table error" unless $1 eq $tablename || ($1 eq "C.1.2" && $tablename eq "C.1.1"); # Typo in draft + $intable = 0; print FH " { 0 },\n"; print FH "};\n\n"; - print FHH "#define N_STRINGPREP_${profile}_${varname} ${entries}\n"; - $intable = 0; - $entries = 0; } if (m,^[A-Z],) { @@ -92,28 +84,27 @@ while(<>) { die "tables tried to map a range" if $end && $map[0]; if ($map[3]) { - printf FH " { 0x%06s, 0x%06s, { 0x%06s,%*s/* %s */\n 0x%06s, 0x%06s, 0x%06s }},\n", - $start, $start, $map[0], $tab-length($line)-13, " ", $line, + printf FH " { 0x%06s, 0, { 0x%06s,%*s/* %s */\n 0x%06s, 0x%06s, 0x%06s }},\n", + $start, $map[0], $tab-length($line)-13, " ", $line, $map[1], $map[2], $map[3]; } elsif ($map[2]) { - printf FH " { 0x%06s, 0x%06s, { 0x%06s,%*s/* %s */\n 0x%06s, 0x%06s }},\n", - $start, $start, $map[0], $tab-length($line)-14, " ", $line, + printf FH " { 0x%06s, 0, { 0x%06s,%*s/* %s */\n 0x%06s, 0x%06s }},\n", + $start, $map[0], $tab-length($line)-14, " ", $line, $map[1], $map[2]; } elsif ($map[1]) { - printf FH " { 0x%06s, 0x%06s, { 0x%06s,%*s/* %s */\n 0x%06s }},\n", - $start, $start, $map[0], $tab-length($line)-14, " ", $line, + printf FH " { 0x%06s, 0, { 0x%06s,%*s/* %s */\n 0x%06s }},\n", + $start, $map[0], $tab-length($line)-14, " ", $line, $map[1]; } elsif ($map[0]) { - printf FH " { 0x%06s, 0x%06s, { 0x%06s }},%*s/* %s */\n", - $start, $start, $map[0], $tab-length($line)-17, " ", $line; + printf FH " { 0x%06s, 0, { 0x%06s }},%*s/* %s */\n", + $start, $map[0], $tab-length($line)-17, " ", $line; } elsif ($end) { printf FH " { 0x%06s, 0x%06s },%*s/* %s */\n", $start, $end, $tab-length($line)-11, " ", $line; } else { - printf FH " { 0x%06s, 0x%06s },%*s/* %s */\n", - $start, $start, $tab-length($line)-11, " ", $line; + printf FH " { 0x%06s },%*s/* %s */\n", + $start, $tab-length($line)-11, " ", $line; } - $entries++; } else { $intable = 1 if !$intable; $tablename = $1; @@ -126,5 +117,4 @@ while(<>) { } } -close FHH or die "cannot close $headername"; close FH or die "cannot close $filename"; diff -up libidn-1.34/lib/profiles.c.tablesize-revert libidn-1.34/lib/profiles.c --- libidn-1.34/lib/profiles.c.tablesize-revert 2017-06-29 21:11:13.000000000 +0200 +++ libidn-1.34/lib/profiles.c 2018-05-04 12:44:24.696588209 +0200 @@ -29,7 +29,6 @@ #include #include "stringprep.h" -#include "rfc3454.h" const Stringprep_profiles stringprep_profiles[] = { {"Nameprep", stringprep_nameprep}, @@ -44,219 +43,280 @@ const Stringprep_profiles stringprep_pro {NULL, NULL} }; -/* number of elements within an array */ -#define countof(a) (sizeof(a)/sizeof(*(a))) - -/* helper for profile definitions */ -#define TABLE(x) stringprep_rfc3454_##x, N_STRINGPREP_rfc3454_##x - const Stringprep_profile stringprep_nameprep[] = { - {STRINGPREP_MAP_TABLE, 0, TABLE(B_1)}, - {STRINGPREP_MAP_TABLE, 0, TABLE(B_2)}, - {STRINGPREP_NFKC, 0, 0, 0}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_1_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_3)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_4)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_5)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_6)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_7)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_9)}, - {STRINGPREP_BIDI, 0, 0, 0}, - {STRINGPREP_BIDI_PROHIBIT_TABLE, ~STRINGPREP_NO_BIDI, TABLE(C_8)}, - {STRINGPREP_BIDI_RAL_TABLE, 0, TABLE(D_1)}, - {STRINGPREP_BIDI_L_TABLE, 0, TABLE(D_2)}, - {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED, TABLE(A_1)}, + {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1}, + {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2}, + {STRINGPREP_NFKC, 0, 0}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9}, + {STRINGPREP_BIDI, 0, 0}, + {STRINGPREP_BIDI_PROHIBIT_TABLE, ~STRINGPREP_NO_BIDI, + stringprep_rfc3454_C_8}, + {STRINGPREP_BIDI_RAL_TABLE, 0, stringprep_rfc3454_D_1}, + {STRINGPREP_BIDI_L_TABLE, 0, stringprep_rfc3454_D_2}, + {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED, + stringprep_rfc3454_A_1}, {0} }; const Stringprep_profile stringprep_kerberos5[] = { /* XXX this is likely to be wrong as the specification is a rough draft. */ - {STRINGPREP_MAP_TABLE, 0, TABLE(B_1)}, - {STRINGPREP_MAP_TABLE, 0, TABLE(B_3)}, - {STRINGPREP_NFKC, 0, 0, 0}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_1_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_3)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_4)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_5)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_6)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_7)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_9)}, - {STRINGPREP_BIDI, 0, 0, 0}, - {STRINGPREP_BIDI_PROHIBIT_TABLE, ~STRINGPREP_NO_BIDI, TABLE(C_8)}, - {STRINGPREP_BIDI_RAL_TABLE, 0, TABLE(D_1)}, - {STRINGPREP_BIDI_L_TABLE, 0, TABLE(D_2)}, - {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED, TABLE(A_1)}, + {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1}, + {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_3}, + {STRINGPREP_NFKC, 0, 0}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9}, + {STRINGPREP_BIDI, 0, 0}, + {STRINGPREP_BIDI_PROHIBIT_TABLE, ~STRINGPREP_NO_BIDI, + stringprep_rfc3454_C_8}, + {STRINGPREP_BIDI_RAL_TABLE, 0, stringprep_rfc3454_D_1}, + {STRINGPREP_BIDI_L_TABLE, 0, stringprep_rfc3454_D_2}, + {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED, + stringprep_rfc3454_A_1}, {0} }; const Stringprep_table_element stringprep_xmpp_nodeprep_prohibit[] = { - {0x000022, 0x000022}, /* #x22 (") */ - {0x000026, 0x000026}, /* #x26 (&) */ - {0x000027, 0x000027}, /* #x27 (') */ - {0x00002F, 0x00002F}, /* #x2F (/) */ - {0x00003A, 0x00003A}, /* #x3A (:) */ - {0x00003C, 0x00003C}, /* #x3C (<) */ - {0x00003E, 0x00003E}, /* #x3E (>) */ - {0x000040, 0x000040}, /* #x40 (@) */ + {0x000022}, /* #x22 (") */ + {0x000026}, /* #x26 (&) */ + {0x000027}, /* #x27 (') */ + {0x00002F}, /* #x2F (/) */ + {0x00003A}, /* #x3A (:) */ + {0x00003C}, /* #x3C (<) */ + {0x00003E}, /* #x3E (>) */ + {0x000040}, /* #x40 (@) */ {0} }; const Stringprep_profile stringprep_xmpp_nodeprep[] = { - {STRINGPREP_MAP_TABLE, 0, TABLE(B_1)}, - {STRINGPREP_MAP_TABLE, 0, TABLE(B_2)}, - {STRINGPREP_NFKC, 0, 0, 0}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_1_1)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_1_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_1)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_3)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_4)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_5)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_6)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_7)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_9)}, - {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_xmpp_nodeprep_prohibit, - countof(stringprep_xmpp_nodeprep_prohibit) - 1}, - {STRINGPREP_BIDI, 0, 0, 0}, - {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_BIDI_RAL_TABLE, 0, TABLE(D_1)}, - {STRINGPREP_BIDI_L_TABLE, 0, TABLE(D_2)}, - {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED, TABLE(A_1)}, + {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1}, + {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2}, + {STRINGPREP_NFKC, 0, 0}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_1}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_xmpp_nodeprep_prohibit}, + {STRINGPREP_BIDI, 0, 0}, + {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_BIDI_RAL_TABLE, 0, stringprep_rfc3454_D_1}, + {STRINGPREP_BIDI_L_TABLE, 0, stringprep_rfc3454_D_2}, + {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED, + stringprep_rfc3454_A_1}, {0} }; const Stringprep_profile stringprep_xmpp_resourceprep[] = { - {STRINGPREP_MAP_TABLE, 0, TABLE(B_1)}, - {STRINGPREP_NFKC, 0, 0, 0}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_1_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_1)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_3)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_4)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_5)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_6)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_7)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_9)}, - {STRINGPREP_BIDI, 0, 0, 0}, - {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, TABLE(D_1)}, - {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, TABLE(D_2)}, - {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED, TABLE(A_1)}, + {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1}, + {STRINGPREP_NFKC, 0, 0}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9}, + {STRINGPREP_BIDI, 0, 0}, + {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_1}, + {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_2}, + {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED, + stringprep_rfc3454_A_1}, {0} }; const Stringprep_profile stringprep_plain[] = { - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_1)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_3)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_4)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_5)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_6)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_9)}, - {STRINGPREP_BIDI, 0, 0, 0}, - {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, TABLE(D_1)}, - {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, TABLE(D_2)}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9}, + {STRINGPREP_BIDI, 0, 0}, + {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_1}, + {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_2}, {0} }; const Stringprep_profile stringprep_trace[] = { - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_1)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_3)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_4)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_5)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_6)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_9)}, - {STRINGPREP_BIDI, 0, 0, 0}, - {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, TABLE(D_1)}, - {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, TABLE(D_2)}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9}, + {STRINGPREP_BIDI, 0, 0}, + {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_1}, + {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_2}, {0} }; const Stringprep_table_element stringprep_iscsi_prohibit[] = { - {0x0000, 0x002C}, /* [ASCII CONTROL CHARACTERS and SPACE through ,] */ - {0x002F, 0x002F}, /* [ASCII /] */ - {0x003B, 0x0040}, /* [ASCII ; through @] */ - {0x005B, 0x0060}, /* [ASCII [ through `] */ - {0x007B, 0x007F}, /* [ASCII { through DEL] */ - {0x3002, 0x3002}, /* ideographic full stop */ + /* NB, since start == 0, we must have that end != 0 for the + end-of-table logic to work. */ + {0x0000, 1}, /* [ASCII CONTROL CHARACTERS and SPACE through ,] */ + {0x0001}, + {0x0002}, + {0x0003}, + {0x0004}, + {0x0005}, + {0x0006}, + {0x0007}, + {0x0008}, + {0x0009}, + {0x000A}, + {0x000B}, + {0x000C}, + {0x000D}, + {0x000E}, + {0x000F}, + {0x0010}, + {0x0011}, + {0x0012}, + {0x0013}, + {0x0014}, + {0x0015}, + {0x0016}, + {0x0017}, + {0x0018}, + {0x0019}, + {0x001A}, + {0x001B}, + {0x001C}, + {0x001D}, + {0x001E}, + {0x001F}, + {0x0020}, + {0x0021}, + {0x0022}, + {0x0023}, + {0x0024}, + {0x0025}, + {0x0026}, + {0x0027}, + {0x0028}, + {0x0029}, + {0x002A}, + {0x002B}, + {0x002C}, + {0x002F}, /* [ASCII /] */ + {0x003B}, /* [ASCII ; through @] */ + {0x003C}, + {0x003D}, + {0x003E}, + {0x003F}, + {0x0040}, + {0x005B}, /* [ASCII [ through `] */ + {0x005C}, + {0x005D}, + {0x005E}, + {0x005F}, + {0x0060}, + {0x007B}, /* [ASCII { through DEL] */ + {0x007C}, + {0x007D}, + {0x007E}, + {0x007F}, + {0x3002}, /* ideographic full stop */ {0} }; const Stringprep_profile stringprep_iscsi[] = { - {STRINGPREP_MAP_TABLE, 0, TABLE(B_1)}, - {STRINGPREP_MAP_TABLE, 0, TABLE(B_2)}, - {STRINGPREP_NFKC, 0, 0, 0}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_1_1)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_1_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_1)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_3)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_4)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_5)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_6)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_7)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_9)}, - {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_iscsi_prohibit, countof(stringprep_iscsi_prohibit) - 1}, - {STRINGPREP_BIDI, 0, 0, 0}, - {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, TABLE(D_1)}, - {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, TABLE(D_2)}, - {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED, TABLE(A_1)}, + {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1}, + {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2}, + {STRINGPREP_NFKC, 0, 0}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_1}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_iscsi_prohibit}, + {STRINGPREP_BIDI, 0, 0}, + {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_1}, + {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_2}, + {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED, + stringprep_rfc3454_A_1}, {0} }; const Stringprep_table_element stringprep_saslprep_space_map[] = { - {0x00A0, 0x00A0, {0x0020}}, /* 00A0; NO-BREAK SPACE */ - {0x1680, 0x1680, {0x0020}}, /* 1680; OGHAM SPACE MARK */ - {0x2000, 0x200B, {0x0020}}, /* 2000; EN QUAD */ - /* 2001; EM QUAD */ - /* 2002; EN SPACE */ - /* 2003; EM SPACE */ - /* 2004; THREE-PER-EM SPACE */ - /* 2005; FOUR-PER-EM SPACE */ - /* 2006; SIX-PER-EM SPACE */ - /* 2007; FIGURE SPACE */ - /* 2008; PUNCTUATION SPACE */ - /* 2009; THIN SPACE */ - /* 200A; HAIR SPACE */ - /* 200B; ZERO WIDTH SPACE */ - {0x202F, 0x202F, {0x0020}}, /* 202F; NARROW NO-BREAK SPACE */ - {0x205F, 0x205F, {0x0020}}, /* 205F; MEDIUM MATHEMATICAL SPACE */ - {0x3000, 0x3000, {0x0020}}, /* 3000; IDEOGRAPHIC SPACE */ + {0x0000A0, 0, {0x0020}}, /* 00A0; NO-BREAK SPACE */ + {0x001680, 0, {0x0020}}, /* 1680; OGHAM SPACE MARK */ + {0x002000, 0, {0x0020}}, /* 2000; EN QUAD */ + {0x002001, 0, {0x0020}}, /* 2001; EM QUAD */ + {0x002002, 0, {0x0020}}, /* 2002; EN SPACE */ + {0x002003, 0, {0x0020}}, /* 2003; EM SPACE */ + {0x002004, 0, {0x0020}}, /* 2004; THREE-PER-EM SPACE */ + {0x002005, 0, {0x0020}}, /* 2005; FOUR-PER-EM SPACE */ + {0x002006, 0, {0x0020}}, /* 2006; SIX-PER-EM SPACE */ + {0x002007, 0, {0x0020}}, /* 2007; FIGURE SPACE */ + {0x002008, 0, {0x0020}}, /* 2008; PUNCTUATION SPACE */ + {0x002009, 0, {0x0020}}, /* 2009; THIN SPACE */ + {0x00200A, 0, {0x0020}}, /* 200A; HAIR SPACE */ + {0x00200B, 0, {0x0020}}, /* 200B; ZERO WIDTH SPACE */ + {0x00202F, 0, {0x0020}}, /* 202F; NARROW NO-BREAK SPACE */ + {0x00205F, 0, {0x0020}}, /* 205F; MEDIUM MATHEMATICAL SPACE */ + {0x003000, 0, {0x0020}}, /* 3000; IDEOGRAPHIC SPACE */ {0} }; const Stringprep_profile stringprep_saslprep[] = { - {STRINGPREP_MAP_TABLE, 0, stringprep_saslprep_space_map, countof(stringprep_saslprep_space_map) - 1}, - {STRINGPREP_MAP_TABLE, 0, TABLE(B_1)}, - {STRINGPREP_NFKC, 0, 0, 0}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_1_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_1)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_2_2)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_3)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_4)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_5)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_6)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_7)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_PROHIBIT_TABLE, 0, TABLE(C_9)}, - {STRINGPREP_BIDI, 0, 0, 0}, - {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, TABLE(C_8)}, - {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, TABLE(D_1)}, - {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, TABLE(D_2)}, - {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED, TABLE(A_1)}, + {STRINGPREP_MAP_TABLE, 0, stringprep_saslprep_space_map}, + {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1}, + {STRINGPREP_NFKC, 0, 0}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9}, + {STRINGPREP_BIDI, 0, 0}, + {STRINGPREP_BIDI_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, + {STRINGPREP_BIDI_RAL_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_1}, + {STRINGPREP_BIDI_L_TABLE, ~STRINGPREP_NO_BIDI, stringprep_rfc3454_D_2}, + {STRINGPREP_UNASSIGNED_TABLE, ~STRINGPREP_NO_UNASSIGNED, + stringprep_rfc3454_A_1}, {0} }; diff -up libidn-1.34/lib/stringprep.c.tablesize-revert libidn-1.34/lib/stringprep.c --- libidn-1.34/lib/stringprep.c.tablesize-revert 2017-07-12 21:40:56.000000000 +0200 +++ libidn-1.34/lib/stringprep.c 2018-05-04 12:44:24.697588211 +0200 @@ -36,20 +36,9 @@ #include "stringprep.h" -static int -_compare_table_element (const uint32_t * c, const Stringprep_table_element * e) -{ - if (*c < e->start) - return -1; - if (*c > e->end) - return 1; - return 0; -} - static ssize_t stringprep_find_character_in_table (uint32_t ucs4, - const Stringprep_table_element * table, - size_t table_size) + const Stringprep_table_element * table) { /* This is where typical uses of Libidn spends very close to all CPU time and causes most cache misses. One could easily do a binary @@ -57,17 +46,8 @@ stringprep_find_character_in_table (uint slowness is at all relevant in typical applications. (I don't dispute optimization may improve matters significantly, I'm mostly interested in having someone give real-world benchmark on - the impact of libidn.) - * - * Answer (Tim Rühsen rockdaboot@gmx.de): - * Testing the fuzz corpora just once via make check takes ~54 billion CPU cycles. - * That is almost 20s on my Intel i3 3.1GHz !!! - * That even makes fuzzing almost useless, eating up CPU cycles for nothing. - * - * The bsearch() approach takes ~3 billion CPU cycles. - * Almost a factor of 20 faster (but still pretty slow). - * There are still ~2 million calls to bsearch() which make ~30% of CPU time used. - * Most time is spent in _g_utf8_normalize_wc(). + the impact of libidn.) */ + ssize_t i; @@ -75,27 +55,21 @@ stringprep_find_character_in_table (uint if (ucs4 >= table[i].start && ucs4 <= (table[i].end ? table[i].end : table[i].start)) return i; - */ - - const Stringprep_table_element * p = - bsearch (&ucs4, table, table_size, sizeof(Stringprep_table_element), - (int (*)(const void *, const void *)) _compare_table_element); - return p ? (p - table) : -1; + return -1; } static ssize_t stringprep_find_string_in_table (uint32_t * ucs4, size_t ucs4len, size_t * tablepos, - const Stringprep_table_element * table, - size_t table_size) + const Stringprep_table_element * table) { size_t j; ssize_t pos; for (j = 0; j < ucs4len; j++) - if ((pos = stringprep_find_character_in_table (ucs4[j], table, table_size)) != -1) + if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1) { if (tablepos) *tablepos = pos; @@ -109,8 +83,7 @@ static int stringprep_apply_table_to_string (uint32_t * ucs4, size_t * ucs4len, size_t maxucs4len, - const Stringprep_table_element * table, - size_t table_size) + const Stringprep_table_element * table) { ssize_t pos; size_t i, maplen; @@ -118,7 +91,7 @@ stringprep_apply_table_to_string (uint32 size_t srclen = *ucs4len; /* length of unprocessed data */ while ((pos = stringprep_find_string_in_table (src, srclen, - &i, table, table_size)) != -1) + &i, table)) != -1) { for (maplen = STRINGPREP_MAX_MAP_CHARS; maplen > 0 && table[i].map[maplen - 1] == 0; maplen--) @@ -220,7 +193,7 @@ stringprep_4i (uint32_t * ucs4, size_t * case STRINGPREP_PROHIBIT_TABLE: k = stringprep_find_string_in_table (ucs4, ucs4len, - NULL, profile[i].table, profile[i].table_size); + NULL, profile[i].table); if (k != -1) return STRINGPREP_CONTAINS_PROHIBITED; break; @@ -231,7 +204,7 @@ stringprep_4i (uint32_t * ucs4, size_t * if (flags & STRINGPREP_NO_UNASSIGNED) { k = stringprep_find_string_in_table - (ucs4, ucs4len, NULL, profile[i].table, profile[i].table_size); + (ucs4, ucs4len, NULL, profile[i].table); if (k != -1) return STRINGPREP_CONTAINS_UNASSIGNED; } @@ -241,7 +214,7 @@ stringprep_4i (uint32_t * ucs4, size_t * if (UNAPPLICAPLEFLAGS (flags, profile[i].flags)) break; rc = stringprep_apply_table_to_string - (ucs4, &ucs4len, maxucs4len, profile[i].table, profile[i].table_size); + (ucs4, &ucs4len, maxucs4len, profile[i].table); if (rc != STRINGPREP_OK) return rc; break; @@ -265,7 +238,7 @@ stringprep_4i (uint32_t * ucs4, size_t * done_prohibited = 1; k = stringprep_find_string_in_table (ucs4, ucs4len, NULL, - profile[j].table, profile[j].table_size); + profile[j].table); if (k != -1) return STRINGPREP_BIDI_CONTAINS_PROHIBITED; } @@ -273,14 +246,14 @@ stringprep_4i (uint32_t * ucs4, size_t * { done_ral = 1; if (stringprep_find_string_in_table - (ucs4, ucs4len, NULL, profile[j].table, profile[j].table_size) != -1) + (ucs4, ucs4len, NULL, profile[j].table) != -1) contains_ral = j; } else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE) { done_l = 1; if (stringprep_find_string_in_table - (ucs4, ucs4len, NULL, profile[j].table, profile[j].table_size) != -1) + (ucs4, ucs4len, NULL, profile[j].table) != -1) contains_l = j; } @@ -293,9 +266,9 @@ stringprep_4i (uint32_t * ucs4, size_t * if (contains_ral != SIZE_MAX) { if (!(stringprep_find_character_in_table - (ucs4[0], profile[contains_ral].table, profile[contains_ral].table_size) != -1 && + (ucs4[0], profile[contains_ral].table) != -1 && stringprep_find_character_in_table - (ucs4[ucs4len - 1], profile[contains_ral].table, profile[contains_ral].table_size) != -1)) + (ucs4[ucs4len - 1], profile[contains_ral].table) != -1)) return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL; } } diff -up libidn-1.34/lib/stringprep.h.tablesize-revert libidn-1.34/lib/stringprep.h --- libidn-1.34/lib/stringprep.h.tablesize-revert 2017-06-29 21:11:13.000000000 +0200 +++ libidn-1.34/lib/stringprep.h 2018-05-04 12:44:24.697588211 +0200 @@ -110,7 +110,6 @@ extern "C" Stringprep_profile_steps operation; Stringprep_profile_flags flags; const Stringprep_table_element *table; - size_t table_size; }; typedef struct Stringprep_table Stringprep_profile;