diff -Naur dict-web1913-1.4-0.47pd/Makefile.in dict-web1913-1.4-0.47pd.new/Makefile.in --- dict-web1913-1.4+0.47pd/Makefile.in Mon Jun 14 00:10:37 1999 +++ dict-web1913-1.4-0.47pd.new/Makefile.in Mon Feb 5 06:47:08 2001 @@ -98,18 +98,8 @@ # cide: $(CIDEBASE) $(DATABASE) $(DATAINDEX): $(EXES) -# ./webfilter $(DATAFILES) > $(TMPPATH)/$(TMPFILE) -# Here is a fix for a bug in the Carbonyl definition in the public -# domain web1913_0.47-pd source. In it (unlike the restrictive -# license source), <\sub> tags are placed around every number in -# chemical formulas (). In the definition of Carbonyl, this -# is done to the 7 in the hex number \'b7 (representing a positive -# charge), turning it into \'b7<\sub> which confuses webfilter. - sed '/^

Car"bon\*yl//g' $(DATAFILES) \ - |sed 's/&fist;//g'\ - | ./webfilter > $(TMPPATH)/$(TMPFILE) - ./webfmt < $(TMPPATH)/$(TMPFILE) - -rm -f $(TMPPATH)/$(TMPFILE) + sed -f sedfile $(DATAFILES) \ + | ./webfilter|./webfmt if [ "$(DICTZIP)" != "cat" ]; then \ dictzip -v $(DATABASE); \ fi diff -Naur dict-web1913-1.4-0.47pd/orig.scan.l dict-web1913-1.4-0.47pd.new/orig.scan.l --- dict-web1913-1.4+0.47pd/orig.scan.l Thu Jan 1 03:00:00 1970 +++ dict-web1913-1.4-0.47pd.new/orig.scan.l Mon Feb 5 06:47:08 2001 @@ -0,0 +1,336 @@ +/* scan.l -- Scanner for Project Gutenberg Webster converter + * Created: Sun Mar 16 09:26:43 1997 by faith@cs.unc.edu + * Revised: Sun Feb 22 13:07:39 1998 by faith@acm.org + * Copyright 1997, 1998 Rickard E. Faith (faith@acm.org) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 1, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + * + * $Id: scan.l,v 1.13 1998/02/22 18:24:15 faith Exp $ + * + * HW-like: spn, plw + */ + +%option stack debug +%{ +#include "webfmt.h" +#include "parse.h" +#include + +#define DEBUG 0 + +#if !DEBUG +#define src_line(t,l) +#define src_get(x) NULL +#define src_advance(l) +#endif + +extern int yylex( void ); +extern int yydebug; +extern void yyerror( const char *message ); +extern int database; +static int match(const char *buf, int length); +static void comment( const char *text, int length ); +#define RETURN(val) do { \ + yylval.token.src = src_get( yyleng ); \ + return val; \ +} while (0) +%} +%x OTHER COMMENT SUBINIT +letter [a-zA-Z] +digit [0-9] +INTEGER {digit}({digit})* +comment <--.*--> +word ([[:alpha:]_/\-\.\*][[:alnum:]_/\-\.\*]*) +string \"(([^\"\n])|\"\")*\" +badstring \"(([^\"\n])|\"\")* +NL \n +ID ([[:alpha:]_][[:alnum:]_]*) +WS [[:blank:]]+ +%% +{ + .*{NL} src_line(yytext,yyleng); yyless(0); BEGIN(OTHER); + .* src_line(yytext,yyleng); yyless(0); BEGIN(OTHER); +} +{ + .*{NL} src_line(yytext,yyleng); yyless(0); yy_pop_state(); + .* src_line(yytext,yyleng); yyless(0); yy_pop_state(); +} +{ + "-->" { src_advance(yyleng); + comment(NULL,0); + yy_pop_state(); + } + "-" src_advance(yyleng); comment(yytext,yyleng); + [^-\n]+ src_advance(yyleng); comment(yytext,yyleng); + {NL} { src_advance(yyleng); + comment(yytext,yyleng); + yy_push_state(SUBINIT); + } +} +{ +"<--" src_advance(yyleng); yy_push_state(COMMENT); +"" RETURN(T_HW_START); +"\." RETURN(T_HW_STOP); +"" RETURN(T_HW_STOP); +"" RETURN(T_ALTNAME_START); +"" RETURN(T_ALTNAME_STOP); +"" RETURN(T_ALTNPLUF_START); +"" RETURN(T_ALTNPLUF_STOP); +"" RETURN(T_DEF_START); +"" RETURN(T_DEF_START); +"" RETURN(T_SN_START); +"" RETURN(T_SN_STOP); +"" RETURN(T_SD_START); +"" RETURN(T_SD_STOP); +"" RETURN(T_STYPE_START); +"" RETURN(T_STYPE_STOP); +"" RETURN(T_PLW_START); +"" RETURN(T_PLW_STOP); +"" RETURN(T_SINGW_START); +"" RETURN(T_SINGW_STOP); +"" RETURN(T_CONJF_START); +"" RETURN(T_CONJF_STOP); +"" RETURN(T_ADJF_START); +"" RETURN(T_ADJF_STOP); +"" RETURN(T_DECF_START); +"" RETURN(T_DECF_STOP); +"" RETURN(T_WF_START); +"" RETURN(T_WF_STOP); +"" RETURN(T_ASP_START); +"" RETURN(T_ASP_STOP); +"" RETURN(T_EXP_START); +"" RETURN(T_EXP_STOP); +"" RETURN(T_ROOT_START); +"" RETURN(T_ROOT_STOP); +"" RETURN(T_VINC_START); +"" RETURN(T_VINC_STOP); +"" RETURN(T_AU_START); +"" RETURN(T_AU_STOP); +"" RETURN(T_Q_START); +"" RETURN(T_Q_STOP); +"" RETURN(T_QAU_START); +"" RETURN(T_QAU_STOP); +"" RETURN(T_NOTE_START); +"[hand]" RETURN(T_NOTE_START); +" -- " RETURN(T_COL_START); +"-- " RETURN(T_COL_START); +"" RETURN(T_COL_START); +"" RETURN(T_COL_STOP); +"" RETURN(T_MCOL_START); +"" RETURN(T_MCOL_STOP); +"Syn. -- " RETURN(T_SYN_START); +"" RETURN(T_SYN_START); +"" RETURN(T_ANT_START); +"" RETURN(T_ANT_STOP); +"" RETURN(T_UEX_START); +"" RETURN(T_UEX_STOP); +"" RETURN(T_CONTR_START); +"" RETURN(T_CONTR_STOP); +"" RETURN(T_CHFORM_START); +"" RETURN(T_CHFORM_STOP); +" -- " RETURN(T_USAGE_START); +"-- " RETURN(T_USAGE_START); +"" RETURN(T_USAGE_START); +" --" RETURN(T_USAGE_START); +"" RETURN(T_PERSON_STOP); +"" RETURN(T_SPN_START); +"" RETURN(T_SPN_STOP); +"" RETURN(T_PROD_START); +"" RETURN(T_PROD_STOP); +"" RETURN(T_SUPR_START); +"" RETURN(T_SUPR_STOP); +"" RETURN(T_SUPS_START); +"" RETURN(T_SUPS_STOP); +"" RETURN(T_SUBS_START); +"" RETURN(T_SUBS_STOP); +"" RETURN(T_CREF_START); +"" RETURN(T_CREF_STOP); +"" RETURN(T_BREAK); /* line break command -- rik */ +" -*" src_advance(yyleng); /* ignore */ +"(Zo[\"o]l.)" { src_advance(yyleng); + yylval.token.string="(Zool.)"; + RETURN(T_OTHER); + } +"{" src_advance(yyleng); /* ignore */ +\ ?\}?"," { src_advance(yyleng); + yylval.token.string=","; + RETURN(T_OTHER); + } +\}?" " src_advance(yyleng); /* ignore */ +\}?" " src_advance(yyleng); /* ignore */ +"".*"" src_advance(yyleng); /*ignore*/ +\<[^->/]+\> { yylval.token.string + = str_findn(yytext+1, yyleng-2); + /* fprintf( stderr, "Found <%s>\n", yylval.token.string ); */ + RETURN(T_TAG_START); + } +\<\/[^>]+\> { yylval.token.string + = str_findn(yytext+2, yyleng-3); + /* fprintf( stderr, "Found \n", yylval.token.string ); */ + RETURN(T_TAG_STOP); + } +[^<\n]+ { yylval.token.string = str_copyn(yytext, yyleng); + RETURN(T_OTHER); + } +{NL} { yylval.token.string = " "; + BEGIN(INITIAL); + RETURN(T_OTHER); + } +. { yylval.token.string = str_findn(yytext,yyleng); + BEGIN(OTHER); + RETURN(OTHER); + } +} +<> RETURN(0); +%% +int yywrap( void ) +{ + return 1; +} + +void yyerror( const char *message ) +{ + fprintf( stderr, "\nError: %*.*s\n", yyleng, yyleng, yytext ); + src_parse_error( stderr, src_get( yyleng ), message ); + err_fatal( __FUNCTION__, "parse error\n" ); +#if !DEBUG + fprintf( stderr, "For line numbers, recompile with DEBUG set to 1\n" ); +#endif + exit( 1 ); +} + +void comment( const char *text, int length ) +{ + static int have = 0; + static int fin = 0; + time_t t; + + if (!text && !length) { + ++fin; + fmt_indent(0); + fmt_newline(1); fmt_flush_index(); fmt_newline(1); fmt_flush_index(); + return; + } + + if (fin) { +#if 0 + fprintf( stderr, " %s", str_copyn(text,length) ); +#endif + return; + } + + if (!have) { + ++have; + time(&t); + + fmt_indent(0); + fmt_newline(1); fmt_flush_index(); fmt_newline(1); fmt_flush_index(); + fmt_add_index( "00-database-url" ); + if (database == 1) fmt_add_index( "00-cide-url" ); + else fmt_add_index( "00-web1913-url" ); + fmt_string( "00-database-url" ); + fmt_newline(1); + fmt_indent(5); + fmt_string( "ftp://ftp.uga.edu/pub/misc/webster/" ); + + fmt_indent(0); + fmt_newline(1); fmt_flush_index(); fmt_newline(1); fmt_flush_index(); + fmt_string( "00-database-short" ); + fmt_newline(1); + fmt_indent(5); + fmt_add_index( "00-database-short" ); + if (database == 1) { + fmt_add_index( "00-cide-short" ); + fmt_string( "The Collaborative International Dictionary of English" ); + } else { + fmt_add_index( "00-web1913-short" ); + fmt_string( "Webster's Revised Unabridged Dictionary (1913)" ); + } + + fmt_indent(0); + fmt_newline(1); fmt_flush_index(); fmt_newline(1); fmt_flush_index(); + fmt_add_index( "00-database-long" ); + if (database == 1) fmt_add_index( "00-cide-long" ); + else fmt_add_index( "00-web1913-long" ); + fmt_string( "00-database-long" ); + fmt_newline(1); + fmt_indent(5); + if (database == 1) { + fmt_string( + "The Collaborative International Dictionary of English, derived from" + " Webster's Revised Unabridged Dictionary, 1913, C. & G. Merriam Co.," + " Springfield, Mass., under the direction of Noah Porter, D.D., LL.D.;" + " and from WordNet(R), a semantic network created by the Cognitive" + " Science Department of Princeton University under the direction of" + " Prof. George Miller. Online version prepared by MICRA," + " Inc., Plainfield, N.J. and edited by Patrick Cassidy" + " ." ); + + } else { + fmt_string( + "Webster's Revised Unabridged Dictionary (G & C. Merriam Co.," + " 1913, edited by Noah Porter). Online version prepared by MICRA," + " Inc., Plainfield, N.J. and edited by Patrick Cassidy" + " ." ); + } + fmt_newline(1); + fmt_string( "ftp://ftp.uga.edu/pub/misc/webster/" ); + fmt_newline(1); + fmt_string( + "ftp://uiarchive.cso.uiuc.edu/pub/etext/gutenberg/etext96/pgw*" ); + fmt_newline(1); + fmt_string( + "http://humanities.uchicago.edu/forms_unrest/webster.form.html" ); + + fmt_indent(0); + fmt_newline(1); fmt_flush_index(); fmt_newline(1); fmt_flush_index(); + fmt_string( "00-database-info" ); + fmt_newline(1); + fmt_indent(5); + fmt_add_index( "00-database-info" ); + if (database == 1) fmt_add_index( "00-web1913-info" ); + else fmt_add_index( "00-cide-info" ); + fmt_string("This file was converted from the original database on:" ); + fmt_indent_add(10); + fmt_newline(1); + fmt_string("%25.25s", ctime(&t) ); + fmt_newline(1); + fmt_indent_add(-10); + fmt_newline(2); + fmt_string( "The original data is available from:" ); + fmt_indent_add(10); + fmt_newline(1); + fmt_string( "ftp://ftp.uga.edu/pub/misc/webster/" ); + fmt_newline(1); + fmt_indent_add(-10); + fmt_newline(2); + fmt_string( + "The original data was distributed with the notice shown below." + " No additional restrictions are claimed. Please redistribute" + " this changed version under the same conditions and restriction" + " that apply to the original version." ); + fmt_newline(2); + fmt_string( + "===============================================================" ); + fmt_newline(2); + } + +#if 0 + fprintf( stderr, "GOT \"%s\"\n", str_copyn(text,length)); +#endif + fmt_indent(5); + fmt_literal( "%s", str_copyn(text,length) ); +} diff -Naur dict-web1913-1.4-0.47pd/orig.webfilter.l dict-web1913-1.4-0.47pd.new/orig.webfilter.l --- dict-web1913-1.4+0.47pd/orig.webfilter.l Thu Jan 1 03:00:00 1970 +++ dict-web1913-1.4-0.47pd.new/orig.webfilter.l Mon Feb 5 06:47:09 2001 @@ -0,0 +1,548 @@ +/* webfilter.l -- + * Created: Thu Oct 3 00:51:04 1996 by faith@cs.unc.edu + * Revised: Sun Feb 22 11:26:25 1998 by faith@acm.org + * Copyright 1996, 1997, 1998 Rickard E. Faith (faith@acm.org) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 1, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + * + * $Id: webfilter.l,v 1.9 1998/02/22 18:24:15 faith Exp $ + */ + + +%option stack +%{ +#include "maa.h" +#include +#include + +#define DBG_VERBOSE 0x0001 +#define DBG_DEBUG 0x0002 +#define DBG_SEARCH2 0x0004 +#define DBG_SEARCH3 0x0008 + +extern int yylex( void ); +extern int yydebug; +extern void yyerror( const char *message ); +static stk_Stack stk; +static hsh_HashTable entityHash, hexHash; +static void push(const char *text, int length); +static void pop(const char *text, int length); +static void entity(const char *text, int length); +static void hex(const char *text, int length); +static void other(const char *text, int length); +static void comment(const char *text, int length); + +typedef struct trans { + const char *name; + const char *rep; + int count; +} *trans_t; +%} +%x OTHER COMMENT SUBINIT +NL \n +WS [[:blank:]]+ +%% +{ + .*{NL} src_line(yytext,yyleng); yyless(0); BEGIN(OTHER); + .* src_line(yytext,yyleng); yyless(0); BEGIN(OTHER); +} +{ + .*{NL} src_line(yytext,yyleng); yyless(0); yy_pop_state(); + .* src_line(yytext,yyleng); yyless(0); yy_pop_state(); +} +{ + "-->" { src_advance(yyleng); + comment(yytext,yyleng); + yy_pop_state(); + } + "-" src_advance(yyleng); comment(yytext,yyleng); + [^-\n]+ src_advance(yyleng); comment(yytext,yyleng); + {NL} comment(yytext,yyleng); yy_push_state(SUBINIT); +} +{ +"<--" { src_advance(yyleng); + other(yytext,yyleng); + yy_push_state(COMMENT); + } +"

" src_advance(yyleng); /* ignore */ +"

" src_advance(yyleng); /* ignore */ +"" src_advance(yyleng); /* ignore */ +"" src_advance(yyleng); /* ignore */ +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); other("{",1); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); other("}",1); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); other("{",1); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); other("}",1); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); other("{",1); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); other("}",1); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); other("{",1); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); other("}",1); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); other("{",1); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); other("}",1); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +\ ?"{"\ ? { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +\ ?\}?""\ ? { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +\ ?"("\ ? { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +\ ?"(?)"\ ? { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +\ ?"(#)"\ ? { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +\ ?"(-n"\ ? { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"
" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +"&fist;" { src_advance(yyleng); + if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } +\<\/[^>]+\> src_advance(yyleng); pop(yytext+2,yyleng-3); +\<[^->/]+\> src_advance(yyleng); push(yytext+1,yyleng-2); +\<[^->/]+\/ src_advance(yyleng); entity(yytext+1,yyleng-2); +\\\'.. src_advance(yyleng); hex(yytext+2,yyleng-2); +[^{}<\\\n]+ src_advance(yyleng); other(yytext,yyleng); +{NL} other(yytext,yyleng); BEGIN(INITIAL); +\{ src_advance(yyleng); other("[",1); +\} src_advance(yyleng); other("]",1); +. src_advance(yyleng); other(yytext,yyleng); +} +<> return 0; +%% + +int yywrap( void ) +{ + return 1; +} + +void yyerror( const char *message ) +{ + src_parse_error( stderr, src_get( yyleng ), message ); + err_fatal( __FUNCTION__, "parse error\n" ); + exit( 1 ); +} + +static void entity( const char *text, int length ) +{ + const char *buf = str_findn(text,length); + char buf2[512]; + trans_t t; + + if (!(t = (trans_t)hsh_retrieve(entityHash,buf))) { + t = xmalloc(sizeof(struct trans)); + memset((char *)t,0,sizeof(struct trans)); + hsh_insert(entityHash,buf,t); + if (dbg_test(DBG_DEBUG)) { + sprintf( buf2, "WARNING: Adding %s to entity table ******",buf); + src_parse_error( stderr, src_get( yyleng ), buf2 ); + } + } + ++t->count; + + if (!dbg_test(DBG_DEBUG)) { + if (t->rep) + printf( "%s", t->rep); + else + printf( "[%s]", buf); + } +} + +static void hex( const char *text, int length ) +{ + const char *buf = str_findn(text,length); + char buf2[512]; + trans_t t; + + if (!(t = (trans_t)hsh_retrieve(hexHash,buf))) { + t = xmalloc(sizeof(struct trans)); + memset(t,0,sizeof(struct trans)); + hsh_insert(hexHash,buf,t); + if (dbg_test(DBG_DEBUG)) { + sprintf( buf2, "WARNING: Adding %s to hex table ******",buf); + src_parse_error( stderr, src_get( yyleng ), buf2 ); + } + } + ++t->count; + + if (!dbg_test(DBG_DEBUG)) { + if (t->rep) + printf( "%s", t->rep); + else + printf( "[%s]", buf); + } +} + +static void other( const char *text, int length ) +{ + char *buf = alloca(length + 1); + char *d; + const char *s; + int i; + char p = 0; + + if (!dbg_test(DBG_DEBUG)) { + for (s = text, d = buf, i = 0; *s && i < length; i++, s++) { + if (i < length-1 && *s == ' ' + && (s[1] == ',' || s[1] == ';' || s[1] == ' ')) + continue; + *d++ = *s; + } + *d = '\0'; + printf( "%s", buf); + } +} + +static void comment( const char *text, int length ) +{ + char *buf = alloca(length + 1); + char *d; + const char *s; + int i; + char p = 0; + + if (!dbg_test(DBG_DEBUG)) { + for (s = text, d = buf, i = 0; *s && i < length; i++, s++) { + if (i < length-1 && *s == ' ' && (s[1] == ',' || s[1] == ';')) + continue; + *d++ = *s; + } + *d = '\0'; + printf( "%s", buf); + } +} + +static void push( const char *text, int length ) +{ + const char *name; + + if (!dbg_test(DBG_DEBUG)) { + printf( "%*.*s",yyleng,yyleng,yytext); + } else { + name = str_findn(text,length); + stk_push(stk,(void *)name); + } +} + +static void pop( const char *text, int length ) +{ + const char *name = str_findn(text,length); + char *want; + char *want2; + char *want3; + char buf[256]; + + + if (!dbg_test(DBG_DEBUG)) { + printf( "%*.*s",yyleng,yyleng,yytext); + } else { + if (!(want = stk_pop(stk))) { + src_parse_error( stderr, src_get(yyleng), "ERROR: Stack underflow ******" ); + return; + } + + if (dbg_test(DBG_SEARCH2)) { + if (name != want) { + want2 = stk_pop(stk); + if (name != want2) { + stk_push(stk,want2); + sprintf( buf, "ERROR: Expected , but found ******", + want, name ); + src_parse_error( stderr, src_get( yyleng ), buf ); + } else { + stk_push(stk,want); + } + } + } else if (dbg_test(DBG_SEARCH3)) { + if (name != want) { + want2 = stk_pop(stk); + if (name != want2) { + want3 = stk_pop(stk); + if (name != want3) { + stk_push(stk,want3); + stk_push(stk,want2); + sprintf( buf, "ERROR: Expected , but found ******", + want, name ); + src_parse_error( stderr, src_get( yyleng ), buf ); + } else { + stk_push(stk,want2); + stk_push(stk,want); + } + } else { + stk_push(stk,want); + } + } + } else if (name != want) { + sprintf( buf, "ERROR: Expected , but found ******", + want, name ); + src_parse_error( stderr, src_get( yyleng ), buf ); + } + } +} + +static int printer( const void *name, const void *datum ) +{ + const char *s = (const char *)name; + trans_t t = (trans_t)datum; + + if (t->rep) + fprintf( stderr, "%10d %s => \"%s\"\n", t->count, s, t->rep ); + else + fprintf( stderr, "%10d %s => *** NO TRANSLATION ***\n", t->count, s ); + + return 0; +} + +int main( int argc, char **argv ) +{ + char *pt; + FILE *str; + char buf[4096]; + arg_List a; + int c; + char **v; + trans_t t; + int i; + int debug = 0; + const char *filterFile = "filter.dat"; + + maa_init(argv[0]); + stk = stk_create(); + entityHash = hsh_create(NULL,NULL); + hexHash = hsh_create(NULL,NULL); + + dbg_register( DBG_VERBOSE, "verbose" ); + dbg_register( DBG_DEBUG, "debug" ); + dbg_register( DBG_SEARCH2, "search2" ); + dbg_register( DBG_SEARCH3, "search3" ); + + while ((c = getopt( argc, argv, "vd:Df:" )) != EOF) + switch(c) { + case 'v': dbg_set( "verbose" ); break; + case 'd': dbg_set( optarg ); break; + case 'D': dbg_set( "debug" ); break; + case 'f': filterFile = optarg; break; + } + + if ((str = fopen(filterFile,"r"))) { + if (dbg_test(DBG_VERBOSE)) + fprintf( stderr, "Reading data from %s\n", filterFile ); + while (fgets(buf,4096,str)) { + buf[strlen(buf)-1] = '\0'; + if ((pt = strchr(buf,'#'))) *pt = '\0'; + if (buf[0] == '\0') continue; + + a = arg_argify(buf, ARG_NO_ESCAPE|ARG_NO_QUOTE); + arg_get_vector( a, &c, &v ); + + if (c == 2) { + if (dbg_test(DBG_VERBOSE)) + fprintf( stderr, "\"%s\" \"%s\" %d\n", v[0],v[1], c); + t = xmalloc(sizeof(struct trans)); + memset((char *)t,0,sizeof(struct trans)); + t->name = str_find(v[0]); + t->rep = str_find(v[1]); + hsh_insert(entityHash,t->name,t); + } else if (c == 3) { + if (dbg_test(DBG_VERBOSE)) + fprintf( stderr, "\"%s\" \"%s\" \"%s\" %d\n", + v[0],v[1], v[2], c); + t = xmalloc(sizeof(struct trans)); + memset((char *)t,0,sizeof(struct trans)); + t->name = str_find(v[0]); + t->rep = str_find(v[2]); + hsh_insert(hexHash,t->name,t); + + t = xmalloc(sizeof(struct trans)); + memset((char *)t,0,sizeof(struct trans)); + t->name = str_find(v[1]); + t->rep = str_find(v[2]); + hsh_insert(entityHash,t->name,t); + } else if (c) { + if (dbg_test(DBG_VERBOSE)) + fprintf( stderr, "\"%s\" %d\n", v[0], c); + t = xmalloc(sizeof(struct trans)); + memset((char *)t,0,sizeof(struct trans)); + t->name = str_find(v[0]); + t->rep = NULL; + hsh_insert(entityHash,t->name,t); + } + arg_destroy(a); + } + fclose(str); + } + + if (argc-optind >= 1) { + for (i = optind; i < argc; i++) { + if (dbg_test(DBG_VERBOSE)) + fprintf( stderr, "Opening %s\n",argv[i]); + if (!(str = fopen( argv[i], "r" ))) { + err_fatal_errno( __FUNCTION__, + "Cannot open \"%s\" for read\n", argv[1]); + } + if ((pt = strrchr(argv[i],'/'))) src_new_file(pt+1); + else src_new_file(argv[i]); + + yyrestart(str); + yylex(); + fclose(str); + } + } else if (argc-optind == 0) { + src_new_file("[stdin]"); + yyrestart(stdin); + yylex(); + } + + if (dbg_test(DBG_VERBOSE)) { + fflush(stdout); + fprintf( stderr, "Entity table:\n" ); + hsh_iterate( entityHash, printer ); + fprintf( stderr, "Hex table:\n" ); + hsh_iterate( hexHash, printer ); + } + return 0; +} diff -Naur dict-web1913-1.4-0.47pd/scan.l dict-web1913-1.4-0.47pd.new/scan.l --- dict-web1913-1.4+0.47pd/scan.l Sun Feb 22 22:31:45 1998 +++ dict-web1913-1.4-0.47pd.new/scan.l Mon Feb 5 06:47:08 2001 @@ -90,6 +90,7 @@ "" RETURN(T_ALTNPLUF_STOP); "" RETURN(T_DEF_START); "" RETURN(T_DEF_START); +"" RETURN(T_DEF_START); "" RETURN(T_SN_START); "" RETURN(T_SN_STOP); "" RETURN(T_SD_START); diff -Naur dict-web1913-1.4-0.47pd/webfilter.l dict-web1913-1.4-0.47pd.new/webfilter.l --- dict-web1913-1.4+0.47pd/webfilter.l Thu Jun 10 05:40:06 1999 +++ dict-web1913-1.4-0.47pd.new/webfilter.l Mon Feb 5 06:47:08 2001 @@ -140,8 +140,6 @@ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } "" { src_advance(yyleng); if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } -"" { src_advance(yyleng); - if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } "" { src_advance(yyleng); if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); } "" { src_advance(yyleng); @@ -246,7 +244,7 @@ \<[^->/]+\> src_advance(yyleng); push(yytext+1,yyleng-2); \<[^->/]+\/ src_advance(yyleng); entity(yytext+1,yyleng-2); \\\'.. src_advance(yyleng); hex(yytext+2,yyleng-2); -[^{}<\\\n]+ src_advance(yyleng); other(yytext,yyleng); +[^&{}<\\\n]+ src_advance(yyleng); other(yytext,yyleng); {NL} other(yytext,yyleng); BEGIN(INITIAL); \{ src_advance(yyleng); other("[",1); \} src_advance(yyleng); other("]",1);