Репозитории ALT
S: | 1.4_0.47pd-alt3 |
5.1: | 1.4_0.47pd-alt1 |
4.1: | 1.4_0.47pd-alt1 |
4.0: | 1.4_0.47pd-ipl6.1 |
3.0: | 1.4_0.47pd-ipl6.1 |
Группа :: Работа с текстами
Пакет: dict-web1913
Главная Изменения Спек Патчи Sources Загрузить Gear Bugs and FR Repocop
Патч: debian-webster.patch
Скачать
Скачать
diff -Naur dict-web1913-1.4-0.47pd/Makefile.in dict-web1913-1.4-0.47pd.new/Makefile.in
--- dict-web1913-1.4+0.47pd/Makefile.in Mon Jun 14 00:10:37 1999
+++ dict-web1913-1.4-0.47pd.new/Makefile.in Mon Feb 5 06:47:08 2001
@@ -98,18 +98,8 @@
# cide: $(CIDEBASE)
$(DATABASE) $(DATAINDEX): $(EXES)
-# ./webfilter $(DATAFILES) > $(TMPPATH)/$(TMPFILE)
-# Here is a fix for a bug in the Carbonyl definition in the public
-# domain web1913_0.47-pd source. In it (unlike the restrictive
-# license source), <sub><\sub> tags are placed around every number in
-# chemical formulas (<chform>). In the definition of Carbonyl, this
-# is done to the 7 in the hex number \'b7 (representing a positive
-# charge), turning it into \'b<sub>7<\sub> which confuses webfilter.
- sed '/^<p><hw>Car"bon\*yl</s/<\/\{0,1\}sub>//g' $(DATAFILES) \
- |sed 's/&fist;//g'\
- | ./webfilter > $(TMPPATH)/$(TMPFILE)
- ./webfmt < $(TMPPATH)/$(TMPFILE)
- -rm -f $(TMPPATH)/$(TMPFILE)
+ sed -f sedfile $(DATAFILES) \
+ | ./webfilter|./webfmt
if [ "$(DICTZIP)" != "cat" ]; then \
dictzip -v $(DATABASE); \
fi
diff -Naur dict-web1913-1.4-0.47pd/orig.scan.l dict-web1913-1.4-0.47pd.new/orig.scan.l
--- dict-web1913-1.4+0.47pd/orig.scan.l Thu Jan 1 03:00:00 1970
+++ dict-web1913-1.4-0.47pd.new/orig.scan.l Mon Feb 5 06:47:08 2001
@@ -0,0 +1,336 @@
+/* scan.l -- Scanner for Project Gutenberg Webster converter
+ * Created: Sun Mar 16 09:26:43 1997 by faith@cs.unc.edu
+ * Revised: Sun Feb 22 13:07:39 1998 by faith@acm.org
+ * Copyright 1997, 1998 Rickard E. Faith (faith@acm.org)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 1, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * $Id: scan.l,v 1.13 1998/02/22 18:24:15 faith Exp $
+ *
+ * HW-like: spn, plw
+ */
+
+%option stack debug
+%{
+#include "webfmt.h"
+#include "parse.h"
+#include <string.h>
+
+#define DEBUG 0
+
+#if !DEBUG
+#define src_line(t,l)
+#define src_get(x) NULL
+#define src_advance(l)
+#endif
+
+extern int yylex( void );
+extern int yydebug;
+extern void yyerror( const char *message );
+extern int database;
+static int match(const char *buf, int length);
+static void comment( const char *text, int length );
+#define RETURN(val) do { \
+ yylval.token.src = src_get( yyleng ); \
+ return val; \
+} while (0)
+%}
+%x OTHER COMMENT SUBINIT
+letter [a-zA-Z]
+digit [0-9]
+INTEGER {digit}({digit})*
+comment <--.*-->
+word ([[:alpha:]_/\-\.\*][[:alnum:]_/\-\.\*]*)
+string \"(([^\"\n])|\"\")*\"
+badstring \"(([^\"\n])|\"\")*
+NL \n
+ID ([[:alpha:]_][[:alnum:]_]*)
+WS [[:blank:]]+
+%%
+<INITIAL>{
+ .*{NL} src_line(yytext,yyleng); yyless(0); BEGIN(OTHER);
+ .* src_line(yytext,yyleng); yyless(0); BEGIN(OTHER);
+}
+<SUBINIT>{
+ .*{NL} src_line(yytext,yyleng); yyless(0); yy_pop_state();
+ .* src_line(yytext,yyleng); yyless(0); yy_pop_state();
+}
+<COMMENT>{
+ "-->" { src_advance(yyleng);
+ comment(NULL,0);
+ yy_pop_state();
+ }
+ "-" src_advance(yyleng); comment(yytext,yyleng);
+ [^-\n]+ src_advance(yyleng); comment(yytext,yyleng);
+ {NL} { src_advance(yyleng);
+ comment(yytext,yyleng);
+ yy_push_state(SUBINIT);
+ }
+}
+<OTHER>{
+"<--" src_advance(yyleng); yy_push_state(COMMENT);
+"<hw>" RETURN(T_HW_START);
+"</hw>\." RETURN(T_HW_STOP);
+"</hw>" RETURN(T_HW_STOP);
+"<altname>" RETURN(T_ALTNAME_START);
+"</altname>" RETURN(T_ALTNAME_STOP);
+"<altnpluf>" RETURN(T_ALTNPLUF_START);
+"</altnpluf>" RETURN(T_ALTNPLUF_STOP);
+"<def>" RETURN(T_DEF_START);
+"<def2>" RETURN(T_DEF_START);
+"<sn>" RETURN(T_SN_START);
+"</sn>" RETURN(T_SN_STOP);
+"<sd>" RETURN(T_SD_START);
+"</sd>" RETURN(T_SD_STOP);
+"<stype>" RETURN(T_STYPE_START);
+"</stype>" RETURN(T_STYPE_STOP);
+"<plw>" RETURN(T_PLW_START);
+"</plw>" RETURN(T_PLW_STOP);
+"<singw>" RETURN(T_SINGW_START);
+"</singw>" RETURN(T_SINGW_STOP);
+"<conjf>" RETURN(T_CONJF_START);
+"</conjf>" RETURN(T_CONJF_STOP);
+"<adjf>" RETURN(T_ADJF_START);
+"</adjf>" RETURN(T_ADJF_STOP);
+"<decf>" RETURN(T_DECF_START);
+"</decf>" RETURN(T_DECF_STOP);
+"<wf>" RETURN(T_WF_START);
+"</wf>" RETURN(T_WF_STOP);
+"<asp>" RETURN(T_ASP_START);
+"</asp>" RETURN(T_ASP_STOP);
+"<exp>" RETURN(T_EXP_START);
+"</exp>" RETURN(T_EXP_STOP);
+"<root>" RETURN(T_ROOT_START);
+"</root>" RETURN(T_ROOT_STOP);
+"<vinc>" RETURN(T_VINC_START);
+"</vinc>" RETURN(T_VINC_STOP);
+"<au>" RETURN(T_AU_START);
+"</au>" RETURN(T_AU_STOP);
+"<q>" RETURN(T_Q_START);
+"</q>" RETURN(T_Q_STOP);
+"<qau>" RETURN(T_QAU_START);
+"</qau>" RETURN(T_QAU_STOP);
+"<note>" RETURN(T_NOTE_START);
+"<note>[hand]" RETURN(T_NOTE_START);
+" -- <col>" RETURN(T_COL_START);
+"-- <col>" RETURN(T_COL_START);
+"<col>" RETURN(T_COL_START);
+"</col>" RETURN(T_COL_STOP);
+"<mcol>" RETURN(T_MCOL_START);
+"</mcol>" RETURN(T_MCOL_STOP);
+"<syn>Syn. -- " RETURN(T_SYN_START);
+"<syn>" RETURN(T_SYN_START);
+"<ant>" RETURN(T_ANT_START);
+"</ant>" RETURN(T_ANT_STOP);
+"<uex>" RETURN(T_UEX_START);
+"</uex>" RETURN(T_UEX_STOP);
+"<contr>" RETURN(T_CONTR_START);
+"</contr>" RETURN(T_CONTR_STOP);
+"<chform>" RETURN(T_CHFORM_START);
+"</chform>" RETURN(T_CHFORM_STOP);
+" -- <usage>" RETURN(T_USAGE_START);
+"-- <usage>" RETURN(T_USAGE_START);
+"<usage>" RETURN(T_USAGE_START);
+"<usage> --" RETURN(T_USAGE_START);
+"<person" RETURN(T_PERSON_START);
+"</person>" RETURN(T_PERSON_STOP);
+"<spn>" RETURN(T_SPN_START);
+"</spn>" RETURN(T_SPN_STOP);
+"<prod>" RETURN(T_PROD_START);
+"</prod>" RETURN(T_PROD_STOP);
+"<supr>" RETURN(T_SUPR_START);
+"</supr>" RETURN(T_SUPR_STOP);
+"<sups>" RETURN(T_SUPS_START);
+"</sups>" RETURN(T_SUPS_STOP);
+"<subs>" RETURN(T_SUBS_START);
+"</subs>" RETURN(T_SUBS_STOP);
+"<cref>" RETURN(T_CREF_START);
+"</cref>" RETURN(T_CREF_STOP);
+"<break>" RETURN(T_BREAK); /* line break command -- rik */
+"</cd> -*" src_advance(yyleng); /* ignore */
+"<fld>(Zo[\"o]l.)</fld>" { src_advance(yyleng);
+ yylval.token.string="(Zool.)";
+ RETURN(T_OTHER);
+ }
+"<mhw>{" src_advance(yyleng); /* ignore */
+\ ?\}?"</mhw>," { src_advance(yyleng);
+ yylval.token.string=",";
+ RETURN(T_OTHER);
+ }
+\}?"</mhw> " src_advance(yyleng); /* ignore */
+\}?"</mhw> <pos>" src_advance(yyleng); /* ignore */
+"<centered><point26>".*"</point26></centered>" src_advance(yyleng); /*ignore*/
+\<[^->/]+\> { yylval.token.string
+ = str_findn(yytext+1, yyleng-2);
+ /* fprintf( stderr, "Found <%s>\n", yylval.token.string ); */
+ RETURN(T_TAG_START);
+ }
+\<\/[^>]+\> { yylval.token.string
+ = str_findn(yytext+2, yyleng-3);
+ /* fprintf( stderr, "Found </%s>\n", yylval.token.string ); */
+ RETURN(T_TAG_STOP);
+ }
+[^<\n]+ { yylval.token.string = str_copyn(yytext, yyleng);
+ RETURN(T_OTHER);
+ }
+{NL} { yylval.token.string = " ";
+ BEGIN(INITIAL);
+ RETURN(T_OTHER);
+ }
+. { yylval.token.string = str_findn(yytext,yyleng);
+ BEGIN(OTHER);
+ RETURN(OTHER);
+ }
+}
+<<EOF>> RETURN(0);
+%%
+int yywrap( void )
+{
+ return 1;
+}
+/* Report a parse error: echo the offending token text, then abort the run. */
+void yyerror( const char *message )
+{
+ fprintf( stderr, "\nError: %*.*s\n", yyleng, yyleng, yytext );
+ src_parse_error( stderr, src_get( yyleng ), message );
+#if !DEBUG
+ fprintf( stderr, "For line numbers, recompile with DEBUG set to 1\n" );
+#endif
+ err_fatal( __FUNCTION__, "parse error\n" ); /* presumably never returns (libmaa), so the hint is printed first */
+ exit( 1 );
+}
+/* Copy source-comment text to the output; the first call also emits the 00-database-* entries. */
+void comment( const char *text, int length )
+{
+ static int have = 0; /* set once the 00-database-* header has been written */
+ static int fin = 0; /* set once a closing "-->" was seen; later text is dropped */
+ time_t t;
+
+ if (!text && !length) {
+ ++fin;
+ fmt_indent(0);
+ fmt_newline(1); fmt_flush_index(); fmt_newline(1); fmt_flush_index();
+ return;
+ }
+
+ if (fin) {
+#if 0
+ fprintf( stderr, " %s", str_copyn(text,length) );
+#endif
+ return;
+ }
+
+ if (!have) {
+ ++have;
+ time(&t);
+
+ fmt_indent(0);
+ fmt_newline(1); fmt_flush_index(); fmt_newline(1); fmt_flush_index();
+ fmt_add_index( "00-database-url" );
+ if (database == 1) fmt_add_index( "00-cide-url" );
+ else fmt_add_index( "00-web1913-url" );
+ fmt_string( "00-database-url" );
+ fmt_newline(1);
+ fmt_indent(5);
+ fmt_string( "ftp://ftp.uga.edu/pub/misc/webster/" );
+
+ fmt_indent(0);
+ fmt_newline(1); fmt_flush_index(); fmt_newline(1); fmt_flush_index();
+ fmt_string( "00-database-short" );
+ fmt_newline(1);
+ fmt_indent(5);
+ fmt_add_index( "00-database-short" );
+ if (database == 1) {
+ fmt_add_index( "00-cide-short" );
+ fmt_string( "The Collaborative International Dictionary of English" );
+ } else {
+ fmt_add_index( "00-web1913-short" );
+ fmt_string( "Webster's Revised Unabridged Dictionary (1913)" );
+ }
+
+ fmt_indent(0);
+ fmt_newline(1); fmt_flush_index(); fmt_newline(1); fmt_flush_index();
+ fmt_add_index( "00-database-long" );
+ if (database == 1) fmt_add_index( "00-cide-long" );
+ else fmt_add_index( "00-web1913-long" );
+ fmt_string( "00-database-long" );
+ fmt_newline(1);
+ fmt_indent(5);
+ if (database == 1) {
+ fmt_string(
+ "The Collaborative International Dictionary of English, derived from"
+ " Webster's Revised Unabridged Dictionary, 1913, C. & G. Merriam Co.,"
+ " Springfield, Mass., under the direction of Noah Porter, D.D., LL.D.;"
+ " and from WordNet(R), a semantic network created by the Cognitive"
+ " Science Department of Princeton University under the direction of"
+ " Prof. George Miller. Online version prepared by MICRA,"
+ " Inc., Plainfield, N.J. and edited by Patrick Cassidy"
+ " <cassidy@micra.com>." );
+
+ } else {
+ fmt_string(
+ "Webster's Revised Unabridged Dictionary (G & C. Merriam Co.,"
+ " 1913, edited by Noah Porter). Online version prepared by MICRA,"
+ " Inc., Plainfield, N.J. and edited by Patrick Cassidy"
+ " <cassidy@micra.com>." );
+ }
+ fmt_newline(1);
+ fmt_string( "ftp://ftp.uga.edu/pub/misc/webster/" );
+ fmt_newline(1);
+ fmt_string(
+ "ftp://uiarchive.cso.uiuc.edu/pub/etext/gutenberg/etext96/pgw*" );
+ fmt_newline(1);
+ fmt_string(
+ "http://humanities.uchicago.edu/forms_unrest/webster.form.html" );
+
+ fmt_indent(0);
+ fmt_newline(1); fmt_flush_index(); fmt_newline(1); fmt_flush_index();
+ fmt_string( "00-database-info" );
+ fmt_newline(1);
+ fmt_indent(5);
+ fmt_add_index( "00-database-info" );
+ if (database == 1) fmt_add_index( "00-cide-info" ); /* database == 1 selects the cide names, as for url/short/long */
+ else fmt_add_index( "00-web1913-info" );
+ fmt_string("This file was converted from the original database on:" );
+ fmt_indent_add(10);
+ fmt_newline(1);
+ fmt_string("%25.25s", ctime(&t) );
+ fmt_newline(1);
+ fmt_indent_add(-10);
+ fmt_newline(2);
+ fmt_string( "The original data is available from:" );
+ fmt_indent_add(10);
+ fmt_newline(1);
+ fmt_string( "ftp://ftp.uga.edu/pub/misc/webster/" );
+ fmt_newline(1);
+ fmt_indent_add(-10);
+ fmt_newline(2);
+ fmt_string(
+ "The original data was distributed with the notice shown below."
+ " No additional restrictions are claimed. Please redistribute"
+ " this changed version under the same conditions and restriction"
+ " that apply to the original version." );
+ fmt_newline(2);
+ fmt_string(
+ "===============================================================" );
+ fmt_newline(2);
+ }
+
+#if 0
+ fprintf( stderr, "GOT \"%s\"\n", str_copyn(text,length));
+#endif
+ fmt_indent(5);
+ fmt_literal( "%s", str_copyn(text,length) );
+}
diff -Naur dict-web1913-1.4-0.47pd/orig.webfilter.l dict-web1913-1.4-0.47pd.new/orig.webfilter.l
--- dict-web1913-1.4+0.47pd/orig.webfilter.l Thu Jan 1 03:00:00 1970
+++ dict-web1913-1.4-0.47pd.new/orig.webfilter.l Mon Feb 5 06:47:09 2001
@@ -0,0 +1,548 @@
+/* webfilter.l --
+ * Created: Thu Oct 3 00:51:04 1996 by faith@cs.unc.edu
+ * Revised: Sun Feb 22 11:26:25 1998 by faith@acm.org
+ * Copyright 1996, 1997, 1998 Rickard E. Faith (faith@acm.org)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 1, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * $Id: webfilter.l,v 1.9 1998/02/22 18:24:15 faith Exp $
+ */
+
+
+%option stack
+%{
+#include "maa.h"
+#include <string.h>
+#include <getopt.h>
+
+#define DBG_VERBOSE 0x0001
+#define DBG_DEBUG 0x0002
+#define DBG_SEARCH2 0x0004
+#define DBG_SEARCH3 0x0008
+
+extern int yylex( void );
+extern int yydebug;
+extern void yyerror( const char *message );
+static stk_Stack stk;
+static hsh_HashTable entityHash, hexHash;
+static void push(const char *text, int length);
+static void pop(const char *text, int length);
+static void entity(const char *text, int length);
+static void hex(const char *text, int length);
+static void other(const char *text, int length);
+static void comment(const char *text, int length);
+
+typedef struct trans {
+ const char *name;
+ const char *rep;
+ int count;
+} *trans_t;
+%}
+%x OTHER COMMENT SUBINIT
+NL \n
+WS [[:blank:]]+
+%%
+<INITIAL>{
+ .*{NL} src_line(yytext,yyleng); yyless(0); BEGIN(OTHER);
+ .* src_line(yytext,yyleng); yyless(0); BEGIN(OTHER);
+}
+<SUBINIT>{
+ .*{NL} src_line(yytext,yyleng); yyless(0); yy_pop_state();
+ .* src_line(yytext,yyleng); yyless(0); yy_pop_state();
+}
+<COMMENT>{
+ "-->" { src_advance(yyleng);
+ comment(yytext,yyleng);
+ yy_pop_state();
+ }
+ "-" src_advance(yyleng); comment(yytext,yyleng);
+ [^-\n]+ src_advance(yyleng); comment(yytext,yyleng);
+ {NL} comment(yytext,yyleng); yy_push_state(SUBINIT);
+}
+<OTHER>{
+"<--" { src_advance(yyleng);
+ other(yytext,yyleng);
+ yy_push_state(COMMENT);
+ }
+"<p>" src_advance(yyleng); /* ignore */
+"</p>" src_advance(yyleng); /* ignore */
+"<plain>" src_advance(yyleng); /* ignore */
+"</plain>" src_advance(yyleng); /* ignore */
+"<xex>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</xex>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<mark>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</mark>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<er>" { src_advance(yyleng); other("{",1);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</er>" { src_advance(yyleng); other("}",1);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<gen>" { src_advance(yyleng); other("{",1);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</gen>" { src_advance(yyleng); other("}",1);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<fam>" { src_advance(yyleng); other("{",1);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</fam>" { src_advance(yyleng); other("}",1);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<ord>" { src_advance(yyleng); other("{",1);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</ord>" { src_advance(yyleng); other("}",1);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<suborder>" { src_advance(yyleng); other("{",1);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</suborder>" { src_advance(yyleng); other("}",1);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<it>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</it>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<i>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</i>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<sub>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</sub>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<b>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</b>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<rj>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</rj>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<colf>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</colf>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<abbr>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</abbr>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<plu>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</plu>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<cd>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</cd>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<ex>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</ex>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<ety>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</ety>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<ets>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</ets>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<pos>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</pos>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<as>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</as>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<grk>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</grk>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<fld>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</fld>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<qex>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</qex>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<vmorph>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</vmorph>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<pr>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</pr>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<member>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</member>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<cs>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</cs>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<altsp>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</altsp>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<pluf>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</pluf>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<specif>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</specif>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<wordforms>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</wordforms>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<amorph>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</amorph>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</note>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</def>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</def2>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</usage>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"</syn>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+\ ?"<mhw>{"\ ? { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<mhw>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+\ ?\}?"</mhw>"\ ? { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+\ ?"<pr>(<?/)</pr>"\ ? { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+\ ?"<pr>(?)</pr>"\ ? { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+\ ?"<pr>(#)</pr>"\ ? { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+\ ?"<pr>(-n<?/)</pr>"\ ? { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<br/" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"<break>" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+"&fist;" { src_advance(yyleng);
+ if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
+\<\/[^>]+\> src_advance(yyleng); pop(yytext+2,yyleng-3);
+\<[^->/]+\> src_advance(yyleng); push(yytext+1,yyleng-2);
+\<[^->/]+\/ src_advance(yyleng); entity(yytext+1,yyleng-2);
+\\\'.. src_advance(yyleng); hex(yytext+2,yyleng-2);
+[^{}<\\\n]+ src_advance(yyleng); other(yytext,yyleng);
+{NL} other(yytext,yyleng); BEGIN(INITIAL);
+\{ src_advance(yyleng); other("[",1);
+\} src_advance(yyleng); other("]",1);
+. src_advance(yyleng); other(yytext,yyleng);
+}
+<<EOF>> return 0;
+%%
+
+int yywrap( void )
+{
+ return 1;
+}
+
+void yyerror( const char *message )
+{
+ src_parse_error( stderr, src_get( yyleng ), message );
+ err_fatal( __FUNCTION__, "parse error\n" );
+ exit( 1 );
+}
+/* Count a use of entity `text`; outside debug mode print its replacement, or "[name]" if none. */
+static void entity( const char *text, int length )
+{
+ const char *buf = str_findn(text,length);
+ char buf2[512];
+ trans_t t;
+
+ if (!(t = (trans_t)hsh_retrieve(entityHash,buf))) {
+ t = xmalloc(sizeof(struct trans));
+ memset((char *)t,0,sizeof(struct trans));
+ hsh_insert(entityHash,buf,t);
+ if (dbg_test(DBG_DEBUG)) {
+ snprintf( buf2, sizeof buf2, "WARNING: Adding %s to entity table ******",buf); /* name length comes from the input: bound the write */
+ src_parse_error( stderr, src_get( yyleng ), buf2 );
+ }
+ }
+ ++t->count;
+
+ if (!dbg_test(DBG_DEBUG)) {
+ if (t->rep)
+ printf( "%s", t->rep);
+ else
+ printf( "[%s]", buf);
+ }
+}
+/* Count a use of hex escape `text`; outside debug mode print its replacement, or "[code]" if none. */
+static void hex( const char *text, int length )
+{
+ const char *buf = str_findn(text,length);
+ char buf2[512];
+ trans_t t;
+
+ if (!(t = (trans_t)hsh_retrieve(hexHash,buf))) {
+ t = xmalloc(sizeof(struct trans));
+ memset(t,0,sizeof(struct trans));
+ hsh_insert(hexHash,buf,t);
+ if (dbg_test(DBG_DEBUG)) {
+ snprintf( buf2, sizeof buf2, "WARNING: Adding %s to hex table ******",buf); /* bounded, matching entity() */
+ src_parse_error( stderr, src_get( yyleng ), buf2 );
+ }
+ }
+ ++t->count;
+
+ if (!dbg_test(DBG_DEBUG)) {
+ if (t->rep)
+ printf( "%s", t->rep);
+ else
+ printf( "[%s]", buf);
+ }
+}
+
+static void other( const char *text, int length )
+{
+ char *buf = alloca(length + 1);
+ char *d;
+ const char *s;
+ int i;
+ char p = 0;
+
+ if (!dbg_test(DBG_DEBUG)) {
+ for (s = text, d = buf, i = 0; *s && i < length; i++, s++) {
+ if (i < length-1 && *s == ' '
+ && (s[1] == ',' || s[1] == ';' || s[1] == ' '))
+ continue;
+ *d++ = *s;
+ }
+ *d = '\0';
+ printf( "%s", buf);
+ }
+}
+
+static void comment( const char *text, int length )
+{
+ char *buf = alloca(length + 1);
+ char *d;
+ const char *s;
+ int i;
+ char p = 0;
+
+ if (!dbg_test(DBG_DEBUG)) {
+ for (s = text, d = buf, i = 0; *s && i < length; i++, s++) {
+ if (i < length-1 && *s == ' ' && (s[1] == ',' || s[1] == ';'))
+ continue;
+ *d++ = *s;
+ }
+ *d = '\0';
+ printf( "%s", buf);
+ }
+}
+
+static void push( const char *text, int length )
+{
+ const char *name;
+
+ if (!dbg_test(DBG_DEBUG)) {
+ printf( "%*.*s",yyleng,yyleng,yytext);
+ } else {
+ name = str_findn(text,length);
+ stk_push(stk,(void *)name);
+ }
+}
+/* Handle a closing tag: echo it normally; in debug mode check it against the open-tag stack. */
+static void pop( const char *text, int length )
+{
+ const char *name = str_findn(text,length);
+ char *want;
+ char *want2;
+ char *want3;
+ char buf[256]; /* tag names come from the input, so every write below is bounded */
+
+
+ if (!dbg_test(DBG_DEBUG)) {
+ printf( "%*.*s",yyleng,yyleng,yytext);
+ } else {
+ if (!(want = stk_pop(stk))) {
+ src_parse_error( stderr, src_get(yyleng), "ERROR: Stack underflow ******" );
+ return;
+ }
+
+ if (dbg_test(DBG_SEARCH2)) {
+ if (name != want) {
+ want2 = stk_pop(stk);
+ if (name != want2) {
+ stk_push(stk,want2);
+ snprintf( buf, sizeof buf, "ERROR: Expected </%s>, but found </%s> ******",
+ want, name );
+ src_parse_error( stderr, src_get( yyleng ), buf );
+ } else {
+ stk_push(stk,want);
+ }
+ }
+ } else if (dbg_test(DBG_SEARCH3)) {
+ if (name != want) {
+ want2 = stk_pop(stk);
+ if (name != want2) {
+ want3 = stk_pop(stk);
+ if (name != want3) {
+ stk_push(stk,want3);
+ stk_push(stk,want2);
+ snprintf( buf, sizeof buf, "ERROR: Expected </%s>, but found </%s> ******",
+ want, name );
+ src_parse_error( stderr, src_get( yyleng ), buf );
+ } else {
+ stk_push(stk,want2);
+ stk_push(stk,want);
+ }
+ } else {
+ stk_push(stk,want);
+ }
+ }
+ } else if (name != want) {
+ snprintf( buf, sizeof buf, "ERROR: Expected </%s>, but found </%s> ******",
+ want, name );
+ src_parse_error( stderr, src_get( yyleng ), buf );
+ }
+ }
+}
+
+static int printer( const void *name, const void *datum )
+{
+ const char *s = (const char *)name;
+ trans_t t = (trans_t)datum;
+
+ if (t->rep)
+ fprintf( stderr, "%10d %s => \"%s\"\n", t->count, s, t->rep );
+ else
+ fprintf( stderr, "%10d %s => *** NO TRANSLATION ***\n", t->count, s );
+
+ return 0;
+}
+/* Entry point: load the filter table, then run the scanner over each named file, or stdin if none. */
+int main( int argc, char **argv )
+{
+ char *pt;
+ FILE *str;
+ char buf[4096];
+ arg_List a;
+ int c;
+ char **v;
+ trans_t t;
+ int i;
+ int debug = 0;
+ const char *filterFile = "filter.dat";
+
+ maa_init(argv[0]);
+ stk = stk_create();
+ entityHash = hsh_create(NULL,NULL);
+ hexHash = hsh_create(NULL,NULL);
+
+ dbg_register( DBG_VERBOSE, "verbose" );
+ dbg_register( DBG_DEBUG, "debug" );
+ dbg_register( DBG_SEARCH2, "search2" );
+ dbg_register( DBG_SEARCH3, "search3" );
+
+ while ((c = getopt( argc, argv, "vd:Df:" )) != EOF)
+ switch(c) {
+ case 'v': dbg_set( "verbose" ); break;
+ case 'd': dbg_set( optarg ); break;
+ case 'D': dbg_set( "debug" ); break;
+ case 'f': filterFile = optarg; break;
+ }
+
+ if ((str = fopen(filterFile,"r"))) {
+ if (dbg_test(DBG_VERBOSE))
+ fprintf( stderr, "Reading data from %s\n", filterFile );
+ while (fgets(buf,4096,str)) {
+ buf[strcspn(buf,"\n")] = '\0'; /* strip the newline only if present; safe on an unterminated last line */
+ if ((pt = strchr(buf,'#'))) *pt = '\0';
+ if (buf[0] == '\0') continue;
+
+ a = arg_argify(buf, ARG_NO_ESCAPE|ARG_NO_QUOTE);
+ arg_get_vector( a, &c, &v );
+
+ if (c == 2) {
+ if (dbg_test(DBG_VERBOSE))
+ fprintf( stderr, "\"%s\" \"%s\" %d\n", v[0],v[1], c);
+ t = xmalloc(sizeof(struct trans));
+ memset((char *)t,0,sizeof(struct trans));
+ t->name = str_find(v[0]);
+ t->rep = str_find(v[1]);
+ hsh_insert(entityHash,t->name,t);
+ } else if (c == 3) {
+ if (dbg_test(DBG_VERBOSE))
+ fprintf( stderr, "\"%s\" \"%s\" \"%s\" %d\n",
+ v[0],v[1], v[2], c);
+ t = xmalloc(sizeof(struct trans));
+ memset((char *)t,0,sizeof(struct trans));
+ t->name = str_find(v[0]);
+ t->rep = str_find(v[2]);
+ hsh_insert(hexHash,t->name,t);
+
+ t = xmalloc(sizeof(struct trans));
+ memset((char *)t,0,sizeof(struct trans));
+ t->name = str_find(v[1]);
+ t->rep = str_find(v[2]);
+ hsh_insert(entityHash,t->name,t);
+ } else if (c) {
+ if (dbg_test(DBG_VERBOSE))
+ fprintf( stderr, "\"%s\" %d\n", v[0], c);
+ t = xmalloc(sizeof(struct trans));
+ memset((char *)t,0,sizeof(struct trans));
+ t->name = str_find(v[0]);
+ t->rep = NULL;
+ hsh_insert(entityHash,t->name,t);
+ }
+ arg_destroy(a);
+ }
+ fclose(str);
+ }
+
+ if (argc-optind >= 1) {
+ for (i = optind; i < argc; i++) {
+ if (dbg_test(DBG_VERBOSE))
+ fprintf( stderr, "Opening %s\n",argv[i]);
+ if (!(str = fopen( argv[i], "r" ))) {
+ err_fatal_errno( __FUNCTION__,
+ "Cannot open \"%s\" for read\n", argv[i]); /* name the file that actually failed */
+ }
+ if ((pt = strrchr(argv[i],'/'))) src_new_file(pt+1);
+ else src_new_file(argv[i]);
+
+ yyrestart(str);
+ yylex();
+ fclose(str);
+ }
+ } else if (argc-optind == 0) {
+ src_new_file("[stdin]");
+ yyrestart(stdin);
+ yylex();
+ }
+
+ if (dbg_test(DBG_VERBOSE)) {
+ fflush(stdout);
+ fprintf( stderr, "Entity table:\n" );
+ hsh_iterate( entityHash, printer );
+ fprintf( stderr, "Hex table:\n" );
+ hsh_iterate( hexHash, printer );
+ }
+ return 0;
+}
diff -Naur dict-web1913-1.4-0.47pd/scan.l dict-web1913-1.4-0.47pd.new/scan.l
--- dict-web1913-1.4+0.47pd/scan.l Sun Feb 22 22:31:45 1998
+++ dict-web1913-1.4-0.47pd.new/scan.l Mon Feb 5 06:47:08 2001
@@ -90,6 +90,7 @@
"</altnpluf>" RETURN(T_ALTNPLUF_STOP);
"<def>" RETURN(T_DEF_START);
"<def2>" RETURN(T_DEF_START);
+"<cd>" RETURN(T_DEF_START);
"<sn>" RETURN(T_SN_START);
"</sn>" RETURN(T_SN_STOP);
"<sd>" RETURN(T_SD_START);
diff -Naur dict-web1913-1.4-0.47pd/webfilter.l dict-web1913-1.4-0.47pd.new/webfilter.l
--- dict-web1913-1.4+0.47pd/webfilter.l Thu Jun 10 05:40:06 1999
+++ dict-web1913-1.4-0.47pd.new/webfilter.l Mon Feb 5 06:47:08 2001
@@ -140,8 +140,6 @@
if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
"</plu>" { src_advance(yyleng);
if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
-"<cd>" { src_advance(yyleng);
- if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
"</cd>" { src_advance(yyleng);
if (dbg_test(DBG_DEBUG)) other(yytext,yyleng); }
"<ex>" { src_advance(yyleng);
@@ -246,7 +244,7 @@
\<[^->/]+\> src_advance(yyleng); push(yytext+1,yyleng-2);
\<[^->/]+\/ src_advance(yyleng); entity(yytext+1,yyleng-2);
\\\'.. src_advance(yyleng); hex(yytext+2,yyleng-2);
-[^{}<\\\n]+ src_advance(yyleng); other(yytext,yyleng);
+[^&{}<\\\n]+ src_advance(yyleng); other(yytext,yyleng);
{NL} other(yytext,yyleng); BEGIN(INITIAL);
\{ src_advance(yyleng); other("[",1);
\} src_advance(yyleng); other("]",1);