--- ispell-3.2.06/Makefile.NO Mon Jun 3 12:07:55 2002 +++ ispell-3.2.06/Makefile Mon Jun 3 12:14:35 2002 @@ -213,7 +213,7 @@ programs: buildhash findaffix tryaffix ispell programs: icombine ijoin munchlist -programs: subset zapdups +programs: subset sq unsq zapdups deformatter-programs: cd deformatters; $(MAKE) all @@ -289,13 +289,14 @@ @. ./config.sh; \ set -x; \ $$INSTALL buildhash icombine ijoin munchlist findaffix tryaffix \ + sq unsq \ $$BINDIR @. ./config.sh; \ set -x; \ cd $$BINDIR; \ - strip buildhash icombine ijoin; \ + strip buildhash icombine ijoin sq unsq; \ chmod 755 buildhash icombine ijoin \ - munchlist findaffix tryaffix + munchlist findaffix tryaffix sq unsq @. ./config.sh; \ set -x; \ [ -d $$MAN1DIR ] || (mkdir $$MAN1DIR; chmod 755 $$MAN1DIR); \ @@ -304,14 +305,17 @@ rm -f sq$$MAN1EXT unsq$$MAN1EXT @. ./config.sh; \ set -x; \ + $$INSTALL sq.1 $$MAN1DIR/sq$$MAN1EXT; \ + echo ".so `basename $$MAN1DIR`/sq$$MAN1EXT" \ + > $$MAN1DIR/unsq$$MAN1EXT; \ for m in buildhash munchlist findaffix tryaffix; do \ echo ".so `basename $$MAN1DIR`/ispell$$MAN1EXT" \ > $$MAN1DIR/$$m$$MAN1EXT; \ done @. ./config.sh; \ set -x; \ cd $$MAN1DIR; \ - chmod 644 buildhash$$MAN1EXT \ + chmod 644 sq$$MAN1EXT unsq$$MAN1EXT buildhash$$MAN1EXT \ munchlist$$MAN1EXT findaffix$$MAN1EXT tryaffix$$MAN1EXT install-languages: @@ -328,6 +332,7 @@ munchable: findaffix tryaffix munchlist buildhash ispell icombine munchable: ijoin +munchable: sq unsq # # The following auxiliary dependency is used to make targets in @@ -460,6 +465,18 @@ @. ./config.sh; \ set -x; \ $$CC $$CFLAGS -o ispell $(OBJS) $$TERMLIB $$REGLIB $$LIBES + +# Since some makes don't have appropriate built-in rules, here are +# explicit build rules for sq and unsq. Sigh. +sq: config.sh msgs.h sq.c + @. ./config.sh; \ + set -x; \ + $$CC $$CFLAGS -o sq sq.c + +unsq: config.sh msgs.h unsq.c + @.
./config.sh; \ + set -x; \ + $$CC $$CFLAGS -o unsq unsq.c $(OBJS) buildhash.o icombine.o hash.o parse.o: config.h ispell.h local.h $(OBJS) buildhash.o icombine.o hash.o parse.o: proto.h msgs.h --- /dev/null Thu Jan 1 01:00:00 1970 +++ ispell-3.2.06/sq.1 Mon Jun 3 11:46:07 2002 @@ -0,0 +1,82 @@ +.\" +.\" $Id: sq.1,v 1.6 1994/01/25 07:12:07 geoff Exp $ +.\" +.\" Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All modifications to the source code must be clearly marked as +.\" such. Binary redistributions based on modified source code +.\" must be clearly marked as modified versions in the documentation +.\" and/or other materials provided with the distribution. +.\" 4. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgment: +.\" This product includes software developed by Geoff Kuenning and +.\" other unpaid contributors. +.\" 5. The name of Geoff Kuenning may not be used to endorse or promote +.\" products derived from this software without specific prior +.\" written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $Log: sq.1,v $ +.\" Revision 1.6 1994/01/25 07:12:07 geoff +.\" Get rid of all old RCS log lines in preparation for the 3.1 release. +.\" +.\" +.TH SQ 1 LOCAL +.SH NAME +sq \- squeeze a sorted word list +unsq \- unsqueeze a sorted word list +.SH SYNOPSIS +.B sq +< infile > outfile +.PP +.B unsq +< infile > outfile +.SH DESCRIPTION +.I sq +compresses a sorted list of words (a dictionary). +For example: +.RS +sort /usr/share/dict/words | sq | compress > words.sq.Z +.RE +will compress the dictionary by about a factor of 4. +.PP +.I unsq +uncompresses the output of +.I sq. +For example: +.RS +compress -d < words.sq.Z | unsq | sort -f -o words +.RE +will uncompress a dictionary compressed with +.I sq. +.P +The squeezing is achieved by eliminating common prefixes, and replacing +them with a single character which encodes the number of characters +shared with the preceding word. +The prefix size is encoded as a single printable character: +0-9 represent 0-9, A-Z represent 10-35, and a-z represent 36-61. +.SH AUTHOR +Mike Wexler +.SH SEE ALSO +compress(1), sort(1). --- /dev/null Thu Jan 1 01:00:00 1970 +++ ispell-3.2.06/sq.c Mon Jun 3 11:46:05 2002 @@ -0,0 +1,111 @@ +#ifndef lint +static char Rcs_Id[] = + "$Id: sq.c,v 1.12 1994/01/25 07:12:09 geoff Exp $"; +#endif + +/* + * Copyright 1993, Geoff Kuenning, Granada Hills, CA + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * 4. All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Geoff Kuenning and + * other unpaid contributors. + * 5. The name of Geoff Kuenning may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+/*
+ * $Log: sq.c,v $
+ * Revision 1.12 1994/01/25 07:12:09 geoff
+ * Get rid of all old RCS log lines in preparation for the 3.1 release.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#ifdef __STDC__
+#define P(x) x
+#else /* __STDC__ */
+#define P(x) ()
+#endif /* __STDC__ */
+
+int main P ((int argc, char * argv[]));
+static void trunc P ((char * word, char * prev));
+
+/*
+ * The following table encodes prefix sizes as a single character. A
+ * matching table will be found in unsq.c.
+ */
+static char size_encodings[] =
+    {
+    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', /* 00-09 */
+    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', /* 10-19 */
+    'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', /* 20-29 */
+    'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', /* 30-39 */
+    'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', /* 40-49 */
+    'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', /* 50-59 */
+    'y', 'z' /* 60-61 */
+    };
+
+#define MAX_PREFIX (sizeof (size_encodings) - 1)
+
+/* Squeeze stdin onto stdout, one fgets chunk (up to 255 bytes) per entry. */
+int main (argc, argv)
+    int argc;
+    char * argv[];
+    {
+    char word[257];
+    static char prev[257] = "";
+
+    while (fgets (word, 256, stdin) != NULL)
+        trunc (word, prev);
+    return 0;
+    }
+
+/* Write "word" as one count character plus its unshared suffix, then copy it
+ * to "prev".  The count is the prefix shared with "prev", capped at MAX_PREFIX
+ * and stopped at the line end so the suffix always keeps the newline. */
+static void trunc (word, prev)
+    char * word;
+    char * prev;
+    {
+    register int same_count;
+
+    for (same_count = 0;
+      same_count < (int) MAX_PREFIX  &&  word[same_count] == prev[same_count]
+        &&  word[same_count] != '\n'  &&  word[same_count] != '\0';
+      same_count++)
+        ;
+    (void) putchar (size_encodings[same_count]);
+    (void) fputs (word + same_count, stdout);
+    (void) strcpy (prev, word);
+    }
+
--- /dev/null Thu Jan 1 01:00:00 1970
+++ ispell-3.2.06/unsq.c Mon Jun 3 11:46:05 2002
@@ -0,0 +1,129 @@
+#ifndef lint
+static char Rcs_Id[] =
+    "$Id: unsq.c,v 1.14 1994/01/25 07:12:19 geoff Exp $";
+#endif
+
+/*
+ * Copyright 1993, Geoff Kuenning, Granada Hills, CA
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * 4. All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Geoff Kuenning and + * other unpaid contributors. + * 5. The name of Geoff Kuenning may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+/*
+ * $Log: unsq.c,v $
+ * Revision 1.14 1994/01/25 07:12:19 geoff
+ * Get rid of all old RCS log lines in preparation for the 3.1 release.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "msgs.h"
+
+#ifdef __STDC__
+#define P(x) x
+#else /* __STDC__ */
+#define P(x) ()
+#endif /* __STDC__ */
+
+int main P ((int argc, char * argv[]));
+static int expand P ((char * word, char * prev));
+
+/*
+ * The following table encodes prefix sizes as a single character. A
+ * matching table will be found in sq.c.
+ */
+static char size_encodings[] =
+    {
+    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', /* 00-09 */
+    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', /* 10-19 */
+    'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', /* 20-29 */
+    'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', /* 30-39 */
+    'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', /* 40-49 */
+    'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', /* 50-59 */
+    'y', 'z' /* 60-61 */
+    };
+
+#define MAX_PREFIX (sizeof (size_encodings) - 1)
+
+extern void exit P ((int status));
+
+/* Expand squeezed stdin onto stdout until expand reports end of input. */
+int main (argc, argv)
+    int argc;
+    char * argv[];
+    {
+    char word[257];
+    static char prev[257] = "";
+
+    while (!expand (word, prev))
+        fputs (word, stdout);
+    return 0;
+    }
+
+/* Read one entry (count character plus suffix) from stdin into "word",
+ * taking the counted prefix from "prev", then save "word" in "prev".
+ * Returns 1 on EOF before an entry, else 0; a count character missing from
+ * size_encodings, or EOF where the suffix should be, is fatal (exit 1). */
+static int expand (word, prev)
+    char * word;
+    char * prev;
+    {
+    register int same_count;
+    register int count_char;
+
+    count_char = getchar ();
+    if (count_char == EOF)
+        return(1);
+    /* Decode the count; index MAX_PREFIX ('z') is itself a valid count. */
+    for (same_count = 0;
+      same_count <= (int) MAX_PREFIX  &&  size_encodings[same_count] != count_char;
+      same_count++)
+        ;
+    if (same_count > (int) MAX_PREFIX)
+        {
+        (void) fprintf (stderr, UNSQ_C_BAD_COUNT, (unsigned int) count_char);
+        exit (1);
+        }
+    (void) memcpy (word, prev, (size_t) same_count);
+    if (fgets (word + same_count, 256 - same_count, stdin) == NULL)
+        {
+        (void) fprintf (stderr, UNSQ_C_SURPRISE_EOF);
+        exit (1);
+        }
+    (void) strcpy (prev, word);
+    return 0 ;
+    }
+