Translate between UTF-8 and the codeset of the user's locale in the dict
client.  Text read from the server is converted from UTF-8 to the locale
codeset, commands are converted from the locale codeset to UTF-8 before
they are sent, and -n/--notranslate switches both conversions off.
Translation is skipped when the locale codeset already is UTF-8 or when
a converter cannot be opened.

Changes made while reviewing this patch:
 (1) the pass-through branch of the read loop tested line[] although the
     data is now read into utfline[];
 (2) the result of opening iconv_reverse_object was never checked (the
     test looked at iconv_object a second time);
 (3) the substitution fallback decoded UTF-8 with mbtowc(), which follows
     LC_CTYPE and so cannot decode UTF-8 on this path; it now uses a
     local utf8_decode() helper;
 (4) U+00DC was replaced by a lowercase "u:";
 (5) the free-space counter was reduced by input bytes, not output bytes;
 (6) iconv() results are compared with (size_t) -1;
 (7) UTFBUFFERSIZE is parenthesised;
 (8) the CRLF buffer for a converted command has room for the terminator.

NOTE(review): the copy this was rebuilt from had lost its line breaks and
everything written in angle brackets.  The header names in hunk 1 and the
option placeholders in the help text were restored by hand, and the
whitespace and blank context lines are best effort - confirm them against
dict.c and apply with "patch -l".

--- dict.c.orig	2004-11-17 15:39:44 +0300
+++ dict.c	2005-09-22 01:35:02 +0400
@@ -25,6 +25,11 @@
 #include "parse.h"
 #include "md5.h"
 #include <stdarg.h>
+#include <locale.h>
+#include <langinfo.h>
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
 
 extern int yy_flex_debug;
 lst_List dict_Servers;
@@ -32,6 +37,7 @@
 FILE *dict_output;
 
 #define BUFFERSIZE 2048
+#define UTFBUFFERSIZE (BUFFERSIZE * 8)
 #define PIPESIZE 256
 #define DEF_STRAT "."
 #define DEF_DB "*"
@@ -98,6 +104,47 @@
 #define EXST_INVALID_STRATEGY 40
 #define EXST_CONNECTION_FAILED 41
 
+/* Nonzero when text is passed through untranslated: the locale codeset is
+   UTF-8, a converter could not be opened, or -n/--notranslate was given. */
+int utf8_mode = 1;
+
+/* Codeset converters; (iconv_t) -1 until iconv_open() succeeds. */
+iconv_t iconv_object = (iconv_t) -1 ;          /* UTF-8 -> locale codeset */
+iconv_t iconv_reverse_object = (iconv_t) -1 ;  /* locale codeset -> UTF-8 */
+
+/* Decode the UTF-8 sequence that starts at s, where n bytes are readable.
+   Stores the code point in *out and returns the sequence length in bytes,
+   or stores 0 and returns 0 when the bytes are not a complete sequence.
+   mbtowc() is not used because it follows LC_CTYPE, which is never UTF-8
+   on the translation path. */
+static int utf8_decode( const char *s, size_t n, wchar_t *out )
+{
+   const unsigned char *p = (const unsigned char *) s;
+   unsigned long        value;
+   int                  seq_len;
+   int                  i;
+
+   *out = 0;
+   if( n == 0 )
+      return 0;
+   if( p[0] < 0x80 )                  { value = p[0];        seq_len = 1; }
+   else if( ( p[0] & 0xE0 ) == 0xC0 ) { value = p[0] & 0x1F; seq_len = 2; }
+   else if( ( p[0] & 0xF0 ) == 0xE0 ) { value = p[0] & 0x0F; seq_len = 3; }
+   else if( ( p[0] & 0xF8 ) == 0xF0 ) { value = p[0] & 0x07; seq_len = 4; }
+   else
+      return 0;
+   if( (size_t) seq_len > n )
+      return 0;
+   for( i = 1; i < seq_len; i++ )
+   {
+      if( ( p[i] & 0xC0 ) != 0x80 )
+         return 0;
+      value = ( value << 6 ) | ( p[i] & 0x3F );
+   }
+   *out = (wchar_t) value;
+   return seq_len;
+}
+
 struct def {
    lst_List data;
    const char *word;
@@ -265,18 +312,118 @@
 {
    lst_List l = lst_create();
    char line[BUFFERSIZE];
-   int len;
+   static char utfline[BUFFERSIZE];        /* line as read from the server */
+   static char decodedline[UTFBUFFERSIZE]; /* same line in the locale codeset */
+   int len;
+   size_t utflen;
+   size_t avail;
+
+   char *source;
+   char *destination;
+   size_t recoded;
+   wchar_t wide_char;
+
 
-   while ((len = net_read(s, line, BUFFERSIZE - 1)) >= 0) {
-      line [len] = 0;
+   while ((len = net_read(s, utfline, BUFFERSIZE - 1)) >= 0) {
+      utfline[len] = 0;
 
       client_bytes += len;
-      PRINTF(DBG_RAW,("* Text: %s\n",line));
-      if (line[0] == '.' && line[1] == '\0') break;
-      if (len >= 2 && line[0] == '.' && line[1] == '.')
-         lst_append( l, xstrdup(line + 1) );
+      PRINTF(DBG_RAW,("* Text: %s\n",utfline));
+      if (utfline[0] == '.' && utfline[1] == '\0') break;
+      if( utf8_mode )
+      {
+         if (len >= 2 && utfline[0] == '.' && utfline[1] == '.')
+            lst_append( l, xstrdup(utfline + 1) );
+         else
+            lst_append( l, xstrdup(utfline) );
+      }
       else
-         lst_append( l, xstrdup(line) );
+      {
+         avail = UTFBUFFERSIZE;
+         source = utfline;
+         destination = decodedline;
+         recoded = 0;
+         utflen = len;
+         /* iconv() stops at each character it cannot convert: substitute, resume. */
+         while( source < ( utfline + len ) )
+         {
+            recoded = iconv( iconv_object, &source, &utflen, &destination, &avail );
+            if( recoded == (size_t) -1 )
+            {
+               switch ( errno )
+               {
+                  case EILSEQ:
+                  case EINVAL:
+                  {
+                     int mb_len = utf8_decode( source, utflen, &wide_char );
+                     switch( wide_char )
+                     {
+                        case 0x00E4: // a umlaut small
+                           *destination = 'a';
+                           destination++;
+                           *destination = ':';
+                           break;
+                        case 0x00C4: // a umlaut big
+                           *destination = 'A';
+                           destination++;
+                           *destination = ':';
+                           break;
+                        case 0x00F6: // o umlaut small
+                           *destination = 'o';
+                           destination++;
+                           *destination = ':';
+                           break;
+                        case 0x00D6: // o umlaut big
+                           *destination = 'O';
+                           destination++;
+                           *destination = ':';
+                           break;
+                        case 0x00FC: // u umlaut small
+                           *destination = 'u';
+                           destination++;
+                           *destination = ':';
+                           break;
+                        case 0x00DC: // u umlaut big
+                           *destination = 'U';
+                           destination++;
+                           *destination = ':';
+                           break;
+                        case 0x00DF: // eszet small
+                           *destination = 's';
+                           destination++;
+                           *destination = 's';
+                           break;
+                        default :
+                           *destination = '?';
+                           break;
+                     }
+                     /* Every case leaves its last byte unstepped; step past it here. */
+                     destination++;
+
+                     if( mb_len < 1 )
+                        mb_len = 1;
+                     /* Skip the bad input and resync the free-space count with the output. */
+                     source += mb_len;
+                     utflen -= mb_len;
+                     avail = UTFBUFFERSIZE - (size_t)( destination - decodedline );
+                     break;
+                  }
+                  default: /* any other iconv() error: give up on the rest of this line */
+                     source = utfline + len;
+                     break;
+               };
+            };
+         };
+         if( destination < decodedline + UTFBUFFERSIZE )
+            *destination = 0;
+         else
+            decodedline[UTFBUFFERSIZE-1] = 0;
+         if (len >= 2 && utfline[0] == '.' && utfline[1] == '.')
+            lst_append( l, xstrdup( decodedline + 1 ) );
+         else
+            lst_append( l, xstrdup( decodedline ) );
+
+      };
    }
    if (len < 0) {
       client_close_pager();
@@ -689,9 +836,43 @@
 
          PRINTF(DBG_PIPE,("* Sending %d commands (%d bytes)\n",count,len));
          PRINTF(DBG_RAW,("* Send/%d: %s",c->command,buffer));
-         pt = alloca(2*len);
-         client_crlf(pt,buffer);
-         net_write( cmd_reply.s, pt, strlen(pt) );
+         /* Commands are converted from the locale codeset to UTF-8 before sending. */
+         if( utf8_mode )
+         {
+            pt = alloca(2*len);
+            client_crlf(pt,buffer);
+            net_write( cmd_reply.s, pt, strlen(pt) );
+         }
+         else
+         {
+            char *decodedline = alloca( 8 * len );
+            size_t utflen = len;
+            size_t avail = 8 * len - 1;
+
+            char *source = buffer;
+            char *destination = decodedline;
+            size_t recoded = 0;
+
+            recoded = iconv( iconv_reverse_object, &source, &utflen, &destination, &avail );
+            if( recoded == (size_t) -1 )
+            { /* conversion failed: send the command bytes unconverted */
+               pt = alloca(2*len);
+               client_crlf(pt,buffer);
+               net_write( cmd_reply.s, pt, strlen(pt) );
+            }
+            else
+            {
+               if( destination < decodedline + 8 * len - 1 )
+                  *destination = 0;
+               else
+                  decodedline[8 * len - 1] = 0;
+
+               pt = alloca( 2 * strlen( decodedline ) + 1 );
+               client_crlf( pt, decodedline );
+               net_write( cmd_reply.s, pt, strlen(pt) );
+            };
+
+         };
       } else {
          PRINTF(DBG_PIPE,("* Sending nothing\n"));
          PRINTF(DBG_RAW,("* Send/%d\n",c->command));
@@ -1161,6 +1342,7 @@
       "-s --strategy <strategy>  strategy for matching or defining",
       "-c --config <file>        specify configuration file",
       "-C --nocorrect            disable attempted spelling correction",
+      "-n --notranslate          disable UTF-8 -> client encoding translation",
       "-D --dbs                  show available databases",
       "-S --strats               show available search strategies",
       "-H --serverhelp           show server help",
@@ -1217,6 +1399,7 @@
       { "match", 0, 0, 'm' },
       { "strategy", 1, 0, 's' },
       { "nocorrect", 0, 0, 'C' },
+      { "notranslate",0, 0, 'n' },
       { "config", 1, 0, 'c' },
       { "dbs", 0, 0, 'D' },
       { "strats", 0, 0, 'S' },
@@ -1236,6 +1419,31 @@
       { 0, 0, 0, 0 }
    };
 
+   if( !setlocale(LC_CTYPE, "") )
+   {
+      utf8_mode = 1;   /* no usable locale: leave text untranslated */
+   }
+   else
+   {
+      utf8_mode = ( strcmp( nl_langinfo(CODESET), "UTF-8" ) == 0 );
+   };
+   /* Converters are only needed when the locale codeset is not UTF-8. */
+   if( !utf8_mode )
+   {
+      iconv_object = iconv_open( nl_langinfo(CODESET), "UTF-8" );
+      iconv_reverse_object = iconv_open( "UTF-8", nl_langinfo(CODESET) );
+      if( iconv_object == (iconv_t) -1 || iconv_reverse_object == (iconv_t) -1 )
+      {
+         /* Both directions are required; release whichever one did open. */
+         if( iconv_object != (iconv_t) -1 )
+            iconv_close( iconv_object );
+         if( iconv_reverse_object != (iconv_t) -1 )
+            iconv_close( iconv_reverse_object );
+         iconv_object = iconv_reverse_object = (iconv_t) -1;
+         utf8_mode = 1;
+      };
+   };
+
    dict_output = stdout;
    maa_init(argv[0]);
 
@@ -1249,7 +1457,7 @@
    dbg_register( DBG_URL, "url" );
 
    while ((c = getopt_long( argc, argv,
-                            "h:p:d:i:Ims:DSHau:c:Ck:VLvrP:",
+                            "h:p:d:i:Imns:DSHau:c:Ck:VLvrP:",
                             longopts, NULL )) != EOF)
    {
       switch (c) {
@@ -1259,6 +1467,16 @@
       case 'i': database = optarg; function |= INFO; break;
       case 'I': function |= SERVER; break;
       case 'm': function = MATCH; break;
+      case 'n':
+         {
+            if( utf8_mode == 0 && iconv_object != (iconv_t) -1 )
+               iconv_close( iconv_object );
+
+            if( utf8_mode == 0 && iconv_reverse_object != (iconv_t) -1 )
+               iconv_close( iconv_reverse_object );
+            utf8_mode = 1;   /* also keeps the exit path from closing them again */
+         }
+         break;
       case 's': strategy = optarg; break;
       case 'D': function |= DBS; break;
       case 'S': function |= STRATS; break;
@@ -1549,6 +1767,12 @@
               tim_get_system( "total" ),
               client_bytes / tim_get_real( "total" ) );
    }
+
+   if( utf8_mode == 0 && iconv_object != (iconv_t) -1 )
+      iconv_close( iconv_object );
+
+   if( utf8_mode == 0 && iconv_reverse_object != (iconv_t) -1 )
+      iconv_close( iconv_reverse_object );
 
    return ex_status;
 }