#!/bin/bash
#
# Converts a Mueller-format dictionary text file into StarDict files
# (NAME.idx, NAME.dict and NAME.ifo).
#
# Usage: <this script> INPUT_FILE OUTPUT_NAME
#
# Requirements visible in the code below: sed, gawk (and(), rshift() and
# asorti() are gawk extensions) and perl with the Unicode::Map8 module.

# from to-dict.sh with small modifications
#
# Reads the dictionary text on stdin and writes marked-up text to stdout.
# The sed script, command by command:
#   1. deletes lines that start with a space;
#   2. appends a newline to every line, leaving an empty line after it;
#   3. splits the line after a double space: the text up to and including
#      the double space is prefixed with "%h", the remainder starts on a new
#      line prefixed with "%d";
#   4. starts a new line before "_I ", "_II ", ... markers;
#   5. starts a new line before " 1. " ... " 9. " markers;
#   6. starts a new line before "N>" markers (N a number);
#   7+8. rewrite "x>" into "(x)" for the letters listed in the bracket
#      expression;
#   9. starts a new line before " _Axx:", " _Ixx:", " _Sxx:", " _Exx:" labels;
#  10. turns every remaining ">" into ":".
#
# NOTE(review): the letters in command 7 look like KOI8-R bytes displayed as
# Latin-1; they presumably have to be stored in the same encoding as the
# input for the bracket expression to match - confirm before re-encoding
# this file.
# NOTE(review): the line breaks inside the replacements (backslash followed
# by a newline) were reconstructed from a whitespace-mangled copy - confirm
# against the upstream to-dict.sh.
MK_DATA () {
	sed '/^ /d
s/$/\
/g
s/[^]]*\ \ /%h&\
%d/
s/_[IVX][IVX]* /\
&/g
s/ [1-9]\. /\
&/g
s/[1-9][0-9]*>/\
&/g
s/[ÁÂ×ÇÄÅÖÚÉËÌÍÎÏÐÒÓÔÕÆÞÃÞÛÈÝßØÜÀÑ]>/(&>/g
s/>>/)/g
s/\ \_[AISE][a-z]*:/\
&/g
s/>/:/g'
}

# from slovnyktodict.awk with small modifications
#
# Reads "%h"/"%d" marked-up text on stdin and writes the StarDict files
# $1.idx, $1.dict and $1.ifo.
#   $1 - output name (prefix of the three files, also used as bookname)
# Returns the exit status of awk: 1 when no entries were read.
#
# awk runs with LC_ALL=C so that length() counts bytes and %c emits single
# bytes; the offsets and sizes written to the index rely on that.
TO_SLOV () {
	LC_ALL=C awk -v dictname="$1" '
	# Returns the low 32 bits of value as four bytes, most significant first.
	function stardict_nbo(value,    b1, b2, b3, b4) {
		b1 = and(value, 255); value = rshift(value, 8)
		b2 = and(value, 255); value = rshift(value, 8)
		b3 = and(value, 255); value = rshift(value, 8)
		b4 = and(value, 255)
		return sprintf("%c", b4) sprintf("%c", b3) sprintf("%c", b2) sprintf("%c", b1)
	}

	BEGIN {
		ORS = ""
		word = ""
	}

	{
		# awk strings are 1-based: substr($0, 0, 2) yields a single
		# character in gawk, so the tag has to be taken from position 1.
		tag = substr($0, 1, 2)
		if (tag == "%h") {
			# New headword: drop trailing spaces, index it in lower case.
			gsub(/ +$/, "", $0)
			word = tolower(substr($0, 3))
		} else if (tag == "%d") {
			# First line of the definition replaces any previous text.
			dict[word] = substr($0, 3)
		} else {
			# Continuation line of the current definition.
			dict[word] = dict[word] "\n" $0
		}
	}

	END {
		idxfile = dictname ".idx"
		dictfile = dictname ".dict"
		ifofile = dictname ".ifo"

		cnt = asorti(dict, idx)
		if (cnt == 0) {
			print "Empty dictionary\n"
			exit 1
		}

		pos = 0
		size = 0
		for (z = 1; z <= cnt; ++z) {
			key = idx[z]
			text = dict[key]
			len = length(text)
			# Index record: headword, NUL, 4-byte offset, 4-byte length.
			print key sprintf("%c", 0) stardict_nbo(pos) stardict_nbo(len) > idxfile
			print text > dictfile
			pos += len
			# Size of the index file: headword bytes + NUL + 8 bytes.
			size += length(key) + 9
		}
		close(idxfile)
		close(dictfile)

		print "StarDict'\''s dict ifo file\nversion=2.4.2\nwordcount=" cnt "\nidxfilesize=" size "\nbookname=" dictname "\nsametypesequence=m\n" > ifofile
		close(ifofile)
	}'
}

# from mueller2stardict with small modifications
#
# Reads marked-up text on stdin and writes UTF-8 to stdout.  On lines that
# contain a "[...]" group without whitespace, the content of every "[...]"
# group is converted with the IPA table below and the text outside the
# brackets with the koi8-r map; all other lines are converted with the
# koi8-r map as a whole.
FROM_MUELLER () {
	perl -e '
		use strict;
		use warnings;
		require Unicode::Map8;

		my $koi_map = Unicode::Map8->new("koi8-r")
			or die "cannot load the koi8-r map\n";

		# Transcription bytes without an entry below become a space.
		my $ipa_map = Unicode::Map8->new();
		$ipa_map->default_to16(0x20);

		# a-z map to themselves.
		for my $code (0x61 .. 0x7a) {
			$ipa_map->addpair($code, $code);
		}

		# Remaining transcription bytes and their Unicode code points.
		my @ipa_pairs = (
			[ 0x2d, 0x2e   ], [ 0x2e, 0x2e   ], [ 0x54, 0x03b8 ],
			[ 0x53, 0x0283 ], [ 0x4e, 0x014b ], [ 0xd7, 0x02a7 ],
			[ 0x44, 0x00F0 ], [ 0x5a, 0x0292 ], [ 0x51, 0x00e6 ],
			[ 0x49, 0x0131 ], [ 0xc3, 0x028c ], [ 0x55, 0x028a ],
			[ 0x81, 0x0252 ], [ 0x45, 0x025b ], [ 0xab, 0x0259 ],
			[ 0x41, 0x0251 ], [ 0xce, 0x025c ], [ 0x8d, 0x0254 ],
			[ 0xc7, 0x02cc ], [ 0xc8, 0x02c8 ], [ 0xf9, 0x02d0 ],
			[ 0x3e, 0x02d1 ],
		);
		for my $pair (@ipa_pairs) {
			$ipa_map->addpair($pair->[0], $pair->[1]);
		}

		# Converts dictionary text to UTF-8 through the koi8-r map.
		sub to_utf {
			return $koi_map->tou($_[0])->utf8;
		}

		# Converts a transcription to UTF-8 through the IPA table.
		sub ipa_code {
			return $ipa_map->tou($_[0])->utf8;
		}

		while (my $line = <>) {
			if ($line =~ /\[\S+\]/) {
				# Text in front of the first bracket.
				if (substr($line, 0, 1) ne "[") {
					if ($line =~ /([^[]+)/) {
						print to_utf($1);
					}
				}
				# Every transcription plus the text that follows it.
				while ($line =~ /\[([^]]+)\]([^[]*)/g) {
					print "[" . ipa_code($1) . "]" . to_utf($2);
				}
			}
			else {
				print to_utf($line);
			}
		}
	'
}

# Prints "<script name>: <message>" to stderr and exits with status 1.
fatal() {
	printf >&2 '%s\n' "${0##*/}: $*"
	exit 1
}

[ -n "$1" ] || fatal "Please specify input file"
[ -f "$1" ] || fatal "Input file does not exist"
[ -n "$2" ] || fatal "Please specify output name"

# Refuse to run when any of the output files is already there, because
# TO_SLOV would overwrite it.  (A single "[ -a f1 -a f2 -a f3 ]" does not
# express this: bash reads the 2nd and 3rd "-a" as AND followed by a
# non-empty string, so only the first file is actually tested.)
for ext in idx ifo dict; do
	if [ -e "$2.$ext" ]; then
		fatal "$2: dictionary already exists"
	fi
done

MK_DATA < "$1" | FROM_MUELLER | TO_SLOV "$2"