diff -pruN dillo-0.7.2/dillorc dillo-0.7.2-encodings/dillorc --- dillo-0.7.2/dillorc 2003-02-14 02:59:30.000000000 +0600 +++ dillo-0.7.2-encodings/dillorc 2003-05-06 16:29:34.000000000 +0700 @@ -60,6 +60,8 @@ home=http://dillo.auriga.wearlab.de/ #no_proxy = ".mynet.com" #no_proxy = ".mynet.com .other.net .foo.bar.org" +# Accept-Language string value to send to http server +#accept_language="*" #------------------------------------------------------------------------- # COLORS SECTION diff -pruN dillo-0.7.2/encodings dillo-0.7.2-encodings/encodings --- dillo-0.7.2/encodings 1970-01-01 07:00:00.000000000 +0700 +++ dillo-0.7.2-encodings/encodings 2003-05-06 16:29:34.000000000 +0700 @@ -0,0 +1,19 @@ +Auto-guessed +7-bit ASCII +Western European (ISO 8859-1) +Western European (ISO 8859-15) +Western European (CodePage1252) +Central European (ISO 8859-2) +Central European (CodePage1250) +Southern European (ISO 8859-3) +Cyrillic (CodePage1251) +Cyrillic (KOI8-R) +Cyrillic (IBM866) +Baltic (ISO 8859-13) +Baltic (ISO 8859-4) +Baltic (CodePage1257) +Nordic (ISO 8859-10) +Celtic (ISO 8859-14) +Ukraine (KOI8-U) +Unicode (UTF-8) +Unicode (UTF-16) diff -pruN dillo-0.7.2/Makefile.am dillo-0.7.2-encodings/Makefile.am --- dillo-0.7.2/Makefile.am 2003-04-01 03:55:25.000000000 +0700 +++ dillo-0.7.2-encodings/Makefile.am 2003-05-06 16:29:34.000000000 +0700 @@ -1,5 +1,5 @@ SUBDIRS = doc src dpi -EXTRA_DIST = ChangeLog.old dillorc +EXTRA_DIST = ChangeLog.old dillorc encodings sysconf_DATA = dillorc diff -pruN dillo-0.7.2/src/browser.h dillo-0.7.2-encodings/src/browser.h --- dillo-0.7.2/src/browser.h 2003-04-16 02:16:10.000000000 +0700 +++ dillo-0.7.2-encodings/src/browser.h 2003-05-06 16:29:34.000000000 +0700 @@ -6,7 +6,7 @@ #include #include "url.h" /* for DilloUrl */ - +#include "encodings-types.h" typedef struct _BrowserWindow BrowserWindow; typedef struct _DilloMenuPopup DilloMenuPopup; @@ -62,6 +62,9 @@ struct _BrowserWindow /* The bookmarks menu so that we can add things to it. 
*/ GtkWidget *bookmarks_menu; + /* The encoding menu */ + GtkWidget *enc_menu; + /* The "Headings" and "Anchors" menus */ GtkWidget *pagemarks_menuitem; GtkWidget *pagemarks_menu; @@ -123,6 +126,11 @@ struct _BrowserWindow /* The tag for the idle function that sets button sensitivity. */ guint sens_idle_tag; + + /* encodings variable */ + gchar* encoding; /* the selected character set (pointer to static content - don't free it) */ + gchar* autoencoding; /* the auto-guessed encoding (from META), not static */ + deConversion dcv; /* handle for character set conversion */ }; diff -pruN dillo-0.7.2/src/cache.c dillo-0.7.2-encodings/src/cache.c --- dillo-0.7.2/src/cache.c 2003-04-16 02:16:10.000000000 +0700 +++ dillo-0.7.2-encodings/src/cache.c 2003-05-06 16:29:34.000000000 +0700 @@ -1071,3 +1071,14 @@ void a_Cache_freeall(void) /* Remove the cache hash */ g_hash_table_destroy(CacheHash); } + +char *a_Cache_get_url_header (const DilloUrl *url, const gchar *fieldname) +{ + CacheData_t *entry; + + entry = Cache_entry_search (url); + if (entry) + return Cache_parse_field (entry->Header->str, fieldname); + else + return NULL; +} diff -pruN dillo-0.7.2/src/cache.h dillo-0.7.2-encodings/src/cache.h --- dillo-0.7.2/src/cache.h 2003-04-16 02:16:10.000000000 +0700 +++ dillo-0.7.2-encodings/src/cache.h 2003-05-06 16:29:34.000000000 +0700 @@ -54,7 +54,7 @@ char *a_Cache_url_read(const DilloUrl *u void a_Cache_freeall(void); void a_Cache_null_client(int Op, CacheClient_t *Client); void a_Cache_stop_client(gint Key); - +char *a_Cache_get_url_header (const DilloUrl *url, const gchar *fieldname); #endif /* __CACHE_H__ */ diff -pruN dillo-0.7.2/src/char_encodings.c dillo-0.7.2-encodings/src/char_encodings.c --- dillo-0.7.2/src/char_encodings.c 1970-01-01 07:00:00.000000000 +0700 +++ dillo-0.7.2-encodings/src/char_encodings.c 2003-05-06 16:29:34.000000000 +0700 @@ -0,0 +1,555 @@ +/* cruelty :) */ + +/* Copyright (C) 2002 Grigory Bakunov */ + +/* Copyright (C) 1997 Ian Main + * + * This 
program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include + +#include +#include +#include +#include +#include + +#define HAVE_LANGINFO_CODESET 1 + +#ifdef HAVE_LANGINFO_CODESET +#include +#if ! defined(CODESET) && defined (_NL_CTYPE_CODESET_NAME) +#define CODESET _NL_CTYPE_CODESET_NAME +#endif +#endif + +#include "list.h" +#include "interface.h" +#include "nav.h" +#include "browser.h" +#include "menu.h" +#include "misc.h" +#include "char_encodings.h" + +#define LOAD_ENCODINGS 1 +#define SAVE_ENCODING 2 +#define CLOSE_ENCODINGS 3 + +/* this #define will cut page title if > 39 chars */ +#define TITLE39 + +/* double quote */ +#define D_QUOTE 0x22 + +/* Data types */ + +typedef struct _Encoding Encoding; +typedef struct _CallbackInfo CallbackInfo; + +struct _Encoding { + char *title; + char *iconv_name; + GtkWidget *menuitem; +}; + +struct _CallbackInfo { + BrowserWindow *bw; + guint index; +}; + +/* + * Forward declarations + */ +static void + Char_encoding_load_to_menu(FILE *fp); +static int + Char_encoding_file_op(gint operation, const char *title, const char *iconv_name); +/* Char_encoding_save_to_file(FILE *fp, const char *title, const char *iconv_name); */ + +static char* + Char_encoding_search_line(char *line, char *start_text, char *end_text); + +/* + * extern data + */ + +char *locale_charset = "C"; +char 
*default_charset = "ASCII"; + +/* + * Local data + */ +static Encoding *encodings = NULL; +static gint num_encodings = 0; +static gint num_encodings_max = 16; + + +/* + * Allocate memory and load the encodings list + */ +void a_Char_encoding_init(void) +{ + gchar *file; + const gchar *locale; +#ifndef HAVE_LANGINFO_CODESET + gchar **charsets; +#endif + +#ifdef HAVE_LANGINFO_CODESET + locale_charset = nl_langinfo(CODESET); +#else + locale = setlocale(LC_ALL, ""); + + if (locale != NULL) + { + /* Search charset part of locale */ + locale = strchr(locale, '.'); + + if (locale != NULL) { + charsets = g_strsplit(++locale, ";", 1); + locale_charset = g_strdup(charsets[0]); + g_strfreev(charsets); + } + } +#endif + g_print("Locale charset is %s\n",locale_charset); + + default_charset = locale_charset; + + /* Here we load and set the encodings */ + file = a_Misc_prepend_user_home(".dillo/encodings"); + if (!Char_encoding_file_op(LOAD_ENCODINGS, file, NULL)) + Char_encoding_file_op(LOAD_ENCODINGS, "/etc/dillo.encodings", NULL); + g_free(file); +} + +/* + * ? + */ +void Char_encoding_set_encoding(GtkWidget *widget, CallbackInfo *CbInfo) +{ + if ((CbInfo->index >= num_encodings) || (CbInfo->index < 0)) { + g_warning("encoding not found!\n"); + return; + } + g_print("Selected encoding: %s\n",encodings[CbInfo->index].iconv_name); + CbInfo->bw->encoding=encodings[CbInfo->index].iconv_name; + a_Nav_reload(CbInfo->bw); + /*HERE!!! LOOKOUT!!!! 
*/ +} + +/* + * Add a encoding to the encodings menu of a particular browser window + */ +void Char_encoding_add_to_menu(BrowserWindow *bw, GtkWidget *menuitem, guint index) +{ + CallbackInfo *CbInfo; + GtkAccelGroup *menu_accels; + GdkModifierType accel_mods; + guint accel_key; + char accel[10]; + + gtk_menu_append(GTK_MENU(bw->enc_menu), menuitem); + menu_accels = gtk_menu_ensure_uline_accel_group(GTK_MENU(bw->enc_menu)); + + CbInfo = g_new(CallbackInfo, 1); + CbInfo->bw = bw; + CbInfo->index = index; + + /* When we call this at first time bw->encoding is null because + * bw is initialized in a_Interface_browser_window_new as + * bw = g_new0(BrowserWindow, 1); */ + if (bw->encoding == NULL) + bw->encoding = encodings [index].iconv_name; + + /* accelerator goes here */ + gtk_signal_connect(GTK_OBJECT (menuitem), "activate", + (GtkSignalFunc)Char_encoding_set_encoding, CbInfo); + + if (index < 10) + { + strcpy (accel, "0"); + accel [strlen (accel) - 1] = (char) (index + 48); + accel [strlen (accel)] = 0x0; + + gtk_accelerator_parse(accel, &accel_key, &accel_mods); + gtk_widget_add_accelerator(menuitem, "activate", bw->accel_group, + accel_key, (guint)accel_mods, GTK_ACCEL_VISIBLE); + } +} + +/* + * ? 
+ */ +static GtkWidget *Char_encoding_insert(const char *title, const char *iconv_name) +{ + GtkWidget *menuitem; + + menuitem = gtk_menu_item_new_with_label(title); + gtk_widget_show(menuitem); + + a_List_add(encodings, num_encodings, num_encodings_max); + encodings[num_encodings].title = g_strdup(title); + encodings[num_encodings].iconv_name = g_strdup(iconv_name); + encodings[num_encodings].menuitem = menuitem; + num_encodings++; + return menuitem; +} + +/* + * Add the new encoding to encodings menu of _all_ browser windows and then + * write the new encoding to file + */ +/* +void a_Char_encoding_add(GtkWidget *widget, gpointer client_data) +{ + BrowserWindow *bw = (BrowserWindow *)client_data; + gint i; +#ifdef TITLE39 + gboolean allocated = FALSE; +#endif + char *title; + char *iconv_name; + GtkWidget *menuitem; + + title = bw->menu_popup.info.title; + iconv_name = bw->menu_popup.info.iconv_name; + +#ifdef TITLE39 + if (strlen (title) > 39) { + char buf1[20]; + char buf2[20]; + + memcpy (buf1, title, 18); + buf1[18] = '\0'; + strcpy (buf2, title + strlen (title) - 18); + buf2[18] = '\0'; + title = g_strconcat (buf1, "...", buf2, NULL); + allocated = TRUE; + } +#endif + + menuitem = Char_encoding_insert(title, iconv_name); + Char_encoding_add_to_menu(browser_window[0], menuitem, num_encodings-1); + for (i = 1; i < num_bw; i++) { + menuitem= gtk_menu_item_new_with_label(encodings[num_encodings-1].title); + gtk_widget_show(menuitem); + Char_encoding_add_to_menu(browser_window[i], menuitem, num_encodings-1); + } + + Char_encoding_file_op(SAVE_ENCODING, title, iconv_name); + +#ifdef TITLE39 + if (allocated) + g_free (title); +#endif +} +*/ +/* + * Never called (the file remains open all the time) + */ +void Char_encoding_close(void) +{ + Char_encoding_file_op(CLOSE_ENCODINGS, NULL, NULL); +} + +/* + * Performs operations on the encoding file.. 
+ * for first call, title is the filename + */ +static int + Char_encoding_file_op(gint operation, const char *title, const char *iconv_name) +{ + static FILE *fp; + static gint initialized = 0; + + if (!initialized) { + if (operation == LOAD_ENCODINGS) { + if ((fp = fopen(title, "r")) == NULL) /* we don't need to a+ yet */ + g_print("dillo: opening encoding file %s: %s\n", + title, strerror(errno)); + else + initialized = 1; + } else + g_print("Error: invalid call to Char_encoding_file_op.\n"); + } + + if (!initialized) + return 0; + + switch (operation) { + case LOAD_ENCODINGS: + Char_encoding_load_to_menu(fp); + break; + + case SAVE_ENCODING: + g_print("Error: Save unimplemented yet.\n"); + /* Char_encoding_save_to_file(fp, title, iconv_name); */ + break; + + case CLOSE_ENCODINGS: + fclose(fp); + break; + + default: + break; + } + return 1; +} + +/* + * Save encodings to ~/.dillo/encodings + */ +/* +static void + Char_encoding_save_to_file(FILE *fp, const char *title, const char* iconv_name) +{ + fseek(fp, 0L, SEEK_END); + fprintf(fp, "%s\n", iconv_name,title); + fflush(fp); +} +*/ + +/* + * Load encodings + */ +static void Char_encoding_load_to_menu(FILE *fp) +{ + gchar *title=NULL; + gchar *iconv_name=NULL; + char buf[4096]; + gint i = 0; + GtkWidget *menuitem; + + rewind(fp); + + g_print("Loading encodings...\n"); + while (1) { + /* Read a whole line from the file */ + if ((fgets(buf, 4096, fp)) == NULL) + break; + + /* get url from line */ + if ( !(iconv_name = Char_encoding_search_line(buf, "=\"", "\">")) ) + continue; + + /* get title from line */ + if ( !(title = Char_encoding_search_line(buf, "\">", "dcv.c_from=iconv_open(locale_charset,charset); + bw->dcv.c_to=iconv_open(charset,locale_charset); + + if (bw->dcv.c_from == (iconv_t)-1 || bw->dcv.c_to == (iconv_t)-1) + g_warning ("could not allocate character encoding converter."); + + return; +}; + +/* + * Configure iconv descriptors in BrowserWindow for text reencoding + * with respect to encoding and 
autoencoding fields + */ +void a_Char_encoding_configure_conversion(BrowserWindow *bw) +{ + const char *enc; + + /* only do _anything_ if told so */ + enc = bw->encoding; + + if (enc != NULL) + { + if (!strcasecmp (enc, "auto")) + { + enc = bw->autoencoding; + + if (!enc) + enc = default_charset; + } + + a_Char_encoding_set_conversion(bw, enc); + } +} + +/* + * Translate a buffer content with respect to BrowserWindow's + * iconv descriptors and direction (forward/backward translation) + */ +gchar *a_Char_encoding_translate_encoding(BrowserWindow *bw, const char *buf, + gint bufsize, deDirection dir) +{ + char *result, *source, *dest; + size_t s_left, d_left; + iconv_t conversion; + + if (dir == DE_DECODE) + conversion = bw->dcv.c_from; + else + conversion = bw->dcv.c_to; + + /* no conversion is needed, or none is available */ + if (conversion == (iconv_t) -1) + return g_strndup(buf, bufsize); + + /* Note that for some conversions, the translated buffer can be larger + * than the input buffer. This is particularly important for conversions + * to UTF8 (check the unicode standard to find out the scale factor). */ + result = g_malloc((bufsize + 1) * ENCODINGS_MAX_CHAR_SIZE); + + source = (char *) buf; /* iconv wouldn't change source */ + dest = result; + s_left = bufsize; + d_left = bufsize * ENCODINGS_MAX_CHAR_SIZE; + if (iconv(conversion, &source, &s_left, &dest, &d_left) == (size_t) -1) { + /* g_warning ("unable to fully convert page to native character set"); */ + /* This just skips past unconvertable characters, putting "?" in the + * output, then retries the conversion. This is a hack, but it seems + * like the best course of action in the circumstances. 
*/ + while (s_left > 0 && d_left > 0 && errno == EILSEQ) { + source++; + s_left--; + *dest = '?'; + dest++; + d_left--; + if (s_left > 0 && d_left > 0) + iconv(conversion, &source, &s_left, &dest, &d_left); + } + } + *dest = 0; /* terminate the string */ + + return result; +} + +/* + * Translate character from UCS-2 UNICODE to local charset + */ +gint a_Char_encoding_translate_char_from_unicode (gint unicode) +{ + iconv_t cd; + gint i, out; + size_t toin, toout; + char *in, *pin, *pout; + + cd = iconv_open (locale_charset, "UCS-2"); + + if (cd == (iconv_t) -1) + return ' '; + + in = g_malloc (2); + /* endian independent */ + in [0] = (char) (unicode & 0xff); + in [1] = (char) ((unicode & 0xff00) >> 8); + pin = &in[0]; + toin = 2; + + out = 0; + pout = (char*) &out; + toout = 4; + + i = iconv (cd, &pin, &toin, &pout, &toout); + + iconv_close (cd); + + g_free (in); + + if (i == (size_t) (-1)) + { + g_warning ("Unable to convert UNICODE char U%04x to native charset\n", unicode); + return -1; + } + else + return out; +} + +/* + * Build a HTTP header Accept-Charset string + */ +char *a_Char_encoding_accept_charset_string () +{ + GString *string; + char *result; + int i; + + if (num_encodings == 0) + return NULL; + + string = g_string_new ("Accept-Charset: "); + + for (i = 0; i < num_encodings; i++) + { + if (!strcasecmp (encodings [i].iconv_name, "Auto")) + continue; + + g_string_append (string, encodings [i].iconv_name); + g_string_append (string, ", "); + } + + g_string_append (string, "*"); + + result = string -> str; + g_string_free (string, 0); + + return result; +} diff -pruN dillo-0.7.2/src/char_encodings.h dillo-0.7.2-encodings/src/char_encodings.h --- dillo-0.7.2/src/char_encodings.h 1970-01-01 07:00:00.000000000 +0700 +++ dillo-0.7.2-encodings/src/char_encodings.h 2003-05-06 16:29:34.000000000 +0700 @@ -0,0 +1,25 @@ +#ifndef __DILLO_ENCODING_H__ +#define __DILLO_ENCODING_H__ + +#include +#include "browser.h" +#include "encodings-types.h" + +#define 
ENCODINGS_MAX_CHAR_SIZE 4 + +/* Copyright (C) 2002 Grigory Bakunov */ + +extern char *locale_charset; /* defined in char_encodings.c */ +extern char *default_charset; /* defined in char_encodings.c */ + +void a_Char_encoding_init(); +void a_Char_encoding_add(GtkWidget *widget, gpointer client_data); +void a_Char_encoding_fill_new_menu(BrowserWindow *bw); +void a_Char_encoding_set_conversion(BrowserWindow *bw, const char* charset); +void a_Char_encoding_configure_conversion(BrowserWindow *bw); +char *a_Char_encoding_translate_encoding(BrowserWindow *bw, const char *buf, + gint bufsize,deDirection dir); +gint a_Char_encoding_translate_char_from_unicode (gint unicode); + +char *a_Char_encoding_accept_charset_string (); +#endif /* __DILLO_ENCODING_H__ */ diff -pruN dillo-0.7.2/src/commands.c dillo-0.7.2-encodings/src/commands.c --- dillo-0.7.2/src/commands.c 2003-04-16 02:16:10.000000000 +0700 +++ dillo-0.7.2-encodings/src/commands.c 2003-05-06 16:29:34.000000000 +0700 @@ -25,6 +25,8 @@ #include "prefs.h" #include "menu.h" #include "capi.h" +#include "char_encodings.h" +#include "encodings-types.h" /* * Local data @@ -97,7 +99,7 @@ void a_Commands_exit_callback(GtkWidget void a_Commands_viewsource_callback (GtkWidget *widget, gpointer client_data) { BrowserWindow *bw = (BrowserWindow *)client_data; - char *buf; + char *buf, *translated_buf; gint size, xsize, ysize; GtkWidget *window, *box1, *button, *scrolled_window, *text; @@ -139,7 +141,11 @@ void a_Commands_viewsource_callback (Gtk gtk_text_freeze (GTK_TEXT (text)); buf = a_Capi_url_read(a_History_get_url(NAV_TOP(bw)), &size); - gtk_text_insert (GTK_TEXT (text), NULL, NULL, NULL, buf, size); + translated_buf = a_Char_encoding_translate_encoding (bw, buf, + size, DE_DECODE); + gtk_text_insert (GTK_TEXT (text), NULL, NULL, NULL, translated_buf, + strlen (translated_buf)); + g_free (translated_buf); gtk_text_thaw (GTK_TEXT (text)); button = gtk_button_new_with_label ("close"); @@ -379,6 +385,13 @@ void a_Commands_open_link_nw_callback(Gt gdk_window_get_size (bw->main_window->window, &width, 
&height); newbw = a_Interface_browser_window_new(width, height, 0); + newbw -> encoding = bw -> encoding; + /* + * for use last encoding in case of document which not provide + * its charset by itself (e.g. plain text document) we should copy + * autodetected charset value + */ + newbw -> autoencoding = g_strdup (bw -> autoencoding); a_Nav_push(newbw, a_Menu_popup_get_url(bw)); } diff -pruN dillo-0.7.2/src/dillo.c dillo-0.7.2-encodings/src/dillo.c --- dillo-0.7.2/src/dillo.c 2003-04-24 23:15:19.000000000 +0700 +++ dillo-0.7.2-encodings/src/dillo.c 2003-05-06 16:29:34.000000000 +0700 @@ -40,6 +40,7 @@ #include "nav.h" #include "history.h" #include "bookmark.h" +#include "char_encodings.h" #include "dicache.h" #include "dns.h" #include "IO/mime.h" @@ -196,8 +197,9 @@ gint main(int argc, char *argv[]) char **opt_argv = NULL; /* set locale */ - curr_locale = g_strdup(setlocale(LC_ALL, NULL)); + curr_locale = g_strdup(setlocale(LC_ALL, "")); gtk_set_locale(); + /* Initialize GUI and parse GTK related args */ gtk_init(&argc, &argv); gdk_rgb_init(); @@ -260,6 +262,7 @@ gint main(int argc, char *argv[]) a_Interface_init(); a_Dw_init(); a_Cookies_init(); + a_Char_encoding_init(); /* -f overrides dillorc */ if (options_got & DILLO_CLI_FULLWINDOW) diff -pruN dillo-0.7.2/src/encodings-types.h dillo-0.7.2-encodings/src/encodings-types.h --- dillo-0.7.2/src/encodings-types.h 1970-01-01 07:00:00.000000000 +0700 +++ dillo-0.7.2-encodings/src/encodings-types.h 2003-05-06 16:29:34.000000000 +0700 @@ -0,0 +1,18 @@ +#ifndef __DILLO_ENCODING_TYPES_H__ +#define __DILLO_ENCODING_TYPES_H__ +#include +/* Copyright (C) 2002 Grigory Bakunov */ + +typedef struct _deConversion deConversion; + +struct _deConversion { + iconv_t c_from; + iconv_t c_to; +}; + +typedef enum { + DE_DECODE, + DE_ENCODE +} deDirection; + +#endif /* __DILLO_ENCODING_TYPES_H__ */ diff -pruN dillo-0.7.2/src/html.c dillo-0.7.2-encodings/src/html.c --- dillo-0.7.2/src/html.c 2003-04-16 02:16:14.000000000 +0700 +++ 
dillo-0.7.2-encodings/src/html.c 2003-05-06 17:41:15.000000000 +0700 @@ -48,7 +48,9 @@ #include "progressbar.h" #include "prefs.h" #include "misc.h" +#include "char_encodings.h" #include "capi.h" +#include "cache.h" #include "html.h" #define DEBUG_LEVEL 10 @@ -114,6 +116,17 @@ DwWidget *a_Html_text(const char *Type, DilloWeb *web = P; DilloHtml *html = Html_new(web->bw, web->url); + if (html->bw->autoencoding) + { + /* + * to conform HTML 4.01 charset handling we must use only the first + * occurence of charset element + * Later we will check html->bw->autoencoding for zero + */ + g_free (html->bw->autoencoding); + html->bw->autoencoding = 0; + } + *Data = (void *) html; *Call = (CA_Callback_t) Html_callback; @@ -188,6 +201,51 @@ static void Html_lb_free(void *lb) g_free(html_lb); } +/* + * Translate buffer content from HTML encoding to local encoding + */ +static char *Html_translate_encoding(DilloHtml *html, + const char *buf, gint bufsize) +{ + char* result; + + result = a_Char_encoding_translate_encoding(html->bw, buf, bufsize, DE_DECODE); + return result; +} + +/* + * Actually handle Content header either came from http response or META tag + * content is 'Content-Type: text/html; charset=***' with optional charset field + */ +static void Html_handle_content (DilloHtml *html, const char* content) +{ + gchar *strtemp, *charset; + + if (content == NULL) + content = ""; /* fallback to default charset */ + + strtemp = g_strdup (content); + g_strdown (strtemp); + + /* + * We should use only the first occurence of charset element + * so this is a check: + */ + if (!html->bw->autoencoding) + { + g_free (html->bw->autoencoding); + + charset = strstr (strtemp, "charset="); + if (charset) + html->bw->autoencoding = g_strdup (charset + 8); + else /* charset is missing */ + html->bw->autoencoding = 0; + + a_Char_encoding_configure_conversion (html->bw); + } + + g_free (strtemp); +} /* * Set the URL data for image maps. 
@@ -713,7 +771,7 @@ static int Html_entity_search(char *key) } /* - * Given an entity, return the ISO-Latin1 character code. + * Given an entity, return the current locale character code. * (-1 if not a valid entity) */ static gint Html_parse_entity(const gchar *token, gint toksize) @@ -814,7 +814,7 @@ static gint Html_parse_entity(const gcha ret = isocode; } else { /* Try a few UCS translations to Latin1 */ - ret = Html_try_ucs2latin1(isocode); + ret = a_Char_encoding_translate_char_from_unicode (isocode); } } else { /* Search for named entity */ @@ -825,7 +825,7 @@ static gint Html_parse_entity(const gcha if (Entities[i].isocode > 0 && Entities[i].isocode <= 255) ret = Entities[i].isocode; else - ret = Html_try_ucs2latin1(Entities[i].isocode); + ret = a_Char_encoding_translate_char_from_unicode (Entities[i].isocode); } } } @@ -833,7 +833,7 @@ static gint Html_parse_entity(const gcha } /* - * Convert all the entities in a token to plain ISO character codes. Takes + * Convert all the entities in a token to plain locale characters. Takes * a token and its length, and returns a newly allocated string. 
*/ static char *Html_parse_entities(gchar *token, gint toksize) @@ -753,7 +811,7 @@ static char *Html_parse_entities(gchar * if ( memchr(token, '&', toksize) == NULL ) return g_strndup(token, toksize); - new_str = g_new(char, toksize + 1); + new_str = g_new(char, (toksize + 1) * ENCODINGS_MAX_CHAR_SIZE); for (i = j = 0; i < toksize; i++) { if (token[i] == '&' && (isocode = Html_parse_entity(token + i, toksize - i)) != -1) { @@ -837,34 +895,38 @@ static void Html_process_space(DilloHtml */ static void Html_process_word(DilloHtml *html, char *word, gint size) { - gint i, start; - gchar *Pword; + gint i, start, translated_size; + gchar *Pword, *translated_word; DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode; + /* translate word from page to local encoding */ + translated_word = Html_translate_encoding (html, word, size); + translated_size = strlen (translated_word); + if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH || parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY ) { if ( html->StashSpace ) { g_string_append_c(html->Stash, ' '); html->StashSpace = FALSE; } - Pword = Html_parse_entities(word, size); + Pword = Html_parse_entities(translated_word, translated_size); g_string_append(html->Stash, Pword); g_free(Pword); } else if ( parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) { /* word goes in untouched, it is not processed here. 
*/ - Pword = g_strndup(word, size); - g_string_append(html->Stash, Pword); - g_free(Pword); + g_string_append(html->Stash, translated_word); } if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH || - parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) + parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) { + g_free (translated_word); return; + } if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) { /* all this overhead is to catch white-space entities */ - Pword = Html_parse_entities(word, size); + Pword = Html_parse_entities(translated_word, translated_size); for (start = i = 0; Pword[i]; start = i) if (isspace(Pword[i])) { while (Pword[++i] && isspace(Pword[i])); @@ -880,22 +942,24 @@ static void Html_process_word(DilloHtml g_free(Pword); } else { - if (memchr(word, '&', size) == NULL) { + if (memchr(translated_word, '&', translated_size) == NULL) { a_Dw_page_add_text(DW_PAGE (html->dw), - g_strndup(word, size), + g_strndup(translated_word, translated_size), html->stack[html->stack_top].style); } else { /* actually white-space entities inside the word should be * collapsed (except  ), but that's too much overhead * for a very rare case of bad-formed HTML --Jcid */ - Pword = Html_parse_entities(word, size); + Pword = Html_parse_entities(translated_word, translated_size); g_strdelimit(Pword, "\t\f\n\r", ' '); a_Dw_page_add_text(DW_PAGE (html->dw), Pword, html->stack[html->stack_top].style); } } + + g_free (translated_word); } /* @@ -2604,6 +2668,13 @@ static void Html_tag_open_meta(DilloHtml Html_write_raw(html, html_msg, strlen(html_msg), 0); g_free(html_msg); } + + /* Charset handling */ + if ((equiv = Html_get_attr(html, tag, tagsize, "http-equiv")) && + !g_strcasecmp(equiv, "Content-Type") && + (content = Html_get_attr(html, tag, tagsize, "content"))) { + Html_handle_content (html, content); + } } /* @@ -2845,6 +2916,7 @@ static void Html_submit_form(GtkWidget * if ((form->method == DILLO_HTML_METHOD_GET) || (form->method == DILLO_HTML_METHOD_POST)) { GString *DataStr = 
g_string_sized_new(4096); + GString *TranslatedDataStr; DEBUG_MSG(3,"Html_submit_form form->action=%s\n",URL_STR_(form->action)); @@ -2909,6 +2981,12 @@ static void Html_submit_form(GtkWidget * break; } /* switch */ } /* for (inputs) */ + TranslatedDataStr = + g_string_new (a_Char_encoding_translate_encoding(html_lb->bw, + DataStr->str, + DataStr->len, DE_ENCODE)); + g_string_free(DataStr,TRUE); + DataStr = TranslatedDataStr; if ( DataStr->str[DataStr->len - 1] == '&' ) g_string_truncate(DataStr, DataStr->len - 1); @@ -3224,7 +3302,8 @@ static void Html_tag_close_textarea(Dill DilloHtmlLB *html_lb = html->linkblock; char *str; DilloHtmlForm *form; - gint i; + gint i, translated_size; + gchar *translated_str; if (!(html->InFlags & IN_FORM) || !(html->InFlags & IN_TEXTAREA)) @@ -3247,8 +3326,14 @@ static void Html_tag_close_textarea(Dill } } + /* translate canonified text stream to local encoding */ + translated_str = Html_translate_encoding (html, html->Stash->str, html->Stash->len); + translated_size = strlen (translated_str); + /* The HTML3.2 spec says it can have "text and character entities". */ - str = Html_parse_entities(html->Stash->str, html->Stash->len); + str = Html_parse_entities(translated_str, translated_size); + + g_free (translated_str); form = &(html_lb->forms[html_lb->num_forms - 1]); form->inputs[form->num_inputs - 1].init_str = str; @@ -3876,8 +3961,9 @@ static const char *Html_get_attr2(DilloH const char *attrname, DilloHtmlTagParsingFlags flags) { - gint i, isocode, Found = 0, delimiter = 0, attr_pos = 0; + gint i, isocode=0 , Found = 0, delimiter = 0, attr_pos = 0; GString *Buf = html->attr_data; + char *translated_attr, *strtemp; DilloHtmlTagParsingState state = SEEK_ATTR_START; g_return_val_if_fail(*attrname, NULL); @@ -3956,7 +4035,24 @@ static const char *Html_get_attr2(DilloH while (Buf->len && isspace(Buf->str[Buf->len - 1])) g_string_truncate(Buf, Buf->len - 1); - return (Found) ? 
Buf->str : NULL; + if (Found) { + if (Buf->len > 0) { + translated_attr = Html_translate_encoding (html, Buf->str, Buf->len); + if (flags & HTML_ParseEntities) + { + strtemp = Html_parse_entities (translated_attr, strlen (translated_attr)); + g_free (translated_attr); + } + else + strtemp = translated_attr; + + g_string_assign (Buf, strtemp); + g_free (strtemp); + } + return Buf->str; + } + else + return NULL; } /* @@ -4142,9 +4238,17 @@ static void Html_write(DilloHtml *html, gint token_start; char *buf = Buf + html->Start_Ofs; gint bufsize = BufSize - html->Start_Ofs; + char *content; g_return_if_fail ( (page = DW_PAGE (html->dw)) != NULL ); + if (!html->Start_Ofs && + (content = a_Cache_get_url_header (html->linkblock->base_url, "Content-Type"))) + { + Html_handle_content (html, content); + g_free (content); + } + token_start = Html_write_raw(html, buf, bufsize, Eof); html->Start_Ofs += token_start; diff -pruN dillo-0.7.2/src/interface.c dillo-0.7.2-encodings/src/interface.c --- dillo-0.7.2/src/interface.c 2003-04-26 02:04:45.000000000 +0700 +++ dillo-0.7.2-encodings/src/interface.c 2003-05-06 16:29:34.000000000 +0700 @@ -19,6 +19,7 @@ #include #include #include +#include #include "list.h" #include "misc.h" @@ -285,6 +286,9 @@ static gboolean Interface_quit(GtkWidget if (bw->question_dialog_window != NULL) gtk_widget_destroy(bw->question_dialog_window); + if (bw->autoencoding) + g_free (bw->autoencoding); + if (bw->menu_popup.over_back) gtk_widget_destroy(bw->menu_popup.over_back); if (bw->menu_popup.over_forw) @@ -891,6 +895,15 @@ a_Interface_browser_window_new(gint widt bw->question_dialog_window = NULL; bw->question_dialog_data = NULL; bw->viewsource_window = NULL; + /* bw->encoding is filled in + * Char_encoding_add_to_menu called from + * a_Char_encoding_fill_new_menu called from + * a_Menu_mainbar_new called from + * this function */ + /* bw->encoding = NULL; */ + bw->autoencoding = NULL; + bw->dcv.c_from = (iconv_t)-1; /* no conversion yet */ + 
bw->dcv.c_to = (iconv_t)-1; /* now that the bw is made, let's customize it.. */ Interface_browser_window_customize(bw); diff -pruN dillo-0.7.2/src/IO/file.c dillo-0.7.2-encodings/src/IO/file.c --- dillo-0.7.2/src/IO/file.c 2003-04-16 02:16:21.000000000 +0700 +++ dillo-0.7.2-encodings/src/IO/file.c 2003-05-06 16:29:34.000000000 +0700 @@ -37,6 +37,7 @@ #include "../misc.h" #include "../web.h" #include "../interface.h" +#include "../char_encodings.h" typedef struct _DilloDir { gint FD_Write, FD_Read; @@ -256,8 +257,10 @@ static void *File_transfer_dir(void *dat Hdirname = (s1 = File_html_escape(Ddir->dirname)) ? s1 : Ddir->dirname; CHdirname = (s2 = a_Misc_escape_chars(Hdirname, "% ")) ? s2 : Hdirname; g_string_sprintf(gstr, "\n\n \n" + " \n" " %s%s\n\n", "file:", CHdirname, + locale_charset, "file:", Hdirname); write(Ddir->FD_Write, gstr->str, gstr->len); g_string_sprintf(gstr, "

%s %s

\n
\n",
diff -pruN dillo-0.7.2/src/IO/http.c dillo-0.7.2-encodings/src/IO/http.c
--- dillo-0.7.2/src/IO/http.c	2003-04-16 02:16:21.000000000 +0700
+++ dillo-0.7.2-encodings/src/IO/http.c	2003-05-06 16:29:34.000000000 +0700
@@ -33,6 +33,7 @@
 #include "../dns.h"
 #include "../cache.h"
 #include "../web.h"
+#include "../char_encodings.h"
 #include "../interface.h"
 #include "../cookies.h"
 #include "../prefs.h"
@@ -124,7 +125,7 @@ static void Http_socket_free(gint SKey)
  */
 static char *Http_query(const DilloUrl *url, gboolean use_proxy)
 {
-   gchar *str, *ptr, *cookies;
+   gchar *str, *ptr, *cookies, *accept_charsets;
    GString *s_port    = g_string_new(""),
            *query     = g_string_new(""),
            *full_path = g_string_new("");
@@ -148,10 +149,14 @@ static char *Http_query(const DilloUrl *
    }
 
    cookies = a_Cookies_get(url);
+   accept_charsets = a_Char_encoding_accept_charset_string ();
+
    if ( URL_FLAGS(url) & URL_Post ){
       g_string_sprintfa(
          query,
          "POST %s HTTP/1.0\r\n"
+         "%s\r\n"
+         "Accept-Language: %s\r\n"
          "Host: %s%s\r\n"
          "User-Agent: Dillo/%s\r\n"
          "Cookie2: $Version=\"1\"\r\n"
@@ -160,7 +165,8 @@ static char *Http_query(const DilloUrl *
          "Content-length: %ld\r\n"
          "\r\n"
          "%s",
-         full_path->str, URL_HOST(url), s_port->str, VERSION, cookies,
+         full_path->str, accept_charsets, prefs.accept_language,
+         URL_HOST(url), s_port->str, VERSION, cookies,
          (glong)strlen(URL_DATA(url)),
          URL_DATA(url));
 
@@ -169,17 +175,21 @@ static char *Http_query(const DilloUrl *
          query,
          "GET %s HTTP/1.0\r\n"
          "%s"
+         "%s\r\n"
+         "Accept-Language: %s\r\n"
          "Host: %s%s\r\n"
          "User-Agent: Dillo/%s\r\n"
          "Cookie2: $Version=\"1\"\r\n"
          "%s"
          "\r\n",
-         full_path->str,
+         full_path->str, 
          (URL_FLAGS(url) & URL_E2EReload) ?
             "Cache-Control: no-cache\r\nPragma: no-cache\r\n" : "",
+         accept_charsets, prefs.accept_language,
          URL_HOST(url), s_port->str, VERSION,
          cookies);
    }
+   g_free(accept_charsets);
    g_free(cookies);
 
    str = query->str;
diff -pruN dillo-0.7.2/src/Makefile.am dillo-0.7.2-encodings/src/Makefile.am
--- dillo-0.7.2/src/Makefile.am	2003-04-03 22:42:12.000000000 +0700
+++ dillo-0.7.2-encodings/src/Makefile.am	2003-05-06 16:29:34.000000000 +0700
@@ -59,6 +59,9 @@ dillo_SOURCES = \
 	dw_tooltip.h \
 	dw_widget.c \
 	dw_widget.h \
+	char_encodings.c \
+	char_encodings.h \
+	encodings-types.h \
 	findtext.c \
 	findtext.h \
 	selection.c \
diff -pruN dillo-0.7.2/src/menu.c dillo-0.7.2-encodings/src/menu.c
--- dillo-0.7.2/src/menu.c	2003-04-16 02:16:14.000000000 +0700
+++ dillo-0.7.2-encodings/src/menu.c	2003-05-06 16:29:34.000000000 +0700
@@ -33,6 +33,7 @@
 #include "bookmark.h"
 #include "interface.h"
 #include "menu.h"
+#include "char_encodings.h"
 
 /*
  * Forward declarations
@@ -123,6 +124,7 @@ GtkWidget *a_Menu_mainbar_new(BrowserWin
    GtkWidget *menubar;
    GtkWidget *file_menu;
    /* GtkWidget *bookmarks_menu; */
+   GtkWidget *enc_menu;
    /* GtkWidget *help_menu; */
 
    bw->menubar = menubar = gtk_menu_bar_new();
@@ -160,6 +162,11 @@ GtkWidget *a_Menu_mainbar_new(BrowserWin
    Menu_add(help_menu, "Dillo _Manual", NULL, bw,
             a_Commands_manual_callback, bw);
    */
+
+   enc_menu = Menu_new(menubar, tiny ? "_E" : "_Char_encoding", FALSE, bw);
+   bw->enc_menu = enc_menu;
+   a_Char_encoding_fill_new_menu(bw);
+
    return menubar;
 }
 
diff -pruN dillo-0.7.2/src/plain.c dillo-0.7.2-encodings/src/plain.c
--- dillo-0.7.2/src/plain.c	2003-04-16 02:16:15.000000000 +0700
+++ dillo-0.7.2-encodings/src/plain.c	2003-05-06 16:29:34.000000000 +0700
@@ -29,7 +29,8 @@
 #include "history.h"
 #include "nav.h"
 #include "menu.h"
-
+#include "encodings-types.h"
+#include "char_encodings.h"
 
 typedef struct _DilloPlain {
    DwWidget *dw;
@@ -98,6 +99,8 @@ static DilloPlain *Plain_new(BrowserWind
                                   plain->bw->main_window->window);
    //a_Dw_widget_set_style (plain->dw, plain->style);
 
+   a_Char_encoding_configure_conversion (bw);
+
    /* The context menu */
    gtk_signal_connect_while_alive
       (GTK_OBJECT(GTK_BIN(plain->bw->docwin)->child),"button_press_event",
@@ -156,7 +159,7 @@ static void Plain_write(DilloPlain *plai
 {
    DwPage *page = (DwPage *)plain->dw;
    char *Start;
-   char *data;
+   char *translated_data;
    gint i, len, MaxBytes;
 
    Start = (char*)Buf + plain->Start_Ofs;
@@ -172,9 +175,10 @@ static void Plain_write(DilloPlain *plai
          }
          break;
       case ST_Eol:
-         data = g_strndup(Start + i - len, len);
-         a_Dw_page_add_text(page, a_Misc_expand_tabs(data), plain->style);
-         g_free(data);
+         translated_data = a_Char_encoding_translate_encoding (
+                              plain->bw, Start + i - len, len, DE_DECODE);
+         a_Dw_page_add_text(page, a_Misc_expand_tabs(translated_data), plain->style);
+         g_free(translated_data);
          a_Dw_page_add_parbreak(page, 0, plain->style);
          if ( Start[i] == '\r' && Start[i + 1] == '\n' ) ++i;
          if ( i < MaxBytes ) ++i;
@@ -185,9 +189,10 @@ static void Plain_write(DilloPlain *plai
    }
    plain->Start_Ofs += i - len;
    if ( Eof && len ) {
-      data = g_strndup(Start + i - len, len);
-      a_Dw_page_add_text(page, a_Misc_expand_tabs(data), plain->style);
-      g_free(data);
+      translated_data = a_Char_encoding_translate_encoding (
+                           plain->bw, Start + i - len, len, DE_DECODE);
+      a_Dw_page_add_text(page, a_Misc_expand_tabs(translated_data), plain->style);
+      g_free(translated_data);
       a_Dw_page_add_parbreak(page, 0, plain->style);
       plain->Start_Ofs += len;
    }
diff -pruN dillo-0.7.2/src/prefs.c dillo-0.7.2-encodings/src/prefs.c
--- dillo-0.7.2/src/prefs.c	2003-04-16 02:16:15.000000000 +0700
+++ dillo-0.7.2-encodings/src/prefs.c	2003-05-06 16:29:34.000000000 +0700
@@ -37,6 +37,7 @@ static const struct {
    { "geometry", DRC_TOKEN_GEOMETRY },
    { "http_proxy", DRC_TOKEN_PROXY },
    { "no_proxy", DRC_TOKEN_NOPROXY },
+   { "accept_language", DRC_TOKEN_ACCEPT_LANGUAGE },
    { "link_color", DRC_TOKEN_LINK_COLOR },
    { "visited_color", DRC_TOKEN_VISITED_COLOR, },
    { "bg_color", DRC_TOKEN_BG_COLOR },
@@ -123,6 +124,10 @@ static guint Prefs_parser(GScanner *scan
       prefs.no_proxy = g_strdup(scanner->value.v_string);
       prefs.no_proxy_vec = g_strsplit(prefs.no_proxy, " ", 0);
       break;
+   case DRC_TOKEN_ACCEPT_LANGUAGE:
+      g_free (prefs.accept_language);
+      prefs.accept_language = g_strdup(scanner->value.v_string);
+      break;
    case DRC_TOKEN_LINK_COLOR:
       prefs.link_color = a_Color_parse(scanner->value.v_string,
                                        prefs.link_color);
@@ -332,6 +337,7 @@ void a_Prefs_init(void)
    prefs.http_proxy = NULL;
    prefs.no_proxy = NULL;
    prefs.no_proxy_vec = NULL;
+   prefs.accept_language = g_strdup("*");
    prefs.link_color = DW_COLOR_DEFAULT_BLUE;
    prefs.visited_color = DW_COLOR_DEFAULT_PURPLE;
    prefs.bg_color = DW_COLOR_DEFAULT_BGND;
@@ -381,6 +387,7 @@ void a_Prefs_init(void)
  */
 void a_Prefs_freeall(void)
 {
+   g_free(prefs.accept_language);
    g_free(prefs.no_proxy);
    if (prefs.no_proxy_vec)
       g_strfreev(prefs.no_proxy_vec);
diff -pruN dillo-0.7.2/src/prefs.h dillo-0.7.2-encodings/src/prefs.h
--- dillo-0.7.2/src/prefs.h	2003-04-16 02:16:15.000000000 +0700
+++ dillo-0.7.2-encodings/src/prefs.h	2003-05-06 16:29:34.000000000 +0700
@@ -29,6 +29,7 @@ typedef enum {
    DRC_TOKEN_GEOMETRY,
    DRC_TOKEN_PROXY,
    DRC_TOKEN_NOPROXY,
+   DRC_TOKEN_ACCEPT_LANGUAGE,
    DRC_TOKEN_LINK_COLOR,
    DRC_TOKEN_VISITED_COLOR,
    DRC_TOKEN_BG_COLOR,
@@ -73,6 +74,7 @@ struct _DilloPrefs {
    DilloUrl *http_proxy;
    gchar *no_proxy;
    gchar **no_proxy_vec;
+   gchar *accept_language;
    DilloUrl *home;
    guint32 link_color;
    guint32 visited_color;