Репозитории ALT
5.1: | 4.64L-alt6 |
4.1: | 4.64L-alt5 |
4.0: | 4.64L-alt4.1 |
3.0: | 4.58L-alt4 |
+backports: | 4.64L-alt0.M30.4 |
Группа :: Сети/Почта
Пакет: pine
Главная Изменения Спек Патчи Sources Загрузить Gear Bugs and FR Repocop
Патч: pine-utf8-1b.patch
Скачать
Скачать
Submission of this patch with documentation:
================================================================================
--- pine4.64/pico/composer.c
+++ pine4.64/pico/composer.c
@@ -344,7 +344,189 @@
return(TRUE);
}
+/*
+ * check_utf8 - check for UTF-8 bytes
+ * Takes two arguments:
+ * char *c - a byte of the stream
+ * char *utf_seq - a status array holding the function's state
+ * utf_seq must be provided by the caller this way:
+ * (static) char utf_seq[7] = ""; (content must be retained over calls)
+ * and must be initialized at start using: utf_seq[0] = 0;
+ *
+ * Returns NULL if an UTF-8 sequence has been started and is not completed.
+ * If an UTF-8 sequence is complete, it returns a pointer to a static string
+ * which is valid until the next use of the function.
+ * If the character is a double width character, a space(' ') is prepended
+ * to the returned string.
+ * If a character < 128 is passed, the UTF-8 state in utf_seq[] is cleared,
+ * because a valid UTF-8 sequence only consists of bytes >= 0x80. The pointer
+ * returned points to the address of the passed character to indicate this.
+ * Features: Supports UTF-8 seqencies up to 4 bytes.
+ * Todo: Instead of passing a pointer to the char and comparing the returned
+ * pointer to this address afterwards, the Interface could be changed
+ * to just pass the character as simple char(thus not requesting the
+ * address of a variable which might be declared as register) and replace
+ * the check of the return value with a check of (c & 0x80) and if this
+ * is not the case, assuming that (utf_seq[0] == 0) means that this last
+ * non-ASCII byte completed the UTF-8 sequence, while having
+ * utf_seq[0] != 0 means having an incomplete UTF-8 sequence.
+ */
+char *
+check_utf8(c, utf_seq, sizeof_utf_seq)
+ char *c;
+ char *utf_seq;
+ size_t sizeof_utf_seq;
+{
+ static char char_string[8]; /* (six UTF-8 sequence bytes + ' ' + '\0') */
+ int ix;
+ unsigned char dbl_wide[7][2][4] = {0xe1,0x84,0x80,0x00, 0xe1,0x85,0x9F,0x00,
+ 0xe2,0x8c,0xa9,0x00, 0xe2,0x8c,0xaa,0x00,
+ 0xe2,0xba,0x80,0x00, 0xed,0x9e,0xa3,0x00,
+ 0xef,0xa4,0x80,0x00, 0xef,0xa9,0xaa,0x00,
+ 0xef,0xb8,0xb0,0x00, 0xef,0xb9,0xa8,0x00,
+ 0xef,0xbc,0x81,0x00, 0xef,0xbd,0xad,0x00,
+ 0xef,0xbf,0xa0,0x00, 0xef,0xbf,0xa6,0x00};
+ if (*c & 0x80) {
+ char_string[0] = *c;
+ char_string[1] = 0;
+ if (strlen(utf_seq) == sizeof_utf_seq - 1)
+ utf_seq[0] = 0; /* don't allow a overlong UTF-8 sequence */
+ if ((*c & 0xF0) >= 0xC0) {
+ strncpy(utf_seq, char_string, sizeof_utf_seq);
+ return NULL; /* possible UTF-8 sequence, need next byte */
+ } else if (utf_seq[0]) {
+ strncat(utf_seq, char_string, sizeof_utf_seq); /* append to string */
+ switch (utf_seq[0] & 0xF0) {
+ case 0xC0 :
+ case 0xD0 :
+ strncpy(char_string, utf_seq, sizeof(char_string));
+ utf_seq[0] = 0; /* sequence complete, clear for next */
+ return char_string; /* pass the new UTF-8 sequence on */
+ case 0xE0 :
+ if (strlen(utf_seq) < 3)
+ return NULL; // 3-byte UTF-8, need next byte
+ char_string[0] = '\0'; // init
+ for (ix = 0; ix < 7; ix++)
+ if (strcmp(utf_seq, &dbl_wide[ix][0][0]) >= 0
+ && strcmp(utf_seq, &dbl_wide[ix][1][0]) <= 0) {
+ char_string[0] = ' '; /* flag as double-width char */
+ break;
+ }
+ strncat(char_string, utf_seq, sizeof(char_string));
+ utf_seq[0] = 0; // this sequence is over, clear for restart
+ return char_string; // process this UTF-8 char...
+ case 0xF0 :
+ if (strlen(utf_seq) < 4)
+ return NULL; /* 4-byte UTF-8 sequence, need next byte */
+ char_string[0] = '\0'; /* init the sequence space */
+ if ((utf_seq[1] & 0xF0) == 0xA0)
+ char_string[0] = ' '; /* flag as double-width UTF-8 char */
+ strncat(char_string, utf_seq, sizeof(char_string));
+ utf_seq[0] = 0; /* sequence complete, clear for next */
+ return char_string; /* pass the new UTF-8 sequence on */
+ }
+ }
+ }
+ utf_seq[0] = 0; /* clear sequence buffer in case of an invalid sequence */
+ return c; /* single-byte, NON-UTF-8 chars are process it as usual */
+}
+
+/*
+ * wrapper to check_utf8 for pico, if not in UTF-8 mode, do not check UTF-8
+ */
+char *
+pico_check_utf8(c, utf_seq, sizeof_utf_seq)
+ char *c;
+ char *utf_seq;
+ size_t sizeof_utf_seq;
+{
+ if(!(Pmaster->pine_flags & P_UNICODE))
+ return c;
+ return check_utf8(c, utf_seq, sizeof_utf_seq);
+}
+
+/*
+ * Get the number of columns which are filled by the text in the current
+ * line of LineEdit(from the start of the line to the current position)
+ */
+static int
+count_screencols(void)
+{
+ char utf_seq[7] = "", *cp, *r;
+ int seq = 0, w = 0;
+
+ for(cp = ods.cur_l->text; *cp && cp < ods.cur_l->text + ods.p_off;
+ cp++) {
+ if (!(r = pico_check_utf8(cp, utf_seq, sizeof(utf_seq)))) {
+ seq = 1;
+ continue;
+ }
+ if (seq)
+ w++;
+ seq = 0;
+ if (r == cp)
+ w++;
+ else if (*r == ' ')
+ w++;
+ }
+ return w;
+}
+/*
+ * Get the offset in screen positions which must be subsctracted from the
+ * byte count in the LineEdit line in order to reach the line position on
+ * screen(because of double wide characters and multible UTF-8 bytes)
+ */
+static int
+offset_on_screen(void)
+{
+ return ods.p_off - count_screencols();
+}
+
+/*
+ * Move current position in LineEdit one character left, return the number
+ * of byte positons which were neccesary to jump left in order to
+ * arrive at the start of the previous multibyte character(UTF-8).
+ */
+static int
+LineEditCharLeft()
+{
+ int col_right = ods.p_off, cols = count_screencols();
+
+ do
+ if (--ods.p_off < 0)
+ break;
+ while (count_screencols() - cols == -1);
+
+ ods.p_off++;
+
+ if (col_right - ods.p_off > 0)
+ return col_right - ods.p_off;
+
+ do
+ if (--ods.p_off < 0)
+ break;
+ while (count_screencols() - cols == -2);
+
+ ods.p_off++;
+
+ return col_right - ods.p_off;
+}
+
+/*
+ * Move current position in LineEdit one character right, if UTF-8
+ * mode is active, the ods.p_off is assumed to be at the start of
+ * a UTF-8 sequence or at a normal ASCII character. It is moved to
+ * the next character, jumping past the end of the current UTF-8
+ * sequence, if UTF8 mode is active.
+ */
+static void
+LineEditCharRight()
+{
+ char utf_seq[7] = "";
+ while(ods.p_off < ods.p_len && ods.cur_l->text[ods.p_off] &&
+ !pico_check_utf8(ods.cur_l->text + ods.p_off++, utf_seq, sizeof(utf_seq)));
+}
/*
* ResizeHeader - Handle resizing display when SIGWINCH received.
@@ -397,7 +579,7 @@
PaintBody(0);
if(ComposerEditing)
- movecursor(ods.p_line, ods.p_off+headents[ods.cur_e].prlen);
+ HeaderPaintCursor();
(*term.t_flush)();
return(TRUE);
@@ -1596,6 +1778,7 @@
int skipmove = 0;
char *strng;
int last_key; /* last keystroke */
+ unsigned char utf_seq[7] = "";
strng = ods.cur_l->text; /* initialize offsets */
ods.p_len = strlen(strng);
@@ -1678,7 +1861,7 @@
}
clearcursor();
- movecursor(ods.p_line, ods.p_off+headents[ods.cur_e].prlen);
+ HeaderPaintCursor();
if(ch == NODATA) /* GetKey timed out */
continue;
@@ -1688,7 +1871,7 @@
if(mpresf){ /* blast old messages */
if(mpresf++ > NMMESSDELAY){ /* every few keystrokes */
mlerase();
- movecursor(ods.p_line, ods.p_off+headents[ods.cur_e].prlen);
+ HeaderPaintCursor();
}
}
@@ -1734,12 +1917,40 @@
/*
* then find out where things fit...
+ *
+ * For UTF-8, the < LINELEN check should need to do it's
+ * calculation based on count_screencols() plus the width
+ * of the new char as provided by pico_check_utf8.
+ * The buffer size may need to be increased for this.
*/
if(ods.p_len < LINELEN()){
CELL c;
+ char tmp;
- c.c = ch;
c.a = 0;
+ if(Pmaster->pine_flags & P_UNICODE) {
+ tmp = ch;
+ char * chp = pico_check_utf8(&tmp, utf_seq, sizeof(utf_seq));
+ if (chp == NULL)
+ continue; /* on to the next! */
+ if (chp != &tmp && *chp == ' ')
+ chp++;
+ if (*chp & 0x80) {
+ while (*chp && ods.p_len < LINELEN()) {
+ c.c = *chp++;
+ pinsert(c); /* add char to str */
+ }
+ /* update the display: */
+ PaintHeader(COMPOSER_TOP_LINE, TRUE);
+ /* If end char was inserted, set physical .. */
+ if (ods.p_off == ods.p_len)
+ /* cursor pos on next movecursor_offset: */
+ movecursor_offset(-1, 0, 0);
+ continue; /* on to the next! */
+ }
+ }
+
+ c.c = ch;
if(pinsert(c)){ /* add char to str */
skipmove++; /* must'a been optimal */
continue; /* on to the next! */
@@ -1776,6 +1987,7 @@
}
}
else { /* interpret ch as a command */
+ utf_seq[0] = '\0';
switch (ch = normalize_cmd(ch, ckm, 2)) {
case (CTRL|'\\') :
if (ch = GetAccent())
@@ -1868,9 +2080,7 @@
case (CTRL|'F') :
case KEY_RIGHT: /* move character right */
if(ods.p_off < ods.p_len){
- pputc(pscr(ods.p_line,
- (ods.p_off++)+headents[ods.cur_e].prlen)->c,0);
- skipmove++;
+ LineEditCharRight();
continue;
}
else if(gmode & MDHDRONLY)
@@ -1882,7 +2092,7 @@
case (CTRL|'B') :
case KEY_LEFT : /* move character left */
if(ods.p_off > 0){
- ods.p_off--;
+ LineEditCharLeft();
continue;
}
if(ods.p_line != COMPOSER_TOP_LINE)
@@ -1917,7 +2127,8 @@
continue;
}
- pputc(strng[ods.p_off++], 0); /* drop through and rubout */
+ LineEditCharRight(); /* jump to next char */
+ /* and fall thru */
case DEL : /* blast previous char */
case (CTRL|'H') :
@@ -1931,20 +2142,27 @@
continue;
}
- if(ods.p_off > 0){ /* just shift left one char */
- ods.p_len--;
+ if(ods.p_off > 0){ /* shift left one char */
+ int todelete = LineEditCharLeft();
+
+ ods.p_len -= todelete;
+
headents[ods.cur_e].dirty = 1;
if(ods.p_len == 0)
headents[ods.cur_e].sticky = 0;
else
headents[ods.cur_e].sticky = 1;
- tbufp = &strng[--ods.p_off];
- while(*tbufp++ != '\0')
- tbufp[-1] = *tbufp;
tbufp = &strng[ods.p_off];
+
+ while(*tbufp++ != '\0')
+ tbufp[-1] = tbufp[todelete-1];
+
if(pdel()) /* physical screen delete */
skipmove++; /* must'a been optimal */
+
+ /* needed if pine bgcolor != terminal background color */
+ PaintHeader(ods.p_line, TRUE);
}
else{ /* may have work to do */
if(ods.cur_l->prev == NULL){
@@ -1955,18 +2173,16 @@
ods.p_line--;
ods.cur_l = ods.cur_l->prev;
strng = ods.cur_l->text;
- if((i=strlen(strng)) > 0){
- strng[i-1] = '\0'; /* erase the character */
- ods.p_off = i-1;
+ if((ods.p_off=strlen(strng)) > 0){
+ ods.p_off -= LineEditCharLeft() - 1;
+ strng[ods.p_off] = '\0'; /* erase the character */
}
- else{
+ else
headents[ods.cur_e].sticky = 0;
- ods.p_off = 0;
- }
-
- tbufp = &strng[ods.p_off];
}
+ tbufp = &strng[ods.p_off];
+
if((status = FormatLines(ods.cur_l, "", LINELEN(),
headents[ods.cur_e].break_on_comma,0))==-1){
(*term.t_beep)();
@@ -1991,7 +2207,7 @@
PaintBody(1);
}
- movecursor(ods.p_line, ods.p_off+headents[ods.cur_e].prlen);
+ HeaderPaintCursor();
if(skipmove)
continue;
@@ -2016,7 +2232,8 @@
void
HeaderPaintCursor()
{
- movecursor(ods.p_line, ods.p_off+headents[ods.cur_e].prlen);
+ movecursor_offset(ods.p_line, ods.p_off + headents[ods.cur_e].prlen,
+ offset_on_screen());
}
--- pine4.64/pico/display.c
+++ pine4.64/pico/display.c
@@ -1295,7 +1295,22 @@
}
}
+void
+movecursor_offset(row, col, offs)
+int row, col, offs;
+{
+ static int force_next = 0;
+ if(row == -1) {
+ force_next = row;
+ return;
+ }
+ if(row!=ttrow || col!=ttcol || force_next) {
+ (*term.t_move)(row, col - offs);
+ ttrow = row;
+ ttcol = col;
+ }
+}
/*
* Send a command to the terminal to move the hardware cursor to row "row"
--- pine4.64/pico/efunc.h
+++ pine4.64/pico/efunc.h
@@ -118,6 +118,7 @@
extern VARS_TO_SAVE *save_pico_state PROTO((void));
extern void restore_pico_state PROTO((VARS_TO_SAVE *));
extern void free_pico_state PROTO((VARS_TO_SAVE *));
+extern char *check_utf8 PROTO((char *, char *, size_t));
extern void HeaderPaintCursor PROTO((void));
extern void PaintBody PROTO((int));
--- pine4.64/pine/osdep/termout.unx
+++ pine4.64/pine/osdep/termout.unx
@@ -750,7 +750,8 @@
register unsigned int ch;
int new_esc_len;
{
- static int esc_len = 0;
+ static int esc_len = 0, seq = 0;
+ static unsigned char utf_seq[7] = "";
if(ps_global->in_init_seq /* silent */
|| (F_ON(F_BLANK_KEYMENU, ps_global) /* or bottom, */
@@ -759,6 +768,35 @@
&& _col + 1 == ps_global->ttyo->screen_cols))
return;
+ /* Treat UTF-8 sequences if we are not in a special escape sequence */
+ if(esc_len <= 0) {
+ unsigned char *chp;
+ char tmp;
+ tmp = (char)ch;
+ if ((chp = pine_check_utf8(&tmp, utf_seq, sizeof(utf_seq))) == NULL) {
+ seq = 1; /* flag that we are in a open UTF-8 sequence */
+ return; /* UTF-8 sequence not complete, need next char */
+ }
+ if (chp != (unsigned char*)&tmp) {
+ seq = 0; /* flag that we are not in a open UTF-8 sequence */
+ _col++;
+ if (*chp == ' ') {
+ if(++_col > ps_global->ttyo->screen_cols) {
+ printf("\342\200\246"); /* UTF-8 points... */
+ goto wrap;
+ }
+ chp++;
+ }
+ while(*chp)
+ putchar(*chp++);
+ return;
+ }
+ if (seq) { /* incomplete UTF-8 sequence */
+ seq = 0; /* flag that we are not in a open UTF-8 sequence */
+ putchar('?'); /* print question mark at place of sequence */
+ }
+ }
+
if(ch == LINE_FEED || ch == RETURN || ch == BACKSPACE || ch == BELL
|| ch == TAB || ch == ESCAPE){
switch(ch){
@@ -836,7 +874,9 @@
like case 1. A little expensive but worth it to avoid problems
with terminals configured so they don't match termcap
*/
- if(_col == ps_global->ttyo->screen_cols) {
+ if(_col >= ps_global->ttyo->screen_cols) {
+wrap:
+ dprint(9, (debugfile, "%d,%02d, wrap(%x)\n",_line,_col,ch));
_col = 0;
if(_line + 1 < ps_global->ttyo->screen_rows)
_line++;
--- pine4.64/pine/reply.c
+++ pine4.64/pine/reply.c
@@ -1566,18 +1566,32 @@
&& (decoded[0] == 'R' || decoded[0] == 'r')
&& (decoded[1] == 'E' || decoded[1] == 'e')){
- if(decoded[2] == ':')
- sprintf(buf, "%.*s", buflen-1, subject);
+ if(decoded[2] == ':'){
+ strncpy(buf, subject, l);
+ buf[l]='\0';
+ }
else if((decoded[2] == '[') && (p = strchr(decoded, ']'))){
p++;
while(*p && isspace((unsigned char)*p)) p++;
- if(p[0] == ':')
- sprintf(buf, "%.*s", buflen-1, subject);
+ if(p[0] == ':'){
+ strncpy(buf, subject, l);
+ buf[l]='\0';
+ }
}
}
- if(!buf[0])
- sprintf(buf, "Re: %.*s", buflen-1,
- (subject && *subject) ? subject : "your mail");
+ if(!buf[0]) {
+ /*
+ * Used to be
+ * sprintf(buf, "Re: %.200s", (subject && *subject) ? subject :
+ * "your mail");
+ * Some implementations of sprintf() are locale-dependent and
+ * don't pass through an invalid sequence of bytes blindly.
+ * Use strncpy() instead:
+ */
+ strcpy(buf,"Re: ");
+ strncpy(buf+4, (subject && *subject) ? subject : "your mail", l);
+ buf[l+4]='\0';
+ }
fs_give((void **) &tmp);
return(buf);
--- pine4.64/pine/strings.c
+++ pine4.64/pine/strings.c
@@ -682,6 +682,151 @@
(*d)++;
}
+/* ------------------------- UTF-8 functions -------------------------- */
+
+char *
+pine_check_utf8(c, utf_seq, sizeof_utf_seq)
+ char *c;
+ char *utf_seq;
+ size_t sizeof_utf_seq;
+{
+ if(!ps_global->VAR_CHAR_SET
+ || strucmp(ps_global->VAR_CHAR_SET, "UTF-8"))
+ return c;
+ return check_utf8(c, utf_seq, sizeof_utf_seq);
+}
+
+/*
+ * Like istrncpy but since it's used in the mail index, it also converts
+ * line feed and tab to space to prevent odd effects in mail index paint.
+ *
+ * If charset is UTF-8, do not count bytes for the string width but real
+ * screen widths. The control char and escape sequence filter is also not
+ * active inside UTF-8 sequencies there because UTF-8 requires bytes in
+ * the range from 0x80 to 0x9f to be processed. If a series of not recognized
+ * characters in the range of 0x80 to 0xff is encountered, '?' is copied.
+ */
+void
+charset_istrncpy(dest, source, width, padding)
+ char *dest;
+ char *source; /* const */
+ int width;
+ int padding;
+{
+ char *cp, *chp, *destp = dest;
+ int seq = 0, screencols=0;
+ unsigned char utf_seq[10] = "";
+
+ for(cp = source; *cp && screencols < width; cp++){
+ if((chp = pine_check_utf8(cp, utf_seq, sizeof(utf_seq))) == NULL){
+ seq = 1;
+ continue;
+ }
+ if(chp != cp){
+ seq = 0;
+ screencols++;
+ if(*chp == ' '){
+ if(screencols >= width){
+ sstrcpy(&destp, "\342\200\246");
+ break; /* UTF-8 points... */
+ }
+ screencols++;
+ chp++;
+ }
+ while(*chp)
+ *destp++ = *chp++;
+ *destp = '\0';
+ continue;
+ }
+ if(seq){
+ seq = 0;
+ screencols++;
+ *destp++ = '?';
+ }
+ screencols++;
+ if(*cp && FILTER_THIS(*cp)
+ && !(*(cp+1) && *cp == ESCAPE && match_escapes(cp+1))){
+ *destp++ = '^';
+ if(screencols < width){
+ screencols++;
+ *destp++ = (*cp & 0x7f) + '@';
+ }
+ }
+ else if(*cp == '\n' || *cp == '\t')
+ *destp++ = ' ';
+ else
+ *destp++ = *cp;
+ *destp = '\0';
+ }
+ if(padding == 1)
+ while(screencols < width){
+ screencols++;
+ *destp++ = ' ';
+ }
+ *destp = '\0';
+}
+
+/*
+ * Like istrncpy but do not remove UTF-8 sequencies.
+ *
+ * The control char and escape sequence filter is also not active inside
+ * UTF-8 sequencies because UTF-8 requires bytes in the range from 0x80
+ * to 0x9f to be processed. If a series of not recognized characters in
+ * the range of 0x80 to 0xff is encountered, '?' is copied.
+ */
+static char *
+utf8_istrncpy(dest, cp, length)
+ char *dest;
+ char *cp; /* const */
+ int length;
+{
+ char *chp, *destp = dest;
+ int seq = 0;
+ unsigned char utf_seq[7] = "";
+
+ *destp = '\0';
+ for(; length > 0 && *cp; cp++){
+ if((chp = check_utf8(cp, utf_seq, sizeof(utf_seq))) == NULL) {
+ seq = 1;
+ continue;
+ }
+ if(chp != cp){
+ seq = 0;
+ if(*chp == ' ')
+ chp++;
+ if(strlen(chp) < length){
+ while(*chp && length--)
+ *destp++ = *chp++;
+ *destp = '\0';
+ continue;
+ }
+ while(length--)
+ *destp++ = '.';
+ *destp = '\0';
+ break;
+ }
+ if(seq){
+ *destp++ = '?';
+ length--;
+ seq = 0;
+ }
+ if(*cp && FILTER_THIS(*cp)
+ && !(*(cp+1) && *cp == ESCAPE && match_escapes(cp+1))){
+ if(length-- > 0){
+ *destp++ = '^';
+
+ if(length-- > 0)
+ *destp++ = (*cp & 0x7f) + '@';
+ }
+ }
+ else if(length-- > 0)
+ *destp++ = *cp;
+ *destp = '\0';
+ }
+
+ return dest;
+}
+
/*----------------------------------------------------------------------
copy at most n chars of the source string onto the destination string
@@ -720,6 +865,10 @@
/* src is either original source or the translation string */
s = src;
+ if(!ps_global->pass_ctrl_chars && ps_global->VAR_CHAR_SET
+ && !strucmp(ps_global->VAR_CHAR_SET, "UTF-8"))
+ return utf8_istrncpy(d, s, n);
+
/* copy while escaping evil chars */
do
if(*s && FILTER_THIS(*s)){