X-Git-Url: http://git.marmaro.de/?p=mmh;a=blobdiff_plain;f=sbr%2Ffmt_rfc2047.c;h=9f5b26d092c87bfc115cb1c805a53e1536dc6888;hp=b4bf4eb893f194252c8753513d7a856d39f39fb6;hb=6630f05d0a6d631c9ed2edfef9951df892287794;hpb=6c42153ad9362cc676ea66563bf400d7511b3b68 diff --git a/sbr/fmt_rfc2047.c b/sbr/fmt_rfc2047.c index b4bf4eb..9f5b26d 100644 --- a/sbr/fmt_rfc2047.c +++ b/sbr/fmt_rfc2047.c @@ -10,6 +10,10 @@ */ #include +#ifdef HAVE_ICONV +# include +# include +#endif static signed char hexindex[] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, @@ -51,8 +55,11 @@ unqp (unsigned char byte1, unsigned char byte2) * Decode the string as a RFC-2047 header field */ +/* Append C to the destination buffer through q and decrement dstlen; when dstlen reaches zero, bomb out by jumping to the buffull label */ +#define ADDCHR(C) do { *q++ = (C); dstlen--; if (!dstlen) goto buffull; } while (0) + int -decode_rfc2047 (char *str, char *dst) +decode_rfc2047 (char *str, char *dst, size_t dstlen) { char *p, *q, *pp; char *startofmime, *endofmime; @@ -61,6 +68,13 @@ decode_rfc2047 (char *str, char *dst) int between_encodings = 0; /* are we between two encodings? */ int equals_pending = 0; /* is there a '=' pending? */ int whitespace = 0; /* how much whitespace between encodings? */ +#ifdef HAVE_ICONV + int use_iconv = 0; /* are we converting encoding with iconv? */ + iconv_t cd; + int fromutf8 = 0; + char *saveq, *convbuf = NULL; + size_t savedstlen; +#endif if (!str) return 0; @@ -73,12 +87,20 @@ decode_rfc2047 (char *str, char *dst) return 0; for (p = str, q = dst; *p; p++) { + + /* reset iconv: close the conversion descriptor left open by an earlier iteration */ +#ifdef HAVE_ICONV + if (use_iconv) { + iconv_close(cd); + use_iconv = 0; + } +#endif /* * If we had an '=' character pending from * last iteration, then add it first. 
*/ if (equals_pending) { - *q++ = '='; + ADDCHR('='); equals_pending = 0; between_encodings = 0; /* we have added non-whitespace text */ } @@ -89,7 +111,7 @@ decode_rfc2047 (char *str, char *dst) whitespace++; else between_encodings = 0; /* we have added non-whitespace text */ - *q++ = *p; + ADDCHR(*p); continue; } @@ -106,9 +128,20 @@ decode_rfc2047 (char *str, char *dst) if (!*pp) continue; - /* Check if character set is OK */ - if (!check_charset(startofmime, pp - startofmime)) + /* Check if character set can be handled natively */ + if (!check_charset(startofmime, pp - startofmime)) { +#ifdef HAVE_ICONV + /* .. it can't. We'll use iconv then. */ + *pp = '\0'; + cd = iconv_open(get_charset(), startofmime); + fromutf8 = !strcasecmp(startofmime, "UTF-8"); + *pp = '?'; + if (cd == (iconv_t)-1) continue; + use_iconv = 1; +#else continue; +#endif + } startofmime = pp + 1; @@ -156,8 +189,26 @@ decode_rfc2047 (char *str, char *dst) * We will roll back the buffer the number of whitespace * characters we've seen since last encoded word. */ - if (between_encodings) + if (between_encodings) { q -= whitespace; + dstlen += whitespace; + } + +#ifdef HAVE_ICONV + if (use_iconv) { + saveq = q; + savedstlen = dstlen; + if (!(q = convbuf = (char *)malloc(endofmime - startofmime))) + continue; /* NOTE(review): q is NULL at this point and saveq is not restored before continuing -- confirm */ + } +/* ADDCHR2 is for adding characters when q is or might be convbuf: + * in this case on buffer-full we want to run iconv before returning, + * so it jumps to iconvbuffull instead of buffull. + * NOTE(review): the "*q++ = c" store in the quoted-printable branch below + * does not go through ADDCHR2, so dstlen is not decremented there -- confirm + * whether that is intended. + * I apologise for the dreadful name. 
+ */ +#define ADDCHR2(C) do { *q++ = (C); dstlen--; if (!dstlen) goto iconvbuffull; } while (0) +#else +#define ADDCHR2(C) ADDCHR(C) +#endif /* Now decode the text */ if (quoted_printable) { @@ -170,9 +221,9 @@ decode_rfc2047 (char *str, char *dst) *q++ = c; pp += 2; } else if (*pp == '_') { - *q++ = ' '; + ADDCHR2(' '); } else { - *q++ = *pp; + ADDCHR2(*pp); } } } else { @@ -194,7 +245,7 @@ decode_rfc2047 (char *str, char *dst) pp++; } if (pp < endofmime && c1 != -1 && c2 != -1) { - *q++ = (c1 << 2) | (c2 >> 4); + ADDCHR2((c1 << 2) | (c2 >> 4)); pp++; } /* 4 + 4 bits */ @@ -203,7 +254,7 @@ decode_rfc2047 (char *str, char *dst) pp++; } if (pp < endofmime && c2 != -1 && c3 != -1) { - *q++ = ((c2 & 0xF) << 4) | (c3 >> 2); + ADDCHR2(((c2 & 0xF) << 4) | (c3 >> 2)); pp++; } /* 2 + 6 bits */ @@ -212,12 +263,58 @@ decode_rfc2047 (char *str, char *dst) pp++; } if (pp < endofmime && c3 != -1 && c4 != -1) { - *q++ = ((c3 & 0x3) << 6) | (c4); + ADDCHR2(((c3 & 0x3) << 6) | (c4)); pp++; } } } +#ifdef HAVE_ICONV + iconvbuffull: + /* NB that the string at convbuf is not necessarily NUL terminated here: + * q points to the first byte after the valid part. + */ + /* Convert to native character set. NOTE(review): when use_iconv is 0 this block is skipped, including its "goto buffull" check, even if ADDCHR2 jumped here with dstlen == 0 -- confirm the remaining code handles that */ + if (use_iconv) { + size_t inbytes = q - convbuf; + ICONV_CONST char *start = convbuf; + + while (inbytes) { + if (iconv(cd, &start, &inbytes, &saveq, &savedstlen) == + (size_t)-1) { + if (errno != EILSEQ) break; + /* character couldn't be converted. we output a `?' 
+ * and try to carry on which won't work if + * either encoding was stateful */ + iconv (cd, 0, 0, &saveq, &savedstlen); + if (!savedstlen) + break; + *saveq++ = '?'; + savedstlen--; + if (!savedstlen) + break; + /* skip to next input character */ + if (fromutf8) { + for (start++;(start < q) && ((*start & 192) == 128);start++) + inbytes--; + } else + start++, inbytes--; + if (start >= q) + break; + } + } + q = saveq; + /* Stop now if (1) we hit the end of the buffer trying to do + * MIME decoding and have just iconv-converted a partial string + * or (2) our iconv-conversion hit the end of the buffer. + * NOTE(review): this jump bypasses free(convbuf) below, and the code visible + * at buffull does not call iconv_close(cd) -- confirm nothing leaks. + */ + if (!dstlen || !savedstlen) + goto buffull; + dstlen = savedstlen; + free(convbuf); + } +#endif + /* * Now that we are done decoding this particular * encoded word, advance string to trailing '='. @@ -229,11 +326,20 @@ decode_rfc2047 (char *str, char *dst) whitespace = 0; /* re-initialize amount of whitespace */ } } +#ifdef HAVE_ICONV + if (use_iconv) iconv_close(cd); +#endif /* If an equals was pending at end of string, add it now. */ if (equals_pending) - *q++ = '='; + ADDCHR('='); *q = '\0'; return encoding_found; + + buffull: + /* q is currently just off the end of the buffer, so rewind one byte and NUL terminate there (this overwrites the last byte stored) */ + q--; + *q = '\0'; + return encoding_found; }