X-Git-Url: http://git.marmaro.de/?p=mmh;a=blobdiff_plain;f=sbr%2Ffmt_rfc2047.c;h=92e168baca8f21afe00d0fb694570d63301ac3fe;hp=4d3fc296d0e90d987d5002035446f30884dd0e5e;hb=a485ed478abbd599d8c9aab48934e7a26733ecb1;hpb=f480c03187724e54e5391ee61b810827da319a6c diff --git a/sbr/fmt_rfc2047.c b/sbr/fmt_rfc2047.c index 4d3fc29..92e168b 100644 --- a/sbr/fmt_rfc2047.c +++ b/sbr/fmt_rfc2047.c @@ -1,6 +1,5 @@ - /* - * fmt_rfc2047.c -- decode RFC-2047 header format + * fmt_rfc2047.c -- decode RFC-2047 header format * * This code is Copyright (c) 2002, by the authors of nmh. See the * COPYRIGHT file in the root directory of the nmh distribution for @@ -15,25 +14,25 @@ #endif static signed char hexindex[] = { - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, - -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, - -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, + -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; static signed char index_64[128] = { - -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, - 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1, - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, - 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1, - -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, - 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1 + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, + 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, + 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1, + -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, + 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1 }; #define char64(c) (((unsigned char) (c) > 127) ? -1 : index_64[(unsigned char) (c)]) @@ -41,9 +40,9 @@ static signed char index_64[128] = { static int unqp (unsigned char byte1, unsigned char byte2) { - if (hexindex[byte1] == -1 || hexindex[byte2] == -1) - return -1; - return (hexindex[byte1] << 4 | hexindex[byte2]); + if (hexindex[byte1] == -1 || hexindex[byte2] == -1) + return -1; + return (hexindex[byte1] << 4 | hexindex[byte2]); } /* Check if character is linear whitespace */ @@ -60,294 +59,296 @@ unqp (unsigned char byte1, unsigned char byte2) int decode_rfc2047 (char *str, char *dst, size_t dstlen) { - char *p, *q, *pp; - char *startofmime, *endofmime; - int c, quoted_printable; - int encoding_found = 0; /* did we decode anything? */ - int between_encodings = 0; /* are we between two encodings? */ - int equals_pending = 0; /* is there a '=' pending? */ - int whitespace = 0; /* how much whitespace between encodings? */ + char *p, *q, *pp; + char *startofmime, *endofmime; + int c, quoted_printable; + int encoding_found = 0; /* did we decode anything? */ + int between_encodings = 0; /* are we between two encodings? */ + int equals_pending = 0; /* is there a '=' pending? */ + int whitespace = 0; /* how much whitespace between encodings? */ #ifdef HAVE_ICONV - int use_iconv = 0; /* are we converting encoding with iconv? */ - iconv_t cd = NULL; - int fromutf8 = 0; - char *saveq, *convbuf = NULL; - size_t savedstlen; + int use_iconv = 0; /* are we converting encoding with iconv? */ + iconv_t cd = NULL; + int fromutf8 = 0; + char *saveq, *convbuf = NULL; + size_t savedstlen; #endif - if (!str) - return 0; + if (!str) + return 0; - /* - * Do a quick and dirty check for the '=' character. - * This should quickly eliminate many cases. - */ - if (!strchr (str, '=')) - return 0; + /* + * Do a quick and dirty check for the '=' character. + * This should quickly eliminate many cases. + */ + if (!strchr (str, '=')) + return 0; - for (p = str, q = dst; *p; p++) { + for (p = str, q = dst; *p; p++) { - /* reset iconv */ + /* reset iconv */ #ifdef HAVE_ICONV - if (use_iconv) { - iconv_close(cd); - use_iconv = 0; - } + if (use_iconv) { + iconv_close(cd); + use_iconv = 0; + } #endif - /* - * If we had an '=' character pending from - * last iteration, then add it first. - */ - if (equals_pending) { - ADDCHR('='); - equals_pending = 0; - between_encodings = 0; /* we have added non-whitespace text */ - } + /* + * If we had an '=' character pending from + * last iteration, then add it first. + */ + if (equals_pending) { + ADDCHR('='); + equals_pending = 0; + between_encodings = 0; /* we have added non-whitespace text */ + } - if (*p != '=') { - /* count linear whitespace while between encodings */ - if (between_encodings && is_lws(*p)) - whitespace++; - else - between_encodings = 0; /* we have added non-whitespace text */ - ADDCHR(*p); - continue; - } + if (*p != '=') { + /* count linear whitespace while between encodings */ + if (between_encodings && is_lws(*p)) + whitespace++; + else + between_encodings = 0; /* we have added non-whitespace text */ + ADDCHR(*p); + continue; + } - equals_pending = 1; /* we have a '=' pending */ + equals_pending = 1; /* we have a '=' pending */ - /* Check for initial =? */ - if (*p == '=' && p[1] && p[1] == '?' && p[2]) { - startofmime = p + 2; + /* Check for initial =? */ + if (*p == '=' && p[1] && p[1] == '?' && p[2]) { + startofmime = p + 2; - /* Scan ahead for the next '?' character */ - for (pp = startofmime; *pp && *pp != '?'; pp++) - ; + /* Scan ahead for the next '?' character */ + for (pp = startofmime; *pp && *pp != '?'; pp++) + ; - if (!*pp) - continue; + if (!*pp) + continue; - /* Check if character set can be handled natively */ - if (!check_charset(startofmime, pp - startofmime)) { + /* Check if character set can be handled natively */ + if (!check_charset(startofmime, pp - startofmime)) { #ifdef HAVE_ICONV - /* .. it can't. We'll use iconv then. */ - *pp = '\0'; - cd = iconv_open(get_charset(), startofmime); - fromutf8 = !mh_strcasecmp(startofmime, "UTF-8"); - *pp = '?'; - if (cd == (iconv_t)-1) continue; - use_iconv = 1; + /* .. it can't. We'll use iconv then. */ + *pp = '\0'; + cd = iconv_open(get_charset(), startofmime); + fromutf8 = !mh_strcasecmp(startofmime, "UTF-8"); + *pp = '?'; + if (cd == (iconv_t)-1) continue; + use_iconv = 1; #else - continue; + continue; #endif - } - - startofmime = pp + 1; - - /* Check for valid encoding type */ - if (*startofmime != 'B' && *startofmime != 'b' && - *startofmime != 'Q' && *startofmime != 'q') - continue; - - /* Is encoding quoted printable or base64? */ - quoted_printable = (*startofmime == 'Q' || *startofmime == 'q'); - startofmime++; - - /* Check for next '?' character */ - if (*startofmime != '?') - continue; - startofmime++; - - /* - * Scan ahead for the ending ?= - * - * While doing this, we will also check if encoded - * word has any embedded linear whitespace. - */ - endofmime = NULL; - for (pp = startofmime; *pp && *(pp+1); pp++) { - if (is_lws(*pp)) { - break; - } else if (*pp == '?' && pp[1] == '=') { - endofmime = pp; - break; - } - } - if (is_lws(*pp) || endofmime == NULL) - continue; - - /* - * We've found an encoded word, so we can drop - * the '=' that was pending - */ - equals_pending = 0; - - /* - * If we are between two encoded words separated only by - * linear whitespace, then we ignore the whitespace. - * We will roll back the buffer the number of whitespace - * characters we've seen since last encoded word. - */ - if (between_encodings) { - q -= whitespace; - dstlen += whitespace; - } + } + + startofmime = pp + 1; + + /* Check for valid encoding type */ + if (*startofmime != 'B' && *startofmime != 'b' && + *startofmime != 'Q' && *startofmime != 'q') + continue; + + /* Is encoding quoted printable or base64? */ + quoted_printable = (*startofmime == 'Q' || *startofmime == 'q'); + startofmime++; + + /* Check for next '?' character */ + if (*startofmime != '?') + continue; + startofmime++; + + /* + * Scan ahead for the ending ?= + * + * While doing this, we will also check if encoded + * word has any embedded linear whitespace. + */ + endofmime = NULL; + for (pp = startofmime; *pp && *(pp+1); pp++) { + if (is_lws(*pp)) { + break; + } else if (*pp == '?' && pp[1] == '=') { + endofmime = pp; + break; + } + } + if (is_lws(*pp) || endofmime == NULL) + continue; + + /* + * We've found an encoded word, so we can drop + * the '=' that was pending + */ + equals_pending = 0; + + /* + * If we are between two encoded words separated only by + * linear whitespace, then we ignore the whitespace. + * We will roll back the buffer the number of whitespace + * characters we've seen since last encoded word. + */ + if (between_encodings) { + q -= whitespace; + dstlen += whitespace; + } #ifdef HAVE_ICONV - /* - * empty encoded text. This ensures that we don't - * malloc 0 bytes but skip on to the end - */ - if (endofmime == startofmime && use_iconv) { - use_iconv = 0; - iconv_close(cd); - } - - if (use_iconv) { - saveq = q; - savedstlen = dstlen; - q = convbuf = (char *) mh_xmalloc(endofmime - startofmime); - } + /* + * empty encoded text. This ensures that we don't + * malloc 0 bytes but skip on to the end + */ + if (endofmime == startofmime && use_iconv) { + use_iconv = 0; + iconv_close(cd); + } + + if (use_iconv) { + saveq = q; + savedstlen = dstlen; + q = convbuf = (char *) mh_xmalloc(endofmime - startofmime); + } /* ADDCHR2 is for adding characters when q is or might be convbuf: * in this case on buffer-full we want to run iconv before returning. * I apologise for the dreadful name. */ -#define ADDCHR2(C) do { *q++ = (C); dstlen--; if (!dstlen) goto iconvbuffull; } while (0) +# define ADDCHR2(C) do { *q++ = (C); dstlen--; if (!dstlen) goto iconvbuffull; } while (0) #else -#define ADDCHR2(C) ADDCHR(C) +# define ADDCHR2(C) ADDCHR(C) #endif - /* Now decode the text */ - if (quoted_printable) { - for (pp = startofmime; pp < endofmime; pp++) { - if (*pp == '=') { - c = unqp (pp[1], pp[2]); - if (c == -1) - continue; - if (c != 0) - *q++ = c; - pp += 2; - } else if (*pp == '_') { - ADDCHR2(' '); - } else { - ADDCHR2(*pp); - } - } - } else { - /* base64 */ - int c1, c2, c3, c4; - c1 = c2 = c3 = c4 = -1; - - pp = startofmime; - while (pp < endofmime) { - /* 6 + 2 bits */ - while ((pp < endofmime) && - ((c1 = char64(*pp)) == -1)) { - pp++; - } - if (pp < endofmime) { - pp++; - } - while ((pp < endofmime) && - ((c2 = char64(*pp)) == -1)) { - pp++; - } - if (pp < endofmime && c1 != -1 && c2 != -1) { - ADDCHR2((c1 << 2) | (c2 >> 4)); - pp++; - } - /* 4 + 4 bits */ - while ((pp < endofmime) && - ((c3 = char64(*pp)) == -1)) { - pp++; - } - if (pp < endofmime && c2 != -1 && c3 != -1) { - ADDCHR2(((c2 & 0xF) << 4) | (c3 >> 2)); - pp++; - } - /* 2 + 6 bits */ - while ((pp < endofmime) && - ((c4 = char64(*pp)) == -1)) { - pp++; - } - if (pp < endofmime && c3 != -1 && c4 != -1) { - ADDCHR2(((c3 & 0x3) << 6) | (c4)); - pp++; - } - } - } + /* Now decode the text */ + if (quoted_printable) { + for (pp = startofmime; pp < endofmime; pp++) { + if (*pp == '=') { + c = unqp (pp[1], pp[2]); + if (c == -1) + continue; + if (c != 0) + *q++ = c; + pp += 2; + } else if (*pp == '_') { + ADDCHR2(' '); + } else { + ADDCHR2(*pp); + } + } + } else { + /* base64 */ + int c1, c2, c3, c4; + c1 = c2 = c3 = c4 = -1; + + pp = startofmime; + while (pp < endofmime) { + /* 6 + 2 bits */ + while ((pp < endofmime) && + ((c1 = char64(*pp)) == -1)) { + pp++; + } + if (pp < endofmime) { + pp++; + } + while ((pp < endofmime) && + ((c2 = char64(*pp)) == -1)) { + pp++; + } + if (pp < endofmime && c1 != -1 && c2 != -1) { + ADDCHR2((c1 << 2) | (c2 >> 4)); + pp++; + } + /* 4 + 4 bits */ + while ((pp < endofmime) && + ((c3 = char64(*pp)) == -1)) { + pp++; + } + if (pp < endofmime && c2 != -1 && c3 != -1) { + ADDCHR2(((c2 & 0xF) << 4) | (c3 >> 2)); + pp++; + } + /* 2 + 6 bits */ + while ((pp < endofmime) && + ((c4 = char64(*pp)) == -1)) { + pp++; + } + if (pp < endofmime && c3 != -1 && c4 != -1) { + ADDCHR2(((c3 & 0x3) << 6) | (c4)); + pp++; + } + } + } #ifdef HAVE_ICONV - iconvbuffull: - /* NB that the string at convbuf is not necessarily NUL terminated here: - * q points to the first byte after the valid part. - */ - /* Convert to native character set */ - if (use_iconv) { - size_t inbytes = q - convbuf; - ICONV_CONST char *start = convbuf; - - while (inbytes) { - if (iconv(cd, &start, &inbytes, &saveq, &savedstlen) == - (size_t)-1) { - if (errno != EILSEQ) break; - /* character couldn't be converted. we output a `?' - * and try to carry on which won't work if - * either encoding was stateful */ - iconv (cd, 0, 0, &saveq, &savedstlen); - if (!savedstlen) - break; - *saveq++ = '?'; - savedstlen--; - if (!savedstlen) - break; - /* skip to next input character */ - if (fromutf8) { - for (start++;(start < q) && ((*start & 192) == 128);start++) - inbytes--; - } else - start++, inbytes--; - if (start >= q) - break; - } - } - q = saveq; - /* Stop now if (1) we hit the end of the buffer trying to do - * MIME decoding and have just iconv-converted a partial string - * or (2) our iconv-conversion hit the end of the buffer. - */ - if (!dstlen || !savedstlen) - goto buffull; - dstlen = savedstlen; - free(convbuf); - } + iconvbuffull: + /* NB that the string at convbuf is not necessarily + * NUL terminated here: + * q points to the first byte after the valid part. + */ + /* Convert to native character set */ + if (use_iconv) { + size_t inbytes = q - convbuf; + ICONV_CONST char *start = convbuf; + + while (inbytes) { + if (iconv(cd, &start, &inbytes, &saveq, &savedstlen) == + (size_t)-1) { + if (errno != EILSEQ) + break; + /* character couldn't be converted. we output a `?' + * and try to carry on which won't work if + * either encoding was stateful */ + iconv (cd, 0, 0, &saveq, &savedstlen); + if (!savedstlen) + break; + *saveq++ = '?'; + savedstlen--; + if (!savedstlen) + break; + /* skip to next input character */ + if (fromutf8) { + for (start++;(start < q) && ((*start & 192) == 128);start++) + inbytes--; + } else + start++, inbytes--; + if (start >= q) + break; + } + } + q = saveq; + /* Stop now if (1) we hit the end of the buffer trying to do + * MIME decoding and have just iconv-converted a partial string + * or (2) our iconv-conversion hit the end of the buffer. + */ + if (!dstlen || !savedstlen) + goto buffull; + dstlen = savedstlen; + free(convbuf); + } #endif - - /* - * Now that we are done decoding this particular - * encoded word, advance string to trailing '='. - */ - p = endofmime + 1; - - encoding_found = 1; /* we found (at least 1) encoded word */ - between_encodings = 1; /* we have just decoded something */ - whitespace = 0; /* re-initialize amount of whitespace */ + + /* + * Now that we are done decoding this particular + * encoded word, advance string to trailing '='. + */ + p = endofmime + 1; + + encoding_found = 1; /* we found (at least 1) encoded word */ + between_encodings = 1; /* we have just decoded something */ + whitespace = 0; /* re-initialize amount of whitespace */ + } } - } #ifdef HAVE_ICONV - if (use_iconv) iconv_close(cd); + if (use_iconv) iconv_close(cd); #endif - /* If an equals was pending at end of string, add it now. */ - if (equals_pending) - ADDCHR('='); - *q = '\0'; + /* If an equals was pending at end of string, add it now. */ + if (equals_pending) + ADDCHR('='); + *q = '\0'; - return encoding_found; + return encoding_found; buffull: - /* q is currently just off the end of the buffer, so rewind to NUL terminate */ - q--; - *q = '\0'; - return encoding_found; + /* q is currently just off the end of the buffer, so rewind to NUL terminate */ + q--; + *q = '\0'; + return encoding_found; }