Replace mh_xmalloc() with mh_xcalloc()
[mmh] / sbr / fmt_rfc2047.c
index 87fddb9..e9db55e 100644 (file)
@@ -1,13 +1,10 @@
-
 /*
- * fmt_rfc2047.c -- decode RFC-2047 header format 
- *
- * $Id$
- *
- * This code is Copyright (c) 2002, by the authors of nmh.  See the
- * COPYRIGHT file in the root directory of the nmh distribution for
- * complete copyright information.
- */
+** fmt_rfc2047.c -- decode RFC-2047 header format
+**
+** This code is Copyright (c) 2002, by the authors of nmh.  See the
+** COPYRIGHT file in the root directory of the nmh distribution for
+** complete copyright information.
+*/
 
 #include <h/mh.h>
 #include <h/utils.h>
 #endif
 
 static signed char hexindex[] = {
-    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
-    -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-    -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+        0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
+       -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+       -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
 };
 
 static signed char index_64[128] = {
-    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
-    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
-    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
-    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
-    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
-    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
+       -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+       -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+       -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
+       52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
+       -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
+       15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
+       -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
+       41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
 };
 
 #define char64(c) (((unsigned char) (c) > 127) ? -1 : index_64[(unsigned char) (c)])
 
 static int
-unqp (unsigned char byte1, unsigned char byte2)
+unqp(unsigned char byte1, unsigned char byte2)
 {
-    if (hexindex[byte1] == -1 || hexindex[byte2] == -1)
-       return -1;
-    return (hexindex[byte1] << 4 | hexindex[byte2]);
+       if (hexindex[byte1] == -1 || hexindex[byte2] == -1)
+               return -1;
+       return (hexindex[byte1] << 4 | hexindex[byte2]);
 }
 
 /* Check if character is linear whitespace */
@@ -53,302 +50,325 @@ unqp (unsigned char byte1, unsigned char byte2)
 
 
 /*
- * Decode the string as a RFC-2047 header field
- */
+** Decode the string as a RFC-2047 header field
+*/
 
 /* Add character to the destination buffer, and bomb out if it fills up */
 #define ADDCHR(C) do { *q++ = (C); dstlen--; if (!dstlen) goto buffull; } while (0)
 
 int
-decode_rfc2047 (char *str, char *dst, size_t dstlen)
+decode_rfc2047(char *str, char *dst, size_t dstlen)
 {
-    char *p, *q, *pp;
-    char *startofmime, *endofmime;
-    int c, quoted_printable;
-    int encoding_found = 0;    /* did we decode anything?                */
-    int between_encodings = 0; /* are we between two encodings?          */
-    int equals_pending = 0;    /* is there a '=' pending?                */
-    int whitespace = 0;                /* how much whitespace between encodings? */
+       char *p, *q, *pp;
+       char *startofmime, *endofmime;
+       int c, quoted_printable;
+       int encoding_found = 0;  /* did we decode anything? */
+       int between_encodings = 0;  /* are we between two encodings? */
+       int equals_pending = 0;  /* is there a '=' pending?  */
+       int whitespace = 0;  /* how much whitespace between encodings? */
 #ifdef HAVE_ICONV
-    int use_iconv = 0;          /* are we converting encoding with iconv? */
-    iconv_t cd = NULL;
-    int fromutf8 = 0;
-    char *saveq, *convbuf = NULL;
-    size_t savedstlen;
+       int use_iconv = 0;  /* are we converting encoding with iconv? */
+       iconv_t cd = NULL;
+       int fromutf8 = 0;
+       char *saveq, *convbuf = NULL;
+       size_t savedstlen;
 #endif
 
-    if (!str)
-       return 0;
+       if (!str)
+               return 0;
 
-    /*
-     * Do a quick and dirty check for the '=' character.
-     * This should quickly eliminate many cases.
-     */
-    if (!strchr (str, '='))
-       return 0;
+       /*
+       ** Do a quick and dirty check for the '=' character.
+       ** This should quickly eliminate many cases.
+       */
+       if (!strchr(str, '='))
+               return 0;
 
-    for (p = str, q = dst; *p; p++) {
+       for (p = str, q = dst; *p; p++) {
 
-        /* reset iconv */
+               /* reset iconv */
 #ifdef HAVE_ICONV
-        if (use_iconv) {
-           iconv_close(cd);
-           use_iconv = 0;
-        }
+               if (use_iconv) {
+                       iconv_close(cd);
+                       use_iconv = 0;
+               }
 #endif
-       /*
-        * If we had an '=' character pending from
-        * last iteration, then add it first.
-        */
-       if (equals_pending) {
-           ADDCHR('=');
-           equals_pending = 0;
-           between_encodings = 0;      /* we have added non-whitespace text */
-       }
+               /*
+               ** If we had an '=' character pending from
+               ** last iteration, then add it first.
+               */
+               if (equals_pending) {
+                       ADDCHR('=');
+                       equals_pending = 0;
+                       between_encodings = 0;  /* we have added non-whitespace text */
+               }
 
-       if (*p != '=') {
-           /* count linear whitespace while between encodings */
-           if (between_encodings && is_lws(*p))
-               whitespace++;
-           else
-               between_encodings = 0;  /* we have added non-whitespace text */
-           ADDCHR(*p);
-           continue;
-       }
+               if (*p != '=') {
+                       /* count linear whitespace while between encodings */
+                       if (between_encodings && is_lws(*p))
+                               whitespace++;
+                       else
+                               between_encodings = 0;  /* we have added non-whitespace text */
+                       ADDCHR(*p);
+                       continue;
+               }
 
-       equals_pending = 1;     /* we have a '=' pending */
+               equals_pending = 1;  /* we have a '=' pending */
 
-       /* Check for initial =? */
-       if (*p == '=' && p[1] && p[1] == '?' && p[2]) {
-           startofmime = p + 2;
+               /* Check for initial =? */
+               if (*p == '=' && p[1] && p[1] == '?' && p[2]) {
+                       startofmime = p + 2;
 
-           /* Scan ahead for the next '?' character */
-           for (pp = startofmime; *pp && *pp != '?'; pp++)
-               ;
+                       /* Scan ahead for the next '?' character */
+                       for (pp = startofmime; *pp && *pp != '?'; pp++)
+                               ;
 
-           if (!*pp)
-               continue;
+                       if (!*pp)
+                               continue;
+                       *pp = '\0';
 
-           /* Check if character set can be handled natively */
-           if (!check_charset(startofmime, pp - startofmime)) {
+                       /* Check if character set can be handled natively */
+                       if (!is_native_charset(startofmime)) {
 #ifdef HAVE_ICONV
-               /* .. it can't. We'll use iconv then. */
-               *pp = '\0';
-               cd = iconv_open(get_charset(), startofmime);
-               fromutf8 = !mh_strcasecmp(startofmime, "UTF-8");
-               *pp = '?';
-                if (cd == (iconv_t)-1) continue;
-               use_iconv = 1;
+                               /* .. it can't. We'll use iconv then. */
+                               cd = iconv_open(get_charset(), startofmime);
+                               fromutf8 = !mh_strcasecmp(startofmime, "UTF-8");
+                               *pp = '?';
+                               if (cd == (iconv_t)-1)
+                                       continue;
+                               use_iconv = 1;
 #else
-               continue;
+                               *pp = '?';
+                               continue;
 #endif
-           }
-
-           startofmime = pp + 1;
-
-           /* Check for valid encoding type */
-           if (*startofmime != 'B' && *startofmime != 'b' &&
-               *startofmime != 'Q' && *startofmime != 'q')
-               continue;
-
-           /* Is encoding quoted printable or base64? */
-           quoted_printable = (*startofmime == 'Q' || *startofmime == 'q');
-           startofmime++;
-
-           /* Check for next '?' character */
-           if (*startofmime != '?')
-               continue;
-           startofmime++;
-
-           /*
-            * Scan ahead for the ending ?=
-            *
-            * While doing this, we will also check if encoded
-            * word has any embedded linear whitespace.
-            */
-           endofmime = NULL;
-           for (pp = startofmime; *pp && *(pp+1); pp++) {
-               if (is_lws(*pp)) {
-                   break;
-               } else if (*pp == '?' && pp[1] == '=') {
-                   endofmime = pp;
-                   break;
-               }
-           }
-           if (is_lws(*pp) || endofmime == NULL)
-               continue;
-
-           /*
-            * We've found an encoded word, so we can drop
-            * the '=' that was pending
-            */
-           equals_pending = 0;
-
-           /*
-            * If we are between two encoded words separated only by
-            * linear whitespace, then we ignore the whitespace.
-            * We will roll back the buffer the number of whitespace
-            * characters we've seen since last encoded word.
-            */
-           if (between_encodings) {
-               q -= whitespace;
-               dstlen += whitespace;
-           }
+                       }
+
+                       *pp = '?';
+                       startofmime = pp + 1;
+
+                       /* Check for valid encoding type */
+                       if (*startofmime != 'B' && *startofmime != 'b' &&
+                               *startofmime != 'Q' && *startofmime != 'q')
+                               continue;
+
+                       /* Is encoding quoted printable or base64? */
+                       quoted_printable = (*startofmime == 'Q' || *startofmime == 'q');
+                       startofmime++;
+
+                       /* Check for next '?' character */
+                       if (*startofmime != '?')
+                               continue;
+                       startofmime++;
+
+                       /*
+                       ** Scan ahead for the ending ?=
+                       **
+                       ** While doing this, we will also check if encoded
+                       ** word has any embedded linear whitespace.
+                       */
+                       endofmime = NULL;
+                       for (pp = startofmime; *pp && *(pp+1); pp++) {
+                               if (is_lws(*pp)) {
+                                       break;
+                               } else if (*pp == '?' && pp[1] == '=') {
+                                       endofmime = pp;
+                                       break;
+                               }
+                       }
+                       if (is_lws(*pp) || endofmime == NULL)
+                               continue;
+
+                       /*
+                       ** We've found an encoded word, so we can drop
+                       ** the '=' that was pending
+                       */
+                       equals_pending = 0;
+
+                       /*
+                       ** If we are between two encoded words separated
+                       ** only by linear whitespace, then we ignore
+                       ** the whitespace.  We will roll back the buffer
+                       ** the number of whitespace characters we've seen
+                       ** since last encoded word.
+                       */
+                       if (between_encodings) {
+                               q -= whitespace;
+                               dstlen += whitespace;
+                       }
 
 #ifdef HAVE_ICONV
-           /*
-            * empty encoded text. This ensures that we don't
-            * malloc 0 bytes but skip on to the end
-            */
-           if (endofmime == startofmime && use_iconv) {
-               use_iconv = 0;
-               iconv_close(cd);
-            }
-
-           if (use_iconv) {
-               saveq = q;
-               savedstlen = dstlen;
-                q = convbuf = (char *) mh_xmalloc(endofmime - startofmime);
-            }
-/* ADDCHR2 is for adding characters when q is or might be convbuf:
- * in this case on buffer-full we want to run iconv before returning.
- * I apologise for the dreadful name.
- */
-#define ADDCHR2(C) do { *q++ = (C); dstlen--; if (!dstlen) goto iconvbuffull; } while (0)
+                       /*
+                       ** empty encoded text. This ensures that we don't
+                       ** malloc 0 bytes but skip on to the end
+                       */
+                       if (endofmime == startofmime && use_iconv) {
+                               use_iconv = 0;
+                               iconv_close(cd);
+                       }
+
+                       if (use_iconv) {
+                               saveq = q;
+                               savedstlen = dstlen;
+                               q = convbuf = (char *) mh_xcalloc(endofmime - startofmime, sizeof(char));
+                       }
+/*
+** ADDCHR2 is for adding characters when q is or might be convbuf:
+** in this case on buffer-full we want to run iconv before returning.
+** I apologise for the dreadful name.
+*/
+# define ADDCHR2(C) do { *q++ = (C); dstlen--; if (!dstlen) goto iconvbuffull; } while (0)
 #else
-#define ADDCHR2(C) ADDCHR(C)
+# define ADDCHR2(C) ADDCHR(C)
 #endif
 
-           /* Now decode the text */
-           if (quoted_printable) {
-               for (pp = startofmime; pp < endofmime; pp++) {
-                   if (*pp == '=') {
-                       c = unqp (pp[1], pp[2]);
-                       if (c == -1)
-                           continue;
-                       if (c != 0)
-                           *q++ = c;
-                       pp += 2;
-                   } else if (*pp == '_') {
-                       ADDCHR2(' ');
-                   } else {
-                       ADDCHR2(*pp);
-                   }
-               }
-           } else {
-               /* base64 */
-               int c1, c2, c3, c4;
-
-               pp = startofmime;
-               while (pp < endofmime) {
-                   /* 6 + 2 bits */
-                   while ((pp < endofmime) &&
-                          ((c1 = char64(*pp)) == -1)) {
-                       pp++;
-                   }
-                   if (pp < endofmime) {
-                       pp++;
-                   }
-                   while ((pp < endofmime) &&
-                          ((c2 = char64(*pp)) == -1)) {
-                       pp++;
-                   }
-                   if (pp < endofmime && c1 != -1 && c2 != -1) {
-                       ADDCHR2((c1 << 2) | (c2 >> 4));
-                       pp++;
-                   }
-                   /* 4 + 4 bits */
-                   while ((pp < endofmime) &&
-                          ((c3 = char64(*pp)) == -1)) {
-                       pp++;
-                   }
-                   if (pp < endofmime && c2 != -1 && c3 != -1) {
-                       ADDCHR2(((c2 & 0xF) << 4) | (c3 >> 2));
-                       pp++;
-                   }
-                   /* 2 + 6 bits */
-                   while ((pp < endofmime) &&
-                          ((c4 = char64(*pp)) == -1)) {
-                       pp++;
-                   }
-                   if (pp < endofmime && c3 != -1 && c4 != -1) {
-                       ADDCHR2(((c3 & 0x3) << 6) | (c4));
-                       pp++;
-                   }
-               }
-           }
+                       /* Now decode the text */
+                       if (quoted_printable) {
+                               for (pp = startofmime; pp < endofmime; pp++) {
+                                       if (*pp == '=') {
+                                               c = unqp(pp[1], pp[2]);
+                                               if (c == -1)
+                                                       continue;
+                                               if (c != 0)
+                                                       *q++ = c;
+                                               pp += 2;
+                                       } else if (*pp == '_') {
+                                               ADDCHR2(' ');
+                                       } else {
+                                               ADDCHR2(*pp);
+                                       }
+                               }
+                       } else {
+                               /* base64 */
+                               int c1, c2, c3, c4;
+                               c1 = c2 = c3 = c4 = -1;
+
+                               pp = startofmime;
+                               while (pp < endofmime) {
+                                       /* 6 + 2 bits */
+                                       while ((pp < endofmime) &&
+                                                       ((c1 = char64(*pp))
+                                                       == -1)) {
+                                               pp++;
+                                       }
+                                       if (pp < endofmime) {
+                                               pp++;
+                                       }
+                                       while ((pp < endofmime) &&
+                                                       ((c2 = char64(*pp))
+                                                       == -1)) {
+                                               pp++;
+                                       }
+                                       if (pp < endofmime && c1 != -1 && c2 != -1) {
+                                               ADDCHR2((c1 << 2) | (c2 >> 4));
+                                               pp++;
+                                       }
+                                       /* 4 + 4 bits */
+                                       while ((pp < endofmime) &&
+                                                       ((c3 = char64(*pp))
+                                                       == -1)) {
+                                               pp++;
+                                       }
+                                       if (pp < endofmime && c2 != -1 && c3 != -1) {
+                                               ADDCHR2(((c2 & 0xF) << 4) | (c3 >> 2));
+                                               pp++;
+                                       }
+                                       /* 2 + 6 bits */
+                                       while ((pp < endofmime) &&
+                                                       ((c4 = char64(*pp))
+                                                       == -1)) {
+                                               pp++;
+                                       }
+                                       if (pp < endofmime && c3 != -1 && c4 != -1) {
+                                               ADDCHR2(((c3 & 0x3) << 6) | (c4));
+                                               pp++;
+                                       }
+                               }
+                       }
 
 #ifdef HAVE_ICONV
-       iconvbuffull:
-           /* NB that the string at convbuf is not necessarily NUL terminated here:
-            * q points to the first byte after the valid part.
-            */
-            /* Convert to native character set */
-           if (use_iconv) {
-               size_t inbytes = q - convbuf;
-               ICONV_CONST char *start = convbuf;
-               
-               while (inbytes) {
-                   if (iconv(cd, &start, &inbytes, &saveq, &savedstlen) ==
-                           (size_t)-1) {
-                       if (errno != EILSEQ) break;
-                       /* character couldn't be converted. we output a `?'
-                        * and try to carry on which won't work if
-                        * either encoding was stateful */
-                       iconv (cd, 0, 0, &saveq, &savedstlen);
-                       if (!savedstlen)
-                           break;
-                       *saveq++ = '?';
-                       savedstlen--;
-                       if (!savedstlen)
-                           break;
-                       /* skip to next input character */
-                       if (fromutf8) {
-                           for (start++;(start < q) && ((*start & 192) == 128);start++)
-                               inbytes--;
-                       } else
-                           start++, inbytes--;
-                       if (start >= q)
-                           break;
-                   }
-               }
-               q = saveq;
-               /* Stop now if (1) we hit the end of the buffer trying to do
-                * MIME decoding and have just iconv-converted a partial string
-                * or (2) our iconv-conversion hit the end of the buffer.
-                */
-               if (!dstlen || !savedstlen)
-                   goto buffull;
-               dstlen = savedstlen;
-               free(convbuf);
-           }
+               iconvbuffull:
+                       /*
+                       ** NB that the string at convbuf is not necessarily
+                       ** NUL terminated here:
+                       ** q points to the first byte after the valid part.
+                       */
+                       /* Convert to native character set */
+                       if (use_iconv) {
+                               size_t inbytes = q - convbuf;
+                               ICONV_CONST char *start = convbuf;
+
+                               while (inbytes) {
+                                       if (iconv(cd, &start, &inbytes, &saveq, &savedstlen) ==
+                                               (size_t)-1) {
+                                               if (errno != EILSEQ)
+                                                       break;
+                                               /*
+                                               ** character couldn't be
+                                               ** converted. we output a
+                                               ** `?'  and try to carry on
+                                               ** which won't work if either
+                                               ** encoding was stateful
+                                               */
+                                               iconv(cd, 0, 0, &saveq, &savedstlen);
+                                               if (!savedstlen)
+                                                       break;
+                                               *saveq++ = '?';
+                                               savedstlen--;
+                                               if (!savedstlen)
+                                                       break;
+                                               /* skip to next input character */
+                                               if (fromutf8) {
+                                                       for (start++;(start < q) && ((*start & 192) == 128);start++)
+                                                               inbytes--;
+                                               } else
+                                                       start++, inbytes--;
+                                               if (start >= q)
+                                                       break;
+                                       }
+                               }
+                               q = saveq;
+                               /*
+                               ** Stop now if (1) we hit the end of the
+                               ** buffer trying to do MIME decoding and
+                               ** have just iconv-converted a partial
+                               ** string or (2) our iconv-conversion hit
+                               ** the end of the buffer.
+                               */
+                               if (!dstlen || !savedstlen)
+                                       goto buffull;
+                               dstlen = savedstlen;
+                               free(convbuf);
+                       }
 #endif
-           
-           /*
-            * Now that we are done decoding this particular
-            * encoded word, advance string to trailing '='.
-            */
-           p = endofmime + 1;
-
-           encoding_found = 1;         /* we found (at least 1) encoded word */
-           between_encodings = 1;      /* we have just decoded something     */
-           whitespace = 0;             /* re-initialize amount of whitespace */
+
+                       /*
+                       ** Now that we are done decoding this particular
+                       ** encoded word, advance string to trailing '='.
+                       */
+                       p = endofmime + 1;
+
+                       encoding_found = 1;  /* we found (at least 1) encoded word */
+                       between_encodings = 1;  /* we have just decoded something */
+                       whitespace = 0;  /* re-initialize amount of whitespace */
+               }
        }
-    }
 #ifdef HAVE_ICONV
-    if (use_iconv) iconv_close(cd);
+       if (use_iconv) iconv_close(cd);
 #endif
 
-    /* If an equals was pending at end of string, add it now. */
-    if (equals_pending)
-       ADDCHR('=');
-    *q = '\0';
+       /* If an equals was pending at end of string, add it now. */
+       if (equals_pending)
+               ADDCHR('=');
+       *q = '\0';
 
-    return encoding_found;
+       return encoding_found;
 
   buffull:
-    /* q is currently just off the end of the buffer, so rewind to NUL terminate */
-    q--;
-    *q = '\0';
-    return encoding_found;
+       /*
+       ** q is currently just off the end of the buffer,
+       ** so rewind to NUL terminate
+       */
+       q--;
+       *q = '\0';
+       return encoding_found;
 }