Remove stdio internals manipulation in sbr/m_getfld.c
authorDavid Levine <levinedl@acm.org>
Tue, 25 Dec 2012 17:57:31 +0000 (11:57 -0600)
committermarkus schnalke <meillo@marmaro.de>
Sat, 29 Aug 2015 16:32:03 +0000 (18:32 +0200)
Replaced all the stdio buffer access in sbr/m_getfld.c with a single call
to fread() and then some almost straightforward buffer manipulations.

This commit is ported to mmh.

sbr/m_getfld.c

index 1cc041b..108af13 100644 (file)
@@ -149,16 +149,73 @@ static int edelimlen;
 
 static int (*eom_action)(int) = NULL;
 
-#ifdef _FSTDIO
-# define _ptr _p  /* Gag   */
-# define _cnt _r  /* Retch */
-# define _filbuf __srget  /* Puke  */
-# define DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
-#endif
+/*
+** This replaces the old approach, which included direct access to
+** stdio internals.  It uses one fread() to load a buffer that we
+** manage.
+*/
+#define MSG_INPUT_SIZE 8192
+static struct m_getfld_buffer {
+       unsigned char msg_buf[2 * MSG_INPUT_SIZE];
+       unsigned char *readpos;
+       unsigned char *end;  /* One past the last character read in. */
+} m;
+
+static void
+setup_buffer(FILE *iob, struct m_getfld_buffer *m)
+{
+       /*
+       ** Rely on Restrictions that m_getfld() calls on different file
+       ** streams are not interleaved, and no other file stream read
+       ** methods are used.  And, the first call to m_getfld (), etc., on
+       ** a stream always reads at least 1 byte.
+       ** I don't think it's necessary to use ftello() because we just
+       ** need to determine whether the current offset is 0 or not.
+       */
+       if (ftell(iob) == 0) {
+               /* A new file stream, so reset the buffer state. */
+               m->readpos = m->end = m->msg_buf;
+       }
+}
+
+static size_t
+read_more(struct m_getfld_buffer *m, FILE *iob)
+{
+       size_t num_read;
+
+       /* Move any leftover at the end of buf to the beginning. */
+       if (m->end > m->readpos) {
+               memmove(m->msg_buf, m->readpos, m->end - m->readpos);
+       }
+       m->readpos = m->msg_buf + (m->end - m->readpos);
+       num_read = fread(m->readpos, 1, MSG_INPUT_SIZE, iob);
+       m->end = m->readpos + num_read;
+
+       return num_read;
+}
+
+static int
+Getc(FILE *iob)
+{
+       if (m.end - m.readpos < 1) {
+               if (read_more(&m, iob) == 0) {
+                       /*
+                       **  Pretend that we read a character.
+                       ** That's what stdio does.
+                       */
+                       ++m.readpos;
+                       return EOF;
+               }
+       }
+       return (m.readpos < m.end) ? *m.readpos++ : EOF;
+}
+
+static int
+Ungetc(int c, FILE *iob)
+{
+       return (m.readpos == m.msg_buf) ? EOF : (*--m.readpos = c);
+}
 
-#ifndef DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
-extern int  _filbuf(FILE*);
-#endif
 
 
 int
@@ -166,9 +223,11 @@ m_getfld(int state, unsigned char *name, unsigned char *buf,
        int bufsz, FILE *iob)
 {
        unsigned char  *bp, *cp, *ep, *sp;
-       int cnt, c, i, j;
+       int cnt, c, i, j, k;
+
+       setup_buffer(iob, &m);
 
-       if ((c = getc(iob)) < 0) {
+       if ((c = Getc(iob)) < 0) {
                msg_count = 0;
                *buf = 0;
                return FILEEOF;
@@ -176,10 +235,10 @@ m_getfld(int state, unsigned char *name, unsigned char *buf,
        if (eom(c, iob)) {
                if (! eom_action) {
                        /* flush null messages */
-                       while ((c = getc(iob)) >= 0 && eom(c, iob))
+                       while ((c = Getc(iob)) >= 0 && eom(c, iob))
                                ;
                        if (c >= 0)
-                               ungetc(c, iob);
+                               Ungetc(c, iob);
                }
                msg_count = 0;
                *buf = 0;
@@ -192,16 +251,16 @@ m_getfld(int state, unsigned char *name, unsigned char *buf,
        case FLD:
                if (c == '\n' || c == '-') {
                        /* we hit the header/body separator */
-                       while (c != '\n' && (c = getc(iob)) >= 0)
+                       while (c != '\n' && (c = Getc(iob)) >= 0)
                                ;
 
-                       if (c < 0 || (c = getc(iob)) < 0 || eom(c, iob)) {
+                       if (c < 0 || (c = Getc(iob)) < 0 || eom(c, iob)) {
                                if (!eom_action) {
                                        /* flush null messages */
-                                       while ((c = getc(iob)) >= 0 && eom(c, iob))
+                                       while ((c = Getc(iob)) >= 0 && eom(c, iob))
                                                ;
                                        if (c >= 0)
-                                               ungetc(c, iob);
+                                               Ungetc(c, iob);
                                }
                                msg_count = 0;
                                *buf = 0;
@@ -218,47 +277,28 @@ m_getfld(int state, unsigned char *name, unsigned char *buf,
                cp = name;
                i = NAMESZ - 1;
                for (;;) {
-#ifdef LINUX_STDIO
-                       bp = sp = (unsigned char *) iob->_IO_read_ptr - 1;
-                       j = (cnt = ((long) iob->_IO_read_end -
-                               (long) iob->_IO_read_ptr)  + 1) < i ? cnt : i;
-#elif defined(__DragonFly__)
-                       bp = sp = (unsigned char *) ((struct __FILE_public *)iob)->_p - 1;
-                       j = (cnt = ((struct __FILE_public *)iob)->_r+1) < i ? cnt : i;
-#else
-                       bp = sp = (unsigned char *) iob->_ptr - 1;
-                       j = (cnt = iob->_cnt+1) < i ? cnt : i;
-#endif
+                       /* Store current pos, ungetting the last char. */
+                       bp = sp = (unsigned char *) m.readpos - 1;
+                       j = ((cnt = m.end - m.readpos + 1) < i) ? cnt : i;
+
                        while (--j >= 0 && (c = *bp++) != ':' && c != '\n')
                                *cp++ = c;
 
                        j = bp - sp;
                        if ((cnt -= j) <= 0) {
-#ifdef LINUX_STDIO
-                               iob->_IO_read_ptr = iob->_IO_read_end;
-                               if (__underflow(iob) == EOF) {
-#elif defined(__DragonFly__)
-                               if (__srget(iob) == EOF) {
-#else
-                               if (_filbuf(iob) == EOF) {
-#endif
+                               /*
+                               ** Used to explicitly force refill of the
+                               ** buffer here, but Getc() will do that
+                               ** if necessary.
+                               */
+                               if (Getc (iob) == EOF) {
                                        *cp = *buf = 0;
                                        advise(NULL, "eof encountered in field \"%s\"", name);
                                        return FMTERR;
                                }
-#ifdef LINUX_STDIO
-                               iob->_IO_read_ptr++; /* NOT automatic in __underflow()! */
-#endif
                        } else {
-#ifdef LINUX_STDIO
-                               iob->_IO_read_ptr = bp + 1;
-#elif defined(__DragonFly__)
-                               ((struct __FILE_public *)iob)->_p = bp + 1;
-                               ((struct __FILE_public *)iob)->_r = cnt - 1;
-#else
-                               iob->_ptr = bp + 1;
-                               iob->_cnt = cnt - 1;
-#endif
+                               /* Restore the current offset. */
+                               m.readpos = bp + 1;
                        }
                        if (c == ':')
                                break;
@@ -312,7 +352,13 @@ m_getfld(int state, unsigned char *name, unsigned char *buf,
                                ** that should be harmless enough, right?
                                ** This is a corrupt message anyway.
                                */
-                               fseek(iob, ftell(iob) - 2, SEEK_SET);
+                               /* emulates:  fseek(iob, ftell(iob) -(-2 + cnt + 1), SEEK_SET) */
+                               m.readpos += cnt - 1;
+                               /*
+                               ** Reset file stream position so caller,
+                               ** e.g., get_content, can use ftell(), etc.
+                               */
+                               fseek(iob, -cnt - 1, SEEK_CUR);
                                return BODY;
                        }
                        if ((i -= j) <= 0) {
@@ -336,16 +382,9 @@ m_getfld(int state, unsigned char *name, unsigned char *buf,
                */
                cp = buf; i = bufsz-1;
                for (;;) {
-#ifdef LINUX_STDIO
-                       cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
-                       bp = (unsigned char *) --iob->_IO_read_ptr;
-#elif defined(__DragonFly__)
-                       cnt = ((struct __FILE_public *)iob)->_r++;
-                       bp = (unsigned char *) --((struct __FILE_public *)iob)->_p;
-#else
-                       cnt = iob->_cnt++;
-                       bp = (unsigned char *) --iob->_ptr;
-#endif
+                       /* Set and save the current pos and update cnt. */
+                       cnt = m.end - m.readpos;
+                       bp = --m.readpos;
                        c = cnt < i ? cnt : i;
                        while ((ep = locc( c, bp, '\n' ))) {
                                /*
@@ -353,21 +392,13 @@ m_getfld(int state, unsigned char *name, unsigned char *buf,
                                ** return.
                                */
                                if ((j = *++ep) != ' ' && j != '\t') {
-#ifdef LINUX_STDIO
-                                       j = ep - (unsigned char *) iob->_IO_read_ptr;
-                                       memcpy(cp, iob->_IO_read_ptr, j);
-                                       iob->_IO_read_ptr = ep;
-#elif defined(__DragonFly__)
-                                       j = ep - (unsigned char *) ((struct __FILE_public *)iob)->_p;
-                                       memcpy(cp, ((struct __FILE_public *)iob)->_p, j);
-                                       ((struct __FILE_public *)iob)->_p = ep;
-                                       ((struct __FILE_public *)iob)->_r -= j;
-#else
-                                       j = ep - (unsigned char *) iob->_ptr;
-                                       memcpy(cp, iob->_ptr, j);
-                                       iob->_ptr = ep;
-                                       iob->_cnt -= j;
-#endif
+                                       /*
+                                       ** Save the text and update the
+                                       ** current position.
+                                       */
+                                       j = ep - m.readpos;
+                                       memcpy (cp, m.readpos, j);
+                                       m.readpos = ep;
                                        cp += j;
                                        state = FLD;
                                        goto finish;
@@ -379,64 +410,34 @@ m_getfld(int state, unsigned char *name, unsigned char *buf,
                        ** end of input or dest buffer - copy what
                        ** we've found.
                        */
-#ifdef LINUX_STDIO
-                       c += bp - (unsigned char *) iob->_IO_read_ptr;
-                       memcpy(cp, iob->_IO_read_ptr, c);
-#elif defined(__DragonFly__)
-                       c += bp - (unsigned char *) ((struct __FILE_public *)iob)->_p;
-                       memcpy(cp, ((struct __FILE_public *)iob)->_p, c);
-#else
-                       c += bp - (unsigned char *) iob->_ptr;
-                       memcpy(cp, iob->_ptr, c);
-#endif
-                       i -= c;
-                       cp += c;
+                       c += bp - m.readpos;
+                       for (k = 0; k < c; ++k, --i) {
+                               *cp++ = Getc (iob);
+                       }
                        if (i <= 0) {
                                /* the dest buffer is full */
-#ifdef LINUX_STDIO
-                               iob->_IO_read_ptr += c;
-#elif defined(__DragonFly__)
-                               ((struct __FILE_public *)iob)->_r -= c;
-                               ((struct __FILE_public *)iob)->_p += c;
-#else
-                               iob->_cnt -= c;
-                               iob->_ptr += c;
-#endif
                                state = FLDPLUS;
                                break;
                        }
                        /*
                        ** There's one character left in the input
-                       ** buffer.  Copy it & fill the buffer.
-                       ** If the last char was a newline and the
-                       ** next char is not whitespace, this is
-                       ** the end of the field.  Otherwise loop.
+                       ** buffer.  Copy it & fill the buffer (that
+                       ** fill used to be explicit, but now Getc()
+                       ** does it). If the last char was a newline
+                       ** and the next char is not whitespace, this
+                       ** is the end of the field.  Otherwise loop.
                        */
                        --i;
-#ifdef LINUX_STDIO
-                       *cp++ = j = *(iob->_IO_read_ptr + c);
-                       iob->_IO_read_ptr = iob->_IO_read_end;
-                       c = __underflow(iob);
-                       iob->_IO_read_ptr++;  /* NOT automatic! */
-#elif defined(__DragonFly__)
-                       *cp++ =j = *(((struct __FILE_public *)iob)->_p + c);
-                       c = __srget(iob);
-#else
-                       *cp++ = j = *(iob->_ptr + c);
-                       c = _filbuf(iob);
-#endif
+                       *cp++ = j = Getc(iob);
+                       c = Getc(iob);
                        if (c == EOF || ((j == '\0' || j == '\n')
                                        && c != ' ' && c != '\t')) {
                                if (c != EOF) {
-#ifdef LINUX_STDIO
-                                       --iob->_IO_read_ptr;
-#elif defined(__DragonFly__)
-                                       --((struct __FILE_public *)iob)->_p;
-                                       ++((struct __FILE_public *)iob)->_r;
-#else
-                                       --iob->_ptr;
-                                       ++iob->_cnt;
-#endif
+                                       /*
+                                       ** Put the character back for
+                                       ** the next call.
+                                       */
+                                       --m.readpos;
                                }
                                state = FLD;
                                break;
@@ -454,17 +455,10 @@ m_getfld(int state, unsigned char *name, unsigned char *buf,
                ** don't add an eos.
                */
                i = (bufsz < 0) ? -bufsz : bufsz-1;
-#ifdef LINUX_STDIO
-               bp = (unsigned char *) --iob->_IO_read_ptr;
-               cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
-#elif defined(__DragonFly__)
-               bp = (unsigned char *) --((struct __FILE_public *)iob)->_p;
-               cnt = ++((struct __FILE_public *)iob)->_r;
-#else
-               bp = (unsigned char *) --iob->_ptr;
-               cnt = ++iob->_cnt;
-#endif
-               c = (cnt < i ? cnt : i);
+               /* Back up and store the current position and update cnt. */
+               bp = --m.readpos;
+               cnt = m.end - m.readpos;
+               c = (cnt < i) ? cnt : i;
                if (ismbox && c > 1) {
                        /*
                        ** packed maildrop - only take up to the (possible)
@@ -547,15 +541,8 @@ m_getfld(int state, unsigned char *name, unsigned char *buf,
                        }
                }
                memcpy( buf, bp, c );
-#ifdef LINUX_STDIO
-               iob->_IO_read_ptr += c;
-#elif defined(__DragonFly__)
-               ((struct __FILE_public *)iob)->_r -= c;
-               ((struct __FILE_public *)iob)->_p += c;
-#else
-               iob->_cnt -= c;
-               iob->_ptr += c;
-#endif
+               /* Advance the current position to reflect the copy out. */
+               m.readpos += c;
                if (bufsz < 0) {
                        msg_count = c;
                        return (state);
@@ -581,11 +568,13 @@ thisisanmbox(FILE *iob)
        char *cp;
        char *delimstr;
 
-       c = getc(iob); 
+       setup_buffer(iob, &m);
+
+       c = Getc(iob); 
        if (feof(iob)) {
                return;
        }
-       ungetc(c, iob);
+       Ungetc(c, iob);
 
        /*
        ** Figure out what the message delimitter string is for this
@@ -598,8 +587,12 @@ thisisanmbox(FILE *iob)
        ** say the style is MBOX and eat the rest of the line.  Otherwise
        ** abort.
        */
-
-       if (fread(text, sizeof(*text), 5, iob) != 5) {
+       for (c=0, cp=text; c<5; ++c, ++cp) {
+               if ((*cp = Getc(iob)) == EOF) {
+                       break;
+               }
+       }
+       if (c != 5) {
                adios(EX_IOERR, NULL, "Read error");
        }
        if (strncmp(text, "From ", 5)!=0) {
@@ -607,7 +600,7 @@ thisisanmbox(FILE *iob)
        }
        ismbox = TRUE;
        delimstr = "\nFrom ";
-       while ((c = getc(iob)) != '\n' && c >= 0) {
+       while ((c = Getc(iob)) != '\n' && c >= 0) {
                continue;
        }
        c = strlen(delimstr);
@@ -620,8 +613,9 @@ thisisanmbox(FILE *iob)
        edelimlen = c - 1;
        strcpy(msg_delim, delimstr);
        delimend = (unsigned char *)msg_delim + edelimlen;
-       if (edelimlen <= 1)
+       if (edelimlen <= 1) {
                adios(EX_DATAERR, NULL, "maildrop delimiter must be at least 2 bytes");
+       }
        /*
        ** build a Boyer-Moore end-position map for the matcher in m_getfld.
        ** N.B. - we don't match just the first char (since it's the newline
@@ -630,8 +624,9 @@ thisisanmbox(FILE *iob)
        */
        pat_map = (unsigned char **) calloc(256, sizeof(unsigned char *));
 
-       for (cp = (char *) fdelim + 1; cp < (char *) delimend; cp++ )
+       for (cp = (char *) fdelim + 1; cp < (char *) delimend; cp++) {
                pat_map[(unsigned char)*cp] = (unsigned char *) cp;
+       }
 }
 
 
@@ -642,27 +637,33 @@ thisisanmbox(FILE *iob)
 static int
 m_Eom(int c, FILE *iob)
 {
-       long pos = 0L;
+       unsigned char *pos;
        int i;
        char text[10];
+       char *cp;
 
-       pos = ftell(iob);
-       if ((i = fread(text, sizeof *text, edelimlen, iob)) != edelimlen ||
+       pos = m.readpos; /* ftell */
+       for (i=0, cp=text; i<edelimlen; ++i, ++cp) {
+               if ((*cp = Getc(iob)) == EOF) {
+                       break;
+               }
+       }
+       if (i != edelimlen ||
                        (strncmp(text, (char *)edelim, edelimlen)!=0)) {
-               if (i == 0 && ismbox)
+               if (i == 0 && ismbox) {
                        /*
                        ** the final newline in the (brain damaged) unix-format
                        ** maildrop is part of the delimitter - delete it.
                        */
                        return 1;
-
-               fseek(iob, (long)(pos-1), SEEK_SET);
-               getc(iob);  /* should be OK */
+               }
+               m.readpos = pos - 1;  /* fseek(iob, pos - 1, SEEK_SET) */
+               Getc(iob);  /* should be OK */
                return 0;
        }
 
        if (ismbox) {
-               while ((c = getc(iob)) != '\n' && c >= 0) {
+               while ((c = Getc(iob)) != '\n' && c >= 0) {
                        continue;
                }
        }