From: Philipp Takacs Date: Sat, 5 Sep 2015 11:10:33 +0000 (+0200) Subject: Revert "Remove stdio internals manipulation in sbr/m_getfld.c" X-Git-Tag: mmh-0.2-RC1~28 X-Git-Url: http://git.marmaro.de/?p=mmh;a=commitdiff_plain;h=219ca2478b42be77eee20866a8a6cf8b87f3a59c Revert "Remove stdio internals manipulation in sbr/m_getfld.c" This is only temporary, to ensure mmh is working correctly. This reverts commit 976303d04d3bc2cad0afd5e3d364264783da56c2. --- diff --git a/sbr/m_getfld.c b/sbr/m_getfld.c index 108af13..1cc041b 100644 --- a/sbr/m_getfld.c +++ b/sbr/m_getfld.c @@ -149,73 +149,16 @@ static int edelimlen; static int (*eom_action)(int) = NULL; -/* -** This replaces the old approach, which included direct access to -** stdio internals. It uses one fread() to load a buffer that we -** manage. -*/ -#define MSG_INPUT_SIZE 8192 -static struct m_getfld_buffer { - unsigned char msg_buf[2 * MSG_INPUT_SIZE]; - unsigned char *readpos; - unsigned char *end; /* One past the last character read in. */ -} m; - -static void -setup_buffer(FILE *iob, struct m_getfld_buffer *m) -{ - /* - ** Rely on Restrictions that m_getfld() calls on different file - ** streams are not interleaved, and no other file stream read - ** methods are used. And, the first call to m_getfld (), etc., on - ** a stream always reads at least 1 byte. - ** I don't think it's necessary to use ftello() because we just - ** need to determine whether the current offset is 0 or not. - */ - if (ftell(iob) == 0) { - /* A new file stream, so reset the buffer state. */ - m->readpos = m->end = m->msg_buf; - } -} - -static size_t -read_more(struct m_getfld_buffer *m, FILE *iob) -{ - size_t num_read; - - /* Move any leftover at the end of buf to the beginning. 
*/ - if (m->end > m->readpos) { - memmove(m->msg_buf, m->readpos, m->end - m->readpos); - } - m->readpos = m->msg_buf + (m->end - m->readpos); - num_read = fread(m->readpos, 1, MSG_INPUT_SIZE, iob); - m->end = m->readpos + num_read; - - return num_read; -} - -static int -Getc(FILE *iob) -{ - if (m.end - m.readpos < 1) { - if (read_more(&m, iob) == 0) { - /* - ** Pretend that we read a character. - ** That's what stdio does. - */ - ++m.readpos; - return EOF; - } - } - return (m.readpos < m.end) ? *m.readpos++ : EOF; -} - -static int -Ungetc(int c, FILE *iob) -{ - return (m.readpos == m.msg_buf) ? EOF : (*--m.readpos = c); -} +#ifdef _FSTDIO +# define _ptr _p /* Gag */ +# define _cnt _r /* Retch */ +# define _filbuf __srget /* Puke */ +# define DEFINED__FILBUF_TO_SOMETHING_SPECIFIC +#endif +#ifndef DEFINED__FILBUF_TO_SOMETHING_SPECIFIC +extern int _filbuf(FILE*); +#endif int @@ -223,11 +166,9 @@ m_getfld(int state, unsigned char *name, unsigned char *buf, int bufsz, FILE *iob) { unsigned char *bp, *cp, *ep, *sp; - int cnt, c, i, j, k; - - setup_buffer(iob, &m); + int cnt, c, i, j; - if ((c = Getc(iob)) < 0) { + if ((c = getc(iob)) < 0) { msg_count = 0; *buf = 0; return FILEEOF; @@ -235,10 +176,10 @@ m_getfld(int state, unsigned char *name, unsigned char *buf, if (eom(c, iob)) { if (! 
eom_action) { /* flush null messages */ - while ((c = Getc(iob)) >= 0 && eom(c, iob)) + while ((c = getc(iob)) >= 0 && eom(c, iob)) ; if (c >= 0) - Ungetc(c, iob); + ungetc(c, iob); } msg_count = 0; *buf = 0; @@ -251,16 +192,16 @@ m_getfld(int state, unsigned char *name, unsigned char *buf, case FLD: if (c == '\n' || c == '-') { /* we hit the header/body separator */ - while (c != '\n' && (c = Getc(iob)) >= 0) + while (c != '\n' && (c = getc(iob)) >= 0) ; - if (c < 0 || (c = Getc(iob)) < 0 || eom(c, iob)) { + if (c < 0 || (c = getc(iob)) < 0 || eom(c, iob)) { if (!eom_action) { /* flush null messages */ - while ((c = Getc(iob)) >= 0 && eom(c, iob)) + while ((c = getc(iob)) >= 0 && eom(c, iob)) ; if (c >= 0) - Ungetc(c, iob); + ungetc(c, iob); } msg_count = 0; *buf = 0; @@ -277,28 +218,47 @@ m_getfld(int state, unsigned char *name, unsigned char *buf, cp = name; i = NAMESZ - 1; for (;;) { - /* Store current pos, ungetting the last char. */ - bp = sp = (unsigned char *) m.readpos - 1; - j = ((cnt = m.end - m.readpos + 1) < i) ? cnt : i; - +#ifdef LINUX_STDIO + bp = sp = (unsigned char *) iob->_IO_read_ptr - 1; + j = (cnt = ((long) iob->_IO_read_end - + (long) iob->_IO_read_ptr) + 1) < i ? cnt : i; +#elif defined(__DragonFly__) + bp = sp = (unsigned char *) ((struct __FILE_public *)iob)->_p - 1; + j = (cnt = ((struct __FILE_public *)iob)->_r+1) < i ? cnt : i; +#else + bp = sp = (unsigned char *) iob->_ptr - 1; + j = (cnt = iob->_cnt+1) < i ? cnt : i; +#endif while (--j >= 0 && (c = *bp++) != ':' && c != '\n') *cp++ = c; j = bp - sp; if ((cnt -= j) <= 0) { - /* - ** Used to explicitly force refill of the - ** buffer here, but Getc() will do that - ** if necessary. 
- */ - if (Getc (iob) == EOF) { +#ifdef LINUX_STDIO + iob->_IO_read_ptr = iob->_IO_read_end; + if (__underflow(iob) == EOF) { +#elif defined(__DragonFly__) + if (__srget(iob) == EOF) { +#else + if (_filbuf(iob) == EOF) { +#endif *cp = *buf = 0; advise(NULL, "eof encountered in field \"%s\"", name); return FMTERR; } +#ifdef LINUX_STDIO + iob->_IO_read_ptr++; /* NOT automatic in __underflow()! */ +#endif } else { - /* Restore the current offset. */ - m.readpos = bp + 1; +#ifdef LINUX_STDIO + iob->_IO_read_ptr = bp + 1; +#elif defined(__DragonFly__) + ((struct __FILE_public *)iob)->_p = bp + 1; + ((struct __FILE_public *)iob)->_r = cnt - 1; +#else + iob->_ptr = bp + 1; + iob->_cnt = cnt - 1; +#endif } if (c == ':') break; @@ -352,13 +312,7 @@ m_getfld(int state, unsigned char *name, unsigned char *buf, ** that should be harmless enough, right? ** This is a corrupt message anyway. */ - /* emulates: fseek(iob, ftell(iob) -(-2 + cnt + 1), SEEK_SET) */ - m.readpos += cnt - 1; - /* - ** Reset file stream position so caller, - ** e.g., get_content, can use ftell(), etc. - */ - fseek(iob, -cnt - 1, SEEK_CUR); + fseek(iob, ftell(iob) - 2, SEEK_SET); return BODY; } if ((i -= j) <= 0) { @@ -382,9 +336,16 @@ m_getfld(int state, unsigned char *name, unsigned char *buf, */ cp = buf; i = bufsz-1; for (;;) { - /* Set and save the current pos and update cnt. */ - cnt = m.end - m.readpos; - bp = --m.readpos; +#ifdef LINUX_STDIO + cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr; + bp = (unsigned char *) --iob->_IO_read_ptr; +#elif defined(__DragonFly__) + cnt = ((struct __FILE_public *)iob)->_r++; + bp = (unsigned char *) --((struct __FILE_public *)iob)->_p; +#else + cnt = iob->_cnt++; + bp = (unsigned char *) --iob->_ptr; +#endif c = cnt < i ? cnt : i; while ((ep = locc( c, bp, '\n' ))) { /* @@ -392,13 +353,21 @@ m_getfld(int state, unsigned char *name, unsigned char *buf, ** return. 
*/ if ((j = *++ep) != ' ' && j != '\t') { - /* - ** Save the text and update the - ** current position. - */ - j = ep - m.readpos; - memcpy (cp, m.readpos, j); - m.readpos = ep; +#ifdef LINUX_STDIO + j = ep - (unsigned char *) iob->_IO_read_ptr; + memcpy(cp, iob->_IO_read_ptr, j); + iob->_IO_read_ptr = ep; +#elif defined(__DragonFly__) + j = ep - (unsigned char *) ((struct __FILE_public *)iob)->_p; + memcpy(cp, ((struct __FILE_public *)iob)->_p, j); + ((struct __FILE_public *)iob)->_p = ep; + ((struct __FILE_public *)iob)->_r -= j; +#else + j = ep - (unsigned char *) iob->_ptr; + memcpy(cp, iob->_ptr, j); + iob->_ptr = ep; + iob->_cnt -= j; +#endif cp += j; state = FLD; goto finish; @@ -410,34 +379,64 @@ m_getfld(int state, unsigned char *name, unsigned char *buf, ** end of input or dest buffer - copy what ** we've found. */ - c += bp - m.readpos; - for (k = 0; k < c; ++k, --i) { - *cp++ = Getc (iob); - } +#ifdef LINUX_STDIO + c += bp - (unsigned char *) iob->_IO_read_ptr; + memcpy(cp, iob->_IO_read_ptr, c); +#elif defined(__DragonFly__) + c += bp - (unsigned char *) ((struct __FILE_public *)iob)->_p; + memcpy(cp, ((struct __FILE_public *)iob)->_p, c); +#else + c += bp - (unsigned char *) iob->_ptr; + memcpy(cp, iob->_ptr, c); +#endif + i -= c; + cp += c; if (i <= 0) { /* the dest buffer is full */ +#ifdef LINUX_STDIO + iob->_IO_read_ptr += c; +#elif defined(__DragonFly__) + ((struct __FILE_public *)iob)->_r -= c; + ((struct __FILE_public *)iob)->_p += c; +#else + iob->_cnt -= c; + iob->_ptr += c; +#endif state = FLDPLUS; break; } /* ** There's one character left in the input - ** buffer. Copy it & fill the buffer (that - ** fill used to be explicit, but now Getc() - ** does it). If the last char was a newline - ** and the next char is not whitespace, this - ** is the end of the field. Otherwise loop. + ** buffer. Copy it & fill the buffer. + ** If the last char was a newline and the + ** next char is not whitespace, this is + ** the end of the field. 
Otherwise loop. */ --i; - *cp++ = j = Getc(iob); - c = Getc(iob); +#ifdef LINUX_STDIO + *cp++ = j = *(iob->_IO_read_ptr + c); + iob->_IO_read_ptr = iob->_IO_read_end; + c = __underflow(iob); + iob->_IO_read_ptr++; /* NOT automatic! */ +#elif defined(__DragonFly__) + *cp++ =j = *(((struct __FILE_public *)iob)->_p + c); + c = __srget(iob); +#else + *cp++ = j = *(iob->_ptr + c); + c = _filbuf(iob); +#endif if (c == EOF || ((j == '\0' || j == '\n') && c != ' ' && c != '\t')) { if (c != EOF) { - /* - ** Put the character back for - ** the next call. - */ - --m.readpos; +#ifdef LINUX_STDIO + --iob->_IO_read_ptr; +#elif defined(__DragonFly__) + --((struct __FILE_public *)iob)->_p; + ++((struct __FILE_public *)iob)->_r; +#else + --iob->_ptr; + ++iob->_cnt; +#endif } state = FLD; break; @@ -455,10 +454,17 @@ m_getfld(int state, unsigned char *name, unsigned char *buf, ** don't add an eos. */ i = (bufsz < 0) ? -bufsz : bufsz-1; - /* Back up and store the current position and update cnt. */ - bp = --m.readpos; - cnt = m.end - m.readpos; - c = (cnt < i) ? cnt : i; +#ifdef LINUX_STDIO + bp = (unsigned char *) --iob->_IO_read_ptr; + cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr; +#elif defined(__DragonFly__) + bp = (unsigned char *) --((struct __FILE_public *)iob)->_p; + cnt = ++((struct __FILE_public *)iob)->_r; +#else + bp = (unsigned char *) --iob->_ptr; + cnt = ++iob->_cnt; +#endif + c = (cnt < i ? cnt : i); if (ismbox && c > 1) { /* ** packed maildrop - only take up to the (possible) @@ -541,8 +547,15 @@ m_getfld(int state, unsigned char *name, unsigned char *buf, } } memcpy( buf, bp, c ); - /* Advance the current position to reflect the copy out. 
*/ - m.readpos += c; +#ifdef LINUX_STDIO + iob->_IO_read_ptr += c; +#elif defined(__DragonFly__) + ((struct __FILE_public *)iob)->_r -= c; + ((struct __FILE_public *)iob)->_p += c; +#else + iob->_cnt -= c; + iob->_ptr += c; +#endif if (bufsz < 0) { msg_count = c; return (state); @@ -568,13 +581,11 @@ thisisanmbox(FILE *iob) char *cp; char *delimstr; - setup_buffer(iob, &m); - - c = Getc(iob); + c = getc(iob); if (feof(iob)) { return; } - Ungetc(c, iob); + ungetc(c, iob); /* ** Figure out what the message delimitter string is for this @@ -587,12 +598,8 @@ thisisanmbox(FILE *iob) ** say the style is MBOX and eat the rest of the line. Otherwise ** abort. */ - for (c=0, cp=text; c<5; ++c, ++cp) { - if ((*cp = Getc(iob)) == EOF) { - break; - } - } - if (c != 5) { + + if (fread(text, sizeof(*text), 5, iob) != 5) { adios(EX_IOERR, NULL, "Read error"); } if (strncmp(text, "From ", 5)!=0) { @@ -600,7 +607,7 @@ thisisanmbox(FILE *iob) } ismbox = TRUE; delimstr = "\nFrom "; - while ((c = Getc(iob)) != '\n' && c >= 0) { + while ((c = getc(iob)) != '\n' && c >= 0) { continue; } c = strlen(delimstr); @@ -613,9 +620,8 @@ thisisanmbox(FILE *iob) edelimlen = c - 1; strcpy(msg_delim, delimstr); delimend = (unsigned char *)msg_delim + edelimlen; - if (edelimlen <= 1) { + if (edelimlen <= 1) adios(EX_DATAERR, NULL, "maildrop delimiter must be at least 2 bytes"); - } /* ** build a Boyer-Moore end-position map for the matcher in m_getfld. ** N.B. 
- we don't match just the first char (since it's the newline @@ -624,9 +630,8 @@ thisisanmbox(FILE *iob) */ pat_map = (unsigned char **) calloc(256, sizeof(unsigned char *)); - for (cp = (char *) fdelim + 1; cp < (char *) delimend; cp++) { + for (cp = (char *) fdelim + 1; cp < (char *) delimend; cp++ ) pat_map[(unsigned char)*cp] = (unsigned char *) cp; - } } @@ -637,33 +642,27 @@ thisisanmbox(FILE *iob) static int m_Eom(int c, FILE *iob) { - unsigned char *pos; + long pos = 0L; int i; char text[10]; - char *cp; - pos = m.readpos; /* ftell */ - for (i=0, cp=text; i= 0) { + while ((c = getc(iob)) != '\n' && c >= 0) { continue; } }