git.marmaro.de Git - mmh/blob - sbr/m_getfld.c

   1
   2 /*
   3  * m_getfld.c -- read/parse a message
   4  *
   5  * $Id$
   6  *
   7  * This code is Copyright (c) 2002, by the authors of nmh.  See the
   8  * COPYRIGHT file in the root directory of the nmh distribution for
   9  * complete copyright information.
  10  */
  11
  12 #include <h/mh.h>
  13 #include <h/mts.h>
  14 #include <h/utils.h>
  15
  16 /* This module has a long and checkered history.  First, it didn't burst
  17    maildrops correctly because it considered two CTRL-A:s in a row to be
  18    an inter-message delimiter.  It really is four CTRL-A:s followed by a
  19    newline.  Unfortunately, MMDF will convert this delimiter *inside* a
  20    message to a CTRL-B followed by three CTRL-A:s and a newline.  This
  21    caused the old version of m_getfld() to declare eom prematurely.  The
  22    fix was a lot slower than
  23
  24                 c == '\001' && peekc (iob) == '\001'
  25
  26    but it worked, and to increase generality, MBOX style maildrops could
  27    be parsed as well.  Unfortunately the speed issue finally caught up with
  28    us since this routine is at the very heart of MH.
  29
  30    To speed things up considerably, the routine Eom() was made an auxiliary
  31    function called by the macro eom().  Unless we are bursting a maildrop,
  32    the eom() macro returns FALSE saying we aren't at the end of the
  33    message.
  34
  35    The next thing to do is to read the mts.conf file and initialize
  36    delimiter[] and delimlen accordingly...
  37
  38    After mhl was made a built-in in msh, m_getfld() worked just fine
  39    (using m_unknown() at startup).  Until one day: a message which was
  40    the result of a bursting was shown. Then, since the burst boundaries
  41    aren't CTRL-A:s, m_getfld() would blindly plunge on past the boundary.
  42    Very sad.  The solution: introduce m_eomsbr().  This hook gets called
  43    after the end of each line (since testing for eom involves an fseek()).
  44    This worked fine, until one day: a message with no body portion arrived.
  45    Then the
  46
  47                    while (eom (c = Getc (iob), iob))
  48                         continue;
  49
  50    loop caused m_getfld() to return FMTERR.  So, that logic was changed to
  51    check for (*eom_action) and act accordingly.
  52
  53    This worked fine, until one day: someone didn't use four CTRL-A:s as
  54    their delimiters.  So, the bullet got bit and we read mts.h and
  55    continue to struggle on.  It's not that bad though, since the only time
  56    the code gets executed is when inc (or msh) calls it, and both of these
  57    have already called mts_init().
  58
  59    ------------------------
  60    (Written by Van Jacobson for the mh6 m_getfld, January, 1986):
  61
  62    This routine was accounting for 60% of the cpu time used by most mh
  63    programs.  I spent a bit of time tuning and it now accounts for <10%
  64    of the time used.  Like any heavily tuned routine, it's a bit
  65    complex and you want to be sure you understand everything that it's
  66    doing before you start hacking on it.  Let me try to emphasize
  67    that:  every line in this atrocity depends on every other line,
  68    sometimes in subtle ways.  You should understand it all, in detail,
  69    before trying to change any part.  If you do change it, test the
  70    result thoroughly (I use a hand-constructed test file that exercises
  71    all the ways a header name, header body, header continuation,
  72    header-body separator, body line and body eom can align themselves
  73    with respect to a buffer boundary).  "Minor" bugs in this routine
  74    result in garbaged or lost mail.
  75
  76    If you hack on this and slow it down, I, my children and my
  77    children's children will curse you.
  78
  79    This routine gets used on three different types of files: normal,
  80    single msg files, "packed" unix or mmdf mailboxes (when used by inc)
  81    and packed, directoried bulletin board files (when used by msh).
  82    The biggest impact of different file types is in "eom" testing.  The
  83    code has been carefully organized to test for eom at appropriate
  84    times and at no other times (since the check is quite expensive).
  85    I have tried to arrange things so that the eom check need only be
  86    done on entry to this routine.  Since an eom can only occur after a
  87    newline, this is easy to manage for header fields.  For the msg
  88    body, we try to efficiently search the input buffer to see if it
  89    contains the eom delimiter.  If it does, we take up to the
  90    delimiter, otherwise we take everything in the buffer.  (The change
  91    to the body eom/copy processing produced the most noticeable
  92    performance difference, particularly for "inc" and "show".)
  93
  94    There are three qualitatively different things this routine busts
  95    out of a message: field names, field text and msg bodies.  Field
  96    names are typically short (~8 char) and the loop that extracts them
  97    might terminate on a colon, newline or max width.  I considered
  98    using a Vax "scanc" to locate the end of the field followed by a
  99    "bcopy" but the routine call overhead on a Vax is too large for this
 100    to work on short names.  If Berkeley ever makes "inline" part of the
 101    C optimiser (so things like "scanc" turn into inline instructions) a
 102    change here would be worthwhile.
 103
 104    Field text is typically 60 - 100 characters so there's (barely)
 105    a win in doing a routine call to something that does a "locc"
 106    followed by a "bmove".  About 30% of the fields have continuations
 107    (usually the 822 "received:" lines) and each continuation generates
 108    another routine call.  "Inline" would be a big win here, as well.
 109
 110    Messages, as of this writing, seem to come in two flavors: small
 111    (~1K) and long (>2K).  Most messages have 400 - 600 bytes of headers
 112    so message bodies average at least a few hundred characters.
 113    Assuming your system uses reasonably sized stdio buffers (1K or
 114    more), this routine should be able to remove the body in large
 115    (>500 byte) chunks.  This makes the cost of a call to "bcopy"
 116    small but there is a premium on checking for the eom in packed
 117    maildrops.  The eom pattern is always a simple string so we can
 118    construct an efficient pattern matcher for it (e.g., a Vax "matchc"
 119    instruction).  Some thought went into recognizing the start of
 120    an eom that has been split across two buffers.
 121
 122    This routine wants to deal with large chunks of data so, rather
 123    than "getc" into a local buffer, it uses stdio's buffer.  If
 124    you try to use it on a non-buffered file, you'll get what you
 125    deserve.  This routine "knows" that struct FILEs have a _ptr
 126    and a _cnt to describe the current state of the buffer and
 127    it knows that _filbuf ignores the _ptr & _cnt and simply fills
 128    the buffer.  If stdio on your system doesn't work this way, you
 129    may have to make small changes in this routine.
 130
 131    This routine also "knows" that an EOF indication on a stream is
 132    "sticky" (i.e., you will keep getting EOF until you reposition the
 133    stream).  If your system doesn't work this way it is broken and you
 134    should complain to the vendor.  As a consequence of the sticky
 135    EOF, this routine will never return any kind of EOF status when
 136    there is data in "name" or "buf".
 137   */
 138
 139
 140 /*
 141  * static prototypes
 142  */
 143 static int m_Eom (int, FILE *);
 144 static unsigned char *matchc(int, char *, int, char *);
 145 static unsigned char *locc(int, unsigned char *, unsigned char);
 146
 147 #define Getc(iob)       getc(iob)
 148 #define eom(c,iob)      (msg_style != MS_DEFAULT && \
 149                          (((c) == *msg_delim && m_Eom(c,iob)) ||\
 150                           (eom_action && (*eom_action)(c))))
 151
 152 static unsigned char **pat_map;
 153
 154 /*
 155  * defined in sbr/m_msgdef.c = 0
 156  * This is a disgusting hack for "inc" so it can know how many
 157  * characters were stuffed in the buffer on the last call
 158  * (see comments in uip/scansbr.c).
 159  */
 160 extern int msg_count;
 161
 162 /*
 163  * defined in sbr/m_msgdef.c = MS_DEFAULT
 164  */
 165 extern int msg_style;
 166
 167 /*
 168  * The "full" delimiter string for a packed maildrop consists
 169  * of a newline followed by the actual delimiter.  E.g., the
 170  * full string for a Unix maildrop would be: "\n\nFrom ".
 171  * "Fdelim" points to the start of the full string and is used
 172  * in the BODY case of the main routine to search the buffer for
 173  * a possible eom.  Msg_delim points to the first character of
 174  * the actual delim. string (i.e., fdelim+1).  Edelim
 175  * points to the 2nd character of actual delimiter string.  It
 176  * is used in m_Eom because the first character of the string
 177  * has been read and matched before m_Eom is called.
 178  */
 179 extern char *msg_delim;         /* defined in sbr/m_msgdef.c = "" */
 180 static unsigned char *fdelim;
 181 static unsigned char *delimend;
 182 static int fdelimlen;
 183 static unsigned char *edelim;
 184 static int edelimlen;
 185
 186 static int (*eom_action)(int) = NULL;
 187
 188 #ifdef _FSTDIO
 189 # define _ptr    _p             /* Gag   */
 190 # define _cnt    _r             /* Retch */
 191 # define _filbuf __srget        /* Puke  */
 192 # define DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
 193 #endif
 194
 195 #ifdef SCO_5_STDIO
 196 # define _ptr  __ptr
 197 # define _cnt  __cnt
 198 # define _base __base
 199 # define _filbuf(fp)  ((fp)->__cnt = 0, __filbuf(fp))
 200 # define DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
 201 #endif
 202
 203 #ifndef DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
 204 extern int  _filbuf(FILE*);
 205 #endif
 206
 207
 208 int
 209 m_getfld (int state, unsigned char *name, unsigned char *buf,
 210           int bufsz, FILE *iob)
 211 {
 212     register unsigned char  *bp, *cp, *ep, *sp;
 213     register int cnt, c, i, j;
 214
 215     if ((c = Getc(iob)) < 0) {
 216         msg_count = 0;
 217         *buf = 0;
 218         return FILEEOF;
 219     }
 220     if (eom (c, iob)) {
 221         if (! eom_action) {
 222             /* flush null messages */
 223             while ((c = Getc(iob)) >= 0 && eom (c, iob))
 224                 ;
 225             if (c >= 0)
 226                 ungetc(c, iob);
 227         }
 228         msg_count = 0;
 229         *buf = 0;
 230         return FILEEOF;
 231     }
 232
 233     switch (state) {
 234         case FLDEOF:
 235         case BODYEOF:
 236         case FLD:
 237             if (c == '\n' || c == '-') {
 238                 /* we hit the header/body separator */
 239                 while (c != '\n' && (c = Getc(iob)) >= 0)
 240                     ;
 241
 242                 if (c < 0 || (c = Getc(iob)) < 0 || eom (c, iob)) {
 243                     if (! eom_action) {
 244                         /* flush null messages */
 245                         while ((c = Getc(iob)) >= 0 && eom (c, iob))
 246                             ;
 247                         if (c >= 0)
 248                             ungetc(c, iob);
 249                     }
 250                     msg_count = 0;
 251                     *buf = 0;
 252                     return FILEEOF;
 253                 }
 254                 state = BODY;
 255                 goto body;
 256             }
 257             /*
 258              * get the name of this component.  take characters up
 259              * to a ':', a newline or NAMESZ-1 characters, whichever
 260              * comes first.
 261              */
 262             cp = name;
 263             i = NAMESZ - 1;
 264             for (;;) {
 265 #ifdef LINUX_STDIO
 266                 bp = sp = (unsigned char *) iob->_IO_read_ptr - 1;
 267                 j = (cnt = ((long) iob->_IO_read_end -
 268                         (long) iob->_IO_read_ptr)  + 1) < i ? cnt : i;
 269 #elif defined(__DragonFly__)
 270                 bp = sp = (unsigned char *) ((struct __FILE_public *)iob)->_p - 1;
 271                 j = (cnt = ((struct __FILE_public *)iob)->_r+1) < i ? cnt : i;
 272 #else
 273                 bp = sp = (unsigned char *) iob->_ptr - 1;
 274                 j = (cnt = iob->_cnt+1) < i ? cnt : i;
 275 #endif
 276                 while (--j >= 0 && (c = *bp++) != ':' && c != '\n')
 277                     *cp++ = c;
 278
 279                 j = bp - sp;
 280                 if ((cnt -= j) <= 0) {
 281 #ifdef LINUX_STDIO
 282                     iob->_IO_read_ptr = iob->_IO_read_end;
 283                     if (__underflow(iob) == EOF) {
 284 #elif defined(__DragonFly__)
 285                     if (__srget(iob) == EOF) {
 286 #else
 287                     if (_filbuf(iob) == EOF) {
 288 #endif
 289                         *cp = *buf = 0;
 290                         advise (NULL, "eof encountered in field \"%s\"", name);
 291                         return FMTERR;
 292                     }
 293 #ifdef LINUX_STDIO
 294                 iob->_IO_read_ptr++; /* NOT automatic in __underflow()! */
 295 #endif
 296                 } else {
 297 #ifdef LINUX_STDIO
 298                     iob->_IO_read_ptr = bp + 1;
 299 #elif defined(__DragonFly__)
 300                     ((struct __FILE_public *)iob)->_p = bp + 1;
 301                     ((struct __FILE_public *)iob)->_r = cnt - 1;
 302 #else
 303                     iob->_ptr = bp + 1;
 304                     iob->_cnt = cnt - 1;
 305 #endif
 306                 }
 307                 if (c == ':')
 308                     break;
 309
 310                 /*
 311                  * something went wrong.  possibilities are:
 312                  *  . hit a newline (error)
 313                  *  . got more than namesz chars. (error)
 314                  *  . hit the end of the buffer. (loop)
 315                  */
 316                 if (c == '\n') {
 317                     *cp = *buf = 0;
 318                     advise (NULL, "eol encountered in field \"%s\"", name);
 319                     state = FMTERR;
 320                     goto finish;
 321                 }
 322                 if ((i -= j) <= 0) {
 323                     *cp = *buf = 0;
 324                     advise (NULL, "field name \"%s\" exceeds %d bytes", name, NAMESZ - 1);
 325                     state = LENERR;
 326                     goto finish;
 327                 }
 328             }
 329
 330             while (isspace (*--cp) && cp >= name)
 331                 ;
 332             *++cp = 0;
 333             /* fall through */
 334
 335         case FLDPLUS:
 336             /*
 337              * get (more of) the text of a field.  take
 338              * characters up to the end of this field (newline
 339              * followed by non-blank) or bufsz-1 characters.
 340              */
 341             cp = buf; i = bufsz-1;
 342             for (;;) {
 343 #ifdef LINUX_STDIO
 344                 cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
 345                 bp = (unsigned char *) --iob->_IO_read_ptr;
 346 #elif defined(__DragonFly__)
 347                 cnt = ((struct __FILE_public *)iob)->_r++;
 348                 bp = (unsigned char *) --((struct __FILE_public *)iob)->_p;
 349 #else
 350                 cnt = iob->_cnt++;
 351                 bp = (unsigned char *) --iob->_ptr;
 352 #endif
 353                 c = cnt < i ? cnt : i;
 354                 while ((ep = locc( c, bp, '\n' ))) {
 355                     /*
 356                      * if we hit the end of this field, return.
 357                      */
 358                     if ((j = *++ep) != ' ' && j != '\t') {
 359 #ifdef LINUX_STDIO
 360                         j = ep - (unsigned char *) iob->_IO_read_ptr;
 361                         memcpy (cp, iob->_IO_read_ptr, j);
 362                         iob->_IO_read_ptr = ep;
 363 #elif defined(__DragonFly__)
 364                         j = ep - (unsigned char *) ((struct __FILE_public *)iob)->_p;
 365                         memcpy (cp, ((struct __FILE_public *)iob)->_p, j);
 366                         ((struct __FILE_public *)iob)->_p = ep;
 367                         ((struct __FILE_public *)iob)->_r -= j;
 368 #else
 369                         j = ep - (unsigned char *) iob->_ptr;
 370                         memcpy (cp, iob->_ptr, j);
 371                         iob->_ptr = ep;
 372                         iob->_cnt -= j;
 373 #endif
 374                         cp += j;
 375                         state = FLD;
 376                         goto finish;
 377                     }
 378                     c -= ep - bp;
 379                     bp = ep;
 380                 }
 381                 /*
 382                  * end of input or dest buffer - copy what we've found.
 383                  */
 384 #ifdef LINUX_STDIO
 385                 c += bp - (unsigned char *) iob->_IO_read_ptr;
 386                 memcpy( cp, iob->_IO_read_ptr, c);
 387 #elif defined(__DragonFly__)
 388                 c += bp - (unsigned char *) ((struct __FILE_public *)iob)->_p;
 389                 memcpy( cp, ((struct __FILE_public *)iob)->_p, c);
 390 #else
 391                 c += bp - (unsigned char *) iob->_ptr;
 392                 memcpy( cp, iob->_ptr, c);
 393 #endif
 394                 i -= c;
 395                 cp += c;
 396                 if (i <= 0) {
 397                     /* the dest buffer is full */
 398 #ifdef LINUX_STDIO
 399                     iob->_IO_read_ptr += c;
 400 #elif defined(__DragonFly__)
 401                     ((struct __FILE_public *)iob)->_r -= c;
 402                     ((struct __FILE_public *)iob)->_p += c;
 403 #else
 404                     iob->_cnt -= c;
 405                     iob->_ptr += c;
 406 #endif
 407                     state = FLDPLUS;
 408                     break;
 409                 }
 410                 /*
 411                  * There's one character left in the input buffer.
 412                  * Copy it & fill the buffer.  If the last char
 413                  * was a newline and the next char is not whitespace,
 414                  * this is the end of the field.  Otherwise loop.
 415                  */
 416                 --i;
 417 #ifdef LINUX_STDIO
 418                 *cp++ = j = *(iob->_IO_read_ptr + c);
 419                 iob->_IO_read_ptr = iob->_IO_read_end;
 420                 c = __underflow(iob);
 421                 iob->_IO_read_ptr++;    /* NOT automatic! */
 422 #elif defined(__DragonFly__)
 423                 *cp++ =j = *(((struct __FILE_public *)iob)->_p + c);
 424                 c = __srget(iob);
 425 #else
 426                 *cp++ = j = *(iob->_ptr + c);
 427                 c = _filbuf(iob);
 428 #endif
 429                 if (c == EOF ||
 430                   ((j == '\0' || j == '\n') && c != ' ' && c != '\t')) {
 431                     if (c != EOF) {
 432 #ifdef LINUX_STDIO
 433                         --iob->_IO_read_ptr;
 434 #elif defined(__DragonFly__)
 435                         --((struct __FILE_public *)iob)->_p;
 436                         ++((struct __FILE_public *)iob)->_r;
 437 #else
 438                         --iob->_ptr;
 439                         ++iob->_cnt;
 440 #endif
 441                     }
 442                     state = FLD;
 443                     break;
 444                 }
 445             }
 446             break;
 447
 448         case BODY:
 449         body:
 450             /*
 451              * get the message body up to bufsz characters or the
 452              * end of the message.  Sleazy hack: if bufsz is negative
 453              * we assume that we were called to copy directly into
 454              * the output buffer and we don't add an eos.
 455              */
 456             i = (bufsz < 0) ? -bufsz : bufsz-1;
 457 #ifdef LINUX_STDIO
 458             bp = (unsigned char *) --iob->_IO_read_ptr;
 459             cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
 460 #elif defined(__DragonFly__)
 461             bp = (unsigned char *) --((struct __FILE_public *)iob)->_p;
 462             cnt = ++((struct __FILE_public *)iob)->_r;
 463 #else
 464             bp = (unsigned char *) --iob->_ptr;
 465             cnt = ++iob->_cnt;
 466 #endif
 467             c = (cnt < i ? cnt : i);
 468             if (msg_style != MS_DEFAULT && c > 1) {
 469                 /*
 470                  * packed maildrop - only take up to the (possible)
 471                  * start of the next message.  This "matchc" should
 472                  * probably be a Boyer-Moore matcher for non-vaxen,
 473                  * particularly since we have the alignment table
 474                  * all built for the end-of-buffer test (next).
 475                  * But our vax timings indicate that the "matchc"
 476                  * instruction is 50% faster than a carefully coded
 477                  * B.M. matcher for most strings.  (So much for elegant
 478                  * algorithms vs. brute force.)  Since I (currently)
 479                  * run MH on a vax, we use the matchc instruction. --vj
 480                  */
 481                 if ((ep = matchc( fdelimlen, fdelim, c, bp )))
 482                     c = ep - bp + 1;
 483                 else {
 484                     /*
 485                      * There's no delim in the buffer but there may be
 486                      * a partial one at the end.  If so, we want to leave
 487                      * it so the "eom" check on the next call picks it up.
 488                      * Use a modified Boyer-Moore matcher to make this
 489                      * check relatively cheap.  The first "if" figures
 490                      * out what position in the pattern matches the last
 491                      * character in the buffer.  The inner "while" matches
 492                      * the pattern against the buffer, backwards starting
 493                      * at that position.  Note that unless the buffer
 494                      * ends with one of the characters in the pattern
 495                      * (excluding the first and last), we do only one test.
 496                      */
 497                     ep = bp + c - 1;
 498                     if ((sp = pat_map[*ep])) {
 499                         do {
 500                             cp = sp;
 501                             while (*--ep == *--cp)
 502                             ;
 503                             if (cp < fdelim) {
 504                                 if (ep >= bp)
 505                                     /*
 506                                      * ep < bp means that all the buffer
 507                                      * contains is a prefix of delim.
 508                                      * If this prefix is really a delim, the
 509                                      * m_eom call at entry should have found
 510                                      * it.  Thus it's not a delim and we can
 511                                      * take all of it.
 512                                      */
 513                                     c = (ep - bp) + 2;
 514                             break;
 515                         }
 516                             /* try matching one less char of delim string */
 517                             ep = bp + c - 1;
 518                         } while (--sp > fdelim);
 519                     }
 520                 }
 521             }
 522             memcpy( buf, bp, c );
 523 #ifdef LINUX_STDIO
 524             iob->_IO_read_ptr += c;
 525 #elif defined(__DragonFly__)
 526             ((struct __FILE_public *)iob)->_r -= c;
 527             ((struct __FILE_public *)iob)->_p += c;
 528 #else
 529             iob->_cnt -= c;
 530             iob->_ptr += c;
 531 #endif
 532             if (bufsz < 0) {
 533                 msg_count = c;
 534                 return (state);
 535             }
 536             cp = buf + c;
 537             break;
 538
 539         default:
 540             adios (NULL, "m_getfld() called with bogus state of %d", state);
 541     }
 542 finish:
 543     *cp = 0;
 544     msg_count = cp - buf;
 545     return (state);
 546 }
 547
 548
 549 #ifdef RPATHS
 550 static char unixbuf[BUFSIZ] = "";
 551 #endif /* RPATHS */
 552
 553 void
 554 m_unknown(FILE *iob)
 555 {
 556     register int c;
 557     register long pos;
 558     char text[10];
 559     register char *cp;
 560     register char *delimstr;
 561
 562 /*
 563  * Figure out what the message delimitter string is for this
 564  * maildrop.  (This used to be part of m_Eom but I didn't like
 565  * the idea of an "if" statement that could only succeed on the
 566  * first call to m_Eom getting executed on each call, i.e., at
 567  * every newline in the message).
 568  *
 569  * If the first line of the maildrop is a Unix "From " line, we
 570  * say the style is MBOX and eat the rest of the line.  Otherwise
 571  * we say the style is MMDF and look for the delimiter string
 572  * specified when nmh was built (or from the mts.conf file).
 573  */
 574
 575     msg_style = MS_UNKNOWN;
 576
 577     pos = ftell (iob);
 578     if (fread (text, sizeof(*text), 5, iob) == 5
 579             && strncmp (text, "From ", 5) == 0) {
 580         msg_style = MS_MBOX;
 581         delimstr = "\nFrom ";
 582 #ifndef RPATHS
 583         while ((c = getc (iob)) != '\n' && c >= 0)
 584             ;
 585 #else /* RPATHS */
 586         cp = unixbuf;
 587         while ((c = getc (iob)) != '\n' && cp - unixbuf < BUFSIZ - 1)
 588             *cp++ = c;
 589         *cp = 0;
 590 #endif /* RPATHS */
 591     } else {
 592         /* not a Unix style maildrop */
 593         fseek (iob, pos, SEEK_SET);
 594         if (mmdlm2 == NULL || *mmdlm2 == 0)
 595             mmdlm2 = "\001\001\001\001\n";
 596         delimstr = mmdlm2;
 597         msg_style = MS_MMDF;
 598     }
 599     c = strlen (delimstr);
 600     fdelim = (unsigned char *) mh_xmalloc((size_t) (c + 3));
 601     *fdelim++ = '\0';
 602     *fdelim = '\n';
 603     msg_delim = (char *)fdelim+1;
 604     edelim = (unsigned char *)msg_delim+1;
 605     fdelimlen = c + 1;
 606     edelimlen = c - 1;
 607     strcpy (msg_delim, delimstr);
 608     delimend = (unsigned char *)msg_delim + edelimlen;
 609     if (edelimlen <= 1)
 610         adios (NULL, "maildrop delimiter must be at least 2 bytes");
 611     /*
 612      * build a Boyer-Moore end-position map for the matcher in m_getfld.
 613      * N.B. - we don't match just the first char (since it's the newline
 614      * separator) or the last char (since the matchc would have found it
 615      * if it was a real delim).
 616      */
 617     pat_map = (unsigned char **) calloc (256, sizeof(unsigned char *));
 618
 619     for (cp = (char *) fdelim + 1; cp < (char *) delimend; cp++ )
 620         pat_map[(unsigned char)*cp] = (unsigned char *) cp;
 621
 622     if (msg_style == MS_MMDF) {
 623         /* flush extra msg hdrs */
 624         while ((c = Getc(iob)) >= 0 && eom (c, iob))
 625             ;
 626         if (c >= 0)
 627             ungetc(c, iob);
 628     }
 629 }
 630
 631
 632 void
 633 m_eomsbr (int (*action)(int))
 634 {
 635     if ((eom_action = action)) {
 636         msg_style = MS_MSH;
 637         *msg_delim = 0;
 638         fdelimlen = 1;
 639         delimend = fdelim;
 640     } else {
 641         msg_style = MS_MMDF;
 642         msg_delim = (char *)fdelim + 1;
 643         fdelimlen = strlen((char *)fdelim);
 644         delimend = (unsigned char *)(msg_delim + edelimlen);
 645     }
 646 }
 647
 648
 649 /*
 650  * test for msg delimiter string
 651  */
 652
 653 static int
 654 m_Eom (int c, FILE *iob)
 655 {
 656     register long pos = 0L;
 657     register int i;
 658     char text[10];
 659 #ifdef RPATHS
 660     register char *cp;
 661 #endif /* RPATHS */
 662
 663     pos = ftell (iob);
 664     if ((i = fread (text, sizeof *text, edelimlen, iob)) != edelimlen
 665             || strncmp (text, (char *)edelim, edelimlen)) {
 666         if (i == 0 && msg_style == MS_MBOX)
 667             /* the final newline in the (brain damaged) unix-format
 668              * maildrop is part of the delimitter - delete it.
 669              */
 670             return 1;
 671
 672 #if 0
 673         fseek (iob, pos, SEEK_SET);
 674 #endif
 675
 676         fseek (iob, (long)(pos-1), SEEK_SET);
 677         getc (iob);             /* should be OK */
 678         return 0;
 679     }
 680
 681     if (msg_style == MS_MBOX) {
 682 #ifndef RPATHS
 683         while ((c = getc (iob)) != '\n')
 684             if (c < 0)
 685                 break;
 686 #else /* RPATHS */
 687         cp = unixbuf;
 688         while ((c = getc (iob)) != '\n' && c >= 0 && cp - unixbuf < BUFSIZ - 1)
 689             *cp++ = c;
 690         *cp = 0;
 691 #endif /* RPATHS */
 692     }
 693
 694     return 1;
 695 }
 696
 697
 698 #ifdef RPATHS
 699 /*
 700  * Return the Return-Path and Delivery-Date
 701  * header information.
 702  *
 703  * Currently, I'm assuming that the "From " line
 704  * takes one of the following forms.
 705  *
 706  * From sender date remote from host   (for UUCP delivery)
 707  * From sender@host  date              (for sendmail delivery)
 708  */
 709
 710 int
 711 get_returnpath (char *rp, int rplen, char *dd, int ddlen)
 712 {
 713     char *ap, *bp, *cp, *dp;
 714
 715     ap = unixbuf;
 716     if (!(bp = cp = strchr(ap, ' ')))
 717         return 0;
 718
 719     /*
 720      * Check for "remote from" in envelope to see
 721      * if this message uses UUCP style addressing
 722      */
 723     while ((cp = strchr(++cp, 'r'))) {
 724         if (strncmp (cp, "remote from", 11) == 0) {
 725             cp = strrchr (cp, ' ');
 726             break;
 727         }
 728     }
 729
 730     /*
 731      * Get the Return-Path information from
 732      * the "From " envelope.
 733      */
 734     if (cp) {
 735         /* return path for UUCP style addressing */
 736         dp = strchr (++cp, '\n');
 737         snprintf (rp, rplen, "%.*s!%.*s\n", (int)(dp - cp), cp, (int)(bp - ap), ap);
 738     } else {
 739         /* return path for standard domain addressing */
 740         snprintf (rp, rplen, "%.*s\n", (int)(bp - ap), ap);
 741     }
 742
 743     /*
 744      * advance over the spaces to get to
 745      * delivery date on envelope
 746      */
 747     while (*bp == ' ')
 748         bp++;
 749
 750     /* Now get delivery date from envelope */
 751     snprintf (dd, ddlen, "%.*s\n", 24, bp);
 752
 753     unixbuf[0] = 0;
 754     return 1;
 755 }
 756 #endif /* RPATHS */
 757
 758
 759 static unsigned char *
 760 matchc(int patln, char *pat, int strln, char *str)
 761 {
 762         register char *es = str + strln - patln;
 763         register char *sp;
 764         register char *pp;
 765         register char *ep = pat + patln;
 766         register char pc = *pat++;
 767
 768         for(;;) {
 769                 while (pc != *str++)
 770                         if (str > es)
 771                                 return 0;
 772                 if (str > es+1)
 773                         return 0;
 774                 sp = str; pp = pat;
 775                 while (pp < ep && *sp++ == *pp)
 776                         pp++;
 777                 if (pp >= ep)
 778                         return ((unsigned char *)--str);
 779         }
 780 }
 781
 782
 783 /*
 784  * Locate character "term" in the next "cnt" characters of "src".
 785  * If found, return its address, otherwise return 0.
 786  */
 787
 788 static unsigned char *
 789 locc(int cnt, unsigned char *src, unsigned char term)
 790 {
 791     while (*src++ != term && --cnt > 0);
 792
 793     return (cnt > 0 ? --src : (unsigned char *)0);
 794 }
 795