git.marmaro.de Git - mmh/blob - sbr/m_getfld.c

   1
   2 /*
   3  * m_getfld.c -- read/parse a message
   4  *
   5  * $Id$
   6  */
   7
   8 #include <h/mh.h>
   9 #include <zotnet/mts/mts.h>
  10
  11 /* This module has a long and checkered history.  First, it didn't burst
  12    maildrops correctly because it considered two CTRL-A:s in a row to be
  13    an inter-message delimiter.  It really is four CTRL-A:s followed by a
  14    newline.  Unfortunately, MMDF will convert this delimiter *inside* a
  15    message to a CTRL-B followed by three CTRL-A:s and a newline.  This
  16    caused the old version of m_getfld() to declare eom prematurely.  The
  17    fix was a lot slower than
  18
  19                 c == '\001' && peekc (iob) == '\001'
  20
  21    but it worked, and to increase generality, MBOX style maildrops could
  22    be parsed as well.  Unfortunately the speed issue finally caught up with
  23    us since this routine is at the very heart of MH.
  24
  25    To speed things up considerably, the routine Eom() was made an auxiliary
  26    function called by the macro eom().  Unless we are bursting a maildrop,
  27    the eom() macro returns FALSE saying we aren't at the end of the
  28    message.
  29
  30    The next thing to do is to read the mts.conf file and initialize
  31    delimiter[] and delimlen accordingly...
  32
  33    After mhl was made a built-in in msh, m_getfld() worked just fine
  34    (using m_unknown() at startup).  Until one day: a message which was
  35    the result of a bursting was shown. Then, since the burst boundaries
  36    aren't CTRL-A:s, m_getfld() would blindly plunge on past the boundary.
  37    Very sad.  The solution: introduce m_eomsbr().  This hook gets called
  38    after the end of each line (since testing for eom involves an fseek()).
  39    This worked fine, until one day: a message with no body portion arrived.
  40    Then the
  41
  42                    while (eom (c = Getc (iob), iob))
  43                         continue;
  44
  45    loop caused m_getfld() to return FMTERR.  So, that logic was changed to
  46    check for (*eom_action) and act accordingly.
  47
  48    This worked fine, until one day: someone didn't use four CTRL:A's as
  49    their delimiters.  So, the bullet got bit and we read mts.h and
  50    continue to struggle on.  It's not that bad though, since the only time
  51    the code gets executed is when inc (or msh) calls it, and both of these
  52    have already called mts_init().
  53
  54    ------------------------
  55    (Written by Van Jacobson for the mh6 m_getfld, January, 1986):
  56
  57    This routine was accounting for 60% of the cpu time used by most mh
  58    programs.  I spent a bit of time tuning and it now accounts for <10%
  59    of the time used.  Like any heavily tuned routine, it's a bit
  60    complex and you want to be sure you understand everything that it's
  61    doing before you start hacking on it.  Let me try to emphasize
  62    that:  every line in this atrocity depends on every other line,
  63    sometimes in subtle ways.  You should understand it all, in detail,
  64    before trying to change any part.  If you do change it, test the
  65    result thoroughly (I use a hand-constructed test file that exercises
  66    all the ways a header name, header body, header continuation,
  67    header-body separator, body line and body eom can align themselves
  68    with respect to a buffer boundary).  "Minor" bugs in this routine
  69    result in garbaged or lost mail.
  70
  71    If you hack on this and slow it down, I, my children and my
  72    children's children will curse you.
  73
  74    This routine gets used on three different types of files: normal,
  75    single msg files, "packed" unix or mmdf mailboxes (when used by inc)
  76    and packed, directoried bulletin board files (when used by msh).
  77    The biggest impact of different file types is in "eom" testing.  The
  78    code has been carefully organized to test for eom at appropriate
  79    times and at no other times (since the check is quite expensive).
  80    I have tried to arrange things so that the eom check need only be
  81    done on entry to this routine.  Since an eom can only occur after a
  82    newline, this is easy to manage for header fields.  For the msg
  83    body, we try to efficiently search the input buffer to see if it
  84    contains the eom delimiter.  If it does, we take up to the
  85    delimiter, otherwise we take everything in the buffer.  (The change
  86    to the body eom/copy processing produced the most noticeable
  87    performance difference, particularly for "inc" and "show".)
  88
  89    There are three qualitatively different things this routine busts
  90    out of a message: field names, field text and msg bodies.  Field
  91    names are typically short (~8 char) and the loop that extracts them
  92    might terminate on a colon, newline or max width.  I considered
  93    using a Vax "scanc" to locate the end of the field followed by a
  94    "bcopy" but the routine call overhead on a Vax is too large for this
  95    to work on short names.  If Berkeley ever makes "inline" part of the
  96    C optimiser (so things like "scanc" turn into inline instructions) a
  97    change here would be worthwhile.
  98
  99    Field text is typically 60 - 100 characters so there's (barely)
 100    a win in doing a routine call to something that does a "locc"
 101    followed by a "bmove".  About 30% of the fields have continuations
 102    (usually the 822 "received:" lines) and each continuation generates
 103    another routine call.  "Inline" would be a big win here, as well.
 104
 105    Messages, as of this writing, seem to come in two flavors: small
 106    (~1K) and long (>2K).  Most messages have 400 - 600 bytes of headers
 107    so message bodies average at least a few hundred characters.
 108    Assuming your system uses reasonably sized stdio buffers (1K or
 109    more), this routine should be able to remove the body in large
 110    (>500 byte) chunks.  This makes the cost of a call to "bcopy"
 111    small but there is a premium on checking for the eom in packed
 112    maildrops.  The eom pattern is always a simple string so we can
 113    construct an efficient pattern matcher for it (e.g., a Vax "matchc"
 114    instruction).  Some thought went into recognizing the start of
 115    an eom that has been split across two buffers.
 116
 117    This routine wants to deal with large chunks of data so, rather
 118    than "getc" into a local buffer, it uses stdio's buffer.  If
 119    you try to use it on a non-buffered file, you'll get what you
 120    deserve.  This routine "knows" that struct FILEs have a _ptr
 121    and a _cnt to describe the current state of the buffer and
 122    it knows that _filbuf ignores the _ptr & _cnt and simply fills
 123    the buffer.  If stdio on your system doesn't work this way, you
 124    may have to make small changes in this routine.
 125
 126    This routine also "knows" that an EOF indication on a stream is
 127    "sticky" (i.e., you will keep getting EOF until you reposition the
 128    stream).  If your system doesn't work this way it is broken and you
 129    should complain to the vendor.  As a consequence of the sticky
 130    EOF, this routine will never return any kind of EOF status when
 131    there is data in "name" or "buf".
 132   */
 133
 134
 135 /*
 136  * static prototypes
 137  */
 138 static int m_Eom (int, FILE *);
 139 static unsigned char *matchc(int, char *, int, char *);
 140 static unsigned char *locc(int, unsigned char *, unsigned char);
 141
 142 #define Getc(iob)       getc(iob)
 143 #define eom(c,iob)      (msg_style != MS_DEFAULT && \
 144                          (((c) == *msg_delim && m_Eom(c,iob)) ||\
 145                           (eom_action && (*eom_action)(c))))
 146
 147 static unsigned char **pat_map;
 148
 149 /*
 150  * defined in sbr/m_msgdef.c = 0
 151  * This is a disgusting hack for "inc" so it can know how many
 152  * characters were stuffed in the buffer on the last call
 153  * (see comments in uip/scansbr.c).
 154  */
 155 extern int msg_count;
 156
 157 /*
 158  * defined in sbr/m_msgdef.c = MS_DEFAULT
 159  */
 160 extern int msg_style;
 161
 162 /*
 163  * The "full" delimiter string for a packed maildrop consists
 164  * of a newline followed by the actual delimiter.  E.g., the
 165  * full string for a Unix maildrop would be: "\n\nFrom ".
 166  * "Fdelim" points to the start of the full string and is used
 167  * in the BODY case of the main routine to search the buffer for
 168  * a possible eom.  Msg_delim points to the first character of
 169  * the actual delim. string (i.e., fdelim+1).  Edelim
 170  * points to the 2nd character of actual delimiter string.  It
 171  * is used in m_Eom because the first character of the string
 172  * has been read and matched before m_Eom is called.
 173  */
 174 extern char *msg_delim;         /* defined in sbr/m_msgdef.c = "" */
 175 static unsigned char *fdelim;
 176 static unsigned char *delimend;
 177 static int fdelimlen;
 178 static unsigned char *edelim;
 179 static int edelimlen;
 180
 181 static int (*eom_action)() = NULL;
 182
 183 #ifdef _FSTDIO
 184 # define _ptr    _p             /* Gag   */
 185 # define _cnt    _r             /* Retch */
 186 # define _filbuf __srget        /* Puke  */
 187 #endif
 188
 189 #ifdef SCO_5_STDIO
 190 # define _ptr  __ptr
 191 # define _cnt  __cnt
 192 # define _base __base
 193 # define _filbuf(fp)  ((fp)->__cnt = 0, __filbuf(fp))
 194 #endif
 195
 196
 197 int
 198 m_getfld (int state, unsigned char *name, unsigned char *buf,
 199           int bufsz, FILE *iob)
 200 {
 201     register unsigned char  *bp, *cp, *ep, *sp;
 202     register int cnt, c, i, j;
 203
 204     if ((c = Getc(iob)) < 0) {
 205         msg_count = 0;
 206         *buf = 0;
 207         return FILEEOF;
 208     }
 209     if (eom (c, iob)) {
 210         if (! eom_action) {
 211             /* flush null messages */
 212             while ((c = Getc(iob)) >= 0 && eom (c, iob))
 213                 ;
 214             if (c >= 0)
 215                 ungetc(c, iob);
 216         }
 217         msg_count = 0;
 218         *buf = 0;
 219         return FILEEOF;
 220     }
 221
 222     switch (state) {
 223         case FLDEOF:
 224         case BODYEOF:
 225         case FLD:
 226             if (c == '\n' || c == '-') {
 227                 /* we hit the header/body separator */
 228                 while (c != '\n' && (c = Getc(iob)) >= 0)
 229                     ;
 230
 231                 if (c < 0 || (c = Getc(iob)) < 0 || eom (c, iob)) {
 232                     if (! eom_action) {
 233                         /* flush null messages */
 234                         while ((c = Getc(iob)) >= 0 && eom (c, iob))
 235                             ;
 236                         if (c >= 0)
 237                             ungetc(c, iob);
 238                     }
 239                     msg_count = 0;
 240                     *buf = 0;
 241                     return FILEEOF;
 242                 }
 243                 state = BODY;
 244                 goto body;
 245             }
 246             /*
 247              * get the name of this component.  take characters up
 248              * to a ':', a newline or NAMESZ-1 characters, whichever
 249              * comes first.
 250              */
 251             cp = name;
 252             i = NAMESZ - 1;
 253             for (;;) {
 254 #ifdef LINUX_STDIO
 255                 bp = sp = (unsigned char *) iob->_IO_read_ptr - 1;
 256                 j = (cnt = ((long) iob->_IO_read_end -
 257                         (long) iob->_IO_read_ptr)  + 1) < i ? cnt : i;
 258 #else
 259                 bp = sp = (unsigned char *) iob->_ptr - 1;
 260                 j = (cnt = iob->_cnt+1) < i ? cnt : i;
 261 #endif
 262                 while (--j >= 0 && (c = *bp++) != ':' && c != '\n')
 263                     *cp++ = c;
 264
 265                 j = bp - sp;
 266                 if ((cnt -= j) <= 0) {
 267 #ifdef LINUX_STDIO
 268                     iob->_IO_read_ptr = iob->_IO_read_end;
 269                     if (__underflow(iob) == EOF) {
 270 #else
 271                     if (_filbuf(iob) == EOF) {
 272 #endif
 273                         *cp = *buf = 0;
 274                         advise (NULL, "eof encountered in field \"%s\"", name);
 275                         return FMTERR;
 276                     }
 277 #ifdef LINUX_STDIO
 278                 iob->_IO_read_ptr++; /* NOT automatic in __underflow()! */
 279 #endif
 280                 } else {
 281 #ifdef LINUX_STDIO
 282                     iob->_IO_read_ptr = bp + 1;
 283 #else
 284                     iob->_ptr = bp + 1;
 285                     iob->_cnt = cnt - 1;
 286 #endif
 287                 }
 288                 if (c == ':')
 289                     break;
 290
 291                 /*
 292                  * something went wrong.  possibilities are:
 293                  *  . hit a newline (error)
 294                  *  . got more than namesz chars. (error)
 295                  *  . hit the end of the buffer. (loop)
 296                  */
 297                 if (c == '\n') {
 298                     *cp = *buf = 0;
 299                     advise (NULL, "eol encountered in field \"%s\"", name);
 300                     state = FMTERR;
 301                     goto finish;
 302                 }
 303                 if ((i -= j) <= 0) {
 304                     *cp = *buf = 0;
 305                     advise (NULL, "field name \"%s\" exceeds %d bytes", name, NAMESZ - 1);
 306                     state = LENERR;
 307                     goto finish;
 308                 }
 309             }
 310
 311             while (isspace (*--cp) && cp >= name)
 312                 ;
 313             *++cp = 0;
 314             /* fall through */
 315
 316         case FLDPLUS:
 317             /*
 318              * get (more of) the text of a field.  take
 319              * characters up to the end of this field (newline
 320              * followed by non-blank) or bufsz-1 characters.
 321              */
 322             cp = buf; i = bufsz-1;
 323             for (;;) {
 324 #ifdef LINUX_STDIO
 325                 cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
 326                 bp = (unsigned char *) --iob->_IO_read_ptr;
 327 #else
 328                 cnt = iob->_cnt++;
 329                 bp = (unsigned char *) --iob->_ptr;
 330 #endif
 331                 c = cnt < i ? cnt : i;
 332                 while ((ep = locc( c, bp, '\n' ))) {
 333                     /*
 334                      * if we hit the end of this field, return.
 335                      */
 336                     if ((j = *++ep) != ' ' && j != '\t') {
 337 #ifdef LINUX_STDIO
 338                         j = ep - (unsigned char *) iob->_IO_read_ptr;
 339                         memcpy (cp, iob->_IO_read_ptr, j);
 340                         iob->_IO_read_ptr = ep;
 341 #else
 342                         j = ep - (unsigned char *) iob->_ptr;
 343                         memcpy (cp, iob->_ptr, j);
 344                         iob->_ptr = ep;
 345                         iob->_cnt -= j;
 346 #endif
 347                         cp += j;
 348                         state = FLD;
 349                         goto finish;
 350                     }
 351                     c -= ep - bp;
 352                     bp = ep;
 353                 }
 354                 /*
 355                  * end of input or dest buffer - copy what we've found.
 356                  */
 357 #ifdef LINUX_STDIO
 358                 c += bp - (unsigned char *) iob->_IO_read_ptr;
 359                 memcpy( cp, iob->_IO_read_ptr, c);
 360 #else
 361                 c += bp - (unsigned char *) iob->_ptr;
 362                 memcpy( cp, iob->_ptr, c);
 363 #endif
 364                 i -= c;
 365                 cp += c;
 366                 if (i <= 0) {
 367                     /* the dest buffer is full */
 368 #ifdef LINUX_STDIO
 369                     iob->_IO_read_ptr += c;
 370 #else
 371                     iob->_cnt -= c;
 372                     iob->_ptr += c;
 373 #endif
 374                     state = FLDPLUS;
 375                     break;
 376                 }
 377                 /*
 378                  * There's one character left in the input buffer.
 379                  * Copy it & fill the buffer.  If the last char
 380                  * was a newline and the next char is not whitespace,
 381                  * this is the end of the field.  Otherwise loop.
 382                  */
 383                 --i;
 384 #ifdef LINUX_STDIO
 385                 *cp++ = j = *(iob->_IO_read_ptr + c);
 386                 iob->_IO_read_ptr = iob->_IO_read_end;
 387                 c = __underflow(iob);
 388                 iob->_IO_read_ptr++;    /* NOT automatic! */
 389 #else
 390                 *cp++ = j = *(iob->_ptr + c);
 391                 c = _filbuf(iob);
 392 #endif
 393                 if (c == EOF ||
 394                   ((j == '\0' || j == '\n') && c != ' ' && c != '\t')) {
 395                     if (c != EOF) {
 396 #ifdef LINUX_STDIO
 397                         --iob->_IO_read_ptr;
 398 #else
 399                         --iob->_ptr;
 400                         ++iob->_cnt;
 401 #endif
 402                     }
 403                     state = FLD;
 404                     break;
 405                 }
 406             }
 407             break;
 408
 409         case BODY:
 410         body:
 411             /*
 412              * get the message body up to bufsz characters or the
 413              * end of the message.  Sleazy hack: if bufsz is negative
 414              * we assume that we were called to copy directly into
 415              * the output buffer and we don't add an eos.
 416              */
 417             i = (bufsz < 0) ? -bufsz : bufsz-1;
 418 #ifdef LINUX_STDIO
 419             bp = (unsigned char *) --iob->_IO_read_ptr;
 420             cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
 421 #else
 422             bp = (unsigned char *) --iob->_ptr;
 423             cnt = ++iob->_cnt;
 424 #endif
 425             c = (cnt < i ? cnt : i);
 426             if (msg_style != MS_DEFAULT && c > 1) {
 427                 /*
 428                  * packed maildrop - only take up to the (possible)
 429                  * start of the next message.  This "matchc" should
 430                  * probably be a Boyer-Moore matcher for non-vaxen,
 431                  * particularly since we have the alignment table
 432                  * all built for the end-of-buffer test (next).
 433                  * But our vax timings indicate that the "matchc"
 434                  * instruction is 50% faster than a carefully coded
 435                  * B.M. matcher for most strings.  (So much for elegant
 436                  * algorithms vs. brute force.)  Since I (currently)
 437                  * run MH on a vax, we use the matchc instruction. --vj
 438                  */
 439                 if ((ep = matchc( fdelimlen, fdelim, c, bp )))
 440                     c = ep - bp + 1;
 441                 else {
 442                     /*
 443                      * There's no delim in the buffer but there may be
 444                      * a partial one at the end.  If so, we want to leave
 445                      * it so the "eom" check on the next call picks it up.
 446                      * Use a modified Boyer-Moore matcher to make this
 447                      * check relatively cheap.  The first "if" figures
 448                      * out what position in the pattern matches the last
 449                      * character in the buffer.  The inner "while" matches
 450                      * the pattern against the buffer, backwards starting
 451                      * at that position.  Note that unless the buffer
 452                      * ends with one of the characters in the pattern
 453                      * (excluding the first and last), we do only one test.
 454                      */
 455                     ep = bp + c - 1;
 456                     if ((sp = pat_map[*ep])) {
 457                         do {
 458                             cp = sp;
 459                             while (*--ep == *--cp)
 460                             ;
 461                             if (cp < fdelim) {
 462                                 if (ep >= bp)
 463                                     /*
 464                                      * ep < bp means that all the buffer
 465                                      * contains is a prefix of delim.
 466                                      * If this prefix is really a delim, the
 467                                      * m_eom call at entry should have found
 468                                      * it.  Thus it's not a delim and we can
 469                                      * take all of it.
 470                                      */
 471                                     c = (ep - bp) + 2;
 472                             break;
 473                         }
 474                             /* try matching one less char of delim string */
 475                             ep = bp + c - 1;
 476                         } while (--sp > fdelim);
 477                     }
 478                 }
 479             }
 480             memcpy( buf, bp, c );
 481 #ifdef LINUX_STDIO
 482             iob->_IO_read_ptr += c;
 483 #else
 484             iob->_cnt -= c;
 485             iob->_ptr += c;
 486 #endif
 487             if (bufsz < 0) {
 488                 msg_count = c;
 489                 return (state);
 490             }
 491             cp = buf + c;
 492             break;
 493
 494         default:
 495             adios (NULL, "m_getfld() called with bogus state of %d", state);
 496     }
 497 finish:
 498     *cp = 0;
 499     msg_count = cp - buf;
 500     return (state);
 501 }
 502
 503
 504 #ifdef RPATHS
 505 static char unixbuf[BUFSIZ] = "";
 506 #endif /* RPATHS */
 507
 508 void
 509 m_unknown(FILE *iob)
 510 {
 511     register int c;
 512     register long pos;
 513     char text[10];
 514     register char *cp;
 515     register char *delimstr;
 516
 517 /*
 518  * Figure out what the message delimitter string is for this
 519  * maildrop.  (This used to be part of m_Eom but I didn't like
 520  * the idea of an "if" statement that could only succeed on the
 521  * first call to m_Eom getting executed on each call, i.e., at
 522  * every newline in the message).
 523  *
 524  * If the first line of the maildrop is a Unix "From " line, we
 525  * say the style is MBOX and eat the rest of the line.  Otherwise
 526  * we say the style is MMDF and look for the delimiter string
 527  * specified when nmh was built (or from the mts.conf file).
 528  */
 529
 530     msg_style = MS_UNKNOWN;
 531
 532     pos = ftell (iob);
 533     if (fread (text, sizeof(*text), 5, iob) == 5
 534             && strncmp (text, "From ", 5) == 0) {
 535         msg_style = MS_MBOX;
 536         delimstr = "\nFrom ";
 537 #ifndef RPATHS
 538         while ((c = getc (iob)) != '\n' && c >= 0)
 539             ;
 540 #else /* RPATHS */
 541         cp = unixbuf;
 542         while ((c = getc (iob)) != '\n' && cp - unixbuf < BUFSIZ - 1)
 543             *cp++ = c;
 544         *cp = 0;
 545 #endif /* RPATHS */
 546     } else {
 547         /* not a Unix style maildrop */
 548         fseek (iob, pos, SEEK_SET);
 549         if (mmdlm2 == NULL || *mmdlm2 == 0)
 550             mmdlm2 = "\001\001\001\001\n";
 551         delimstr = mmdlm2;
 552         msg_style = MS_MMDF;
 553     }
 554     c = strlen (delimstr);
 555     fdelim = (unsigned char *) malloc((size_t) (c + 3));
 556     *fdelim++ = '\0';
 557     *fdelim = '\n';
 558     msg_delim = (char *)fdelim+1;
 559     edelim = (unsigned char *)msg_delim+1;
 560     fdelimlen = c + 1;
 561     edelimlen = c - 1;
 562     strcpy (msg_delim, delimstr);
 563     delimend = (unsigned char *)msg_delim + edelimlen;
 564     if (edelimlen <= 1)
 565         adios (NULL, "maildrop delimiter must be at least 2 bytes");
 566     /*
 567      * build a Boyer-Moore end-position map for the matcher in m_getfld.
 568      * N.B. - we don't match just the first char (since it's the newline
 569      * separator) or the last char (since the matchc would have found it
 570      * if it was a real delim).
 571      */
 572     pat_map = (unsigned char **) calloc (256, sizeof(unsigned char *));
 573
 574     for (cp = (char *) fdelim + 1; cp < (char *) delimend; cp++ )
 575         pat_map[*cp] = (unsigned char *) cp;
 576
 577     if (msg_style == MS_MMDF) {
 578         /* flush extra msg hdrs */
 579         while ((c = Getc(iob)) >= 0 && eom (c, iob))
 580             ;
 581         if (c >= 0)
 582             ungetc(c, iob);
 583     }
 584 }
 585
 586
 587 void
 588 m_eomsbr (int (*action)())
 589 {
 590     if ((eom_action = action)) {
 591         msg_style = MS_MSH;
 592         *msg_delim = 0;
 593         fdelimlen = 1;
 594         delimend = fdelim;
 595     } else {
 596         msg_style = MS_MMDF;
 597         msg_delim = (char *)fdelim + 1;
 598         fdelimlen = strlen((char *)fdelim);
 599         delimend = (unsigned char *)(msg_delim + edelimlen);
 600     }
 601 }
 602
 603
 604 /*
 605  * test for msg delimiter string
 606  */
 607
 608 static int
 609 m_Eom (int c, FILE *iob)
 610 {
 611     register long pos = 0L;
 612     register int i;
 613     char text[10];
 614 #ifdef RPATHS
 615     register char *cp;
 616 #endif /* RPATHS */
 617
 618     pos = ftell (iob);
 619     if ((i = fread (text, sizeof *text, edelimlen, iob)) != edelimlen
 620             || strncmp (text, (char *)edelim, edelimlen)) {
 621         if (i == 0 && msg_style == MS_MBOX)
 622             /* the final newline in the (brain damaged) unix-format
 623              * maildrop is part of the delimitter - delete it.
 624              */
 625             return 1;
 626
 627 #if 0
 628         fseek (iob, pos, SEEK_SET);
 629 #endif
 630
 631         fseek (iob, (long)(pos-1), SEEK_SET);
 632         getc (iob);             /* should be OK */
 633         return 0;
 634     }
 635
 636     if (msg_style == MS_MBOX) {
 637 #ifndef RPATHS
 638         while ((c = getc (iob)) != '\n')
 639             if (c < 0)
 640                 break;
 641 #else /* RPATHS */
 642         cp = unixbuf;
 643         while ((c = getc (iob)) != '\n' && c >= 0 && cp - unixbuf < BUFSIZ - 1)
 644             *cp++ = c;
 645         *cp = 0;
 646 #endif /* RPATHS */
 647     }
 648
 649     return 1;
 650 }
 651
 652
 653 #ifdef RPATHS
 654 /*
 655  * Return the Return-Path and Delivery-Date
 656  * header information.
 657  *
 658  * Currently, I'm assuming that the "From " line
 659  * takes one of the following forms.
 660  *
 661  * From sender date remote from host   (for UUCP delivery)
 662  * From sender@host  date              (for sendmail delivery)
 663  */
 664
 665 int
 666 get_returnpath (char *rp, int rplen, char *dd, int ddlen)
 667 {
 668     char *ap, *bp, *cp, *dp;
 669
 670     ap = unixbuf;
 671     if (!(bp = cp = strchr(ap, ' ')))
 672         return 0;
 673
 674     /*
 675      * Check for "remote from" in envelope to see
 676      * if this message uses UUCP style addressing
 677      */
 678     while ((cp = strchr(++cp, 'r'))) {
 679         if (strncmp (cp, "remote from", 11) == 0) {
 680             cp = strrchr (cp, ' ');
 681             break;
 682         }
 683     }
 684
 685     /*
 686      * Get the Return-Path information from
 687      * the "From " envelope.
 688      */
 689     if (cp) {
 690         /* return path for UUCP style addressing */
 691         dp = strchr (++cp, '\n');
 692         snprintf (rp, rplen, "%.*s!%.*s\n", dp - cp, cp, bp - ap, ap);
 693     } else {
 694         /* return path for standard domain addressing */
 695         snprintf (rp, rplen, "%.*s\n", bp - ap, ap);
 696     }
 697
 698     /*
 699      * advance over the spaces to get to
 700      * delivery date on envelope
 701      */
 702     while (*bp == ' ')
 703         bp++;
 704
 705     /* Now get delivery date from envelope */
 706     snprintf (dd, ddlen, "%.*s\n", 24, bp);
 707
 708     unixbuf[0] = 0;
 709     return 1;
 710 }
 711 #endif /* RPATHS */
 712
 713
 714 static unsigned char *
 715 matchc(int patln, char *pat, int strln, char *str)
 716 {
 717         register char *es = str + strln - patln;
 718         register char *sp;
 719         register char *pp;
 720         register char *ep = pat + patln;
 721         register char pc = *pat++;
 722
 723         for(;;) {
 724                 while (pc != *str++)
 725                         if (str > es)
 726                                 return 0;
 727
 728                 sp = str; pp = pat;
 729                 while (pp < ep && *sp++ == *pp)
 730                         pp++;
 731                 if (pp >= ep)
 732                         return ((unsigned char *)--str);
 733         }
 734 }
 735
 736
 737 /*
 738  * Locate character "term" in the next "cnt" characters of "src".
 739  * If found, return its address, otherwise return 0.
 740  */
 741
 742 static unsigned char *
 743 locc(int cnt, unsigned char *src, unsigned char term)
 744 {
 745     while (*src++ != term && --cnt > 0);
 746
 747     return (cnt > 0 ? --src : (unsigned char *)0);
 748 }
 749