git.marmaro.de Git - mmh/blob - sbr/m_getfld.c

   1
   2 /*
   3  * m_getfld.c -- read/parse a message
   4  *
   5  * $Id$
   6  *
   7  * This code is Copyright (c) 2002, by the authors of nmh.  See the
   8  * COPYRIGHT file in the root directory of the nmh distribution for
   9  * complete copyright information.
  10  */
  11
  12 #include <h/mh.h>
  13 #include <h/mts.h>
  14
  15 /* This module has a long and checkered history.  First, it didn't burst
  16    maildrops correctly because it considered two CTRL-A:s in a row to be
  17    an inter-message delimiter.  It really is four CTRL-A:s followed by a
  18    newline.  Unfortunately, MMDF will convert this delimiter *inside* a
  19    message to a CTRL-B followed by three CTRL-A:s and a newline.  This
  20    caused the old version of m_getfld() to declare eom prematurely.  The
  21    fix was a lot slower than
  22
  23                 c == '\001' && peekc (iob) == '\001'
  24
  25    but it worked, and to increase generality, MBOX style maildrops could
  26    be parsed as well.  Unfortunately the speed issue finally caught up with
  27    us since this routine is at the very heart of MH.
  28
  29    To speed things up considerably, the routine Eom() was made an auxiliary
  30    function called by the macro eom().  Unless we are bursting a maildrop,
  31    the eom() macro returns FALSE saying we aren't at the end of the
  32    message.
  33
  34    The next thing to do is to read the mts.conf file and initialize
  35    delimiter[] and delimlen accordingly...
  36
  37    After mhl was made a built-in in msh, m_getfld() worked just fine
  38    (using m_unknown() at startup).  Until one day: a message which was
  39    the result of a bursting was shown. Then, since the burst boundaries
  40    aren't CTRL-A:s, m_getfld() would blindly plunge on past the boundary.
  41    Very sad.  The solution: introduce m_eomsbr().  This hook gets called
  42    after the end of each line (since testing for eom involves an fseek()).
  43    This worked fine, until one day: a message with no body portion arrived.
  44    Then the
  45
  46                    while (eom (c = Getc (iob), iob))
  47                         continue;
  48
  49    loop caused m_getfld() to return FMTERR.  So, that logic was changed to
  50    check for (*eom_action) and act accordingly.
  51
  52    This worked fine, until one day: someone didn't use four CTRL:A's as
  53    their delimiters.  So, the bullet got bit and we read mts.h and
  54    continue to struggle on.  It's not that bad though, since the only time
  55    the code gets executed is when inc (or msh) calls it, and both of these
  56    have already called mts_init().
  57
  58    ------------------------
  59    (Written by Van Jacobson for the mh6 m_getfld, January, 1986):
  60
  61    This routine was accounting for 60% of the cpu time used by most mh
  62    programs.  I spent a bit of time tuning and it now accounts for <10%
  63    of the time used.  Like any heavily tuned routine, it's a bit
  64    complex and you want to be sure you understand everything that it's
  65    doing before you start hacking on it.  Let me try to emphasize
  66    that:  every line in this atrocity depends on every other line,
  67    sometimes in subtle ways.  You should understand it all, in detail,
  68    before trying to change any part.  If you do change it, test the
  69    result thoroughly (I use a hand-constructed test file that exercises
  70    all the ways a header name, header body, header continuation,
  71    header-body separator, body line and body eom can align themselves
  72    with respect to a buffer boundary).  "Minor" bugs in this routine
  73    result in garbaged or lost mail.
  74
  75    If you hack on this and slow it down, I, my children and my
  76    children's children will curse you.
  77
  78    This routine gets used on three different types of files: normal,
  79    single msg files, "packed" unix or mmdf mailboxes (when used by inc)
  80    and packed, directoried bulletin board files (when used by msh).
  81    The biggest impact of different file types is in "eom" testing.  The
  82    code has been carefully organized to test for eom at appropriate
  83    times and at no other times (since the check is quite expensive).
  84    I have tried to arrange things so that the eom check need only be
  85    done on entry to this routine.  Since an eom can only occur after a
  86    newline, this is easy to manage for header fields.  For the msg
  87    body, we try to efficiently search the input buffer to see if it
  88    contains the eom delimiter.  If it does, we take up to the
  89    delimiter, otherwise we take everything in the buffer.  (The change
  90    to the body eom/copy processing produced the most noticeable
  91    performance difference, particularly for "inc" and "show".)
  92
  93    There are three qualitatively different things this routine busts
  94    out of a message: field names, field text and msg bodies.  Field
  95    names are typically short (~8 char) and the loop that extracts them
  96    might terminate on a colon, newline or max width.  I considered
  97    using a Vax "scanc" to locate the end of the field followed by a
  98    "bcopy" but the routine call overhead on a Vax is too large for this
  99    to work on short names.  If Berkeley ever makes "inline" part of the
 100    C optimiser (so things like "scanc" turn into inline instructions) a
 101    change here would be worthwhile.
 102
 103    Field text is typically 60 - 100 characters so there's (barely)
 104    a win in doing a routine call to something that does a "locc"
 105    followed by a "bmove".  About 30% of the fields have continuations
 106    (usually the 822 "received:" lines) and each continuation generates
 107    another routine call.  "Inline" would be a big win here, as well.
 108
 109    Messages, as of this writing, seem to come in two flavors: small
 110    (~1K) and long (>2K).  Most messages have 400 - 600 bytes of headers
 111    so message bodies average at least a few hundred characters.
 112    Assuming your system uses reasonably sized stdio buffers (1K or
 113    more), this routine should be able to remove the body in large
 114    (>500 byte) chunks.  This makes the cost of a call to "bcopy"
 115    small but there is a premium on checking for the eom in packed
 116    maildrops.  The eom pattern is always a simple string so we can
 117    construct an efficient pattern matcher for it (e.g., a Vax "matchc"
 118    instruction).  Some thought went into recognizing the start of
 119    an eom that has been split across two buffers.
 120
 121    This routine wants to deal with large chunks of data so, rather
 122    than "getc" into a local buffer, it uses stdio's buffer.  If
 123    you try to use it on a non-buffered file, you'll get what you
 124    deserve.  This routine "knows" that struct FILEs have a _ptr
 125    and a _cnt to describe the current state of the buffer and
 126    it knows that _filbuf ignores the _ptr & _cnt and simply fills
 127    the buffer.  If stdio on your system doesn't work this way, you
 128    may have to make small changes in this routine.
 129
 130    This routine also "knows" that an EOF indication on a stream is
 131    "sticky" (i.e., you will keep getting EOF until you reposition the
 132    stream).  If your system doesn't work this way it is broken and you
 133    should complain to the vendor.  As a consequence of the sticky
 134    EOF, this routine will never return any kind of EOF status when
 135    there is data in "name" or "buf".
 136   */
 137
 138
 139 /*
 140  * static prototypes
 141  */
 142 static int m_Eom (int, FILE *);
 143 static unsigned char *matchc(int, char *, int, char *);
 144 static unsigned char *locc(int, unsigned char *, unsigned char);
 145
 146 #define Getc(iob)       getc(iob)
 147 #define eom(c,iob)      (msg_style != MS_DEFAULT && \
 148                          (((c) == *msg_delim && m_Eom(c,iob)) ||\
 149                           (eom_action && (*eom_action)(c))))
 150
 151 static unsigned char **pat_map;
 152
 153 /*
 154  * defined in sbr/m_msgdef.c = 0
 155  * This is a disgusting hack for "inc" so it can know how many
 156  * characters were stuffed in the buffer on the last call
 157  * (see comments in uip/scansbr.c).
 158  */
 159 extern int msg_count;
 160
 161 /*
 162  * defined in sbr/m_msgdef.c = MS_DEFAULT
 163  */
 164 extern int msg_style;
 165
 166 /*
 167  * The "full" delimiter string for a packed maildrop consists
 168  * of a newline followed by the actual delimiter.  E.g., the
 169  * full string for a Unix maildrop would be: "\n\nFrom ".
 170  * "Fdelim" points to the start of the full string and is used
 171  * in the BODY case of the main routine to search the buffer for
 172  * a possible eom.  Msg_delim points to the first character of
 173  * the actual delim. string (i.e., fdelim+1).  Edelim
 174  * points to the 2nd character of actual delimiter string.  It
 175  * is used in m_Eom because the first character of the string
 176  * has been read and matched before m_Eom is called.
 177  */
 178 extern char *msg_delim;         /* defined in sbr/m_msgdef.c = "" */
 179 static unsigned char *fdelim;
 180 static unsigned char *delimend;
 181 static int fdelimlen;
 182 static unsigned char *edelim;
 183 static int edelimlen;
 184
 185 static int (*eom_action)() = NULL;
 186
 187 #ifdef _FSTDIO
 188 # define _ptr    _p             /* Gag   */
 189 # define _cnt    _r             /* Retch */
 190 # define _filbuf __srget        /* Puke  */
 191 # define DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
 192 #endif
 193
 194 #ifdef SCO_5_STDIO
 195 # define _ptr  __ptr
 196 # define _cnt  __cnt
 197 # define _base __base
 198 # define _filbuf(fp)  ((fp)->__cnt = 0, __filbuf(fp))
 199 # define DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
 200 #endif
 201
 202 #ifndef DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
 203 extern int  _filbuf(FILE*);
 204 #endif
 205
 206
 207 int
 208 m_getfld (int state, unsigned char *name, unsigned char *buf,
 209           int bufsz, FILE *iob)
 210 {
 211     register unsigned char  *bp, *cp, *ep, *sp;
 212     register int cnt, c, i, j;
 213
 214     if ((c = Getc(iob)) < 0) {
 215         msg_count = 0;
 216         *buf = 0;
 217         return FILEEOF;
 218     }
 219     if (eom (c, iob)) {
 220         if (! eom_action) {
 221             /* flush null messages */
 222             while ((c = Getc(iob)) >= 0 && eom (c, iob))
 223                 ;
 224             if (c >= 0)
 225                 ungetc(c, iob);
 226         }
 227         msg_count = 0;
 228         *buf = 0;
 229         return FILEEOF;
 230     }
 231
 232     switch (state) {
 233         case FLDEOF:
 234         case BODYEOF:
 235         case FLD:
 236             if (c == '\n' || c == '-') {
 237                 /* we hit the header/body separator */
 238                 while (c != '\n' && (c = Getc(iob)) >= 0)
 239                     ;
 240
 241                 if (c < 0 || (c = Getc(iob)) < 0 || eom (c, iob)) {
 242                     if (! eom_action) {
 243                         /* flush null messages */
 244                         while ((c = Getc(iob)) >= 0 && eom (c, iob))
 245                             ;
 246                         if (c >= 0)
 247                             ungetc(c, iob);
 248                     }
 249                     msg_count = 0;
 250                     *buf = 0;
 251                     return FILEEOF;
 252                 }
 253                 state = BODY;
 254                 goto body;
 255             }
 256             /*
 257              * get the name of this component.  take characters up
 258              * to a ':', a newline or NAMESZ-1 characters, whichever
 259              * comes first.
 260              */
 261             cp = name;
 262             i = NAMESZ - 1;
 263             for (;;) {
 264 #ifdef LINUX_STDIO
 265                 bp = sp = (unsigned char *) iob->_IO_read_ptr - 1;
 266                 j = (cnt = ((long) iob->_IO_read_end -
 267                         (long) iob->_IO_read_ptr)  + 1) < i ? cnt : i;
 268 #else
 269                 bp = sp = (unsigned char *) iob->_ptr - 1;
 270                 j = (cnt = iob->_cnt+1) < i ? cnt : i;
 271 #endif
 272                 while (--j >= 0 && (c = *bp++) != ':' && c != '\n')
 273                     *cp++ = c;
 274
 275                 j = bp - sp;
 276                 if ((cnt -= j) <= 0) {
 277 #ifdef LINUX_STDIO
 278                     iob->_IO_read_ptr = iob->_IO_read_end;
 279                     if (__underflow(iob) == EOF) {
 280 #else
 281                     if (_filbuf(iob) == EOF) {
 282 #endif
 283                         *cp = *buf = 0;
 284                         advise (NULL, "eof encountered in field \"%s\"", name);
 285                         return FMTERR;
 286                     }
 287 #ifdef LINUX_STDIO
 288                 iob->_IO_read_ptr++; /* NOT automatic in __underflow()! */
 289 #endif
 290                 } else {
 291 #ifdef LINUX_STDIO
 292                     iob->_IO_read_ptr = bp + 1;
 293 #else
 294                     iob->_ptr = bp + 1;
 295                     iob->_cnt = cnt - 1;
 296 #endif
 297                 }
 298                 if (c == ':')
 299                     break;
 300
 301                 /*
 302                  * something went wrong.  possibilities are:
 303                  *  . hit a newline (error)
 304                  *  . got more than namesz chars. (error)
 305                  *  . hit the end of the buffer. (loop)
 306                  */
 307                 if (c == '\n') {
 308                     *cp = *buf = 0;
 309                     advise (NULL, "eol encountered in field \"%s\"", name);
 310                     state = FMTERR;
 311                     goto finish;
 312                 }
 313                 if ((i -= j) <= 0) {
 314                     *cp = *buf = 0;
 315                     advise (NULL, "field name \"%s\" exceeds %d bytes", name, NAMESZ - 1);
 316                     state = LENERR;
 317                     goto finish;
 318                 }
 319             }
 320
 321             while (isspace (*--cp) && cp >= name)
 322                 ;
 323             *++cp = 0;
 324             /* fall through */
 325
 326         case FLDPLUS:
 327             /*
 328              * get (more of) the text of a field.  take
 329              * characters up to the end of this field (newline
 330              * followed by non-blank) or bufsz-1 characters.
 331              */
 332             cp = buf; i = bufsz-1;
 333             for (;;) {
 334 #ifdef LINUX_STDIO
 335                 cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
 336                 bp = (unsigned char *) --iob->_IO_read_ptr;
 337 #else
 338                 cnt = iob->_cnt++;
 339                 bp = (unsigned char *) --iob->_ptr;
 340 #endif
 341                 c = cnt < i ? cnt : i;
 342                 while ((ep = locc( c, bp, '\n' ))) {
 343                     /*
 344                      * if we hit the end of this field, return.
 345                      */
 346                     if ((j = *++ep) != ' ' && j != '\t') {
 347 #ifdef LINUX_STDIO
 348                         j = ep - (unsigned char *) iob->_IO_read_ptr;
 349                         memcpy (cp, iob->_IO_read_ptr, j);
 350                         iob->_IO_read_ptr = ep;
 351 #else
 352                         j = ep - (unsigned char *) iob->_ptr;
 353                         memcpy (cp, iob->_ptr, j);
 354                         iob->_ptr = ep;
 355                         iob->_cnt -= j;
 356 #endif
 357                         cp += j;
 358                         state = FLD;
 359                         goto finish;
 360                     }
 361                     c -= ep - bp;
 362                     bp = ep;
 363                 }
 364                 /*
 365                  * end of input or dest buffer - copy what we've found.
 366                  */
 367 #ifdef LINUX_STDIO
 368                 c += bp - (unsigned char *) iob->_IO_read_ptr;
 369                 memcpy( cp, iob->_IO_read_ptr, c);
 370 #else
 371                 c += bp - (unsigned char *) iob->_ptr;
 372                 memcpy( cp, iob->_ptr, c);
 373 #endif
 374                 i -= c;
 375                 cp += c;
 376                 if (i <= 0) {
 377                     /* the dest buffer is full */
 378 #ifdef LINUX_STDIO
 379                     iob->_IO_read_ptr += c;
 380 #else
 381                     iob->_cnt -= c;
 382                     iob->_ptr += c;
 383 #endif
 384                     state = FLDPLUS;
 385                     break;
 386                 }
 387                 /*
 388                  * There's one character left in the input buffer.
 389                  * Copy it & fill the buffer.  If the last char
 390                  * was a newline and the next char is not whitespace,
 391                  * this is the end of the field.  Otherwise loop.
 392                  */
 393                 --i;
 394 #ifdef LINUX_STDIO
 395                 *cp++ = j = *(iob->_IO_read_ptr + c);
 396                 iob->_IO_read_ptr = iob->_IO_read_end;
 397                 c = __underflow(iob);
 398                 iob->_IO_read_ptr++;    /* NOT automatic! */
 399 #else
 400                 *cp++ = j = *(iob->_ptr + c);
 401                 c = _filbuf(iob);
 402 #endif
 403                 if (c == EOF ||
 404                   ((j == '\0' || j == '\n') && c != ' ' && c != '\t')) {
 405                     if (c != EOF) {
 406 #ifdef LINUX_STDIO
 407                         --iob->_IO_read_ptr;
 408 #else
 409                         --iob->_ptr;
 410                         ++iob->_cnt;
 411 #endif
 412                     }
 413                     state = FLD;
 414                     break;
 415                 }
 416             }
 417             break;
 418
 419         case BODY:
 420         body:
 421             /*
 422              * get the message body up to bufsz characters or the
 423              * end of the message.  Sleazy hack: if bufsz is negative
 424              * we assume that we were called to copy directly into
 425              * the output buffer and we don't add an eos.
 426              */
 427             i = (bufsz < 0) ? -bufsz : bufsz-1;
 428 #ifdef LINUX_STDIO
 429             bp = (unsigned char *) --iob->_IO_read_ptr;
 430             cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
 431 #else
 432             bp = (unsigned char *) --iob->_ptr;
 433             cnt = ++iob->_cnt;
 434 #endif
 435             c = (cnt < i ? cnt : i);
 436             if (msg_style != MS_DEFAULT && c > 1) {
 437                 /*
 438                  * packed maildrop - only take up to the (possible)
 439                  * start of the next message.  This "matchc" should
 440                  * probably be a Boyer-Moore matcher for non-vaxen,
 441                  * particularly since we have the alignment table
 442                  * all built for the end-of-buffer test (next).
 443                  * But our vax timings indicate that the "matchc"
 444                  * instruction is 50% faster than a carefully coded
 445                  * B.M. matcher for most strings.  (So much for elegant
 446                  * algorithms vs. brute force.)  Since I (currently)
 447                  * run MH on a vax, we use the matchc instruction. --vj
 448                  */
 449                 if ((ep = matchc( fdelimlen, fdelim, c, bp )))
 450                     c = ep - bp + 1;
 451                 else {
 452                     /*
 453                      * There's no delim in the buffer but there may be
 454                      * a partial one at the end.  If so, we want to leave
 455                      * it so the "eom" check on the next call picks it up.
 456                      * Use a modified Boyer-Moore matcher to make this
 457                      * check relatively cheap.  The first "if" figures
 458                      * out what position in the pattern matches the last
 459                      * character in the buffer.  The inner "while" matches
 460                      * the pattern against the buffer, backwards starting
 461                      * at that position.  Note that unless the buffer
 462                      * ends with one of the characters in the pattern
 463                      * (excluding the first and last), we do only one test.
 464                      */
 465                     ep = bp + c - 1;
 466                     if ((sp = pat_map[*ep])) {
 467                         do {
 468                             cp = sp;
 469                             while (*--ep == *--cp)
 470                             ;
 471                             if (cp < fdelim) {
 472                                 if (ep >= bp)
 473                                     /*
 474                                      * ep < bp means that all the buffer
 475                                      * contains is a prefix of delim.
 476                                      * If this prefix is really a delim, the
 477                                      * m_eom call at entry should have found
 478                                      * it.  Thus it's not a delim and we can
 479                                      * take all of it.
 480                                      */
 481                                     c = (ep - bp) + 2;
 482                             break;
 483                         }
 484                             /* try matching one less char of delim string */
 485                             ep = bp + c - 1;
 486                         } while (--sp > fdelim);
 487                     }
 488                 }
 489             }
 490             memcpy( buf, bp, c );
 491 #ifdef LINUX_STDIO
 492             iob->_IO_read_ptr += c;
 493 #else
 494             iob->_cnt -= c;
 495             iob->_ptr += c;
 496 #endif
 497             if (bufsz < 0) {
 498                 msg_count = c;
 499                 return (state);
 500             }
 501             cp = buf + c;
 502             break;
 503
 504         default:
 505             adios (NULL, "m_getfld() called with bogus state of %d", state);
 506     }
 507 finish:
 508     *cp = 0;
 509     msg_count = cp - buf;
 510     return (state);
 511 }
 512
 513
 514 #ifdef RPATHS
 515 static char unixbuf[BUFSIZ] = "";
 516 #endif /* RPATHS */
 517
 518 void
 519 m_unknown(FILE *iob)
 520 {
 521     register int c;
 522     register long pos;
 523     char text[10];
 524     register char *cp;
 525     register char *delimstr;
 526
 527 /*
 528  * Figure out what the message delimitter string is for this
 529  * maildrop.  (This used to be part of m_Eom but I didn't like
 530  * the idea of an "if" statement that could only succeed on the
 531  * first call to m_Eom getting executed on each call, i.e., at
 532  * every newline in the message).
 533  *
 534  * If the first line of the maildrop is a Unix "From " line, we
 535  * say the style is MBOX and eat the rest of the line.  Otherwise
 536  * we say the style is MMDF and look for the delimiter string
 537  * specified when nmh was built (or from the mts.conf file).
 538  */
 539
 540     msg_style = MS_UNKNOWN;
 541
 542     pos = ftell (iob);
 543     if (fread (text, sizeof(*text), 5, iob) == 5
 544             && strncmp (text, "From ", 5) == 0) {
 545         msg_style = MS_MBOX;
 546         delimstr = "\nFrom ";
 547 #ifndef RPATHS
 548         while ((c = getc (iob)) != '\n' && c >= 0)
 549             ;
 550 #else /* RPATHS */
 551         cp = unixbuf;
 552         while ((c = getc (iob)) != '\n' && cp - unixbuf < BUFSIZ - 1)
 553             *cp++ = c;
 554         *cp = 0;
 555 #endif /* RPATHS */
 556     } else {
 557         /* not a Unix style maildrop */
 558         fseek (iob, pos, SEEK_SET);
 559         if (mmdlm2 == NULL || *mmdlm2 == 0)
 560             mmdlm2 = "\001\001\001\001\n";
 561         delimstr = mmdlm2;
 562         msg_style = MS_MMDF;
 563     }
 564     c = strlen (delimstr);
 565     fdelim = (unsigned char *) malloc((size_t) (c + 3));
 566     *fdelim++ = '\0';
 567     *fdelim = '\n';
 568     msg_delim = (char *)fdelim+1;
 569     edelim = (unsigned char *)msg_delim+1;
 570     fdelimlen = c + 1;
 571     edelimlen = c - 1;
 572     strcpy (msg_delim, delimstr);
 573     delimend = (unsigned char *)msg_delim + edelimlen;
 574     if (edelimlen <= 1)
 575         adios (NULL, "maildrop delimiter must be at least 2 bytes");
 576     /*
 577      * build a Boyer-Moore end-position map for the matcher in m_getfld.
 578      * N.B. - we don't match just the first char (since it's the newline
 579      * separator) or the last char (since the matchc would have found it
 580      * if it was a real delim).
 581      */
 582     pat_map = (unsigned char **) calloc (256, sizeof(unsigned char *));
 583
 584     for (cp = (char *) fdelim + 1; cp < (char *) delimend; cp++ )
 585         pat_map[(unsigned char)*cp] = (unsigned char *) cp;
 586
 587     if (msg_style == MS_MMDF) {
 588         /* flush extra msg hdrs */
 589         while ((c = Getc(iob)) >= 0 && eom (c, iob))
 590             ;
 591         if (c >= 0)
 592             ungetc(c, iob);
 593     }
 594 }
 595
 596
 597 void
 598 m_eomsbr (int (*action)())
 599 {
 600     if ((eom_action = action)) {
 601         msg_style = MS_MSH;
 602         *msg_delim = 0;
 603         fdelimlen = 1;
 604         delimend = fdelim;
 605     } else {
 606         msg_style = MS_MMDF;
 607         msg_delim = (char *)fdelim + 1;
 608         fdelimlen = strlen((char *)fdelim);
 609         delimend = (unsigned char *)(msg_delim + edelimlen);
 610     }
 611 }
 612
 613
 614 /*
 615  * test for msg delimiter string
 616  */
 617
 618 static int
 619 m_Eom (int c, FILE *iob)
 620 {
 621     register long pos = 0L;
 622     register int i;
 623     char text[10];
 624 #ifdef RPATHS
 625     register char *cp;
 626 #endif /* RPATHS */
 627
 628     pos = ftell (iob);
 629     if ((i = fread (text, sizeof *text, edelimlen, iob)) != edelimlen
 630             || strncmp (text, (char *)edelim, edelimlen)) {
 631         if (i == 0 && msg_style == MS_MBOX)
 632             /* the final newline in the (brain damaged) unix-format
 633              * maildrop is part of the delimitter - delete it.
 634              */
 635             return 1;
 636
 637 #if 0
 638         fseek (iob, pos, SEEK_SET);
 639 #endif
 640
 641         fseek (iob, (long)(pos-1), SEEK_SET);
 642         getc (iob);             /* should be OK */
 643         return 0;
 644     }
 645
 646     if (msg_style == MS_MBOX) {
 647 #ifndef RPATHS
 648         while ((c = getc (iob)) != '\n')
 649             if (c < 0)
 650                 break;
 651 #else /* RPATHS */
 652         cp = unixbuf;
 653         while ((c = getc (iob)) != '\n' && c >= 0 && cp - unixbuf < BUFSIZ - 1)
 654             *cp++ = c;
 655         *cp = 0;
 656 #endif /* RPATHS */
 657     }
 658
 659     return 1;
 660 }
 661
 662
 663 #ifdef RPATHS
 664 /*
 665  * Return the Return-Path and Delivery-Date
 666  * header information.
 667  *
 668  * Currently, I'm assuming that the "From " line
 669  * takes one of the following forms.
 670  *
 671  * From sender date remote from host   (for UUCP delivery)
 672  * From sender@host  date              (for sendmail delivery)
 673  */
 674
 675 int
 676 get_returnpath (char *rp, int rplen, char *dd, int ddlen)
 677 {
 678     char *ap, *bp, *cp, *dp;
 679
 680     ap = unixbuf;
 681     if (!(bp = cp = strchr(ap, ' ')))
 682         return 0;
 683
 684     /*
 685      * Check for "remote from" in envelope to see
 686      * if this message uses UUCP style addressing
 687      */
 688     while ((cp = strchr(++cp, 'r'))) {
 689         if (strncmp (cp, "remote from", 11) == 0) {
 690             cp = strrchr (cp, ' ');
 691             break;
 692         }
 693     }
 694
 695     /*
 696      * Get the Return-Path information from
 697      * the "From " envelope.
 698      */
 699     if (cp) {
 700         /* return path for UUCP style addressing */
 701         dp = strchr (++cp, '\n');
 702         snprintf (rp, rplen, "%.*s!%.*s\n", dp - cp, cp, bp - ap, ap);
 703     } else {
 704         /* return path for standard domain addressing */
 705         snprintf (rp, rplen, "%.*s\n", bp - ap, ap);
 706     }
 707
 708     /*
 709      * advance over the spaces to get to
 710      * delivery date on envelope
 711      */
 712     while (*bp == ' ')
 713         bp++;
 714
 715     /* Now get delivery date from envelope */
 716     snprintf (dd, ddlen, "%.*s\n", 24, bp);
 717
 718     unixbuf[0] = 0;
 719     return 1;
 720 }
 721 #endif /* RPATHS */
 722
 723
 724 static unsigned char *
 725 matchc(int patln, char *pat, int strln, char *str)
 726 {
 727         register char *es = str + strln - patln;
 728         register char *sp;
 729         register char *pp;
 730         register char *ep = pat + patln;
 731         register char pc = *pat++;
 732
 733         for(;;) {
 734                 while (pc != *str++)
 735                         if (str > es)
 736                                 return 0;
 737                 if (str > es+1)
 738                         return 0;
 739                 sp = str; pp = pat;
 740                 while (pp < ep && *sp++ == *pp)
 741                         pp++;
 742                 if (pp >= ep)
 743                         return ((unsigned char *)--str);
 744         }
 745 }
 746
 747
 748 /*
 749  * Locate character "term" in the next "cnt" characters of "src".
 750  * If found, return its address, otherwise return 0.
 751  */
 752
 753 static unsigned char *
 754 locc(int cnt, unsigned char *src, unsigned char term)
 755 {
 756     while (*src++ != term && --cnt > 0);
 757
 758     return (cnt > 0 ? --src : (unsigned char *)0);
 759 }
 760