git.marmaro.de Git - mmh/blob - docs/contrib/replyfilter

   1 #!/usr/bin/perl
   2 #
   3 # replyfilter - A reply filter for nmh
   4 #
   5 # The idea behind this program is that it will act as a format filter
   6 # for nmh.  It will try to extract out all text/plain parts and format
   7 # them if necessary using a filter program.
   8 #
   9 # To use this program, configure nmh in the following way (nmh 1.5 or later):
  10 #
  11 # - Put the path to this program in your .mh_profile under formatproc:
  12 #
  13 #   formatproc: replyfilter
  14 #
  15 # - Create a mhl reply filter that consists of the following line:
  16 #
  17 #   body:nocomponent,format,nowrap,formatarg="%(trim{content-type})%(putstr)",formatarg="%(trim{content-transfer-encoding})%(putstr)",formatarg=">"
  18 #
  19 #   To decode this a bit:
  20 #
  21 #   body        - Output the "body" component
  22 #   nocomponent - Don't output a component prefix (normally here we use a
  23 #                 component prefix of ">" as a quote character, but we're
  24 #                 going to have replyfilter do that).
  25 #   nowrap      - Don't wrap lines if they exceed the column width
  26 #   formatarg   - Arguments to fmtproc.  The first argument is the value of
  27 #                 the Content-type header; the second is the value of the
  28 #                 Content-Transfer-Encoding header.  The last "formatarg"
  29 #                 is used as your quoting prefix.  Replace it with whatever
  30 #                 you want.
  31 #
  32
  33 use Mail::Field;
  34 use MIME::Head;
  35 use MIME::QuotedPrint;
  36 use MIME::Base64;
  37 use Encode;
  38
  39 #
  40 # The program we use to format "long" text
  41 #
  42
  43 $filterprogram = 'par';
  44
  45 #
  46 # Our output character set.  This script assumes a UTF-8 locale, but if you
  47 # want to run under a different locale the change it here.
  48 #
  49
  50 $outcharset = 'utf-8';
  51
  52 #
  53 # Maximum column width (used by the HTML converter and to decide if we need
  54 # to invoke the filter program
  55 #
  56
  57 $maxcolwidth = 78;
  58
  59 #
  60 # Out HTML converter program & arguments
  61 #
  62
  63 @htmlconv = ('w3m', '-dump', '-cols', $maxcolwidth - 2, '-T', 'text/html',
  64              '-O', $outcharset);
  65
  66
  67 die "Usage: $0 Content-type content-transfer-encoding quote-prefix\n"
  68                                 if $#ARGV != 2;
  69
  70 if ($ARGV[0] ne "") {
  71         my $ctype = Mail::Field->new('Content-Type', $ARGV[0]);
  72         $content_type =  $ctype->type;
  73         $charset = $ctype->charset;
  74         $boundary = $ctype->boundary;
  75 } else {
  76         $content_type = 'text/plain';
  77         $charset = 'us-ascii';
  78 }
  79
  80 $encoding = $ARGV[1] eq "" ? '7bit' : lc($ARGV[1]);
  81 $quoteprefix = $ARGV[2];
  82
  83 #
  84 # Set up our output to be in our character set
  85 #
  86
  87 binmode(STDOUT, ":encoding($outcharset)");
  88
  89 #
  90 # The simplest case: if we have a single type of text/plain, send it
  91 # to our format subroutine.
  92 #
  93
  94 if ($content_type eq 'text/plain') {
  95         process_text(\*STDIN, $encoding, $charset);
  96         exit 0;
  97 }
  98
  99 #
 100 # Alright, here's what we need to do.
 101 #
 102 # Find any text/plain parts and decode them.  Decode them via base64 or
 103 # quoted-printable, and feed them to our formatting filter when appropriate.
 104 # Put markers in the output for other content types.
 105 #
 106
 107 ($type) = (split('/', $content_type));
 108
 109 if ($type eq 'multipart') {
 110
 111         #
 112         # For multipart messages we have to do a little extra.
 113         # Eat the MIME prologue (everything up until the first boundary)
 114         #
 115
 116         if (! defined $boundary || $boundary eq '') {
 117                 print "No boundary in Content-Type header!\n";
 118                 eat_part(\*STDIN);
 119                 exit 1;
 120         }
 121
 122         while (<STDIN>) {
 123                 last if match_boundary($_, $boundary);
 124         }
 125
 126         if (eof(STDIN)) {
 127                 print "Unable to find boundary in message\n";
 128                 exit 1;
 129         }
 130 } else {
 131         undef $boundary;
 132 }
 133
 134 process_part(\*STDIN, $content_type, $encoding, $charset, $boundary);
 135
 136 if ($boundary) {
 137         #
 138         # Eat the MIME epilog
 139         #
 140         eat_part(\*STDIN);
 141 }
 142
 143 exit 0;
 144
 145 #
 146 # Handled encoded text.  I think we can assume if the encoding is q-p
 147 # or base64 to feed it into a formatting filter.
 148 #
 149
 150 sub process_text (*$$;$)
 151 {
 152         my ($input, $encoding, $charset, $boundary) = @_;
 153         my $text, $filterpid, $prefixpid, $finread, $finwrite;
 154         my $foutread, $foutwrite, $decoder, $ret, $filterflag;
 155         my @text = ( '' ), $maxline = 0;
 156
 157         #
 158         # In the simple case, just spit out the text prefixed by the
 159         # quote character
 160         #
 161
 162         if ($encoding eq '7bit' || $encoding eq '8bit') {
 163                 #
 164                 # Switch the character set to whatever is specified by
 165                 # the MIME message
 166                 #
 167                 binmode($input, ":encoding($charset)");
 168                 while (<$input>) {
 169                         $ret = match_boundary($_, $boundary);
 170                         if (defined $ret) {
 171                                 return $ret;
 172                         }
 173                         print $quoteprefix, $_;
 174                 }
 175                 return 'EOF';
 176         } else {
 177                 #
 178                 # If we've got some other encoding, the input text is almost
 179                 # certainly US-ASCII
 180                 #
 181
 182                 binmode($input, ":encoding(us-ascii)");
 183
 184                 $decoder = find_decoder($encoding);
 185                 if (! defined $decoder) {
 186                         return 'EOF';
 187                 }
 188         }
 189
 190         #
 191         # Okay, assume that the encoding will make it so that we MIGHT need
 192         # to filter it.  Read it in; if it's too long, filter it.
 193         #
 194
 195         my $chardecode = find_encoding($charset);
 196
 197         while (<$input>) {
 198                 my $line, $len;
 199
 200                 last if ($ret = match_boundary($_, $boundary));
 201
 202                 $line = $chardecode->decode(&$decoder($_));
 203
 204                 if (substr($text[$#text], -1, 1) eq "\n") {
 205                         push @text, $line;
 206                 } else {
 207                         $text[$#text] .= $line;
 208                 }
 209                 if (($len = length($text[$#text])) > $maxline) {
 210                         $maxline = $len;
 211                 }
 212         }
 213
 214         if (! defined $ret) {
 215                 $ret = 'EOF';
 216         }
 217
 218         if ($maxline <= $maxcolwidth) {
 219                 #
 220                 # These are short enough; just output it now as-is
 221                 #
 222                 print STDOUT @text;
 223                 return $ret;
 224         }
 225
 226         #
 227         # We fork a copy of ourselves to read the output from the filter
 228         # program and prefix the quote character.
 229         #
 230
 231         pipe($finread, $finwrite) || die "pipe() failed: $!\n";
 232         pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
 233
 234         binmode($finread, ":encoding($outcharset)");
 235         binmode($finwrite, ":encoding($outcharset)");
 236         binmode($foutread, ":encoding($outcharset)");
 237         binmode($foutwrite, ":encoding($outcharset)");
 238
 239         if ($filterpid = fork) {
 240                 #
 241                 # Close the pipes in the parent that we're not using
 242                 #
 243
 244                 close($finread);
 245                 close($foutwrite);
 246         } elsif (defined $filterpid) {
 247                 #
 248                 # Close our ununsed filehandles
 249                 #
 250
 251                 close($finwrite);
 252                 close($foutread);
 253
 254                 #
 255                 # Dup() down the filehandles to standard input and output
 256                 #
 257
 258                 open(STDIN, "<&", $finread) ||
 259                                         die "dup(filterin) failed: $!\n";
 260                 open(STDOUT, ">&", $foutwrite) ||
 261                                         die "dup(filterout) failed: $!\n";
 262
 263                 #
 264                 # Close our copies.
 265                 #
 266
 267                 close($finread);
 268                 close($foutwrite);
 269
 270                 #
 271                 # Exec our filter
 272                 #
 273
 274                 exec $filterprogram ||
 275                                 die "Unable to exec $filterprogram: $!\n";
 276         } else {
 277                 die "Fork for $filterprogram failed: $!\n";
 278         }
 279
 280         #
 281         # Fork our output handler.
 282         #
 283
 284         if ($prefixpid = fork) {
 285                 #
 286                 # We don't need these anymore
 287                 #
 288                 close($foutread);
 289
 290         } elsif (defined $prefixpid) {
 291                 #
 292                 # Read from foutwrite, and output (with prefix) to stdout
 293                 #
 294
 295                 close($finwrite);
 296
 297                 while (<$foutread>) {
 298                         print STDOUT $quoteprefix, $_;
 299                 }
 300
 301                 exit 0;
 302         }
 303
 304         #
 305         # Send our input to the filter program
 306         #
 307
 308         print $finwrite @text;
 309
 310         close($finwrite);
 311         waitpid $filterpid, 0;
 312         warn "Filter process exited with ", ($? >> 8), "\n" if $?;
 313         waitpid $prefixpid, 0;
 314         warn "Pipe reader process exited with ", ($? >> 8), "\n" if $?;
 315
 316         return $ret;
 317 }
 318
 319 #
 320 # Filter HTML through a converter program
 321 #
 322
 323 sub process_html (*$$;$)
 324 {
 325         my ($input, $encoding, $charset, $boundary) = @_;
 326         my $filterpid, $prefixpid, $finread, $finwrite;
 327         my $foutread, $foutwrite, $decoder, $ret;
 328
 329         if (! defined($decoder = find_decoder($encoding))) {
 330                 return 'EOF';
 331         }
 332
 333         #
 334         # We fork a copy of ourselves to read the output from the filter
 335         # program and prefix the quote character.
 336         #
 337
 338         pipe($finread, $finwrite) || die "pipe() failed: $!\n";
 339         pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
 340
 341         binmode($finread, ":encoding($outcharset)");
 342         binmode($finread, ":encoding($outcharset)");
 343         binmode($foutread, ":encoding($outcharset)");
 344         binmode($foutwrite, ":encoding($outcharset)");
 345
 346         if ($filterpid = fork) {
 347                 #
 348                 # Close the pipes in the parent that we're not using
 349                 #
 350
 351                 close($finread);
 352                 close($foutwrite);
 353         } elsif (defined $filterpid) {
 354                 #
 355                 # Close our ununsed filehandles
 356                 #
 357
 358                 close($finwrite);
 359                 close($foutread);
 360
 361                 #
 362                 # Dup() down the filehandles to standard input and output
 363                 #
 364
 365                 open(STDIN, "<&", $finread) ||
 366                                         die "dup(filterin) failed: $!\n";
 367                 open(STDOUT, ">&", $foutwrite) ||
 368                                         die "dup(filterout) failed: $!\n";
 369
 370                 #
 371                 # Close our copies.
 372                 #
 373
 374                 close($finread);
 375                 close($foutwrite);
 376
 377                 #
 378                 # Exec our converter
 379                 #
 380
 381                 exec (@htmlconv) ||
 382                                 die "Unable to exec $filterprogram: $!\n";
 383         } else {
 384                 die "Fork for $htmlconv[0] failed: $!\n";
 385         }
 386
 387         #
 388         # Fork our output handler.
 389         #
 390
 391         if ($prefixpid = fork) {
 392                 #
 393                 # We don't need these anymore
 394                 #
 395                 close($foutread);
 396
 397         } elsif (defined $prefixpid) {
 398                 #
 399                 # Read from foutwrite, and output (with prefix) to stdout
 400                 #
 401
 402                 close($finwrite);
 403
 404                 while (<$foutread>) {
 405                         print STDOUT $quoteprefix, $_;
 406                 }
 407
 408                 exit 0;
 409         }
 410
 411         #
 412         # Send our input to the filter program
 413         #
 414
 415         while (<$input>) {
 416                 last if ($ret = match_boundary($_, $boundary));
 417                 print $finwrite (&$decoder($_));
 418         }
 419
 420         if (! defined $ret) {
 421                 $ret = 'EOF';
 422         }
 423
 424         close($finwrite);
 425         waitpid $filterpid, 0;
 426         warn "HTML converter process exited with ", scalar($? >> 8), "\n" if $?;
 427         waitpid $prefixpid, 0;
 428         warn "Pipe reader process exited with ", $? >> 8, "\n" if $?;
 429
 430         return $ret;
 431 }
 432
 433 #
 434 # Decide what to do, based on what kind of content it is.
 435 #
 436
 437 sub process_part (*$$$$;$)
 438 {
 439         my ($input, $content_type, $encoding, $charset, $boundary, $name) = @_;
 440         my ($type, $subtype) = (split('/', $content_type, -1), '');
 441
 442         if ($type eq 'text') {
 443                 #
 444                 # If this is a text part, right now we only deal with
 445                 # plain and HTML parts.
 446                 #
 447                 if ($subtype eq 'plain') {
 448                         return process_text($input, $encoding, $charset,
 449                                             $boundary);
 450                 } elsif ($subtype eq 'html') {
 451                         return process_html($input, $encoding, $charset,
 452                                             $boundary);
 453                 } else {
 454                         print ">>> $content_type content\n";
 455                         return eat_part($input, $boundary);
 456                 }
 457         } elsif ($type eq 'multipart') {
 458                 return process_multipart($input, $subtype, $boundary);
 459         } else {
 460                 #
 461                 # Other types we're not sure what to do with right now
 462                 # Just put a marker in there
 463                 #
 464
 465                 print ">>> $content_type attachment";
 466                 if (defined $name) {
 467                         print ", name=$name";
 468                 }
 469                 print "\n";
 470
 471                 return eat_part($input, $boundary);
 472         }
 473 }
 474
 475 #
 476 # Process a multipart message.
 477 #
 478 # When called, we should be right after the beginning of the first
 479 # boundary marker.  So we should be pointed at header lines which describe
 480 # the content of this part
 481 #
 482
 483 sub process_multipart ($$$)
 484 {
 485         my ($input, $subtype, $boundary) = @_;
 486         my $altout;
 487
 488         while (1) {
 489                 my $encoding, $type, $end, $name, $charset;
 490
 491                 #
 492                 # Use the Mail::Header package to read in any headers
 493                 # corresponding to this part
 494                 #
 495
 496                 my $head = Mail::Header->new($input, (MailFrom => 'IGNORE'));
 497
 498                 #
 499                 # Extract out any Content-Type, Content-Transfer-Encoding, and
 500                 # Content-Disposition headers
 501                 #
 502
 503                 my $ctype = Mail::Field->extract('Content-Type', $head);
 504                 my $cte = Mail::Field->extract('Content-Transfer-Encoding',
 505                                                $head);
 506                 my $cdispo = Mail::Field->extract('Content-Disposition', $head);
 507
 508                 if (defined $ctype) {
 509                         $type = $ctype->type;
 510                         $charset = $ctype->charset;
 511                 } else {
 512                         $type = 'text/plain';
 513                         $charset = 'us-ascii';
 514                 }
 515
 516                 $encoding = defined $cte ? $cte->param('_') : '7bit';
 517                 $name = defined $cdispo ? $cdispo->param('filename') : undef;
 518
 519                 #
 520                 # Special handling for multipart/alternative; pick
 521                 # the "first" one we can handle (which is usually
 522                 # text/plain) and silently eat the rest, but output a
 523                 # warning if we can't handle anything.
 524                 #
 525
 526                 if ($altout) {
 527                         $end = eat_part($input, $boundary);
 528                 } else {
 529                         my $subboundary = $boundary;
 530                         my $maintype = (split('/', $type))[0];
 531
 532                         if ($maintype eq 'multipart') {
 533                                 $subboundary = $ctype->boundary;
 534                                 #
 535                                 # Go until we find our beginning of this
 536                                 # part
 537                                 #
 538                                 my $subend = eat_part($input, $subboundary);
 539
 540                                 if ($subend ne 'EOP') {
 541                                         print ">>> WARNING: malformed ",
 542                                                 "nested multipart\n";
 543                                         return $subend;
 544                                 }
 545                         }
 546
 547                         $end = process_part($input, $type, $encoding,
 548                                             $charset, $subboundary, $name);
 549
 550                         if ($subtype eq 'alternative' && ! defined $altout &&
 551                             $type eq 'text/plain') {
 552                                 $altout = 1;
 553                         }
 554
 555                         #
 556                         # Since we changed the semantics of $boundary
 557                         # above for nested multiparts, if we are
 558                         # handling a nested multipart then find the end
 559                         # of our current part
 560                         #
 561
 562                         if ($maintype eq 'multipart') {
 563                                 $end = eat_part($input, $boundary);
 564                         }
 565
 566                 }
 567
 568                 if ($end eq 'EOM' || $end eq 'EOF') {
 569                         if ($subtype eq 'alternative' && !defined $altout) {
 570                                 print ">>>multipart/alternative: no suitable ",
 571                                         "parts\n";
 572                         }
 573                         return $end;
 574                 }
 575         }
 576 }
 577
 578 #
 579 # "Eat" a MIME part; consume content until we hit the boundary or EOF
 580 #
 581
 582 sub eat_part ($$)
 583 {
 584         my ($input, $boundary) = @_;
 585         my $ret;
 586
 587         #
 588         # If we weren't given a boundary just eat input until EOF
 589         #
 590
 591         if (! defined $boundary) {
 592                 while (<$input>) { }
 593                 return 'EOF';
 594         }
 595
 596         #
 597         # Otherwise, consume data until we hit our boundary
 598         #
 599
 600         while (<$input>) {
 601                 if ($ret = match_boundary($_, $boundary)) {
 602                         return $ret;
 603                 }
 604         }
 605
 606         return 'EOF';
 607 }
 608
 609 #
 610 # Return the decoder subroutine to use
 611 #
 612
 613 sub find_decoder ($)
 614 {
 615         my ($encoding) = @_;
 616
 617         if ($encoding eq '7bit' || $encoding eq '8bit') {
 618                 return \&null_decoder;
 619         } elsif ($encoding eq 'base64') {
 620                 return \&decode_base64;
 621         } elsif ($encoding eq 'quoted-printable') {
 622                 return \&decode_qp;
 623         } else {
 624                 warn "Unknown encoding: $encoding\n";
 625                 return undef;
 626         }
 627 }
 628
 629 sub null_decoder ($)
 630 {
 631         my ($input) = @_;
 632
 633         return $input;
 634 }
 635
 636 #
 637 # Match a line against the boundary string
 638 #
 639
 640 sub match_boundary($$)
 641 {
 642         my ($_, $boundary) = @_;
 643
 644         if (substr($_, 0, 2) eq '--') {
 645                 s/[ \t\r\n]+\Z//;
 646                 if ($_ eq "--$boundary") {
 647                         return 'EOP';
 648                 } elsif ($_ eq "--$boundary--") {
 649                         return 'EOM';
 650                 }
 651         }
 652
 653         return undef;
 654 }