git.marmaro.de Git - mmh/blob - docs/replyfilter

   1 #!/usr/bin/perl
   2 #
   3 # replyfilter - A reply filter for nmh
   4 #
   5 # The idea behind this program is that it will act as a format filter
   6 # for nmh.  It will try to extract out all text/plain parts and format
   7 # them if necessary using a filter program.
   8 #
   9 # To use this program, configure nmh in the following way (nmh 1.5 or later):
  10 #
  11 # - Put the path to this program in your .mh_profile under formatproc:
  12 #
  13 #   formatproc: replyfilter
  14 #
  15 # - Create a mhl reply filter that consists of the following line:
  16 #
  17 #   body:nocomponent,format,nowrap,formatarg="%(trim{content-type})%(putstr)",formatarg="%(trim{content-transfer-encoding})%(putstr)",formatarg=">"
  18 #
  19 #   To decode this a bit:
  20 #
  21 #   body        - Output the "body" component
  22 #   nocomponent - Don't output a component prefix (normally here we use a
  23 #                 component prefix of ">" as a quote character, but we're
  24 #                 going to have replyfilter do that).
  25 #   nowrap      - Don't wrap lines if they exceed the column width
  26 #   formatarg   - Arguments to fmtproc.  The first argument is the value of
  27 #                 the Content-type header; the second is the value of the
  28 #                 Content-Transfer-Encoding header.  The last "formatarg"
  29 #                 is used as your quoting prefix.  Replace it with whatever
  30 #                 you want.
  31 #
  32
  33 use Mail::Field;
  34 use MIME::Head;
  35 use MIME::QuotedPrint;
  36 use MIME::Base64;
  37 use Encode;
  38
  39 #
  40 # The program we use to format "long" text
  41 #
  42
  43 $filterprogram = 'par';
  44
  45 #
  46 # Our output character set.  This script assumes a UTF-8 locale, but if you
  47 # want to run under a different locale the change it here.
  48 #
  49
  50 $outcharset = 'utf-8';
  51
  52 #
  53 # Maximum column width (used by the HTML converter and to decide if we need
  54 # to invoke the filter program
  55 #
  56
  57 $maxcolwidth = 78;
  58
  59 #
  60 # Out HTML converter program & arguments
  61 #
  62
  63 @htmlconv = ('w3m', '-dump', '-cols', $maxcolwidth - 2, '-T', 'text/html',
  64              '-O', $outcharset);
  65
  66
  67 die "Usage: $0 Content-type content-transfer-encoding quote-prefix\n"
  68                                 if $#ARGV != 2;
  69
  70 if ($ARGV[0] ne "") {
  71         my $ctype = Mail::Field->new('Content-Type', $ARGV[0]);
  72         $content_type =  $ctype->type;
  73         $charset = $ctype->charset;
  74         $boundary = $ctype->boundary;
  75 } else {
  76         $content_type = 'text/plain';
  77         $charset = 'us-ascii';
  78 }
  79
  80 $encoding = $ARGV[1] eq "" ? '7bit' : lc($ARGV[1]);
  81 $quoteprefix = $ARGV[2];
  82
  83 #
  84 # Set up our output to be in our character set
  85 #
  86
  87 binmode(STDOUT, ":encoding($outcharset)");
  88
  89 #
  90 # The simplest case: if we have a single type of text/plain, send it
  91 # to our format subroutine.
  92 #
  93
  94 if ($content_type eq 'text/plain') {
  95         process_text(\*STDIN, $encoding, $charset);
  96         exit 0;
  97 }
  98
  99 #
 100 # Alright, here's what we need to do.
 101 #
 102 # Find any text/plain parts and decode them.  Decode them via base64 or
 103 # quoted-printable, and feed them to our formatting filter when appropriate.
 104 # Put markers in the output for other content types.
 105 #
 106
 107 ($type) = (split('/', $content_type));
 108
 109 if ($type eq 'multipart') {
 110
 111         #
 112         # For multipart messages we have to do a little extra.
 113         # Eat the MIME prologue (everything up until the first boundary)
 114         #
 115
 116         if (! defined $boundary || $boundary eq '') {
 117                 print "No boundary in Content-Type header!\n";
 118                 eat_part(\*STDIN);
 119                 exit 1;
 120         }
 121
 122         while (<STDIN>) {
 123                 last if match_boundary($_, $boundary);
 124         }
 125
 126         if (eof(STDIN)) {
 127                 print "Unable to find boundary in message\n";
 128                 exit 1;
 129         }
 130 } else {
 131         undef $boundary;
 132 }
 133
 134 process_part(\*STDIN, $content_type, $encoding, $charset, $boundary);
 135
 136 if ($boundary) {
 137         #
 138         # Eat the MIME eplilog
 139         #
 140         eat_part(\*STDIN);
 141 }
 142
 143 exit 0;
 144
 145 #
 146 # Handled encoded text.  I think we can assume if the encoding is q-p
 147 # or base64 to feed it into a formatting filter.
 148 #
 149
 150 sub process_text (*$$;$)
 151 {
 152         my ($input, $encoding, $charset, $boundary) = @_;
 153         my $text, $filterpid, $prefixpid, $finread, $finwrite;
 154         my $foutread, $foutwrite, $decoder, $ret, $filterflag;
 155         my @text = ( '' ), $maxline = 0;
 156
 157         #
 158         # In the simple case, just spit out the text prefixed by the
 159         # quote character
 160         #
 161
 162         if ($encoding eq '7bit' || $encoding ne '8bit') {
 163                 while (<$input>) {
 164                         $ret = match_boundary($_, $boundary);
 165                         if (defined $ret) {
 166                                 return $ret;
 167                         }
 168                         print $quoteprefix, $_;
 169                 }
 170                 return 'EOF';
 171         } else {
 172                 $decoder = find_decoder($encoding);
 173                 if (! defined $decoder) {
 174                         return 'EOF';
 175                 }
 176         }
 177
 178         #
 179         # Okay, assume that the encoding will make it so that we MIGHT need
 180         # to filter it.  Read it in; if it's too long, filter it.
 181         #
 182
 183         while (<$input>) {
 184                 my $line, $len;
 185
 186                 last if ($ret = match_boundary($_, $boundary));
 187
 188                 $line = decode($charset, &$decoder($_));
 189
 190                 if (substr($text[$#text], -1, 1) eq "\n") {
 191                         push @text, $line;
 192                 } else {
 193                         $text[$#text] .= $line;
 194                 }
 195                 if (($len = length($text[$#text])) > $maxline) {
 196                         $maxline = $len;
 197                 }
 198         }
 199
 200         if (! defined $ret) {
 201                 $ret = 'EOF';
 202         }
 203
 204         if ($maxline <= $maxcolwidth) {
 205                 #
 206                 # These are short enough; just output it now as-is
 207                 #
 208                 print STDOUT @text;
 209                 return $ret;
 210         }
 211
 212         #
 213         # We fork a copy of ourselves to read the output from the filter
 214         # program and prefix the quote character.
 215         #
 216
 217         pipe($finread, $finwrite) || die "pipe() failed: $!\n";
 218         pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
 219
 220         binmode($finread, ":encoding($outcharset)");
 221         binmode($finread, ":encoding($outcharset)");
 222         binmode($foutread, ":encoding($outcharset)");
 223         binmode($foutwrite, ":encoding($outcharset)");
 224
 225         if ($filterpid = fork) {
 226                 #
 227                 # Close the pipes in the parent that we're not using
 228                 #
 229
 230                 close($finread);
 231                 close($foutwrite);
 232         } elsif (defined $filterpid) {
 233                 #
 234                 # Close our ununsed filehandles
 235                 #
 236
 237                 close($finwrite);
 238                 close($foutread);
 239
 240                 #
 241                 # Dup() down the filehandles to standard input and output
 242                 #
 243
 244                 open(STDIN, "<&", $finread) ||
 245                                         die "dup(filterin) failed: $!\n";
 246                 open(STDOUT, ">&", $foutwrite) ||
 247                                         die "dup(filterout) failed: $!\n";
 248
 249                 #
 250                 # Close our copies.
 251                 #
 252
 253                 close($finread);
 254                 close($foutwrite);
 255
 256                 #
 257                 # Exec our filter
 258                 #
 259
 260                 exec $filterprogram ||
 261                                 die "Unable to exec $filterprogram: $!\n";
 262         } else {
 263                 die "Fork for $filterprogram failed: $!\n";
 264         }
 265
 266         #
 267         # Fork our output handler.
 268         #
 269
 270         if ($prefixpid = fork) {
 271                 #
 272                 # We don't need these anymore
 273                 #
 274                 close($foutread);
 275
 276         } elsif (defined $prefixpid) {
 277                 #
 278                 # Read from foutwrite, and output (with prefix) to stdout
 279                 #
 280
 281                 close($finwrite);
 282
 283                 while (<$foutread>) {
 284                         print STDOUT $quoteprefix, $_;
 285                 }
 286
 287                 exit 0;
 288         }
 289
 290         #
 291         # Send our input to the filter program
 292         #
 293
 294         print $finwrite @text;
 295
 296         close($finwrite);
 297         waitpid $filterpid, 0;
 298         warn "Filter process exited with ", ($? >> 8), "\n" if $?;
 299         waitpid $prefixpid, 0;
 300         warn "Pipe reader process exited with ", ($? >> 8), "\n" if $?;
 301
 302         return $ret;
 303 }
 304
 305 #
 306 # Filter HTML through a converter program
 307 #
 308
 309 sub process_html (*$$;$)
 310 {
 311         my ($input, $encoding, $charset, $boundary) = @_;
 312         my $filterpid, $prefixpid, $finread, $finwrite;
 313         my $foutread, $foutwrite, $decoder, $ret;
 314
 315         if (! defined($decoder = find_decoder($encoding))) {
 316                 return 'EOF';
 317         }
 318
 319         #
 320         # We fork a copy of ourselves to read the output from the filter
 321         # program and prefix the quote character.
 322         #
 323
 324         pipe($finread, $finwrite) || die "pipe() failed: $!\n";
 325         pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
 326
 327         binmode($finread, ":encoding($outcharset)");
 328         binmode($finread, ":encoding($outcharset)");
 329         binmode($foutread, ":encoding($outcharset)");
 330         binmode($foutwrite, ":encoding($outcharset)");
 331
 332         if ($filterpid = fork) {
 333                 #
 334                 # Close the pipes in the parent that we're not using
 335                 #
 336
 337                 close($finread);
 338                 close($foutwrite);
 339         } elsif (defined $filterpid) {
 340                 #
 341                 # Close our ununsed filehandles
 342                 #
 343
 344                 close($finwrite);
 345                 close($foutread);
 346
 347                 #
 348                 # Dup() down the filehandles to standard input and output
 349                 #
 350
 351                 open(STDIN, "<&", $finread) ||
 352                                         die "dup(filterin) failed: $!\n";
 353                 open(STDOUT, ">&", $foutwrite) ||
 354                                         die "dup(filterout) failed: $!\n";
 355
 356                 #
 357                 # Close our copies.
 358                 #
 359
 360                 close($finread);
 361                 close($foutwrite);
 362
 363                 #
 364                 # Exec our converter
 365                 #
 366
 367                 exec (@htmlconv) ||
 368                                 die "Unable to exec $filterprogram: $!\n";
 369         } else {
 370                 die "Fork for $htmlconv[0] failed: $!\n";
 371         }
 372
 373         #
 374         # Fork our output handler.
 375         #
 376
 377         if ($prefixpid = fork) {
 378                 #
 379                 # We don't need these anymore
 380                 #
 381                 close($foutread);
 382
 383         } elsif (defined $prefixpid) {
 384                 #
 385                 # Read from foutwrite, and output (with prefix) to stdout
 386                 #
 387
 388                 close($finwrite);
 389
 390                 while (<$foutread>) {
 391                         print STDOUT $quoteprefix, $_;
 392                 }
 393
 394                 exit 0;
 395         }
 396
 397         #
 398         # Send our input to the filter program
 399         #
 400
 401         while (<$input>) {
 402                 last if ($ret = match_boundary($_, $boundary));
 403                 print $finwrite (&$decoder($_));
 404         }
 405
 406         if (! defined $ret) {
 407                 $ret = 'EOF';
 408         }
 409
 410         close($finwrite);
 411         waitpid $filterpid, 0;
 412         warn "HTML converter process exited with ", scalar($? >> 8), "\n" if $?;
 413         waitpid $prefixpid, 0;
 414         warn "Pipe reader process exited with ", $? >> 8, "\n" if $?;
 415
 416         return $ret;
 417 }
 418
 419 #
 420 # Decide what to do, based on what kind of content it is.
 421 #
 422
 423 sub process_part (*$$$$;$)
 424 {
 425         my ($input, $content_type, $encoding, $charset, $boundary, $name) = @_;
 426         my ($type, $subtype) = (split('/', $content_type, -1), '');
 427
 428         if ($type eq 'text') {
 429                 #
 430                 # If this is a text part, right now we only deal with
 431                 # plain and HTML parts.
 432                 #
 433                 if ($subtype eq 'plain') {
 434                         return process_text($input, $encoding, $charset,
 435                                             $boundary);
 436                 } elsif ($subtype eq 'html') {
 437                         return process_html($input, $encoding, $charset,
 438                                             $boundary);
 439                 } else {
 440                         print ">>> $content_type content\n";
 441                         return eat_part($input, $boundary);
 442                 }
 443         } elsif ($type eq 'multipart') {
 444                 return process_multipart($input, $subtype, $boundary);
 445         } else {
 446                 #
 447                 # Other types we're not sure what to do with right now
 448                 # Just put a marker in there
 449                 #
 450
 451                 print ">>> $content_type attachment";
 452                 if (defined $name) {
 453                         print ", name=$name";
 454                 }
 455                 print "\n";
 456
 457                 return eat_part($input, $boundary);
 458         }
 459 }
 460
 461 #
 462 # Process a multipart message.
 463 #
 464 # When called, we should be right after the beginning of the first
 465 # boundary marker.  So we should be pointed at header lines which describe
 466 # the content of this part
 467 #
 468
 469 sub process_multipart ($$$)
 470 {
 471         my ($input, $subtype, $boundary) = @_;
 472         my $altout;
 473
 474         while (1) {
 475                 my $encoding, $type, $end, $name, $charset;
 476
 477                 #
 478                 # Use the Mail::Header package to read in any headers
 479                 # corresponding to this part
 480                 #
 481
 482                 my $head = Mail::Header->new($input, (MailFrom => 'IGNORE'));
 483
 484                 #
 485                 # Extract out any Content-Type, Content-Transfer-Encoding, and
 486                 # Content-Disposition headers
 487                 #
 488
 489                 my $ctype = Mail::Field->extract('Content-Type', $head);
 490                 my $cte = Mail::Field->extract('Content-Transfer-Encoding',
 491                                                $head);
 492                 my $cdispo = Mail::Field->extract('Content-Disposition', $head);
 493
 494                 if (defined $ctype) {
 495                         $type = $ctype->type;
 496                         $charset = $ctype->charset;
 497                 } else {
 498                         $type = 'text/plain';
 499                         $charset = 'us-ascii';
 500                 }
 501
 502                 $encoding = defined $cte ? $cte->param('_') : '7bit';
 503                 $name = defined $cdispo ? $cdispo->param('filename') : undef;
 504
 505                 #
 506                 # Special handling for multipart/alternative; pick
 507                 # the "first" one we can handle (which is usually
 508                 # text/plain) and silently eat the rest, but output a
 509                 # warning if we can't handle anything.
 510                 #
 511
 512                 if ($altout) {
 513                         $end = eat_part($input, $boundary);
 514                 } else {
 515                         my $subboundary = $boundary;
 516                         my $maintype = (split('/', $type))[0];
 517
 518                         if ($maintype eq 'multipart') {
 519                                 $subboundary = $ctype->boundary;
 520                                 #
 521                                 # Go until we find our beginning of this
 522                                 # part
 523                                 #
 524                                 my $subend = eat_part($input, $subboundary);
 525
 526                                 if ($subend ne 'EOP') {
 527                                         print ">>> WARNING: malformed ",
 528                                                 "nested multipart\n";
 529                                         return $subend;
 530                                 }
 531                         }
 532
 533                         $end = process_part($input, $type, $encoding,
 534                                             $charset, $subboundary, $name);
 535
 536                         if ($subtype eq 'alternative' && ! defined $altout &&
 537                             $type eq 'text/plain') {
 538                                 $altout = 1;
 539                         }
 540
 541                         #
 542                         # Since we changed the semantics of $boundary
 543                         # above for nested multiparts, if we are
 544                         # handling a nested multipart then find the end
 545                         # of our current part
 546                         #
 547
 548                         if ($maintype eq 'multipart') {
 549                                 $end = eat_part($input, $boundary);
 550                         }
 551
 552                 }
 553
 554                 if ($end eq 'EOM' || $end eq 'EOF') {
 555                         if ($subtype eq 'alternative' && !defined $altout) {
 556                                 print ">>>multipart/alternative: no suitable ",
 557                                         "parts\n";
 558                         }
 559                         return $end;
 560                 }
 561         }
 562 }
 563
 564 #
 565 # "Eat" a MIME part; consume content until we hit the boundary or EOF
 566 #
 567
 568 sub eat_part ($$)
 569 {
 570         my ($input, $boundary) = @_;
 571         my $ret;
 572
 573         #
 574         # If we weren't given a boundary just eat input until EOF
 575         #
 576
 577         if (! defined $boundary) {
 578                 while (<$input>) { }
 579                 return 'EOF';
 580         }
 581
 582         #
 583         # Otherwise, consume data until we hit our boundary
 584         #
 585
 586         while (<$input>) {
 587                 if ($ret = match_boundary($_, $boundary)) {
 588                         return $ret;
 589                 }
 590         }
 591
 592         return 'EOF';
 593 }
 594
 595 #
 596 # Return the decoder subroutine to use
 597 #
 598
 599 sub find_decoder ($)
 600 {
 601         my ($encoding) = @_;
 602
 603         if ($encoding eq '7bit' || $encoding eq '8bit') {
 604                 return \&null_decoder;
 605         } elsif ($encoding eq 'base64') {
 606                 return \&decode_base64;
 607         } elsif ($encoding eq 'quoted-printable') {
 608                 return \&decode_qp;
 609         } else {
 610                 warn "Unknown encoding: $encoding\n";
 611                 return undef;
 612         }
 613 }
 614
 615 sub null_decoder ($)
 616 {
 617         my ($input) = @_;
 618
 619         return $input;
 620 }
 621
 622 #
 623 # Match a line against the boundary string
 624 #
 625
 626 sub match_boundary($$)
 627 {
 628         my ($_, $boundary) = @_;
 629
 630         if (substr($_, 0, 2) eq '--') {
 631                 s/[ \t\r\n]+\Z//;
 632                 if ($_ eq "--$boundary") {
 633                         return 'EOP';
 634                 } elsif ($_ eq "--$boundary--") {
 635                         return 'EOM';
 636                 }
 637         }
 638
 639         return undef;
 640 }