3 # replyfilter - A reply filter for nmh
5 # The idea behind this program is that it will act as a format filter
6 # for nmh. It will try to extract out all text/plain parts and format
7 # them if necessary using a filter program.
9 # To use this program, configure nmh in the following way (nmh 1.5 or later):
11 # - Put the path to this program in your .mh_profile under formatproc:
13 # formatproc: replyfilter
15 # - Create a mhl reply filter that consists of the following line:
17 # body:nocomponent,format,nowrap,formatarg="%(trim{content-type})%(putstr)",formatarg="%(trim{content-transfer-encoding})%(putstr)",formatarg=">"
19 # To decode this a bit:
21 # body - Output the "body" component
22 # nocomponent - Don't output a component prefix (normally here we use a
23 # component prefix of ">" as a quote character, but we're
24 # going to have replyfilter do that).
25 # nowrap - Don't wrap lines if they exceed the column width
26 # formatarg - Arguments to fmtproc. The first argument is the value of
27 # the Content-type header; the second is the value of the
28 # Content-Transfer-Encoding header. The last "formatarg"
29 # is used as your quoting prefix. Replace it with whatever
35 use MIME::QuotedPrint;
40 # The program we use to format "long" text
43 $filterprogram = 'par';
46 # Our output character set. This script assumes a UTF-8 locale, but if you
47 # want to run under a different locale the change it here.
50 $outcharset = 'utf-8';
53 # Maximum column width (used by the HTML converter and to decide if we need
54 # to invoke the filter program
60 # Out HTML converter program & arguments
63 @htmlconv = ('w3m', '-dump', '-cols', $maxcolwidth - 2, '-T', 'text/html',
67 die "Usage: $0 Content-type content-transfer-encoding quote-prefix\n"
71 my $ctype = Mail::Field->new('Content-Type', $ARGV[0]);
72 $content_type = $ctype->type;
73 $charset = $ctype->charset;
74 $boundary = $ctype->boundary;
76 $content_type = 'text/plain';
77 $charset = 'us-ascii';
80 $encoding = $ARGV[1] eq "" ? '7bit' : lc($ARGV[1]);
81 $quoteprefix = $ARGV[2];
84 # Set up our output to be in our character set
87 binmode(STDOUT, ":encoding($outcharset)");
90 # The simplest case: if we have a single type of text/plain, send it
91 # to our format subroutine.
94 if ($content_type eq 'text/plain') {
95 process_text(\*STDIN, $encoding, $charset);
100 # Alright, here's what we need to do.
102 # Find any text/plain parts and decode them. Decode them via base64 or
103 # quoted-printable, and feed them to our formatting filter when appropriate.
104 # Put markers in the output for other content types.
107 ($type) = (split('/', $content_type));
109 if ($type eq 'multipart') {
112 # For multipart messages we have to do a little extra.
113 # Eat the MIME prologue (everything up until the first boundary)
116 if (! defined $boundary || $boundary eq '') {
117 print "No boundary in Content-Type header!\n";
123 last if match_boundary($_, $boundary);
127 print "Unable to find boundary in message\n";
134 process_part(\*STDIN, $content_type, $encoding, $charset, $boundary);
138 # Eat the MIME eplilog
146 # Handled encoded text. I think we can assume if the encoding is q-p
147 # or base64 to feed it into a formatting filter.
150 sub process_text (*$$;$)
152 my ($input, $encoding, $charset, $boundary) = @_;
153 my $text, $filterpid, $prefixpid, $finread, $finwrite;
154 my $foutread, $foutwrite, $decoder, $ret, $filterflag;
155 my @text = ( '' ), $maxline = 0;
158 # In the simple case, just spit out the text prefixed by the
162 if ($encoding eq '7bit' || $encoding ne '8bit') {
164 $ret = match_boundary($_, $boundary);
168 print $quoteprefix, $_;
172 $decoder = find_decoder($encoding);
173 if (! defined $decoder) {
179 # Okay, assume that the encoding will make it so that we MIGHT need
180 # to filter it. Read it in; if it's too long, filter it.
186 last if ($ret = match_boundary($_, $boundary));
188 $line = decode($charset, &$decoder($_));
190 if (substr($text[$#text], -1, 1) eq "\n") {
193 $text[$#text] .= $line;
195 if (($len = length($text[$#text])) > $maxline) {
200 if (! defined $ret) {
204 if ($maxline <= $maxcolwidth) {
206 # These are short enough; just output it now as-is
213 # We fork a copy of ourselves to read the output from the filter
214 # program and prefix the quote character.
217 pipe($finread, $finwrite) || die "pipe() failed: $!\n";
218 pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
220 binmode($finread, ":encoding($outcharset)");
221 binmode($finread, ":encoding($outcharset)");
222 binmode($foutread, ":encoding($outcharset)");
223 binmode($foutwrite, ":encoding($outcharset)");
225 if ($filterpid = fork) {
227 # Close the pipes in the parent that we're not using
232 } elsif (defined $filterpid) {
234 # Close our ununsed filehandles
241 # Dup() down the filehandles to standard input and output
244 open(STDIN, "<&", $finread) ||
245 die "dup(filterin) failed: $!\n";
246 open(STDOUT, ">&", $foutwrite) ||
247 die "dup(filterout) failed: $!\n";
260 exec $filterprogram ||
261 die "Unable to exec $filterprogram: $!\n";
263 die "Fork for $filterprogram failed: $!\n";
267 # Fork our output handler.
270 if ($prefixpid = fork) {
272 # We don't need these anymore
276 } elsif (defined $prefixpid) {
278 # Read from foutwrite, and output (with prefix) to stdout
283 while (<$foutread>) {
284 print STDOUT $quoteprefix, $_;
291 # Send our input to the filter program
294 print $finwrite @text;
297 waitpid $filterpid, 0;
298 warn "Filter process exited with ", ($? >> 8), "\n" if $?;
299 waitpid $prefixpid, 0;
300 warn "Pipe reader process exited with ", ($? >> 8), "\n" if $?;
306 # Filter HTML through a converter program
309 sub process_html (*$$;$)
311 my ($input, $encoding, $charset, $boundary) = @_;
312 my $filterpid, $prefixpid, $finread, $finwrite;
313 my $foutread, $foutwrite, $decoder, $ret;
315 if (! defined($decoder = find_decoder($encoding))) {
320 # We fork a copy of ourselves to read the output from the filter
321 # program and prefix the quote character.
324 pipe($finread, $finwrite) || die "pipe() failed: $!\n";
325 pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
327 binmode($finread, ":encoding($outcharset)");
328 binmode($finread, ":encoding($outcharset)");
329 binmode($foutread, ":encoding($outcharset)");
330 binmode($foutwrite, ":encoding($outcharset)");
332 if ($filterpid = fork) {
334 # Close the pipes in the parent that we're not using
339 } elsif (defined $filterpid) {
341 # Close our ununsed filehandles
348 # Dup() down the filehandles to standard input and output
351 open(STDIN, "<&", $finread) ||
352 die "dup(filterin) failed: $!\n";
353 open(STDOUT, ">&", $foutwrite) ||
354 die "dup(filterout) failed: $!\n";
368 die "Unable to exec $filterprogram: $!\n";
370 die "Fork for $htmlconv[0] failed: $!\n";
374 # Fork our output handler.
377 if ($prefixpid = fork) {
379 # We don't need these anymore
383 } elsif (defined $prefixpid) {
385 # Read from foutwrite, and output (with prefix) to stdout
390 while (<$foutread>) {
391 print STDOUT $quoteprefix, $_;
398 # Send our input to the filter program
402 last if ($ret = match_boundary($_, $boundary));
403 print $finwrite (&$decoder($_));
406 if (! defined $ret) {
411 waitpid $filterpid, 0;
412 warn "HTML converter process exited with ", scalar($? >> 8), "\n" if $?;
413 waitpid $prefixpid, 0;
414 warn "Pipe reader process exited with ", $? >> 8, "\n" if $?;
420 # Decide what to do, based on what kind of content it is.
423 sub process_part (*$$$$;$)
425 my ($input, $content_type, $encoding, $charset, $boundary, $name) = @_;
426 my ($type, $subtype) = (split('/', $content_type, -1), '');
428 if ($type eq 'text') {
430 # If this is a text part, right now we only deal with
431 # plain and HTML parts.
433 if ($subtype eq 'plain') {
434 return process_text($input, $encoding, $charset,
436 } elsif ($subtype eq 'html') {
437 return process_html($input, $encoding, $charset,
440 print ">>> $content_type content\n";
441 return eat_part($input, $boundary);
443 } elsif ($type eq 'multipart') {
444 return process_multipart($input, $subtype, $boundary);
447 # Other types we're not sure what to do with right now
448 # Just put a marker in there
451 print ">>> $content_type attachment";
453 print ", name=$name";
457 return eat_part($input, $boundary);
462 # Process a multipart message.
464 # When called, we should be right after the beginning of the first
465 # boundary marker. So we should be pointed at header lines which describe
466 # the content of this part
469 sub process_multipart ($$$)
471 my ($input, $subtype, $boundary) = @_;
475 my $encoding, $type, $end, $name, $charset;
478 # Use the Mail::Header package to read in any headers
479 # corresponding to this part
482 my $head = Mail::Header->new($input, (MailFrom => 'IGNORE'));
485 # Extract out any Content-Type, Content-Transfer-Encoding, and
486 # Content-Disposition headers
489 my $ctype = Mail::Field->extract('Content-Type', $head);
490 my $cte = Mail::Field->extract('Content-Transfer-Encoding',
492 my $cdispo = Mail::Field->extract('Content-Disposition', $head);
494 if (defined $ctype) {
495 $type = $ctype->type;
496 $charset = $ctype->charset;
498 $type = 'text/plain';
499 $charset = 'us-ascii';
502 $encoding = defined $cte ? $cte->param('_') : '7bit';
503 $name = defined $cdispo ? $cdispo->param('filename') : undef;
506 # Special handling for multipart/alternative; pick
507 # the "first" one we can handle (which is usually
508 # text/plain) and silently eat the rest, but output a
509 # warning if we can't handle anything.
513 $end = eat_part($input, $boundary);
515 my $subboundary = $boundary;
516 my $maintype = (split('/', $type))[0];
518 if ($maintype eq 'multipart') {
519 $subboundary = $ctype->boundary;
521 # Go until we find our beginning of this
524 my $subend = eat_part($input, $subboundary);
526 if ($subend ne 'EOP') {
527 print ">>> WARNING: malformed ",
528 "nested multipart\n";
533 $end = process_part($input, $type, $encoding,
534 $charset, $subboundary, $name);
536 if ($subtype eq 'alternative' && ! defined $altout &&
537 $type eq 'text/plain') {
542 # Since we changed the semantics of $boundary
543 # above for nested multiparts, if we are
544 # handling a nested multipart then find the end
545 # of our current part
548 if ($maintype eq 'multipart') {
549 $end = eat_part($input, $boundary);
554 if ($end eq 'EOM' || $end eq 'EOF') {
555 if ($subtype eq 'alternative' && !defined $altout) {
556 print ">>>multipart/alternative: no suitable ",
565 # "Eat" a MIME part; consume content until we hit the boundary or EOF
570 my ($input, $boundary) = @_;
574 # If we weren't given a boundary just eat input until EOF
577 if (! defined $boundary) {
583 # Otherwise, consume data until we hit our boundary
587 if ($ret = match_boundary($_, $boundary)) {
596 # Return the decoder subroutine to use
603 if ($encoding eq '7bit' || $encoding eq '8bit') {
604 return \&null_decoder;
605 } elsif ($encoding eq 'base64') {
606 return \&decode_base64;
607 } elsif ($encoding eq 'quoted-printable') {
610 warn "Unknown encoding: $encoding\n";
623 # Match a line against the boundary string
626 sub match_boundary($$)
628 my ($_, $boundary) = @_;
630 if (substr($_, 0, 2) eq '--') {
632 if ($_ eq "--$boundary") {
634 } elsif ($_ eq "--$boundary--") {