use Encode;
#
-# The program we use to format "long" text
+# The program we use to format "long" text. Should be capable of reading
+# from standard input and sending the formatted text to standard output.
#
$filterprogram = 'par';
#
+# If the above filter program has problems with some input, use the following
+# hash to rewrite the problematic input before it is filtered: each key is a
+# regular expression and each value is the text that replaces every match.
+# In this example we replace the non-breaking space (U+00A0) with a plain
+# space because it makes par mangle the output. Uncomment this if this ends
+# up being a problem for you, or feel free to add other entries.
+#
+
+#%filterreplace = ( "\N{U+a0}" => " " );
+
+#
# Our output character set. This script assumes a UTF-8 locale, but if you
# want to run under a different locale then change it here.
#
my ($input, $encoding, $charset, $boundary) = @_;
my $text, $filterpid, $prefixpid, $finread, $finwrite;
my $foutread, $foutwrite, $decoder, $ret, $filterflag;
- my @text = ( '' ), $maxline = 0;
+ my $text, $maxline = 0;
#
# In the simple case, just spit out the text prefixed by the
while (<$input>) {
$ret = match_boundary($_, $boundary);
if (defined $ret) {
+ binmode($input, ':encoding(us-ascii)');
return $ret;
}
print $quoteprefix, $_;
# certainly US-ASCII
#
- binmode($input, ":encoding(us-ascii)");
+ binmode($input, ':encoding(us-ascii)');
- $decoder = find_decoder($encoding);
+ $decoder = find_decoder(lc($encoding));
if (! defined $decoder) {
return 'EOF';
}
#
# Okay, assume that the encoding will make it so that we MIGHT need
- # to filter it. Read it in; if it's too long, filter it.
+ # to filter it. Read it in; if the lines are too long, filter it.
#
my $chardecode = find_encoding($charset);
while (<$input>) {
- my $line, $len;
+ my @lines, $len;
last if ($ret = match_boundary($_, $boundary));
- $line = $chardecode->decode(&$decoder($_));
+ $text .= $_;
- if (substr($text[$#text], -1, 1) eq "\n") {
- push @text, $line;
- } else {
- $text[$#text] .= $line;
- }
- if (($len = length($text[$#text])) > $maxline) {
- $maxline = $len;
- }
}
+ binmode($input, ':encoding(us-ascii)');
+
+ $text = $chardecode->decode(&$decoder($text));
+
+ grep {
+ my $len;
+ if (($len = length) > $maxline) {
+ $maxline = $len;
+ }} split(/^/, $text);
+
if (! defined $ret) {
$ret = 'EOF';
}
#
# These are short enough; just output it now as-is
#
- print STDOUT @text;
+ foreach my $line (split(/^/, $text)) {
+ print STDOUT $quoteprefix, $line;
+ }
return $ret;
}
# Send our input to the filter program
#
- print $finwrite @text;
+ if (%filterreplace) {
+ foreach my $match (keys %filterreplace) {
+ $text =~ s/$match/$filterreplace{$match}/g;
+ }
+ }
+
+ print $finwrite $text;
close($finwrite);
waitpid $filterpid, 0;
my $filterpid, $prefixpid, $finread, $finwrite;
my $foutread, $foutwrite, $decoder, $ret;
- if (! defined($decoder = find_decoder($encoding))) {
+ if (! defined($decoder = find_decoder(lc($encoding)))) {
return 'EOF';
}
$charset = 'us-ascii';
}
- $encoding = defined $cte ? $cte->param('_') : '7bit';
+ $encoding = defined $cte ? lc($cte->param('_')) : '7bit';
$name = defined $cdispo ? $cdispo->param('filename') : undef;
#
sub match_boundary($$)
{
- my ($_, $boundary) = @_;
+ my ($line, $boundary) = @_;
+
+ return if ! defined $boundary;
- if (substr($_, 0, 2) eq '--') {
- s/[ \t\r\n]+\Z//;
- if ($_ eq "--$boundary") {
+ if (substr($line, 0, 2) eq '--') {
+ $line =~ s/[ \t\r\n]+\Z//;
+ if ($line eq "--$boundary") {
return 'EOP';
- } elsif ($_ eq "--$boundary--") {
+ } elsif ($line eq "--$boundary--") {
return 'EOM';
}
}