|
Lines 75-80
Link Here
|
| 75 |
C<spamassassin -d>, which only needs the pristine header and body which |
75 |
C<spamassassin -d>, which only needs the pristine header and body which |
| 76 |
is always handled when the object is created. |
76 |
is always handled when the object is created. |
| 77 |
|
77 |
|
|
|
78 |
C<subparse> specifies how many levels of message/* attachment should be parsed |
| 79 |
into a subtree. Defaults to 1. |
| 80 |
|
| 78 |
=cut |
81 |
=cut |
| 79 |
|
82 |
|
| 80 |
# month mappings (ripped from Util.pm) |
83 |
# month mappings (ripped from Util.pm) |
|
Lines 103-108
Link Here
|
| 103 |
my $message = $opts->{'message'} || \*STDIN; |
106 |
my $message = $opts->{'message'} || \*STDIN; |
| 104 |
my $parsenow = $opts->{'parsenow'} || 0; |
107 |
my $parsenow = $opts->{'parsenow'} || 0; |
| 105 |
|
108 |
|
|
|
109 |
# Specifies whether or not to parse message/rfc822 parts into its own tree. |
| 110 |
# If the # > 0, it'll subparse, otherwise it won't. By default, do one |
| 111 |
# level deep. |
| 112 |
$self->{subparse} = defined $opts->{'subparse'} ? $opts->{'subparse'} : 1; |
| 113 |
|
| 106 |
# protect it from abuse ... |
114 |
# protect it from abuse ... |
| 107 |
local $_; |
115 |
local $_; |
| 108 |
|
116 |
|
|
Lines 120-229
Link Here
|
| 120 |
@message = split ( /^/m, $message ); |
128 |
@message = split ( /^/m, $message ); |
| 121 |
} |
129 |
} |
| 122 |
|
130 |
|
|
|
131 |
return $self unless @message; |
| 132 |
|
| 133 |
# Pull off mbox and mbx separators |
| 134 |
if ( $message[0] =~ /^From\s/ ) { |
| 135 |
# mbox formated mailbox |
| 136 |
$self->{'mbox_sep'} = shift @message; |
| 137 |
} elsif ($message[0] =~ MBX_SEPARATOR) { |
| 138 |
$_ = shift @message; |
| 139 |
|
| 140 |
# Munge the mbx message separator into mbox format as a sort of |
| 141 |
# de facto portability standard in SA's internals. We need to |
| 142 |
# to this so that Mail::SpamAssassin::Util::parse_rfc822_date |
| 143 |
# can parse the date string... |
| 144 |
if (/([\s|\d]\d)-([a-zA-Z]{3})-(\d{4})\s(\d{2}):(\d{2}):(\d{2})/) { |
| 145 |
# $1 = day of month |
| 146 |
# $2 = month (text) |
| 147 |
# $3 = year |
| 148 |
# $4 = hour |
| 149 |
# $5 = min |
| 150 |
# $6 = sec |
| 151 |
my @arr = localtime(timelocal($6,$5,$4,$1,$MONTH{lc($2)}-1,$3)); |
| 152 |
my $address; |
| 153 |
foreach (@message) { |
| 154 |
if (/From:\s[^<]+<([^>]+)>/) { |
| 155 |
$address = $1; |
| 156 |
last; |
| 157 |
} elsif (/From:\s([^<^>]+)/) { |
| 158 |
$address = $1; |
| 159 |
last; |
| 160 |
} |
| 161 |
} |
| 162 |
$self->{'mbox_sep'} = "From $address $DAY_OF_WEEK[$arr[6]] $2 $1 $4:$5:$6 $3\n"; |
| 163 |
} |
| 164 |
} |
| 165 |
|
| 123 |
# Go through all the headers of the message |
166 |
# Go through all the headers of the message |
| 124 |
my $header = ''; |
167 |
my $header = ''; |
| 125 |
my $boundary; |
168 |
while ( my $current = shift @message ) { |
| 126 |
while ( my $last = shift @message ) { |
169 |
unless ($self->{'missing_head_body_separator'}) { |
| 127 |
if ( $last =~ /^From\s/ ) { |
170 |
$self->{'pristine_headers'} .= $current; |
| 128 |
# mbox formated mailbox |
|
|
| 129 |
$self->{'mbox_sep'} = $last; |
| 130 |
next; |
| 131 |
} elsif ($last =~ MBX_SEPARATOR) { |
| 132 |
# Munge the mbx message separator into mbox format as a sort of |
| 133 |
# de facto portability standard in SA's internals. We need to |
| 134 |
# to this so that Mail::SpamAssassin::Util::parse_rfc822_date |
| 135 |
# can parse the date string... |
| 136 |
if (/([\s|\d]\d)-([a-zA-Z]{3})-(\d{4})\s(\d{2}):(\d{2}):(\d{2})/o) { |
| 137 |
# $1 = day of month |
| 138 |
# $2 = month (text) |
| 139 |
# $3 = year |
| 140 |
# $4 = hour |
| 141 |
# $5 = min |
| 142 |
# $6 = sec |
| 143 |
my @arr = localtime(timelocal($6,$5,$4,$1,$MONTH{lc($2)}-1,$3)); |
| 144 |
my $address; |
| 145 |
foreach (@message) { |
| 146 |
if (/From:\s[^<]+<([^>]+)>/) { |
| 147 |
$address = $1; |
| 148 |
last; |
| 149 |
} elsif (/From:\s([^<^>]+)/) { |
| 150 |
$address = $1; |
| 151 |
last; |
| 152 |
} |
| 153 |
} |
| 154 |
$self->{'mbox_sep'} = "From $address $DAY_OF_WEEK[$arr[6]] $2 $1 $4:$5:$6 $3\n"; |
| 155 |
next; |
| 156 |
} |
| 157 |
} |
171 |
} |
| 158 |
|
172 |
|
| 159 |
# Store the non-modified headers in a scalar |
173 |
# NB: Really need to figure out special folding rules here! |
| 160 |
$self->{'pristine_headers'} .= $last; |
174 |
if ( $current =~ /^[ \t]/ ) { |
|
|
175 |
# This wasn't useful in terms of a rule, but we may want to treat it |
| 176 |
# specially at some point. Perhaps ignore it? |
| 177 |
#unless ($current =~ /\S/) { |
| 178 |
# $self->{'obsolete_folding_whitespace'} = 1; |
| 179 |
#} |
| 161 |
|
180 |
|
| 162 |
# NB: Really need to figure out special folding rules here! |
181 |
# append continuations if there's a header in process |
| 163 |
if ( $last =~ /^[ \t]+/ ) { # if its a continuation |
|
|
| 164 |
if ($header) { |
182 |
if ($header) { |
| 165 |
$header .= $last; # fold continuations |
183 |
$header .= $current; |
|
|
184 |
} |
| 185 |
} |
| 186 |
else { |
| 187 |
# Ok, there's a header here, let's go ahead and add it in. |
| 188 |
if ($header) { |
| 189 |
# Yes, the /s is needed to match \n too. |
| 190 |
my ($key, $value) = split (/:\s*(?=.)/s, $header, 2); |
| 166 |
|
191 |
|
| 167 |
# If we're currently dealing with a content-type header, and there's a |
192 |
# If it's not a valid header (aka: not in the form "foo: bar"), skip it. |
| 168 |
# boundary defined, use it. Since there could be multiple |
193 |
if (defined $value) { |
| 169 |
# content-type headers in a message, the last one will be the one we |
194 |
# limit the length of the pairs we store |
| 170 |
# should use, so just keep updating as they come in. |
195 |
if (length($key) > MAX_HEADER_KEY_LENGTH) { |
| 171 |
if ($header =~ /^content-type:\s*(\S.*)$/is) { |
196 |
$key = substr($key, 0, MAX_HEADER_KEY_LENGTH); |
| 172 |
my($type,$temp_boundary) = Mail::SpamAssassin::Util::parse_content_type($1); |
197 |
$self->{'truncated_header'} = 1; |
| 173 |
$boundary = $temp_boundary if ($type =~ /^multipart/ && defined $temp_boundary); |
198 |
} |
| 174 |
} |
199 |
if (length($value) > MAX_HEADER_VALUE_LENGTH) { |
|
|
200 |
$value = substr($value, 0, MAX_HEADER_VALUE_LENGTH); |
| 201 |
$self->{'truncated_header'} = 1; |
| 202 |
} |
| 203 |
$self->header($key, $value); |
| 204 |
} |
| 205 |
} |
| 175 |
|
206 |
|
| 176 |
# Go onto the next header line, unless the next line is a |
207 |
# not a continuation... |
| 177 |
# multipart mime boundary, where we know we're going to stop |
208 |
$header = $current; |
| 178 |
# below, so drop through for final header processing. |
|
|
| 179 |
next unless (defined $boundary && @message && $message[0] =~ /^--\Q$boundary\E(?:--|\s*$)/); |
| 180 |
} |
| 181 |
else { |
| 182 |
# There was no previous header and this is just "out there"? |
| 183 |
# Ignore it! |
| 184 |
next; |
| 185 |
} |
| 186 |
} |
209 |
} |
| 187 |
|
210 |
|
| 188 |
# Ok, there's a header here, let's go ahead and add it in. |
|
|
| 189 |
if ($header) { |
211 |
if ($header) { |
| 190 |
# Yes, the /s is needed to match \n too. |
212 |
if ($header =~ /^\r?$/) { |
| 191 |
my ($key, $value) = split (/:\s*(?=.)/s, $header, 2); |
213 |
last; |
| 192 |
|
|
|
| 193 |
# If it's not a valid header (aka: not in the form "foo: bar"), skip it. |
| 194 |
if (defined $value) { |
| 195 |
# limit the length of the pairs we store |
| 196 |
if (length($key) > MAX_HEADER_KEY_LENGTH) { |
| 197 |
$key = substr($key, 0, MAX_HEADER_KEY_LENGTH); |
| 198 |
$self->{'truncated_header'} = 1; |
| 199 |
} |
| 200 |
if (length($value) > MAX_HEADER_VALUE_LENGTH) { |
| 201 |
$value = substr($value, 0, MAX_HEADER_VALUE_LENGTH); |
| 202 |
$self->{'truncated_header'} = 1; |
| 203 |
} |
| 204 |
$self->header($key, $value); |
| 205 |
|
| 206 |
# If we're currently dealing with a content-type header, and there's a |
| 207 |
# boundary defined, use it. Since there could be multiple |
| 208 |
# content-type headers in a message, the last one will be the one we |
| 209 |
# should use, so just keep updating as they come in. |
| 210 |
if (lc $key eq 'content-type') { |
| 211 |
my($type,$temp_boundary) = Mail::SpamAssassin::Util::parse_content_type($value); |
| 212 |
$boundary = $temp_boundary if ($type =~ /^multipart/ && defined $temp_boundary); |
| 213 |
} |
| 214 |
} |
214 |
} |
|
|
215 |
else { |
| 216 |
# Check for missing head/body separator |
| 217 |
# RFC 2822, s2.2: |
| 218 |
# A field name MUST be composed of printable US-ASCII characters |
| 219 |
# (i.e., characters that have values between 33 (041) and 126 (176), inclusive), |
| 220 |
# except colon (072). |
| 221 |
# FOR THIS NEXT PART: list off the valid REs for what can be next: |
| 222 |
# Header, header continuation, blank line |
| 223 |
if (!@message || $message[0] !~ /^(?:[\041-\071\073-\176]+:|[ \t]|\r?$)/ || $message[0] =~ /^--/) { |
| 224 |
# No body or no separator before mime boundary is invalid |
| 225 |
$self->{'missing_head_body_separator'} = 1; |
| 226 |
|
| 227 |
# we *have* to go back through again to make sure we catch the last |
| 228 |
# header, so fake a separator and loop again. |
| 229 |
unshift(@message, "\n"); |
| 230 |
} |
| 231 |
} |
| 215 |
} |
232 |
} |
| 216 |
|
|
|
| 217 |
# not a continuation... |
| 218 |
$header = $last; |
| 219 |
|
| 220 |
# Ok, we found the header/body blank line ... |
| 221 |
last if ($last =~ /^\r?$/m); |
| 222 |
|
| 223 |
# Alternately, if a multipart mime boundary is found in the header area, |
| 224 |
# aka it's malformed, exit out as well and treat it as part of the body. |
| 225 |
last if (defined $boundary && @message && $message[0] =~ /^--\Q$boundary\E(?:--|\s*$)/); |
| 226 |
} |
233 |
} |
|
|
234 |
undef $header; |
| 227 |
|
235 |
|
| 228 |
# Store the pristine body for later -- store as a copy since @message |
236 |
# Store the pristine body for later -- store as a copy since @message |
| 229 |
# will get modified below |
237 |
# will get modified below |
|
Lines 484-490
Link Here
|
| 484 |
# Else, there's no boundary, so leave the whole part... |
492 |
# Else, there's no boundary, so leave the whole part... |
| 485 |
} |
493 |
} |
| 486 |
|
494 |
|
| 487 |
my $part_msg = Mail::SpamAssassin::Message::Node->new(); # prepare a new tree node |
495 |
# prepare a new tree node |
|
|
496 |
my $part_msg = Mail::SpamAssassin::Message::Node->new({ subparse=>$msg->{subparse} }); |
| 488 |
my $in_body = 0; |
497 |
my $in_body = 0; |
| 489 |
my $header; |
498 |
my $header; |
| 490 |
my $part_array; |
499 |
my $part_array; |
|
Lines 531-576
Link Here
|
| 531 |
|
540 |
|
| 532 |
# make sure we start with a new clean node |
541 |
# make sure we start with a new clean node |
| 533 |
$in_body = 0; |
542 |
$in_body = 0; |
| 534 |
$part_msg = Mail::SpamAssassin::Message::Node->new(); |
543 |
$part_msg = Mail::SpamAssassin::Message::Node->new({ subparse=>$msg->{subparse} }); |
| 535 |
undef $part_array; |
544 |
undef $part_array; |
| 536 |
undef $header; |
545 |
undef $header; |
| 537 |
|
546 |
|
| 538 |
next; |
547 |
next; |
| 539 |
} |
548 |
} |
| 540 |
|
549 |
|
| 541 |
if ($in_body) { |
550 |
if (!$in_body) { |
| 542 |
# we run into a perl bug if the lines are astronomically long (probably |
|
|
| 543 |
# due to lots of regexp backtracking); so cut short any individual line |
| 544 |
# over MAX_BODY_LINE_LENGTH bytes in length. This can wreck HTML |
| 545 |
# totally -- but IMHO the only reason a luser would use |
| 546 |
# MAX_BODY_LINE_LENGTH-byte lines is to crash filters, anyway. |
| 547 |
while (length ($_) > MAX_BODY_LINE_LENGTH) { |
| 548 |
push (@{$part_array}, substr($_, 0, MAX_BODY_LINE_LENGTH)."\n"); |
| 549 |
substr($_, 0, MAX_BODY_LINE_LENGTH) = ''; |
| 550 |
} |
| 551 |
push ( @{$part_array}, $_ ); |
| 552 |
} |
| 553 |
else { |
| 554 |
s/\s+$//; |
551 |
s/\s+$//; |
| 555 |
if (m/^\S/) { |
552 |
if (m/^[\041-\071\073-\176]+:/) { |
| 556 |
if ($header) { |
553 |
if ($header) { |
| 557 |
my ( $key, $value ) = split ( /:\s*/, $header, 2 ); |
554 |
my ( $key, $value ) = split ( /:\s*/, $header, 2 ); |
| 558 |
$part_msg->header( $key, $value ); |
555 |
$part_msg->header( $key, $value ); |
| 559 |
} |
556 |
} |
| 560 |
$header = $_; |
557 |
$header = $_; |
|
|
558 |
next; |
| 561 |
} |
559 |
} |
| 562 |
elsif (/^$/) { |
560 |
elsif (/^[ \t]/) { |
|
|
561 |
$_ =~ s/^\s*//; |
| 562 |
$header .= $_; |
| 563 |
next; |
| 564 |
} |
| 565 |
else { |
| 563 |
if ($header) { |
566 |
if ($header) { |
| 564 |
my ( $key, $value ) = split ( /:\s*/, $header, 2 ); |
567 |
my ( $key, $value ) = split ( /:\s*/, $header, 2 ); |
| 565 |
$part_msg->header( $key, $value ); |
568 |
$part_msg->header( $key, $value ); |
| 566 |
} |
569 |
} |
| 567 |
$in_body = 1; |
570 |
$in_body = 1; |
|
|
571 |
|
| 572 |
# if there's a blank line separator, that's good. if there isn't, |
| 573 |
# it's a body line, so drop through. |
| 574 |
if (/^\r?$/) { |
| 575 |
next; |
| 576 |
} |
| 577 |
else { |
| 578 |
$self->{'missing_mime_head_body_separator'} = 1; |
| 579 |
} |
| 568 |
} |
580 |
} |
| 569 |
else { |
|
|
| 570 |
$_ =~ s/^\s*//; |
| 571 |
$header .= $_; |
| 572 |
} |
| 573 |
} |
581 |
} |
|
|
582 |
|
| 583 |
# we run into a perl bug if the lines are astronomically long (probably |
| 584 |
# due to lots of regexp backtracking); so cut short any individual line |
| 585 |
# over MAX_BODY_LINE_LENGTH bytes in length. This can wreck HTML |
| 586 |
# totally -- but IMHO the only reason a luser would use |
| 587 |
# MAX_BODY_LINE_LENGTH-byte lines is to crash filters, anyway. |
| 588 |
while (length ($_) > MAX_BODY_LINE_LENGTH) { |
| 589 |
push (@{$part_array}, substr($_, 0, MAX_BODY_LINE_LENGTH)."\n"); |
| 590 |
substr($_, 0, MAX_BODY_LINE_LENGTH) = ''; |
| 591 |
} |
| 592 |
push ( @{$part_array}, $_ ); |
| 574 |
} |
593 |
} |
| 575 |
|
594 |
|
| 576 |
} |
595 |
} |
|
Lines 604-615
Link Here
|
| 604 |
|
623 |
|
| 605 |
# If this part is a message/* part, and the parent isn't also a |
624 |
# If this part is a message/* part, and the parent isn't also a |
| 606 |
# message/* part (ie: the main part) go ahead and parse into a tree. |
625 |
# message/* part (ie: the main part) go ahead and parse into a tree. |
| 607 |
if ($part_msg->{'type'} =~ /^message\b/i) { |
626 |
if ($part_msg->{'type'} =~ /^message\b/i && ($msg->{subparse} > 0)) { |
| 608 |
# Get the part ready... |
627 |
# Get the part ready... |
| 609 |
my $message = $part_msg->decode(); |
628 |
my $message = $part_msg->decode(); |
| 610 |
|
629 |
|
| 611 |
if ($message) { |
630 |
if ($message) { |
| 612 |
my $msg_obj = Mail::SpamAssassin::Message->new({message=>$message, parsenow=>1}); |
631 |
my $msg_obj = Mail::SpamAssassin::Message->new({ |
|
|
632 |
message => $message, |
| 633 |
parsenow => 1, |
| 634 |
subparse => $msg->{subparse}-1, |
| 635 |
}); |
| 613 |
|
636 |
|
| 614 |
# main message is a message/* part ... |
637 |
# main message is a message/* part ... |
| 615 |
if ($msg == $part_msg) { |
638 |
if ($msg == $part_msg) { |
|
Lines 627-632
Link Here
|
| 627 |
return; |
650 |
return; |
| 628 |
} |
651 |
} |
| 629 |
} |
652 |
} |
|
|
653 |
else { |
| 654 |
# leaves don't need the subparse value, so get rid of it |
| 655 |
delete $part_msg->{subparse}; |
| 656 |
} |
| 630 |
|
657 |
|
| 631 |
# Add the new part as a child to the parent |
658 |
# Add the new part as a child to the parent |
| 632 |
# NOTE: if the message only has this one part, we'll be recursive so delete |
659 |
# NOTE: if the message only has this one part, we'll be recursive so delete |
|
Lines 656-661
Link Here
|
| 656 |
my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1); |
683 |
my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1); |
| 657 |
return $self->{text_rendered} unless @parts; |
684 |
return $self->{text_rendered} unless @parts; |
| 658 |
|
685 |
|
|
|
686 |
# the html metadata may have already been set, so let's not bother if it's |
| 687 |
# already been done. |
| 688 |
my $html_needs_setting = !exists $self->{metadata}->{html}; |
| 689 |
|
| 659 |
# Go through each part |
690 |
# Go through each part |
| 660 |
my $text = $self->get_header ('subject') || ''; |
691 |
my $text = $self->get_header ('subject') || ''; |
| 661 |
for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) { |
692 |
for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) { |
|
Lines 670-677
Link Here
|
| 670 |
$text .= $rnd; |
701 |
$text .= $rnd; |
| 671 |
|
702 |
|
| 672 |
# TVD - if there are multiple parts, what should we do? |
703 |
# TVD - if there are multiple parts, what should we do? |
| 673 |
# right now, just use the last one ... |
704 |
# right now, just use the last one. we may need to give some priority |
| 674 |
$self->{metadata}->{html} = $p->{html_results} if ( $type eq 'text/html' ); |
705 |
# at some point, ie: use text/html rendered if it exists, or |
|
|
706 |
# text/plain rendered as html otherwise. |
| 707 |
if ($html_needs_setting && $type eq 'text/html') { |
| 708 |
$self->{metadata}->{html} = $p->{html_results}; |
| 709 |
} |
| 675 |
} |
710 |
} |
| 676 |
else { |
711 |
else { |
| 677 |
$text .= $p->decode(); |
712 |
$text .= $p->decode(); |
|
Lines 708-713
Link Here
|
| 708 |
my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1); |
743 |
my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1); |
| 709 |
return $self->{text_visible_rendered} unless @parts; |
744 |
return $self->{text_visible_rendered} unless @parts; |
| 710 |
|
745 |
|
|
|
746 |
# the html metadata may have already been set, so let's not bother if it's |
| 747 |
# already been done. |
| 748 |
my $html_needs_setting = !exists $self->{metadata}->{html}; |
| 749 |
|
| 711 |
# Go through each part |
750 |
# Go through each part |
| 712 |
my $text = $self->get_header ('subject') || ''; |
751 |
my $text = $self->get_header ('subject') || ''; |
| 713 |
for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) { |
752 |
for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) { |
|
Lines 720-725
Link Here
|
| 720 |
if ( defined $rnd ) { |
759 |
if ( defined $rnd ) { |
| 721 |
# Only text/* types are rendered ... |
760 |
# Only text/* types are rendered ... |
| 722 |
$text .= $rnd; |
761 |
$text .= $rnd; |
|
|
762 |
|
| 763 |
# TVD - if there are multiple parts, what should we do? |
| 764 |
# right now, just use the last one. we may need to give some priority |
| 765 |
# at some point, ie: use text/html rendered if it exists, or |
| 766 |
# text/plain rendered as html otherwise. |
| 767 |
if ($html_needs_setting && $type eq 'text/html') { |
| 768 |
$self->{metadata}->{html} = $p->{html_results}; |
| 769 |
} |
| 723 |
} |
770 |
} |
| 724 |
else { |
771 |
else { |
| 725 |
$text .= $p->decode(); |
772 |
$text .= $p->decode(); |
|
Lines 727-735
Link Here
|
| 727 |
} |
774 |
} |
| 728 |
|
775 |
|
| 729 |
# whitespace handling (warning: small changes have large effects!) |
776 |
# whitespace handling (warning: small changes have large effects!) |
| 730 |
$text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed |
777 |
$text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed |
| 731 |
$text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space |
778 |
$text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space |
| 732 |
$text =~ tr/\f/\n/; # form feeds => newline |
779 |
$text =~ tr/\f/\n/; # form feeds => newline |
| 733 |
|
780 |
|
| 734 |
my @textary = split_into_array_of_short_lines ($text); |
781 |
my @textary = split_into_array_of_short_lines ($text); |
| 735 |
$self->{text_visible_rendered} = \@textary; |
782 |
$self->{text_visible_rendered} = \@textary; |
|
Lines 750-755
Link Here
|
| 750 |
my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1); |
797 |
my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1); |
| 751 |
return $self->{text_invisible_rendered} unless @parts; |
798 |
return $self->{text_invisible_rendered} unless @parts; |
| 752 |
|
799 |
|
|
|
800 |
# the html metadata may have already been set, so let's not bother if it's |
| 801 |
# already been done. |
| 802 |
my $html_needs_setting = !exists $self->{metadata}->{html}; |
| 803 |
|
| 753 |
# Go through each part |
804 |
# Go through each part |
| 754 |
my $text = ''; |
805 |
my $text = ''; |
| 755 |
for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) { |
806 |
for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) { |
|
Lines 762-774
Link Here
|
| 762 |
if ( defined $rnd ) { |
813 |
if ( defined $rnd ) { |
| 763 |
# Only text/* types are rendered ... |
814 |
# Only text/* types are rendered ... |
| 764 |
$text .= $rnd; |
815 |
$text .= $rnd; |
|
|
816 |
|
| 817 |
# TVD - if there are multiple parts, what should we do? |
| 818 |
# right now, just use the last one. we may need to give some priority |
| 819 |
# at some point, ie: use text/html rendered if it exists, or |
| 820 |
# text/plain rendered as html otherwise. |
| 821 |
if ($html_needs_setting && $type eq 'text/html') { |
| 822 |
$self->{metadata}->{html} = $p->{html_results}; |
| 823 |
} |
| 765 |
} |
824 |
} |
| 766 |
} |
825 |
} |
| 767 |
|
826 |
|
| 768 |
# whitespace handling (warning: small changes have large effects!) |
827 |
# whitespace handling (warning: small changes have large effects!) |
| 769 |
$text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed |
828 |
$text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed |
| 770 |
$text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space |
829 |
$text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space |
| 771 |
$text =~ tr/\f/\n/; # form feeds => newline |
830 |
$text =~ tr/\f/\n/; # form feeds => newline |
| 772 |
|
831 |
|
| 773 |
my @textary = split_into_array_of_short_lines ($text); |
832 |
my @textary = split_into_array_of_short_lines ($text); |
| 774 |
$self->{text_invisible_rendered} = \@textary; |
833 |
$self->{text_invisible_rendered} = \@textary; |