# $Id: String.pm,v 1.48 2005/06/13 21:09:59 vipul Exp $
package Razor2::String;
use URI::Escape;
use Razor2::Preproc::enBase64;
use Data::Dumper;
BEGIN {
eval { require Digest::SHA; import Digest::SHA qw(sha1_hex); 1 }
or do { require Digest::SHA1; import Digest::SHA1 qw(sha1_hex) }
}
#use MIME::Parser;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw( hmac_sha1 xor_key
from_batched_query
to_batched_query findsimilar debugobj
makesis parsesis makesis_nue parsesis_nue
hextobase64 base64tohex
randstr round
hex_dump prep_mail
prehash printb64table
hexbits2hash hmac2_sha1
fisher_yates_shuffle
);
# Same as the alphabet from RFC 1521, except s:/:_: and s:+:-:
my %b64table;
BEGIN {
# ASCII
# 33-126 printable chars
# 48-57 numbers
# 65-90 uppercase alpha
# 97-122 lowercase alpha
foreach ( 0 .. 25 ) { $b64table{$_} = chr( $_ + 65 ); }
foreach ( 26 .. 51 ) { $b64table{$_} = chr( $_ + 71 ); }
foreach ( 52 .. 61 ) { $b64table{$_} = chr( $_ - 4 ); }
$b64table{62} = "-";
$b64table{63} = "_";
}
sub printb64table {
foreach ( 0 .. 63 ) {
print "$_ = $b64table{$_}\n";
}
}
sub hmac_sha1 {
my $text = shift;
my $iv1 = shift;
my $iv2 = shift;
my ( $b64, $hex ) = hmac2_sha1( $text, $iv1, $iv2 );
return $b64;
}
# taken in part from RFC 2104
# http://www.cs.ucsd.edu/users/mihir/papers/hmac.html
sub hmac2_sha1 {
my $text = shift;
my $iv1 = shift;
my $iv2 = shift;
return unless $text && $iv1 && $iv2;
die "no ref's allowed" if ref($text);
my $digest = sha1_hex($iv2, $text);
$digest = sha1_hex($iv1, $digest);
return ( hextobase64($digest), $digest );
}
sub hmac3_sha1 {
my $text = shift;
my $iv1 = shift;
my $iv2 = shift;
return unless $text && $iv1 && $iv2;
die "no ref's allowed" if ref($text);
my $digest = $text;
$digest = sha1_hex( $iv1 . $digest );
$digest = sha1_hex( $iv2 . $digest );
return ( hextobase64($digest), $digest );
}
# part of RFC 2104 - see hmac_sha1()
sub xor_key {
my $key = shift;
# key length should never be > 64 chars;
#
# dont need this ... see Bitwise String Operators
# $enc .= '\0' x (64 - length($pass));
my $iv1 = "\x36" x 64 ^ $key;
my $iv2 = "\x5C" x 64 ^ $key;
return ( $iv1, $iv2 );
}
# converts a string where each char is a hex (4-bit) value
# to a string where each char is a base64 (6-bit) value
sub hextobase64 {
my $hs = shift;
my @b64s;
my $i = 0;
while ( $i < length($hs) ) {
# process 3 hex char chunks at a time
my $hex3 = substr $hs, $i, 3;
$i += 3;
my $bv = pack "h3", $hex3;
my $cur = 0;
foreach ( 0 .. 5 ) { my $bt = vec( $bv, $_, 1 ); $cur += $bt; $cur *= 2; }
push @b64s, $cur / 2;
$cur = 0;
foreach ( 6 .. 11 ) { my $bt = vec( $bv, $_, 1 ); $cur += $bt; $cur *= 2; }
push @b64s, $cur / 2;
#foreach (0..15) { my $bt = vec($bv,$_,1); print "$_=$bt, cur=$cur\n"; }
#print " -- hex=$hex3; @b64s\n";
}
my $bs = "";
foreach (@b64s) { $bs .= $b64table{$_}; }
# print "b64=$bs; hex=". base64tohex($bs) ."\n";
# Fixme - change encoding so 1 hex char ==> 1 b64 char
# 64-char hex string ==> 44-char b64 string. truncate to 43.
# 40-char hex string ==> 28-char b64 string. truncate to 27.
# $bs = substr($bs, 0, 43) if (length $bs == 44) && (substr($bs, -1) eq '0');
# $bs = substr($bs, 0, 27) if (length $bs == 28) && (substr($bs, -1) eq '0');
return $bs;
}
# converts a string where each char is a base64 (6-bit) value
# to a string where each char is a hex (4-bit) value
sub base64tohex {
my $bs = shift;
my @b64s;
my $hexstr;
# convert string to list of numbers base 10
foreach my $chr ( split '', $bs ) {
foreach ( keys %b64table ) {
push @b64s, $_ if $b64table{$_} eq $chr;
}
}
while (@b64s) {
my $bv = "";
vec( $bv, 0, 16 ) = 0;
my $a = shift @b64s;
foreach ( 0 .. 5 ) { my $i = 5 - $_; my $bt = $a % 2; vec( $bv, $i, 1 ) = $bt; $a = int( $a / 2 ); }
$a = shift @b64s;
foreach ( 6 .. 11 ) { my $i = 17 - $_; my $bt = $a % 2; vec( $bv, $i, 1 ) = $bt; $a = int( $a / 2 ); }
$hexstr .= unpack "h3", $bv;
}
# print "hexstr=$hexstr; @b64s\n";
#
# NOTE on padding
# if we pad 4 0-bits, we need to know that there wasn't an actual 0
# on the input string (hexstr).
#
# since padding 4 0's is more common than having the last hex
# be a 0, we could append a special char indicating last 4 0 bits
# were not padding 0's.
#
# But, we will customize these functions for razor2's needs.
# 64-char hex string ==> 43-char b64 string ==> 66-char hex. truncate.
# 40-char hex string ==> 27-char b64 string ==> 42-char hex. truncate.
# 15-char hex string ==> 10-char b64 string ==> 15-char hex. ok.
#
# 20-byte hex string is 40 chars
# $hexstr = substr($hexstr, 0, 20) if (length $hexstr == 21) && (substr($hexstr, -1) eq '0');
# $hexstr = substr($hexstr, 0, 40) if (length $hexstr == 42) && (substr($hexstr, -2) eq '00');
# $hexstr = substr($hexstr, 0, 64) if (length $hexstr == 66) && (substr($hexstr, -2) eq '00');
$hexstr = substr( $hexstr, 0, 40 ) if ( length($hexstr) == 42 );
$hexstr = substr( $hexstr, 0, 64 ) if ( length($hexstr) == 66 );
return $hexstr;
}
# can be called 2 ways
# - makesis(%hash) aka makesis( p => 0, cf => 95 )
# - makesis($hashref) aka makesis({p => 0, cf => 95})
sub makesis {
my $first = shift;
my $data;
if ( ref($first) eq 'HASH' ) {
$data = $first;
}
else {
$data = { $first, @_ };
}
my $sis = '';
foreach ( sort keys %$data ) {
$sis .= "$_=" . ( exists $data->{$_} ? uri_escape( $data->{$_} ) : '' ) . '&';
}
# This is 10x faster than the equivalent regex version.
return substr( $sis, 0, length($sis) - 1 ) . "\r\n";
}
sub parsesis {
my $query = $_[1] || {};
my $wantref = 1 if $_[1];
# Parse the query.
$_[0] =~ s/\n$//; # SIS shouldn't have this!
$_[0] =~ s/\r$//; # SIS shouldn't have this!
my @pairs = split /\&/, $_[0];
for (@pairs) {
my ( $key, $value ) = split /=/, $_;
$query->{$key} = defined $value ? uri_unescape($value) : '';
}
return $query if $wantref;
return %$query;
}
# version of makesis that doesn't to uri escaping
# for things we know don't require escaping
# can be called 2 ways
# - makesis(%hash) aka makesis( p => 0, cf => 95 )
# - makesis($hashref) aka makesis({p => 0, cf => 95})
sub makesis_nue {
my $first = shift;
my $data;
if ( ref($first) eq 'HASH' ) {
$data = $first;
}
else {
$data = { $first, @_ };
}
my $sis = '';
foreach ( sort keys %$data ) {
$sis .= "$_=";
$sis .= $data->{$_} if exists( $data->{$_} );
$sis .= '&';
}
# This is 10x faster than the equivalent regex version.
return substr( $sis, 0, length($sis) - 1 ) . "\r\n";
}
sub parsesis_nue {
my $query = $_[1] || {};
my $wantref = 1 if $_[1];
# Parse the query.
$_[0] =~ s/\r\n$//;
my @pairs = split /\&/, $_[0];
for (@pairs) {
my ( $key, $value ) = split /=/, $_;
$query->{$key} = $value;
}
return $query if $wantref;
return %$query;
}
sub to_batched_query {
my ( $queries, $bql, $bqs, $novar ) = @_;
my @bqueries;
# Breaks up queries into batches, where batches are limited to:
# - at most $bql lines long --OR--
# - at most $bqs kb in size
# if bqs or bql == 0 or undef, no limit.
#
# fixme - optimization for aggregator:
# sort, so all checks are together, all reports together, etc.
# problem is user will want to maintain array order
# $queries is array ref of either:
# strings - sis, ready to go
# hash ref - need to create sis
# my $q = ref($queries->[0]) eq 'HASH' ? makesis_batch($queries) : $queries;
# for right now, we'll just assume hash ref
return unless ref( $queries->[0] ) eq 'HASH';
my $last;
my $line;
my $linecnt = 0;
my $batchmode = 0;
foreach my $cur (@$queries) {
# my $dobj = debugobj($cur); print "dbg-doing obj: $dobj\n";
#
# handle cases where we submit email blob (message = * )
#
if ( exists $cur->{message} ) {
my $msg = $cur->{message};
delete $cur->{message};
$line = "-" . makesis($cur);
$cur->{message} = $msg;
$line =~ s/\r\n$//s;
$line .= "&message=*\r\n$msg\r\n.\r\n";
push @bqueries, $line;
next;
}
unless ($last) {
#
# start beginning of new batch
#
$last = $cur;
next;
}
unless ($batchmode) {
#
# line after beginning of new batch
# if similar, start variable batchmode.
# if not, start batchmode without variables
#
my ( $both, $diff ) = findsimilar( $last, $cur );
if ( $diff && !$novar ) {
$batchmode = 2;
$line = "-" . makesis_nue($both);
# fixme - we might want to uri_escape()
# but everything should be alphanum or our uri-safe base64
$line .= join( ",", map "$last->{$_}", @$diff ) . "\r\n";
$line .= join( ",", map "$cur->{$_}", @$diff ) . "\r\n";
$last = $both; # last is now 'template'
$linecnt = 2;
}
else {
$batchmode = 1;
$line = "-" . makesis($last);
$line .= makesis_nue($cur);
$linecnt = 2;
}
next;
}
else {
#
# We're in batchmode.
# end if batch maxed out (bqs or bql reached)
# end if batchmode with variables and cur doesn't match
# end batch
#
my ( $both, $diff ) = findsimilar( $last, $cur ) if ( $batchmode == 2 );
if ( ( $bqs && ( length($line) > ( $bqs * 1024 ) ) )
|| ( $bql && ( $linecnt >= $bql ) )
|| ( $batchmode == 2 && !$diff ) ) {
$batchmode = 0;
$line .= ".\r\n";
push @bqueries, $line;
$last = $cur;
}
else {
#
# fixme - we might go passed bqs by a little bit. prolly ok.
#
if ( $batchmode == 2 ) {
$line .= join( ",", map "$cur->{$_}", @$diff ) . "\r\n";
}
else {
$line .= makesis_nue($cur);
}
$linecnt++;
}
}
}
if ($batchmode) {
$line .= ".\r\n";
push @bqueries, $line;
}
elsif ($last) {
$line = makesis($last);
push @bqueries, $line;
}
return \@bqueries;
}
# compares keys in hash ref's a & b
#
# return
# if both hashes have different keys
#
# return (1)
# if both hashes have same keys and values,
#
# returns 2 refs
# if both hashes have same keys but different values
# - first is hash, copy of a & b where vals are same.
# where vals are diff, keys are copied with val = '?'
# - second is list contains keys where values are different
sub findsimilar {
my ( $a, $b ) = @_;
my @diffvalues = ();
my %samevalues = ();
foreach ( sort keys %$a ) {
return unless exists $b->{$_};
if ( $b->{$_} eq $a->{$_} ) {
$samevalues{$_} = $a->{$_};
}
else {
$samevalues{$_} = "?";
push @diffvalues, $_;
}
}
foreach ( sort keys %$b ) {
return unless exists $a->{$_};
}
# if too hashes are exactly the same, not sure.
# treat as if they are totally different.
return (1) unless scalar(@diffvalues) > 0;
return ( \%samevalues, \@diffvalues );
}
sub from_batched_query {
my ($queries) = @_;
my @queries;
my ( $fq, $rq ) = $queries =~ m:^\-(.*?)\r\n(.*)$:sm;
unless ( $fq && $rq ) {
# allow from_batched_query to handle non-batches
$fq = $queries;
$rq = "";
}
if ( $fq =~ m:\?: ) {
my %template_query = ();
my @seq = ();
my @pairs = split /\&/, $fq;
for (@pairs) {
my ( $key, $value ) = split /=/, $_;
if ( $value eq "?" ) {
push @seq, $key;
}
else {
$template_query{$key} = $value ? uri_unescape($value) : '';
}
}
for ( split /\r\n/, $rq ) {
my @values = split /,/, $_;
my %foo = %template_query;
@foo{@seq} = @values;
push @queries, \%foo;
}
return undef unless @queries;
}
elsif ( $fq =~ m:\*: ) {
my %query = parsesis($fq);
for ( keys %query ) {
if ( $query{$_} eq "*" ) {
$query{$_} = $rq;
last;
}
}
push @queries, \%query;
}
else {
# Don't split $queries. Use $fq and $rq instead since
# $fq is already normalized.
my %q = parsesis($fq);
push @queries, \%q;
for ( split /\r\n/, $rq ) {
my %q = parsesis($_);
push @queries, \%q;
}
}
return \@queries;
}
sub randstr {
my $size = shift;
my $alphanum = shift;
my $str;
$alphanum = 1 if !defined($alphanum);
# ASCII
# 33-126 printable chars
# 48-57 numbers
# 65-90 uppercase alpha
# 97-122 lowercase alpha
while ( $size-- ) {
if ($alphanum) {
$str .= $b64table{ int( rand 64 ) };
}
else {
$str .= chr( int( rand 94 ) + 33 );
}
}
return $str;
}
sub escape_smtp_terminator {
my ($textref) = @_;
$$textref =~ s/\r\n\./\r\n\.\./gm;
}
sub unescape_smtp_terminator {
my ($textref) = @_;
$$textref =~ s/\r\n\.\./\r\n\./gm;
}
sub hex_dump {
my $string = shift;
for ( split //, $string ) {
print ord($_) . " ";
}
print "\n";
}
sub hash2str {
my $href = shift;
my %hash = %$href;
my ( $str, $key );
for $key ( keys %hash ) {
my $tstr;
if ( ref $hash{$key} eq 'ARRAY' ) {
for ( @{ $hash{$key} } ) { $tstr .= escape($_) . "," }
$str =~ s/,$//;
}
elsif ( !( ref $hash{$key} ) ) {
$tstr .= escape( $hash{$key} );
}
if ($tstr) { $str .= "$key:$tstr&" }
}
$str =~ s/&$//;
return $str;
}
sub str2hash {
my $str = shift;
my %hash;
my @pairs = split /(?<!\\)&/, $str;
for (@pairs) {
my ( $key, $data ) = split /(?<!\\):/, $_, 2;
if ( $data =~ /(?<!\\),/ ) {
my @list = split /(?<!\\),/, $data;
for (@list) { $_ = unescape($_) }
$hash{$key} = [@list];
}
else { $hash{$key} = unescape($data) }
}
return \%hash;
}
#
# If body of an email has mime attachments, the headers
# will indicate this. likewise, each mime attachment
# could also have nested mime attachments with headers that
# must indicate this. standard recursion.
#
# However, all 'leaf node' attachments don't have to have
# headers based on RFC xxx. They must be created before
# sending to razor servers.
#
#
# Example of mail with nested MIME attachments:
#
# level 1 level 2 level 3 ....
# --------------------------------------
# * - Header 1
# * - Body 1
# * -- A -- mime-header 2A
# | mime-body 2A
# | |
# | * ---- a --- mime-header 3a
# | | mime-body 3a
# | |
# | * -----b --- mime-header 3b
# | mime-body 3b
# * -- B -- mime-body 2B
# |
# * -- C -- mime-header 2C
# |
# * ---- c -- mime-body 3c
# |
# * ---- d -- mime-header 3d
# mime-body 3d
#
# should be reported as
#
# Header 1 \r\n
# part 1 = p(header 3a, body 3a) \r\n
# part 2 = p(header 3b, body 3b) \r\n
# part 3 = p(<generated header 2B>, body 2B) \r\n
# part 4 = p(<generated header 3c>, body 3c) \r\n
# part 5 = p(header 3d, body 3d) \r\n
# .\r\n
#
# Notes:
# - Order of parts does not matter.
#
# - Each part is processed by prep_mail, p(), before report/check
#
# - Except for original Header everything but leaf nodes
# are discarded. In the above example,
#
# Body 1, header 2A, header 2C - are discarded
#
#
# Detailed Explanation:
#
# Header 1 says 'Content-Type: multipart' with boundary definition
# Based on the Boundary, Body 1 is split into A, B, C.
#
# A is analyzed, has headers which also say 'Content-Type: multipart'
# with a different boundary, and it is split into 3a, 3b. 2A is what
# appears between header 2a and first boundary, so its ignored.
# 3a and 3b both have header info, so they are sent thru prep_mail
# and reported/checked
#
# <generated header 2B> is based on Header 1 to determine content
# type. if unknown, dummy header is added,
# and both are reported as a body part
#
# C is analyzed, has headers which also say 'Content-Type: multipart'
# with a different boundary, and it is split into 3c, 3d.
#
# <generated header 3c> is based on header 2c to determine content
# type. if unknown, dummy header is added,
# and both are reported as a body part
#
# 3d has header info, so header+body are sent thru prep_mail
# and reported/checked
#
#
#
# prep_mail() basically truncates msgs that are too big and/or
# base64 encodes binaries or 8-bit msgs.
#
# Split mime splits up multi-part mime mails.
#
# returns array of parts, where each part is
# headers\n\nbody
#
# headers will only contain X-Razor2 and Content- headers
#
# If not a mime mail, and the headers do not have any
# Content-* headers, then the only headers will be X-Razor2 ones
# (perhaps create Content-Type in da future?)
#
# body can be blank. nuked in prep_part
#
sub split_mime {
my ( $mailref, $ver, $recursive, $debug ) = @_;
return unless ref($mailref);
# mime-bodies must have header or initial blank lines.
#
my ( $hdr, $body ) = split /\n\r*\n/, $$mailref, 2;
my $no_valid_mime_hdr = 0;
unless ($body) {
# no blank lines, definately no header, so no nested mimes
print "split_mime: no blank lines\n" if $debug > 1;
$no_valid_mime_hdr = 1;
}
# fixme - handle attachments? i.e. if header has this
# Content-Disposition: attachment
# than body is mail, we could recursively call ourselves
# again with body (check body for hdrs first?)
# Make sure $hdr is really a hdr
#
# Details: If mime part is not RFC compliant, it could just
# be a body with blank lines. hdr could have just matched part
# of the body.
#
# valid mime header is determined by existance of 'Content-Type'
# If we're not recursive, we don't check orig_headers, we assume its ok.
# not sure if this is the best way ...
#
if ( $recursive && ( $hdr !~ /^Content-Type:/i ) ) {
$no_valid_mime_hdr = 1;
print "uh-oh, bad mime-body len=" . length($$mailref) . ":\n$$mailref\n" if $debug;
#print "split_mime: recur=($recursive)\n";
}
if ($no_valid_mime_hdr) {
#
# create dummy header and return it
#
# $ver should be '1' or client name + version
my $mimepart = "X-Razor2-Agent: $ver\n";
my $hrdlen = length($mimepart);
# if it has initial blank line, hurray for rfc compliance
if ( $$mailref =~ /^\n/ ) {
$mimepart .= $$mailref;
}
else {
$mimepart .= "\n" . $$mailref;
}
print "split_mime: returning total_len=" . length($mimepart) . "; hdrs=" . $hdrlen . ", body=" . length($$mailref) . "\n" if $debug;
return ( \$mimepart );
}
#
# Now we split mailref into hdr and body
# check hdr for nested mime (boundary)
#
my $orig_hdr = $hdr;
$hdr =~ s/\n\s+//sg; # merge multi-line headers
# nuke everything but X-Razor2 and Content-* headers
my $trimmed_hdr = "";
foreach ( split '\n', $hdr ) {
/^Content-/i and $trimmed_hdr .= "$_\n";
/^X-Razor2/i and $trimmed_hdr .= "$_\n";
}
my $boundary = "";
if ( $trimmed_hdr =~ /Content-Type: multipart.+boundary=("[^"]+"|\S+)/ig ) {
$boundary = $1;
}
if ( $boundary eq "" ) {
#
# valid mime hdr, but no nested mime.
# add razor hdr and return.
#
print "split_mime: valid_mime_hdr [len=" . length($orig_hdr) . "], but no nested mime\n$orig_hdr\n" if $debug > 1;
$trimmed_hdr = "X-Razor2-Agent: $ver\n" . $trimmed_hdr;
my $mimepart = "$trimmed_hdr\n$body";
print "split_mime: returning total=" . length($mimepart) . "; hdrs=" . length($trimmed_hdr) . ", body=" . length($body) . "\n" if $debug;
return ( \$mimepart );
}
$boundary = $1 if $boundary =~ /^"(.*)"$/;
# At this point, we know body has mime parts.
#
my @mimeparts;
#
# According to RFC 1341
# http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
#
# mimes are separated by \n--boundary\n
# and are followed immediately by header, blank line, body;
# or blank line and body.
#
# if no header in mime part, default content type for mime body is
# based on header where 'Content-Type: multipart*' was defined, where
# multipart/digest --> message/rfc822
# multipart/* --> text/plain
# perhaps we should add a header if none present?
#
# if a body contains mimes, the 'preable', or stuff before
# the first boundary, and the 'epilogue', the stuff after the
# last boudary, are to be ignored.
#
# NOTE: We split up multiparts, but content-type's can also be
# nested. i.e, a header of 'Content-Type: message' can have a body
# of 'Content-Type: image'
#
$body =~ s/\n\Q--$boundary--\E.*$//sg; # trash last boundary and epilogue
if ( $body =~ /^\Q--$boundary\E\r*\n/ ) {
# bug in some mails, make it RFC compliant
# now our split will work correctly
print "bad mime body [len=" . length($body) . "], not doing \\n--boundary, fixed tho.\n" if $debug > 1;
$body = "garbage\n$body";
}
my @tmpparts;
unless ( $body =~ /\Q--$boundary\E/ ) {
# Sometimes there's a boundary in the headers
# but none in email. In such cases, we'll
# treat the entire body as a part.
push @tmpparts, "garbage";
push @tmpparts, $body;
}
else {
@tmpparts = split /\n\Q--$boundary\E\r*\n/, $body;
}
shift @tmpparts; # trash everything up to the first boundary;
foreach (@tmpparts) {
# perhaps we should add a header based on default content-type?
unless (/\S/s) {
print "skipping body part containing only whitespace [len=" . length($_) . "]\n" if $debug;
next;
}
print "boundary: " . $recursive . "$boundary\n" if $debug > 1;
push @mimeparts, split_mime( \$_, $ver, " " . $recursive, $debug );
}
print "Saweeet!!! Boundary (" . scalar(@mimeparts) . "): $boundary\n" if defined($boundary) && ( $debug > 1 );
return @mimeparts;
}
# mailref is not modified by this sub
#
sub prep_part {
my ( $mailref, $maxheader, $maxbody ) = @_;
#print "[". length($$mailref) ."] maxsize=$maxheader + $maxbody\n";
my ( $hdr, $body ) = split /\n\r*\n/, $$mailref, 2;
$hdr .= "\n"; # put newline back on last header line
unless ($body) {
#
# fixme - this should not happen.
# if it does, split_mime needs work
#
# print "prep_part got F**KED-up mimepart [len=". length($$mailref) ."]\n$$mailref\n";
return; # body is empty
}
# fixme - are these the best chars to check for binary?
my $is_binary = ( $hdr =~ /^Content-Type-Encoding: 8-bit/ )
|| ( $body =~ /([\x00-\x1f|\x7f-\xff])/ and $1 !~ /[\r\n\t]/ );
my $enBase64 = new Razor2::Preproc::enBase64;
$is_binary = $enBase64->isit($mailref);
$enBase64->doit( \$body ) if $is_binary;
$body =~ s/\r+\n/\n/sg; # outlook sometimes does \r\r\n
$hdr =~ s/\r+\n/\n/sg;
if ( ( my $len = length($body) ) > $maxbody ) {
$body = substr $body, 0, $maxbody;
substr( $body, -2 ) = "==" if $is_binary;
$hdr = "X-Razor2-Origlen-Body: $len\n" . $hdr;
#print "maxbody=$maxbody body went from $len to ". length($body) ."\n";
}
if ( ( my $len = length($hdr) ) > $maxheader ) {
$hdr = "X-Razor2-Origlen-Header: $len\n" . $hdr;
if ( length($hdr) > $maxheader ) {
$hdr = substr $hdr, 0, $maxheader;
$hdr =~ s/([^\n]+)$//s; # remove last, incomplete line
}
# print "maxhdr=$maxheader header went from $len to ". length($hdr) ."\n";
}
my $dude = "$hdr\n$body";
return $mailref if $dude eq $$mailref; # this happens majority of the time
return \$dude;
}
# NOTE: Important function!
# *must* be kept in sync with server and all clients
# same holds true for prep_part()
#
# This is the preprocessing done on a mail before sent over network
#
sub prep_mail {
my ( $mailref, $report_headers, $maxheader, $maxbody, $maxorighdr, $versionstring, $debug ) = @_;
return unless ref($mailref);
print " prep_mail: orig=" . length($$mailref) . "\n" if ( $debug > 1 );
my ($orig_hdr) = split /\n\r*\n/, $$mailref, 2;
$orig_hdr .= "\n"; # put newline back on last header line
my $ver = $versionstring || 1;
my @mimeparts = split_mime( $mailref, $ver, 0, $debug );
my @mimeparts_prep;
foreach (@mimeparts) {
push @mimeparts_prep, prep_part( $_, $maxheader, $maxbody );
}
unless ($report_headers) {
my $hdr = "X-Razor2-Headers-Suppressed: 1\n";
foreach ( split '\n', $orig_hdr ) {
/^Content-/i and $hdr .= "$_\n";
/^X-Razor2/i and $hdr .= "$_\n";
}
$orig_hdr = $hdr;
}
if ( ( my $len = length($orig_hdr) ) > $maxorighdr ) {
$hdr = "X-Razor2-Origlen-Header: $len\n" . $orig_hdr;
if ( length($hdr) > $maxorighdr ) {
$hdr = substr $hdr, 0, $maxorighdr;
$hdr =~ s/([^\n]+)$//s; # remove last, incomplete line
}
#print "max=$maxorighdr orig_header went from $len to ". length($hdr) ."\n";
$orig_hdr = $hdr;
}
if ( $debug > 1 ) {
print "**** prep_mail done: headers=" . length($orig_hdr);
foreach ( 0 .. $#mimeparts_prep ) {
print "\n**** mail $_ [" . length( ${ $mimeparts_prep[$_] } ) . "] " . substr( ${ $mimeparts_prep[$_] }, 0, 40 );
}
print "\n\n";
}
return ( \$orig_hdr, @mimeparts_prep );
}
# from MIME::Parser
#my $parser = new MIME::Parser;
#my $entity = $parser->parse($body);
# foreach (dump_entity($entity))
sub dump_entity {
my $ent = shift;
my @parts = $ent->parts;
if (@parts) { # multipart...
map { dump_entity($_) } @parts;
}
else { # single part...
return ( $ent->body ); # return text blob
print " Part: ", $ent->bodyhandle->path, " (", scalar( $ent->head->mime_type ), ")\n";
}
}
# input: hex string ("2D")
# output: hash ref or array containg bits that are set
# 2D == (1, 3, 4, 6)
sub hexbits2hash {
my $hex = shift;
my %h;
for ( 0 .. 31 ) {
if ( hex($hex) & ( 2**31 ) >> ( 31 - $_ ) ) { $h{ $_ + 1 } = 1 }
}
return wantarray ? ( sort keys %h ) : \%h;
}
# input: hash ref, array ref, or array containg bits that are set
# output: hex string ("2D")
# 2D == (4, 8, 32)
sub hash2hexbits {
my @bits = @_;
@bits = @{ $bits[0] } if ref( $bits[0] ) eq 'ARRAY';
@bits = ( sort keys %{ $bits[0] } ) if ref( $bits[0] ) eq 'HASH';
my @all;
my $i = 1;
foreach ( sort { $a <=> $b } @bits ) {
while (1) {
push @all, 1 if $_ == $i;
last if $_ == $i;
push @all, 0;
$i++;
}
}
my $bs = join '', reverse @all;
# fixme needs testing
my $hex = ( unpack "H*", pack "B*", join '', reverse @all );
return $hex;
}
# for debugging - dumps a obj to a string
sub debugobj {
my ( $obj, $prefix, $maxwidth ) = @_;
$maxwidth ||= 70;
return if ( defined($prefix) && length($prefix) > $maxwidth );
my $line = "";
$prefix .= " " x 4;
if ( my $r = ref($obj) ) {
if ( $r eq 'HASH' ) {
$line = "$r - $obj," . scalar( keys %$obj ) . " keys\n";
foreach ( sort keys %$obj ) {
$line .= "$prefix$_ => " . debugobj( $obj->{$_}, $prefix );
}
$line .= $prefix . "[empty]\n" unless ( keys %$obj );
}
elsif ( $r eq 'ARRAY' ) {
$line = "$r - $obj," . scalar(@$obj) . " items\n";
foreach (@$obj) {
$line .= $prefix . debugobj( $_, $prefix );
}
$line .= $prefix . "[empty]\n" unless (@$obj);
}
elsif ( $r eq 'REF' ) {
$line = "$r - $obj\n";
$line .= $prefix . debugobj( $$obj, $prefix );
}
elsif ( $r eq 'SCALAR' ) {
$line = "$r - $obj\n";
$line .= $prefix . debugobj( $$obj, $prefix );
}
}
else {
if ( defined $obj ) {
$line = $1 if substr( $obj, 0, $maxwidth - length($prefix) ) =~ /^([^\n]+)/;
$line = "[length=" . length($obj) . "] " . $line
if ( length($line) ne length($obj) );
}
else {
$line = "[empty]";
}
$line .= "\n";
}
return $line;
}
sub clean_body {
my ( $self, $bodyref ) = @_;
my $hasheaders = 1;
if ( $self->{preprocs}->{deBase64}->isit($bodyref) ) {
$self->{preprocs}->{deBase64}->doit($bodyref);
$hasheaders = 0;
}
if ( $self->{preprocs}->{deQP}->isit($bodyref) ) {
$self->{preprocs}->{deQP}->doit($bodyref);
$hasheaders = 0;
}
if ( $self->{preprocs}->{deHTML}->isit($bodyref) ) {
$self->{preprocs}->{deHTML}->doit($bodyref);
}
if ($hasheaders) {
$$bodyref =~ s/^.*?\n\n//s;
}
}
sub round {
my $float = shift;
return sprintf( "%.0f", $float );
}
sub fisher_yates_shuffle {
my $deck = shift; # $deck is a reference to an array
my $i = @$deck;
while ( $i-- ) {
my $j = int rand( $i + 1 );
@$deck[ $i, $j ] = @$deck[ $j, $i ];
}
}
1;