#!/pro/bin/perl
use strict;
use warnings;
our $VERSION = "1.02 - 20190305";
sub usage {
my $err = shift and select STDERR;
print "usage: csvdiff [--no-color] [--html] [-w|-b|-Z] file.csv file.csv\n",
" provides colorized diff on sorted CSV files\n",
" assuming first line is header and first field is the key\n",
" --no-color do not use colors\n",
" -h --html produce HTML output\n",
" -w --ignore-all-space ignore all whistespace is all fields\n",
" -b --ignore-space-change ignore whitespace length changes\n",
" -Z --ignore-trailing-space ignore trailing whitespace per field\n",
" -o F --output=F send output to file F\n";
exit $err;
} # usage
use Getopt::Long qw(:config bundling nopermute );
my $opt_c = !$ENV{NO_COLOR};
GetOptions (
"help|?" => sub { usage (0); },
"V|version" => sub { print "csvdiff [$VERSION]\n"; exit 0 },
"w|ignore-all-space!" => \my $opt_w,
"b|ignore-ws|ignore-space-change!" => \my $opt_b,
"Z|ignore-trailing-space!" => \my $opt_Z,
"c|color|colour!" => \ $opt_c,
"h|html" => \my $opt_h,
"o|output=s" => \my $opt_o,
) or usage (1);
@ARGV == 2 or usage (1);
if ($opt_o) {
open STDOUT, ">", $opt_o or die "$opt_o: $!\n";
}
use HTML::Entities;
use Term::ANSIColor qw(:constants);
use Text::CSV_XS;
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 0 });
if ($opt_h) {
binmode STDOUT, ":encoding(utf-8)";
print <<EOH;
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<title>CFI School updates</title>
<meta name="Generator" content="perl $]" />
<meta name="Author" content="@{[scalar getpwuid $<]}" />
<meta name="Description" content="CSV diff @ARGV" />
<style type="text/css">
.rd { background: #ffe0e0; }
.gr { background: #e0ffe0; }
.hd { background: #e0e0ff; }
.b0 { background: #e0e0e0; }
.b1 { background: #f0f0f0; }
.r { color: red; }
.g { color: green; }
</style>
</head>
<body>
<h1>CSV diff @ARGV</h1>
<table>
EOH
$::{RED} = sub { "\cA\rr"; };
$::{GREEN} = sub { "\cA\rg"; };
$::{RESET} = sub { ""; };
}
elsif (!$opt_c) {
$::{$_} = sub { "" } for qw( RED GREEN RESET );
}
my @f;
my $opt_n = 1;
foreach my $x (0, 1) {
open my $fh, "<", $ARGV[$x] or die "$ARGV[$x]: $!\n";
my $n = 0;
while (1) {
my $row = $csv->getline ($fh) or last;
@$row and push @{$f[$x]}, $row;
$n++ && $row->[0] =~ m/\D/ and $opt_n = 0;
}
}
my @n = map { $#{$f[$_]} } 0, 1;
my @i = (1, 1);
my $hdr = "# csvdiff < $ARGV[0] > $ARGV[1]\n";
$f[$_][1+$n[$_]][0] = $opt_n ? 2147483647 : "\xff\xff\xff\xff" for 0, 1;
my %cls;
%cls = (
"b" => 0,
"-" => sub { "rd" },
"+" => sub { "gr" },
"H" => sub { "hd" },
"<" => sub { $cls{b} ^= 1; "b$cls{b}" },
">" => sub { "b$cls{b}" },
);
sub show {
my ($pfx, $x) = @_;
my $row = $f[$x][$i[$x]++] or return;
if ($opt_h) {
my $bg = $cls{$pfx}->();
print qq{ <tr class="$bg">},
(map{"<td".(s/^\cA\r([gr])//?qq{ class="$1"}:"").">$_</td>"}@$row),
"</tr>\n";
return;
}
print $hdr, $pfx, " ", $pfx eq "-" ? RED : $pfx eq "+" ? GREEN : "";
$csv->print (*STDOUT, $row);
print RESET, "\n";
$hdr = "";
} # show
# Skip first line of both are same: it probably is a header
my @h0 = @{$f[0][0]};
my @h1 = @{$f[1][0]};
if ("@h0" eq "@h1") {
if ($opt_h) {
$i[0]--;
show ("H", 0);
}
shift @{$f[0]};
shift @{$f[1]};
}
my $x = 0;
while ($i[0] <= $n[0] || $i[1] <= $n[1]) {
my @r0 = @{$f[0][$i[0]]};
my @r1 = @{$f[1][$i[1]]};
if ($opt_n) {
$r0[0] < $r1[0] and show ("-", 0), next;
$r0[0] > $r1[0] and show ("+", 1), next;
}
else {
$r0[0] lt $r1[0] and show ("-", 0), next;
$r0[0] gt $r1[0] and show ("+", 1), next;
}
my @v0 = @r0;
my @v1 = @r1;
$opt_Z and s/[\r\n\s]+\z// for @v0, @v1;
$opt_b and s/[\r\n\s]+/ /g for @v0, @v1;
$opt_w and s/[\r\n\s]+//g for @v0, @v1;
{ local $" = "\xFF";
"@v0" eq "@v1" and $i[0]++, $i[1]++, next;
}
foreach my $c (1 .. $#h0) {
my ($L, $R) = map { $_ // "" } $r0[$c], $r1[$c];
$L eq $R and next;
$f[0][$i[0]][$c] = RED . $L . RESET;
$f[1][$i[1]][$c] = GREEN . $R . RESET;
}
show ("<", 0);
show (">", 1);
}
$opt_h and print " </table>\n</body>\n</html>\n";
close STDOUT;