#!/pro/bin/perl
# csv2xlsx: Convert csv to xlsx
# (m)'19 [23 Aug 2019] Copyright H.M.Brand 2007-2019
use 5.14.0;
use warnings;
our $VERSION = "1.05";
sub usage {
my $err = shift and select STDERR;
print <<"EOU";
usage: csv2xlsx [-s <sep>] [-q <quot>] [-w <width>] [-d <dtfmt>]
[-o <xlsx>] [file.csv]
-s <sep> use <sep> as seperator char, auto-detect, default = ','
The string "tab" is allowed.
-e <esc> use <esc> as escape char, auto-detect, default = '"'
The string "undef" is allowed.
-q <quot> use <quot> as quotation char, default = '"'
The string "undef" will disable quotation.
-w <width> use <width> as default minimum column width default = 4
-o <xlsx> write output to file named <xlsx>, defaults
to input file name with .csv replaced with .xlsx
if from standard input, defaults to csv2xlsx.xlsx
-F allow formula's. Otherwise fields starting with
an equal sign are forced to string
--Fa=aaa Define formula action: die/croak/diag/empty/undef
--Fd Formula's will cause a die
--Fc Formula's will cause a croak
--FD Formula's will cause a warning (this is the default)
--Fe Formula's will be replaced by the empty string
--Fu Formula's will be replaced with an undefined cell
-f force usage of <xlsx> if already exists (unlink before use)
-d <dtfmt> use <dtfmt> as date formats. Default = 'dd-mm-yyyy'
-C <C:fmt> use <fmt> as currency formats for currency <C>, no default
-D cols only convert dates in columns <cols>. Default is everywhere.
-u CSV is UTF8
-m merge multiple CSV's into a single xlsx (separate sheets)
-o is required, all arguments should be existing files
-v [<lvl>] verbosity (default = 1)
EOU
exit $err;
} # usage
use Getopt::Long qw(:config bundling passthrough);
my $quo = '"';
my $esc = '"';
my $wdt = 4; # Default minimal column width
my $fac = "diag"; # Formula action (default is warn only)
my $dtf = "dd-mm-yyyy"; # Date format
my $crf = ""; # Currency format, e.g.: $:### ### ##0.00
my $opt_v = 1;
my $dtc;
GetOptions (
"help|?" => sub { usage (0); },
"V|version" => sub { say $0 =~ s{.*/}{}r, " [$VERSION]"; exit 0; },
"c|s|sep=s" => \my $sep, # Set after reading first line in attempt to auto-detect
"q|quo=s" => \$quo,
"e|esc=s" => \$esc,
"w|width=i" => \$wdt,
"o|x|out=s" => \my $xls,
"d|date-fmt=s" => \$dtf,
"D|date-col=s" => \$dtc,
"C|curr-fmt=s" => \$crf,
"f|force!" => \my $frc,
"F|formulas!" => \my $frm,
"Fa=s" => \$fac,
"Fd" => sub { $fac = "die"; },
"Fc" => sub { $fac = "croak"; },
"FD" => sub { $fac = "diag"; },
"Fe" => sub { $fac = "empty"; },
"Fu" => sub { $fac = "undef"; },
"u|utf-8|utf8!" => \my $utf,
"m|merge!" => \my $mrg,
"v|verbose:1" => \$opt_v,
) or usage (1);
if ($mrg) {
my @csv;
for (@ARGV) {
if (m/\.xlsx?$/i) {
$xls and usage (1);
$xls = $_;
next;
}
if (m/\.csv$/i && -s) {
push @csv, $_;
next;
}
warn "Argument $_ is not an existing (CSV) file\n";
usage (1);
}
$xls && @csv or usage (1);
@ARGV = @csv;
}
my $base = @ARGV && -f $ARGV[0] ? $ARGV[0] : "csv2xlsx";
$xls ||= $base =~ s/(?:\.csv)?$/.xlsx/ir;
-s $xls && $frc and unlink $xls;
if (-s $xls) {
print STDERR "File '$xls' already exists. Overwrite? [y/N] > N\b";
scalar <STDIN> =~ m/^[yj](?:es|a)?$/i or exit;
}
# Don't split ourselves when modules do it _much_ better, and follow the standards
use Text::CSV_XS;
use Date::Calc qw( Delta_Days Days_in_Month );
use Excel::Writer::XLSX;
use Encode qw( from_to );
my $wbk = Excel::Writer::XLSX->new ($xls);
$dtf =~ s/j/y/g;
my %fmt = (
date => $wbk->add_format (align => "center", num_format => $dtf),
rest => $wbk->add_format (align => "left"),
wrap => $wbk->add_format (text_wrap => 1),
);
$crf =~ s/^([^:]+):(.*)/$1/ and $fmt{currency} = $wbk->add_format (
num_format => "$1 $2",
align => "right",
);
my @args = @ARGV ? @ARGV : ("");
foreach my $csvf (@args) {
my $sheetname = $csvf =~ s{\.csv$}{}ir =~ s{.*/}{}r || "Sheet 1";
($_ = length $sheetname) > 31 and substr $sheetname, 31, $_ - 31, "";
#say "$sheetname (", length ($sheetname), ") << $csvf";
my $wks = $wbk->add_worksheet ($sheetname);
my ($h, $w, @w) = (0, 1); # data height, -width, and default column widths
my $row;
my $firstline;
my $fh;
if (-f $csvf) {
$opt_v and say "Reading $csvf";
open $fh, "<", $csvf or die "$csvf: $!\n";
}
else {
$opt_v and say "Reading STDIN";
$fh = *ARGV;
}
my $Sep = $sep;
unless ($Sep) { # No sep char passed, try to auto-detect;
while (<$fh>) {
m/\S/ or next; # Skip empty leading blank lines
$Sep = # start auto-detect with quoted strings
m/["\d];["\d;]/ ? ";" :
m/["\d],["\d,]/ ? "," :
m/["\d]\t["\d,]/ ? "\t" :
# If neither, then for unquoted strings
m/\w;[\w;]/ ? ";" :
m/\w,[\w,]/ ? "," :
m/\w\t[\w,]/ ? "\t" :
# And pipes (lowest prio)
m/["\d]\|["\d,]/ ? "|" :
m/\w\|[\w,]/ ? "|" :
m/,/ && !m/[;\t]/ ? "," :
m/;/ && !m/[,\t]/ ? ";" :
m/\t/ && !m/[,;]/ ? "\t" :
"," ;
$firstline = $_;
last;
}
$firstline or die "The sourcefile does not contain any usable data\n";
}
my $csv = Text::CSV_XS-> new ({
sep_char => $Sep eq "tab" ? "\t" : $Sep,
quote_char => $quo eq "undef" ? undef : $quo,
escape_char => $esc eq "undef" ? undef : $esc,
binary => 1,
keep_meta_info => 1,
auto_diag => 1,
formula => $fac,
});
if ($firstline) {
$csv->parse ($firstline) or die $csv->error_diag ();
$row = [ $csv->fields ];
}
if ($opt_v > 3) {
foreach my $k (qw( sep_char quote_char escape_char )) {
my $c = $csv->$k () || "undef";
$c =~ s/\t/\\t/g;
$c =~ s/\r/\\r/g;
$c =~ s/\n/\\n/g;
$c =~ s/\0/\\0/g;
$c =~ s/([\x00-\x1f\x7f-\xff])/sprintf"\\x{%02x}",ord$1/ge;
printf STDERR "%-11s = %s\n", $k, $c;
}
}
if (my $rows = $dtc) {
$rows =~ s/-$/-999/; # 3,6-
$rows =~ s/-/../g;
eval "\$dtc = { map { \$_ => 1 } $rows }";
}
while ($row && @$row or $row = $csv->getline ($fh)) {
my @row = @$row;
@row > $w and push @w, ($wdt) x (($w = @row) - @w);
foreach my $c (0 .. $#row) {
my $val = $row[$c] // "";
my $l = length $val;
$l > ($w[$c] // -1) and $w[$c] = $l;
if ($utf and $csv->is_binary ($c)) {
from_to ($val, "utf-8", "ucs2");
$wks->write_unicode ($h, $c, $val);
next;
}
if ($csv->is_quoted ($c)) {
$val =~ s/\r\n/\n/g;
if ($utf) {
from_to ($val, "utf-8", "ucs2");
$val =~ m/\n/
? $wks->write_unicode ($h, $c, $val, $fmt{wrap})
: $wks->write_unicode ($h, $c, $val);
}
else {
$val =~ m/\n/
? $wks->write_string ($h, $c, $val, $fmt{wrap})
: $wks->write_string ($h, $c, $val);
}
next;
}
if (!$dtc or $dtc->{$c + 1}) {
my @d = (0, 0, 0); # Y, M, D
$val =~ m/^(\d{4})(\d{2})(\d{2})$/ and @d = ($1, $2, $3);
$val =~ m/^(\d{2})-(\d{2})-(\d{4})$/ and @d = ($3, $2, $1);
if ( $d[2] >= 1 && $d[2] <= 31 &&
$d[1] >= 1 && $d[1] <= 12 &&
$d[0] >= 1900 && $d[0] <= 2199) {
my $dm = Days_in_Month (@d[0,1]);
$d[2] < 1 and $d[2] = 1;
$d[2] > $dm and $d[2] = $dm;
my $dt = 2 + Delta_Days (1900, 1, 1, @d);
$wks->write ($h, $c, $dt, $fmt{date});
next;
}
}
if ($crf and $val =~ m/^\s*\Q$crf\E\s*([0-9.]+)$/) {
$wks->write ($h, $c, $1 + 0, $fmt{currency});
next;
}
if (!$frm && $val =~ m/^=/) {
$wks->write_string ($h, $c, $val);
}
else {
$wks->write ($h, $c, $val);
}
}
++$h % 100 or $opt_v && printf STDERR "%6d x %6d\r", $w, $h;
} continue { $row = undef }
close $fh;
$opt_v && printf STDERR "%6d x %6d\n", $w, $h;
$wks->set_column ($_, $_, $w[$_]) for 0 .. $#w;
}
$opt_v and say "Writing $xls";
$wbk->close ();