shell bypass 403
package Spreadsheet::XLSX;
use 5.006000;
use strict;
use warnings;
use base 'Spreadsheet::ParseExcel::Workbook';
our $VERSION = '0.18';
use Archive::Zip;
use Spreadsheet::ParseExcel;
use Spreadsheet::XLSX::Fmt2007;
################################################################################
sub new {
my ($class, $filename, $converter) = @_;
my %shared_info; # shared_strings, styles, style_info, rels, converter
$shared_info{converter} = $converter;
my $self = bless Spreadsheet::ParseExcel::Workbook->new(), $class;
my $zip = __load_zip($filename);
$shared_info{shared_strings}= __load_shared_strings($zip, $shared_info{converter});
my ($styles, $style_info) = __load_styles($zip);
$shared_info{styles} = $styles;
$shared_info{style_info} = $style_info;
$shared_info{rels} = __load_rels($zip);
$self->_load_workbook($zip, \%shared_info);
return $self;
}
sub _load_workbook {
my ($self, $zip, $shared_info) = @_;
my $member_workbook = $zip->memberNamed('xl/workbook.xml') or die("xl/workbook.xml not found in this zip\n");
$self->{SheetCount} = 0;
$self->{FmtClass} = Spreadsheet::XLSX::Fmt2007->new;
$self->{Flg1904} = 0;
if ($member_workbook->contents =~ /date1904="1"/) {
$self->{Flg1904} = 1;
}
foreach ($member_workbook->contents =~ /\<(.*?)\/?\>/g) {
/^(\w+)\s+/;
my ($tag, $other) = ($1, $');
my @pairs = split /\" /, $other;
$tag eq 'sheet' or next;
my $sheet = {
MaxRow => 0,
MaxCol => 0,
MinRow => 1000000,
MinCol => 1000000,
};
foreach ($other =~ /(\S+=".*?")/gsm) {
my ($k, $v) = split /=?"/; #"
if ($k eq 'name') {
$sheet->{Name} = $v;
$sheet->{Name} = $shared_info->{converter}->convert($sheet->{Name}) if defined $shared_info->{converter};
} elsif ($k eq 'r:id') {
$sheet->{path} = $shared_info->{rels}->{$v};
}
}
my $wsheet = Spreadsheet::ParseExcel::Worksheet->new(%$sheet);
$self->{Worksheet}[$self->{SheetCount}] = $wsheet;
$self->{SheetCount} += 1;
}
foreach my $sheet (@{$self->{Worksheet}}) {
my $member_sheet = $zip->memberNamed("xl/$sheet->{path}") or next;
my ($row, $col);
my $parsing_v_tag = 0;
my $s = 0;
my $s2 = 0;
my $sty = 0;
foreach ($member_sheet->contents =~ /(\<.*?\/?\>|.*?(?=\<))/g) {
if (/^\<c\s*.*?\s*r=\"([A-Z])([A-Z]?)(\d+)\"/) {
($row, $col) = __decode_cell_name($1, $2, $3);
$s = m/t=\"s\"/ ? 1 : 0;
$s2 = m/t=\"str\"/ ? 1 : 0;
$sty = m/s="([0-9]+)"/ ? $1 : 0;
} elsif (/^<v>/) {
$parsing_v_tag = 1;
} elsif (/^<\/v>/) {
$parsing_v_tag = 0;
} elsif (length($_) && $parsing_v_tag) {
my $v = $s ? $shared_info->{shared_strings}->[$_] : $_;
if ($v eq "</c>") {
$v = "";
}
my $type = "Text";
my $thisstyle = "";
if (not($s) && not($s2)) {
$type = "Numeric";
if (defined $sty && defined $shared_info->{styles}->[$sty]) {
$thisstyle = $shared_info->{style_info}->{$shared_info->{styles}->[$sty]};
if ($thisstyle =~ /\b(mmm|m|d|yy|h|hh|mm|ss)\b/) {
$type = "Date";
}
}
}
$sheet->{MaxRow} = $row if $sheet->{MaxRow} < $row;
$sheet->{MaxCol} = $col if $sheet->{MaxCol} < $col;
$sheet->{MinRow} = $row if $sheet->{MinRow} > $row;
$sheet->{MinCol} = $col if $sheet->{MinCol} > $col;
if ($v =~ /(.*)E\-(.*)/gsm && $type eq "Numeric") {
$v = $1 / (10**$2); # this handles scientific notation for very small numbers
}
my $cell = Spreadsheet::ParseExcel::Cell->new(
Val => $v,
Format => $thisstyle,
Type => $type
);
$cell->{_Value} = $self->{FmtClass}->ValFmt($cell, $self);
if ($type eq "Date") {
if ($v < 1) { #then this is Excel time field
$cell->{Type} = "Text";
$cell->{Val} = $cell->{_Value};
}
}
$sheet->{Cells}[$row][$col] = $cell;
}
}
$sheet->{MinRow} = 0 if $sheet->{MinRow} > $sheet->{MaxRow};
$sheet->{MinCol} = 0 if $sheet->{MinCol} > $sheet->{MaxCol};
}
return $self;
}
# Convert cell name in the format AA1 to a row and column number.
sub __decode_cell_name {
my ($letter1, $letter2, $digits) = @_;
my $col = ord($letter1) - 65;
if ($letter2) {
$col++;
$col *= 26;
$col += (ord($letter2) - 65);
}
my $row = $digits - 1;
return ($row, $col);
}
sub __load_shared_strings {
my ($zip, $converter) = @_;
my $member_shared_strings = $zip->memberNamed('xl/sharedStrings.xml');
my @shared_strings = ();
if ($member_shared_strings) {
my $mstr = $member_shared_strings->contents;
$mstr =~ s/<t\/>/<t><\/t>/gsm; # this handles an empty t tag in the xml <t/>
foreach my $si ($mstr =~ /<si.*?>(.*?)<\/si/gsm) {
my $str;
foreach my $t ($si =~ /<t.*?>(.*?)<\/t/gsm) {
$t = $converter->convert($t) if defined $converter;
$str .= $t;
}
push @shared_strings, $str;
}
}
return \@shared_strings;
}
sub __load_styles {
my ($zip) = @_;
my $member_styles = $zip->memberNamed('xl/styles.xml');
my @styles = ();
my %style_info = ();
if ($member_styles) {
my $formatter = Spreadsheet::XLSX::Fmt2007->new();
foreach my $t ($member_styles->contents =~ /xf\ numFmtId="([^"]*)"(?!.*\/cellStyleXfs)/gsm) { #"
push @styles, $t;
}
my $default = $1 || '';
foreach my $t1 (@styles) {
$member_styles->contents =~ /numFmtId="$t1" formatCode="([^"]*)/;
my $formatCode = $1 || '';
if ($formatCode eq $default || not($formatCode)) {
if ($t1 == 9 || $t1 == 10) {
$formatCode = '0.00000%';
} elsif ($t1 == 14) {
$formatCode = 'yyyy-mm-dd';
} else {
$formatCode = '';
}
# $formatCode = $formatter->FmtStringDef($t1);
}
$style_info{$t1} = $formatCode;
$default = $1 || '';
}
}
return (\@styles, \%style_info);
}
sub __load_rels {
my ($zip) = @_;
my $member_rels = $zip->memberNamed('xl/_rels/workbook.xml.rels') or die("xl/_rels/workbook.xml.rels not found in this zip\n");
my %rels = ();
foreach ($member_rels->contents =~ /\<Relationship (.*?)\/?\>/g) {
my ($id, $target);
($id) = /Id="(.*?)"/;
($target) = /Target="(.*?)"/;
if (defined $id and defined $target) {
$rels{$id} = $target;
}
}
return \%rels;
}
sub __load_zip {
my ($filename) = @_;
my $zip = Archive::Zip->new();
if (ref $filename) {
$zip->readFromFileHandle($filename) == Archive::Zip::AZ_OK or die("Cannot open data as Zip archive");
} else {
$zip->read($filename) == Archive::Zip::AZ_OK or die("Cannot open $filename as Zip archive");
}
return $zip;
}
1;
__END__
=head1 NAME
Spreadsheet::XLSX - Perl extension for reading MS Excel 2007 files.
=head1 SYNOPSIS
use Text::Iconv;
my $converter = Text::Iconv->new("utf-8", "windows-1251");
# Text::Iconv is not really required.
# This can be any object with the convert method. Or nothing.
use Spreadsheet::XLSX;
my $excel = Spreadsheet::XLSX->new('test.xlsx', $converter);
foreach my $sheet (@{$excel->{Worksheet}}) {
printf("Sheet: %s\n", $sheet->{Name});
$sheet->{MaxRow} ||= $sheet->{MinRow};
foreach my $row ($sheet->{MinRow} .. $sheet->{MaxRow}) {
$sheet->{MaxCol} ||= $sheet->{MinCol};
foreach my $col ($sheet->{MinCol} .. $sheet->{MaxCol}) {
my $cell = $sheet->{Cells}[$row][$col];
if ($cell) {
printf("( %s , %s ) => %s\n", $row, $col, $cell->{Val});
}
}
}
}
=head1 DESCRIPTION
This module is a (quick and dirty) emulation of L<Spreadsheet::ParseExcel> for
Excel 2007 (.xlsx) file format. It supports styles and many of Excel's quirks,
but not all. It populates the classes from L<Spreadsheet::ParseExcel> for interoperability;
including Workbook, Worksheet, and Cell.
=head1 SEE ALSO
=over 2
=item L<Spreadsheet::ParseXLSX>
This module (Spradsheet::XLSX) has some serious issues with the way it uses regexs for parsing the XML.
I would strongly encourage switching to L<Spreadsheet::ParseXLSX> which takes a more reliable approach.
=item L<Text::CSV_XS>, L<Text::CSV_PP>
=item L<Spreadsheet::ParseExcel>
=item L<Spreadsheet::ReadSXC>
=item L<Spreadsheet::BasicRead>
for xlscat likewise functionality (Excel only)
=item Spreadsheet::ConvertAA
for an alternative set of C<cell2cr()> / C<cr2cell()> pair
=item L<Spreadsheet::Perl>
offers a Pure Perl implementation of a
spreadsheet engine. Users that want this format to be supported in
L<Spreadsheet::Read> are hereby motivated to offer patches. It's not high
on my todo-list.
=item xls2csv
L<https://metacpan.org/release/KEN/xls2csv-1.07> offers an alternative for my C<xlscat -c>,
in the xls2csv tool, but this tool focusses on character encoding
transparency, and requires some other modules.
=item L<Spreadsheet::Read>
read the data from a spreadsheet (interface module)
=back
=head1 AUTHOR
Dmitry Ovsyanko, E<lt>do@eludia.ruE<gt>, http://eludia.ru/wiki/
Patches by:
Steve Simms
Joerg Meltzer
Loreyna Yeung
Rob Polocz
Gregor Herrmann
H.Merijn Brand
endacoe
Pat Mariani
Sergey Pushkin
=head1 ACKNOWLEDGEMENTS
Thanks to TrackVia Inc. (http://www.trackvia.com) for paying for Rob Polocz working time.
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2008 by Dmitry Ovsyanko
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.
=cut