shell bypass 403
package XML::Hash::LX;
use 5.006002;
use strict;
use warnings;
use XML::LibXML ();
use Types::Serialiser;
our $PARSER = XML::LibXML->new();
sub _croak { require Carp; goto &Carp::croak }
sub import {
my $me = shift;
no strict 'refs';
my %e = ( xml2hash => 1, hash2xml => 1, ':inject' => 0 );
if (@_) { %e = map { $_=>1 } @_ }
*{caller().'::xml2hash'} = \&xml2hash if delete $e{xml2hash};
*{caller().'::hash2xml'} = \&hash2xml if delete $e{hash2xml};
if ( delete $e{':inject'} ) {
unless (defined &XML::LibXML::Node::toHash) {
*XML::LibXML::Node::toHash = \&xml2hash;
}
}
_croak "@{[keys %e]} is not exported by $me" if %e;
}
=head1 NAME
XML::Hash::LX - Convert hash to xml and xml to hash using LibXML
=cut
our $VERSION = '0.07';
=head1 SYNOPSIS
use XML::Hash::LX;
my $hash = xml2hash $xmlstring, attr => '.', text => '~';
my $hash = xml2hash $xmldoc;
my $xmlstr = hash2html $hash, attr => '+', text => '#text';
my $xmldoc = hash2html $hash, doc => 1, attr => '+';
# Usage with XML::LibXML
my $doc = XML::LibXML->new->parse_string($xml);
my $xp = XML::LibXML::XPathContext->new($doc);
$xp->registerNs('rss', 'http://purl.org/rss/1.0/');
# then process xpath
for ($xp->findnodes('//rss:item')) {
# and convert to hash concrete nodes
my $item = xml2hash($_);
print Dumper+$item
}
=head1 DESCRIPTION
This module is a companion for C<XML::LibXML>. It operates with LibXML objects, could return or accept LibXML objects, and may be used for easy data transformations
It is faster in parsing then L<XML::Simple>, L<XML::Hash>, L<XML::Twig> and of course much slower than L<XML::Bare> ;)
It is faster in composing than L<XML::Hash>, but slower than L<XML::Simple>
Parse benchmark:
Rate Simple Hash Twig Hash::LX Bare
Simple 11.3/s -- -2% -16% -44% -97%
Hash 11.6/s 2% -- -14% -43% -97%
Twig 13.5/s 19% 16% -- -34% -96%
Hash::LX 20.3/s 79% 75% 51% -- -95%
Bare 370/s 3162% 3088% 2650% 1721% --
Compose benchmark:
Rate Hash Hash::LX Simple
Hash 49.2/s -- -18% -40%
Hash::LX 60.1/s 22% -- -26%
Simple 81.5/s 66% 36% --
Benchmark was done on L<http://search.cpan.org/uploads.rdf>
=head1 EXPORT
C<xml2hash> and C<hash2xml> are exported by default
=head2 :inject
Inject toHash method in the namespace of L<XML::LibXML::Node> and allow to call it on any subclass of L<XML::LibXML::Node> directly
By default is disabled
use XML::Hash::LX ':inject';
my $doc = XML::LibXML->new->parse_string($xml);
my $hash = $doc->toHash(%opts);
=head1 FUNCTIONS
=head2 xml2hash $xml, [ OPTIONS ]
XML could be L<XML::LibXML::Document>, L<XML::LibXML::DocumentPart> or string
=head2 hash2xml $hash, [ doc => 1, ] [ OPTIONS ]
Id C<doc> option is true, then returned value is L<XML::LibXML::Document>, not string
=head1 OPTIONS
Every option could be passed as arguments to function or set as global variable in C<XML::Hash::LX> namespace
=head2 %XML::Hash::LX::X2H
Options respecting convertations from xml to hash
=over 4
=item order [ = 0 ]
B<Strictly> keep the output order. When enabled, structures become more complex, but xml could be completely reverted
=item attr [ = '-' ]
Attribute prefix
<node attr="test" /> => { node => { -attr => "test" } }
=item text [ = '#text' ]
Key name for storing text
<node>text<sub /></node> => { node => { sub => '', '#text' => "test" } }
=item join [ = '' ]
Join separator for text nodes, splitted by subnodes
Ignored when C<order> in effect
# default:
xml2hash( '<item>Test1<sub />Test2</item>' )
: { item => { sub => '', '~' => 'Test1Test2' } };
# global
$XML::Hash::LX::X2H{join} = '+';
xml2hash( '<item>Test1<sub />Test2</item>' )
: { item => { sub => '', '~' => 'Test1+Test2' } };
# argument
xml2hash( '<item>Test1<sub />Test2</item>', join => '+' )
: { item => { sub => '', '~' => 'Test1+Test2' } };
=item trim [ = 1 ]
Trim leading and trailing whitespace from text nodes
=item cdata [ = undef ]
When defined, CDATA sections will be stored under this key
# cdata = undef
<node><![CDATA[ test ]]></node> => { node => 'test' }
# cdata = '#'
<node><![CDATA[ test ]]></node> => { node => { '#' => 'test' } }
=item comm [ = undef ]
When defined, comments sections will be stored under this key
When undef, comments will be ignored
# comm = undef
<node><!-- comm --><sub/></node> => { node => { sub => '' } }
# comm = '/'
<node><!-- comm --><sub/></node> => { node => { sub => '', '/' => 'comm' } }
=item load_ext_dtd [ = 0 ]
Load the external DTD
# load_ext_dtd = 0
<!DOCTYPE foo [<!ENTITY % ent1 SYSTEM "rm -rf /">%ent1; ]><node> text</node>
# load_ext_dtd = 1
<!DOCTYPE foo [<!ENTITY % ent1 SYSTEM "rm -rf /">%ent1; ]><node> text</node>
oops!
=item expand_entities [ = 0 ]
Enable XInclude substitution. (See L<XML::LibXML::Parser>)
=item expand_xinclude [ = 0 ]
Enable entities expansion. (See L<XML::LibXML::Parser>). (Enabling also enables load_ext_dtd)
=item validation [ = 0 ]
Enable validating with the DTD. (See L<XML::LibXML::Parser>)
=item no_network [ = 1 ]
Forbid network access; (See L<XML::LibXML::Parser>)
If true, all attempts to fetch non-local resources (such as DTD or external entities) will fail
=back
=head2 $XML::Hash::LX::X2A [ = 0 ]
Global array casing
Ignored when C<X2H{order}> in effect
As option should be passed as
xml2hash $xml, array => 1;
Effect:
# $X2A = 0
<node><sub/></node> => { node => { sub => '' } }
# $X2A = 1
<node><sub/></node> => { node => [ { sub => [ '' ] } ] }
=head2 %XML::Hash::LX::X2A
By element array casing
Ignored when C<X2H{order}> in effect
As option should be passed as
xml2hash $xml, array => [ nodes list ];
Effect:
# %X2A = ()
<node><sub/></node> => { node => { sub => '' } }
# %X2A = ( sub => 1 )
<node><sub/></node> => { node => { sub => [ '' ] } }
=cut
our $X2A = 0;
our %X2A = ();
our %X2H;
%X2H = (
order => 0,
attr => '-',
text => '#text',
join => '',
trim => 1,
cdata => undef,
comm => undef,
#cdata => '#',
#comm => '//',
load_ext_dtd => 0,
expand_entities => 0,
expand_xinclude => 0,
validation => 0,
no_network => 1,
%X2H, # also inject previously user-defined options
);
sub _x2h {
my $doc = shift;
my $res;
if ($doc->hasChildNodes or $doc->hasAttributes) {
if ($X2H{order}) {
$res = [];
my $attr = {};
for ($doc->attributes) {
#warn " .> ".$_->nodeName.'='.$_->getValue;
$attr->{ $X2H{attr} . $_->nodeName } = $_->getValue;
}
push @$res, $attr if %$attr;
} else {
$res = {};
for ($doc->attributes) {
#warn " .> ".$_->nodeName.'='.$_->getValue;
$res->{ $X2H{attr} . $_->nodeName } = $_->getValue;
}
}
for ($doc->childNodes) {
my $ref = ref $_;
my $nn;
if ($ref eq 'XML::LibXML::Text') {
$nn = $X2H{text}
}
elsif ($ref eq 'XML::LibXML::CDATASection') {
$nn = defined $X2H{cdata} ? $X2H{cdata} : $X2H{text};
}
elsif ($ref eq 'XML::LibXML::Comment') {
$nn = defined $X2H{comm} ? $X2H{comm} : next;
}
else {
$nn = $_->nodeName;
}
my $chld = _x2h($_);
if ($X2H{order}) {
if ($nn eq $X2H{text}) {
push @{ $res }, $chld if length $chld;
} else {
push @{ $res }, { $nn => $chld };
}
} else {
if (( $X2A or $X2A{$nn} ) and !$res->{$nn}) { $res->{$nn} = [] }
if (exists $res->{$nn} ) {
#warn "Append to $res->{$nn}: $nn $chld";
$res->{$nn} = [ $res->{$nn} ] unless ref $res->{$nn} eq 'ARRAY';
push @{$res->{$nn}}, $chld if defined $chld;
} else {
if ($nn eq $X2H{text}) {
$res->{$nn} = $chld if length $chld;
} else {
$res->{$nn} = $chld;
}
}
}
}
if($X2H{order}) {
#warn "Ordered mode, have res with ".(0+@$res)." children = @$res";
return $res->[0] if @$res == 1;
} else {
if (defined $X2H{join} and exists $res->{ $X2H{text} } and ref $res->{ $X2H{text} }) {
$res->{ $X2H{text} } = join $X2H{join}, grep length, @{ $res->{ $X2H{text} } };
}
delete $res->{ $X2H{text} } if $X2H{trim} and keys %$res > 1 and exists $res->{ $X2H{text} } and !length $res->{ $X2H{text} };
return $res->{ $X2H{text} } if keys %$res == 1 and exists $res->{ $X2H{text} };
}
}
else {
$res = $doc->textContent;
if ($X2H{trim}) {
$res =~ s{^\s+}{}s;
$res =~ s{\s+$}{}s;
}
}
$res;
}
sub xml2hash($;%) {
my $doc = shift;
defined $doc or _croak("Called xml2hash on undef"),return;
my %opts = @_;
my $arr = delete $opts{array};
local $X2A = 1 if defined $arr and !ref $arr;
local @X2A{@$arr} = (1)x@$arr if defined $arr and ref $arr;
local @X2H{keys %opts} = values %opts if @_;
$PARSER->load_ext_dtd($X2H{load_ext_dtd});
$PARSER->expand_entities($X2H{expand_entities});
$PARSER->expand_xinclude($X2H{expand_xinclude});
$PARSER->validation($X2H{validation});
$PARSER->no_network($X2H{no_network});
$doc = $PARSER->parse_string($doc) if !ref $doc;
#use Data::Dumper;
#warn Dumper \%X2H;
my $root = $doc->isa('XML::LibXML::Document') ? $doc->documentElement : $doc;
return {
scalar $root->nodeName => $X2A || $X2A{$root->nodeName} ? [ _x2h($root) ] : _x2h($root),
};
}
=head2 %XML::Hash::LX::H2X
Options respecting convertations from hash to xml
=over 4
=item encoding [ = 'utf-8' ]
XML output encoding
=item attr [ = '-' ]
Attribute prefix
{ node => { -attr => "test", sub => 'test' } }
<node attr="test"><sub>test</sub></node>
=item text [ = '#text' ]
Key name for storing text
{ node => { sub => '', '#text' => "test" } }
<node>text<sub /></node>
# or
<node><sub />text</node>
# order of keys is not predictable
=item trim [ = 1 ]
Trim leading and trailing whitespace from text nodes
# trim = 1
{ node => { sub => [ ' ', 'test' ], '#text' => "test" } }
<node>test<sub>test</sub></node>
# trim = 0
{ node => { sub => [ ' ', 'test' ], '#text' => "test" } }
<node>test<sub> test</sub></node>
=item cdata [ = undef ]
When defined, such key elements will be saved as CDATA sections
# cdata = undef
{ node => { '#' => 'test' } } => <node><#>test</#></node> # it's bad ;)
# cdata = '#'
{ node => { '#' => 'test' } } => <node><![CDATA[test]]></node>
=item comm [ = undef ]
When defined, such key elements will be saved as comment sections
# comm = undef
{ node => { '/' => 'test' } } => <node></>test<//></node> # it's very bad! ;)
# comm = '/'
{ node => { '/' => 'test' } } => <node><!-- test --></node>
=back
=cut
our %H2X;
%H2X = (
%X2H,
#attr => '-',
#text => '~',
#trim => 1,
# join => '+', # useless
%H2X,
);
our $AL = length $H2X{attr};
our $hd = '/';
sub _h2x {
@_ or return;
my ($data,$parent) = @_;
#warn "> $d";
return unless defined $data;
if ( !ref $data ) {
if ($H2X{trim}) {
$data =~ s/^\s+//s;
$data =~ s/\s+$//s;
#return unless length($data);
}
return XML::LibXML::Text->new($data)
};
my @rv;
if (ref $data eq 'ARRAY') {
#warn "Map @$data";
@rv = map _h2x($_,$parent), @$data;
}
elsif (ref $data eq 'HASH') {
for (keys %$data) {
#warn "$_ $data->{$_}";
#next if !defined $data->{$_} or ( !ref $data->{$_} and !length $data->{$_} );
# What may be empty ?
# - attribute
# - node
# - comment
# Skip empty: text, cdata
my $cdata_or_text;
if ($_ eq $H2X{text}) {
$cdata_or_text = 'XML::LibXML::Text';
}
elsif (defined $H2X{cdata} and $_ eq $H2X{cdata}) {
$cdata_or_text = 'XML::LibXML::CDATASection';
}
if (0) {}
elsif($cdata_or_text) {
push @rv, map {
defined($_) ? do {
$H2X{trim} and s/(?:^\s+|\s+$)//sg;
$H2X{trim} && !length($_) ? () :
$cdata_or_text->new( $_ )
} : (),
} ref $data->{$_} ? @{ $data->{$_} } : $data->{$_};
}
elsif (defined $H2X{comm} and $_ eq $H2X{comm}) {
push @rv, map XML::LibXML::Comment->new(defined $_ ? $_ : ''), ref $data->{$_} ? @{ $data->{$_} } : $data->{$_};
}
elsif (substr($_,0,$AL) eq $H2X{attr} ) {
if ($parent) {
$parent->setAttribute( substr($_,1),defined $data->{$_} ? $data->{$_} : '' );
} else {
warn "attribute $_ without parent"
}
}
elsif ( !defined $data->{$_} or ( !ref $data->{$_} and !length $data->{$_} ) ) {
push @rv,XML::LibXML::Element->new($_);
}
else {
local $hd = $hd.'/'.$_;
my $node = XML::LibXML::Element->new($_);
#warn ("$hd << ".$_->nodeName),
$node->appendChild($_) for _h2x($data->{$_},$node);
push @rv,$node;
}
}
}
elsif (ref $data eq 'SCALAR') { # RAW
my $node = eval { XML::LibXML->new->parse_string($$data) } or _croak "Malformed raw data on $hd: $@";
return $node->documentElement;
}
elsif (ref $data eq 'REF') { # LibXML Node
if (ref $$data and eval{ $$data->isa('XML::LibXML::Node') }) {
return $$data->cloneNode(1);
}
elsif ( ref $$data and do { no strict 'refs'; exists ${ ref($$data).'::' }{'(""'} } ) {
return XML::LibXML::Text->new( "$$data" );
}
else {
_croak ("Bad reference ".ref( $$data ).": <$$data> on $hd");
}
}
elsif (Types::Serialiser::is_bool( $data )) {
return XML::LibXML::Text->new( $data ? "true" : "false" );
}
elsif ( do { no strict 'refs'; exists ${ ref($data).'::' }{'(""'} } ) { # have string overload
return XML::LibXML::Text->new( "$data" );
}
elsif (ref $data and eval{ $data->isa('XML::LibXML::Node') }) {
return $data->cloneNode(1);
}
else {
_croak "Bad reference ".ref( $data ).": <$data> on $hd";
}
#warn "@rv";
return wantarray ? @rv : $rv[0];
}
sub hash2xml($;%) {
#warn "hash2xml(@_) from @{[ (caller)[1,2] ]}";
my $hash = shift;
my %opts = @_;
my $str = delete $opts{doc} ? 0 : 1;
my $encoding = delete $opts{encoding} || delete $opts{enc} || 'utf-8';
my $doc = XML::LibXML::Document->new('1.0', $encoding);
local @H2X{keys %opts} = values %opts if @_;
local $AL = length $H2X{attr};
#use Data::Dumper;
#warn Dumper \%H2X;
my $root = _h2x($hash);
$doc->setDocumentElement($root);
return $str ? $doc->toString : $doc;
}
=head1 BUGS
None known
=head1 SEE ALSO
=over 4
=item * L<XML::Parser::Style::EasyTree>
With default settings should produce the same output as this module. Settings are similar by effect
=back
=head1 AUTHOR
Mons Anderson, C<< <mons at cpan.org> >>
=head1 LICENSE
Copyright 2009-2020 Mons Anderson, all rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut
1; # End of XML::Hash::LX