# -*- perl -*-
# $Id: Protocol.pm,v 1.10 2004/02/10 15:19:19 langhein Exp $
# derived from: Protocol.pm,v 1.39 2001/10/26 19:00:21 gisle Exp
package LWP::Parallel::Protocol;
=head1 NAME
LWP::Parallel::Protocol - Base class for parallel LWP protocols
=head1 SYNOPSIS
package LWP::Parallel::Protocol::foo;
require LWP::Parallel::Protocol;
@ISA=qw(LWP::Parallel::Protocol);
=head1 DESCRIPTION
This class is used a the base class for all protocol implementations
supported by the LWP::Parallel library. It mirrors the behavior of the
original LWP::Parallel library by subclassing from it and adding a few
subroutines of its own.
Please see the LWP::Protocol for more information about the usage of
this module.
In addition to the inherited methods from LWP::Protocol, The following
methods and functions are provided:
=head1 ADDITIONAL METHODS AND FUNCTIONS
=over 4
=cut
#######################################################
require LWP::Protocol;
@ISA = qw(LWP::Protocol);
$VERSION = sprintf("%d.%02d", q$Revision: 1.10 $ =~ /(\d+)\.(\d+)/);
use HTTP::Status ();
use HTML::HeadParser; # thanks to Kirill
use strict;
use Carp ();
my %ImplementedBy = (); # scheme => classname
=item $prot = LWP::Parallel::Protocol->new();
The LWP::Parallel::Protocol constructor is inherited by subclasses. As this is
a virtual base class this method should B<not> be called directly.
Note: This is inherited from LWP::Protocol
=cut
=item $prot = LWP::Parallel::Protocol::create($schema)
Create an object of the class implementing the protocol to handle the
given scheme. This is a function, not a method. It is more an object
factory than a constructor. This is the function user agents should
use to access protocols.
=cut
sub create
{
my ($scheme, $ua) = @_;
my $impclass = LWP::Parallel::Protocol::implementor($scheme) or
Carp::croak("Protocol scheme '$scheme' is not supported");
# hand-off to scheme specific implementation sub-class
my $protocol = $impclass->new($scheme, $ua);
return $protocol;
}
=item $class = LWP::Parallel::Protocol::implementor($scheme, [$class])
Get and/or set implementor class for a scheme. Returns '' if the
specified scheme is not supported.
=cut
sub implementor
{
my($scheme, $impclass) = @_;
if ($impclass) {
$ImplementedBy{$scheme} = $impclass;
}
my $ic = $ImplementedBy{$scheme};
return $ic if $ic;
return '' unless $scheme =~ /^([.+\-\w]+)$/; # check valid URL schemes
$scheme = $1; # untaint
$scheme =~ s/[.+\-]/_/g; # make it a legal module name
# scheme not yet known, look for a 'use'd implementation
$ic = "LWP::Parallel::Protocol::$scheme"; # default location
no strict 'refs';
# check we actually have one for the scheme:
unless (@{"${ic}::ISA"}) { # fixed in LWP 5.48
# try to autoload it
#LWP::Debug::debug("Try autoloading $ic");
eval "require $ic";
if ($@) {
if ($@ =~ /Can't locate/) { #' #emacs get confused by '
$ic = '';
} else { # this msg never gets to the surface - 1002, JB
die "$@\n";
}
}
}
$ImplementedBy{$scheme} = $ic if $ic;
$ic;
}
=item $prot->receive ($arg, $response, $content)
Called to store a piece of content of a request, and process it
appropriately into a scalar, file, or by calling a callback. If $arg
is undefined, then the content is stored within the $response. If
$arg is a simple scalar, then $arg is interpreted as a file name and
the content is written to this file. If $arg is a reference to a
routine, then content is passed to this routine.
$content must be a reference to a scalar holding the content that
should be processed.
The return value from receive() is undef for errors, positive for
non-zero content processed, 0 for forced EOFs, and potentially a
negative command from a user-defined callback function.
B<Note:> We will only use the file or callback argument if
$response->is_success(). This avoids sendig content data for
redirects and authentization responses to the file or the callback
function.
=cut
sub receive {
my ($self, $arg, $response, $content, $entry) = @_;
LWP::Debug::trace("( [self]" .
", ". (defined $arg ? $arg : '[undef]') .
", ". (defined $response ?
(defined $response->code ?
$response->code : '???') . " " .
(defined $response->message ?
$response->message : 'undef')
: '[undef]') .
", ". (defined $content ?
(ref($content) eq 'SCALAR'?
length($$content) . " bytes"
: '[ref('. ref($content) .')' )
: '[undef]') .
", ". (defined $entry ? $entry : '[undef]') .
")");
my($parse_head, $max_size, $parallel) =
@{$self}{qw(parse_head max_size parallel)};
my $parser;
if ($parse_head && $response->content_type eq 'text/html') {
require HTML::HeadParser; # LWP 5.60
$parser = HTML::HeadParser->new($response->{'_headers'});
}
my $content_size = $entry->content_size;
# Note: We don't need alarms here since we are not making any tcp
# connections. All the data we need is alread in \$content, so we
# just read out a string value -- nothing should slow us down here
# (other than processor speed or memory constraints :) ) PS: You
# can't just add 'alarm' somewhere here unless you fix the calls
# to ->receive in the subclasses such as 'ftp' or 'http' and wrap
# them in an 'eval' statement that will catch our alarm-exceptions
# we would throw here! But since we don't need alarms here, just
# forget what I just said - it's irrelevant.
if (!defined($arg) || !$response->is_success ) {
# scalar
if ($parser) {
$parser->parse($$content) or undef($parser);
}
LWP::Debug::debug("read " . length($$content) . " bytes");
$response->add_content($$content);
$content_size += length($$content);
$entry->content_size($content_size); # update persistant size counter
if (defined($max_size) && $content_size > $max_size) {
LWP::Debug::debug("Aborting because size limit of " .
"$max_size bytes exceeded");
$response->push_header("Client-Aborted", "max_size");
#my $tot = $response->header("Content-Length") || 0;
#$response->header("X-Content-Range", "bytes 0-$content_size/$tot");
return 0; # EOF (kind of)
}
}
elsif (!ref($arg)) {
# Mmmh. Could this take so long that we want to use alarm here?
my $file_open;
if (defined ($entry->content_size) and ($entry->content_size > 0)) {
$file_open = open(OUT, ">>$arg"); # we already have data: append
} else {
$file_open = open(OUT, ">$arg"); # no content received: open new
}
unless ( $file_open ) {
$response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR);
$response->message("Cannot write to '$arg': $!");
return; # undef means error
}
binmode(OUT);
local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR
if ($parser) {
$parser->parse($$content) or undef($parser);
}
LWP::Debug::debug("[FILE] read " . length($$content) . " bytes");
print OUT $$content;
$content_size += length($$content);
$entry->content_size($content_size); # update persistant size counter
close(OUT);
if (defined($max_size) && $content_size > $max_size) {
LWP::Debug::debug("Aborting because size limit exceeded");
$response->push_header("Client-Aborted", "max_size");
#my $tot = $response->header("Content-Length") || 0;
#$response->header("X-Content-Range", "bytes 0-$content_size/$tot");
return 0;
}
}
elsif (ref($arg) eq 'CODE') {
# read into callback
if ($parser) {
$parser->parse($$content) or undef($parser);
}
LWP::Debug::debug("[CODE] read " . length($$content) . " bytes");
my $retval;
eval {
$retval = &$arg($$content, $response, $self, $entry);
};
if ($@) {
chomp($@);
$response->push_header('X-Died' => $@);
$response->push_header("Client-Aborted", "die");
} else {
# pass return value from callback through to implementor class
LWP::Debug::debug("return-code from Callback was '".
(defined $retval ? "$retval'" : "[undef]'"));
return $retval;
}
}
else {
$response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR);
$response->message("Unexpected collect argument '$arg'");
}
return length($$content); # otherwise return size of content processed
}
=item $prot->receive_once($arg, $response, $content, $entry)
Can be called when the whole response content is available as
$content. This will invoke receive() with a collector callback that
returns a reference to $content the first time and an empty string the
next.
=cut
sub receive_once {
my ($self, $arg, $response, $content, $entry) = @_;
# read once
my $retval = $self->receive($arg, $response, \$content, $entry);
# and immediately simulate EOF
my $no_content = '';
$retval = $self->receive($arg, $response, \$no_content, $entry)
unless $retval;
return (defined $retval? $retval : 0);
}
1;
=head1 SEE ALSO
Inspect the F<LWP/Parallel/Protocol/http.pm> file for examples of usage.
=head1 COPYRIGHT
Copyright 1997-2004 Marc Langheinrich E<lt>marclang@cpan.org>
Parts copyright 1995-2004 Gisle Aas
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut