# -*- perl -*-
# $Id: http.pm,v 1.13 2003/03/11 16:49:35 langhein Exp $
# derived from: http10.pm,v 1.1 2001/10/26 17:27:19 gisle Exp $
package LWP::Parallel::Protocol::http;
use strict;
require LWP::Debug;
require HTTP::Response;
require HTTP::Status;
require Net::HTTP;
require IO::Socket;
require IO::Select;
use Carp ();
use vars qw(@ISA @EXTRA_SOCK_OPTS);
require LWP::Parallel::Protocol;
require LWP::Protocol::http; # until i figure out gisle's http1.1 stuff!
@ISA = qw(LWP::Parallel::Protocol LWP::Protocol::http);
my $CRLF = "\015\012"; # how lines should be terminated;
# "\r\n" is not correct on all systems, for
# instance MacPerl defines it to "\012\015"
# The following 4 methods are more or less a simple breakdown of the
# original $http->request method:
=item ($socket, $fullpath) = $prot->handle_connect ($req, $proxy, $timeout);
This method connects with the server on the machine and port specified
in the $req object. If a $proxy is given, it will translate the
request into an appropriate proxy-request and return the new URL in
the $fullpath argument.
$socket is either an IO::Socket object (in parallel mode), or a
LWP::Socket object (when used via Std. non-parallel modules, such as
LWP::UserAgent)
=cut
sub handle_connect {
my ($self, $request, $proxy, $timeout, $nonblock) = @_;
# check method
my $method = $request->method;
unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) { # HTTP token
return (undef, new HTTP::Response &HTTP::Status::RC_BAD_REQUEST,
'Library does not allow method ' .
"$method for 'http:' URLs");
}
my $url = $request->url;
my($host, $port, $fullpath) = $self->get_address ($proxy, $url, $method);
# connect to remote site
my $socket = $self->_connect ($host, $port, $timeout, $nonblock);
# LWP::Debug::debug("Socket is $socket");
# get LINGER get it!
# my $data = $socket->sockopt(13); #define SO_LINGER = 13
# my @a_data = unpack ("ii",$data);
# $a_data[0] = 1; $a_data[1] = 0;
# $data = pack ("ii",@a_data);
#
# $socket->sockopt(13, $data); #define SO_LINGER = 13
# my $newdata = $socket->sockopt(13); #define SO_LINGER = 13
# @a_data = unpack ("ii",$newdata);
#
# print "Socket $socket: SO_LINGER (", $a_data[0],", ",$a_data[1],")\n";
# got Linger got it!
($socket, $fullpath);
}
sub get_address {
my ($self, $proxy, $url,$method) = @_;
my($host, $port, $fullpath);
# Check if we're proxy'ing
if (defined $proxy) {
# $proxy is an URL to an HTTP server which will proxy this request
$host = $proxy->host;
$port = $proxy->port;
$fullpath = $method && ($method eq "CONNECT") ?
($url->host . ":" . $url->port) :
$url->as_string;
}
else {
$host = $url->host;
$port = $url->port;
$fullpath = $url->path_query;
$fullpath = "/" unless length $fullpath;
}
($host, $port, $fullpath);
}
sub _connect { # renamed to make clear that this is private sub
my ($self, $host, $port, $timeout, $nonblock) = @_;
my ($socket);
unless ($nonblock) {
# perform good ol' blocking behavior
#
# this method inherited from LWP::Protocol::http
$socket = $self->_new_socket($host, $port, $timeout);
# currently empty function in LWP::Protocol::http
# $self->_check_sock($request, $socket);
} else {
# new non-blocking behavior
#
# thanks to http://www.en-directo.net/mail/kirill.html
use Socket();
use POSIX();
$socket =
IO::Socket::INET->new(Proto => 'tcp', # Timeout => $timeout,
$self->_extra_sock_opts ($host, $port));
die "Can't create socket for $host:$port ($@)" unless $socket;
unless ( defined $socket->blocking (0) )
{
# IO::Handle::blocking doesn't (yet?) work on Win32 (ActiveState port)
# The following happens to work though.
# See also: perlport manpage, POE::Kernel, POE::Wheel::SocketFactory,
# Winsock2.h
if ( $^O eq 'MSWin32' )
{
my $set_it = "1";
my $ioctl_val = 0x80000000 | (4 << 16) | (ord('f') << 8) | 126;
$ioctl_val = ioctl ($socket, $ioctl_val, $set_it);
# warn 'Win32 ioctl returned ' . (defined $ioctl_val ? $ioctl_val : '[undef]') . "\n";
# warn "Win32 ioctlsocket failed\n" unless $ioctl_val;
}
}
my $rhost = Socket::inet_aton ($host);
die "Bad hostname $host" unless defined $rhost;
unless ( $socket->connect ($port, $rhost) )
{
my $err = $! + 0;
# More trouble with ActiveState: EINPROGRESS and EWOULDBLOCK
# are missing from POSIX.pm. See Microsoft's Winsock2.h
my ($einprogress, $ewouldblock) = $^O eq 'MSWin32' ?
(10036, 10035) : (POSIX::EINPROGRESS(), POSIX::EWOULDBLOCK());
die "Can't connect to $host:$port ($@)"
if $err and $err != $einprogress and $err != $ewouldblock;
}
}
LWP::Debug::debug("Socket is $socket");
$socket;
}
sub write_request {
my ($self, $request, $socket, $fullpath, $arg, $timeout, $proxy) = @_;
my $method = $request->method;
my $url = $request->url;
LWP::Debug::trace ("write_request (".
(defined $request ? $request : '[undef]').
", ". (defined $socket ? $socket : '[undef]').
", ". (defined $fullpath ? $fullpath : '[undef]').
", ". (defined $arg ? $arg : '[undef]').
", ". (defined $timeout ? $timeout : '[undef]').
", ". (defined $proxy ? $proxy : '[undef]'). ")");
my $sel = IO::Select->new($socket) if $timeout;
my $request_line = "$method $fullpath HTTP/1.0$CRLF";
my $h = $request->headers->clone;
my $cont_ref = $request->content_ref;
$cont_ref = $$cont_ref if ref($$cont_ref);
my $ctype = ref($cont_ref);
# If we're sending content we *have* to specify a content length
# otherwise the server won't know a messagebody is coming.
if ($ctype eq 'CODE') {
die 'No Content-Length header for request with dynamic content'
unless defined($h->header('Content-Length')) ||
$h->content_type =~ /^multipart\//;
# For HTTP/1.1 we could have used chunked transfer encoding...
}
else {
$h->header('Content-Length' => length $$cont_ref)
if defined($$cont_ref) && length($$cont_ref);
}
$self->_fixup_header($h, $url, $proxy);
my $buf = $request_line . $h->as_string($CRLF) . $CRLF;
my $n; # used for return value from syswrite/sysread
my $length;
my $offset;
# die's will be caught if user specified "use_eval".
# syswrite $buf
$length = length($buf);
$offset = 0;
while ( $offset < $length ) {
die "write timeout" if $timeout && !$sel->can_write($timeout);
$n = $socket->syswrite($buf, $length-$offset, $offset );
die $! unless defined($n);
$offset += $n;
}
LWP::Debug::conns($buf);
if ($ctype eq 'CODE') {
while ( ($buf = &$cont_ref()), defined($buf) && length($buf)) {
# syswrite $buf
$length = length($buf);
$offset = 0;
while ( $offset < $length ) {
die "write timeout" if $timeout && !$sel->can_write($timeout);
$n = $socket->syswrite($buf, $length-$offset, $offset );
die $! unless defined($n);
$offset += $n;
}
LWP::Debug::conns($buf);
}
}
elsif (defined($$cont_ref) && length($$cont_ref)) {
# syswrite $$cont_ref
$length = length($$cont_ref);
$offset = 0;
while ( $offset < $length ) {
die "write timeout" if $timeout && !$sel->can_write($timeout);
$n = $socket->syswrite($$cont_ref, $length-$offset, $offset );
die $! unless defined($n);
$offset += $n;
}
LWP::Debug::conns($buf);
}
# For an HTTP request, the 'command' socket is the same as the
# 'listen' socket, so we just return the socket here.
# (In the ftp module, we usually have one socket being the command
# socket, and another one being the read socket, so that's why we
# have this overhead here)
return $socket;
}
# whereas 'handle_connect' (with its submethods 'get_address' and
# 'connect') and 'write_request' mainly just encapsulate different
# parts of the old http->request method, 'read_chunk' has an added
# level of complexity. This is because we have to be content with
# whatever data is available, and somehow 'save' our current state
# between multiple calls.
# To faciliate things later, when we need redirects and
# authentication, we insist that we _always_ have a response object
# available, which is generated outside and initialized with bogus
# data (code = 0). Also, we can then save ourselves the trouble of
# using a call-by-variable for $response in order to return a freshly
# generated $response-object.
# We have to provide IO::Socket-objects with a pushback mechanism,
# which comes pretty handy in case we can't use all the information read
# so far. Instead of changing the IO::Socket code, we just have our own
# little pushback buffer, $pushback, indexed by $socket object here.
my %pushback;
sub read_chunk {
my ($self, $response, $socket, $request, $arg, $size,
$timeout, $entry) = @_;
LWP::Debug::trace ("read_chunk (".
(defined $response ? $response : '[undef]').
", ". (defined $socket ? $socket : '[undef]').
", ". (defined $request ? $request : '[undef]').
", ". (defined $arg ? $arg : '[undef]').
", ". (defined $size ? $size : '[undef]').
", ". (defined $timeout ? $timeout : '[undef]').
", ". (defined $entry ? $entry : '[undef]'). ")");
# hack! Can we just generate a new Select object here? Or do we
# have to take the one we created in &write_request?!?
my $sel = IO::Select->new($socket) if $timeout;
LWP::Debug::debug('reading response ('.
(defined($pushback{$socket})?length($pushback{$socket}):0) .' buffered)');
my $buf = "";
# read one chunk at a time from $socket
if ( $timeout && !$sel->can_read($timeout) ) {
$response->message("Read Timeout");
$response->code(&HTTP::Status::RC_REQUEST_TIMEOUT);
$response->request($request);
return 0; # EOF
};
my $n = $socket->sysread($buf, $size, length($buf));
unless (defined ($n)) {
$response->message("Sysread Error: $!");
$response->code(&HTTP::Status::RC_SERVICE_UNAVAILABLE);
$response->request($request);
return 0; # EOF
};
# need our own EOF detection here
unless ( $n ) {
unless ($response and $response->code) {
$response->message("Unexpected EOF while reading response");
$response->code(&HTTP::Status::RC_BAD_GATEWAY);
$response->request($request);
return 0; # EOF
}
}
# prepend contents of unprocessed buffer content from last read
$buf = $pushback{$socket} . $buf if $pushback{$socket};
LWP::Debug::conns("Buffer contents between dashes -->\n==========\n$buf==========");
# determine Protocol type and create response object
unless ($response and $response->code) {
if ($buf =~ s/^(HTTP\/\d+\.\d+)[ \t]+(\d+)[ \t]*([^\012]*)\012//) { #1.39
# HTTP/1.0 response or better
my($ver,$code,$msg) = ($1, $2, $3);
$msg =~ s/\015$//;
LWP::Debug::debug("Identified HTTP Protocol: $ver $code $msg");
$response->code($code);
$response->message($msg);
$response->protocol($ver);
# store $request info in $response object
$response->request($request);
}
elsif ((length($buf) >= 5 and $buf !~ /^HTTP\//) or
$buf =~ /\012/ ) {
# HTTP/0.9 or worse
LWP::Debug::debug("HTTP/0.9 assume OK");
$response->code(&HTTP::Status::RC_OK);
$response->message("OK");
$response->protocol('HTTP/0.9');
# store $request info in $response object
$response->request($request);
}
else {
# need more data
LWP::Debug::debug("need more data to know which protocol");
}
}
# if we have a protocol, read headers if neccessary
if ( $response && !&headers($response) ) {
# ensure that we have read all headers. The headers will be
# terminated by two blank lines
unless ($buf =~ /^\015?\012/ || $buf =~ /\015?\012\015?\012/) {
# must read more if we can...
LWP::Debug::debug("need more data for headers");
} else {
# now we start parsing the headers. The strategy is to
# remove one line at a time from the beginning of the header
# buffer ($buf).
my($key, $val);
while ($buf =~ s/([^\012]*)\012//) {
my $line = $1;
# if we need to restore as content when illegal headers
# are found.
my $save = "$line\012";
$line =~ s/\015$//;
last unless length $line;
if ($line =~ /^([a-zA-Z0-9_\-.]+)\s*:\s*(.*)/) {
$response->push_header($key, $val) if $key;
($key, $val) = ($1, $2);
} elsif ($line =~ /^\s+(.*)/ && $key) {
$val .= " $1";
} else {
$response->push_header("Client-Bad-Header-Line" =>
$line);
}
}
$response->push_header($key, $val) if $key;
# check to see if we have any header at all
unless (&headers($response)) {
# we need at least one header to go on
LWP::Debug::debug("no headers found, inserting Client-Date");
$response->header ("Client-Date" =>
HTTP::Date::time2str(time));
}
} # of if then else
} # of if $response
# if we have both a response AND the headers, start parsing the rest
if ( $response && &headers($response) && length($buf)) {
$self->_get_sock_info($response, $socket);
# the CONNECT method does not need to read content
if ($request->method eq "CONNECT") { # from LWP 5.48's Protocol/http.pm
$response->{client_socket} = $socket; # so it can be picked up
}
else {
# all other methods want to read content, I guess...
# Note that we can't use $self->collect, since we don't want to give
# up control (by letting Protocol::collect use a $collector callback)
if (my @te = $response->remove_header('Transfer-Encoding')) {
$response->push_header('Client-Transfer-Encoding', \@te);
}
my $retval = $self->receive($arg, $response, \$buf, $entry);
# update pushback buffer (receive handles _all_ of current buffer)
$pushback{$socket} = '';
# return length of response read (or value of $retval, if any, which
# could be one of C_LASTCON, C_ENDCON, or C_ENDALL)
return (defined $retval? $retval : length($buf));
}
}
$pushback{$socket} = $buf;
return $n;
}
# This function indicates if we have already parsed the headers. In
# case of HTTP/0.9 we (obviously?!) don't have any (which means that
# we already 'parsed' them, so return 'true' no matter what)
sub headers {
my ($response) = @_;
return 1 if $response->protocol eq 'HTTP/0.9';
($response->headers_as_string ? 1 : 0);
}
sub close_connection {
my ($self, $response, $listen_socket, $request, $cmd_socket) = @_;
# print "Closing socket $listen_socket\n";
# $listen_socket->close;
# $cmd_socket->close;
}
# the old (single request) frontend, defunct.
sub request {
die "LWP::Parallel::Protocol::http does not support single requests\n";
}
#-----------------------------------------------------------
# copied from LWP::Protocol::http (v1.63 in LWP5.64)
#-----------------------------------------------------------
package LWP::Parallel::Protocol::http::SocketMethods;
sub sysread {
my $self = shift;
if (my $timeout = ${*$self}{io_socket_timeout}) {
die "read timeout" unless $self->can_read($timeout);
}
else {
# since we have made the socket non-blocking we
# use select to wait for some data to arrive
$self->can_read(undef) || die "Assert";
}
sysread($self, $_[0], $_[1], $_[2] || 0);
}
sub can_read {
my($self, $timeout) = @_;
my $fbits = '';
vec($fbits, fileno($self), 1) = 1;
my $nfound = select($fbits, undef, undef, $timeout);
die "select failed: $!" unless defined $nfound;
return $nfound > 0;
}
sub ping {
my $self = shift;
!$self->can_read(0);
}
sub increment_response_count {
my $self = shift;
return ++${*$self}{'myhttp_response_count'};
}
#-----------------------------------------------------------
package LWP::Parallel::Protocol::http::Socket;
use vars qw(@ISA);
@ISA = qw(LWP::Parallel::Protocol::http::SocketMethods Net::HTTP);
#-----------------------------------------------------------
# ^^^ copied from LWP::Protocol::http (v1.63 in LWP5.64)
#-----------------------------------------------------------
1;