#!/usr/bin/perl
#
# html_table_to_db.pm
# Copyright (C) 2005-2015 by John Heidemann <johnh@isi.edu>
# $Id: 025cc75c8e0df7ccdb092d89696480e5dee7dd08 $
#
# This program is distributed under terms of the GNU general
# public license, version 2. See the file COPYING
# in $dblib for details.
#
=head1 NAME
html_table_to_db - convert HTML tables into fsdb
=head1 SYNOPSIS
html_table_to_db <source.html >dest.fsdb
=head1 DESCRIPTION
Converts an HTML table to Fsdb format.
The input is an HTML table (I<not> fsdb).
Column names are taken from C<TH> elements,
or defined as C<column0> through C<columnN> if
no such elements appear.
The output is two-space-separated fsdb.
(Someday more general field separators should be supported.)
Fsdb fields are a normalized version of the HTML file:
multiple spaces are compressed to one.
=for comment
begin_standard_fsdb_options
This module also supports the standard fsdb options:
=over 4
=item B<-d>
Enable debugging output.
=item B<-i> or B<--input> InputSource
Read from InputSource, typically a file name, or C<-> for standard input,
or (if in Perl) an IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
=item B<-o> or B<--output> OutputDestination
Write to OutputDestination, typically a file name, or C<-> for standard output,
or (if in Perl) an IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
=item B<--autorun> or B<--noautorun>
By default, programs process automatically,
but Fsdb::Filter objects in Perl do not run until you invoke
the run() method.
The C<--(no)autorun> option controls that behavior within Perl.
=item B<--help>
Show help.
=item B<--man>
Show full manual.
=back
=for comment
end_standard_fsdb_options
=head1 SAMPLE USAGE
=head2 Input:
<table>
<tr><th>account</th> <th>passwd</th> <th>uid</th> <th>gid</th> <th>fullname</th> <th>homedir</th> <th>shell</th> </tr>
<tr bgcolor="#f0f0f0"><td>johnh</td> <td>*</td> <td>2274</td> <td>134</td> <td>John &amp; Ampersand</td> <td>/home/johnh</td> <td>/bin/bash</td> </tr>
<tr bgcolor="#f0f0f0"><td>greg</td> <td>*</td> <td>2275</td> <td>134</td> <td>Greg &lt; Lessthan</td> <td>/home/greg</td> <td>/bin/bash</td> </tr>
<tr bgcolor="#f0f0f0"><td>root</td> <td>*</td> <td>0</td> <td>0</td> <td>Root ; Semi</td> <td>/root</td> <td>/bin/bash</td> </tr>
<tr bgcolor="#d0d0d0"><td>four</td> <td>*</td> <td>1</td> <td>1</td> <td>Fourth Row</td> <td>/home/four</td> <td>/bin/bash</td> </tr>
</table>
=head2 Command:
html_table_to_db
=head2 Output:
#fsdb -F S account passwd uid gid fullname homedir shell
johnh  *  2274  134  John & Ampersand  /home/johnh  /bin/bash
greg  *  2275  134  Greg < Lessthan  /home/greg  /bin/bash
root  *  0  0  Root ; Semi  /root  /bin/bash
four  *  1  1  Fourth Row  /home/four  /bin/bash
=head1 SEE ALSO
L<Fsdb>.
L<db_to_html_table>.
=cut
# WARNING: This code is derived from html_table_to_db.pm; that is the master copy.
#
# Command-line driver: passes the raw command-line arguments to the
# Fsdb::Filter::html_table_to_db constructor, invokes the resulting
# object's setup_run_finish method, and exits with status 0.
use strict;
use warnings;
use Fsdb::Filter::html_table_to_db;

# Call the constructor as Class->new(...) rather than with indirect-object
# syntax ("new Class(...)"), which perl resolves heuristically at parse time.
my $filter = Fsdb::Filter::html_table_to_db->new(@ARGV);
$filter->setup_run_finish;  # or could just --autorun
exit 0;
=head1 AUTHOR and COPYRIGHT
Copyright (C) 1991-2015 by John Heidemann <johnh@isi.edu>
This program is distributed under terms of the GNU general
public license, version 2. See the file COPYING
with the distribution for details.
=cut
# Conventional trailing true value; never executed here, because the
# script calls exit 0 before reaching this point.
1;