#!/usr/bin/perl
#
# dbrowdiff.pm
# Copyright (C) 1991-2024 by John Heidemann <johnh@isi.edu>
#
# This program is distributed under terms of the GNU general
# public license, version 2. See the file COPYING
# in $dblibdir for details.
#
package Fsdb::Filter::dbrowdiff;
=head1 NAME
dbrowdiff - compute row-by-row differences of some column
=head1 SYNOPSIS
dbrowdiff [-B|-I|-F] [-A AbsDiffColumnName] [-P PctDiffColumnName] column
=head1 DESCRIPTION
For a given column, compute the differences between each row
of the table. Differences are output to two new columns,
C<absdiff> and C<pctdiff>.
Differences are either relative to the previous row
(I<incremental> mode), or relative to the first row
(I<baseline> mode), the default.
Alternatively, in I<future> mode, differences
are between the I<next> row and the current row.
If column names are given, with C<-A> or C<-P>,
then only columns with that name are produced.
=head1 OPTIONS
=over 4
=item B<-B> or B<--baseline>
Select baseline mode (the default), where differences are relative to the first row.
=item B<-I> or B<--incremental>
Select incremental mode, where differences are relative to the previous row.
=item B<-F> or B<--future>
Select future incremental mode, where differences are
incremental between the next row and the current one.
=item B<-A> COL or B<--absdiff> COL
Name the absolute difference output column COL,
and don't output percent difference unless C<-P> is given.
=item B<-P> COL or B<--pctdiff> COL
Name the percent difference output column COL,
and don't output absolute difference unless C<-A> is given.
=item B<-f FORMAT> or B<--format FORMAT>
Specify a L<printf(3)>-style format for output statistics.
Defaults to C<%.5g>.
=item B<-e> EmptyValue or B<--empty>
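Specify the value for the last row when in future mode.
It is also used for the percent difference in baseline and incremental
modes when the reference value is zero.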
=back
=for comment
begin_standard_fsdb_options
This module also supports the standard fsdb options:
=over 4
=item B<-d>
Enable debugging output.
=item B<-i> or B<--input> InputSource
Read from InputSource, typically a file name, or C<-> for standard input,
or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
=item B<-o> or B<--output> OutputDestination
Write to OutputDestination, typically a file name, or C<-> for standard output,
or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
=item B<--autorun> or B<--noautorun>
By default, programs process automatically,
but Fsdb::Filter objects in Perl do not run until you invoke
the run() method.
The C<--(no)autorun> option controls that behavior within Perl.
=item B<--help>
Show help.
=item B<--man>
Show full manual.
=back
=for comment
end_standard_fsdb_options
=head1 SAMPLE USAGE
=head2 Input:
#fsdb event clock:d
_null_getpage+128 815812813.281756
_null_getpage+128 815812813.328709
_null_getpage+128 815812813.353830
_null_getpage+128 815812813.357169
_null_getpage+128 815812813.375844
_null_getpage+128 815812813.378358
# | /home/johnh/BIN/DB/dbrow
# | /home/johnh/BIN/DB/dbcol event clock
=head2 Command:
cat DATA/kitrace.fsdb | dbrowdiff clock
=head2 Output:
#fsdb event clock:d absdiff:d pctdiff:d
_null_getpage+128 815812813.281756 0 0
_null_getpage+128 815812813.328709 0.046953 5.7554e-09
_null_getpage+128 815812813.353830 0.072074 8.8346e-09
_null_getpage+128 815812813.357169 0.075413 9.2439e-09
_null_getpage+128 815812813.375844 0.094088 1.1533e-08
_null_getpage+128 815812813.378358 0.096602 1.1841e-08
# | /home/johnh/BIN/DB/dbrow
# | /home/johnh/BIN/DB/dbcol event clock
# | dbrowdiff clock
=head1 SEE ALSO
L<Fsdb>.
L<dbcolmovingstats>.
L<dbrowuniq>.
L<dbfilediff>.
L<dbrowdiff>, L<dbrowuniq>, and L<dbfilediff> are similar but different.
L<dbrowdiff> computes row-by-row differences for a column,
L<dbrowuniq> eliminates rows that have no differences,
and L<dbfilediff> compares fields of two files.
=head1 CLASS FUNCTIONS
=cut
# Package globals, assigned before "use strict" takes effect below,
# so they need no "our" declaration.
# Inherit from Fsdb::Filter; methods called on $self that are not defined
# in this file (e.g. get_options, finish_io_option) presumably come from it.
@ISA = qw(Fsdb::Filter);
# Version number of this module.
$VERSION = 2.0;
use strict;
use Pod::Usage;
use Carp;
use Fsdb::Filter;
use Fsdb::IO::Reader;
use Fsdb::IO::Writer;
=head2 new
$filter = new Fsdb::Filter::dbrowdiff(@arguments);
Create a new dbrowdiff object, taking command-line arguments.
=cut
sub new ($@) {
    # Constructor: the first argument is the class name, the rest are the
    # command-line style arguments for the filter.
    my($class, @args) = @_;

    # Let the parent class build the object, then (re)bless it into the
    # requested class.
    my $self = bless $class->SUPER::new(@args), $class;

    # Defaults must be in place before option parsing overrides them.
    $self->set_defaults();
    $self->parse_options(@args);

    # Parent-class hook run once construction is complete.
    $self->SUPER::post_new();

    return $self;
}
=head2 set_defaults
$filter->set_defaults();
Internal: set up defaults.
=cut
sub set_defaults ($) {
    my $self = shift;

    # Start from the defaults of the parent class.
    $self->SUPER::set_defaults();

    # Output number format and difference mode ('B' is baseline mode).
    @{$self}{qw(_format _mode)} = ("%.5g", 'B');

    # No output column names chosen yet; setup() supplies the standard
    # names when neither is given on the command line.
    $self->{_absdiff_column} = $self->{_pctdiff_column} = undef;
}
=head2 parse_options
$filter->parse_options(@ARGV);
Internal: parse command-line arguments.
=cut
sub parse_options ($@) {
    # Arguments: the filter object followed by the command-line arguments.
    my($self, @argv) = @_;

    # Build a callback that records the selected difference mode
    # ('B' baseline, 'I' incremental, 'F' future).
    my $select_mode = sub {
        my($mode) = @_;
        return sub { $self->{_mode} = $mode; };
    };

    # Option specification handed to get_options(): each entry maps an
    # option spec either to the object field it fills in, or to a callback.
    my(@option_spec) = (
	'help|?' => sub { pod2usage(1); },
	'man' => sub { pod2usage(-verbose => 2); },
	'autorun!' => \$self->{_autorun},
	'A|absdiff=s' => \$self->{_absdiff_column},
	'B|baseline' => $select_mode->('B'),
	'close!' => \$self->{_close},
	'd|debug+' => \$self->{_debug},
	'e|empty=s' => \$self->{_empty},
	'f|format=s' => \$self->{_format},
	'F|future' => $select_mode->('F'),
	'i|input=s' => sub { $self->parse_io_option('input', @_); },
	'I|incremental' => $select_mode->('I'),
	'log!' => \$self->{_logprog},
	'o|output=s' => sub { $self->parse_io_option('output', @_); },
	'P|pctdiff=s' => \$self->{_pctdiff_column},
    );

    # Show brief usage and exit if the options cannot be parsed.
    $self->get_options(\@argv, @option_spec)
	or pod2usage(2);

    # Whatever is left over after option parsing names the target column.
    $self->parse_target_column(\@argv);
}
=head2 setup
$filter->setup();
Internal: setup, parse headers.
=cut
sub setup ($) {
    my($self) = @_;

    # Prepare for run(): open the input, locate the target column, decide
    # which difference columns to produce, and create them on the output.
    # Shows usage if no target column was given; croaks if the target
    # column is not in the input or an output column cannot be created.

    pod2usage(2) if (!defined($self->{_target_column}));

    # Future mode uses the "delay" comment handler and the other modes the
    # "pass" handler.  (Presumably this is because future mode writes each
    # row one behind its input, so comments must be held back too.)
    $self->finish_io_option('input', -comment_handler =>
	    ($self->{_mode} eq "F" ?
		$self->create_delay_comments_sub :
		$self->create_pass_comments_sub));

    $self->{_target_coli} = $self->{_in}->col_to_i($self->{_target_column});
    croak($self->{_prog} . ": target column " . $self->{_target_column} . " is not in input stream.\n")
	if (!defined($self->{_target_coli}));

    # If the user named neither output column, produce both under their
    # standard names.  If at least one was named, produce only the named
    # one(s); the other stays undef, which run() takes to mean "skip it".
    if (!defined($self->{_absdiff_column}) && !defined($self->{_pctdiff_column})) {
	$self->{_absdiff_column} = 'absdiff';
	$self->{_pctdiff_column} = 'pctdiff';
    };
    my(@new_colnames) = grep { defined($_) }
	($self->{_absdiff_column}, $self->{_pctdiff_column});

    $self->finish_io_option('output', -clone => $self->{_in}, -outputheader => 'delay');

    # New columns get type 'q' when col_type_is_numeric() reports 1 for the
    # target column, and 'd' otherwise.
    my($destination_type) = ($self->{_in}->col_type_is_numeric($self->{_target_column}) == 1 ? 'q' : 'd');
    foreach my $colname (@new_colnames) {
	$self->{_out}->col_create("$colname:$destination_type")
	    or croak($self->{_prog} . ": cannot create column $colname (maybe it already existed?)\n");
    };
}
=head2 run
$filter->run();
Internal: run over each row.
=cut
sub run ($) {
    my($self) = @_;

    # Stream every input row to the output, filling in whichever of the
    # absolute/percent difference columns are enabled (a column whose name
    # is undef is skipped).
    #   baseline mode:    difference from the first row's value
    #   incremental mode: difference from the previous row's value
    #   future mode:      difference between the next row and this one,
    #                     stored on this row
    # Percent difference is (absdiff / reference value) * 100.  When the
    # reference value is zero it cannot be computed, so $self->{_empty}
    # is written in its place.

    my $read_fastpath_sub = $self->{_in}->fastpath_sub();
    my $write_fastpath_sub = $self->{_out}->fastpath_sub();

    my $target_coli = $self->{_target_coli};
    my $absdiff_coli = defined($self->{_absdiff_column}) ? $self->{_out}->col_to_i($self->{_absdiff_column}) : undef;
    my $pctdiff_coli = defined($self->{_pctdiff_column}) ? $self->{_out}->col_to_i($self->{_pctdiff_column}) : undef;
    my $format = $self->{_format};
    my $empty = $self->{_empty};
    my $incremental_mode = ($self->{_mode} eq 'I');
    my $future_mode = ($self->{_mode} eq 'F');

    if ($future_mode) {
	# A row's difference needs the row after it, so hold each row back
	# and write it once its successor has been read.
	my $last_fref;
	my $input_delay_comments = $self->{_delay_comments}[0];
	while (my $fref = $read_fastpath_sub->()) {
	    if (!defined($last_fref)) {
		$last_fref = $fref;
		next;
	    };
	    my $last_value = $last_fref->[$target_coli];
	    my $absdiff = $fref->[$target_coli] - $last_value;
	    # emit one behind
	    if (defined($absdiff_coli)) {
		$last_fref->[$absdiff_coli] = sprintf($format, $absdiff);
	    };
	    if (defined($pctdiff_coli)) {
		# Guard against division by zero, as the other modes do.
		$last_fref->[$pctdiff_coli] = ($last_value == 0)
		    ? $empty
		    : sprintf($format, ($absdiff / $last_value) * 100.0);
	    };
	    $write_fastpath_sub->($last_fref);
	    # Release the comments held back while this row was pending.
	    $input_delay_comments->flush($self->{_out});
	    $last_fref = $fref;
	};
	# output saved row, if any; it has no successor, so both
	# differences get the empty value
	if (defined($last_fref)) {
	    if (defined($absdiff_coli)) {
		$last_fref->[$absdiff_coli] = $empty;
	    };
	    if (defined($pctdiff_coli)) {
		$last_fref->[$pctdiff_coli] = $empty;
	    };
	    $write_fastpath_sub->($last_fref);
	};
    } else {
	# non-future mode: $base is the reference value, i.e. the first
	# row's value (baseline) or the previous row's value (incremental)
	my $base;
	my $absdiff;
	my $pctdiff;
	while (my $fref = $read_fastpath_sub->()) {
	    if (!defined($base)) {
		# first row: nothing to compare against yet
		$absdiff = $pctdiff = 0.0;
		$base = $fref->[$target_coli];
	    } else {
		$absdiff = $fref->[$target_coli] - $base;
		$pctdiff = ($absdiff / $base) * 100.0 if ($base != 0);
	    };
	    if (defined($absdiff_coli)) {
		$fref->[$absdiff_coli] = sprintf($format, $absdiff);
	    };
	    if (defined($pctdiff_coli)) {
		# $pctdiff is stale when $base is zero, so never print it then
		if ($base == 0) {
		    $fref->[$pctdiff_coli] = $empty;
		} else {
		    $fref->[$pctdiff_coli] = sprintf($format, $pctdiff);
		};
	    };
	    # incremental mode compares the next row against this one
	    $base = $fref->[$target_coli] if ($incremental_mode);
	    $write_fastpath_sub->($fref);
	};
    };
}
=head1 AUTHOR and COPYRIGHT
Copyright (C) 1991-2024 by John Heidemann <johnh@isi.edu>
This program is distributed under terms of the GNU general
public license, version 2. See the file COPYING
with the distribution for details.
=cut
1;