#!/usr/bin/perl
# vim:ts=4 sw=4 ft=perl et:
# TODO:
# YAML display mode
# timing output during fetch
# support different db backends better (something mildly generic).

use strict;
use warnings;
use DBI;
use Getopt::Long;

# FIXME: decide how to share constants between utils and server.
use constant ENDOFTIME => 2147483647;
my %QUEUES = ( 1 => 'FSCK_QUEUE' );
my %valid_stats = map { $_ => 1 } qw/devices fids files replication delete queue all/;

my $DBH_CACHE = '';
my $DB_TYPE = '';

my %opts;

Getopt::Long::Configure("require_order", "pass_through");
GetOptions(
        "config=s"   => \$opts{config},
        "lib=s"      => \$opts{lib},
        "help"       => \$opts{help},
        "verbose"    => \$opts{verbose},
        "stats=s"    => \$opts{stats},
        "db_dsn=s"   => \$opts{db_dsn},
        "db_user=s"  => \$opts{db_user},
        "db_pass=s"  => \$opts{db_pass},
    ) or abort_with_usage();
Getopt::Long::Configure("require_order", "no_pass_through");

my @configs = ($opts{config}, "$ENV{HOME}/.mogilefs.conf", "/etc/mogilefs/mogilefs.conf");
foreach my $fn (reverse @configs) {
    next unless $fn && -e $fn;
    open FILE, "<$fn"
        or die "unable to open $fn: $!\n";
    while (<FILE>) {
        s/\#.*//;
        next unless m/^\s*(\w+)\s*=\s*(.+?)\s*$/;
        $opts{$1} = $2 unless ( defined $opts{$1} );
    }
    close FILE;
}

abort_with_usage() if $opts{help};

cmd_stats($opts{stats});

sub abort_with_usage {
    my $message = shift;
    print "ERROR: $message\n\n" if $message;

    print qq{Usage:
    mogstats --db_dsn="DBI:mysql:mfs:host=mfshost" --db_user="mfs" \
             --db_pass="mfs" --verbose --stats="devices,files"
    mogstats --stats="all"
    mogstats [all options in ~/.mogilefs.conf]

};
    print "valid stats: ", join(', ', sort keys %valid_stats), "\n";
    exit 1;
}

sub cmd_stats {
    my $args = shift;
    $args = 'all' unless $args;
    my %args = map { $_ => 1 } split(/,/, $args);
    for my $arg (keys %args) {
        abort_with_usage("Invalid stat $arg") unless $valid_stats{$arg};
    }

    print "Fetching statistics... ($args)\n";
    my $stats = stats_from_db(\%args)
        or die "Can't fetch stats";

    if ($args{devices} || $args{all}) {
        print "\nStatistics for devices...\n";
        printf "  %-10s %-15s %12s %10s\n", "device", "host", "files", "status";
        printf "  ---------- ---------------- ------------ ----------\n";
        foreach my $device (sort { $a <=> $b } keys %{$stats->{devices}}) {
            my $value = $stats->{devices}->{$device};
            printf "  %-10s %-10s %10s %10s\n", "dev$device", $value->{host}, $value->{files}, $value->{status};
        }
        printf "  ---------- ---------------- ------------ ----------\n";
    }

    if ($args{fids} || $args{all}) {
        print "\nStatistics for file ids...\n";
        printf "  Max file id: %s\n", $stats->{fids}->{max} || 'none';
    }

    if ($args{files} || $args{all}) {
        print "\nStatistics for files...\n";
        printf "  %-20s %-10s %10s\n", 'domain', 'class', 'files';
        printf "  -------------------- ----------- ----------\n";
        foreach my $domain (sort keys %{$stats->{files}}) {
            my $classes = $stats->{files}->{$domain};
            foreach my $class (sort keys %$classes) {
                my $files = $classes->{$class};
                printf "  %-20s %-10s %10s\n", $domain, $class, $files;
            }
        }
        printf "  -------------------- ----------- ----------\n";
    }

    if ($args{replication} || $args{all}) {
        print "\nStatistics for replication...\n";
        printf "  %-20s %-10s %10s %10s\n", 'domain', 'class', 'devcount', 'files';
        printf "  -------------------- ----------- ---------- ----------\n";
        foreach my $domain (sort keys %{$stats->{replication}}) {
            my $classes = $stats->{replication}->{$domain};
            foreach my $class (sort keys %$classes) {
                my $devcounts = $classes->{$class};
                foreach my $devcount (sort { $a <=> $b } keys %$devcounts) {
                    my $files = $devcounts->{$devcount};
                    printf "  %-20s %-10s %10s %10s\n", $domain, $class, $devcount, $files;
                }
            }
        }
        printf "  -------------------- ----------- ---------- ----------\n";
        # Now new stats.
        print "\nStatistics for replication queue...\n";
        printf "  %-20s %12s\n", 'status', 'count';
        printf "  -------------------- ------------\n";
        for my $status (sort keys %{$stats->{to_replicate}}) {
            my $files = $stats->{to_replicate}->{$status};
            printf "  %-20s %12s\n", $status, $files;
        }
        printf "  -------------------- ------------\n";

    }

    if ($args{'delete'} || $args{all}) {
        print "\nStatistics for delete queue...\n";
        printf "  %-20s %12s\n", 'status', 'count';
        printf "  -------------------- ------------\n";
        for my $status (sort keys %{$stats->{to_delete}}) {
            my $files = $stats->{to_delete}->{$status};
            printf "  %-20s %12s\n", $status, $files;
        }
        printf "  -------------------- ------------\n";

    }

    if ($args{queue} || $args{all}) {
        print "\nStatistics for general queues...\n";
        printf "  %-15s %-20s %12s\n", 'queue', 'status', 'count';
        printf "  --------------- -------------------- ------------\n";
        for my $queue (sort keys %{$stats->{queue}}) {
            my $status = $stats->{queue}->{$queue};
            for my $stat (sort keys %{$status}) {
                my $files = $status->{$stat};
                printf "  %-15s %-20s %12s\n", $queue, $stat, $files;
            }
        }
        printf "  --------------- -------------------- ------------\n";
    }

    print "\ndone\n";
}

sub get_dbh {
    return $DBH_CACHE if ($DBH_CACHE && $DBH_CACHE->ping);
    $DBH_CACHE = DBI->connect($opts{db_dsn}, $opts{db_user}, $opts{db_pass}, {
        PrintError => 0,
        AutoCommit => 1,
        RaiseError => 1,
    }) or die "Failed to connect to database: " . DBI->errstr;
    my $dsn = $opts{db_dsn};
    if ($dsn =~ /^DBI:mysql:/i) {
        $DB_TYPE = "MySQL";
    } elsif ($dsn =~ /^DBI:SQLite:/i) {
        $DB_TYPE = "SQLite";
    } elsif ($dsn =~ /^DBI:Pg:/i) {
        $DB_TYPE = "Postgres";
    } else {
        die "Unknown database type: $dsn";
    }
    return $DBH_CACHE;
}

sub stats_from_db {
    my $args = shift;

    # get database handle
    my $ret = {};
    my $dbh = get_dbh() or die "Could not get database handle";

    # get names of all domains and classes for use later
    my %classes;
    my $rows;

    $rows = $dbh->selectall_arrayref('SELECT d.dmid, d.namespace, c.classid, c.classname ' .
                                     'FROM domain d LEFT JOIN class c ON c.dmid=d.dmid');

    foreach my $row (@$rows) {
        $classes{$row->[0]}->{name} = $row->[1];
        $classes{$row->[0]}->{classes}->{$row->[2] || 0} = $row->[3] || 'default';
    }
    $classes{$_}->{classes}->{0} = 'default'
        foreach keys %classes;

    # get host and device information with device status
    my %devices;
    $rows = $dbh->selectall_arrayref('SELECT device.devid, hostname, device.status ' .
                                     'FROM device, host WHERE device.hostid = host.hostid');
    foreach my $row (@$rows) {
        $devices{$row->[0]}->{host} = $row->[1];
        $devices{$row->[0]}->{status} = $row->[2];
    }
    my %globals = ( classes => \%classes, devices => \%devices );

    # if they want replication counts, or didn't specify what they wanted
    if ($args->{replication} || $args->{all}) {
        $ret->{replication}  = stats_for_replication(\%globals);
        $ret->{to_replicate} = stats_for_to_replicate(\%globals);
    }

    if ($args->{'delete'} || $args->{all}) {
        $ret->{to_delete} = stats_for_to_delete(\%globals);
    }

    # file statistics (how many files there are and in what domains/classes)
    if ($args->{files} || $args->{all}) {
        $ret->{files} = stats_for_files(\%globals);
    }

    # device statistics (how many files are on each device)
    if ($args->{devices} || $args->{all}) {
        $ret->{devices} = stats_for_devices(\%globals);
    }

    # now fid statistics
    if ($args->{fids} || $args->{all}) {
        verbose("... fid stats...");
        my $max = $dbh->selectrow_array('SELECT MAX(fid) FROM file');
        $ret->{fids} = { max => $max };
        verbose("... done");
    }

    if ($args->{queue} || $args->{all}) {
        $ret->{queue} = stats_for_to_queue(\%globals);
    }

    return $ret;
}

sub stats_for_devices {
    my $globals = shift;
    my %classes = %{$globals->{classes}};
    my %devices = %{$globals->{devices}};
    my $dbh = get_dbh() or die "Could not get database handle";

    verbose("... per-device stats...");
    my $stats = $dbh->selectall_arrayref('SELECT devid, COUNT(devid) FROM file_on GROUP BY 1');
    my $devs  = {};
    for my $stat (@$stats) {
        my $host   = $devices{$stat->[0]}->{host};
        my $status = $devices{$stat->[0]}->{status};
        $devs->{$stat->[0]} = {
            host => $host,
            status => $status,
            files => $stat->[1],
        };
    }
    verbose("... done");
    return $devs;
}

sub stats_for_files {
    my $globals = shift;
    my %classes = %{$globals->{classes}};
    my %devices = %{$globals->{devices}};
    my $dbh = get_dbh() or die "Could not get database handle";

    verbose("... files stats...");
    my $stats = $dbh->selectall_arrayref('SELECT dmid, classid, COUNT(classid) FROM file GROUP BY 1, 2');
    my $files = {};
    for my $stat (@$stats) {
        my $domain  = $classes{$stat->[0]}->{name};
        my $class   = $classes{$stat->[0]}->{classes}->{$stat->[1]};
        $files->{$domain}->{$class} = $stat->[2];
    }
    verbose("... done");
    return $files;
}

sub stats_for_replication {
    my $globals = shift;
    my %classes = %{$globals->{classes}};
    my %devices = %{$globals->{devices}};
    my $dbh = get_dbh() or die "Could not get database handle";

    verbose("... replication stats...");
    # replication stats
    # This is the old version that used devcount:
    my @stats = get_stats_files_per_devcount();

    my $repl  = {};
    for my $stat (@stats) {
        my $domain = $classes{$stat->{dmid}}->{name};
        my $class  =
            $classes{$stat->{dmid}}->{classes}->{$stat->{classid}};
        $repl->{$domain}->{$class}->{$stat->{devcount}} = $stat->{count};
    }

    verbose("... done");
    return $repl;
}

sub stats_for_to_queue {
    my $ret = {};
    my $dbh = get_dbh() or die "Could not get database handle";

    verbose("... queue stats...");
    my $db_time = $dbh->selectrow_array('SELECT '. unix_timestamp());
    my $stats = $dbh->selectall_arrayref('SELECT type, nexttry, COUNT(*) FROM file_to_queue GROUP BY 1, 2');
    for my $stat (@$stats) {
        my $qname = $QUEUES{$stat->[0]} || "UNKNOWN_QUEUE";
        if ($stat->[1] < 1000) {
            my $name = { 0 => 'new', 1 => 'redo' }->{$stat->[1]} ||
                "unknown";
            $ret->{$qname}->{"$name"} += $stat->[2];
        } elsif ($stat->[1] == ENDOFTIME) {
            $ret->{$qname}->{"manual"} = $stat->[2];
        } elsif ($stat->[0] < $db_time) {
            $ret->{$qname}->{"overdue"} += $stat->[2];
        } else {
            $ret->{$qname}->{"deferred"} += $stat->[2];
        }
    }
    verbose("... done");
    return $ret;
}

# TODO: See how much of this code is collapsable...
sub stats_for_to_delete {
    my $ret = {};
    my $dbh = get_dbh() or die "Could not get database handle";

    verbose("... delete queue stats...");
    my $db_time = $dbh->selectrow_array('SELECT '. unix_timestamp());
    my $stats = $dbh->selectall_arrayref('SELECT nexttry, COUNT(*) FROM file_to_delete2 GROUP BY 1');
    for my $stat (@$stats) {
        if ($stat->[0] < 1000) {
            my $name = { 0 => 'new', 1 => 'redo' }->{$stat->[0]} ||
                "unknown";
            $ret->{$name} += $stat->[1];
        } elsif ($stat->[0] == ENDOFTIME) {
            $ret->{manual} = $stat->[1];
        } elsif ($stat->[0] < $db_time) {
            $ret->{overdue} += $stat->[1];
        } else {
            $ret->{deferred} += $stat->[1];
        }
    }
    verbose("... done");
    return $ret;
}

sub stats_for_to_replicate {
    my $ret = {};
    my $dbh = get_dbh() or die "Could not get database handle";

    # now we want to do the "new" replication stats
    verbose("... replication queue stats...");
    my $db_time = $dbh->selectrow_array('SELECT '. unix_timestamp());
    my $stats = $dbh->selectall_arrayref('SELECT nexttry, COUNT(*) FROM file_to_replicate GROUP BY 1');
    foreach my $stat (@$stats) {
        if ($stat->[0] < 1000) {
            # anything under 1000 is a specific state, so let's define those.  here's the list
            # of short names to describe them.
            my $name = {
                0 => 'newfile', # new files that need to be replicated
                1 => 'redo',    # files that need to go through replication again
            }->{$stat->[0]} || "unknown";

            # now put it in the output hashref.  note that we do += because we might
            # have more than one group of unknowns.
            $ret->{"$name"} += $stat->[1];

        } elsif ($stat->[0] == ENDOFTIME) {
            $ret->{"manual"} = $stat->[1];

        } elsif ($stat->[0] < $db_time) {
            $ret->{"overdue"} += $stat->[1];

        } else {
            $ret->{"deferred"} += $stat->[1];
        }
    }
    verbose("... done");
    return $ret;
}

# FIXME: This is obviously MySQL-only.
sub unix_timestamp { 
    if ($DB_TYPE eq 'MySQL') {
        return "UNIX_TIMESTAMP()";
    } elsif ($DB_TYPE eq 'Postgres') {
        return "EXTRACT(epoch FROM NOW())::int4";
    } elsif ($DB_TYPE eq 'SQLite') {
        return "strftime('%s','now')";
    }
}

sub get_stats_files_per_devcount {
    my $dbh = get_dbh();
    my @ret;
    my $sth = $dbh->prepare('SELECT dmid, classid, devcount, COUNT(devcount) AS "count" FROM file GROUP BY 1, 2, 3');
    $sth->execute;
    while (my $row = $sth->fetchrow_hashref) {
        push @ret, $row;
    }
    return @ret;
}

sub verbose {
    print $_[0], "\n" if $opts{verbose};
}
