#!/usr/bin/perl
# $Id: apt-fast,v 1.24 2019/03/01 00:37:08 jima Exp jima $
# vim: set ts=4 :

use strict; use warnings FATAL => 'all'; use feature qw(say state);
use FindBin qw($Bin $Script);

=pod

=head1 NAME

apt-fast - apt-get with fast parallel downloading

=head1 SYNOPSIS

apt-fast install PACKAGES...

apt-fast dist-upgrade

apt-fast [ --fast OPTIONS -- ] APT_GET_ARGS

=head1 DESCRIPTION

This is a wrapper for apt-get which pre-downloads needed files using a 
fast, parallel downloader, which can dramatically increase throughput.

There are two levels of parallelism:

  1.  The "axel" program is used to download each file in pieces using 
      several concurrent network connections.

  2.  Multiple (default: 2) files are downloaded concurrently on separate
      threads, trying to access different repo servers concurrently.  
      This is intended to increase throughput if one server is bogged down
      but not another.  It also overlaps checksum computations with downloading.

File size and checksums are verified.

Any initial arguments of the form   C< --fast OPTIONS -- >  are 
consumed by this script; otherwise the arguments are passed to C<apt-get>

=head1 OPTIONS

  -d, --download-only
  -n, --dry-run
  -q, --quiet  (automatic if -q  detected among apt-get args)
  -s, --silent (automatic if -qq detected among apt-get args)
  -j, --num-threads <num>
  -u, --update (do apt-get update first)
  --debug

=head1 ACKNOWLEDGEMENTS

This program was inspired by the apt-fast.sh shell script by 
Matt Parnell  http://www.mattparnell.com

Author: Jim Avera  (jim.avera at gmail.com)
        http://abhweb.org/jima

=cut

use threads;
use threads::shared;

# The parallel downloader has now been made into a library.
# Also things like IO::All may only exist in our local Perl installation
use lib "$Bin/../lib/perl", "$Bin/../lib", "$Bin/../perl5/lib/perl5"; 

use Thread::Queue;
use Pod::Usage qw(pod2usage);
use Getopt::Long qw/GetOptions/;
use Text::ParseWords qw/parse_line shellwords/;
use Digest ();
use IO::All qw/io/;

use FastDownloader; # The guts of the parallel downloader

my @orig_ARGV = @ARGV;  # saved unmodified so we can re-exec ourselves under sudo
my $MiB = 1024*1024;    # bytes per MiB, used for download-size reporting

# Option flags, set by the "--fast ... --" prefix or inferred from apt-get args.
my ($num_threads, $dryrun, $quiet, $silent, $debug, $download_only, $do_update);
my ($cachedir);         # apt's package archive directory (auto-detected if not given)

sub badargs_exit(;$) {
  # Report a usage error (optionally preceded by a specific message)
  # and exit with status 2 via pod2usage.
  my $text = @_ ? "$_[0]\n" : "";
  $text .= "$Script --fast -h for help";
  pod2usage(-msg => $text, -exitval => 2);
}
sub wrap_sudo_cmd(@) {
  # Wrap the given command so it runs under sudo with a minimal, sanitized
  # environment: only a safe PATH plus a few harmless display/terminal
  # variables that happen to be set in our own environment.
  my @wrapped = qw(sudo /usr/bin/env -i PATH=/bin:/usr/bin:/usr/sbin:/sbin);
  push @wrapped, map  { "$_=$ENV{$_}" }
                 grep { defined $ENV{$_} }
                 qw(DISPLAY TERM TERMCAP TERMINFO HOSTNAME PAGER EDITOR);
  return (@wrapped, @_);
}
sub sudo_check(@) {
  # Run a command under sudo (via wrap_sudo_cmd) and die if it fails.
  #
  # Bug fix: the old message interpolated $!, but $! is only meaningful
  # when system() returns -1 (the command could not be launched at all).
  # For a command that ran but exited non-zero, the status lives in $?.
  warn "> sudo @_\n" unless $quiet;
  my @cmd = wrap_sudo_cmd(@_);
  warn ">> @cmd\n" if $debug;
  if (system(@cmd) != 0) {
    die "CMD FAILED (could not execute: $!): @cmd" if $? == -1;
    die "CMD FAILED (killed by signal ".($? & 127)."): @cmd" if $? & 127;
    die "CMD FAILED (exit status ".($? >> 8)."): @cmd";
  }
}

# Arguments to this script may be passed with an initial
#   --fast args... -- 
# (note the final -- before the apt-get args)
#
if (@ARGV && $ARGV[0] =~ /^--fast/i) {
    shift @ARGV;
    # args up through the next "--" are ours!
    # Getopt::Long removes the terminating "--", leaving only the
    # apt-get arguments in @ARGV afterwards.
    my ($help);
    Getopt::Long::Configure ("default", "gnu_getopt");
    GetOptions(
      "h|help"               => \$help,
      "c|cachedir=s"         => \$cachedir,
      "d|download-only"      => \$download_only,
      "debug"                => \$debug,
      "j|jobs|num-threads:i" => \$num_threads,
      "n|dry-run"            => \$dryrun,
      "q|quiet"              => \$quiet,
      "s|silent"             => \$silent,
      "u|update"             => \$do_update,
    ) or badargs_exit;
    pod2usage(-verbose => 2) if $help;  # -h shows the full POD as a man page
}
badargs_exit if @ARGV==0;   # nothing left to pass on to apt-get

# Unless overridden with --cachedir, ask apt-config where the package
# archive cache lives (normally /var/cache/apt/archives/).
if (! $cachedir) {
  # The trailing "|" makes IO::All run the command and read its stdout;
  # apt-config emits a shell assignment like: x='/var/cache/apt/archives/'
  io('/usr/bin/apt-config shell x Dir::Cache::archives/d |')->slurp
    =~ /^x=['"]?([^'"]*)['"]?$/ or die;   # NOTE: dies without a message if unparsable
  $cachedir = $1;
}

# Allow "update" to be stacked before other apt-get args
# (e.g. "apt-fast update install foo"): the word is removed from @ARGV
# and remembered in $do_update so we run "apt-get update" first.
if (@ARGV >= 2) {
  for my $i (0..$#ARGV-1) {
    # Find the first non-option word; only that word may be "update".
    if ($ARGV[$i] =~ /^[^-]/) {
      if ($ARGV[$i] eq "update") {
        $do_update = 1;
        splice @ARGV, $i, 1;
      }
      last
    }
  }
}

# Additionally, imply --fast -q -- with apt-get -q etc.
# (note: -qq also matches /^-q/, so it implies both quiet and silent)
$quiet  = 1 if grep /^-q/,  @ARGV;
$silent = 1 if grep /^-qq/, @ARGV;

#$num_threads //= 2;
#$num_threads //= 4;
$num_threads //= 10;
# Must be a positive integer (also rejects "-j 0" and "-j" with no value).
die "Invalid number of threads: $num_threads\n" 
    unless $num_threads =~ /^[1-9]\d*$/;
$quiet = $silent = 0 if $debug;   # --debug overrides quiet/silent
$quiet //= $silent;               # --silent implies --quiet


# We need root privileges to write to the apt cache directory.
# If not already root ($> is the effective uid), re-exec ourselves
# under sudo with the original, unmodified argument list.
if ($> != 0) {
    warn "Re-running with sudo...\n" unless $quiet;
    #my @cmd = wrap_sudo_cmd("PERL5LIB=$ENV{PERL5LIB}", $0, @orig_ARGV);
    my @cmd = wrap_sudo_cmd($0, @orig_ARGV);
    warn ">> @cmd\n" if $debug;
    # exec replaces this process; it only returns if the exec failed.
    exec(@cmd) or die "sudo: $! (@cmd)";
}

warn "cachedir = $cachedir\n" if $debug;

# Do update first if requested
if ($do_update) {
  sudo_check qw(apt-get -qq update);
}

# Did the user request something likely to require downloads?
if (grep /upgrade|install|dist-upgrade|source|build-dep|check/, @ARGV) {
    warn "> Working...\n" unless $quiet;

    # Have apt-get just print the information, including the URI's to the
    # packages and the filenames they should be stored in locally (which
    # are not necessarily the same as on the remote servers).
    # The expected MD5 sums are also captured.
    my @cmd = wrap_sudo_cmd("apt-get", "-y", "--print-uris", @ARGV);
    warn ">> -| @cmd\n" if $debug;
    # List-form pipe open (no shell involved), with a lexical handle
    # instead of the old bareword PIPE.
    open(my $pipe, "-|", @cmd) or die "pipe:$!";
    my @fspecs;
    while (defined(my $line = <$pipe>)) {
        my ($url, $fname, $size, $csum, @extrawords) = shellwords($line);
        # Ignore all lines which don't have exactly 4 fields like this:
        #   'http://something' filename size MD5Sum:something
        #   'ftp://something'  filename size MD5Sum:something
        if ($url && $url =~ m{^((http|ftp)://)([^/]+)}
                 && $fname && $csum && @extrawords==0) {
            push @fspecs, { url           =>  $url,
                            local_file    =>  $fname,
                            destdir       =>  $cachedir,
                            callback      =>  "filecheck_callback",
                            # These are for us, used by the callback:
                            ExpectedSize  =>  $size,
                            Csum          =>  $csum,   # e.g. "MD5Sum:<hex>"
                          };
        }
    }
    # close() on a pipe reaps the child; a false return means apt-get
    # failed or exited non-zero (status in $?).  This was silently
    # ignored before; surface it but keep going — the real apt-get run
    # below will report the underlying problem to the user.
    close($pipe)
        or warn "apt-get --print-uris exited abnormally (status $?)\n";

    # Download everything in parallel and wait for completion.
    my $obj = FastDownloader->new(
                                  filespecs   => \@fspecs,
                                  num_threads => $num_threads,
                                  dryrun      => $dryrun,
                                  quiet       => $quiet,
                                  #silent      => $silent,
                                  debug       => $debug,
                                 );
    $obj->wait();
    my $stats = $obj->get_stats();

    # Summarize what was downloaded (suppressed by --silent).
    unless ($silent) {
        if ($stats->{file_count} == 0) {
            warn "> No files needed to be downloaded.\n" unless $quiet;
        } else {
            # Pass values as printf arguments rather than interpolating
            # them into the format string (a stray '%' in a value would
            # otherwise corrupt the format).
            printf STDERR "> Finished downloading %s files (%.1f MiB",
                          $stats->{file_count}, ($stats->{total_bytes} / $MiB);
            if ($stats->{elapsed_secs} != 0) {
                printf STDERR ' @ %.1f MiB/sec',
                              ($stats->{total_bytes} / $MiB)/$stats->{elapsed_secs};
            } else {
                print STDERR " in less than one second";
            }
            print STDERR ") using $num_threads threads\n";
        }
    }
} else {
    warn "> This command does not use downloaded files\n" unless $silent;
}

# Stop here if the user only wanted the files downloaded.
if ($download_only) { 
    warn "Exiting (download-only).\n" unless $silent; 
    exit 0; 
}

exit 0 if $dryrun;

# Finally run the real apt-get with the user's (remaining) arguments;
# it will pick up the pre-downloaded files from the archive cache.
warn "> Running apt-get ",($debug ? "@ARGV":"..."),"\n" unless $quiet;
# Bug fix: exec only returns on failure — previously that case fell
# through silently; now report why apt-get could not be started.
exec("apt-get", @ARGV) or die "exec apt-get: $!";


# Callback invoked (by name) from FastDownloader after each file is
# downloaded: verify the file's size and checksum against what apt-get
# reported.  Returns 1 if the file is good, 0 otherwise (warning to
# STDERR explains the mismatch).  $final_path is unused here.
# (The old ($$$) prototype was dropped: prototypes never apply to calls
# made by name/reference, and they are not argument validation anyway.)
sub filecheck_callback {
    my ($fspec, $temp_path, $final_path) = @_;

    # Check the file size first (cheap) before computing a digest.
    # -s returns undef for a missing file, which would be fatal under
    # "warnings FATAL => 'all'" — treat that as a mismatch instead.
    my $actual_size = (-s $temp_path);
    if (!defined($actual_size) || $actual_size != $fspec->{ExpectedSize}) {
        warn "File-size mis-match on $temp_path\n",
             "   Expected $fspec->{ExpectedSize} bytes, got ",
             (defined($actual_size) ? $actual_size : "no file"), " bytes\n";
        return 0;
    }

    # Check the hash.  The Csum prefix is normally "MD5Sum:" 
    # but could specify another algorithm.
    my ($hashname, $expected_digest) = ($fspec->{Csum} =~ /(\w+)Sum:(.*)/);
    die "Unrecognized checksum spec: $fspec->{Csum}\n"
        unless defined($hashname);
    my $hasher = Digest->new($hashname); # e.g "MD5"
    # Three-arg open: a filename can no longer inject an open mode.
    open(my $fhandle, '<', $temp_path) or die "$temp_path : $!";
    binmode $fhandle;
    $hasher->addfile($fhandle);
    close $fhandle;
    my $actual_digest = $hasher->hexdigest();
    # Hex digests are case-insensitive; compare them as lowercase.
    if (lc($actual_digest) ne lc($expected_digest)) {
        warn "$hashname mis-match on $temp_path\n",
             "   expected: $expected_digest\n",
             "     actual: $actual_digest\n";
        return 0;
    }

    return 1;
}
