#!/usr/bin/perl

use warnings;
use strict;
use File::Basename;
use Statistics::Descriptive;
use Bio::SeqIO;
use Getopt::Long;

# Defaults for the sequence format, the input file (empty means STDIN) and
# the histogram layout: first bin edge, last bin edge and bin width.
my $format = 'fasta';
my $filename = '';
my ($start, $end, $bin) = (10, 1000, 20);

# GetOptions reports the offending option via warn() and returns false;
# stop with the usage text rather than continuing with bad input.
GetOptions( 'format=s' => \$format,
			'input=s' => \$filename,
			'start=i' => \$start,
			'end=i' => \$end,
			'bin=i' => \$bin ) or usage(0);

# Validate the bin layout. A bin width below 1 must be rejected because the
# edge-building loop below would otherwise never terminate.
if ( $start < 1 ) {
	usage(1);
} elsif ( $end <= $start ) {
	usage(2);
} elsif ( $bin < 1 or $bin > ($end - $start) ) {
	usage(3);
}

# Read from the named file, or from STDIN when no input file was given.
my $input = $filename eq ''
	? Bio::SeqIO->new( -format => $format, -fh => \*STDIN )
	: Bio::SeqIO->new( -format => $format, -file => $filename );

my $stat = Statistics::Descriptive::Full->new();

# Bin edges: $start, $start + $bin, ... up to at most $end.
my @bins = ();
for ( my $edge = $start ; $edge <= $end ; $edge += $bin ) {
	push @bins, $edge;
}

# Collect the length of every sequence in the input.
while ( my $seq = $input->next_seq() ) {
	$stat->add_data( $seq->length() );
}

# frequency_distribution() returns a flat key/value list on success. Take it
# as a list first so that a lone undef does not become a bogus hash entry
# (and a garbage table row).
# NOTE(review): a lone undef appears to be returned when there are fewer
# than two data points - confirm against the installed module version.
# NOTE(review): lengths above the last bin edge appear not to be counted in
# any bin - confirm whether an overflow bin is wanted.
my @pairs = $stat->frequency_distribution(\@bins);
my %distrib = ( @pairs % 2 == 0 ) ? @pairs : ();

# Format a statistic with thousands separators. Undefined statistics (e.g.
# on empty input, or when there is no mode) are shown as 'n/a' instead of
# triggering "uninitialized value" warnings.
my $show = sub {
	my ($value) = @_;
	return defined $value ? commify($value) : 'n/a';
};
my $mean = $stat->mean();

printf "Total reads:\t%15s\n", $show->( scalar $stat->count() );
printf "Total nt:\t%15s\n", $show->( scalar $stat->sum() );
# The mean is printed with one decimal place; the wider field keeps its
# integer part aligned with the other rows.
printf "Mean length:\t%17s\n", defined $mean ? commify( sprintf("%.1f", $mean) ) : 'n/a';
printf "Median length:\t%15s\n", $show->( scalar $stat->median() );
printf "Mode length:\t%15s\n", $show->( scalar $stat->mode() );
printf "Max length:\t%15s\n", $show->( scalar $stat->max() );
printf "Min length:\t%15s\n", $show->( scalar $stat->min() );

# Histogram: one row per bin edge, in ascending numeric order.
printf "%s\t%12s\n", 'Length', '# Seqs';
foreach my $edge ( sort { $a <=> $b } keys %distrib ) {
	printf "%6s\t%12s\n", commify($edge), commify( $distrib{$edge} );
}

# Insert thousands separators into the integer part of a number.
#
# Arguments:
#   $_[0] - number or numeric string, optionally signed and optionally
#           followed by a fractional part or other trailing text.
#
# Returns the formatted string, e.g. "1234567.5" becomes "1,234,567.5".
# An undefined argument yields the empty string without warnings; a value
# the split pattern cannot match (e.g. text containing an embedded newline)
# is returned unchanged.
sub commify {
	my ($number) = @_;
	return '' unless defined $number;

	# Split into optional sign, leading digits and whatever follows.
	my ($sign, $int, $frac) = ( $number =~ m/^([-+]?)(\d*)(.*)$/ );
	return $number unless defined $int;

	# Add a comma after every digit group that is followed by a whole
	# number of three-digit groups up to the end of the integer part.
	$int  =~ s/(\d{1,3}?)(?=(\d{3})+$)/$1,/g;
	return $sign . $int . $frac;
}

# Print an optional error message followed by the usage line, then exit.
#
# Arguments:
#   $error - 1: invalid start size, 2: invalid end size, 3: invalid bin
#            size. Any other value, or no argument, prints the usage line
#            only.
#
# Never returns. When $error is true the output goes to STDERR and the exit
# status is 1; otherwise the output goes to STDOUT and the exit status is 0.
sub usage {
	my $error = shift;
	$error = 0 unless defined $error;

	my %message_for = (
		1 => "The start size must be an integer > 0!",
		2 => "The end size must be an integer larger than the start size!",
		3 => "The bin size must be a positive integer no larger than the distance between start and end!",
	);

	# Diagnostics belong on STDERR; a plain usage request stays on STDOUT.
	my $out = $error ? \*STDERR : \*STDOUT;

	print {$out} "\n";
	print {$out} "$message_for{$error}\n\n" if exists $message_for{$error};

	print {$out} <<END;
	  Usage:  %>@{[basename($0)]} -i|--input <filename> [-f|--format <fasta|fastq> -s|--start <n> -e|--end <n> -b|--bin <n>]
	  
END

	# Signal failure to the calling shell whenever an error was reported.
	exit( $error ? 1 : 0 );
}