I was using the following script to launch bowtie. I think it may be useful to others and so post it here. This wrapper script uses bowtie internally, but does a little bit more things which are at least convenient to me:
1) Take gzip'ed fastq files as input. (Most fastq files are compressed)
2) For paired-end reads, align singletons in bowtie's single-end mode. (This saves a bowtie command line typed by yourself)
3) Fix a minor problem in SAM out. (Names of two ends in a pair should be identical; otherwise duplicate removal may have trouble)
4) More accurate (albeit slower) command line option: [--best --strata] -m 1. (Recommended by a users who uses bowtie a lot)
Please reply to this thread if you think the command line option is not appropriate, or the script is buggy/can be improved. Thanks.
1) Take gzip'ed fastq files as input. (Most fastq files are compressed)
2) For paired-end reads, align singletons in bowtie's single-end mode. (This saves a bowtie command line typed by yourself)
3) Fix a minor problem in SAM out. (Names of two ends in a pair should be identical; otherwise duplicate removal may have trouble)
4) More accurate (albeit slower) command line option: [--best --strata] -m 1. (Recommended by a users who uses bowtie a lot)
Please reply to this thread if you think the command line option is not appropriate, or the script is buggy/can be improved. Thanks.
Code:
#!/usr/bin/perl -w
# Contact: lh3
# Last modified: 05NOV2009
use strict;
use warnings;
use Cwd qw/getcwd abs_path/;
use File::Temp qw/tempfile/;
die("Usage: run_bowtie.pl <bowtieIndex> <read1.fq> [read2.fq] [options] | gzip > aln.sam.gz\n") if (@ARGV < 2 || $ARGV[0] =~ /^-/ || $ARGV[1] =~ /^-/);
my $idx = shift(@ARGV);
my $rd1 = (@ARGV && $ARGV[0] !~ /^-/)? shift(@ARGV) : '-';
my $rd2 = (@ARGV && $ARGV[0] !~ /^-/)? shift(@ARGV) : '';
# set bowtie cmd options
my $opt0 = join(" ", @ARGV);
$opt0 =~ s/--maxins/-X/;
$opt0 =~ s/--minins/-I/;
$opt0 =~ s/--un \S+\b//;
# locate bowtie
my $bowtie = &gwhich('bowtie');
die("[run_bowtie] fail to find the bowtie executable.\n") unless ($bowtie);
my ($fh1, $fh2);
open($fh1, ($rd1 =~ /\.gz$/)? "gzip -dc $rd1 |" : $rd1) || die;
if ($rd2) {
open($fh2, ($rd2 =~ /\.gz$/)? "gzip -dc $rd2 |" : $rd2) || die;
}
if ($fh2) { # paired-end mode
my ($fh, $fn_ump, @col);
if ($opt0 !~ /-X/) {
warn("[run_bowtie] the maximum insert size is set as the default value (250).\n");
}
(undef, $fn_ump) = tempfile("./rb-$$-XXXXXX");
open($fh, qq/| $bowtie $opt0 -S -m 1 $idx --12 -/
. q/ | awk '{if($3!="*"||$12!="XM:i:0")print;else print $1"\t"$10"\t"$11>/ . qq/"$fn_ump"}'/ . q{ | perl -pe 's/^(\S+)\/[12]\t/$1\t/'}) || die;
while (<$fh1>) {
$col[0] = $1 if (/^@(\S+)/);
$col[1] = <$fh1>; chomp($col[1]); <$fh1>;
$col[2] = <$fh1>; chomp($col[2]);
<$fh2>;
$col[3] = <$fh2>; chomp($col[3]); <$fh2>;
$col[4] = <$fh2>; chomp($col[4]);
print $fh join("\t", @col), "\n";
}
close($fh); close($fh1); close($fh2);
# map singletons
my $opt_se = $opt0;
$opt_se =~ s/-X\s*\d+//;
$opt_se =~ s/-I\s*\d+//;
system(qq/$bowtie $opt_se -S -m 1 --best --strata --sam-nohead $idx --12 $fn_ump | awk '\$3!="*"||\$12!="XM:i:0"'/);
unlink($fn_ump);
} else { # single-end mode
my ($fh, @col);
open($fh, qq/| $bowtie $opt0 -S -m 1 --best --strata $idx --12 - | awk '\$3!="*"||\$12!="XM:i:0"'/) || die;
while (<$fh1>) {
$col[0] = $1 if (/^@(\S+)/);
$col[1] = <$fh1>; chomp($col[1]); <$fh1>;
$col[2] = <$fh1>; chomp($col[2]);
print $fh join("\t", @col), "\n";
}
close($fh); close($fh1);
}
# routines to locate an executable
sub dirname {
my $prog = shift;
my $cwd = getcwd;
return $cwd if ($prog !~ /\//);
$prog =~ s/\/[^\s\/]+$//g;
return $prog;
}
sub which {
my $file = shift;
my $path = (@_)? shift : $ENV{PATH};
return if (!defined($path));
foreach my $x (split(":", $path)) {
$x =~ s/\/$//;
return "$x/$file" if (-x "$x/$file" && -f "$x/$file");
}
return;
}
sub gwhich {
my $progname = shift;
my $dirname = &dirname($0);
my $tmp;
chomp($dirname);
if (-x $progname && -f $progname) {
return abs_path($progname);
} elsif (defined($dirname) && (-x "$dirname/$progname" && -f "$dirname/$progname")) {
return abs_path("$dirname/$progname");
} elsif (($tmp = &which($progname))) { # on the $PATH
return $tmp;
} else {
warn("[gwhich] fail to find executable $progname.\n");
return;
}
}
Comment