Here you go. You had a mistake in your code, you took the third column as your contig in this line;
my $contig = $s[2];
while the contig was in the second column.
In addition, since you only have a subset of your data, you'll get these warnings. I removed the use warnings line. Run the script as;
perl script.pl Sample_aligned_reads.txt sample_contig.txt
my $contig = $s[2];
while the contig was in the second column.
In addition, since you only have a subset of your data, you'll get these warnings. I removed the use warnings line. Run the script as;
perl script.pl Sample_aligned_reads.txt sample_contig.txt
Code:
#!usr/bin/perl-w
use strict;
open(SAM,$ARGV[0]);
my %hash = ();
while(<SAM>){
chomp;
next if($_ =~ /^@/); ## remove the headers in sam file
#split the line and obtain the read and contig
my ($read,$contig,$sequence) =split;
#split the read on the '|' character, to obtain the weight
my (undef, $weight) = split(/\|/,$read);
#save the total number of reads and clusters in the hash
$hash{$contig}{'clusters'}++;
$hash{$contig}{'total'}+=$weight;
}
close SAM;
open(CTG,$ARGV[1]);
my ($contigSeq,$prevhead) = ("","");
while(<CTG>){
chomp;
$contigSeq.= $_ if(eof(CTG));
if (/\>(\S+)/ || eof(CTG)){
my $head=$1;
if($contigSeq ne ''){
#$contigSeq is the contig sequence, $prevhead is your contig
my $len = length($contigSeq);
#Now print the results
print "$prevhead\t$len\t$hash{$prevhead}{'clusters'}\t$hash{$prevhead}{'total'}\t$contigSeq\n" if(defined $hash{$prevhead});
}
$prevhead = $head;
$contigSeq='';
}else{
$contigSeq .= $_;
}
}
close CTG;
Comment