Originally posted by maubp
View Post
1. FASTA to tab
perl -e '
    # Convert FASTA/csfasta records to one tab-separated row per record:
    #   ID <tab> description <tab> sequence
    # Tabs inside the input are turned into spaces first so that they cannot
    # be mistaken for column separators. A summary goes to STDERR.
    use strict;

    my $count   = 0;    # number of ">" header lines seen
    my $len     = 0;    # total number of sequence characters
    my $printed = 0;    # true once any output has been written

    while (<>) {
        s/\r?\n//;              # strip LF or CRLF line ending

        # csfasta files commonly start with "#" comment lines; they are not
        # sequence data, so keep them out of the table and the length total.
        next if /^#/;

        s/\t/ /g;

        if (s/^>//) {
            # Terminate the previous row. Testing what was printed (instead
            # of the line number) avoids a blank first row when the first
            # header is not on line 1.
            print "\n" if $printed;
            s/ |$/\t/;          # first space (or end of line) splits ID from description
            $count++;
            $_ .= "\t";         # separator in front of the sequence column
        }
        else {
            s/ //g;             # sequence lines are joined without spaces
            $len += length($_);
        }

        print $_;
        $printed = 1 if length($_);
    }

    # Only close the last row if there was one (empty input stays empty).
    print "\n" if $printed;

    warn "\nConverted $count FASTA records in $. lines to tabular format\nTotal sequence length: $len\n\n";
' Corrida_16_01RMDSPFR004_2_F3.csfasta > Trepo_2.tab
2. Choose lines not containing "."
perl -e '
    # Keep only the lines that do NOT contain the literal string below and
    # report how many were kept on STDERR.
    # The test is applied to the whole line, so a match in the ID or
    # description column excludes the row as well.
    use strict;

    my $string = q{.};
    my $count  = 0;     # number of lines kept

    while (my $line = <>) {
        # index() does a plain substring search, so "." is taken literally.
        next if index($line, $string) >= 0;
        print $line;
        $count++;
    }

    # The kept lines are the ones WITHOUT the string; say so in the summary.
    warn "\nChose $count lines without string [$string] out of $. total lines.\n";
' Trepo_2.tab > TrepoNoDot_2.tab
3. Choose column 0 (the record IDs)
perl -e '
    # Print the selected tab-separated columns (0-based indices in @cols)
    # of every input line, then report the selection on STDERR.
    my @cols = (0);

    while (defined(my $row = <>)) {
        $row =~ s/\r?\n//;                      # strip LF or CRLF line ending
        my @fields = split(/\t/, $row);
        my @picked = map { $fields[$_] } @cols;
        print join("\t", @picked), "\n";
    }

    warn "\nChose columns ", join(", ", @cols), " for $. lines\n\n";
' TrepoNoDot_2.tab > Trepo2ID.lst
4. Tab to csfasta
perl -e '
    # Turn tab-separated rows (ID, description, sequence) back into FASTA
    # records. The description is appended to the header only when it is
    # non-empty, and the sequence is wrapped at 60 characters per line.
    my $total = 0;      # summed length of all sequences

    while (defined(my $row = <>)) {
        $row =~ s/\r?\n//;                      # strip LF or CRLF line ending
        my @fields = split(/\t/, $row);
        my ($id, $desc, $seq) = @fields[0 .. 2];

        my $header = ">$id";
        $header .= " $desc" if length($desc);
        print $header, "\n";

        $total += length($seq);

        # Insert a newline after every 60 characters that are followed by
        # at least one more character (so no empty trailing line appears).
        $seq =~ s/(.{60})(?=.)/$1\n/g;
        print $seq, "\n";
    }

    warn "\nConverted $. tab-delimited lines to FASTA format\nTotal sequence length: $total\n\n";
' TrepoNoDot_2.tab > TrepoNoDot_2_F3.csfasta
Qual file
5. Extract the corresponding .qual values from a list of IDs
perl -e '
    # Copy from a FASTA-style file (here: a .qual file) every record whose ID
    # appears in an ID list. Arguments: ID_LIST FASTA_FILE.
    # Selected records go to STDOUT; a summary goes to STDERR.
    use strict;
    use warnings;

    my ($id_file, $fasta_file) = @ARGV;
    die "Need two arguments: an ID list and a FASTA/qual file\n"
        unless defined $fasta_file;

    # Read the wanted IDs: first whitespace-free token of each line, with an
    # optional leading ">" removed.
    my %wanted;
    open(my $id_fh, "<", $id_file)
        or die "Cannot open ID list $id_file: $!\n";
    while (my $line = <$id_fh>) {
        $line =~ s/\r?\n//;
        # Skip blank or non-matching lines; otherwise $1 would still hold
        # the capture of an earlier line (or nothing at all).
        next unless $line =~ /^>?(\S+)/;
        $wanted{$1}++;
    }
    close($id_fh);
    my $num_ids = keys %wanted;     # number of distinct IDs requested

    open(my $fa_fh, "<", $fasta_file)
        or die "Cannot open FASTA file $fasta_file: $!\n";

    my $s_read   = 0;   # records seen
    my $s_wrote  = 0;   # records copied
    my $print_it = 0;   # true while inside a record that is being copied

    while (my $line = <$fa_fh>) {
        if ($line =~ /^>(\S+)/) {
            $s_read++;
            if ($wanted{$1}) {
                $s_wrote++;
                $print_it = 1;
                # Each ID is copied once: a later record with the same ID
                # is no longer in the table and is skipped.
                delete $wanted{$1};
            }
            else {
                $print_it = 0;
            }
        }
        print $line if $print_it;
    }
    close($fa_fh);

    warn "Searched $s_read FASTA records.\nFound $s_wrote IDs out of $num_ids in the ID list.\n";
' Trepo2ID.lst Corrida_16_01RMDSPFR004_2_F3_QV.qual > TrepoNoDot_2_F3_QV.qual
Comment