I am new with bedtools..So I have a perl script to compare three file in overlap manner, however my script is not perfectly improved to compare such thing.
I need to compare file 1 to 2, file 1 to 3 and file 2 to 3, I expect a common file for result...
file1:
AT4G01510.1 1 6993 7241
AT1G01020.2 1 7320 8668
file2:
AT5G35640.1 1 6993 7246
AT5G23980.1 1 7320 8669
file3:
AT1G01010.1 1 6993 7267
AT1G01020.1 1 7320 8634
#########################
Script:
#!/usr/bin/perl -w
use strict;
use Getopt::Std;
use vars qw ($opt_s $opt_p $opt_t);
getopts ('s
:t:');
if(! $opt_s || !$opt_p || !$opt_t){
print "Usage: $0\n";
print "-s file1 \n";
print "-p file2 \n";
print "-t file3 \n";
exit;
}
my $tablefile1 = $opt_s;
my $tablefile2 = $opt_p;
my $tablefile3 = $opt_t;
if(!$tablefile1){
die "Invalid data file name $tablefile1.\n";
}
if(!$tablefile2){
die "Invalid data file name $tablefile2.\n";
}
if(!$tablefile3){
die "Invalid data file name $tablefile3.\n";
}
open(IN,$tablefile1) || die "Can't open $tablefile1..exiting.\n";
my %hash = ();
my @coord_arr = ();
my @chrs = ();
while(<IN>){
chomp;
next if (/^\s*$/);
my @cols = split(/\t/, $_);
my $id = $cols[0];
my $chr = $cols[1];
my $s = $cols[2];
my $e = $cols[3];
if (exists($hash{$chr})) {
my $str = $hash{$chr};
$hash{$chr} = "$str/$id:$s:$e";
} else {
$hash{$chr} = "$id:$s:$e";
}
}
close IN;
open(IN,$tablefile2) || die "Can't open $tablefile2..exiting.\n";
while(<IN>){
chomp;
next if (/^\s*$/);
my @cols = split(/\t/, $_);
my $id = $cols[0];
my $chr = $cols[1];
my $s = $cols[2];
my $e = $cols[3];
if (exists($hash{$chr})) {
my $str = $hash{$chr};
$hash{$chr} = "$str/$id:$s:$e";
} else {
$hash{$chr} = "$id:$s:$e";
}
}
close IN;
open(IN,$tablefile3) || die "Can't open $tablefile3..exiting.\n";
my $found = 0;
print "Chr\tPSF-ID\tStart\tEnd\tPseudopipe-ID\tStart\tEnd\tStart-diff\tend-diff\tShiu-ID\tStart\tEnd\tstart-diff\tend-diff\n";
while(<IN>){
chomp;
next if (/^\s*$/);
my @cols = split(/\t/, $_);
my $id = $cols[0];
my $chr = $cols[1];
my $s = $cols[2];
my $e = $cols[3];
my $info_str = $hash{$chr};
next if (!$info_str);
my @pseudos = split(/\//, $info_str);
#my @pseudos2 = split(/\//, $info_str);
for (my $i = 0; $i < @pseudos; $i++) {
my ($id1, $s1, $e1) = split(/:/, $pseudos[$i]);
if (&is_overlap($s, $e, $s1, $e1)) {
print "$chr\t$id\t$s\t$e\t$id1\t$s1\t$e1\t";
my $d1 = $s1-$s;
my $d2 = $e1-$e;
print "$d1\t$d2\t";
}
}
for (my $j = 0; $j < @pseudos; $j++) {
my ($id2, $s2, $e2) = split(/:/, $pseudos[$j]);
if (&is_overlap2($s, $e, $s2, $e2)) {
print "$id\t$s2\t$e2\t";
my $d3 = $s2-$s;
my $d4 = $e2-$e;
print "$d3\t$d4\n";
}
}
}
close IN;
sub is_overlap {
my ($s, $e, $s1, $e1) = @_;
my $found = 0;
if ($s >= $s1 && $s < $e1 ||
$s1 >= $s && $s1 < $e ||
$s1 >= $s && $e1 <= $e ||
$s1 <= $s && $e1 >= $e) {
$found = 1;
}
return $found;
}
sub is_overlap2 {
my ($s, $e, $s2, $e2) = @_;
my $found = 0;
if ($s >= $s2 && $s < $e2 ||
$s2 >= $s && $s2 < $e ||
$s2 >= $s && $e2 <= $e ||
$s2 <= $s && $e2 >= $e) {
$found = 1;
}
return $found;
}
I need to compare file 1 to 2, file 1 to 3 and file 2 to 3, I expect a common file for result...
file1:
AT4G01510.1 1 6993 7241
AT1G01020.2 1 7320 8668
file2:
AT5G35640.1 1 6993 7246
AT5G23980.1 1 7320 8669
file3:
AT1G01010.1 1 6993 7267
AT1G01020.1 1 7320 8634
#########################
Script:
#!/usr/bin/perl -w
use strict;
use Getopt::Std;
use vars qw ($opt_s $opt_p $opt_t);
getopts ('s

if(! $opt_s || !$opt_p || !$opt_t){
print "Usage: $0\n";
print "-s file1 \n";
print "-p file2 \n";
print "-t file3 \n";
exit;
}
my $tablefile1 = $opt_s;
my $tablefile2 = $opt_p;
my $tablefile3 = $opt_t;
if(!$tablefile1){
die "Invalid data file name $tablefile1.\n";
}
if(!$tablefile2){
die "Invalid data file name $tablefile2.\n";
}
if(!$tablefile3){
die "Invalid data file name $tablefile3.\n";
}
open(IN,$tablefile1) || die "Can't open $tablefile1..exiting.\n";
my %hash = ();
my @coord_arr = ();
my @chrs = ();
while(<IN>){
chomp;
next if (/^\s*$/);
my @cols = split(/\t/, $_);
my $id = $cols[0];
my $chr = $cols[1];
my $s = $cols[2];
my $e = $cols[3];
if (exists($hash{$chr})) {
my $str = $hash{$chr};
$hash{$chr} = "$str/$id:$s:$e";
} else {
$hash{$chr} = "$id:$s:$e";
}
}
close IN;
open(IN,$tablefile2) || die "Can't open $tablefile2..exiting.\n";
while(<IN>){
chomp;
next if (/^\s*$/);
my @cols = split(/\t/, $_);
my $id = $cols[0];
my $chr = $cols[1];
my $s = $cols[2];
my $e = $cols[3];
if (exists($hash{$chr})) {
my $str = $hash{$chr};
$hash{$chr} = "$str/$id:$s:$e";
} else {
$hash{$chr} = "$id:$s:$e";
}
}
close IN;
open(IN,$tablefile3) || die "Can't open $tablefile3..exiting.\n";
my $found = 0;
print "Chr\tPSF-ID\tStart\tEnd\tPseudopipe-ID\tStart\tEnd\tStart-diff\tend-diff\tShiu-ID\tStart\tEnd\tstart-diff\tend-diff\n";
while(<IN>){
chomp;
next if (/^\s*$/);
my @cols = split(/\t/, $_);
my $id = $cols[0];
my $chr = $cols[1];
my $s = $cols[2];
my $e = $cols[3];
my $info_str = $hash{$chr};
next if (!$info_str);
my @pseudos = split(/\//, $info_str);
#my @pseudos2 = split(/\//, $info_str);
for (my $i = 0; $i < @pseudos; $i++) {
my ($id1, $s1, $e1) = split(/:/, $pseudos[$i]);
if (&is_overlap($s, $e, $s1, $e1)) {
print "$chr\t$id\t$s\t$e\t$id1\t$s1\t$e1\t";
my $d1 = $s1-$s;
my $d2 = $e1-$e;
print "$d1\t$d2\t";
}
}
for (my $j = 0; $j < @pseudos; $j++) {
my ($id2, $s2, $e2) = split(/:/, $pseudos[$j]);
if (&is_overlap2($s, $e, $s2, $e2)) {
print "$id\t$s2\t$e2\t";
my $d3 = $s2-$s;
my $d4 = $e2-$e;
print "$d3\t$d4\n";
}
}
}
close IN;
sub is_overlap {
my ($s, $e, $s1, $e1) = @_;
my $found = 0;
if ($s >= $s1 && $s < $e1 ||
$s1 >= $s && $s1 < $e ||
$s1 >= $s && $e1 <= $e ||
$s1 <= $s && $e1 >= $e) {
$found = 1;
}
return $found;
}
sub is_overlap2 {
my ($s, $e, $s2, $e2) = @_;
my $found = 0;
if ($s >= $s2 && $s < $e2 ||
$s2 >= $s && $s2 < $e ||
$s2 >= $s && $e2 <= $e ||
$s2 <= $s && $e2 >= $e) {
$found = 1;
}
return $found;
}
Comment