Header Leaderboard Ad

Collapse

Bedtools question

Collapse

Announcement

Collapse
No announcement yet.
X
 
  • Filter
  • Time
  • Show
Clear All
new posts

  • Bedtools question

    I am new with bedtools..So I have a perl script to compare three file in overlap manner, however my script is not perfectly improved to compare such thing.
    I need to compare file 1 to 2, file 1 to 3 and file 2 to 3, I expect a common file for result...

    file1:
    AT4G01510.1 1 6993 7241
    AT1G01020.2 1 7320 8668
    file2:
    AT5G35640.1 1 6993 7246
    AT5G23980.1 1 7320 8669
    file3:
    AT1G01010.1 1 6993 7267
    AT1G01020.1 1 7320 8634

    #########################
    Script:

    #!/usr/bin/perl -w
    use strict;
    use Getopt::Std;
    use vars qw ($opt_s $opt_p $opt_t);
    getopts ('s:t:');

    if(! $opt_s || !$opt_p || !$opt_t){
    print "Usage: $0\n";
    print "-s file1 \n";
    print "-p file2 \n";
    print "-t file3 \n";
    exit;
    }

    my $tablefile1 = $opt_s;
    my $tablefile2 = $opt_p;
    my $tablefile3 = $opt_t;



    if(!$tablefile1){
    die "Invalid data file name $tablefile1.\n";
    }
    if(!$tablefile2){
    die "Invalid data file name $tablefile2.\n";
    }
    if(!$tablefile3){
    die "Invalid data file name $tablefile3.\n";
    }

    open(IN,$tablefile1) || die "Can't open $tablefile1..exiting.\n";

    my %hash = ();
    my @coord_arr = ();
    my @chrs = ();
    while(<IN>){
    chomp;
    next if (/^\s*$/);
    my @cols = split(/\t/, $_);

    my $id = $cols[0];

    my $chr = $cols[1];
    my $s = $cols[2];
    my $e = $cols[3];

    if (exists($hash{$chr})) {
    my $str = $hash{$chr};
    $hash{$chr} = "$str/$id:$s:$e";
    } else {
    $hash{$chr} = "$id:$s:$e";
    }

    }
    close IN;

    open(IN,$tablefile2) || die "Can't open $tablefile2..exiting.\n";

    while(<IN>){
    chomp;
    next if (/^\s*$/);
    my @cols = split(/\t/, $_);

    my $id = $cols[0];

    my $chr = $cols[1];
    my $s = $cols[2];
    my $e = $cols[3];

    if (exists($hash{$chr})) {
    my $str = $hash{$chr};
    $hash{$chr} = "$str/$id:$s:$e";
    } else {
    $hash{$chr} = "$id:$s:$e";
    }

    }
    close IN;

    open(IN,$tablefile3) || die "Can't open $tablefile3..exiting.\n";
    my $found = 0;
    print "Chr\tPSF-ID\tStart\tEnd\tPseudopipe-ID\tStart\tEnd\tStart-diff\tend-diff\tShiu-ID\tStart\tEnd\tstart-diff\tend-diff\n";
    while(<IN>){
    chomp;
    next if (/^\s*$/);
    my @cols = split(/\t/, $_);

    my $id = $cols[0];

    my $chr = $cols[1];
    my $s = $cols[2];
    my $e = $cols[3];

    my $info_str = $hash{$chr};
    next if (!$info_str);

    my @pseudos = split(/\//, $info_str);
    #my @pseudos2 = split(/\//, $info_str);

    for (my $i = 0; $i < @pseudos; $i++) {
    my ($id1, $s1, $e1) = split(/:/, $pseudos[$i]);

    if (&is_overlap($s, $e, $s1, $e1)) {
    print "$chr\t$id\t$s\t$e\t$id1\t$s1\t$e1\t";
    my $d1 = $s1-$s;
    my $d2 = $e1-$e;
    print "$d1\t$d2\t";
    }

    }
    for (my $j = 0; $j < @pseudos; $j++) {
    my ($id2, $s2, $e2) = split(/:/, $pseudos[$j]);

    if (&is_overlap2($s, $e, $s2, $e2)) {
    print "$id\t$s2\t$e2\t";
    my $d3 = $s2-$s;
    my $d4 = $e2-$e;
    print "$d3\t$d4\n";
    }
    }

    }

    close IN;

    sub is_overlap {
    my ($s, $e, $s1, $e1) = @_;

    my $found = 0;

    if ($s >= $s1 && $s < $e1 ||
    $s1 >= $s && $s1 < $e ||
    $s1 >= $s && $e1 <= $e ||
    $s1 <= $s && $e1 >= $e) {
    $found = 1;
    }

    return $found;
    }

    sub is_overlap2 {
    my ($s, $e, $s2, $e2) = @_;

    my $found = 0;

    if ($s >= $s2 && $s < $e2 ||
    $s2 >= $s && $s2 < $e ||
    $s2 >= $s && $e2 <= $e ||
    $s2 <= $s && $e2 >= $e) {
    $found = 1;
    }

    return $found;
    }

  • #2
    @sekhwal: Please don't post unrelated questions in threads that have the right words in title.

    I have moved your post to a new thread.

    Look into bedops or bedtools intersect option for your needs.
    Last edited by GenoMax; 09-25-2015, 09:17 AM.

    Comment

    Latest Articles

    Collapse

    ad_right_rmr

    Collapse

    News

    Collapse

    Topics Statistics Last Post
    Started by seqadmin, Yesterday, 09:22 AM
    0 responses
    8 views
    0 likes
    Last Post seqadmin  
    Started by seqadmin, 05-24-2023, 09:49 AM
    0 responses
    9 views
    0 likes
    Last Post seqadmin  
    Started by seqadmin, 05-23-2023, 07:14 AM
    0 responses
    27 views
    0 likes
    Last Post seqadmin  
    Started by seqadmin, 05-18-2023, 11:36 AM
    0 responses
    113 views
    0 likes
    Last Post seqadmin  
    Working...
    X