Hi!
I'm trying to use the pipeline tophat2 -> HTSEq -> DEXSeq, but I'm stuck on the DEXSEq workflow!
Here is what I did:
1) I ran TopHat2 using a junc file:
2) I converted bam to the sam file:
3) Then I produced the count table using htseq-count
4)Now I'm trying to use the DEXSeq package to test differential use of exons
But then I get an error: Erro em x[[i]] : índice fora de limites
If I try the last command without the GFF file, the object ecs is created without problems, what makes me think that the problem is with my GFF file, the same I used with the HTSeq-count.
Here is a piece of my gff file:
Any sugestion??
Thanks!
I'm trying to use the pipeline tophat2 -> HTSEq -> DEXSeq, but I'm stuck on the DEXSEq workflow!
Here is what I did:
1) I ran TopHat2 using a junc file:
tophat2 -o tophat_out -i 10 -I 30000 -p 8 --library-type fr-unstranded -j combined.juncs -G my.gff --transcriptomeindex=transcriptome_data chr.fa myreads_1.fastq myreads_2.fastq
samtools sort -n accepted_hits.bam accepted_hits.nsorted
samtools view accepted_hits.nsorted.bam > ./accepted_hits.nsorted.sam
samtools view accepted_hits.nsorted.bam > ./accepted_hits.nsorted.sam
htseq-count -i ID accepted_hits.nsorted.sam my.gff -q --stranded=yes > count_HTSeq_exons.txt
#My code in R
library("DEXSeq")
samples = data.frame(
condition = factor(c("sl","all","all","all","all")),
replicate = factor(c(1:1,1:4)),
type = c("single-read", "paired-end", "paired-end", "paired-end", "paired-end"),
row.names = factor(c("cercaria_SL_Trapping_count_HTSeq_exons","Todos_cercaria_ERR022872_count_HTSeq_exons", "Todos_cercaria_ERR022875_count_HTSeq_exons", "Todos_cercaria_ERR022877_count_HTSeq_exons", "Todos_cercaria_ERR022878_count_HTSeq_exons")),
stringsAsFactors = TRUE,
check.names = FALSE
)
annotationfile = file.path("./GFF/v5.07.08.12.chado.HTSeq.gff")
ecs = read.HTSeqCounts(countfiles = paste(rownames(samples), "txt", sep=".") , design = samples, flattenedfile = annotationfile)
sampleNames(ecs) = rownames(samples)
library("DEXSeq")
samples = data.frame(
condition = factor(c("sl","all","all","all","all")),
replicate = factor(c(1:1,1:4)),
type = c("single-read", "paired-end", "paired-end", "paired-end", "paired-end"),
row.names = factor(c("cercaria_SL_Trapping_count_HTSeq_exons","Todos_cercaria_ERR022872_count_HTSeq_exons", "Todos_cercaria_ERR022875_count_HTSeq_exons", "Todos_cercaria_ERR022877_count_HTSeq_exons", "Todos_cercaria_ERR022878_count_HTSeq_exons")),
stringsAsFactors = TRUE,
check.names = FALSE
)
annotationfile = file.path("./GFF/v5.07.08.12.chado.HTSeq.gff")
ecs = read.HTSeqCounts(countfiles = paste(rownames(samples), "txt", sep=".") , design = samples, flattenedfile = annotationfile)
sampleNames(ecs) = rownames(samples)
If I try the last command without the GFF file, the object ecs is created without problems, what makes me think that the problem is with my GFF file, the same I used with the HTSeq-count.
Here is a piece of my gff file:
##gff-version 3
##sequence-region Schisto_mansoni.Chr_1 1 65476681
Schisto_mansoni.Chr_1 chado contig 1 19825 . + . ID=contig_14209;Name=null;
Schisto_mansoni.Chr_1 chado gene 11159 12750 . + . ID=Smp_186980;
Schisto_mansoni.Chr_1 chado exon 11159 11220 . + 0 ID=Smp_186980.1:exon:1;Parent=Smp_186980.1;
Schisto_mansoni.Chr_1 chado exon 12411 12750 . + 0 ID=Smp_186980.1:exon:2;Parent=Smp_186980.1;
Schisto_mansoni.Chr_1 chado mRNA 11159 12750 . + . ID=Smp_186980.1;Parent=Smp_186980;
Schisto_mansoni.Chr_1 chado polypeptide 11159 12750 . + . ID=Smp_186980.1ep;Derives_from=Smp_186980.1;timelastmodified=14.10.2011+12:49:04+BST;feature_id=23195294;
Schisto_mansoni.Chr_1 chado gene 16927 17315 . + . ID=Smp_197050;
Schisto_mansoni.Chr_1 chado exon 16927 17082 . + 0 ID=Smp_197050.1:exon:1;Parent=Smp_197050.1;
Schisto_mansoni.Chr_1 chado exon 17286 17315 . + 0 ID=Smp_197050.1:exon:2;Parent=Smp_197050.1;
Schisto_mansoni.Chr_1 chado polypeptide 16927 17315 . + . ID=Smp_197050.1ep;Derives_from=Smp_197050.1;colour=2;timelastmodified=14.10.2011+12:50:09+BST;feature_id=23195325;
Schisto_mansoni.Chr_1 chado mRNA 16927 17315 . + . ID=Smp_197050.1;Parent=Smp_197050;
Schisto_mansoni.Chr_1 chado gap 19826 20025 . + . ID=Schisto_mansoni.Chr_1.embl:gap:19825-20025;
Schisto_mansoni.Chr_1 chado contig 20026 60973 . + . ID=contig_14210;Name=null;
Schisto_mansoni.Chr_1 chado gene 51849 114890 . + . ID=Smp_160500;
Schisto_mansoni.Chr_1 chado exon 51849 51861 . + 0 ID=Smp_160500.1:exon:1;Parent=Smp_160500.1;
Schisto_mansoni.Chr_1 chado exon 51901 51987 . + 0 ID=Smp_160500.1:exon:2;Parent=Smp_160500.1;
Schisto_mansoni.Chr_1 chado exon 52024 52416 . + 0 ID=Smp_160500.1:exon:3;Parent=Smp_160500.1;
Schisto_mansoni.Chr_1 chado exon 52448 52679 . + 0 ID=Smp_160500.1:exon:4;Parent=Smp_160500.1;
Schisto_mansoni.Chr_1 chado exon 52715 52802 . + 0 ID=Smp_160500.1:exon:5;Parent=Smp_160500.1;
##sequence-region Schisto_mansoni.Chr_1 1 65476681
Schisto_mansoni.Chr_1 chado contig 1 19825 . + . ID=contig_14209;Name=null;
Schisto_mansoni.Chr_1 chado gene 11159 12750 . + . ID=Smp_186980;
Schisto_mansoni.Chr_1 chado exon 11159 11220 . + 0 ID=Smp_186980.1:exon:1;Parent=Smp_186980.1;
Schisto_mansoni.Chr_1 chado exon 12411 12750 . + 0 ID=Smp_186980.1:exon:2;Parent=Smp_186980.1;
Schisto_mansoni.Chr_1 chado mRNA 11159 12750 . + . ID=Smp_186980.1;Parent=Smp_186980;
Schisto_mansoni.Chr_1 chado polypeptide 11159 12750 . + . ID=Smp_186980.1ep;Derives_from=Smp_186980.1;timelastmodified=14.10.2011+12:49:04+BST;feature_id=23195294;
Schisto_mansoni.Chr_1 chado gene 16927 17315 . + . ID=Smp_197050;
Schisto_mansoni.Chr_1 chado exon 16927 17082 . + 0 ID=Smp_197050.1:exon:1;Parent=Smp_197050.1;
Schisto_mansoni.Chr_1 chado exon 17286 17315 . + 0 ID=Smp_197050.1:exon:2;Parent=Smp_197050.1;
Schisto_mansoni.Chr_1 chado polypeptide 16927 17315 . + . ID=Smp_197050.1ep;Derives_from=Smp_197050.1;colour=2;timelastmodified=14.10.2011+12:50:09+BST;feature_id=23195325;
Schisto_mansoni.Chr_1 chado mRNA 16927 17315 . + . ID=Smp_197050.1;Parent=Smp_197050;
Schisto_mansoni.Chr_1 chado gap 19826 20025 . + . ID=Schisto_mansoni.Chr_1.embl:gap:19825-20025;
Schisto_mansoni.Chr_1 chado contig 20026 60973 . + . ID=contig_14210;Name=null;
Schisto_mansoni.Chr_1 chado gene 51849 114890 . + . ID=Smp_160500;
Schisto_mansoni.Chr_1 chado exon 51849 51861 . + 0 ID=Smp_160500.1:exon:1;Parent=Smp_160500.1;
Schisto_mansoni.Chr_1 chado exon 51901 51987 . + 0 ID=Smp_160500.1:exon:2;Parent=Smp_160500.1;
Schisto_mansoni.Chr_1 chado exon 52024 52416 . + 0 ID=Smp_160500.1:exon:3;Parent=Smp_160500.1;
Schisto_mansoni.Chr_1 chado exon 52448 52679 . + 0 ID=Smp_160500.1:exon:4;Parent=Smp_160500.1;
Schisto_mansoni.Chr_1 chado exon 52715 52802 . + 0 ID=Smp_160500.1:exon:5;Parent=Smp_160500.1;
Any sugestion??
Thanks!