Hi,
I've used the bcf-fix.pl script to fix the GT issue in variation files generated by SAMtools/bcftools, but not all chromosomes made it into the final output. Any idea why this happened?
M.
----------
Source file:
awk '{print $1}' tumor.var.flt.vcf | sort | uniq -c
59619 chr1
36310 chr10
12 chr10_random
33764 chr11
17 chr11_random
33917 chr12
18444 chr13
185 chr13_random
17501 chr14
20774 chr15
13 chr15_random
25582 chr16
3 chr16_random
26877 chr17
614 chr17_random
18071 chr18
10 chr18_random
18327 chr19
233 chr19_random
31 chr1_random
58496 chr2
18017 chr20
9552 chr21
597 chr21_random
9919 chr22
2 chr22_h2_hap1
137 chr22_random
1 chr2_random
40400 chr3
2 chr3_random
42160 chr4
253 chr4_random
37900 chr5
8 chr5_h2_hap1
1 chr5_random
32545 chr6
161 chr6_cox_hap1
139 chr6_qbl_hap2
75 chr6_random
37603 chr7
437 chr7_random
32943 chr8
95 chr8_random
24144 chr9
1047 chr9_random
5 chrM
1 #CHROM
13526 chrX
16 chrX_random
5593 chrY
1 ##fileformat=VCF
----------
Output:
awk '{print $1}' 2007.02142B.normal.merged.var.flt.validated.vcf | sort | uniq -c
124207 chr1
52536 chr10_random
61089 chr11
74452 chr11_random
23769 chr12
22764 chr13
27529 chr14
47784 chr16
217 chr18
43 chr18_random
61082 chr19_random
6426 chr1_random
39164 chr2
45923 chr21_random
40915 chr2_random
197 chr4
126 chr5
33712 chr6_cox_hap1
6598 chr6_qbl_hap2
35625 chr7
95 chr7_random
31326 chr8
1166 chr8_random
13913 chr9
9 chr9_random
1 #CHROM
1 ##fileformat=VCFv4.0
1 ##FORMAT=<ID=GL,Number=3,Type=Float,Description="Likelihoods
1 ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype
1 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
1 ##INFO=<ID=AF1,Number=1,Type=Float,Description="EM
1 ##INFO=<ID=AFE,Number=1,Type=Float,Description="Posterior
1 ##INFO=<ID=DP4,Number=4,Type=Integer,Description="Read
1 ##INFO=<ID=HWE,Number=1,Type=Float,Description="P-value
1 ##INFO=<ID=MQ,Number=1,Type=Integer,Descriptin="RMS
1 ##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values
I will gladly share my files if anyone wants to have a closer look.
I've used the bcf-fix.pl script to fix the GT issue in variation files generated by SAMtools/bcftools, but not all chromosomes made it into the final output. Any idea why this happened?
M.
----------
Source file:
awk '{print $1}' tumor.var.flt.vcf | sort | uniq -c
59619 chr1
36310 chr10
12 chr10_random
33764 chr11
17 chr11_random
33917 chr12
18444 chr13
185 chr13_random
17501 chr14
20774 chr15
13 chr15_random
25582 chr16
3 chr16_random
26877 chr17
614 chr17_random
18071 chr18
10 chr18_random
18327 chr19
233 chr19_random
31 chr1_random
58496 chr2
18017 chr20
9552 chr21
597 chr21_random
9919 chr22
2 chr22_h2_hap1
137 chr22_random
1 chr2_random
40400 chr3
2 chr3_random
42160 chr4
253 chr4_random
37900 chr5
8 chr5_h2_hap1
1 chr5_random
32545 chr6
161 chr6_cox_hap1
139 chr6_qbl_hap2
75 chr6_random
37603 chr7
437 chr7_random
32943 chr8
95 chr8_random
24144 chr9
1047 chr9_random
5 chrM
1 #CHROM
13526 chrX
16 chrX_random
5593 chrY
1 ##fileformat=VCF
----------
Output:
awk '{print $1}' 2007.02142B.normal.merged.var.flt.validated.vcf | sort | uniq -c
124207 chr1
52536 chr10_random
61089 chr11
74452 chr11_random
23769 chr12
22764 chr13
27529 chr14
47784 chr16
217 chr18
43 chr18_random
61082 chr19_random
6426 chr1_random
39164 chr2
45923 chr21_random
40915 chr2_random
197 chr4
126 chr5
33712 chr6_cox_hap1
6598 chr6_qbl_hap2
35625 chr7
95 chr7_random
31326 chr8
1166 chr8_random
13913 chr9
9 chr9_random
1 #CHROM
1 ##fileformat=VCFv4.0
1 ##FORMAT=<ID=GL,Number=3,Type=Float,Description="Likelihoods
1 ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype
1 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
1 ##INFO=<ID=AF1,Number=1,Type=Float,Description="EM
1 ##INFO=<ID=AFE,Number=1,Type=Float,Description="Posterior
1 ##INFO=<ID=DP4,Number=4,Type=Integer,Description="Read
1 ##INFO=<ID=HWE,Number=1,Type=Float,Description="P-value
1 ##INFO=<ID=MQ,Number=1,Type=Integer,Descriptin="RMS
1 ##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values
I will gladly share my files if anyone wants to have a closer look.