Hi all, I've just started experimenting with using GATK-Queue to run through a GATK pipeline with some scatter/gather parallelism. I'm none too familiar with Java/Scala, but I think the script I've written should work fine.
When I run this script with Queue, it seems to work fine (I've only run it on tiny test BAMs so far), but the jobs don't run in parallel. For all the tasks where I've specified a scatterCount, the task is correctly split into four jobs, but those jobs are then executed one after the other rather than in parallel.
Has anyone encountered anything like this in GATK-Queue before? Is there a command-line switch, or a line of code I need to add to the script, to make it run jobs in parallel? I've looked through the docs and the -help output, and everything seemed to suggest that parallelization should just work out of the box.
EDIT: Really should have looked at the outputs more closely before I posted this - it seems only some of the sections aren't running in parallel. I'll fiddle with the script and report back if I can figure out what's going on.
Command line:
Script:
When I run this script with Queue, it seems to work fine (I've only run it on tiny test BAMs so far), but the jobs don't run in parallel. For all the tasks where I've specified a scatterCount, the task is correctly split into four jobs, but those jobs are then executed one after the other rather than in parallel.
Has anyone encountered anything like this in GATK-Queue before? Is there a command-line switch, or a line of code I need to add to the script, to make it run jobs in parallel? I've looked through the docs and the -help output, and everything seemed to suggest that parallelization should just work out of the box.
EDIT: Really should have looked at the outputs more closely before I posted this - it seems only some of the sections aren't running in parallel. I'll fiddle with the script and report back if I can figure out what's going on.
Command line:
Code:
java -Djava.io.tmpdir=/scratch/queuetmp -jar /resources/Sting/dist/Queue.jar -S /apps/pipeline/script.queue -I /scratch/tmp/testsample.dedup.bam -G /resources/genome/human_g1k_v37.fa -run
Code:
package org.broadinstitute.sting.queue.qscripts.examples

import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk._

/**
 * Queue script that runs a single BAM through, in order of registration:
 * RealignerTargetCreator, IndelRealigner, a `samtools index` call,
 * CountCovariates, TableRecalibration and a second CountCovariates.
 *
 * Every GATK step is configured with a scatterCount of 4. All output file
 * names are derived from the input BAM's name via swapExt.
 */
class MyPipeline extends QScript {
  // Alias for the enclosing script so nested traits/classes can reach its arguments.
  qscript =>

  @Input(doc="Reference genome.", shortName="G")
  var referenceFile: File = _

  @Input(doc="Bam file to process.", shortName="I")
  var bamFile: File = _

  /**
   * Mixin applied to every GATK walker in this script: points the walker at the
   * script's reference file and sets its memory limit to 2
   * (presumably gigabytes -- TODO confirm against the Queue version in use).
   */
  trait MemoryLimitAndReference extends CommandLineGATK {
    this.reference_sequence = qscript.referenceFile
    this.memoryLimit = 2
  }

  /**
   * Command-line function that runs `samtools index` on a single file.
   *
   * NOTE(review): only an @Input is declared here, no @Output. If Queue orders
   * jobs purely from declared inputs/outputs, nothing downstream can wait on the
   * index this produces -- confirm whether that is intended.
   */
  class SamtoolsIndex extends CommandLineFunction {
    @Input(doc="input file")
    var inputFile: File = _

    def commandLine = "samtools index " + inputFile
  }

  /**
   * Builds and registers the pipeline's functions.
   *
   * The walkers are instantiated, and later passed to add(...), in the same
   * order as before; only the local names and layout differ.
   */
  def script(): Unit = {
    val targetCreator = new RealignerTargetCreator with MemoryLimitAndReference
    val realigner = new IndelRealigner with MemoryLimitAndReference
    val preRecalCounts = new CountCovariates with MemoryLimitAndReference
    val postRecalCounts = new CountCovariates with MemoryLimitAndReference
    val recalibrator = new TableRecalibration with MemoryLimitAndReference
    val indexer = new SamtoolsIndex

    // --- Realignment targets: <input>.realignment.intervals ---
    targetCreator.input_file :+= qscript.bamFile
    targetCreator.scatterCount = 4
    targetCreator.known = List("/resources/rods/1000G_biallelic.indels.b37.vcf")
    targetCreator.out = swapExt(qscript.bamFile, "bam", "realignment.intervals")

    // --- Indel realignment: <input>.realigned.bam ---
    realigner.input_file :+= qscript.bamFile
    realigner.targetIntervals = targetCreator.out
    realigner.scatterCount = 4
    // Reuse exactly the known-indel list given to the target creator.
    realigner.known = targetCreator.known
    realigner.consensusDeterminationModel =
      org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel.USE_SW
    realigner.out = swapExt(qscript.bamFile, "bam", "realigned.bam")
    val realignedBam = realigner.out

    // --- Index the realigned BAM ---
    indexer.inputFile = realignedBam

    // --- Covariate counting before recalibration: <realigned>.covariatecount.csv ---
    preRecalCounts.scatterCount = 4
    preRecalCounts.input_file :+= realignedBam
    preRecalCounts.recal_file = swapExt(realignedBam, "bam", "covariatecount.csv")
    preRecalCounts.standard_covs = true
    preRecalCounts.knownSites = List("/resources/rods/dbsnp_132.b37.vcf")

    // --- Table recalibration: <realigned>.recal.bam ---
    recalibrator.scatterCount = 4
    recalibrator.input_file :+= realignedBam
    recalibrator.recal_file = preRecalCounts.recal_file
    recalibrator.out = swapExt(realignedBam, "bam", "recal.bam")

    // --- Covariate counting after recalibration: <realigned>.postrecalcounts.csv ---
    postRecalCounts.scatterCount = 4
    postRecalCounts.input_file :+= recalibrator.out
    postRecalCounts.recal_file = swapExt(realignedBam, "bam", "postrecalcounts.csv")
    postRecalCounts.standard_covs = true
    postRecalCounts.knownSites = List("/resources/rods/dbsnp_132.b37.vcf")

    add(
      targetCreator,
      realigner,
      indexer,
      preRecalCounts,
      recalibrator,
      postRecalCounts
    )
  }
}
Comment