| 1 | = Workflow 3: sample level variant calling = |
| 2 | [[TOC()]] |
| 3 | |
| 4 | This workflow will call variants for the samples including: |
| 5 | * sample level recalibration |
| 6 | * sample level realignment |
| 7 | N.B. no sample level MarkDuplicates is needed as lanes = libraries. |
| 8 | |
| 9 | Workflow inputs: |
| 10 | * lane.chr.recal.sorted.bam - for all sample lanes: dedupped, recalibrated, realigned, sorted and indexed bams (3) |
| 11 | * sample.chip.vcf - genotypes called from genotype chip |
| 12 | Reference: |
| 13 | * genome.chr.fasta - reference genome split on chromosome |
| 14 | * genome.chr.realign.intervals - targets for realignment per chromosome |
| 15 | * genome.chr.dbsnpXYZ.rod - known SNP variants, here from dbSNP |
| 16 | * genome.chr.indelsXYZ.vcf - known indels, here from 1KG |
| 17 | |
| 18 | Workflow outputs: |
| 19 | * sample.chr.bam - merged bam files per sample |
| 20 | * sample.chr.realign.intervals - realignment target intervals |
| 21 | * sample.chr.realigned.bam - realigned |
| 22 | * sample.chr.matesfixed.bam - fixed pairs in realignment |
| 23 | * sample.chr.indels.vcf - raw indels called |
| 24 | * sample.chr.indels.bed - raw indels annotations |
| 25 | * sample.chr.indels.txt - output from the indel calling |
| 26 | * sample.chr.indels.filtered.bed - indels filtered |
| 27 | * sample.chr.snps.vcf - raw snps called |
| 28 | * sample.chr.snps.filtered.vcf - snps filtered |
| 29 | |
| 30 | == merge-lanes == |
| 31 | Merge lanes into one sample bam |
| 32 | |
| 33 | ||tool: ||sam merge || |
| 34 | ||inputs: ||lane.chr.recal.sorted.bam || |
| 35 | ||outputs: ||sample.chr.bam || |
| 36 | ||docs: ||http://samtools.sourceforge.net/samtools.shtml || |
| 37 | |
| 38 | == !RealignerTargetCreator == |
| 39 | Create realignment targets based on the data (so not only knowns) |
| 40 | |
| 41 | ||tool: ||GenomeAnalysisTK.jar -T RealignerTargetCreator || |
| 42 | ||inputs: ||sample.chr.bam [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod [[BR]]indelsXYZ.vcf || |
| 43 | ||outputs: ||sample.chr.realign.intervals || |
| 44 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels#Creating_Intervals || |
| 45 | |
| 46 | == !IndelRealigner == |
| 47 | Realign based on realignment targets in previous step |
| 48 | |
| 49 | ||tool: ||GenomeAnalysisTK.jar -T IndelRealigner || |
| 50 | ||inputs: ||sample.chr.bam [[BR]]sample.chr.realign.intervals [[BR]] genome.chr.dbsnpXYZ.rod [[BR]] genome.chr.indelsXYZ.vcf || |
| 51 | ||outputs: ||sample.chr.realigned.bam || |
| 52 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels#Realigning || |
| 53 | |
| 54 | == !FixMateInformation == |
| 55 | See description in workflow2, now applied to sample |
| 56 | |
| 57 | ||inputs: ||sample.chr.realigned.bam || |
| 58 | ||outputs: ||sample.chr.matesfixed.bam || |
| 59 | == IndelGenotyperV2 == |
| 60 | Call indels |
| 61 | |
| 62 | ||tool: ||GenomeAnalysisTK.jar -T IndelGenotyperV2 || |
| 63 | ||inputs: ||sample.chr.matesfixed.bam [[BR]]genome.chr.fa || |
| 64 | ||outputs: ||sample.chr.indels.vcf [[BR]]sample.chr.indels.bed [[BR]]sample.chr.indels.txt || |
| 65 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Indel_Genotyper_V2.0 [[BR]]http://www.broadinstitute.org/gsa/wiki/index.php/Firehose_Parameters#SampleIndelGenotyper || |
| 66 | |
| 67 | |
| 68 | == filterSingleSampleCalls == |
| 69 | Filter indels |
| 70 | |
| 71 | ||tool: ||filterSingleSampleCalls.pl || |
| 72 | ||inputs: ||sample.chr.indels.bed || |
| 73 | ||outputs: ||sample.chr.indels.filtered.bed || |
| 74 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Firehose_Parameters#SampleIndelGenotyper || |
| 75 | |
| 76 | == !UnifiedGenotyper == |
| 77 | Call SNPs |
| 78 | |
| 79 | ||tool: ||GenomeAnalysisTK.jar -T UnifiedGenotyper || |
| 80 | ||inputs: ||sample.chr.matesfixed.bam [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod || |
| 81 | ||outputs: ||sample.chr.snps.vcf || |
| 82 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Firehose_Parameters#SetUnifiedGenotypertoEval [[BR]]http://www.broadinstitute.org/gsa/wiki/index.php/Unified_genotyper || |
| 83 | |
| 84 | |
| 85 | == makeIndelMask == |
| 86 | Make indel mask |
| 87 | |
| 88 | ||tool: ||makeIndelMask.py || |
| 89 | ||inputs: ||sample.chr.indels.bed || |
| 90 | ||outputs: ||sample.chr.indels.mask.bed || |
| 91 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Indel_Genotyper_V2.0#Creating_a_indel_mask_file || |
| 92 | |
| 93 | == !VariantFiltration == |
| 94 | Filter variants to get the best calls possible |
| 95 | |
| 96 | ||tool: ||GenomeAnalysisTK.jar -T VariantFiltration || |
| 97 | ||inputs: ||sample.chr.snps.vcf [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod || |
| 98 | ||outputs: ||sample.chr.snps.filtered.vcf || |
| 99 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v2#Integrating_analyses:_getting_the_best_call_set_possible || |
| 100 | |
| 101 | |
| 102 | |
| 103 | == !MergeVcfs == |
| 104 | == !ChipVcf == |
| 105 | Produce vcf for the chips |
| 106 | |
| 107 | == !VariantEval == |
| 108 | Create summary information on the variations called for evaluation. |
| 109 | Run once per sample, comparing sample.snps.filtered.vcf against the chip genotypes (sample.chip.vcf). |
| 110 | |
| 111 | ||tool: ||GenomeAnalysisTK.jar -T VariantEval || |
| 112 | ||inputs: ||sample.snps.vcf [[BR]]sample.chip.vcf [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod|| |
| 113 | ||outputs: ||sample.snps.eval || |
| 114 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/VariantEval || |
| 115 | |
| 116 | |
| 117 | Discussion: |
| 118 | > Do we call SNPs based on the filtered indels or the raw indels? |
| 119 | > Should we realign AGAIN after merge of lanes? |
| 120 | > BAQ? |
| 121 | > MINDEL/PINDEL? |