| | 1 | = Workflow 3: sample level variant calling = |
| | 2 | [[TOC()]] |
| | 3 | |
| | 4 | This workflow will call variants for the samples including: |
| | 5 | * sample level recalibration |
| | 6 | * sample level realignment |
| | 7 | N.B. no sample level MarkDuplicates is needed as lanes = libraries. |
| | 8 | |
| | 9 | Workflow inputs: |
| | 10 | * lane.chr.recal.sorted.bam - for all sample lanes: dedupped, recalibrated, realigned, sorted and indexed bams (3) |
| | 11 | * sample.chip.vcf - genotypes called from genotype chip |
| | 12 | Reference: |
| | 13 | * genome.chr.fasta - reference genome split on chromosome |
| | 14 | * genome.chr.realign.intervals - targets for realignment per chromosome |
| | 15 | * genome.chr.dbsnpXYZ.rod - known snp variants, here from dbsnp |
| | 16 | * genome.chr.indelsXYZ.vcf - known indels, here from 1KG |
| | 17 | |
| | 18 | Workflow outputs: |
| | 19 | * sample.chr.bam - merged bam files per sample |
| | 20 | * sample.chr.realign.intervals - realignment target intervals |
| | 21 | * sample.chr.realigned.bam - realigned |
| | 22 | * sample.chr.matesfixed.bam - fixed pairs in realignment |
| | 23 | * sample.chr.indels.vcf - raw indels called |
| | 24 | * sample.chr.indels.bed - raw indels annotations |
| | 25 | * sample.chr.indels.txt - output from the indel calling |
| | 26 | * sample.chr.indels.filtered.bed - indels filtered |
| | 27 | * sample.chr.snps.vcf - raw snps called |
| | 28 | * sample.chr.snps.filtered.vcf - snps filtered |
| | 29 | |
| | 30 | == merge-lanes == |
| | 31 | Merge lanes into one sample bam |
| | 32 | |
| | 33 | ||tool: ||sam merge || |
| | 34 | ||inputs: ||lane.chr.recal.sorted.bam || |
| | 35 | ||outputs: ||sample.chr.bam || |
| | 36 | ||doc: ||http://samtools.sourceforge.net/samtools.shtml || |
| | 37 | |
| | 38 | == !RealignerTargetCreator == |
| | 39 | Create realignment targets based on the data (so not only knowns) |
| | 40 | |
| | 41 | ||tool: ||GenomeAnalysisTK.jar -T RealignerTargetCreator || |
| | 42 | ||inputs: ||sample.chr.bam [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod [[BR]]indelsXYZ.vcf || |
| | 43 | ||outputs: ||sample.chr.realign.intervals || |
| | 44 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels#Creating_Intervals || |
| | 45 | |
| | 46 | == !IndelRealigner == |
| | 47 | Realign based on realignment targets in previous step |
| | 48 | |
| | 49 | ||tool: ||GenomeAnalysisTK.jar -T IndelRealigner || |
| | 50 | ||inputs: ||sample.chr.bam [[BR]]genome.chr.realign.intervals [[BR]] genome.chr.dbsnpXYZ.rod [[BR]] genome.chr.indelsXYZ.vcf || |
| | 51 | ||outputs: ||sample.chr.realigned.bam || |
| | 52 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels#Realigning || |
| | 53 | |
| | 54 | == !FixMateInformation == |
| | 55 | See description in workflow2, now applied to sample |
| | 56 | |
| | 57 | ||inputs: ||sample.chr.realigned.bam || |
| | 58 | ||outputs: ||sample.chr.matesfixed.bam || |
| | 59 | == IndelGenotyperV2 == |
| | 60 | Call indels |
| | 61 | |
| | 62 | ||tool: ||GenomeAnalysisTK.jar -T IndelGenotyperV2 || |
| | 63 | ||inputs: ||sample.chr.matesfixed.bam [[BR]]genome.chr.fa || |
| | 64 | ||outputs: ||sample.chr.indels.vcf [[BR]]sample.chr.indels.bed [[BR]]sample.chr.indels.txt || |
| | 65 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Indel_Genotyper_V2.0 [[BR]]http://www.broadinstitute.org/gsa/wiki/index.php/Firehose_Parameters#SampleIndelGenotyper || |
| | 68 | == filterSingleSampleCalls == |
| | 69 | Filter indels |
| | 70 | |
| | 71 | ||tool: ||filterSingleSampleCalls.pl || |
| | 72 | ||inputs: ||sample.chr.indels.bed || |
| | 73 | ||outputs: ||sample.chr.indels.filtered.bed || |
| | 74 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Firehose_Parameters#SampleIndelGenotyper || |
| | 75 | |
| | 76 | == !UnifiedGenotyper == |
| | 77 | Call SNPs |
| | 78 | |
| | 79 | ||tool: ||GenomeAnalysisTK.jar -T UnifiedGenotyper || |
| | 80 | ||inputs: ||sample.chr.matesfixed.bam [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod || |
| | 81 | ||outputs: ||sample.chr.snps.vcf || |
| | 82 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Firehose_Parameters#SetUnifiedGenotypertoEval [[BR]]http://www.broadinstitute.org/gsa/wiki/index.php/Unified_genotyper || |
| | 85 | == makeIndelMask == |
| | 86 | Make indel mask |
| | 87 | |
| | 88 | ||tool: ||makeIndelMask.py || |
| | 89 | ||inputs: ||sample.chr.indels.bed || |
| | 90 | ||outputs: ||sample.chr.indels.mask.bed || |
| | 91 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Indel_Genotyper_V2.0#Creating_a_indel_mask_file || |
| | 92 | |
| | 93 | == !VariantFiltration == |
| | 94 | Filter variants to get the best calls possible |
| | 95 | |
| | 96 | ||tool: ||GenomeAnalysisTK.jar -T VariantFiltration || |
| | 97 | ||inputs: ||sample.chr.snps.vcf [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod || |
| | 98 | ||outputs: ||sample.chr.snps.filtered.vcf || |
| | 99 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v2#Integrating_analyses:_getting_the_best_call_set_possible || |
| | 102 | |
| | 103 | == !MergeVcfs == |
| | 104 | == !ChipVcf == |
| | 105 | Produce vcf for the chips |
| | 106 | |
| | 107 | == !VariantEval == |
| | 108 | Create summary information on the variations called for evaluation. |
| | 109 | Run per sample.snps.filtered.vcf against chip. |
| | 110 | |
| | 111 | ||tool: ||GenomeAnalysisTK.jar -T VariantEval || |
| | 112 | ||inputs: ||sample.snps.vcf [[BR]]sample.chip.vcf [[BR]]genome.chr.fa [[BR]]dbsnpXYz.chr.rod|| |
| | 113 | ||outputs: ||sample.snps.eval || |
| | 114 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/VariantEval || |
| | 115 | |
| | 116 | |
| | 117 | Discussion: |
| | 118 | > Do we call SNPs based on the filtered indels or the raw indels? |
| | 119 | > Should we realign AGAIN after merge of lanes? |
| | 120 | > BAQ? |
| | 121 | > MINDEL/PINDEL? |