78 | | |
79 | | == Workflow 1: genome reference file creation == |
80 | | |
81 | | This workflow creates reference files per chromosome including: |
82 | | * genome, dbsnp and indel vcfs per chromosome |
83 | | * realignment targets at known sites, precomputed to speed up realignment |
84 | | * index files for samtools and bwa |
85 | | |
86 | | Workflow inputs: |
87 | | * genome.chr.fa - downloaded from genome supplier (now hg19) |
88 | | * dbsnpXYZ.rod - downloaded reference SNPs from dbsnp (now 129) |
89 | | * indelsXYZ.vcf - downloaded reference indels from 1KG |
90 | | |
91 | | Workflow outputs: |
92 | | * genome.chr.fa - cleaned headers |
93 | | * genome.chr.fa.fai - index for samtools |
94 | | * genome.chr.fa.<format> - multiple index files for bwa |
95 | | * dbsnpXYZ.chr.rod - split per chromosome |
96 | | * indelsXYZ.chr.vcf - split per chromosome |
97 | | * genome.chr.realign.intervals - targets for realignment |
98 | | |
99 | | === clean-fasta-headers === |
100 | | Clean headers so that each contains only the chromosome name, e.g. '1' instead of 'Chr1' |
101 | | |
102 | | ||tool: || || |
103 | | ||inputs: ||genome.chr.fa || |
104 | | ||outputs: ||genome.chr.fa || |
105 | | ||doc: ||internally developed || |
106 | | |
107 | | === split-vcf-chr for dbsnp and indels === |
108 | | Split vcf per chromosome |
109 | | ||tool: || || |
110 | | ||inputs: ||dbsnpXYZ.rod, indelsXYZ.vcf || |
111 | | ||outputs: ||dbsnpXYZ.chr.rod, indelsXYZ.chr.vcf || |
112 | | ||doc: || || |
113 | | |
114 | | Discussion: |
115 | | > Can we use http://vcftools.sourceforge.net/options.html ? |
116 | | >> vcftools --vcf indelsXYZ.vcf --chr <i> --recode --out indelsXYZ.chr |
117 | | |
118 | | === index-chromosomes === |
119 | | Index reference sequence for each chromosome in the FASTA format |
120 | | |
121 | | ||tool: ||samtools faidx || |
122 | | ||input: ||genome.chr.fa || |
123 | | ||output: ||genome.chr.fa.fai || |
124 | | ||doc: ||http://samtools.sourceforge.net/samtools.shtml#3 || |
125 | | |
126 | | === bwa-index-chromosomes === |
127 | | Index reference sequence for each chromosome for bwa alignment |
128 | | |
129 | | ||tool: ||bwa index -a IS || |
130 | | ||input: ||genome.chr.fa || |
131 | | ||output: ||genome.chr.fa.xyz || |
132 | | ||doc: ||http://bio-bwa.sourceforge.net/bwa.shtml#3 || |
133 | | |
134 | | === !RealignerTargetCreator === |
135 | | Generate realignment targets for known sites for each chromosome |
136 | | |
137 | | ||tool: ||GenomeAnalysisTK.jar -T RealignerTargetCreator || |
138 | | ||input: ||genome.chr.fa, dbsnpXYZ.chr.rod, indelsXYZ.chr.vcf || |
139 | | ||output: ||genome.chr.realign.intervals || |
140 | | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels#Running_the_Indel_Realigner_only_at_known_sites || |
141 | | |
142 | | |