| | 1 | = Workflow 1: genome reference file creation = |
| | 2 | [[TOC()]] |
| | 3 | |
| | 4 | This workflow creates reference files per chromosome including: |
| | 5 | * genome, dbsnp and indel vcfs per chromosome |
| | 6 | * realign targets for faster realignment target creation |
| | 7 | * index files for samtools and bwa |
| | 8 | |
| | 9 | Workflow inputs: |
| | 10 | * genome.chr.fa - downloaded from genome supplier (now hg19) |
| | 11 | * dbsnpXYZ.rod - downloaded reference SNPs from dbsnp (now 129) |
| | 12 | * indelsXYZ.vcf - downloaded reference indels from 1KG |
| | 13 | |
| | 14 | Workflow outputs: |
| | 15 | * genome.chr.fa - cleaned headers |
| | 16 | * genome.chr.fa.fai - index for samtools |
| | 17 | * genome.chr.fa.<format> - multiple index files for bwa |
| | 18 | * dbsnpXYZ.chr.rod - split per chromosome |
| | 19 | * indelsXYZ.chr.vcf - split per chromosome |
| | 20 | * genome.chr.realign.intervals - targets for realignment |
| | 21 | |
| | 22 | == clean-fasta-headers == |
| | 23 | Clean the FASTA headers so that each contains only the chromosome name, e.g. '1' instead of 'Chr1' |
| | 24 | |
| | 25 | ||tool: || || |
| | 26 | ||inputs: ||genome.chr.fa || |
| | 27 | ||outputs: ||genome.chr.fa || |
| | 28 | ||doc: ||internally developed || |
| | 29 | |
| | 30 | == split-vcf-chr for dbsnp and indels == |
| | 31 | Split vcf per chromosome |
| | 32 | ||tool: || || |
| | 33 | ||inputs: ||dbsnpXYZ.rod, indelsXYZ.vcf || |
| | 34 | ||outputs: ||dbsnpXYZ.chr.rod, indelsXYZ.chr.vcf || |
| | 35 | ||doc: || || |
| | 36 | |
| | 37 | Discussion: |
| | 38 | > Can we use http://vcftools.sourceforge.net/options.html ? |
| | 39 | >> vcftools --vcf indelsXYZ.vcf --chr <i> --recode --out indelsXYZ.chr |
| | 40 | |
| | 41 | == index-chromosomes == |
| | 42 | Index reference sequence for each chromosome in the FASTA format |
| | 43 | |
| | 44 | ||tool: ||samtools faidx || |
| | 45 | ||input: ||genome.chr.fa || |
| | 46 | ||output: ||genome.chr.fa.fai || |
| | 47 | ||doc: ||http://samtools.sourceforge.net/samtools.shtml#3 || |
| | 48 | |
| | 49 | == bwa-index-chromosomes == |
| | 50 | Index reference sequence for each chromosome for bwa alignment |
| | 51 | |
| | 52 | ||tool: ||bwa index -a IS || |
| | 53 | ||input: ||genome.chr.fa || |
| | 54 | ||output: ||genome.chr.fa.xyz || |
| | 55 | ||doc: ||http://bio-bwa.sourceforge.net/bwa.shtml#3 || |
| | 56 | |
| | 57 | == !RealignerTargetCreator == |
| | 58 | Generate realignment targets for known sites for each chromosome |
| | 59 | |
| | 60 | ||tool: ||GenomeAnalysisTK.jar -T RealignerTargetCreator || |
| | 61 | ||input: ||genome.chr.fa, dbsnpXYZ.chr.rod, indelsXYZ.chr.vcf || |
| | 62 | ||output: ||genome.chr.realign.intervals || |
| | 63 | ||doc: ||http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels#Running_the_Indel_Realigner_only_at_known_sites || |