# Patch summary (text before the first "diff --git" header is ignored by patch tools):
#   * hard filters become named, per-filter expressions (config, schema, get_filter)
#   * the combine_calls rule (CombineGVCFs wrapper) is replaced by genomics_db_import
#   * the bwa index/mem wrappers are replaced by their bwa-mem2 counterparts
# Review fixes folded into this patch:
#   * indels use "SOR > 10.0" (filter name SOR10) instead of reusing the SNV cutoff
#   * bwa_mem2_index logs to logs/bwa_mem2_index.log, matching the renamed rule
#   * the schema requires every named filter expression to be a string
# NOTE(review): indentation and blank context lines were reconstructed from the
# hunk line counts; the post-image blob ids on the "index" lines of config.yaml,
# ref.smk and config.schema.yaml were not recomputed after the review fixes.
diff --git a/config/config.yaml b/config/config.yaml
index e8db779..ca79735 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -17,9 +17,17 @@ filtering:
     # hard filtering as outlined in GATK docs
     # (https://gatkforums.broadinstitute.org/gatk/discussion/2806/howto-apply-hard-filters-to-a-call-set)
     snvs:
-      "QD < 2.0 || FS > 60.0 || MQ < 40.0 || MQRankSum < -12.5 || ReadPosRankSum < -8.0"
+      QD2: "QD < 2.0"
+      SOR3: "SOR > 3.0"
+      FS60: "FS > 60.0"
+      MQ40: "MQ < 40.0"
+      MQRS125: "MQRankSum < -12.5"
+      RPRS8: "ReadPosRankSum < -8.0"
     indels:
-      "QD < 2.0 || FS > 200.0 || ReadPosRankSum < -20.0"
+      QD2: "QD < 2.0"
+      SOR10: "SOR > 10.0"
+      FS200: "FS > 200.0"
+      RPRS20: "ReadPosRankSum < -20.0"
 
 processing:
   remove-duplicates: true
diff --git a/workflow/rules/calling.smk b/workflow/rules/calling.smk
index f675f6c..67727f5 100644
--- a/workflow/rules/calling.smk
+++ b/workflow/rules/calling.smk
@@ -33,24 +33,25 @@ rule call_variants:
         "0.59.0/bio/gatk/haplotypecaller"
 
 
-rule combine_calls:
+rule genomics_db_import:
     input:
-        ref="resources/genome.fasta",
         gvcfs=expand(
             "results/called/{sample}.{{contig}}.g.vcf.gz", sample=samples.index
         ),
     output:
-        gvcf="results/called/all.{contig}.g.vcf.gz",
+        db=directory("results/db/{contig}"),
     log:
-        "logs/gatk/combinegvcfs.{contig}.log",
+        "logs/gatk/genomicsdbimport.{contig}.log",
+    params:
+        intervals="{contig}",
     wrapper:
-        "0.74.0/bio/gatk/combinegvcfs"
+        "0.74.0/bio/gatk/genomicsdbimport"
 
 
 rule genotype_variants:
     input:
         ref="resources/genome.fasta",
-        gvcf="results/called/all.{contig}.g.vcf.gz",
+        genomicsdb="results/db/{contig}",
     output:
         vcf=temp("results/genotyped/all.{contig}.vcf.gz"),
     params:
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index c47edc1..a18bfed 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -135,4 +135,4 @@ def get_vartype_arg(wildcards):
 
 
 def get_filter(wildcards):
-    return {"snv-hard-filter": config["filtering"]["hard"][wildcards.vartype]}
+    return config["filtering"]["hard"][wildcards.vartype]
diff --git a/workflow/rules/mapping.smk b/workflow/rules/mapping.smk
index 8298866..c9709cd 100644
--- a/workflow/rules/mapping.smk
+++ b/workflow/rules/mapping.smk
@@ -33,11 +33,11 @@ rule trim_reads_pe:
 rule map_reads:
     input:
         reads=get_trimmed_reads,
-        idx=rules.bwa_index.output,
+        idx=rules.bwa_mem2_index.output,
     output:
         temp("results/mapped/{sample}-{unit}.sorted.bam"),
     log:
-        "logs/bwa_mem/{sample}-{unit}.log",
+        "logs/bwa_mem2/{sample}-{unit}.log",
     params:
         index=lambda w, input: os.path.splitext(input.idx[0])[0],
         extra=get_read_group,
@@ -45,7 +45,7 @@ rule map_reads:
         sort_order="coordinate",
     threads: 8
     wrapper:
-        "0.74.0/bio/bwa/mem"
+        "0.74.0/bio/bwa-mem2/mem"
 
 
 rule mark_duplicates:
diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk
index 1a77646..1f277f2 100644
--- a/workflow/rules/qc.smk
+++ b/workflow/rules/qc.smk
@@ -29,7 +29,7 @@ rule multiqc:
                 "results/qc/fastqc/{u.sample}-{u.unit}.zip",
                 "results/qc/dedup/{u.sample}-{u.unit}.metrics.txt",
             ],
-            u=units.itertuples(),
+            u=list(units.itertuples()),
         ),
     output:
         report(
diff --git a/workflow/rules/ref.smk b/workflow/rules/ref.smk
index 25409f4..ff13823 100644
--- a/workflow/rules/ref.smk
+++ b/workflow/rules/ref.smk
@@ -85,18 +85,18 @@ rule tabix_known_variants:
         "0.74.0/bio/tabix"
 
 
-rule bwa_index:
+rule bwa_mem2_index:
     input:
         "resources/genome.fasta",
     output:
-        multiext("resources/genome.fasta", ".amb", ".ann", ".bwt", ".pac", ".sa"),
+        multiext("resources/genome.fasta", ".0123", ".amb", ".ann", ".bwt.2bit.64", ".pac"),
     log:
-        "logs/bwa_index.log",
+        "logs/bwa_mem2_index.log",
     resources:
         mem_mb=369000,
     cache: True
     wrapper:
-        "0.74.0/bio/bwa/index"
+        "0.74.0/bio/bwa-mem2/index"
 
 
 rule get_vep_cache:
diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml
index 0b356e7..e445bd3 100644
--- a/workflow/schemas/config.schema.yaml
+++ b/workflow/schemas/config.schema.yaml
@@ -33,9 +33,13 @@ properties:
         type: object
         properties:
           snvs:
-            type: string
+            type: object
+            additionalProperties:
+              type: string
           indels:
-            type: string
+            type: object
+            additionalProperties:
+              type: string
         required:
           - snvs
           - indels