Skip to content

Commit 3ff2ed0

Browse files
committed
Restrict columns for final variant table to those that make variants unique
1 parent f2098ba commit 3ff2ed0

File tree

3 files changed

+17
-7
lines changed

3 files changed

+17
-7
lines changed

config/config.yaml

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,15 @@ ANNOTATION:
2727
POS: POS
2828
REF: REF
2929
ALT: ALT
30-
EFFECT: "ANN[*].EFFECT"
31-
IMPACT: "ANN[*].IMPACT"
30+
EFFECT: "ANN[*].EFFECT" # hard-coded column
31+
IMPACT: "ANN[*].IMPACT" # hard-coded column
3232
BIOTYPE: "ANN[*].BIOTYPE"
33-
GENE: "ANN[*].GENE"
33+
GENE: "ANN[*].GENE" # hard-coded column
3434
GENEID: "ANN[*].GENEID"
3535
FEATURE: "ANN[*].FEATURE"
3636
FEATUREID: "ANN[*].FEATUREID"
37-
HGVS_P: "ANN[*].HGVS_P"
38-
HGVS_C: "ANN[*].HGVS_C"
37+
HGVS_P: "ANN[*].HGVS_P" # hard-coded column
38+
HGVS_C: "ANN[*].HGVS_C" # hard-coded column
3939
ERRORS: "ANN[*].ERRORS" # hard-coded column
4040
FILTER_INCLUDE:
4141
# IMPACT: [HIGH, MODERATE, LOW]
@@ -51,6 +51,16 @@ ANNOTATION:
5151
- WARNING_TRANSCRIPT_NO_START_CODON
5252
- WARNING_TRANSCRIPT_NO_STOP_CODON
5353
VARIANT_NAME_PATTERN: "{GENE}:{coalesce(HGVS_P, HGVS_C)}" # dplyr's coalesce finds the first non-missing element
54+
SELECT_COLS:
55+
- CHROM
56+
- POS
57+
- REF
58+
- ALT
59+
- EFFECT
60+
- IMPACT
61+
- GENE
62+
- HGVS_P
63+
- HGVS_C
5464
DEMIX:
5565
PATHOGEN: "SARS-CoV-2"
5666
MIN_QUALITY: 20

workflow/rules/vaf.smk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ rule merge_annotation:
198198
conda: "../envs/renv.yaml"
199199
params:
200200
ref_name = config["ALIGNMENT_REFERENCE"],
201-
snpeff_columns = config["ANNOTATION"]["SNPEFF_COLS"].keys(),
201+
select_columns = config["ANNOTATION"]["SELECT_COLS"],
202202
input:
203203
tsv = OUTDIR/"vaf"/"{sample}.masked.prefiltered.tsv",
204204
annot = OUTDIR/"vaf"/"{sample}.vcf_fields.longer.tsv",

workflow/scripts/merge_annotation.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ log_info("Reading annotation table")
3939
annotation <- read_tsv(
4040
snakemake@input$annot,
4141
col_select = c(
42-
unlist(snakemake@params$snpeff_columns),
42+
unlist(snakemake@params$select_columns),
4343
"VARIANT_NAME"
4444
),
4545
col_types = list(

0 commit comments

Comments
 (0)