@@ -7,22 +7,45 @@ sink(log, type = "output")
77
88library(tidyverse )
99
# Replace empty-string ("") values with NA in the R filter lists.
# Snakemake passes filters like: list(ERRORS = c(""))
#
# x: vector of filter values for a single column.
# Returns x with every "" element replaced by NA; all other elements
# (including values that are already NA) are left unchanged.
empty.to.na <- function(x) {
  # %in% never yields NA, so the mask is safe for vectors that already hold NA
  x[x %in% ""] <- NA
  x
}
# Apply empty.to.na to every element of the include/exclude filter lists
# provided through the Snakemake params. lapply keeps the list names, which
# identify the column each value vector applies to.
filter.include <- lapply(snakemake@params$filter_include, empty.to.na)
filter.exclude <- lapply(snakemake@params$filter_exclude, empty.to.na)
18+
# Process input table: read the TSV, expand and clean the "...[*]..." columns,
# rename them, apply the configured include/exclude filters, de-duplicate,
# name each variant and write the result
read_tsv(snakemake@input$tsv) %>%
  # Separate <sep>-delimited "...[*]..." columns (e.g. ANN[*].EFFECT)
  separate_longer_delim(contains("[*]"), delim = snakemake@params$sep) %>%
  # Replace empty ("") fields with NA
  mutate(across(contains("[*]"), ~ na_if(., ""))) %>%
  # Rename "...[*]..." columns using the provided lookup via Snakemake config
  rename(all_of(unlist(snakemake@params$colnames_mapping))) %>%
  # Apply dynamic filters from the Snakemake config:
  # map2 pairs column names (.x) with value vectors (.y) and builds boolean
  # expressions. Inside the expr call, !! injects a single value into each
  # expression. The resulting list of expressions is spliced with !!! so each
  # becomes its own condition as if written directly inside the filter call.
  filter(
    # Keep variants that include the required values in each defined field
    # (e.g. empty ERRORS)
    !!!map2(
      names(filter.include),
      filter.include,
      ~ expr(.data[[!!.x]] %in% !!.y)
    ),
    # Keep variants that exclude the required values in each defined field
    # (e.g. EFFECT != "upstream_gene_variant")
    !!!map2(
      names(filter.exclude),
      filter.exclude,
      ~ expr(!(.data[[!!.x]] %in% !!.y))
    )
  ) %>%
  # Keep unique rows
  distinct() %>%
  # Assign variant name using the pattern defined via Snakemake config
  mutate(VARIANT_NAME = str_glue(snakemake@params$variant_name_pattern)) %>%
  # Write output file
  write_tsv(snakemake@output$tsv)
0 commit comments