Skip to content

Commit 2b420c9

Browse files
committed
Decouple dN/dS plots from data
1 parent b613ef2 commit 2b420c9

File tree

6 files changed

+178
-173
lines changed

6 files changed

+178
-173
lines changed

config/design_plots.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,11 @@ DNDS_LABELS <- c(
9393
)
9494

9595
DNDS_COLORS <- c(
96-
dn = "#E53E47",
97-
ds = "#2C47F5"
96+
dN = "#E53E47",
97+
dS = "#2C47F5"
9898
)
9999

100100
DNDS_SHAPES <- c(
101-
dn = 2,
102-
ds = 4
101+
dN = 2,
102+
dS = 4
103103
)

workflow/rules/evolution.smk

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,17 @@ rule N_S_sites:
1414
LOGDIR / "N_S_sites" / "log.txt"
1515
script:
1616
"../scripts/N_S_sites_from_fasta.py"
17+
18+
19+
rule dnds_data:
20+
conda: "../envs/renv.yaml"
21+
input:
22+
n_s_sites = OUTDIR/f"{OUTPUT_NAME}.ancestor.N_S.sites.csv",
23+
variants = OUTDIR/f"{OUTPUT_NAME}.variants.tsv",
24+
metadata = config["METADATA"]
25+
output:
26+
table = report(REPORT_DIR_TABLES/"dnds.csv")
27+
log:
28+
LOGDIR / "dnds_data" / "log.txt"
29+
script:
30+
"../scripts/dnds_data.R"

workflow/rules/report.smk

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -225,22 +225,19 @@ rule time_signal_plot:
225225
"../scripts/report/time_signal_plot.R"
226226

227227

228-
rule evo_plots:
228+
rule dnds_plots:
229229
conda: "../envs/renv.yaml"
230230
params:
231231
design = config["PLOTS"]
232232
input:
233-
n_s_sites = OUTDIR/f"{OUTPUT_NAME}.ancestor.N_S.sites.csv",
234-
vcf = OUTDIR/f"{OUTPUT_NAME}.variants.tsv",
235-
metadata = config["METADATA"]
233+
table = REPORT_DIR_TABLES/"dnds.csv",
236234
output:
237-
plot = report(REPORT_DIR_PLOTS/"figure_11.png"),
238-
plot_omega = report(REPORT_DIR_PLOTS/"figure_12.png"),
239-
table = report(REPORT_DIR_TABLES/"figure_11.csv")
235+
plot_dn_ds = report(REPORT_DIR_PLOTS/"dn_and_ds.png"),
236+
plot_omega = report(REPORT_DIR_PLOTS/"dnds.png"),
240237
log:
241238
LOGDIR / "evo_plots" / "log.txt"
242239
script:
243-
"../scripts/report/evo_plots.R"
240+
"../scripts/report/dnds_plots.R"
244241

245242

246243
rule snp_plots:
@@ -292,8 +289,8 @@ rule report:
292289
tree = report(REPORT_DIR_PLOTS/"allele_freq_tree.png"),
293290
temest = report(REPORT_DIR_PLOTS/"time_signal.png"),
294291
heat_table = report(REPORT_DIR_TABLES/"heatmap.csv"),
295-
evo = report(REPORT_DIR_PLOTS/"figure_11.png"),
296-
omega_plot = report(REPORT_DIR_PLOTS/"figure_12.png"),
292+
evo = report(REPORT_DIR_PLOTS/"dn_and_ds.png"),
293+
omega_plot = report(REPORT_DIR_PLOTS/"dnds.png"),
297294
freyja_ts = OUTDIR/"demixing"/"freyja_data"/"last_barcode_update.txt",
298295
value = REPORT_DIR_TABLES/"diversity.json",
299296
stats_lm = REPORT_DIR_TABLES/"time_signal.json",

workflow/scripts/dnds_data.R

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#!/usr/bin/env Rscript
2+
3+
# Write stdout and stderr to log file
4+
log <- file(snakemake@log[[1]], open = "wt")
5+
sink(log, type = "message")
6+
sink(log, type = "output")
7+
8+
library(tidyverse)
9+
library(logger)
10+
log_threshold(INFO)
11+
12+
# Read inputs
13+
log_info("Reading variants table")
14+
variants <- read_delim(
15+
snakemake@input[["variants"]],
16+
col_select = c(
17+
"SAMPLE",
18+
"VARIANT_NAME",
19+
"REGION",
20+
"ALT_FREQ",
21+
"SYNONYMOUS",
22+
"POS"
23+
)
24+
)
25+
26+
log_info("Reading metadata table")
27+
metadata <- read_delim(snakemake@input[["metadata"]]) %>%
28+
mutate(
29+
interval = as.numeric(
30+
as.Date(CollectionDate) - min(as.Date(CollectionDate))
31+
)
32+
) %>%
33+
select(ID, interval) %>%
34+
rename(SAMPLE = ID)
35+
36+
log_debug("Adding metadata to variants table")
37+
variants <- left_join(variants, metadata)
38+
39+
log_info("Reading N/S sites")
40+
N_S_position <- read_delim(snakemake@input[["n_s_sites"]])
41+
42+
log_info("Computing dN/dS over time (NG86)")
43+
dn.ds <- variants %>%
44+
group_by(SAMPLE, SYNONYMOUS) %>%
45+
summarise(
46+
Freq = sum(ALT_FREQ, na.rm = TRUE)
47+
) %>%
48+
pivot_wider(
49+
names_from = SYNONYMOUS,
50+
values_from = Freq,
51+
values_fill = 0
52+
) %>%
53+
transmute(
54+
dn = No / sum(N_S_position$N),
55+
ds = Yes / sum(N_S_position$S)
56+
) %>%
57+
ungroup() %>%
58+
left_join(unique(select(variants, SAMPLE, interval))) %>%
59+
transmute(
60+
sample = SAMPLE,
61+
day = interval,
62+
dN = dn,
63+
dS = ds,
64+
w = dn / ds
65+
)
66+
67+
log_info("Writing results")
68+
write_csv(dn.ds, snakemake@output[["table"]])
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env Rscript
2+
3+
# Write stdout and stderr to log file
4+
log <- file(snakemake@log[[1]], open = "wt")
5+
sink(log, type = "message")
6+
sink(log, type = "output")
7+
8+
library(tidyverse)
9+
library(logger)
10+
log_threshold(INFO)
11+
12+
# Import file with plots style
13+
source(snakemake@params[["design"]])
14+
15+
# Read inputs
16+
log_info("Reading plot data")
17+
plot.df <- read_delim(snakemake@input$table) %>%
18+
filter(w != Inf) %>%
19+
pivot_longer(
20+
c("dN", "dS", "w"),
21+
values_to = "value",
22+
names_to = "ratio"
23+
)
24+
25+
log_info("Plotting dN and dS")
26+
p.dn.ds <- plot.df %>%
27+
filter(ratio != "w") %>%
28+
ggplot() +
29+
aes(
30+
x = day,
31+
y = value,
32+
color = ratio,
33+
shape = ratio
34+
) +
35+
geom_point(size = 3) +
36+
geom_line() +
37+
scale_color_manual(
38+
name = "",
39+
labels = DNDS_LABELS,
40+
values = DNDS_COLORS
41+
) +
42+
scale_shape_manual(
43+
name = "",
44+
values = DNDS_SHAPES,
45+
labels = DNDS_LABELS
46+
) +
47+
labs(
48+
y = "Substitution rate",
49+
x = "Days since the initial sampling",
50+
color = ""
51+
)
52+
53+
ggsave(
54+
filename = snakemake@output$plot_dn_ds,
55+
plot = p.dn.ds,
56+
width = 159.2,
57+
height = 119.4,
58+
units = "mm",
59+
dpi = 250
60+
)
61+
62+
log_info("Plotting w (dN/dS)")
63+
p.omega <- plot.df %>%
64+
filter(ratio == "w") %>%
65+
ggplot() +
66+
aes(
67+
x = day,
68+
y = value,
69+
) +
70+
geom_point(color = "black") +
71+
geom_line(color = "black") +
72+
labs(
73+
y = "w (dN/dS)",
74+
x = "Days since the initial sampling",
75+
color = ""
76+
)
77+
78+
ggsave(
79+
filename = snakemake@output$plot_omega,
80+
plot = p.omega,
81+
width = 159.2,
82+
height = 119.4,
83+
units = "mm",
84+
dpi = 250
85+
)

workflow/scripts/report/evo_plots.R

Lines changed: 0 additions & 159 deletions
This file was deleted.

0 commit comments

Comments
 (0)