@@ -232,11 +232,170 @@ rule export_measurements:
232232 --output-json {output.measurements} 2>&1 | tee {log}
233233 """
234234
# Checkpoint that runs scripts/get_titers_per_reference.py on one titer
# collection. It is a checkpoint (not a plain rule) because
# aggregate_reference_model_distances_input reads the `references` output
# after this step has run to decide which per-reference files to request.
checkpoint get_titers_per_reference:
    conda:
        "../envs/nextstrain.yaml"
    input:
        titers = "builds/{build_name}/titers/{titer_collection}.tsv",
    output:
        # Text file that downstream code reads one reference per line.
        references = "builds/{build_name}/titer_references/{titer_collection}.txt",
        # Directory output; reference_model_titers_sub expects to find
        # "{reference}.tsv" files under the equivalent path.
        reference_titers_directory = directory("builds/{build_name}/reference_titers/{titer_collection}/"),
    shell:
        r"""
        mkdir -p {output.reference_titers_directory};

        python scripts/get_titers_per_reference.py \
            --titers {input.titers} \
            --output-references {output.references} \
            --output-titers-directory {output.reference_titers_directory}
        """
251+
# Fit an `augur titers sub` model to the titers of a single reference
# (one job per {reference} wildcard value).
#
# NOTE(review): the paths here are built from `build_dir`, whereas the
# checkpoint that writes the per-reference titers and the rule that consumes
# this model use the literal "builds/" prefix. The two only line up when
# build_dir == "builds" -- confirm.
rule reference_model_titers_sub:
    conda:
        "../envs/nextstrain.yaml"
    resources:
        mem_mb = 8000,
    benchmark:
        "benchmarks/titers_sub_{build_name}_{segment}_{titer_collection}_{reference}.txt",
    log:
        "logs/titers_sub_{build_name}_{segment}_{titer_collection}_{reference}.txt",
    input:
        titers = build_dir + "/{build_name}/reference_titers/{titer_collection}/{reference}.tsv",
        tree = rules.refine.output.tree,
        # Not referenced in the shell command; listed only as a dependency
        # (presumably a flag that the translation FASTAs exist -- confirm).
        translations_done = build_dir + "/{build_name}/{segment}/translations.done"
    output:
        titers_model = build_dir + "/{build_name}/{segment}/reference-titers-sub-model/{titer_collection}/{reference}.json",
    params:
        genes = get_titer_collection_genes,
        # One translation FASTA per gene returned by
        # get_titer_collection_genes, in the same order as `genes`.
        translations = lambda wildcards: [
            f"{build_dir}/{wildcards.build_name}/{wildcards.segment}/translations/{gene}_withInternalNodes.fasta"
            for gene in get_titer_collection_genes(wildcards)
        ],
        attribute_prefix_argument = get_titer_collection_attribute_prefix_argument,
    shell:
        """
        augur titers sub \
            --titers {input.titers} \
            --alignment {params.translations} \
            --gene-names {params.genes} \
            --tree {input.tree} \
            --allow-empty-model \
            {params.attribute_prefix_argument} \
            --output {output.titers_model} 2>&1 | tee {log}
        """
281+
# Run scripts/get_antigenic_distances_for_reference_model.py on the titer
# model fitted for one reference together with that reference's titers,
# writing one distances table per {reference}.
rule reference_model_antigenic_distances_between_strains:
    conda:
        "../envs/nextstrain.yaml"
    benchmark:
        "benchmarks/reference_model_antigenic_distances_between_strains_{build_name}_{segment}_{titer_collection}_{reference}.txt"
    log:
        "logs/reference_model_antigenic_distances_between_strains_{build_name}_{segment}_{titer_collection}_{reference}.txt"
    input:
        titer_model = "builds/{build_name}/{segment}/reference-titers-sub-model/{titer_collection}/{reference}.json",
        titers = "builds/{build_name}/reference_titers/{titer_collection}/{reference}.tsv",
    output:
        distances = "builds/{build_name}/{segment}/reference_model_antigenic_distances_between_strains/{titer_collection}/{reference}.tsv",
    shell:
        # stdout and stderr both go to the log file only (no tee to console).
        """
        python3 scripts/get_antigenic_distances_for_reference_model.py \
            --titer-model {input.titer_model} \
            --titers {input.titers} \
            --output {output.distances} &> {log}
        """
300+
def aggregate_reference_model_distances_input(wildcards):
    """Return the per-reference distance tables to aggregate for a collection.

    Input function for ``aggregate_reference_model_distances``. It asks the
    ``get_titers_per_reference`` checkpoint for its ``references`` output and
    builds one distances path per reference listed in that file (one
    reference per line, surrounding whitespace stripped).

    Blank or whitespace-only lines are skipped: an empty reference would
    otherwise expand to a ".../.tsv" path that does not correspond to any
    reference.

    Args:
        wildcards: Wildcards of the requesting job; ``build_name``,
            ``segment`` and ``titer_collection`` are read here, and all
            wildcards are forwarded to the checkpoint lookup.

    Returns:
        A list of path strings, in the order the references appear in the
        file.
    """
    checkpoint_output = checkpoints.get_titers_per_reference.get(**wildcards).output
    with checkpoint_output["references"].open() as fh:
        references = [line.strip() for line in fh]

    return [
        f"builds/{wildcards.build_name}/{wildcards.segment}/reference_model_antigenic_distances_between_strains/{wildcards.titer_collection}/{reference}.tsv"
        for reference in references
        if reference  # ignore empty lines in the references file
    ]
309+
# Concatenate the per-reference distance tables of one titer collection into
# a single TSV. The list of inputs comes from
# aggregate_reference_model_distances_input.
rule aggregate_reference_model_distances:
    conda:
        "../envs/nextstrain.yaml"
    input:
        distances = aggregate_reference_model_distances_input,
    output:
        distances = "builds/{build_name}/{segment}/reference_model_antigenic_distances_between_strains/{titer_collection}.tsv",
    shell:
        r"""
        tsv-append -H {input.distances} > {output.distances}
        """
320+
# Run scripts/generate_collection_config_json.py on the aggregated distances
# table and the tree to produce the collection config JSON that
# export_reference_model_measurements passes as --collection-config.
rule generate_reference_model_collection_config_json:
    conda:
        "../envs/nextstrain.yaml"
    log:
        "logs/generate_reference_model_collection_config_json_{build_name}_{segment}_{titer_collection}.txt"
    input:
        distances = "builds/{build_name}/{segment}/reference_model_antigenic_distances_between_strains/{titer_collection}.tsv",
        tree = "builds/{build_name}/{segment}/tree.nwk",
    output:
        config_json = "builds/{build_name}/{segment}/reference_model_measurements_collection_config/{titer_collection}.json",
    params:
        # Both lists are shell-quoted (":q") when passed to the script.
        groupings = ["reference_strain"],
        fields = ["strain", "reference_strain", "value"],
    shell:
        """
        python3 scripts/generate_collection_config_json.py \
            --tree {input.tree} \
            --collection {input.distances} \
            --groupings {params.groupings:q} \
            --fields {params.fields:q} \
            --output {output.config_json} &> {log}
        """
348+
# Export the aggregated reference-model distances of one titer collection as
# a measurements JSON via `augur measurements export`. get_titer_collections
# requests this output for collections with a truthy "run_reference_models".
rule export_reference_model_measurements:
    conda:
        "../envs/nextstrain.yaml"
    benchmark:
        "benchmarks/export_reference_model_measurements_{build_name}_{segment}_{titer_collection}.txt"
    log:
        "logs/export_reference_model_measurements_{build_name}_{segment}_{titer_collection}.txt"
    input:
        distances = "builds/{build_name}/{segment}/reference_model_antigenic_distances_between_strains/{titer_collection}.tsv",
        configuration = "builds/{build_name}/{segment}/reference_model_measurements_collection_config/{titer_collection}.json",
    output:
        measurements = "builds/{build_name}/{segment}/reference_model_measurements/{titer_collection}.json",
    params:
        strain_column = "test_strain",
        value_column = "log2_titer",
        # Collection title with " (inferred)" appended.
        title = lambda wildcards: get_titer_collection_title(wildcards) + " (inferred)",
        x_axis_label = "inferred log2 titer",
        thresholds = [0.0, 2.0],
        filters = ["reference_strain"],
        include_columns = ["reference_strain"],
    shell:
        # The collection key is the titer collection name plus "_inferred".
        """
        augur measurements export \
            --collection {input.distances} \
            --collection-config {input.configuration} \
            --include-columns {params.include_columns:q} \
            --strain-column {params.strain_column} \
            --value-column {params.value_column} \
            --key {wildcards.titer_collection}_inferred \
            --title {params.title:q} \
            --x-axis-label {params.x_axis_label:q} \
            --thresholds {params.thresholds} \
            --filters {params.filters} \
            --show-threshold \
            --hide-overall-mean \
            --minify-json \
            --output-json {output.measurements} 2>&1 | tee {log}
        """
390+
def get_titer_collections(wildcards):
    """List the measurements JSON files for every titer collection of a build.

    For each entry under ``config["builds"][build_name]["titer_collections"]``
    the collection's measurements JSON is listed, followed by its
    reference-model measurements JSON when the entry has a truthy
    ``run_reference_models`` value.

    Args:
        wildcards: Object providing ``build_name`` and ``segment``.

    Returns:
        A list of path strings in configuration order.
    """
    build_name = wildcards.build_name
    measurement_files = []

    for collection in config["builds"][build_name]["titer_collections"]:
        segment_dir = f"builds/{build_name}/{wildcards.segment}"
        collection_name = collection["name"]
        measurement_files.append(f"{segment_dir}/measurements/{collection_name}.json")

        # Reference-model measurements are opt-in per collection.
        if collection.get("run_reference_models"):
            measurement_files.append(
                f"{segment_dir}/reference_model_measurements/{collection_name}.json"
            )

    return measurement_files
241400
242401rule concat_measurements :
0 commit comments