diff --git a/scripts/run_benchmark/run_full_seqeracloud.sh b/scripts/run_benchmark/run_full_seqeracloud.sh index 33a5bc42..05d10ce7 100755 --- a/scripts/run_benchmark/run_full_seqeracloud.sh +++ b/scripts/run_benchmark/run_full_seqeracloud.sh @@ -67,6 +67,7 @@ celltype_annotation_methods: - ssam - tacco - moscot + - mapmycells expression_correction_methods: - no_correction - gene_efficiency_correction diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh index 94783b7b..8aeac7ff 100755 --- a/scripts/run_benchmark/run_test_local.sh +++ b/scripts/run_benchmark/run_test_local.sh @@ -57,6 +57,7 @@ celltype_annotation_methods: - ssam # - tacco # - moscot + # - mapmycells expression_correction_methods: - no_correction # - gene_efficiency_correction diff --git a/src/methods_cell_type_annotation/mapmycells/config.vsh.yaml b/src/methods_cell_type_annotation/mapmycells/config.vsh.yaml new file mode 100644 index 00000000..e37a79d3 --- /dev/null +++ b/src/methods_cell_type_annotation/mapmycells/config.vsh.yaml @@ -0,0 +1,35 @@ +name: mapmycells +label: "mapmycells" +summary: "Mapping of annotations from single-cell to spatial using moscot" +description: "Mapping of annotations from single-cell to spatial using moscot" +links: + documentation: 'https://github.com/AllenInstitute/cell_type_mapper' + repository: 'https://github.com/AllenInstitute/cell_type_mapper' +references: + doi: "10.1038/s41586-023-06812-z" + +__merge__: /src/api/comp_method_cell_type_annotation.yaml + + +resources: + - type: python_script + path: script.py + +engines: + - type: docker + image: openproblems/base_python:1 + __merge__: + - /src/base/setup_spatialdata_partial.yaml + - /src/base/setup_txsim_partial.yaml + setup: + - type: python + pypi: + - numpy + - git+https://github.com/AllenInstitute/cell_type_mapper.git + - type: native + +runners: + - type: executable + - type: nextflow + directives: + label: [ hightime, midcpu, highmem] \ No newline at end of file diff --git a/src/methods_cell_type_annotation/mapmycells/script.py b/src/methods_cell_type_annotation/mapmycells/script.py new file mode 100644 index 00000000..d9e9c760 --- /dev/null +++ b/src/methods_cell_type_annotation/mapmycells/script.py @@ -0,0 +1,106 @@ +import anndata as ad +import os +import subprocess +import json +import pandas as pd +from pathlib import Path +## VIASH START +par = { + 'input_spatial_normalized_counts': 'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad', + 'input_scrnaseq_reference': 'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad', + 'celltype_key': 'cell_type', + "output": 'spatial_with_celltypes.h5ad' +} +meta = { "temp_dir": './tmp/'} + +## VIASH END + +TMP_DIR = Path(meta["temp_dir"] or "/tmp/") +TMP_DIR.mkdir(parents=True, exist_ok=True) + +adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts']) +adata_sc = ad.read_h5ad(par['input_scrnaseq_reference']) + +if "counts" in adata_sc.layers: + adata_sc.X = adata_sc.layers["counts"] + +adata_sp.var_names = adata_sp.var_names.astype(str) +adata_sc.var_names = adata_sc.var_names.astype(str) +adata_sp.var_names_make_unique() +adata_sc.var_names_make_unique() + +common_genes = list(set(adata_sp.var.index).intersection(adata_sc.var.index)) + +adata_sc = adata_sc[:, common_genes] +sc_path = os.path.join(meta["temp_dir"],"sc_adata_processed.h5ad") +adata_sc.write_h5ad(sc_path) +sp_path = os.path.join(meta["temp_dir"],"sp_processed.h5ad") +adata_sp[:, common_genes].write_h5ad(sp_path) + + + +precomputed_path = os.path.join(meta["temp_dir"],"precomputed_stats.h5ad") + +command = [ + "python", + "-m", + "cell_type_mapper.cli.precompute_stats_scrattch", + "--h5ad_path", + sc_path, + "--hierarchy", + "['cell_type']", + "--output_path", + precomputed_path +] + +subprocess.run(command) + +data = {"None": common_genes} +genes_file_path = os.path.join(meta["temp_dir"],"genes.json") +with open(genes_file_path, "w") as json_file: + json.dump(data, json_file, indent=2) + +command = [ + "python", + "-m", + "cell_type_mapper.cli.from_specified_markers", + "--query_path", + sp_path, + "--type_assignment.normalization", + "log2CPM", + "--precomputed_stats.path", + precomputed_path, + "--query_markers.serialized_lookup", + genes_file_path, + "--csv_result_path", + os.path.join(meta["temp_dir"],"results.csv"), + "--extended_result_path", + os.path.join(meta["temp_dir"], "extended_results.json"), + "--flatten", + "True", + "--type_assignment.bootstrap_iteration", + "1", + "--type_assignment.bootstrap_factor", + "1.0" +] + +subprocess.run(command) +annotation_df = pd.read_csv(os.path.join(meta["temp_dir"],"results.csv"), skiprows=3) +adata_sp.obs[par['celltype_key']] = list(annotation_df['cell_type_label']) + + + +# Delete all temporary files +for file_path in [ + sc_path, + sp_path, + precomputed_path, + genes_file_path, + os.path.join(meta["temp_dir"],"results.csv"), + os.path.join(meta["temp_dir"], "extended_results.json") +]: + if os.path.isfile(file_path): + os.remove(file_path) + + +adata_sp.write_h5ad(par['output']) diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index c9316847..29104273 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -98,7 +98,7 @@ argument_groups: A list of cell type annotation methods to run. type: string multiple: true - default: "ssam:tacco:moscot" + default: "ssam:tacco:moscot:mapmycells" - name: "--expression_correction_methods" description: | A list of expression correction methods to run. @@ -168,6 +168,7 @@ dependencies: - name: methods_cell_type_annotation/ssam - name: methods_cell_type_annotation/tacco - name: methods_cell_type_annotation/moscot + - name: methods_cell_type_annotation/mapmycells - name: methods_expression_correction/no_correction - name: methods_expression_correction/gene_efficiency_correction - name: methods_expression_correction/resolvi_correction diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf index 3c18294c..9ec2c892 100644 --- a/src/workflows/run_benchmark/main.nf +++ b/src/workflows/run_benchmark/main.nf @@ -374,7 +374,8 @@ workflow run_wf { cta_methods = [ ssam, tacco, - moscot + moscot, + mapmycells ] cta_ch = normalization_ch