Skip to content

Commit 6455731

Browse files
authored
Add control methods (#48)
* Add negative control * Add positive control * Add multiple outputs for control method and rename negative control * Adjust positive control for multiple outputs * Delete PCA in control dummy spatial data to prevent assertion error in metrics * Add control methods to benchmark workflow * Add pars to positive control script
1 parent 16c6e21 commit 6455731

File tree

8 files changed

+301
-4
lines changed

8 files changed

+301
-4
lines changed

src/api/comp_control_method.yaml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
namespace: control_methods
2+
3+
info:
4+
type: control_method
5+
type_info:
6+
label: Control method
7+
summary: A control method for the ist preprocessing task.
8+
description: |
9+
A control method for the imaging-based spatial transcriptomics preprocessing task.
10+
11+
arguments:
12+
- name: --input_scrnaseq_reference
13+
required: true
14+
direction: input
15+
__merge__: file_scrnaseq_reference.yaml
16+
- name: --output
17+
required: true
18+
direction: output
19+
__merge__: file_spatial_corrected_counts.yaml
20+
- name: --output_transcript_assignments
21+
required: true
22+
direction: output
23+
__merge__: file_transcript_assignments.yaml
24+
- name: --output_qc_col
25+
required: true
26+
direction: output
27+
__merge__: file_spatial_qc_col.yaml
28+
29+
test_resources:
30+
- path: /resources_test/task_ist_preprocessing/mouse_brain_combined
31+
dest: resources_test/task_ist_preprocessing/mouse_brain_combined
32+
- type: python_script
33+
path: /common/component_tests/run_and_check_output.py
34+
- type: python_script
35+
path: /common/component_tests/check_config.py
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
__merge__: /src/api/comp_control_method.yaml
2+
name: identity
3+
label: Identical copy of the scRNAseq reference
4+
summary: Identical copy of the scRNAseq reference
5+
description: The scRNAseq reference is taken as processed spatial data
6+
resources:
7+
- type: python_script
8+
path: script.py
9+
- path: /src/control_methods/util.py
10+
engines:
11+
- type: docker
12+
image: openproblems/base_python:1
13+
__merge__:
14+
- /src/base/setup_spatialdata_partial.yaml
15+
runners:
16+
- type: executable
17+
- type: nextflow
18+
directives:
19+
label: [midtime, midmem, lowcpu]
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import sys
2+
import numpy as np
3+
import anndata as ad
4+
5+
## VIASH START
6+
par = {
7+
'input_scrnaseq_reference': 'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
8+
'output': 'output.h5ad',
9+
'output_transcript_assignments': 'output_transcript_assignments.zarr',
10+
'output_qc_col': 'output_qc_col.h5ad',
11+
}
12+
meta = {
13+
"resources_dir": "src/control_methods/"
14+
}
15+
## VIASH END
16+
17+
# add helper scripts to path
18+
sys.path.append(meta["resources_dir"])
19+
from util import add_layers_obs_var_to_scrnaseq_ref, create_dummy_transcript_assignment_table
20+
21+
print('Read input_scrnaseq_reference', flush=True)
22+
adata = ad.read_h5ad(par['input_scrnaseq_reference'])
23+
24+
# Generate expected output of dummy processed spatial data
25+
print("Add required layers, obs and var columns for spatial data", flush=True)
26+
add_layers_obs_var_to_scrnaseq_ref(adata)
27+
28+
print("Delete obsm, obsp and varm", flush=True)
29+
del adata.obsm
30+
del adata.varm
31+
del adata.obsp
32+
33+
print("Create dummy transcript assignment table", flush=True)
34+
sdata_transcripts_only = create_dummy_transcript_assignment_table(adata)
35+
36+
print("Create dummy qc column", flush=True)
37+
adata_qc = ad.AnnData(obs=adata.obs[[]])
38+
adata_qc.obs["passed_QC"] = True
39+
40+
# Write outputs
41+
print("Write h5ad", flush=True)
42+
adata.write_h5ad(par['output'], compression='gzip')
43+
44+
print("Write transcripts zarr", flush=True)
45+
sdata_transcripts_only.write(par['output_transcript_assignments'])
46+
47+
print("Write qc column", flush=True)
48+
adata_qc.write_h5ad(par['output_qc_col'], compression='gzip')
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
__merge__: /src/api/comp_control_method.yaml
2+
name: permute_celltype_annotations
3+
label: Permute celltype annotations
4+
summary: Celltype annotations are randomly permuted
5+
description: The scRNAseq reference's celltype annotations are randomly permuted and taken as processed spatial data
6+
arguments:
7+
- name: --seed
8+
type: integer
9+
default: 0
10+
resources:
11+
- type: python_script
12+
path: script.py
13+
- path: /src/control_methods/util.py
14+
engines:
15+
- type: docker
16+
image: openproblems/base_python:1
17+
__merge__:
18+
- /src/base/setup_spatialdata_partial.yaml
19+
runners:
20+
- type: executable
21+
- type: nextflow
22+
directives:
23+
label: [midtime, midmem, lowcpu]
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import sys
2+
import numpy as np
3+
import anndata as ad
4+
5+
## VIASH START
6+
par = {
7+
'input_scrnaseq_reference': 'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
8+
'output': 'output.h5ad',
9+
'output_transcript_assignments': 'output_transcript_assignments.zarr',
10+
'output_qc_col': 'output_qc_col.h5ad',
11+
'seed': 0,
12+
}
13+
meta = {
14+
"resources_dir": "src/control_methods/"
15+
}
16+
## VIASH END
17+
18+
# add helper scripts to path
19+
sys.path.append(meta["resources_dir"])
20+
from util import add_layers_obs_var_to_scrnaseq_ref, create_dummy_transcript_assignment_table
21+
22+
# Generate control adata
23+
np.random.seed(par['seed'])
24+
25+
print('Read input_scrnaseq_reference', flush=True)
26+
adata = ad.read_h5ad(par['input_scrnaseq_reference'])
27+
28+
print("Randomise ct annotations", flush=True)
29+
ct_annotations = np.random.permutation(adata.obs["cell_type"])
30+
adata.obs["cell_type"] = ct_annotations
31+
32+
# Generate expected output of dummy processed spatial data
33+
print("Add required layers, obs and var columns for spatial data", flush=True)
34+
add_layers_obs_var_to_scrnaseq_ref(adata)
35+
36+
print("Delete obsm, obsp and varm", flush=True)
37+
del adata.obsm
38+
del adata.varm
39+
del adata.obsp
40+
41+
print("Create dummy transcript assignment table", flush=True)
42+
sdata_transcripts_only = create_dummy_transcript_assignment_table(adata)
43+
44+
print("Create dummy qc column", flush=True)
45+
adata_qc = ad.AnnData(obs=adata.obs[[]])
46+
adata_qc.obs["passed_QC"] = True
47+
48+
# Write outputs
49+
print("Write h5ad", flush=True)
50+
adata.write_h5ad(par['output'], compression='gzip')
51+
52+
print("Write transcripts zarr", flush=True)
53+
sdata_transcripts_only.write(par['output_transcript_assignments'])
54+
55+
print("Write qc column", flush=True)
56+
adata_qc.write_h5ad(par['output_qc_col'], compression='gzip')

src/control_methods/util.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import numpy as np
2+
import pandas as pd
3+
import anndata as ad
4+
import spatialdata as sd
5+
6+
def create_dummy_transcript_assignment_table(adata: ad.AnnData) -> sd.SpatialData:
7+
""" Create a dummy transcript assignment table from an AnnData object.
8+
9+
Arguments
10+
---------
11+
adata: ad.AnnData
12+
The AnnData object to create a dummy transcript assignment table from.
13+
14+
Returns
15+
-------
16+
sdata_transcripts_only: sd.SpatialData
17+
The SpatialData object with the dummy transcript assignment table.
18+
"""
19+
20+
# Convert the sparse matrix to coo for access of row and col as arrays
21+
coo = adata.layers["counts"].tocoo()
22+
23+
# Get cell and gene vectors
24+
counts = np.astype(coo.data, np.int64)
25+
cell_id_idx = np.repeat(coo.row, counts)
26+
gene_id_idx = np.repeat(coo.col, counts)
27+
28+
obs_names = adata.obs_names.values
29+
var_names = adata.var_names.values
30+
31+
cell_ids = obs_names[cell_id_idx]
32+
genes = var_names[gene_id_idx]
33+
34+
# Create the dummy transcript assignment table
35+
transcripts_df = pd.DataFrame({
36+
"x": cell_id_idx,
37+
"y": cell_id_idx,
38+
"z": 0,
39+
"feature_name": genes,
40+
"cell_id": cell_ids,
41+
"overlaps_nucleus": 0,
42+
"qv": 0,
43+
"transcript_id": [i for i in range(len(cell_ids))]
44+
})
45+
46+
# Create the transcripts sdata
47+
sdata_table = ad.AnnData(obs=adata.obs[[]], var=adata.var[[]])
48+
sdata_table.obs["cell_id"] = adata.obs_names.values
49+
50+
sdata_transcripts_only = sd.SpatialData(
51+
points={"transcripts": sd.models.PointsModel.parse(transcripts_df)},
52+
tables={"table": sdata_table}
53+
)
54+
55+
return sdata_transcripts_only
56+
57+
58+
def add_layers_obs_var_to_scrnaseq_ref(adata: ad.AnnData) -> ad.AnnData:
59+
""" Add layers, obs and var columns to an AnnData object to have the same structure as the processed spatial data.
60+
61+
Arguments
62+
---------
63+
adata: ad.AnnData
64+
The AnnData object to add layers, obs and var columns to.
65+
"""
66+
67+
adata.layers["normalized_uncorrected"] = adata.layers["normalized"]
68+
adata.obs["cell_id"] = adata.obs.index
69+
adata.obs["centroid_x"] = 0
70+
adata.obs["centroid_y"] = 0
71+
adata.obs["centroid_z"] = 0
72+
adata.obs["n_counts"] = np.array(adata.layers["counts"].sum(axis=1))[:,0]
73+
adata.obs["n_genes"] = np.array((adata.layers["counts"] > 0).sum(axis=1))[:,0]
74+
adata.obs["volume"] = 1
75+
adata.var["gene_name"] = adata.var.index
76+
adata.var["n_counts"] = np.array(adata.layers["counts"].sum(axis=0))[0,:]
77+
adata.var["n_cells"] = np.array((adata.layers["counts"] > 0).sum(axis=0))[0,:]

src/workflows/run_benchmark/config.vsh.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ resources:
5353
dependencies:
5454
- name: utils/extract_uns_metadata
5555
repository: openproblems
56+
- name: control_methods/identity
57+
- name: control_methods/permute_celltype_annotations
5658
- name: methods_segmentation/custom_segmentation
5759
- name: methods_transcript_assignment/basic_transcript_assignment
5860
- name: methods_count_aggregation/basic_count_aggregation

src/workflows/run_benchmark/main.nf

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,36 @@ workflow run_wf {
3535
}
3636
)
3737

38+
/****************************************
39+
* CONTROL METHODS *
40+
****************************************/
41+
control_methods = [
42+
identity,
43+
permute_celltype_annotations
44+
]
45+
control_ch = init_ch
46+
| runEach(
47+
components: control_methods,
48+
id: { id, state, comp ->
49+
id + "/control_" + comp.name
50+
},
51+
fromState: [
52+
input_scrnaseq_reference: "input_sc"
53+
],
54+
toState: { id, out_dict, state, comp ->
55+
state + [
56+
steps: state.steps + [[
57+
type: "control",
58+
component_id: comp.name,
59+
run_id: id
60+
]],
61+
output_correction: out_dict.output,
62+
output_qc_filter: out_dict.output_qc_col,
63+
output_assignment: out_dict.output_transcript_assignments
64+
]
65+
}
66+
)
67+
3868
/****************************************
3969
* RUN SEGMENTATION METHODS *
4070
****************************************/
@@ -160,9 +190,9 @@ workflow run_wf {
160190
)
161191

162192

163-
/****************************************
164-
* COUNT AGGREGATION *
165-
****************************************/
193+
/************************************
194+
* QC FILTERING *
195+
************************************/
166196
qc_filter_methods = [
167197
basic_qc_filter
168198
]
@@ -338,13 +368,20 @@ workflow run_wf {
338368
}
339369
)
340370

371+
/****************************************
372+
* COMBINE WITH CONTROL *
373+
****************************************/
374+
375+
expr_corr_and_control_ch = expr_corr_ch.mix(control_ch)
376+
377+
341378
/****************************************
342379
* METRICS *
343380
****************************************/
344381
metrics = [
345382
similarity
346383
]
347-
metric_ch = expr_corr_ch
384+
metric_ch = expr_corr_and_control_ch
348385
| runEach(
349386
components: metrics,
350387
id: { id, state, comp ->

0 commit comments

Comments
 (0)