Skip to content

Commit b6e1be2

Browse files
authored
fix error in control methods (#107)
* fix error in control methods
* update changelog
1 parent 6ea1c5b commit b6e1be2

File tree

4 files changed

+40
-16
lines changed

4 files changed

+40
-16
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,8 @@
9797
* Added EMD vertical global metric and split perfect integration into horizontal and vertical
9898
for computing horizontal and vertical metrics (PR #63).
9999

100+
* Fix problems identified during a full run (PR #99).
101+
100102
## MINOR CHANGES
101103

102104
* Enabled unit tests (PR #2).
@@ -153,5 +155,6 @@
153155

154156
* Fix missing anndata in yaml file and set the base_r docker image version to 1 instead of 1.0.0 (PR #89).
155157

158+
* Fix bug in control methods (PR #107).
156159

157160

src/control_methods/shuffle_integration/script.py

Lines changed: 32 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,11 @@
44

55
## VIASH START
66
par = {
7-
"input_unintegrated": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/unintegrated_censored.h5ad",
8-
"output": "output.h5ad",
7+
"input_unintegrated": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/unintegrated.h5ad",
8+
"output_integrated_split1": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/control_integrated_split1.h5ad",
9+
"output_integrated_split2": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/control_integrated_split2.h5ad",
910
}
10-
meta = {"name": "harmonypy"}
11+
meta = {"name": "shuffle_integration_by_cell_type", "resources_dir": "src/control_methods"}
1112
## VIASH END
1213

1314
print("Importing helper functions", flush=True)
@@ -16,25 +17,44 @@
1617

1718
print("Reading and preparing input files", flush=True)
1819
adata = ad.read_h5ad(par["input_unintegrated"])
20+
adata_split1 = adata[(adata.obs.is_control > 0) | (adata.obs.split == 1)].copy()
21+
adata_split2 = adata[(adata.obs.is_control > 0) | (adata.obs.split == 2)].copy()
1922

20-
adata.obs["batch_str"] = adata.obs["batch"].astype(str)
21-
22-
print("Randomise features", flush=True)
23+
print("Randomise features - split 1", flush=True)
24+
adata_split1.obs["batch_str"] = adata_split1.obs["batch"].astype(str)
2325
integrated = _randomize_features(
24-
adata.layers["preprocessed"]
26+
adata_split1.layers["preprocessed"]
2527
)
2628

2729
# create new anndata
28-
output = ad.AnnData(
29-
obs=adata.obs[[]],
30-
var=adata.var[[]],
30+
output_split1 = ad.AnnData(
31+
obs=adata_split1.obs[[]],
32+
var=adata_split1.var[[]],
33+
layers={"integrated": integrated},
34+
uns={
35+
"dataset_id": adata_split1.uns["dataset_id"],
36+
"method_id": meta["name"],
37+
"parameters": {},
38+
},
39+
)
40+
41+
print("Randomise features - split 2", flush=True)
42+
adata_split2.obs["batch_str"] = adata_split2.obs["batch"].astype(str)
43+
integrated = _randomize_features(
44+
adata_split2.layers["preprocessed"]
45+
)
46+
# create new anndata
47+
output_split2 = ad.AnnData(
48+
obs=adata_split2.obs[[]],
49+
var=adata_split2.var[[]],
3150
layers={"integrated": integrated},
3251
uns={
33-
"dataset_id": adata.uns["dataset_id"],
52+
"dataset_id": adata_split2.uns["dataset_id"],
3453
"method_id": meta["name"],
3554
"parameters": {},
3655
},
3756
)
3857

3958
print("Write output AnnData to file", flush=True)
40-
output.write_h5ad(par["output"], compression="gzip")
59+
output_split1.write_h5ad(par["output_integrated_split1"], compression="gzip")
60+
output_split2.write_h5ad(par["output_integrated_split2"], compression="gzip")

src/control_methods/shuffle_integration_by_batch/script.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
print("Reading and preparing input files", flush=True)
1818
adata = ad.read_h5ad(par["input_unintegrated"])
19-
adata_split1 = adata[(adata.obs.is_control > 0) | (adata.obs.batch == 1)].copy()
19+
adata_split1 = adata[(adata.obs.is_control > 0) | (adata.obs.split == 1)].copy()
2020
adata_split2 = adata[(adata.obs.is_control > 0) | (adata.obs.split == 2)].copy()
2121

2222
print("Randomise features - split 1", flush=True)

src/control_methods/shuffle_integration_by_cell_type/script.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,10 @@
55
## VIASH START
66
par = {
77
"input_unintegrated": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/unintegrated.h5ad",
8-
"output": "output.h5ad",
8+
"output_integrated_split1": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/control_integrated_split1.h5ad",
9+
"output_integrated_split2": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/control_integrated_split2.h5ad",
910
}
10-
meta = {"name": "harmonypy"}
11+
meta = {"name": "shuffle_integration_by_cell_type", "resources_dir": "src/control_methods"}
1112
## VIASH END
1213

1314
print("Importing helper functions", flush=True)
@@ -16,7 +17,7 @@
1617

1718
print("Reading and preparing input files", flush=True)
1819
adata = ad.read_h5ad(par["input_unintegrated"])
19-
adata_split1 = adata[(adata.obs.is_control > 0) | (adata.obs.batch == 1)].copy()
20+
adata_split1 = adata[(adata.obs.is_control > 0) | (adata.obs.split == 1)].copy()
2021
adata_split2 = adata[(adata.obs.is_control > 0) | (adata.obs.split == 2)].copy()
2122

2223
print("Randomise features - split 1", flush=True)

0 commit comments

Comments
 (0)