Skip to content

Commit a584eef

Browse files
committed
fix bug in perfect integration get obs
1 parent 5ea510a commit a584eef

File tree

4 files changed

+28
-15
lines changed

4 files changed

+28
-15
lines changed

src/control_methods/perfect_integration/script.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
# The following code has been auto-generated by Viash.
55
par = {
66
"input_unintegrated": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/unintegrated.h5ad",
7-
"output_integrated_split1": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/integrated_split1.h5ad",
8-
"output_integrated_split2": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/integrated_split2.h5ad",
7+
"output_integrated_split1": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/perfect_integrated_split1.h5ad",
8+
"output_integrated_split2": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/perfect_integrated_split2.h5ad",
99
}
1010
meta = {"name": "perfect_integration"}
1111

src/metrics/cms/script.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,13 @@ integrated_split2 <- anndataR::read_h5ad(par[["input_integrated_split2"]])
4343
cat("Fetching metadata from unintegrated\n")
4444
integrated_split1 <- get_obs_var_for_integrated(
4545
i_adata = integrated_split1,
46-
u_adata = unintegrated
46+
u_adata = unintegrated,
47+
split_id = 1
4748
)
4849
integrated_split2 <- get_obs_var_for_integrated(
4950
i_adata = integrated_split2,
50-
u_adata = unintegrated
51+
u_adata = unintegrated,
52+
split_id = 2
5153
)
5254

5355
# Get markers to correct

src/metrics/flowsom_mapping_similarity/script.R

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,14 @@ par <- list(
66
"input_unintegrated" = 'resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/unintegrated.h5ad',
77
"input_integrated_split1" = 'resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/integrated_split1.h5ad',
88
"input_integrated_split2" = 'resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/integrated_split2.h5ad',
9+
# if using perfect integration
10+
# input_integrated_split1 = "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/perfect_integrated_split1.h5ad",
11+
# input_integrated_split2 = "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/perfect_integrated_split2.h5ad",
912
"output" = 'resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/score.h5ad'
1013
)
1114
meta <- list(
12-
"name" = 'flowsom_mapping_similarity'
15+
"name" = 'flowsom_mapping_similarity',
16+
"resources_dir" = "src/utils"
1317
)
1418
## VIASH END
1519

@@ -18,19 +22,16 @@ source(paste0(meta$resources_dir, "/helper_functions.R"))
1822
library(anndata)
1923

2024
unintegrated <- anndata::read_h5ad(par[["input_unintegrated"]])
21-
integrated_s1 <- anndata::read_h5ad(par[["input_integrated_split1"]])
2225

23-
print(unintegrated)
24-
print(integrated_s1)
2526
# read and filter split 1 data
2627
integrated_s1 <- anndata::read_h5ad(par[["input_integrated_split1"]]) |>
27-
get_obs_var_for_integrated(unintegrated) |>
28+
get_obs_var_for_integrated(unintegrated, split_id = 1) |>
2829
subset_nocontrols() |>
2930
remove_unlabelled()
3031

3132
# read and filter split 2 data
3233
integrated_s2 <- anndata::read_h5ad(par[["input_integrated_split2"]]) |>
33-
get_obs_var_for_integrated(unintegrated) |>
34+
get_obs_var_for_integrated(unintegrated, split_id = 2) |>
3435
subset_nocontrols() |>
3536
remove_unlabelled()
3637

src/utils/helper_functions.R

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@ requireNamespace("anndataR", quietly = TRUE)
1414
#'
1515
#' @param i_adata AnnData object, integrated data
1616
#' @param u_adata AnnData object, unintegrated dataset
17+
#' @param split_id numeric, split id of the integrated data
1718
#' @return AnnData object with .var and .obs added
1819
#'
19-
get_obs_var_for_integrated <- function(i_adata, u_adata) {
20+
get_obs_var_for_integrated <- function(i_adata, u_adata, split_id) {
2021

2122
i_adata$obs <- u_adata$obs[i_adata$obs_names, ]
2223
i_adata$var <- u_adata$var[i_adata$var_names, ]
@@ -25,16 +26,23 @@ get_obs_var_for_integrated <- function(i_adata, u_adata) {
2526
# everything is from batch 1, but some samples need to be labelled to come from batch 2
2627
if (i_adata$uns["method_id"] == "perfect_integration") {
2728
cat(
28-
"Control method 'perfect_integration' detected. Changing batch labels for split 2.\n"
29+
"Control method 'perfect_integration' detected. Changing batch labels.\n"
2930
)
3031
cat("Computing new batch labels\n")
3132
# mutate is needed because we won't have the mapping for donors that are used for controls
3233
i_adata_new_batch_labels <- get_batch_label_perfect_integration(
3334
u_adata = u_adata,
3435
i_adata = i_adata,
35-
split_id = 1
36+
split_id = split_id
3637
)
3738

39+
# safeguard
40+
if (! all(i_adata_new_batch_labels$donor == i_adata$obs$donor)) {
41+
stop(
42+
"Donor labels do not match between new batch labels and integrated data. This should not happen!"
43+
)
44+
}
45+
3846
cat("Attaching new batch labels\n")
3947
i_adata$obs$batch <- i_adata_new_batch_labels$new_batch_label
4048
}
@@ -53,12 +61,14 @@ get_obs_var_for_integrated <- function(i_adata, u_adata) {
5361
#' @return a dataframe with donor and new batch label
5462
#'
5563
get_batch_label_perfect_integration <- function(u_adata, i_adata, split_id) {
64+
# this returns which batch's sample was used for each donor in the given split
5665
actual_donor_batch_map <- unique(
5766
u_adata$obs[(u_adata$obs$split == split_id), c("donor", "batch")]
5867
)
59-
# mutate is needed as donors that are used for controls, we won't have the mapping
68+
# mutate is needed because donors that are used for controls won't have batch_new, as
69+
# their split id is 0
6070
i_adata_new_batch_labels <- i_adata$obs[, c("donor", "batch")] %>%
61-
left_join(actual_donor_batch_map, by="donor", suffix = c("_old", "_new")) %>%
71+
left_join(actual_donor_batch_map, by = "donor", suffix = c("_old", "_new")) %>%
6272
mutate(new_batch_label = ifelse(is.na(batch_new), batch_old, batch_new)) %>%
6373
select(donor, new_batch_label)
6474

0 commit comments

Comments
 (0)