Skip to content

Commit f64ec27

Browse files
habibrehman2002Habib RehmanLouisK92
authored
Add split (#125)
* Added SPLIT, tentatively works * Fixed filtering and container for SPLIT * Save uncorrected counts in split script * Rename split_correction to split and add method to workflow and scripts --------- Co-authored-by: Habib Rehman <harehman@iu.edu> Co-authored-by: LouisK92 <louiskuemmerle@googlemail.com>
1 parent f995cb8 commit f64ec27

File tree

9 files changed

+164
-2
lines changed

9 files changed

+164
-2
lines changed

scripts/run_benchmark/config.yaml

Whitespace-only changes.

scripts/run_benchmark/run_full_local.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ expression_correction_methods:
6565
- no_correction
6666
# - gene_efficiency_correction
6767
# - resolvi_correction
68+
# - split
6869
method_parameters_yaml: /tmp/method_params.yaml
6970
HERE
7071

scripts/run_benchmark/run_full_seqeracloud.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ expression_correction_methods:
5757
- no_correction
5858
- gene_efficiency_correction
5959
- resolvi_correction
60+
- split
6061
method_parameters_yaml: /tmp/method_params.yaml
6162
HERE
6263

scripts/run_benchmark/run_test_local.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ expression_correction_methods:
6060
- no_correction
6161
# - gene_efficiency_correction
6262
# - resolvi_correction
63+
# - split
6364
method_parameters_yaml: /tmp/method_params.yaml
6465
HERE
6566

scripts/run_benchmark/run_test_seqeracloud.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ expression_correction_methods:
5656
- no_correction
5757
- gene_efficiency_correction
5858
- resolvi_correction
59+
- split
5960
#method_parameters_yaml: /tmp/method_params.yaml
6061
HERE
6162

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
__merge__: /src/api/comp_method_expression_correction.yaml
2+
3+
name: split
4+
label: "SPLIT"
5+
summary: "Correct doublet/missegmented cells using SPLIT"
6+
description: "SPLIT (Spatial Purification of Layered Intracellular Transcripts) is a novel method that integrates snRNA-seq with RCTD deconvolution to enhance signal purity. SPLIT effectively resolves mixed transcriptomic signals, improving background correction and cell-type resolution."
7+
links:
8+
documentation: "https://github.com/bdsc-tds/SPLIT"
9+
repository: "https://github.com/bdsc-tds/SPLIT"
10+
references:
11+
doi: "10.1101/2025.04.23.649965"
12+
13+
arguments:
14+
- name: --keep_all_cells
15+
required: false
16+
direction: input
17+
type: boolean
18+
default: false
19+
description: Whether to keep cells with 0 counts (may cause errors if set to TRUE)
20+
21+
resources:
22+
- type: r_script
23+
path: script.R
24+
25+
engines:
26+
- type: docker
27+
image: openproblems/base_r:1
28+
setup:
29+
- type: docker
30+
run: |
31+
apt-get update
32+
- type: r
33+
bioc: [anndataR, rhdf5, devtools, scater]
34+
- type: docker
35+
run: |
36+
Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); options(timeout = 600000000); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE); devtools::install_github('bdsc-tds/SPLIT')"
37+
38+
# SingleCellExperiment part can probably be left out again in the future. It currently fixes a bug described in these issues:
39+
# https://github.com/drighelli/SpatialExperiment/issues/171
40+
# https://github.com/satijalab/seurat/issues/9889
41+
# The reinstall of SingleCellExperiment triggers the correct re-install of SpatialExperiment.
42+
43+
# Using a large timeout here to reduce failures during GitHub package installation.
44+
45+
- type: native
46+
47+
runners:
48+
- type: executable
49+
- type: nextflow
50+
directives:
51+
label: [ hightime, highcpu, highmem ]
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
library(spacexr)
2+
library(Matrix)
3+
library(SingleCellExperiment)
4+
library(anndataR)
5+
library(SPLIT)
6+
library(Seurat)
7+
library(scuttle)
8+
9+
## VIASH START
10+
# Placeholder parameters for running this script interactively. They sit
# between the VIASH START / VIASH END markers (presumably overwritten by Viash
# when the component is built -- confirm against the Viash docs).
#   input_spatial_with_cell_types: path to the spatial h5ad with cell types
#   input_scrnaseq_reference:      path to the scRNA-seq reference h5ad
#   output:                        path of the corrected h5ad to write
#   keep_all_cells:                if FALSE, cells with 0 counts are filtered
# NOTE: no trailing comma after the last element -- `list(a = 1, )` raises
# "argument 2 is empty" in R.
par <- list(
  "input_spatial_with_cell_types" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_with_celltypes.h5ad",
  "input_scrnaseq_reference" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad",
  "output" = "task_ist_preprocessing/tmp/split_corrected.h5ad",
  "keep_all_cells" = FALSE
)
16+
17+
# Placeholder execution metadata for interactive runs; `cpus` is read further
# down to set the number of cores passed to RCTD.
# Written as a named list element: the Python-style `'cpus': 4,` form is
# evaluated by R as the sequence operator `"cpus":4` and fails at runtime.
meta <- list(
  "cpus" = 4L
)
20+
21+
## VIASH END
22+
23+
# Read the input h5ad file and convert to SingleCellExperiment and Seurat
24+
sce <- read_h5ad(par$input_spatial_with_cell_types, as = "SingleCellExperiment")
25+
xe <- read_h5ad(par$input_spatial_with_cell_types, as = "Seurat")
26+
27+
# Filter out cells with 0 total counts (unless keep_all_cells is set)
28+
if (!par$keep_all_cells) {
29+
cat("Filtering cells with 0 counts\n")
30+
sce <- sce[, colSums(counts(sce)) > 0]
31+
xe <- subset(xe, subset = nCount_RNA > 0)
32+
}
33+
34+
# Extract spatial coordinates and counts matrix
35+
centroid_x <- colData(sce)$centroid_x
36+
centroid_y <- colData(sce)$centroid_y
37+
coords <- data.frame(centroid_x, centroid_y)
38+
counts <- assay(sce, "counts")
39+
rownames(coords) <- colData(sce)$cell_id
40+
puck <- SpatialRNA(coords, counts)
41+
42+
# Read reference scrnaseq
43+
ref <- read_h5ad(par$input_scrnaseq_reference, as = "SingleCellExperiment")
44+
45+
# Filter reference cell types to those with at least 25 cells (minimum for RCTD)
46+
valid_celltypes <- names(table(colData(ref)$cell_type))[table(colData(ref)$cell_type) >= 25]
47+
filtered_ref <- ref[,colData(ref)$cell_type %in% valid_celltypes]
48+
49+
ref_counts <- assay(filtered_ref, "counts")
50+
# factor to drop filtered cell types
51+
colData(filtered_ref)$cell_type <- factor(colData(filtered_ref)$cell_type)
52+
cell_types <- colData(filtered_ref)$cell_type
53+
names(cell_types) <- colnames(ref_counts)
54+
reference <- Reference(ref_counts, cell_types, min_UMI = 0)
55+
56+
# check cores
57+
cores <- 1
58+
if ("cpus" %in% names(meta) && !is.null(meta$cpus)) cores <- meta$cpus
59+
cat(sprintf("Number of cores: %s\n", cores))
60+
61+
# Run the algorithm
62+
cat("Running RCTD\n")
63+
myRCTD <- create.RCTD(puck, reference, max_cores = cores)
64+
myRCTD <- run.RCTD(myRCTD, doublet_mode = "doublet")
65+
66+
# Get the "spot_class" annotation from RCTD
67+
# cat("Saving RCTD spot_class\n")
68+
# results <- myRCTD@results
69+
# rctd_spot_class <- results$results_df$spot_class
70+
# names(rctd_spot_class) <- rownames(results$results_df)
71+
# colData(sce)$RCTD_class <- "not_included"
72+
# colData(sce)[names(rctd_spot_class),"RCTD_class"] <- as.character(rctd_spot_class)
73+
74+
# Post-process RCTD output
75+
RCTD <- SPLIT::run_post_process_RCTD(myRCTD)
76+
77+
# Run SPLIT purification
78+
cat("Running SPLIT\n")
79+
res_split <- SPLIT::purify(
80+
counts = GetAssayData(xe, assay = 'RNA', layer = 'counts'), # or any gene x cells counts matrix
81+
rctd = RCTD,
82+
DO_purify_singlets = TRUE # optional
83+
)
84+
85+
86+
# Create corrected counts layer in the original SingleCellExperiment object
87+
cat("Normalizing counts\n")
88+
89+
# Preserve original normalized values before overwriting with corrected normalization
90+
assay(sce, "normalized_uncorrected") <- assay(sce, "normalized")
91+
92+
# First copy in counts
93+
assay(sce, "corrected_counts") <- assay(sce, "counts")
94+
95+
# Then, replace only the updated cells
96+
assay(sce, "corrected_counts")[rownames(res_split$purified_counts), colnames(res_split$purified_counts)] <- res_split$purified_counts
97+
98+
# Library size normalization - see note in resolVI
99+
size_factors <- librarySizeFactors(assay(sce, "corrected_counts"))
100+
assay(sce, "normalized") <- assay(logNormCounts(sce, size_factors=size_factors, assay.type = "corrected_counts"),"logcounts")
101+
102+
# Write the final object to h5ad format
103+
cat("Writing to h5ad\n")
104+
dir.create(dirname(par$output), showWarnings = FALSE, recursive = TRUE)
105+
write_h5ad(sce, par$output, mode = "w")

src/workflows/run_benchmark/config.vsh.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ argument_groups:
104104
A list of expression correction methods to run.
105105
type: string
106106
multiple: true
107-
default: "no_correction:gene_efficiency_correction:resolvi_correction"
107+
default: "no_correction:gene_efficiency_correction:resolvi_correction:split"
108108
- name: Method parameters
109109
description: |
110110
Use these arguments to control the parameter sets that are run for each
@@ -175,6 +175,7 @@ dependencies:
175175
- name: methods_expression_correction/no_correction
176176
- name: methods_expression_correction/gene_efficiency_correction
177177
- name: methods_expression_correction/resolvi_correction
178+
- name: methods_expression_correction/split
178179
- name: methods_data_aggregation/aggregate_spatial_data
179180
- name: metrics/similarity
180181
- name: metrics/quality

src/workflows/run_benchmark/main.nf

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,8 @@ workflow run_wf {
414414
expr_corr_methods = [
415415
no_correction,
416416
gene_efficiency_correction,
417-
resolvi_correction
417+
resolvi_correction,
418+
split
418419
]
419420

420421
expr_corr_ch = cta_ch

0 commit comments

Comments
 (0)