openproblems-bio · mumichae · Sep 25, 2025 · Apr 15, 2025 · Apr 15, 2025 · Apr 15, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,12 @@
 
 * Added `metrics/kbet_pg` and `metrics/kbet_pg_label` components (PR #52).
 
+* Added `metrics/cilisi` component (PR #57).
+    - CiLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing
+        the scores between 0 and 1. Unlike iLISI, CiLISI preserves sensitivity to biological variance and avoids favoring
+        overcorrected datasets in which cell type signals have been removed.
+        We propose this metric as a substitute for iLISI.
+
 ## Minor changes
 
 * Un-pin the scPRINT version and update parameters (PR #51)

diff --git a/src/metrics/cilisi/config.vsh.yaml b/src/metrics/cilisi/config.vsh.yaml
@@ -0,0 +1,51 @@
+__merge__: ../../api/comp_metric.yaml
+name: cilisi
+info:
+  metrics:
+    - name: cilisi
+      label: CiLISI
+      summary: Cell-type aware version of iLISI (integration Local Inverse Simpson's Index).
+                iLISI is computed separately for each cell type or cluster, normalized between 0 and 1, and averaged across all cells (global mean).
+                By default, CiLISI is calculated only for groups with at least 10 cells and 2 distinct batch labels (thresholds are set in the component script).
+      description: |
+        ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing
+        the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring
+        overcorrected datasets with removed cell type signals.
+      references:
+        doi: 10.1038/s41467-024-45240-z
+      links:
+        documentation: https://github.com/carmonalab/scIntegrationMetrics
+        repository: https://github.com/carmonalab/scIntegrationMetrics
+      min: 0
+      max: 1
+      maximize: true
+
+    - name: cilisi_means
+      label: CiLISI_means
+      summary: As CiLISI, but returns the mean of the per-group CiLISI values (i.e., the average of the per-group means) instead of a global average over all cells.
+      description: |
+        CiLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing
+        the scores between 0 and 1. Unlike iLISI, CiLISI preserves sensitivity to biological variance and avoids favoring
+        overcorrected datasets in which cell type signals have been removed.
+        This variant averages the per-cell-type means, so every cell type contributes equally regardless of its size.
+      references:
+        doi: 10.1038/s41467-024-45240-z
+      links:
+        documentation: https://github.com/carmonalab/scIntegrationMetrics
+        repository: https://github.com/carmonalab/scIntegrationMetrics
+      min: 0
+      max: 1
+      maximize: true
+resources:
+  - type: r_script
+    path: script.R
+engines:
+  - type: docker
+    image: openproblems/base_r:1.0.0
+    setup:
+       - type: r
+         github: https://github.com/carmonalab/[email protected]
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/metrics/cilisi/script.R b/src/metrics/cilisi/script.R
@@ -0,0 +1,48 @@
+library(anndata)
+library(scIntegrationMetrics)
+
+## VIASH START
+par <- list(  # development defaults for the component arguments; presumably replaced by viash at build time (TODO confirm)
+  input_integrated = "resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_processed.h5ad",  # integrated AnnData; the script reads obsm X_emb and the uns ids from it
+  input_solution = "resources_test/task_batch_integration/cxg_immune_cell_atlas/solution.h5ad",  # solution AnnData; its obs table supplies the batch and cell_type columns
+  output = "output.h5ad"  # path the result AnnData is written to
+)
+meta <- list(  # component metadata; not read anywhere in the visible script
+  name = "cilisi"
+)
+## VIASH END
+
+# Load the integrated embedding and the solution annotations.
+# The embedding comes from the integrated file, the batch / cell type labels
+# from the solution file; the metric pairs their rows by position.
+cat("Reading input files\n")
+adata <- anndata::read_h5ad(par[["input_integrated"]])
+solution <- anndata::read_h5ad(par[["input_solution"]])
+embeddings <- adata$obsm[["X_emb"]]
+metadata <- solution$obs
+
+# Fail early with a clear message rather than with a cryptic error raised
+# from inside the metric computation.
+if (is.null(embeddings)) {
+  stop("Integrated input has no 'X_emb' entry in obsm", call. = FALSE)
+}
+if (nrow(embeddings) != nrow(metadata)) {
+  stop(
+    "Embedding has ", nrow(embeddings), " rows but the solution has ",
+    nrow(metadata), " cells",
+    call. = FALSE
+  )
+}
+# NOTE(review): this assumes both files list the cells in the same order;
+# confirm that the upstream processing step guarantees it.
+
+cat("Compute CiLISI metrics...\n")
+# LISI on the "batch" label, computed separately within each "cell_type"
+# group and normalized. Groups with fewer than 10 cells or fewer than 2 batch
+# labels are excluded via min.cells.split / min.vars.label.
+# The result is used below as a list of per-group tables whose first column
+# holds the per-cell scores (presumably one element per retained group).
+lisisplit <- scIntegrationMetrics::compute_lisi_splitBy(
+  X = embeddings,
+  meta_data = metadata,
+  label_colnames = "batch",
+  perplexity = 30,
+  split_by_colname = "cell_type",
+  normalize = TRUE,
+  min.cells.split = 10,
+  min.vars.label = 2
+)
+
+if (length(lisisplit) == 0) {
+  # No group passed the filters: report missing scores explicitly instead of
+  # relying on the incidental NA that mean() returns for empty input.
+  warning(
+    "No cell type passed the CiLISI filters; returning NA scores",
+    call. = FALSE
+  )
+  cilisi <- NA_real_
+  cilisi_means <- NA_real_
+} else {
+  # Global CiLISI: mean over all per-cell scores pooled across groups.
+  cilisi <- mean(unlist(lisisplit))
+  # Mean of per-group means: every group contributes equally.
+  # vapply() guarantees a numeric vector whatever the input shape.
+  cilisi_means <- mean(
+    vapply(lisisplit, function(x) mean(x[, 1]), numeric(1))
+  )
+}
+
+# Bundle the run identifiers (copied from the integrated input) with both
+# scores, then store them in the uns slot of a data-less AnnData.
+cat("Write output AnnData to file\n")
+result_uns <- list(
+  dataset_id = adata$uns[["dataset_id"]],
+  normalization_id = adata$uns[["normalization_id"]],
+  method_id = adata$uns[["method_id"]],
+  metric_ids = c("cilisi", "cilisi_means"),
+  metric_values = list(cilisi, cilisi_means)
+)
+metric_adata <- anndata::AnnData(shape = c(1, 2), uns = result_uns)
+metric_adata$write_h5ad(par[["output"]], compression = "gzip")