Adding a new metric BRAS(Batch Removal Adapted Silhouette) (#62)

seohyonkim · web-flow · commit ab0064c33200 · 2025-07-23T18:25:54.000+02:00
* working BRAS metric

* use scib-metrics instead

* remove comments
diff --git a/src/metrics/bras/config.vsh.yaml b/src/metrics/bras/config.vsh.yaml
@@ -0,0 +1,39 @@
+__merge__: ../../api/comp_metric.yaml
+name: bras
+info:
+  metrics:
+    - name: bras
+      label: BRAS
+      summary: "Modified ASW metric for batch removal"
+      description: |
+        The BRAS (Batch Removal Adapted Silhouette) metric modifies the standard silhouette score to account for batch effects in single-cell data integration benchmarking.
+        Instead of measuring how well a cell matches its biological label cluster compared to other clusters (as in regular silhouette), BRAS compares how well it matches its biological cluster in its own batch versus the same biological cluster in other batches.
+        For each cell, BRAS computes ai, the average distance to cells with the same label in the same batch, and bi, the average distance to cells with the same label in different batches.
+        It then plugs ai and bi into the standard silhouette formula.
+      references:
+        doi: 
+          - 10.1101/2025.01.21.634098
+      links:
+        documentation: https://github.com/ohlerlab/metrics_matter_manuscript_reproducibility/tree/master
+        repository: https://github.com/ohlerlab/metrics_matter_manuscript_reproducibility/tree/master
+      min: 0
+      max: 1
+      maximize: true
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/utils/read_anndata_partial.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1.0.0
+    setup:
+      - type: python
+        pypi:
+        - scib-metrics==0.5.5
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/metrics/bras/script.py b/src/metrics/bras/script.py
@@ -0,0 +1,47 @@
+import anndata as ad
+import sys
+import numpy as np
+import pandas as pd
+from scib_metrics import bras
+
+## VIASH START
+par = {
+  'input_integrated': 'resources_test/.../integrated.h5ad',
+  'input_solution': 'resources_test/.../solution.h5ad',
+  'output': 'output.h5ad',
+  'input_integrated': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad',
+  'output': 'output.h5ad',
+}
+meta = {
+  'name': 'bras'
+}
+## VIASH END
+
+sys.path.append(meta["resources_dir"])  # must run before the import below so the helper module is found
+from read_anndata_partial import read_anndata
+
+print('Reading input files', flush=True)
+adata = read_anndata(par['input_integrated'], obs='obs', obsm='obsm', uns='uns')  # integrated file: obs, obsm, uns
+adata.obs = read_anndata(par['input_solution'], obs='obs').obs  # obs is replaced wholesale by the solution's obs
+adata.uns |= read_anndata(par['input_solution'], uns='uns').uns  # merge solution uns in; solution wins on shared keys
+
+print('Compute metrics', flush=True)
+score = bras(
+    X=adata.obsm['X_emb'],
+    labels=adata.obs['cell_type'].to_numpy(),
+    batch=adata.obs['batch'].to_numpy()
+)
+
+print('Create output AnnData object', flush=True)
+output = ad.AnnData(
+    uns={
+        'dataset_id': adata.uns['dataset_id'],
+        'normalization_id': adata.uns['normalization_id'],
+        'method_id': adata.uns['method_id'],
+        'metric_ids': [ meta['name'] ],
+        'metric_values': [ score ]
+    }
+)
+
+print("Write output AnnData to file", flush=True)
+output.write_h5ad(par['output'], compression='gzip')