Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions modules/local/utility/xenium2scs/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Convert a Xenium bundle (transcripts.parquet + morphology image +
// experiment.xenium metadata) into SCS/BGI-style tabular input plus a
// 2-D cropped morphology image, by rendering the xenium2scs.py template.
process XENIUM2SCS {
    tag "$meta.id"
    label 'process_low'

    // Container-only module: no conda recipe exists for this stack, which is
    // why the script/stub blocks below abort under conda/mamba profiles.
    container "khersameesh24/spatialdata:0.2.6"

    input:
    // transcripts_parquet : Xenium transcripts.parquet
    // morphology_image    : full-resolution morphology image (e.g. morphology.ome.tif)
    // experiment_xenium   : experiment.xenium JSON metadata
    tuple val(meta), path(transcripts_parquet), path(morphology_image), path(experiment_xenium)

    output:
    // NOTE(review): the template also writes ${prefix}/scs_input.tsv, which is
    // not declared as an output here — confirm whether it should be emitted.
    tuple val(meta), path("${prefix}/scs_input_bgi.tsv"), emit: scs_input_bgi_tsv
    tuple val(meta), path("${prefix}/morph2d.tif"), emit: morph2d_tif
    tuple val(meta), path("${prefix}/xenium2scs_metrics.tsv"), emit: metrics
    path ("versions.yml"), emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Exit if running this module with -profile conda / -profile mamba
    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
        error("XENIUM2SCS module does not support Conda. Please use Docker / Singularity / Podman instead.")
    }

    // prefix is read by the output declarations above and by the template.
    prefix = task.ext.prefix ?: "${meta.id}"

    // Render templates/xenium2scs.py; Nextflow interpolates the input paths
    // and prefix into the Python script before execution.
    template('xenium2scs.py')

    stub:
    // Exit if running this module with -profile conda / -profile mamba
    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
        error("XENIUM2SCS module does not support Conda. Please use Docker / Singularity / Podman instead.")
    }

    prefix = task.ext.prefix ?: "${meta.id}"

    // Minimal placeholder outputs matching the declared outputs above.
    // The \t escapes inside this triple-quoted Groovy string are expanded to
    // real tab characters by Groovy before bash evaluates the heredocs.
    """
    mkdir -p ${prefix}

    cat <<'EOF' > ${prefix}/scs_input.tsv
    geneID\trow\tcolumn\tcounts
    TEST\t0\t0\t1
    EOF

    cat <<'EOF' > ${prefix}/scs_input_bgi.tsv
    geneID\tx\ty\tMIDCounts
    TEST\t0\t0\t1
    EOF

    python - <<'PY'
    import numpy as np
    import tifffile

    img = np.zeros((16, 16), dtype=np.uint16)
    tifffile.imwrite('${prefix}/morph2d.tif', img)
    PY

    cat <<'EOF' > ${prefix}/xenium2scs_metrics.tsv
    metric\tvalue
    n_rows\t1
    EOF

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
    xenium2scs: "1.0.0"
    END_VERSIONS
    """
}
181 changes: 181 additions & 0 deletions modules/local/utility/xenium2scs/templates/xenium2scs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#!/usr/bin/env python3

import json
from pathlib import Path
import numpy as np
import pandas as pd
import tifffile

# Xenium full-resolution image: 1 pixel = 0.2125 µm (10x Genomics spec).
# Transcript x_location / y_location are in microns.
# To overlay transcripts on the full-res image: pixel = micron / pixel_size.
XENIUM_DEFAULT_PIXEL_SIZE_UM = 0.2125


def _pick_column(df: pd.DataFrame,
candidates: list[str],
required: bool = True):
for name in candidates:
if name in df.columns:
return name
if required:
raise ValueError(f"Could not find any of the required columns: {candidates}")
return None


def _read_pixel_size(experiment_xenium_path: str) -> float:
    """Read pixel_size (µm/px) from experiment.xenium; fall back to 0.2125.

    ``experiment.xenium`` is the JSON metadata file shipped in a Xenium
    bundle. Only the failure modes expected here trigger the fallback —
    missing/unreadable file, invalid JSON, a non-object JSON root, or a
    missing/non-numeric ``pixel_size`` value. Anything else propagates so
    genuine bugs are not silently masked (the original blanket
    ``except Exception`` hid them).
    """
    try:
        with open(experiment_xenium_path, encoding="utf-8") as fh:
            meta = json.load(fh)
        return float(meta.get("pixel_size", XENIUM_DEFAULT_PIXEL_SIZE_UM))
    except (OSError, ValueError, TypeError, AttributeError):
        # OSError: file missing/unreadable; ValueError: JSONDecodeError or a
        # non-numeric string; TypeError: float(None)/float(dict);
        # AttributeError: JSON root is not an object (no .get method).
        return XENIUM_DEFAULT_PIXEL_SIZE_UM


def convert_xenium_to_scs(parquet_path: str,
                          output_tsv: str,
                          output_bgi_tsv: str,
                          morphology_image_path: str,
                          output_morph2d_tif: str,
                          metrics_tsv: str,
                          experiment_xenium_path: str = "",
                          bin_size: float = 1.0):
    """
    Convert Xenium transcripts to SCS/BGI format with correct pixel-space coordinates.

    Xenium x_location / y_location are in microns; the morphology image full
    resolution is ``pixel_size`` µm/px (0.2125 by Xenium spec). Micron
    coordinates are converted to pixels (pixel = micron / pixel_size),
    zero-based to the transcript ROI, and the morphology image is cropped to
    that same ROI so that table coordinates and cropped image pixels share a
    single (0, 0) origin — in both the binned and unbinned case.

    Parameters
    ----------
    parquet_path : path to Xenium transcripts.parquet.
    output_tsv : per-pixel table ``geneID/row/column/counts``.
    output_bgi_tsv : same data in spateo/BGI layout ``geneID/x/y/MIDCounts``.
    morphology_image_path : full-resolution morphology image (TIFF).
    output_morph2d_tif : 2-D, ROI-cropped morphology image output path.
    metrics_tsv : two-column ``metric/value`` summary output path.
    experiment_xenium_path : optional experiment.xenium JSON to read pixel_size.
    bin_size : bin width in pixels (default 1 = no binning).

    Raises
    ------
    ValueError : required columns missing, no valid transcripts remain after
        dropping NA rows, or the morphology image shape is unsupported.
    """
    pixel_size = _read_pixel_size(experiment_xenium_path) if experiment_xenium_path else XENIUM_DEFAULT_PIXEL_SIZE_UM

    transcripts = pd.read_parquet(parquet_path, engine="pyarrow")

    gene_col = _pick_column(transcripts, ["feature_name", "gene", "gene_id", "geneID"])
    x_col = _pick_column(transcripts, ["x_location", "x", "x_global_px", "x_centroid"])
    y_col = _pick_column(transcripts, ["y_location", "y", "y_global_px", "y_centroid"])
    count_col = _pick_column(transcripts, ["counts", "count", "n_counts"], required=False)

    table = transcripts[[gene_col, x_col, y_col]].copy()
    table = table.dropna(subset=[gene_col, x_col, y_col])
    if table.empty:
        # Fail early with a clear message; otherwise the ROI computation below
        # would crash on NaN mins with an opaque error.
        raise ValueError("No transcripts with valid gene/x/y values found in the parquet input")

    # Convert micron coordinates → full-resolution pixel coordinates.
    # Xenium: x_location runs along image width (columns), y_location along height (rows).
    table["row_px"] = (table[y_col].astype(float) / pixel_size).round().astype(int)
    table["column_px"] = (table[x_col].astype(float) / pixel_size).round().astype(int)

    # Zero-base BEFORE any binning so that (0, 0) is the top-left of the
    # transcript ROI in both the binned and unbinned case. (Previously the
    # binned path kept absolute pixel bins, which did not line up with the
    # ROI-cropped morphology image produced below.)
    r0 = table["row_px"].min()
    c0 = table["column_px"].min()
    table["row"] = table["row_px"] - r0
    table["column"] = table["column_px"] - c0

    # Optionally merge into user-specified bins (bin_size in pixels, 1 = no binning).
    if bin_size > 1:
        table["row"] = (table["row"] / bin_size).astype(int)
        table["column"] = (table["column"] / bin_size).astype(int)

    if count_col is None:
        # Xenium transcripts.parquet carries one row per detected transcript.
        table["counts"] = 1
    else:
        table["counts"] = transcripts.loc[table.index, count_col].fillna(1).astype(int)

    table = table.rename(columns={gene_col: "geneID"})[["geneID", "row", "column", "counts"]]
    table = table.groupby(["geneID", "row", "column"], as_index=False)["counts"].sum()

    out_tsv = Path(output_tsv)
    out_tsv.parent.mkdir(parents=True, exist_ok=True)
    table.to_csv(out_tsv, sep="\t", index=False)

    # ── Morphology image ────────────────────────────────────────────────────────
    # Load and collapse to 2D (max projection across any leading z/channel axes).
    image = tifffile.imread(morphology_image_path)
    image = np.squeeze(np.asarray(image))
    if image.ndim == 2:
        image2d = image
    elif image.ndim >= 3:
        h, w = image.shape[-2], image.shape[-1]
        image2d = image.reshape((-1, h, w)).max(axis=0)
    else:
        raise ValueError(f"Unsupported morphology image shape: {image.shape}")

    # Crop to the pixel ROI covered by transcripts. Bounds come from the same
    # micron→pixel conversion as the table (round is monotonic, so the crop
    # origin equals r0/c0 before clamping), then are clamped into the image.
    r_min_abs = int(round(float(transcripts[y_col].min()) / pixel_size))
    r_max_abs = int(round(float(transcripts[y_col].max()) / pixel_size))
    c_min_abs = int(round(float(transcripts[x_col].min()) / pixel_size))
    c_max_abs = int(round(float(transcripts[x_col].max()) / pixel_size))

    H, W = image2d.shape
    r_min_abs = max(0, r_min_abs)
    r_max_abs = min(H - 1, r_max_abs)
    c_min_abs = max(0, c_min_abs)
    c_max_abs = min(W - 1, c_max_abs)

    cropped = image2d[r_min_abs:r_max_abs + 1, c_min_abs:c_max_abs + 1]

    out_morph2d = Path(output_morph2d_tif)
    out_morph2d.parent.mkdir(parents=True, exist_ok=True)
    tifffile.imwrite(out_morph2d, cropped)

    # ── BGI file (SCS/spateo format) ────────────────────────────────────────────
    # spateo read_bgi_agg: x → AnnData dim-0 (height/rows), y → dim-1 (width/cols).
    # Our table["row"] = height direction, table["column"] = width direction.
    bgi = pd.DataFrame({
        "geneID": table["geneID"],
        "x": table["row"].astype(int),
        "y": table["column"].astype(int),
        "MIDCounts": table["counts"].astype(int),
    })

    out_bgi_tsv = Path(output_bgi_tsv)
    out_bgi_tsv.parent.mkdir(parents=True, exist_ok=True)
    bgi.to_csv(out_bgi_tsv, sep="\t", index=False)

    # Summary metrics for QC / MultiQC-style reporting downstream.
    metrics = {
        "n_rows": int(len(table)),
        "n_unique_genes": int(table["geneID"].nunique()),
        "row_min": int(table["row"].min()) if len(table) else 0,
        "row_max": int(table["row"].max()) if len(table) else 0,
        "column_min": int(table["column"].min()) if len(table) else 0,
        "column_max": int(table["column"].max()) if len(table) else 0,
        "pixel_size_um": float(pixel_size),
        "bin_size": float(bin_size),
        "morph2d_H": int(cropped.shape[0]),
        "morph2d_W": int(cropped.shape[1]),
    }

    pd.DataFrame(
        {"metric": list(metrics.keys()), "value": list(metrics.values())}
    ).to_csv(metrics_tsv, sep="\t", index=False)


if __name__ == "__main__":
transcripts_parquet: str = "${transcripts_parquet}"
morphology_image: str = "${morphology_image}"
experiment_xenium: str = "${experiment_xenium}"
prefix: str = "${prefix}"
bin_size: float = float("${task.ext.bin_size ?: 1.0}")

output_tsv = f"{prefix}/scs_input.tsv"
output_bgi_tsv = f"{prefix}/scs_input_bgi.tsv"
output_morph2d_tif = f"{prefix}/morph2d.tif"
metrics_tsv = f"{prefix}/xenium2scs_metrics.tsv"

convert_xenium_to_scs(
parquet_path=transcripts_parquet,
output_tsv=output_tsv,
output_bgi_tsv=output_bgi_tsv,
morphology_image_path=morphology_image,
output_morph2d_tif=output_morph2d_tif,
metrics_tsv=metrics_tsv,
experiment_xenium_path=experiment_xenium,
bin_size=bin_size,
)

with open("versions.yml", "w", encoding="utf-8") as fobj:
fobj.write('"${task.process}":\\n')
fobj.write('xenium2scs: "1.0.0"\\n')
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ params {
format = 'xenium' // preset value set as `xenium`

// Segmentation methods
image_seg_methods = ["cellpose", "xeniumranger", "baysor"]
image_seg_methods = ["cellpose", "xeniumranger", "baysor", "scs"]
transcript_seg_methods = ["proseg", "segger", "baysor"]
segfree_methods = ["ficture", "baysor"]

Expand Down
4 changes: 2 additions & 2 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
},
"method": {
"type": "string",
"enum": ["cellpose", "xeniumranger", "baysor", "proseg", "segger", "ficture"],
"enum": ["cellpose", "xeniumranger", "baysor", "proseg", "segger", "ficture", "scs"],
"description": "Segmentation method to run."
},
"gene_panel": {
Expand Down Expand Up @@ -174,7 +174,7 @@
"type": "array",
"items": {
"type": "string",
"enum": ["cellpose", "xeniumranger", "baysor"]
"enum": ["cellpose", "xeniumranger", "baysor", "scs"]
},
"description": "List of image-based segmentation methods."
},
Expand Down
30 changes: 30 additions & 0 deletions subworkflows/local/scs_prepare_morphology/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
//
// Prepare SCS-compatible input from Xenium transcripts and pass morphology for downstream SCS segmentation
//

include { XENIUM2SCS } from '../../../modules/local/utility/xenium2scs/main'

workflow SCS_PREPARE_MORPHOLOGY {
    take:
    ch_morphology_image    // channel: [ val(meta), ["path-to-morphology.ome.tif"] ]
    ch_transcripts_parquet // channel: [ val(meta), ["path-to-transcripts.parquet"] ]
    ch_experiment_xenium   // channel: [ val(meta), ["path-to-experiment.xenium"] ]

    main:

    ch_versions = channel.empty()

    // convert Xenium transcripts.parquet to SCS tabular input format.
    // The joins key on meta (element 0), so all three input channels must
    // carry identical meta maps for a sample to pass through; samples
    // missing from any channel are silently dropped by join's default mode.
    ch_xenium2scs_input = ch_transcripts_parquet
        .join(ch_morphology_image, by: 0)
        .join(ch_experiment_xenium, by: 0)
    XENIUM2SCS(ch_xenium2scs_input)
    ch_versions = ch_versions.mix(XENIUM2SCS.out.versions)

    emit:
    // Original full-resolution image is passed through untouched alongside
    // the derived 2-D crop and the SCS/BGI table.
    morphology_image = ch_morphology_image     // channel: [ val(meta), ["path-to-morphology.ome.tif"] ]
    morphology_2d = XENIUM2SCS.out.morph2d_tif // channel: [ val(meta), ["path-to-morph2d.tif"] ]
    scs_input_bgi_tsv = XENIUM2SCS.out.scs_input_bgi_tsv // channel: [ val(meta), ["path-to-scs_input_bgi.tsv"] ]
    metrics = XENIUM2SCS.out.metrics           // channel: [ val(meta), ["path-to-xenium2scs_metrics.tsv"] ]
    versions = ch_versions                     // channel: [ versions.yml ]
}
16 changes: 14 additions & 2 deletions subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ def validateInputParameters() {
//
def validateXeniumBundle(ch_samplesheet) {

// define xenium bundle directory structure - required files
def bundle_required_files = [
// define xenium bundle directory structure - full required files
def bundle_required_files_full = [
"cell_boundaries.csv.gz",
"cell_boundaries.parquet",
"cell_feature_matrix.h5",
Expand All @@ -272,6 +272,18 @@ def validateXeniumBundle(ch_samplesheet) {
"transcripts.zarr.zip",
]

// minimal files required for SCS input preparation
def bundle_required_files_scs = [
"experiment.xenium",
"morphology.ome.tif",
"transcripts.parquet",
]

// choose required files based on mode/method
def bundle_required_files = (params.mode == 'image' && params.method == 'scs')
? bundle_required_files_scs
: bundle_required_files_full

// bundle optional files
def bundle_optional_files = [
"analysis.tar.gz",
Expand Down
24 changes: 24 additions & 0 deletions workflows/spatialxe.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ include { BAYSOR_RUN_PRIOR_SEGMENTATION_MASK } from '../subworkflo
include { CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/cellpose_resolift_morphology_ome_tif/main'
include { CELLPOSE_BAYSOR_IMPORT_SEGMENTATION } from '../subworkflows/local/cellpose_baysor_import_segmentation/main'
include { XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main'
include { SCS_PREPARE_MORPHOLOGY } from '../subworkflows/local/scs_prepare_morphology/main'

// segmentation-free subworkflows
include { BAYSOR_GENERATE_SEGFREE } from '../subworkflows/local/baysor_generate_segfree/main'
Expand Down Expand Up @@ -74,6 +75,7 @@ workflow SPATIALXE {
ch_bundle_path = Channel.empty()
ch_preview_html = Channel.empty()
ch_exp_metadata = Channel.empty()
ch_experiment_xenium = Channel.empty()
ch_gene_synonyms = Channel.empty()
ch_multiqc_files = Channel.empty()
ch_multiqc_report = Channel.empty()
Expand Down Expand Up @@ -172,6 +174,14 @@ workflow SPATIALXE {
return [exp_metadata]
}

ch_experiment_xenium = ch_input.map { meta, bundle, _image ->
def exp_metadata = file(
bundle.toString().replaceFirst(/\/$/, '') + "/experiment.xenium",
checkIfExists: true
)
return [meta, exp_metadata]
}

// get baysor xenium config
ch_config = Channel.fromPath(
"${projectDir}/assets/config/xenium.toml",
Expand Down Expand Up @@ -370,6 +380,20 @@ workflow SPATIALXE {
ch_redefined_bundle = CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF.out.redefined_bundle
ch_coordinate_space = CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF.out.coordinate_space
}

// prepare transcripts and morphology for SCS segmentation
if (params.method == 'scs') {

SCS_PREPARE_MORPHOLOGY(
ch_morphology_image,
ch_transcripts_parquet,
ch_experiment_xenium,
)
// TODO: Add SCS segment module here when ready
// For now, just preparing inputs
ch_redefined_bundle = ch_bundle_path
ch_coordinate_space = Channel.value("microns")
}
}

/*
Expand Down
Loading