Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions modules/local/utility/xenium2scs/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Convert a Xenium bundle (transcripts.parquet + morphology image +
// experiment.xenium metadata) into SCS/BGI-style tabular input plus a
// 2-D cropped morphology image, by rendering the xenium2scs.py template.
process XENIUM2SCS {
    tag "$meta.id"
    label 'process_low'

    // Container-only module: no conda recipe exists for this stack, which is
    // why the script/stub blocks below abort under conda/mamba profiles.
    container "khersameesh24/spatialdata:0.2.6"

    input:
    // transcripts_parquet : Xenium transcripts.parquet
    // morphology_image    : full-resolution morphology image (e.g. morphology.ome.tif)
    // experiment_xenium   : experiment.xenium JSON metadata
    tuple val(meta), path(transcripts_parquet), path(morphology_image), path(experiment_xenium)

    output:
    // NOTE(review): the template also writes ${prefix}/scs_input.tsv, which is
    // not declared as an output here — confirm whether it should be emitted.
    tuple val(meta), path("${prefix}/scs_input_bgi.tsv"), emit: scs_input_bgi_tsv
    tuple val(meta), path("${prefix}/morph2d.tif"), emit: morph2d_tif
    tuple val(meta), path("${prefix}/xenium2scs_metrics.tsv"), emit: metrics
    path ("versions.yml"), emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Exit if running this module with -profile conda / -profile mamba
    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
        error("XENIUM2SCS module does not support Conda. Please use Docker / Singularity / Podman instead.")
    }

    // prefix is read by the output declarations above and by the template.
    prefix = task.ext.prefix ?: "${meta.id}"

    // Render templates/xenium2scs.py; Nextflow interpolates the input paths
    // and prefix into the Python script before execution.
    template('xenium2scs.py')

    stub:
    // Exit if running this module with -profile conda / -profile mamba
    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
        error("XENIUM2SCS module does not support Conda. Please use Docker / Singularity / Podman instead.")
    }

    prefix = task.ext.prefix ?: "${meta.id}"

    // Minimal placeholder outputs matching the declared outputs above.
    // The \t escapes inside this triple-quoted Groovy string are expanded to
    // real tab characters by Groovy before bash evaluates the heredocs.
    """
    mkdir -p ${prefix}

    cat <<'EOF' > ${prefix}/scs_input.tsv
    geneID\trow\tcolumn\tcounts
    TEST\t0\t0\t1
    EOF

    cat <<'EOF' > ${prefix}/scs_input_bgi.tsv
    geneID\tx\ty\tMIDCounts
    TEST\t0\t0\t1
    EOF

    python - <<'PY'
    import numpy as np
    import tifffile

    img = np.zeros((16, 16), dtype=np.uint16)
    tifffile.imwrite('${prefix}/morph2d.tif', img)
    PY

    cat <<'EOF' > ${prefix}/xenium2scs_metrics.tsv
    metric\tvalue
    n_rows\t1
    EOF

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
    xenium2scs: "1.0.0"
    END_VERSIONS
    """
}
181 changes: 181 additions & 0 deletions modules/local/utility/xenium2scs/templates/xenium2scs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#!/usr/bin/env python3

import json
from pathlib import Path
import numpy as np
import pandas as pd
import tifffile

# Xenium full-resolution image: 1 pixel = 0.2125 µm (10x Genomics spec).
# Transcript x_location / y_location are in microns.
# To overlay transcripts on the full-res image: pixel = micron / pixel_size.
XENIUM_DEFAULT_PIXEL_SIZE_UM = 0.2125


def _pick_column(df: pd.DataFrame,
candidates: list[str],
required: bool = True):
for name in candidates:
if name in df.columns:
return name
if required:
raise ValueError(f"Could not find any of the required columns: {candidates}")
return None


def _read_pixel_size(experiment_xenium_path: str) -> float:
    """Read pixel_size (µm/px) from experiment.xenium; fall back to 0.2125.

    ``experiment.xenium`` is the JSON metadata file shipped in a Xenium
    bundle. Only the failure modes expected here trigger the fallback —
    missing/unreadable file, invalid JSON, a non-object JSON root, or a
    missing/non-numeric ``pixel_size`` value. Anything else propagates so
    genuine bugs are not silently masked (the original blanket
    ``except Exception`` hid them).
    """
    try:
        with open(experiment_xenium_path, encoding="utf-8") as fh:
            meta = json.load(fh)
        return float(meta.get("pixel_size", XENIUM_DEFAULT_PIXEL_SIZE_UM))
    except (OSError, ValueError, TypeError, AttributeError):
        # OSError: file missing/unreadable; ValueError: JSONDecodeError or a
        # non-numeric string; TypeError: float(None)/float(dict);
        # AttributeError: JSON root is not an object (no .get method).
        return XENIUM_DEFAULT_PIXEL_SIZE_UM


def convert_xenium_to_scs(parquet_path: str,
                          output_tsv: str,
                          output_bgi_tsv: str,
                          morphology_image_path: str,
                          output_morph2d_tif: str,
                          metrics_tsv: str,
                          experiment_xenium_path: str = "",
                          bin_size: float = 1.0):
    """
    Convert Xenium transcripts to SCS/BGI format with correct pixel-space coordinates.

    Xenium x_location / y_location are in microns; the morphology image full
    resolution is ``pixel_size`` µm/px (0.2125 by Xenium spec). Micron
    coordinates are converted to pixels (pixel = micron / pixel_size),
    zero-based to the transcript ROI, and the morphology image is cropped to
    that same ROI so that table coordinates and cropped image pixels share a
    single (0, 0) origin — in both the binned and unbinned case.

    Parameters
    ----------
    parquet_path : path to Xenium transcripts.parquet.
    output_tsv : per-pixel table ``geneID/row/column/counts``.
    output_bgi_tsv : same data in spateo/BGI layout ``geneID/x/y/MIDCounts``.
    morphology_image_path : full-resolution morphology image (TIFF).
    output_morph2d_tif : 2-D, ROI-cropped morphology image output path.
    metrics_tsv : two-column ``metric/value`` summary output path.
    experiment_xenium_path : optional experiment.xenium JSON to read pixel_size.
    bin_size : bin width in pixels (default 1 = no binning).

    Raises
    ------
    ValueError : required columns missing, no valid transcripts remain after
        dropping NA rows, or the morphology image shape is unsupported.
    """
    pixel_size = _read_pixel_size(experiment_xenium_path) if experiment_xenium_path else XENIUM_DEFAULT_PIXEL_SIZE_UM

    transcripts = pd.read_parquet(parquet_path, engine="pyarrow")

    gene_col = _pick_column(transcripts, ["feature_name", "gene", "gene_id", "geneID"])
    x_col = _pick_column(transcripts, ["x_location", "x", "x_global_px", "x_centroid"])
    y_col = _pick_column(transcripts, ["y_location", "y", "y_global_px", "y_centroid"])
    count_col = _pick_column(transcripts, ["counts", "count", "n_counts"], required=False)

    table = transcripts[[gene_col, x_col, y_col]].copy()
    table = table.dropna(subset=[gene_col, x_col, y_col])
    if table.empty:
        # Fail early with a clear message; otherwise the ROI computation below
        # would crash on NaN mins with an opaque error.
        raise ValueError("No transcripts with valid gene/x/y values found in the parquet input")

    # Convert micron coordinates → full-resolution pixel coordinates.
    # Xenium: x_location runs along image width (columns), y_location along height (rows).
    table["row_px"] = (table[y_col].astype(float) / pixel_size).round().astype(int)
    table["column_px"] = (table[x_col].astype(float) / pixel_size).round().astype(int)

    # Zero-base BEFORE any binning so that (0, 0) is the top-left of the
    # transcript ROI in both the binned and unbinned case. (Previously the
    # binned path kept absolute pixel bins, which did not line up with the
    # ROI-cropped morphology image produced below.)
    r0 = table["row_px"].min()
    c0 = table["column_px"].min()
    table["row"] = table["row_px"] - r0
    table["column"] = table["column_px"] - c0

    # Optionally merge into user-specified bins (bin_size in pixels, 1 = no binning).
    if bin_size > 1:
        table["row"] = (table["row"] / bin_size).astype(int)
        table["column"] = (table["column"] / bin_size).astype(int)

    if count_col is None:
        # Xenium transcripts.parquet carries one row per detected transcript.
        table["counts"] = 1
    else:
        table["counts"] = transcripts.loc[table.index, count_col].fillna(1).astype(int)

    table = table.rename(columns={gene_col: "geneID"})[["geneID", "row", "column", "counts"]]
    table = table.groupby(["geneID", "row", "column"], as_index=False)["counts"].sum()

    out_tsv = Path(output_tsv)
    out_tsv.parent.mkdir(parents=True, exist_ok=True)
    table.to_csv(out_tsv, sep="\t", index=False)

    # ── Morphology image ────────────────────────────────────────────────────────
    # Load and collapse to 2D (max projection across any leading z/channel axes).
    image = tifffile.imread(morphology_image_path)
    image = np.squeeze(np.asarray(image))
    if image.ndim == 2:
        image2d = image
    elif image.ndim >= 3:
        h, w = image.shape[-2], image.shape[-1]
        image2d = image.reshape((-1, h, w)).max(axis=0)
    else:
        raise ValueError(f"Unsupported morphology image shape: {image.shape}")

    # Crop to the pixel ROI covered by transcripts. Bounds come from the same
    # micron→pixel conversion as the table (round is monotonic, so the crop
    # origin equals r0/c0 before clamping), then are clamped into the image.
    r_min_abs = int(round(float(transcripts[y_col].min()) / pixel_size))
    r_max_abs = int(round(float(transcripts[y_col].max()) / pixel_size))
    c_min_abs = int(round(float(transcripts[x_col].min()) / pixel_size))
    c_max_abs = int(round(float(transcripts[x_col].max()) / pixel_size))

    H, W = image2d.shape
    r_min_abs = max(0, r_min_abs)
    r_max_abs = min(H - 1, r_max_abs)
    c_min_abs = max(0, c_min_abs)
    c_max_abs = min(W - 1, c_max_abs)

    cropped = image2d[r_min_abs:r_max_abs + 1, c_min_abs:c_max_abs + 1]

    out_morph2d = Path(output_morph2d_tif)
    out_morph2d.parent.mkdir(parents=True, exist_ok=True)
    tifffile.imwrite(out_morph2d, cropped)

    # ── BGI file (SCS/spateo format) ────────────────────────────────────────────
    # spateo read_bgi_agg: x → AnnData dim-0 (height/rows), y → dim-1 (width/cols).
    # Our table["row"] = height direction, table["column"] = width direction.
    bgi = pd.DataFrame({
        "geneID": table["geneID"],
        "x": table["row"].astype(int),
        "y": table["column"].astype(int),
        "MIDCounts": table["counts"].astype(int),
    })

    out_bgi_tsv = Path(output_bgi_tsv)
    out_bgi_tsv.parent.mkdir(parents=True, exist_ok=True)
    bgi.to_csv(out_bgi_tsv, sep="\t", index=False)

    # Summary metrics for QC / MultiQC-style reporting downstream.
    metrics = {
        "n_rows": int(len(table)),
        "n_unique_genes": int(table["geneID"].nunique()),
        "row_min": int(table["row"].min()) if len(table) else 0,
        "row_max": int(table["row"].max()) if len(table) else 0,
        "column_min": int(table["column"].min()) if len(table) else 0,
        "column_max": int(table["column"].max()) if len(table) else 0,
        "pixel_size_um": float(pixel_size),
        "bin_size": float(bin_size),
        "morph2d_H": int(cropped.shape[0]),
        "morph2d_W": int(cropped.shape[1]),
    }

    pd.DataFrame(
        {"metric": list(metrics.keys()), "value": list(metrics.values())}
    ).to_csv(metrics_tsv, sep="\t", index=False)


if __name__ == "__main__":
transcripts_parquet: str = "${transcripts_parquet}"
morphology_image: str = "${morphology_image}"
experiment_xenium: str = "${experiment_xenium}"
prefix: str = "${prefix}"
bin_size: float = float("${task.ext.bin_size ?: 1.0}")

output_tsv = f"{prefix}/scs_input.tsv"
output_bgi_tsv = f"{prefix}/scs_input_bgi.tsv"
output_morph2d_tif = f"{prefix}/morph2d.tif"
metrics_tsv = f"{prefix}/xenium2scs_metrics.tsv"

convert_xenium_to_scs(
parquet_path=transcripts_parquet,
output_tsv=output_tsv,
output_bgi_tsv=output_bgi_tsv,
morphology_image_path=morphology_image,
output_morph2d_tif=output_morph2d_tif,
metrics_tsv=metrics_tsv,
experiment_xenium_path=experiment_xenium,
bin_size=bin_size,
)

with open("versions.yml", "w", encoding="utf-8") as fobj:
fobj.write('"${task.process}":\\n')
fobj.write('xenium2scs: "1.0.0"\\n')
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ params {
format = 'xenium' // preset value set as `xenium`

// Segmentation methods
image_seg_methods = ["cellpose", "xeniumranger", "baysor"]
image_seg_methods = ["cellpose", "xeniumranger", "baysor", "scs"]
transcript_seg_methods = ["proseg", "segger", "baysor"]
segfree_methods = ["ficture", "baysor"]

Expand Down
4 changes: 2 additions & 2 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
},
"method": {
"type": "string",
"enum": ["cellpose", "xeniumranger", "baysor", "proseg", "segger", "ficture"],
"enum": ["cellpose", "xeniumranger", "baysor", "proseg", "segger", "ficture", "scs"],
"description": "Segmentation method to run."
},
"gene_panel": {
Expand Down Expand Up @@ -174,7 +174,7 @@
"type": "array",
"items": {
"type": "string",
"enum": ["cellpose", "xeniumranger", "baysor"]
"enum": ["cellpose", "xeniumranger", "baysor", "scs"]
},
"description": "List of image-based segmentation methods."
},
Expand Down
30 changes: 30 additions & 0 deletions subworkflows/local/scs_prepare_morphology/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
//
// Prepare SCS-compatible input from Xenium transcripts and pass morphology for downstream SCS segmentation
//

include { XENIUM2SCS } from '../../../modules/local/utility/xenium2scs/main'

workflow SCS_PREPARE_MORPHOLOGY {
    take:
    ch_morphology_image    // channel: [ val(meta), ["path-to-morphology.ome.tif"] ]
    ch_transcripts_parquet // channel: [ val(meta), ["path-to-transcripts.parquet"] ]
    ch_experiment_xenium   // channel: [ val(meta), ["path-to-experiment.xenium"] ]

    main:

    ch_versions = channel.empty()

    // convert Xenium transcripts.parquet to SCS tabular input format.
    // The joins key on meta (element 0), so all three input channels must
    // carry identical meta maps for a sample to pass through; samples
    // missing from any channel are silently dropped by join's default mode.
    ch_xenium2scs_input = ch_transcripts_parquet
        .join(ch_morphology_image, by: 0)
        .join(ch_experiment_xenium, by: 0)
    XENIUM2SCS(ch_xenium2scs_input)
    ch_versions = ch_versions.mix(XENIUM2SCS.out.versions)

    emit:
    // Original full-resolution image is passed through untouched alongside
    // the derived 2-D crop and the SCS/BGI table.
    morphology_image = ch_morphology_image     // channel: [ val(meta), ["path-to-morphology.ome.tif"] ]
    morphology_2d = XENIUM2SCS.out.morph2d_tif // channel: [ val(meta), ["path-to-morph2d.tif"] ]
    scs_input_bgi_tsv = XENIUM2SCS.out.scs_input_bgi_tsv // channel: [ val(meta), ["path-to-scs_input_bgi.tsv"] ]
    metrics = XENIUM2SCS.out.metrics           // channel: [ val(meta), ["path-to-xenium2scs_metrics.tsv"] ]
    versions = ch_versions                     // channel: [ versions.yml ]
}
16 changes: 14 additions & 2 deletions subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ def validateInputParameters() {
//
def validateXeniumBundle(ch_samplesheet) {

// define xenium bundle directory structure - required files
def bundle_required_files = [
// define xenium bundle directory structure - full required files
def bundle_required_files_full = [
"cell_boundaries.csv.gz",
"cell_boundaries.parquet",
"cell_feature_matrix.h5",
Expand All @@ -272,6 +272,18 @@ def validateXeniumBundle(ch_samplesheet) {
"transcripts.zarr.zip",
]

// minimal files required for SCS input preparation
def bundle_required_files_scs = [
"experiment.xenium",
"morphology.ome.tif",
"transcripts.parquet",
]

// choose required files based on mode/method
def bundle_required_files = (params.mode == 'image' && params.method == 'scs')
? bundle_required_files_scs
: bundle_required_files_full

// bundle optional files
def bundle_optional_files = [
"analysis.tar.gz",
Expand Down
24 changes: 24 additions & 0 deletions workflows/spatialxe.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ include { BAYSOR_RUN_PRIOR_SEGMENTATION_MASK } from '../subworkflo
include { CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/cellpose_resolift_morphology_ome_tif/main'
include { CELLPOSE_BAYSOR_IMPORT_SEGMENTATION } from '../subworkflows/local/cellpose_baysor_import_segmentation/main'
include { XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main'
include { SCS_PREPARE_MORPHOLOGY } from '../subworkflows/local/scs_prepare_morphology/main'

// segmentation-free subworkflows
include { BAYSOR_GENERATE_SEGFREE } from '../subworkflows/local/baysor_generate_segfree/main'
Expand Down Expand Up @@ -74,6 +75,7 @@ workflow SPATIALXE {
ch_bundle_path = Channel.empty()
ch_preview_html = Channel.empty()
ch_exp_metadata = Channel.empty()
ch_experiment_xenium = Channel.empty()
ch_gene_synonyms = Channel.empty()
ch_multiqc_files = Channel.empty()
ch_multiqc_report = Channel.empty()
Expand Down Expand Up @@ -172,6 +174,14 @@ workflow SPATIALXE {
return [exp_metadata]
}

ch_experiment_xenium = ch_input.map { meta, bundle, _image ->
def exp_metadata = file(
bundle.toString().replaceFirst(/\/$/, '') + "/experiment.xenium",
checkIfExists: true
)
return [meta, exp_metadata]
}

// get baysor xenium config
ch_config = Channel.fromPath(
"${projectDir}/assets/config/xenium.toml",
Expand Down Expand Up @@ -370,6 +380,20 @@ workflow SPATIALXE {
ch_redefined_bundle = CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF.out.redefined_bundle
ch_coordinate_space = CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF.out.coordinate_space
}

// prepare transcripts and morphology for SCS segmentation
if (params.method == 'scs') {

SCS_PREPARE_MORPHOLOGY(
ch_morphology_image,
ch_transcripts_parquet,
ch_experiment_xenium,
)
// TODO: Add SCS segment module here when ready
// For now, just preparing inputs
ch_redefined_bundle = ch_bundle_path
ch_coordinate_space = Channel.value("microns")
}
}

/*
Expand Down
Loading