nf-core · nictru · Aug 10, 2025 · Aug 17, 2025 · Sep 7, 2025 · Sep 7, 2025
diff --git a/conf/modules.config b/conf/modules.config
@@ -528,13 +528,14 @@ process {
         ]
     }
 
-    withName: SCANPY_RANKGENESGROUPS {
-        ext.prefix = { meta.id + '_characteristic_genes' }
+    withName: "RANKGENESGROUPS_.*" {
         publishDir = [
-            path: { "${params.outdir}/per_group/${meta.id}/characteristic_genes" },
+            path: { "${params.outdir}/differential_expression" },
             mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.endsWith(".png") || (params.save_intermediates && !filename.equals('versions.yml')) ? filename : null },
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
         ]
+        ext.sample_group_col = params.sample_group_col // set to null to avoid sample group comparisons
+        ext.method = params.rankgenesgroups_method
     }
 
     // Finalize

diff --git a/modules/local/adata/merge/main.nf b/modules/local/adata/merge/main.nf
@@ -16,6 +16,7 @@ process ADATA_MERGE {
     tuple val(meta), path("*_outer.h5ad")    , emit: outer
     tuple val(meta), path("*_inner.h5ad")    , emit: inner
     tuple val(meta), path("*_integrate.h5ad"), emit: integrate
+    path("celltype_predictions/*.csv")       , emit: celltypes, optional: true // merge.py writes no csv when there is no celltypes__ column and only one label value
     path "gene_intersection.pkl"             , emit: intersect_genes
     path "versions.yml"                      , emit: versions
 

diff --git a/modules/local/adata/merge/templates/merge.py b/modules/local/adata/merge/templates/merge.py
@@ -73,6 +73,20 @@ def get_columns(adata):
 adata_outer.write("${prefix}_outer.h5ad")
 adata_inner.write("${prefix}_inner.h5ad")
 
+# Export each cell type prediction column of the outer merge to its own csv file.
+os.makedirs("celltype_predictions", exist_ok=True)
+for col in adata_outer.obs.columns:
+    if not col.startswith("celltypes__"):
+        continue
+    # celltypes__<tool>__<model>: everything after the tool name is re-joined with "__"
+    _, tool_name, *model_parts = col.split("__")
+    model_name = "__".join(model_parts)
+    adata_outer.obs[col].to_csv(f"celltype_predictions/{tool_name}_{model_name}.csv")
+
+# The label column is exported as well, but only when it holds more than one distinct value.
+if adata_outer.obs["label"].nunique() > 1:
+    adata_outer.obs["label"].to_csv("celltype_predictions/label.csv")
+
 if base_path:
     adata_integrate = adata_inner[~adata_inner.obs.index.isin(adata_base.obs.index)]
 

diff --git a/modules/local/celltypes/celltypist/templates/celltypist.py b/modules/local/celltypes/celltypist/templates/celltypist.py
@@ -66,12 +66,20 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
         adata.obs.index, ["predicted_labels", "conf_score"]
     ]
 
-    df_celltypist.columns = [f"celltypist:{model_name}", f"celltypist:{model_name}:conf"]
+    df_celltypist.columns = [f"celltypes__celltypist__{model_name}", f"celltypist__{model_name}__conf"]
     df_list.append(df_celltypist)
 
 df_celltypist = pd.concat(df_list, axis=1)
 df_celltypist.to_pickle("${prefix}.pkl")
 
+# Write the predicted label columns (celltypes__celltypist__<model>) to a csv file.
+df_celltypist.loc[:, df_celltypist.columns.str.startswith("celltypes__celltypist__")].to_csv(f"{prefix}_predictions.csv")
+
+# Write the confidence score columns (celltypist__<model>__conf) to a separate csv file.
+df_celltypist.loc[:, df_celltypist.columns.str.startswith("celltypist__")].to_csv(f"{prefix}_predictions_conf.csv")
+# Keep only the label columns, so the confidence scores are not added to adata.obs below.
+df_celltypist = df_celltypist.loc[:, df_celltypist.columns.str.startswith("celltypes__celltypist__")]
+
 adata.obs = pd.concat([adata.obs, df_celltypist], axis=1)
 adata.write_h5ad(f"{prefix}.h5ad")
 

diff --git a/modules/local/celltypes/singler/main.nf b/modules/local/celltypes/singler/main.nf
@@ -11,10 +11,11 @@ process CELLTYPES_SINGLER {
 
     output:
     //tuple val(meta), path("*.h5ad"), emit: h5ad
-    tuple val(meta), path("*.csv")             , emit: obs
-    tuple val(meta), path("*_distribution.pdf"), emit: distribution
-    tuple val(meta), path("*_heatmap.pdf")     , emit: heatmap
-    path "versions.yml"                        , emit: versions
+    tuple val(meta), path("*_predictions.csv")      , emit: obs
+    tuple val(meta), path("*_predictions_conf.csv") , emit: predictions_conf
+    tuple val(meta), path("*_distribution.pdf")     , emit: distribution
+    tuple val(meta), path("*_heatmap.pdf")          , emit: heatmap
+    path "versions.yml"                             , emit: versions
 
     when:
     task.ext.when == null || task.ext.when

diff --git a/modules/local/celltypes/singler/templates/singleR.R b/modules/local/celltypes/singler/templates/singleR.R
@@ -62,7 +62,7 @@ for (ref_idx in seq_along(references)) {
     reflabel %in% colnames(colData(reference))
   )
   predictions <- SingleR(
-    test = assay(sce, 'counts'),
+    test = assay(sce, 'decontXcounts'),
     ref = reference,
     labels = colData(reference)[[reflabel]]
   )
@@ -101,10 +101,13 @@ for (ref_idx in seq_along(references)) {
     height = 12
   )
 
+  # rename columns: prefix all with singler__<ref_name>__, then give pruned.labels its celltypes__singler__<ref_name> name
+  label_col <- which(colnames(predictions) == "pruned.labels")
   colnames(predictions) <- paste0(
-    colnames(predictions), "_", prefix, "_", ref_name
+    "singler__", ref_name, "__", colnames(predictions)
   )
-  prediction_results[[ref]] <- predictions
+  colnames(predictions)[label_col] <- paste0("celltypes__singler__", ref_name)
+  prediction_results[[ref_name]] <- predictions
 }
 
 prediction_nrows <- lapply(prediction_results, nrow)
@@ -118,14 +121,27 @@ stopifnot(
 
 # This is predicated in the assumption that all prediction data frames have exactly
 # the same rows ... see the stopifnot clause above
+# Remove names from the list to prevent them being added as column prefixes
+# we handled name collision in the previous loop explicitly
+names(prediction_results) <- NULL
 predictions <- do.call(cbind, prediction_results)
 
+# write the cell type label columns to a csv file (drop = FALSE keeps the table and its column/row names with a single reference)
 write.csv(
-  predictions,
+  predictions[, grepl("celltypes__singler__", colnames(predictions)), drop = FALSE],
   file = paste0(prefix, "_predictions.csv"),
   row.names = TRUE
 )
 
+# write all remaining columns (everything that is not a cell type label column) to a second csv file
+write.csv(
+  predictions[, !grepl("celltypes__singler__", colnames(predictions)), drop = FALSE],
+  file = paste0(prefix, "_predictions_conf.csv"),
+  row.names = TRUE
+)
+
+
+
 # Capturing version information, as before
 versions <- list(
   "${task.process}" = list(

diff --git a/modules/local/scanpy/leiden/main.nf b/modules/local/scanpy/leiden/main.nf
@@ -15,6 +15,7 @@ process SCANPY_LEIDEN {
 
     output:
     tuple val(meta), path("${prefix}.h5ad"), emit: h5ad
+    tuple val(meta), path("${prefix}.csv"), emit: clusters
     path "${prefix}.pkl", emit: obs
     path "${prefix}.png", emit: plots, optional: true
     path "${prefix}_mqc.json", emit: multiqc_files, optional: true

diff --git a/modules/local/scanpy/leiden/templates/leiden.py b/modules/local/scanpy/leiden/templates/leiden.py
@@ -28,6 +28,7 @@
 sc.tl.leiden(adata, **kwargs)
 
 adata.obs[[key_added]].to_pickle(f"{prefix}.pkl")
+adata.obs[[key_added]].to_csv(f"{prefix}.csv")
 adata.write_h5ad(f"{prefix}.h5ad")
 
 if "${plot_umap}" == "true":

diff --git a/modules/local/scanpy/rankgenesgroups/main.nf b/modules/local/scanpy/rankgenesgroups/main.nf
@@ -8,9 +8,10 @@ process SCANPY_RANKGENESGROUPS {
         : 'community.wave.seqera.io/library/pyyaml_scanpy:3c9e9f631f45553d'}"
 
     input:
-    tuple val(meta), path(h5ad)
+    tuple val(meta), path(h5ad), path(cluster_csv)
 
     output:
+    tuple val(meta), path(prefix), emit: outdir
     tuple val(meta), path("*.h5ad"), emit: h5ad, optional: true
     path "*.pkl", emit: uns, optional: true
     path "*.png", emit: plots, optional: true
@@ -21,7 +22,8 @@ process SCANPY_RANKGENESGROUPS {
     task.ext.when == null || task.ext.when
 
     script:
-    obs_key = meta.obs_key ?: "leiden"
-    prefix = task.ext.prefix ?: "${meta.id}"
+    prefix = task.ext.prefix ?: cluster_csv.baseName
+    sample_group_col = task.ext.sample_group_col ?: null
+    method = task.ext.method ?: 'wilcoxon'
     template('rank_genes_groups.py')
 }