diff --git a/conf/modules.config b/conf/modules.config
index beae2c2f..e78ccaa5 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -528,13 +528,14 @@ process {
         ]
     }
 
-    withName: SCANPY_RANKGENESGROUPS {
-        ext.prefix = { meta.id + '_characteristic_genes' }
+    withName: "RANKGENESGROUPS_.*" {
         publishDir = [
-            path: { "${params.outdir}/per_group/${meta.id}/characteristic_genes" },
+            path: { "${params.outdir}/differential_expression" },
             mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.endsWith(".png") || (params.save_intermediates && !filename.equals('versions.yml')) ? filename : null },
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
         ]
+        ext.sample_group_col = params.sample_group_col // set to null to skip sample group comparisons
+        ext.method = params.rankgenesgroups_method
     }
 
     // Finalize
diff --git a/modules/local/adata/merge/main.nf b/modules/local/adata/merge/main.nf
index 4e8d31c5..093e81f1 100644
--- a/modules/local/adata/merge/main.nf
+++ b/modules/local/adata/merge/main.nf
@@ -16,6 +16,7 @@ process ADATA_MERGE {
     tuple val(meta), path("*_outer.h5ad")    , emit: outer
     tuple val(meta), path("*_inner.h5ad")    , emit: inner
     tuple val(meta), path("*_integrate.h5ad"), emit: integrate
+    path("celltype_predictions/*.csv")       , emit: celltypes
     path "gene_intersection.pkl"             , emit: intersect_genes
     path "versions.yml"                      , emit: versions
diff --git a/modules/local/adata/merge/templates/merge.py b/modules/local/adata/merge/templates/merge.py
index 153f0c15..ea3a8aaa 100644
--- a/modules/local/adata/merge/templates/merge.py
+++ b/modules/local/adata/merge/templates/merge.py
@@ -73,6 +73,20 @@ def get_columns(adata):
 adata_outer.write("${prefix}_outer.h5ad")
 adata_inner.write("${prefix}_inner.h5ad")
 
+# we write the cell type predictions to csv files
+os.makedirs("celltype_predictions", exist_ok=True)
+for col in adata_outer.obs.columns:
+    if col.startswith("celltypes__"):
+        # split the column name into its prefix, tool name and model name
+        tool_name = col.split("__")[1]
+        model_name = "__".join(col.split("__")[2:])
+        adata_outer.obs[col].to_csv(f"celltype_predictions/{tool_name}_{model_name}.csv")
+
+# we have one more annotation column: the label column
+# if it contains more than one unique value, we write it to a csv file as well
+if adata_outer.obs["label"].nunique() > 1:
+    adata_outer.obs["label"].to_csv("celltype_predictions/label.csv")
+
 if base_path:
     adata_integrate = adata_inner[~adata_inner.obs.index.isin(adata_base.obs.index)]
diff --git a/modules/local/celltypes/celltypist/templates/celltypist.py b/modules/local/celltypes/celltypist/templates/celltypist.py
index 90147ee3..2e3df595 100644
--- a/modules/local/celltypes/celltypist/templates/celltypist.py
+++ b/modules/local/celltypes/celltypist/templates/celltypist.py
@@ -66,12 +66,20 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
         adata.obs.index, ["predicted_labels", "conf_score"]
     ]
-    df_celltypist.columns = [f"celltypist:{model_name}", f"celltypist:{model_name}:conf"]
+    df_celltypist.columns = [f"celltypes__celltypist__{model_name}", f"celltypist__{model_name}__conf"]
     df_list.append(df_celltypist)
 
 df_celltypist = pd.concat(df_list, axis=1)
 df_celltypist.to_pickle("${prefix}.pkl")
 
+# write the cell type columns (those starting with celltypes__celltypist__) to a csv file
+df_celltypist.loc[:, df_celltypist.columns.str.startswith("celltypes__celltypist__")].to_csv(f"{prefix}_predictions.csv")
+
+# write the confidence scores to a csv file
+df_celltypist.loc[:, df_celltypist.columns.str.startswith("celltypist__")].to_csv(f"{prefix}_predictions_conf.csv")
+
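+# As an illustration (hypothetical model name): for a model "Immune_All_Low",
+# the frame now holds "celltypes__celltypist__Immune_All_Low" (predicted labels)
+# and "celltypist__Immune_All_Low__conf" (confidence scores); only the label
+# columns are kept below and appended to adata.obs.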
+df_celltypist = df_celltypist.loc[:, df_celltypist.columns.str.startswith("celltypes__celltypist__")]
+
 adata.obs = pd.concat([adata.obs, df_celltypist], axis=1)
 adata.write_h5ad(f"{prefix}.h5ad")
diff --git a/modules/local/celltypes/singler/main.nf b/modules/local/celltypes/singler/main.nf
index 370256bc..f39a752a 100644
--- a/modules/local/celltypes/singler/main.nf
+++ b/modules/local/celltypes/singler/main.nf
@@ -11,10 +11,11 @@ process CELLTYPES_SINGLER {
     output:
     //tuple val(meta), path("*.h5ad"), emit: h5ad
-    tuple val(meta), path("*.csv")             , emit: obs
-    tuple val(meta), path("*_distribution.pdf"), emit: distribution
-    tuple val(meta), path("*_heatmap.pdf")     , emit: heatmap
-    path "versions.yml"                        , emit: versions
+    tuple val(meta), path("*_predictions.csv")     , emit: obs
+    tuple val(meta), path("*_predictions_conf.csv"), emit: predictions_conf
+    tuple val(meta), path("*_distribution.pdf")    , emit: distribution
+    tuple val(meta), path("*_heatmap.pdf")         , emit: heatmap
+    path "versions.yml"                            , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/local/celltypes/singler/templates/singleR.R b/modules/local/celltypes/singler/templates/singleR.R
index 449f993d..d8fb46e9 100644
--- a/modules/local/celltypes/singler/templates/singleR.R
+++ b/modules/local/celltypes/singler/templates/singleR.R
@@ -62,7 +62,7 @@ for (ref_idx in seq_along(references)) {
         reflabel %in% colnames(colData(reference))
     )
     predictions <- SingleR(
-        test = assay(sce, 'counts'),
+        test = assay(sce, 'decontXcounts'),
         ref = reference,
         labels = colData(reference)[[reflabel]]
     )
@@ -101,10 +101,13 @@ for (ref_idx in seq_along(references)) {
         height = 12
     )
 
+    # change column names
+    label_col <- which(colnames(predictions) == "pruned.labels")
     colnames(predictions) <- paste0(
-        colnames(predictions), "_", prefix, "_", ref_name
+        "singler__", ref_name, "__", colnames(predictions)
    )
-    prediction_results[[ref]] <- predictions
+    colnames(predictions)[label_col] <- paste0("celltypes__singler__", ref_name)
+    prediction_results[[ref_name]] <- predictions
 }
 
 prediction_nrows <- lapply(prediction_results, nrow)
@@ -118,14 +121,27 @@ stopifnot(
 # This is predicated in the assumption that all prediction data frames have exactly
 # the same rows ... see the stopifnot clause above
+# Remove names from the list to prevent them from being added as column prefixes;
+# we handled name collisions in the previous loop explicitly
+names(prediction_results) <- NULL
 predictions <- do.call(cbind, prediction_results)
 
+# we write the actual cell type columns to a csv file
 write.csv(
-    predictions,
+    predictions[, grepl("celltypes__singler__", colnames(predictions))],
     file = paste0(prefix, "_predictions.csv"),
     row.names = TRUE
 )
 
+# write all confidence scores to a csv file
+write.csv(
+    predictions[, !grepl("celltypes__singler__", colnames(predictions))],
+    file = paste0(prefix, "_predictions_conf.csv"),
+    row.names = TRUE
+)
+
 # Capturing version information, as before
 versions <- list(
     "${task.process}" = list(
diff --git a/modules/local/scanpy/leiden/main.nf b/modules/local/scanpy/leiden/main.nf
index 358c188a..d516fff9 100644
--- a/modules/local/scanpy/leiden/main.nf
+++ b/modules/local/scanpy/leiden/main.nf
@@ -15,6 +15,7 @@ process SCANPY_LEIDEN {
     output:
     tuple val(meta), path("${prefix}.h5ad"), emit: h5ad
+    tuple val(meta), path("${prefix}.csv"), emit: clusters
     path "${prefix}.pkl", emit: obs
     path "${prefix}.png", emit: plots, optional: true
     path "${prefix}_mqc.json", emit: multiqc_files, optional: true
diff --git a/modules/local/scanpy/leiden/templates/leiden.py b/modules/local/scanpy/leiden/templates/leiden.py
index d285d7e3..0bd01675 100644
--- a/modules/local/scanpy/leiden/templates/leiden.py
+++ b/modules/local/scanpy/leiden/templates/leiden.py
@@ -28,6 +28,7 @@ sc.tl.leiden(adata, **kwargs)
 
 adata.obs[[key_added]].to_pickle(f"{prefix}.pkl")
+adata.obs[[key_added]].to_csv(f"{prefix}.csv")
 adata.write_h5ad(f"{prefix}.h5ad")
 
 if "${plot_umap}" == "true":
diff --git a/modules/local/scanpy/rankgenesgroups/main.nf b/modules/local/scanpy/rankgenesgroups/main.nf
index 9bc04d57..251e63b0 100644
--- a/modules/local/scanpy/rankgenesgroups/main.nf
+++ b/modules/local/scanpy/rankgenesgroups/main.nf
@@ -8,9 +8,10 @@ process SCANPY_RANKGENESGROUPS {
         : 'community.wave.seqera.io/library/pyyaml_scanpy:3c9e9f631f45553d'}"
 
     input:
-    tuple val(meta), path(h5ad)
+    tuple val(meta), path(h5ad), path(cluster_csv)
 
     output:
+    tuple val(meta), path(prefix), emit: outdir
     tuple val(meta), path("*.h5ad"), emit: h5ad, optional: true
     path "*.pkl", emit: uns, optional: true
     path "*.png", emit: plots, optional: true
@@ -21,7 +22,8 @@ process SCANPY_RANKGENESGROUPS {
     task.ext.when == null || task.ext.when
 
     script:
-    obs_key = meta.obs_key ?: "leiden"
-    prefix = task.ext.prefix ?: "${meta.id}"
+    prefix = task.ext.prefix ?: cluster_csv.baseName
+    sample_group_col = task.ext.sample_group_col ?: null
+    method = task.ext.method ?: 'wilcoxon'
     template('rank_genes_groups.py')
 }
diff --git a/modules/local/scanpy/rankgenesgroups/templates/rank_genes_groups.py b/modules/local/scanpy/rankgenesgroups/templates/rank_genes_groups.py
index 5117b488..35ea8fea 100644
--- a/modules/local/scanpy/rankgenesgroups/templates/rank_genes_groups.py
+++ b/modules/local/scanpy/rankgenesgroups/templates/rank_genes_groups.py
@@ -1,10 +1,23 @@
 #!/usr/bin/env python3
 
+# In this script, we do the following for each cell group column:
+# 1. for each cell group recorded in the column, run per-group DE
+#    vs each other group and vs rest
+# 2. if sample groups are provided, subset to the cells in each cell group
+#    and run per-sample-group DE vs each other sample group and vs rest
+# 3. if sample groups are provided, subset to the cells in each sample group
+#    and compare cell groups within that subset (for this cell_group_col)
+
 import os
-import json
 import platform
-import base64
-import pickle
+import re
+from pathlib import Path
+import warnings
+import json
+from typing import Tuple
+
+warnings.filterwarnings("ignore", category=RuntimeWarning)
 
 os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba"
 os.environ["MPLCONFIGDIR"] = "./tmp/matplotlib"
@@ -15,60 +28,243 @@ import yaml
 
 from threadpoolctl import threadpool_limits
 
+
 threadpool_limits(int("${task.cpus}"))
 sc.settings.n_jobs = int("${task.cpus}")
 
-adata = sc.read_h5ad("${h5ad}")
-prefix = "${prefix}"
-kwargs = {
-    "groupby": "${obs_key}",
-    "pts": True
-}
 
+def sanitize_filename(filename: str) -> str:
+    """
+    Replaces invalid characters in a filename with underscores.
+    """
+    # Build the pattern programmatically to avoid relying on backslash escapes
+    # that could be altered by Nextflow template processing.
+    invalid_specials = re.escape('<>:"/' + chr(92) + '|?*')
+    control_chars = ''.join(chr(c) for c in range(0x00, 0x20))
+    invalid_chars_pattern = f'[{invalid_specials}{control_chars}]'
+    sanitized_filename = re.sub(invalid_chars_pattern, "_", str(filename))
+    sanitized_filename = sanitized_filename.strip(" .")
+    if not sanitized_filename:
+        sanitized_filename = "untitled"
+    return sanitized_filename
+
+
+def ensure_categorical_str(adata_obj: sc.AnnData, column: str) -> None:
+    """
+    Ensures the specified column in the AnnData object is a categorical string.
+    """
+    adata_obj.obs[column] = adata_obj.obs[column].astype(str).astype("category")
+
+
+def valid_groups(adata_obj: sc.AnnData, column: str, min_cells: int = 3) -> list[str]:
+    """
+    Returns the groups in the specified column of the AnnData object that have
+    at least min_cells cells.
+    """
+    vc = adata_obj.obs[column].astype(str).value_counts()
+    return vc[vc >= min_cells].index.astype("str").tolist()
+
+
+# (No JSON loading/saving helpers; we keep results in memory and write a single
+# combined JSON file at the end of the run.)
 
-if adata.obs["${obs_key}"].value_counts().size > 1:
-    sc.pp.log1p(adata)
-    sc.tl.rank_genes_groups(adata, **kwargs)
-    rgg_dict = adata.uns["rank_genes_groups"]
 
+def run_and_save_de(adata_obj: sc.AnnData, groupby: str, group: str, reference, out_dir: Path, method: str) -> Tuple[Path, Path]:
+    """
+    Runs differential expression analysis for the specified group and reference
+    group, and saves the results to the specified output directory.
+ """ + out_dir.mkdir(parents=True, exist_ok=True) + # Run differential expression analysis + sc.tl.rank_genes_groups( + adata_obj, + use_raw=False, + groupby=groupby, + groups=[group], + reference=reference, + pts=True, + method=method, + ) + # Get the results of the differential expression analysis + rgg_df = sc.get.rank_genes_groups_df(adata_obj, group=None) + # Create a standardized filename for the output files + ref_name = reference if isinstance(reference, str) else str(reference) + basename = f"{sanitize_filename(group)}_vs_{sanitize_filename(ref_name)}.csv" + csv_path = out_dir / f"{basename}.csv" + png_path = out_dir / f"{basename}.png" + rgg_df.to_csv(csv_path, index=False) + sc.pl.rank_genes_groups(adata_obj, show=False) + plt.savefig(png_path) + plt.close() - pickle.dump(rgg_dict, open(f"{prefix}.pkl", "wb")) - adata.write_h5ad(f"{prefix}.h5ad") + return (csv_path, png_path) - # Plot - sc.pl.rank_genes_groups(adata, show=False) - path = f"{prefix}.png" - plt.savefig(path) +adata = sc.read_h5ad("${h5ad}") +sample_group_col = "${sample_group_col}" +method = "${method}" + +cell_groups_csv = "${cluster_csv}" +cell_groups_df = pd.read_csv(cell_groups_csv, index_col=0) +outdir = Path("${prefix}") +outdir.mkdir(parents=True, exist_ok=True) - # MultiQC - with open(path, "rb") as f_plot, open("${prefix}_mqc.json", "w") as f_json: - image_string = base64.b64encode(f_plot.read()).decode("utf-8") - image_html = f'
' +# Align order +cell_groups_df = cell_groups_df[cell_groups_df.index.isin(adata.obs.index)] +adata = adata[adata.obs.index.isin(cell_groups_df.index)] +cell_groups_df = cell_groups_df.reindex(adata.obs.index) +cell_group_cols = list(cell_groups_df.columns) - custom_json = { - "id": "${prefix}", - "parent_id": "${meta.integration}", - "parent_name": "${meta.integration}", - "parent_description": "Results of the ${meta.integration} integration.", +# ensure there are cells left +if adata.n_obs == 0 or cell_groups_df.shape[0] == 0: + raise ValueError(f"No cells left after aligning adata and cluster_csv") - "section_name": "${meta.id} characteristic genes", - "plot_type": "image", - "data": image_html, - } +# Add grouping columns to adata.obs +for cell_group_col in cell_group_cols: + if cell_group_col not in adata.obs.columns: + adata.obs[cell_group_col] = cell_groups_df[cell_group_col].astype("str").astype("category") - json.dump(custom_json, f_json) +has_sample_groups = sample_group_col not in (None, "", "null") +if has_sample_groups: + if sample_group_col not in adata.obs.columns: + raise ValueError(f"sample_group_col '{sample_group_col}' not found in adata.obs") + # if all cells have the same sample group, we skip the sample group comparisons + if adata.obs[sample_group_col].nunique() == 1: + has_sample_groups = False + print(f"All cells are in the same sample group.") else: - print("Skipping rank_genes_groups computation as the group has less than 2 unique values.") + print(f"No sample group column provided.") +if not has_sample_groups: + print(f"Skipping sample group comparisons.") + +# Maintain three in-memory dictionaries for results; we will emit a single JSON +# file that contains them all at the beginning and end of the run. +results_all_cells: dict = {c: {} for c in cell_group_cols} +# only initialize the sample group within cell group dictionary if sample groups are provided +results_sample_within_cell = ({c: {} for c in cell_group_cols} if has_sample_groups else None) +results_cell_within_sample = ({c: {} for c in cell_group_cols} if has_sample_groups else None) + +for cell_group_col in cell_group_cols: + print(f"Differential analysis for cell group column: {cell_group_col}") + ensure_categorical_str(adata, cell_group_col) + col_outdir = outdir / sanitize_filename(cell_group_col) + + # keep only the groups that have at least 3 cells + groups = valid_groups(adata, cell_group_col, min_cells=3) + # if there are fewer than 2 groups with at least 3 cells, we skip the differential analysis + if len(groups) < 2: + print(f"\t- Skipping {cell_group_col}: fewer than 2 groups with ≥3 cells") + continue + + for group in groups: + # ------------------------------------------------------------ + # 1) For each cell group column: per-group DE vs each other group and vs rest + # ------------------------------------------------------------ + print(f"\t- Processing cell group '{group}'") + group_outdir = col_outdir / "cell_groups" / sanitize_filename(group) + + # Pairwise comparisons + for other in [g for g in groups if g != group]: + print(f"\t\t- {group} vs {other}") + csv_path, png_path = run_and_save_de(adata, cell_group_col, group, other, group_outdir, method) + results_all_cells.setdefault(cell_group_col, {}).setdefault(group, {})[other] = { + "csv_path": str(csv_path), + "png_path": str(png_path), + } + # Versus rest (only meaningful if >2 groups) + if len(groups) > 2: + other = "rest" + print(f"\t\t- {group} vs {other}") + csv_path, png_path = run_and_save_de(adata, cell_group_col, group, 
+            results_all_cells.setdefault(cell_group_col, {}).setdefault(group, {})[other] = {
+                "csv_path": str(csv_path),
+                "png_path": str(png_path),
+            }
 
-# Versions
+        # ------------------------------------------------------------
+        # 2) Within each cell group: per-sample-group DE vs each other sample group and vs rest
+        # ------------------------------------------------------------
+        if has_sample_groups:
+            subset = adata[adata.obs[cell_group_col].astype(str) == group].copy()
+            ensure_categorical_str(subset, cell_group_col)
+            ensure_categorical_str(subset, sample_group_col)
+
+            sample_groups = valid_groups(subset, sample_group_col, min_cells=3)
+            if len(sample_groups) < 2:
+                print("\t\t- Skipping within-group sample comparisons: fewer than 2 sample groups with ≥3 cells")
+                continue
+            for sg in sample_groups:
+                print(f"\t\t- Sample group '{sg}' within cell group '{group}'")
+                sg_outdir = group_outdir / sanitize_filename(sg)
+                for other in [x for x in sample_groups if x != sg]:
+                    print(f"\t\t\t- {sg} vs {other}")
+                    csv_path, png_path = run_and_save_de(subset, sample_group_col, sg, other, sg_outdir, method)
+                    results_sample_within_cell.setdefault(cell_group_col, {}).setdefault(group, {}).setdefault(sg, {})[other] = {
+                        "csv_path": str(csv_path),
+                        "png_path": str(png_path),
+                    }
+                if len(sample_groups) > 2:
+                    other = "rest"
+                    print(f"\t\t\t- {sg} vs {other}")
+                    csv_path, png_path = run_and_save_de(subset, sample_group_col, sg, other, sg_outdir, method)
+                    results_sample_within_cell.setdefault(cell_group_col, {}).setdefault(group, {}).setdefault(sg, {})[other] = {
+                        "csv_path": str(csv_path),
+                        "png_path": str(png_path),
+                    }
+
+    # ------------------------------------------------------------
+    # 3) For each sample group: subset, then compare cell groups within that subset (for this cell_group_col)
+    # ------------------------------------------------------------
+    if has_sample_groups:
+        all_sample_groups = valid_groups(adata, sample_group_col, min_cells=3)
+        for sg in all_sample_groups:
+            print(f"\t- Processing sample group subset for column '{cell_group_col}': {sg}")
+            subset = adata[adata.obs[sample_group_col].astype(str) == sg].copy()
+            ensure_categorical_str(subset, sample_group_col)
+            ensure_categorical_str(subset, cell_group_col)
+            cg_sample_dir = col_outdir / "sample_groups" / sanitize_filename(sg)
+            groups_in_subset = valid_groups(subset, cell_group_col, min_cells=3)
+            if len(groups_in_subset) < 2:
+                print(f"\t\t- Skipping {cell_group_col} in sample '{sg}': fewer than 2 groups with ≥3 cells")
+                continue
+            for g in groups_in_subset:
+                g_dir = cg_sample_dir / sanitize_filename(g)
+                for other in [x for x in groups_in_subset if x != g]:
+                    print(f"\t\t\t- {g} vs {other} within sample '{sg}'")
+                    csv_path, png_path = run_and_save_de(subset, cell_group_col, g, other, g_dir, method)
+                    results_cell_within_sample.setdefault(cell_group_col, {}).setdefault(sg, {}).setdefault(g, {})[other] = {
+                        "csv_path": str(csv_path),
+                        "png_path": str(png_path),
+                    }
+                if len(groups_in_subset) > 2:
+                    print(f"\t\t\t- {g} vs rest within sample '{sg}'")
+                    csv_path, png_path = run_and_save_de(subset, cell_group_col, g, "rest", g_dir, method)
+                    results_cell_within_sample.setdefault(cell_group_col, {}).setdefault(sg, {}).setdefault(g, {})["rest"] = {
+                        "csv_path": str(csv_path),
+                        "png_path": str(png_path),
+                    }
+
+# Write the combined results to a single JSON file
+combined_results = {
+    "cell_groups_all_cells": results_all_cells,
"sample_groups_within_each_cell_group": results_sample_within_cell, + "cell_groups_within_each_sample_group": results_cell_within_sample, +} +with open(outdir / "differential_expression_results.json", "w") as f: + json.dump(combined_results, f, indent=2) versions = { "${task.process}": { "python": platform.python_version(), "scanpy": sc.__version__, - "pandas": pd.__version__ - } + "pandas": pd.__version__, + }, } with open("versions.yml", "w") as f: yaml.dump(versions, f) + +# Write final combined JSON snapshot at end of script +final_combined_results = { + "cell_group_all_cells": results_all_cells, + "sample_group_within_cell_group": results_sample_within_cell, + "cell_group_within_sample_group": results_cell_within_sample, +} diff --git a/nextflow.config b/nextflow.config index 8a01e97f..8874bb27 100644 --- a/nextflow.config +++ b/nextflow.config @@ -50,6 +50,10 @@ params { celltypist_model = '' celldex_reference = '' + // Differential expression options + rankgenesgroups_method = 'wilcoxon' + sample_group_col = null + // Pipeline options qc_only = false skip_liana = false diff --git a/nextflow_schema.json b/nextflow_schema.json index ee3c0332..ba02790e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -221,6 +221,28 @@ } } }, + + "differential_expression_options": { + "title": "Tool options", + "type": "object", + "fa_icon": "fas fa-tools", + "description": "Options for various tools used in the pipeline.", + "properties": { + "sample_group_col": { + "type": "string", + "description": "The column in the AnnData object that contains the sample group information", + "help_text": "The column in the AnnData object that contains the sample group information", + "pattern": "^([a-zA-Z0-9_]*(,[a-zA-Z0-9_]*)*)?$" + }, + "rankgenesgroups_method": { + "type": "string", + "default": "wilcoxon", + "description": "Method to use for the rank_genes_groups step", + "help_text": "Method to use for the rank_genes_groups step. 
+                    "enum": ["logreg", "t-test", "wilcoxon", "t-test_overestim_var"]
+                }
+            }
+        },
         "tool_options": {
             "title": "Tool options",
             "type": "object",
@@ -540,6 +562,9 @@
         {
             "$ref": "#/$defs/clustering_options"
         },
+        {
+            "$ref": "#/$defs/differential_expression_options"
+        },
         {
             "$ref": "#/$defs/tool_options"
         },
diff --git a/subworkflows/local/cluster/main.nf b/subworkflows/local/cluster/main.nf
index debea2ed..6d7dbeae 100644
--- a/subworkflows/local/cluster/main.nf
+++ b/subworkflows/local/cluster/main.nf
@@ -19,6 +19,7 @@ workflow CLUSTER {
     ch_obsm          = channel.empty()
     ch_multiqc_files = channel.empty()
     ch_h5ad          = channel.empty()
+    ch_clusters      = channel.empty()
 
     if (global) {
         ch_h5ad = ch_h5ad.mix(ch_input.map { meta, h5ad -> [meta + [subset: "global"], h5ad] })
@@ -71,7 +72,7 @@ workflow CLUSTER {
     ch_obs = ch_obs.mix(LEIDEN.out.obs)
     ch_h5ad_clustering = LEIDEN.out.h5ad
     ch_multiqc_files = ch_multiqc_files.mix(LEIDEN.out.multiqc_files)
-
+    ch_clusters = ch_clusters.mix(LEIDEN.out.clusters)
     ch_entropy = LEIDEN.out.h5ad.multiMap { meta, h5ad ->
         h5ad: [meta, h5ad]
         group_col: meta.id + "_leiden"
@@ -87,6 +88,7 @@ workflow CLUSTER {
     obsm            = ch_obsm             // channel: [ pkl ]
     h5ad_neighbors  = ch_h5ad_neighbours  // channel: [ integration, h5ad ]
     h5ad_clustering = ch_h5ad_clustering  // channel: [ integration, h5ad ]
+    clusters        = ch_clusters         // channel: [ meta, csv ]
    multiqc_files   = ch_multiqc_files    // channel: [ json ]
    versions        = ch_versions         // channel: [ versions.yml ]
 }
diff --git a/subworkflows/local/combine.nf b/subworkflows/local/combine.nf
index 2346f36c..1ed1f3b3 100644
--- a/subworkflows/local/combine.nf
+++ b/subworkflows/local/combine.nf
@@ -14,6 +14,7 @@ workflow COMBINE {
     ch_obs       = channel.empty()
     ch_var       = channel.empty()
     ch_obsm      = channel.empty()
+    ch_celltypes = channel.empty()
 
     ADATA_MERGE(
         ch_h5ad.map { _meta, h5ad -> [[id: "merged"], h5ad] }.groupTuple(),
@@ -23,6 +24,7 @@ workflow COMBINE {
     ch_outer = ADATA_MERGE.out.outer
     ch_inner = ADATA_MERGE.out.inner
     ch_versions = ch_versions.mix(ADATA_MERGE.out.versions)
+    ch_celltypes = ADATA_MERGE.out.celltypes
 
     INTEGRATE(
         ADATA_MERGE.out.integrate,
@@ -64,6 +66,7 @@ workflow COMBINE {
     emit:
     h5ad         = ch_outer        // channel: [ merged, h5ad ]
     h5ad_inner   = ch_inner        // channel: [ merged, h5ad ]
+    celltypes    = ch_celltypes    // channel: [ csv ]
     integrations = ch_integrations // channel: [ integration, h5ad ]
     var          = ch_var          // channel: [ pkl ]
     obs          = ch_obs          // channel: [ pkl ]
diff --git a/subworkflows/local/differential_expression.nf b/subworkflows/local/differential_expression.nf
new file mode 100644
index 00000000..ac622ff0
--- /dev/null
+++ b/subworkflows/local/differential_expression.nf
@@ -0,0 +1,41 @@
+include { SCANPY_RANKGENESGROUPS as RANKGENESGROUPS_CELLTYPES } from '../../modules/local/scanpy/rankgenesgroups'
+include { SCANPY_RANKGENESGROUPS as RANKGENESGROUPS_CLUSTERS  } from '../../modules/local/scanpy/rankgenesgroups'
+
+
+workflow DIFFERENTIAL_EXPRESSION {
+    take:
+    ch_h5ad      // channel: [ meta, h5ad ]
+    ch_celltypes // channel: [ celltype ]
+    ch_clusters  // channel: [ meta, cluster ]
+
+    main:
+    ch_versions      = channel.empty()
+    ch_uns           = channel.empty()
+    ch_multiqc_files = channel.empty()
+    ch_outdirs       = channel.empty()
+
+    // pair every integrated h5ad with every cell type prediction table
+    ch_input_celltypes = ch_h5ad.combine(ch_celltypes.flatten())
+    RANKGENESGROUPS_CELLTYPES(ch_input_celltypes)
+    ch_outdirs = ch_outdirs.mix(RANKGENESGROUPS_CELLTYPES.out.outdir)
+    ch_versions = ch_versions.mix(RANKGENESGROUPS_CELLTYPES.out.versions)
+    ch_uns = ch_uns.mix(RANKGENESGROUPS_CELLTYPES.out.uns)
+    ch_multiqc_files = ch_multiqc_files.mix(RANKGENESGROUPS_CELLTYPES.out.multiqc_files)
+
+    ch_input_clusters = ch_h5ad
+        .map { meta, h5ad -> [meta.integration, h5ad] }
+        .combine(ch_clusters.map { meta, cluster -> [meta.integration, cluster] }, by: 0)
+        .map { id, h5ad, cluster -> [[id: id], h5ad, cluster] }
+
+    RANKGENESGROUPS_CLUSTERS(ch_input_clusters)
+    ch_outdirs = ch_outdirs.mix(RANKGENESGROUPS_CLUSTERS.out.outdir)
+    ch_versions = ch_versions.mix(RANKGENESGROUPS_CLUSTERS.out.versions)
+    ch_uns = ch_uns.mix(RANKGENESGROUPS_CLUSTERS.out.uns)
+    ch_multiqc_files = ch_multiqc_files.mix(RANKGENESGROUPS_CLUSTERS.out.multiqc_files)
+
+    emit:
+    outdirs       = ch_outdirs       // channel: [ outdir ]
+    uns           = ch_uns           // channel: [ pkl ]
+    multiqc_files = ch_multiqc_files // channel: [ json ]
+    versions      = ch_versions      // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/per_group.nf b/subworkflows/local/per_group.nf
index 940cf801..2a7ae494 100644
--- a/subworkflows/local/per_group.nf
+++ b/subworkflows/local/per_group.nf
@@ -28,13 +28,6 @@ workflow PER_GROUP {
         ch_uns = ch_uns.mix(LIANA_RANKAGGREGATE.out.uns)
     }
 
-    if (!params.skip_rankgenesgroups) {
-        SCANPY_RANKGENESGROUPS(ch_no_neighbors)
-        ch_versions = ch_versions.mix(SCANPY_RANKGENESGROUPS.out.versions)
-        ch_uns = ch_uns.mix(SCANPY_RANKGENESGROUPS.out.uns)
-        ch_multiqc_files = ch_multiqc_files.mix(SCANPY_RANKGENESGROUPS.out.multiqc_files)
-    }
-
     emit:
     uns           = ch_uns           // channel: [ pkl ]
     multiqc_files = ch_multiqc_files // channel: [ json ]
diff --git a/workflows/scdownstream.nf b/workflows/scdownstream.nf
index 0b2221cd..f9e84b39 100644
--- a/workflows/scdownstream.nf
+++ b/workflows/scdownstream.nf
@@ -19,6 +19,7 @@ include { paramsSummaryMap } from 'plugin/nf-schema'
 include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_scdownstream_pipeline'
+include { DIFFERENTIAL_EXPRESSION } from '../subworkflows/local/differential_expression'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -42,6 +43,8 @@ workflow SCDOWNSTREAM {
     ch_uns           = channel.empty()
     ch_layers        = channel.empty()
     ch_multiqc_files = channel.empty()
+    ch_clusters      = channel.empty()
+    ch_celltypes     = channel.empty()
 
     if (params.input) {
         ch_obs_per_sample = channel.empty()
@@ -98,6 +101,7 @@ workflow SCDOWNSTREAM {
         ch_obsm = ch_obsm.mix(COMBINE.out.obsm)
         ch_integrations = ch_integrations.mix(COMBINE.out.integrations)
         ch_finalization_base = COMBINE.out.h5ad
+        ch_celltypes = ch_celltypes.mix(COMBINE.out.celltypes)
 
         ch_label_grouping = COMBINE.out.h5ad_inner
         grouping_col = "label"
@@ -134,6 +138,7 @@ workflow SCDOWNSTREAM {
         ch_obs = ch_obs.mix(CLUSTER.out.obs)
         ch_obsm = ch_obsm.mix(CLUSTER.out.obsm)
         ch_multiqc_files = ch_multiqc_files.mix(CLUSTER.out.multiqc_files)
+        ch_clusters = ch_clusters.mix(CLUSTER.out.clusters)
 
         if (params.pseudobulk) {
             PSEUDOBULKING(
@@ -154,6 +159,18 @@ workflow SCDOWNSTREAM {
         ch_uns = ch_uns.mix(PER_GROUP.out.uns)
         ch_multiqc_files = ch_multiqc_files.mix(PER_GROUP.out.multiqc_files)
 
+        if (!params.skip_rankgenesgroups) {
+            DIFFERENTIAL_EXPRESSION(
+                ch_integrations,
+                ch_celltypes,
+                ch_clusters,
+            )
+            ch_versions = ch_versions.mix(DIFFERENTIAL_EXPRESSION.out.versions)
+            ch_uns = ch_uns.mix(DIFFERENTIAL_EXPRESSION.out.uns)
+            ch_multiqc_files = ch_multiqc_files.mix(DIFFERENTIAL_EXPRESSION.out.multiqc_files)
+        }
+
         FINALIZE(ch_finalization_base, ch_obs, ch_var, ch_obsm, ch_obsp, ch_uns, ch_layers)
         ch_versions = ch_versions.mix(FINALIZE.out.versions)
     }
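Example usage of the new options (a sketch, not part of the diff; assumes the merged AnnData has a "condition" column in .obs to serve as the sample group):

    // e.g. in a custom config passed via -c, or the equivalent -params-file entries
    params {
        sample_group_col       = 'condition'
        rankgenesgroups_method = 'wilcoxon'
    }

With sample_group_col left at null, only the per-cell-group comparisons (step 1 of the template) are run.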