Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -528,13 +528,14 @@ process {
]
}

withName: SCANPY_RANKGENESGROUPS {
ext.prefix = { meta.id + '_characteristic_genes' }
withName: "RANKGENESGROUPS_.*" {
publishDir = [
path: { "${params.outdir}/per_group/${meta.id}/characteristic_genes" },
path: { "${params.outdir}/differential_expression" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.endsWith(".png") || (params.save_intermediates && !filename.equals('versions.yml')) ? filename : null },
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
ext.sample_group_col = params.sample_group_col // set to null to avoid sample group comparisons
ext.method = params.rankgenesgroups_method
}

// Finalize
Expand Down
1 change: 1 addition & 0 deletions modules/local/adata/merge/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ process ADATA_MERGE {
tuple val(meta), path("*_outer.h5ad") , emit: outer
tuple val(meta), path("*_inner.h5ad") , emit: inner
tuple val(meta), path("*_integrate.h5ad"), emit: integrate
path("celltype_predictions/*.csv") , emit: celltypes
path "gene_intersection.pkl" , emit: intersect_genes
path "versions.yml" , emit: versions

Expand Down
14 changes: 14 additions & 0 deletions modules/local/adata/merge/templates/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,20 @@ def get_columns(adata):
adata_outer.write("${prefix}_outer.h5ad")
adata_inner.write("${prefix}_inner.h5ad")

# we write the cell type predictions to csv files
os.makedirs("celltype_predictions", exist_ok=True)
for col in adata_outer.obs.columns:
if col.startswith("celltypes__"):
# split the column names into three parts
tool_name = col.split("__")[1]
model_name = "__".join(col.split("__")[2:])
adata_outer.obs[col].to_csv(f"celltype_predictions/{tool_name}_{model_name}.csv")

# we have one more column that is the label column
# if there are multiple values, we write it to a csv file
if adata_outer.obs["label"].nunique() > 1:
adata_outer.obs["label"].to_csv("celltype_predictions/label.csv")

if base_path:
adata_integrate = adata_inner[~adata_inner.obs.index.isin(adata_base.obs.index)]

Expand Down
10 changes: 9 additions & 1 deletion modules/local/celltypes/celltypist/templates/celltypist.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,20 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
adata.obs.index, ["predicted_labels", "conf_score"]
]

df_celltypist.columns = [f"celltypist:{model_name}", f"celltypist:{model_name}:conf"]
df_celltypist.columns = [f"celltypes__celltypist__{model_name}", f"celltypist__{model_name}__conf"]
df_list.append(df_celltypist)

df_celltypist = pd.concat(df_list, axis=1)
df_celltypist.to_pickle("${prefix}.pkl")

# write the cell type columns (those starting with celltypes__celltypist__) to a csv file
df_celltypist.loc[:, df_celltypist.columns.str.startswith("celltypes__celltypist__")].to_csv(f"{prefix}_predictions.csv")

# write the confidence score columns to a csv file
df_celltypist.loc[:, df_celltypist.columns.str.startswith("celltypist__")].to_csv(f"{prefix}_predictions_conf.csv")

df_celltypist = df_celltypist.loc[:, df_celltypist.columns.str.startswith("celltypes__celltypist__")]

adata.obs = pd.concat([adata.obs, df_celltypist], axis=1)
adata.write_h5ad(f"{prefix}.h5ad")

Expand Down
9 changes: 5 additions & 4 deletions modules/local/celltypes/singler/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ process CELLTYPES_SINGLER {

output:
//tuple val(meta), path("*.h5ad"), emit: h5ad
tuple val(meta), path("*.csv") , emit: obs
tuple val(meta), path("*_distribution.pdf"), emit: distribution
tuple val(meta), path("*_heatmap.pdf") , emit: heatmap
path "versions.yml" , emit: versions
tuple val(meta), path("*_predictions.csv") , emit: obs
tuple val(meta), path("*_predictions_conf.csv") , emit: predictions_conf
tuple val(meta), path("*_distribution.pdf") , emit: distribution
tuple val(meta), path("*_heatmap.pdf") , emit: heatmap
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
24 changes: 20 additions & 4 deletions modules/local/celltypes/singler/templates/singleR.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ for (ref_idx in seq_along(references)) {
reflabel %in% colnames(colData(reference))
)
predictions <- SingleR(
test = assay(sce, 'counts'),
test = assay(sce, 'decontXcounts'),
ref = reference,
labels = colData(reference)[[reflabel]]
)
Expand Down Expand Up @@ -101,10 +101,13 @@ for (ref_idx in seq_along(references)) {
height = 12
)

# change column names
label_col <- which(colnames(predictions) == "pruned.labels")
colnames(predictions) <- paste0(
colnames(predictions), "_", prefix, "_", ref_name
"singler__", ref_name, "__", colnames(predictions)
)
prediction_results[[ref]] <- predictions
colnames(predictions)[label_col] <- paste0("celltypes__singler__", ref_name)
prediction_results[[ref_name]] <- predictions
}

prediction_nrows <- lapply(prediction_results, nrow)
Expand All @@ -118,14 +121,27 @@ stopifnot(

# This is predicated on the assumption that all prediction data frames have exactly
# the same rows ... see the stopifnot clause above
# Remove names from the list to prevent them being added as column prefixes
# we handled name collision in the previous loop explicitly
names(prediction_results) <- NULL
predictions <- do.call(cbind, prediction_results)

# we write the actual cell type columns to a csv file
write.csv(
predictions,
predictions[, grepl("celltypes__singler__", colnames(predictions))],
file = paste0(prefix, "_predictions.csv"),
row.names = TRUE
)

# write all confidence scores to a csv file
write.csv(
predictions[, !grepl("celltypes__singler__", colnames(predictions))],
file = paste0(prefix, "_predictions_conf.csv"),
row.names = TRUE
)



# Capturing version information, as before
versions <- list(
"${task.process}" = list(
Expand Down
1 change: 1 addition & 0 deletions modules/local/scanpy/leiden/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ process SCANPY_LEIDEN {

output:
tuple val(meta), path("${prefix}.h5ad"), emit: h5ad
tuple val(meta), path("${prefix}.csv"), emit: clusters
path "${prefix}.pkl", emit: obs
path "${prefix}.png", emit: plots, optional: true
path "${prefix}_mqc.json", emit: multiqc_files, optional: true
Expand Down
1 change: 1 addition & 0 deletions modules/local/scanpy/leiden/templates/leiden.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
sc.tl.leiden(adata, **kwargs)

adata.obs[[key_added]].to_pickle(f"{prefix}.pkl")
adata.obs[[key_added]].to_csv(f"{prefix}.csv")
adata.write_h5ad(f"{prefix}.h5ad")

if "${plot_umap}" == "true":
Expand Down
8 changes: 5 additions & 3 deletions modules/local/scanpy/rankgenesgroups/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ process SCANPY_RANKGENESGROUPS {
: 'community.wave.seqera.io/library/pyyaml_scanpy:3c9e9f631f45553d'}"

input:
tuple val(meta), path(h5ad)
tuple val(meta), path(h5ad), path(cluster_csv)

output:
tuple val(meta), path(prefix), emit: outdir
tuple val(meta), path("*.h5ad"), emit: h5ad, optional: true
path "*.pkl", emit: uns, optional: true
path "*.png", emit: plots, optional: true
Expand All @@ -21,7 +22,8 @@ process SCANPY_RANKGENESGROUPS {
task.ext.when == null || task.ext.when

script:
obs_key = meta.obs_key ?: "leiden"
prefix = task.ext.prefix ?: "${meta.id}"
prefix = task.ext.prefix ?: cluster_csv.baseName
sample_group_col = task.ext.sample_group_col ?: null
method = task.ext.method ?: 'wilcoxon'
template('rank_genes_groups.py')
}
Loading
Loading