Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ include { seurat_conversion } from './modules/seurat-conversion'
include { cell_type_consensus } from './modules/cell-type-consensus'
include { cell_type_ewings } from './modules/cell-type-ewings'
include { infercnv_gene_order } from './modules/infercnv-gene-order'
include { export_annotations } from './modules/export-annotations'

// **** Parameter checks ****
include { validateParameters; paramsSummaryLog } from 'plugin/nf-schema'
Expand Down Expand Up @@ -54,13 +55,13 @@ workflow {
.filter{ run_all || it[1] in project_ids }

// Run the merge workflow
merge_sce(sample_ch)
//merge_sce(sample_ch)

// Run the doublet detection workflow
detect_doublets(sample_ch)
//detect_doublets(sample_ch)

// Run the seurat conversion workflow
seurat_conversion(sample_ch)
//seurat_conversion(sample_ch)

// Run the consensus cell type workflow
cell_type_consensus(sample_ch)
Expand All @@ -70,5 +71,11 @@ workflow {
cell_type_ewings(sample_ch.filter{ it[1] == "SCPCP000015" }, cell_type_consensus.out)

// Run the infercnv gene order file workflow
infercnv_gene_order()
//infercnv_gene_order()

// format and export json files with openscpca annotations
// input expected to be sample id, project id, tsv files, annotation column, ontology column, module name
// mix outputs from all cell type modules first
export_ch = cell_type_ewings.out.celltypes
export_annotations(export_ch)
}
13 changes: 12 additions & 1 deletion modules/cell-type-ewings/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,18 @@ workflow cell_type_ewings {
// assign cell types
ewing_assign_celltypes(assign_ch, file(params.cell_type_ewings_auc_thresholds_file))

// add ewing specific metadata to output tuple
celltype_output_ch = ewing_assign_celltypes.out
.map{ sample_id, project_id, assignment_files -> tuple(
sample_id,
project_id,
assignment_files,
"ewing_annotation", // annotation column
"ewing_ontology", // ontology column
"cell-type-ewings" // module name
)}

emit:
aucell = ewing_aucell.out // [sample_id, project_id, [aucell output files], [mean gene expression files]]
celltypes = ewing_assign_celltypes.out // [sample_id, project_id, [cell type assignment files]]
celltypes = celltype_output_ch // [sample_id, project_id, [cell type assignment files], annotation column, ontology column, module name]
}
13 changes: 13 additions & 0 deletions modules/export-annotations/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
This module exports annotations from cell type modules in a uniform format to a public S3 bucket for use in other applications.
Annotations can be found in `s3://openscpca-celltype-annotations-public-access`.

For each library, a JSON file is exported with the following information:

| | |
| -- | -- |
| `barcodes` | An array of unique cell barcodes |
| `openscpca_celltype_annotation` | An array of cell type annotations assigned in `OpenScPCA-nf` |
| `openscpca_celltype_ontology` | An array of Cell Ontology identifiers associated with the cell type annotation. If no Cell Ontology identifiers are assigned, this will be `NA` |
| `module_name` | Name of the original analysis module used to assign cell type annotations in `OpenScPCA-analysis` |
| `openscpca_nf_version` | Version of `OpenScPCA-nf` |
| `release_date` | Release date of input ScPCA data |
60 changes: 60 additions & 0 deletions modules/export-annotations/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env nextflow

// Workflow to format and export openscpca annotations

// Convert the cell type annotation TSV files for one sample into JSON files,
// one JSON file per library ID found in the TSV file names.
//
// Published (copied) to:
//   <params.annotations_bucket>/<params.release_prefix>/<project_id>/<sample_id>
//
// Inputs (single tuple):
//   sample_id             - sample identifier, also used as the task tag
//   project_id            - project identifier, used in the publish path
//   annotations_tsv_files - TSV file(s) with cell type annotations
//   annotation_column     - TSV column passed as --annotation_column
//   ontology_column       - TSV column passed as --ontology_column; the value "NONE" omits the flag
//   module_name           - value passed as --module_name
//
// Output (single tuple):
//   sample_id, project_id, and the `<library_id>_openscpca-annotations.json` files
process format_annotations {
container params.scpcatools_slim_container
tag "${sample_id}"
label 'mem_8'
publishDir "${params.annotations_bucket}/${params.release_prefix}/${project_id}/${sample_id}", mode: 'copy'
input:
tuple val(sample_id),
val(project_id),
path(annotations_tsv_files),
val(annotation_column),
val(ontology_column),
val(module_name)
output:
tuple val(sample_id),
val(project_id),
path(json_files)
script:
// library IDs are the first `SCPCL` + six digit match in each TSV file name
library_ids = annotations_tsv_files.collect{(it.name =~ /SCPCL\d{6}/)[0]}
// expected output file names, one per library ID; referenced by the `output:` declaration
json_files = library_ids.collect{"${it}_openscpca-annotations.json"}
// "NONE" is the sentinel for "no ontology column"; the --ontology_column flag is then left out
ontology_included = "${ontology_column}" != "NONE"
// the shell loop below calls export-celltype-json.R once per library ID,
// picking the input TSV by matching the library ID against the staged file names
"""
for library_id in ${library_ids.join(" ")};do
# get the input files for the library id
annotations_file=\$(ls ${annotations_tsv_files} | grep "\${library_id}")

export-celltype-json.R \
--annotations_tsv_file \$annotations_file \
--annotation_column "${annotation_column}" \
${ontology_included ? "--ontology_column '${ontology_column}'" : ''} \
--module_name ${module_name} \
--release_date ${params.release_prefix} \
--openscpca_nf_version ${workflow.manifest.version} \
--output_json_file \${library_id}_openscpca-annotations.json
done
"""

stub:
// stub run: create empty JSON files with the same names the real script would write
library_ids = annotations_tsv_files.collect{(it.name =~ /SCPCL\d{6}/)[0]}
json_files = library_ids.collect{"${it}_openscpca-annotations.json"}
"""
for library_id in ${library_ids.join(" ")};do
touch \${library_id}_openscpca-annotations.json
done
"""
}

// Sub-workflow that formats cell type annotations as JSON.
// It passes the incoming channel directly to the `format_annotations` process
// and emits that process's output unchanged.
workflow export_annotations {
take:
celltype_ch // [sample_id, project_id, [cell type assignment files], annotation column, ontology column, module name]
main:
// export json
// each channel element becomes one format_annotations task
format_annotations(celltype_ch)

emit:
format_annotations.out // [sample id, project id, annotations json]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env Rscript

# This script is used to create a JSON file of annotations for a single library
# JSON file will include barcodes, annotation column, ontology column (if provided),
# openscpca-nf version, data release date, and module name
#
# Required options:
#   --annotations_tsv_file, --annotation_column, --module_name,
#   --release_date, --openscpca_nf_version, --output_json_file
# Optional:
#   --ontology_column; when omitted (or empty), `openscpca_celltype_ontology`
#   is written as a single NA instead of a per-cell vector

library(optparse)

option_list <- list(
  make_option(
    opt_str = c("--annotations_tsv_file"),
    type = "character",
    help = "Path to TSV file with cell type annotations"
  ),
  make_option(
    opt_str = c("--annotation_column"),
    type = "character",
    help = "Name of the column containing the cell type annotations to use for openscpca_celltype_annotation"
  ),
  make_option(
    opt_str = c("--ontology_column"),
    default = "",
    type = "character",
    help = "Name of the column containing the cell type ontology IDs to use for openscpca_celltype_ontology"
  ),
  make_option(
    opt_str = c("--module_name"),
    type = "character",
    help = "Name of original module in OpenScPCA-analysis"
  ),
  make_option(
    opt_str = c("--release_date"),
    type = "character",
    help = "Release date of data used when generating annotations"
  ),
  make_option(
    opt_str = c("--openscpca_nf_version"),
    type = "character",
    help = "Version of OpenScPCA-nf workflow"
  ),
  make_option(
    opt_str = "--output_json_file",
    type = "character",
    help = "Path to JSON file to save cell type annotations"
  )
)

# Parse options
opt <- parse_args(OptionParser(option_list = option_list))

# Set up -----------------------------------------------------------------------

# make sure input/output exist
# stopifnot() evaluates its conditions in order and stops at the first failure,
# so each NULL check guards the condition that follows it
stopifnot(
  "annotations TSV file must be provided" = !is.null(opt$annotations_tsv_file),
  "annotations TSV file does not exist" = file.exists(opt$annotations_tsv_file),
  "annotation column must be provided" = !is.null(opt$annotation_column),
  "module name must be provided" = !is.null(opt$module_name),
  "release date must be provided" = !is.null(opt$release_date),
  "openscpca-nf version must be provided" = !is.null(opt$openscpca_nf_version),
  "output json file must be provided" = !is.null(opt$output_json_file),
  "output json file must end in .json" = stringr::str_ends(opt$output_json_file, "\\.json")
)

# read in annotations
annotations_df <- readr::read_tsv(opt$annotations_tsv_file)

# check that barcodes and annotation column exist
stopifnot(
  "barcodes column must be present in provided TSV file" = "barcodes" %in% colnames(annotations_df),
  "annotation column is not present in provided TSV file" = opt$annotation_column %in% colnames(annotations_df)
)

# check for ontology ids if provided
# --ontology_column defaults to "", so is.null() alone can never detect an
# omitted option; an empty string must also count as "not provided"
ontology_provided <- !is.null(opt$ontology_column) && nzchar(opt$ontology_column)

if (ontology_provided) {
  stopifnot(
    "ontology column is not present in provided TSV file" = opt$ontology_column %in% colnames(annotations_df)
  )
  ontology_ids <- annotations_df[[opt$ontology_column]]
} else {
  ontology_ids <- NA
}

# build json contents
json_contents <- list(
  barcodes = annotations_df$barcodes,
  openscpca_celltype_annotation = annotations_df[[opt$annotation_column]],
  openscpca_celltype_ontology = ontology_ids,
  module_name = opt$module_name,
  openscpca_nf_version = opt$openscpca_nf_version,
  release_date = opt$release_date
)

# export json file
jsonlite::write_json(
  json_contents,
  path = opt$output_json_file,
  simplifyVector = TRUE,
  auto_unbox = TRUE,
  pretty = TRUE
)
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ params {
release_bucket = "s3://openscpca-data-release"
results_bucket = "s3://openscpca-nf-workflow-results-staging"
sim_bucket = "s3://openscpca-test-data-release-staging"
annotations_bucket = "s3://openscpca-celltype-annotations-public-access"
project = "all"

// URIs to reference files
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@
"description": "Base URI for simulated data output",
"help_text": "Standard configurations will use an S3 bucket, but local paths can also be used."
},
"annotations_bucket": {
"type": "string",
"default": "s3://openscpca-celltype-annotations-public-access",
"description": "Base URI for saving cell type annotations output",
"help_text": "Standard configurations will use an S3 bucket, but local paths can also be used."
},
"project": {
"type": "string",
"default": "all",
Expand Down