Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ include { cell_type_ewings } from './modules/cell-type-ewings'
include { cell_type_neuroblastoma_04 } from './modules/cell-type-neuroblastoma-04'
include { infercnv_gene_order } from './modules/infercnv-gene-order'
include { cell_type_scimilarity } from './modules/cell-type-scimilarity'
include { export_annotations } from './modules/export-annotations'

// **** Parameter checks ****
include { validateParameters; paramsSummaryLog } from 'plugin/nf-schema'
Expand Down Expand Up @@ -84,4 +85,11 @@ workflow {
// only runs on SCPCP000004
cell_type_neuroblastoma_04(sample_ch.filter{ it[1] == "SCPCP000004" })

// format and export json files with openscpca annotations
// input expected to be sample id, project id, tsv files, annotation meta
// annotation meta should be a groovy map (dictionary) containing at least `module_name:` and `annotation_column:` keys.
// The optional key `ontology_column:` will also be used if provided.
// mix outputs from all cell type modules first
export_ch = cell_type_ewings.out.celltypes
export_annotations(export_ch)
}
15 changes: 14 additions & 1 deletion modules/cell-type-ewings/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,20 @@ workflow cell_type_ewings {
// assign cell types
ewing_assign_celltypes(assign_ch, file(params.cell_type_ewings_auc_thresholds_file))

// add ewing specific metadata to output tuple
celltype_output_ch = ewing_assign_celltypes.out
.map{ sample_id, project_id, assignment_files -> tuple(
sample_id,
project_id,
assignment_files,
[ // annotation metadata
module_name: "cell-type-ewings",
annotation_column: "ewing_annotation",
ontology_column: "ewing_ontology"
]
)}

emit:
aucell = ewing_aucell.out // [sample_id, project_id, [aucell output files], [mean gene expression files]]
celltypes = ewing_assign_celltypes.out // [sample_id, project_id, [cell type assignment files]]
celltypes = celltype_output_ch // [sample_id, project_id, [cell type assignment files], annotation_metadata]
}
13 changes: 13 additions & 0 deletions modules/export-annotations/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
This module exports annotations from cell type modules in a uniform format to a public s3 bucket for use in other applications.
Annotations can be found in `s3://openscpca-celltype-annotations-public-access`.

For each library, a JSON file is exported with the following information:

| | |
| -- | -- |
| `barcodes` | An array of unique cell barcodes |
| `openscpca_celltype_annotation` | An array of cell type annotations assigned in `OpenScPCA-nf` |
| `openscpca_celltype_ontology` | An array of Cell Ontology identifiers associated with the cell type annotation. If no Cell Ontology identifiers are assigned, this will be `NA` |
| `module_name` | Name of the original analysis module used to assign cell type annotations in `OpenScPCA-analysis` |
| `openscpca_nf_version` | Version of `OpenScPCA-nf` |
| `release_date` | Release date of input ScPCA data |
55 changes: 55 additions & 0 deletions modules/export-annotations/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env nextflow

// Workflow to format and export openscpca annotations

// Convert the cell type annotation TSV file of every library in a sample into
// a `<library_id>_openscpca-annotations.json` file using export-celltype-json.R.
// Outputs are copied to
// `<annotations_bucket>/<release_prefix>/<project_id>/<sample_id>`.
//
// Input tuple: sample id, project id, annotation TSV file(s), and a map of
// annotation metadata read for `module_name`, `annotation_column`, and the
// optional `ontology_column`.
// Output tuple: sample id, project id, exported JSON file(s).
process format_annotations {
  container params.scpcatools_slim_container
  tag "${sample_id}"
  label 'mem_8'
  publishDir "${params.annotations_bucket}/${params.release_prefix}/${project_id}/${sample_id}", mode: 'copy'

  input:
    tuple val(sample_id),
          val(project_id),
          path(annotations_tsv_files),
          val(annotation_metadata)

  output:
    tuple val(sample_id),
          val(project_id),
          path("*_openscpca-annotations.json")

  script:
    // the library id (SCPCL followed by six digits) is taken from each file name
    scpcl_ids = annotations_tsv_files.collect{ tsv -> (tsv.name =~ /SCPCL\d{6}/)[0] }
    library_list = scpcl_ids.join(" ")
    // the ontology flag is only passed on when the metadata map names an ontology column
    ontology_arg = annotation_metadata.ontology_column ? "--ontology_column '${annotation_metadata.ontology_column}'" : ''
    """
    for library_id in ${library_list};do
      # get the input files for the library id
      annotations_file=\$(ls ${annotations_tsv_files} | grep "\${library_id}")

      export-celltype-json.R \
        --annotations_tsv_file \$annotations_file \
        --annotation_column "${annotation_metadata.annotation_column}" \
        ${ontology_arg} \
        --module_name ${annotation_metadata.module_name} \
        --release_date ${params.release_prefix} \
        --openscpca_nf_version ${workflow.manifest.version} \
        --output_json_file \${library_id}_openscpca-annotations.json
    done
    """

  stub:
    // create one empty placeholder JSON file per library id
    scpcl_ids = annotations_tsv_files.collect{ tsv -> (tsv.name =~ /SCPCL\d{6}/)[0] }
    library_list = scpcl_ids.join(" ")
    """
    for library_id in ${library_list};do
      touch \${library_id}_openscpca-annotations.json
    done
    """
}

// Sub-workflow that passes cell type annotation output from a cell type module
// to the format_annotations process, which writes one JSON file per library.
// The input channel is handed to the process unchanged and the process output
// is emitted as-is.
workflow export_annotations {
take:
// one tuple per sample; the last element is the annotation metadata map
celltype_ch // [sample_id, project_id, [cell type assignment files], annotation metadata]
main:
// export json
format_annotations(celltype_ch)

emit:
// single unnamed output channel
format_annotations.out // [sample id, project id, annotations json]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env Rscript

# This script is used to create a JSON file of annotations for a single library
# JSON file will include barcodes, annotation column, ontology column (if provided),
# openscpca-nf version, data release date, and module name

library(optparse)

# Command line options: every option takes a character value. Only
# `--ontology_column` declares a default (an empty string).
option_list <- list(
  make_option(
    "--annotations_tsv_file",
    type = "character",
    help = "Path to TSV file with cell type annotations"
  ),
  make_option(
    "--annotation_column",
    type = "character",
    help = "Name of the column containing the cell type annotations to use for openscpca_celltype_annotation"
  ),
  make_option(
    "--ontology_column",
    type = "character",
    default = "",
    help = "Name of the column containing the cell type ontology IDs to use for openscpca_celltype_ontology"
  ),
  make_option(
    "--module_name",
    type = "character",
    help = "Name of original module in OpenScPCA-analysis"
  ),
  make_option(
    "--release_date",
    type = "character",
    help = "Release date of data used when generating annotations"
  ),
  make_option(
    "--openscpca_nf_version",
    type = "character",
    help = "Version of OpenScPCA-nf workflow"
  ),
  make_option(
    "--output_json_file",
    type = "character",
    help = "Path to JSON file to save cell type annotations"
  )
)

# Build the parser from the option list, then read the command line arguments
cli_parser <- OptionParser(option_list = option_list)
opt <- parse_args(cli_parser)

# Set up -----------------------------------------------------------------------

# make sure all required options were provided and that input/output are valid
# stopifnot() evaluates its conditions in order and stops at the first failure,
# so each NULL check is placed before the check that uses that value:
#   - file.exists(NULL) would otherwise fail with an unrelated error message
#   - str_ends(NULL, ...) returns a zero-length vector, which stopifnot()
#     accepts, so a missing output path would otherwise go unnoticed here
stopifnot(
  "annotations TSV file must be provided" = !is.null(opt$annotations_tsv_file),
  "annotations TSV file does not exist" = file.exists(opt$annotations_tsv_file),
  "annotation column must be provided" = !is.null(opt$annotation_column),
  "module name must be provided" = !is.null(opt$module_name),
  "release date must be provided" = !is.null(opt$release_date),
  "openscpca-nf version must be provided" = !is.null(opt$openscpca_nf_version),
  "output json file must be provided" = !is.null(opt$output_json_file),
  "output json file must end in .json" = stringr::str_ends(opt$output_json_file, "\\.json")
)

# read in annotations
annotations_df <- readr::read_tsv(opt$annotations_tsv_file)

# check that barcodes and annotation column exist
stopifnot(
  "barcodes column must be present in provided TSV file" = "barcodes" %in% colnames(annotations_df),
  "annotation column is not present in provided TSV file" = opt$annotation_column %in% colnames(annotations_df)
)

# check for ontology ids if provided
# `--ontology_column` defaults to an empty string rather than NULL, so an
# omitted option has to be detected with nzchar() in addition to is.null();
# otherwise the NA fallback below could never be reached
has_ontology_column <- !is.null(opt$ontology_column) &&
  nzchar(opt$ontology_column)

if (has_ontology_column) {
  stopifnot(
    "ontology column is not present in provided TSV file" = opt$ontology_column %in% colnames(annotations_df)
  )
  ontology_ids <- annotations_df[[opt$ontology_column]]
} else {
  # no ontology column requested: export a single NA in place of ontology ids
  ontology_ids <- NA
}

# pull the per-cell vectors out of the annotations table
cell_barcodes <- annotations_df[["barcodes"]]
celltype_annotations <- annotations_df[[opt$annotation_column]]

# assemble the JSON contents: run-level metadata first, then per-cell vectors
json_contents <- list(
  module_name = opt$module_name,
  openscpca_nf_version = opt$openscpca_nf_version,
  release_date = opt$release_date,
  barcodes = cell_barcodes,
  openscpca_celltype_annotation = celltype_annotations,
  openscpca_celltype_ontology = ontology_ids
)

# write the contents to the requested JSON file
jsonlite::write_json(
  json_contents,
  path = opt$output_json_file,
  simplifyVector = TRUE,
  auto_unbox = TRUE,
  pretty = TRUE
)
2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ params {
release_bucket = "s3://openscpca-data-release"
results_bucket = "s3://openscpca-nf-workflow-results-staging"
sim_bucket = "s3://openscpca-test-data-release-staging"
annotations_bucket = "s3://openscpca-celltype-annotations-public-access"
project = "all"

// URIs to reference files
Expand Down Expand Up @@ -111,6 +112,7 @@ profiles {
release_bucket = "s3://openscpca-test-data-release-public-access" // test bucket
results_bucket = "test/stub/results" // no output
sim_bucket = "test/stub/simulated" // local output
annotations_bucket = "test/stub/annotations" // local output
project = "SCPCP000012" // a small project

// override large reference files for stub testing
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@
"description": "Base URI for simulated data output",
"help_text": "Standard configurations will use an S3 bucket, but local paths can also be used."
},
"annotations_bucket": {
"type": "string",
"default": "s3://openscpca-celltype-annotations-public-access",
"description": "Base URI for saving cell type annotations output",
"help_text": "Standard configurations will use an S3 bucket, but local paths can also be used."
},
"project": {
"type": "string",
"default": "all",
Expand Down