Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
f044be3
adding files for CODEC pipeline
stellaning1120 Jan 30, 2024
20c00d9
change fgbio version in the wdl to match on prem
stellaning1120 Feb 6, 2024
d585698
make eval_genome_interval/bed tunable in multiple tasks
stellaning1120 Feb 13, 2024
bcc80bb
add Dockerfile that builds the codec docker
stellaning1120 Feb 16, 2024
801bdc4
add inputs.json and modify .dockstore.yml to add 2 wdls to Dockstore
stellaning1120 Feb 16, 2024
110f8ac
change duplication rate calculation, add duplex efficiency calculatio…
stellaning1120 Feb 28, 2024
7bb7ede
add ceil() to help with disk size allocation
stellaning1120 Feb 29, 2024
0feeada
minor changes to task name and QC metrics collection
stellaning1120 Mar 1, 2024
97bcfef
minor changes to disk size input
stellaning1120 Mar 1, 2024
d71532f
change variant calling parameters and remove MarkDuplicated task
stellaning1120 May 3, 2024
c2347f1
change docker images since it is now public
stellaning1120 May 23, 2024
a73fe6c
not yet pushing to public
stellaning1120 May 23, 2024
ca095be
docker images change to public ones
stellaning1120 May 25, 2024
21cf843
switch to public docker image
stellaning1120 May 25, 2024
dd0d44e
correct docker images
stellaning1120 May 29, 2024
852be3e
change to public docker from tag-public
stellaning1120 May 30, 2024
aaa259e
switch to public dockers
stellaning1120 May 30, 2024
e0dc64b
Add files for new module Signature Profiler of CODEC pipeline and mod…
stellaning1120 Jul 11, 2024
3b2f657
adding and merging pipelines for captured data
stellaning1120 Oct 15, 2024
60db351
adding and merging pipelines for captured data
stellaning1120 Oct 15, 2024
a22fb5c
Merge branch 'master' into CODEC
stellaning1120 Oct 15, 2024
d18b7c5
Update file fetch from task to fit GCP batch
stellaning1120 Jun 25, 2025
165c99a
Update file fetch from task to fit GCP batch
stellaning1120 Jun 25, 2025
bafa14f
update duplex efficiency calc
stellaning1120 Jun 26, 2025
d5079cd
minor fix to correct the read_length calc
stellaning1120 Jun 26, 2025
48aa1a4
set preemptibles to 0 for some time-consuming tasks
stellaning1120 Jul 16, 2025
db69a89
simplify the concatenate task
stellaning1120 Aug 28, 2025
0f915b8
correct concatenate scripts
stellaning1120 Sep 9, 2025
aa8a3ea
add maf2vcf function
stellaning1120 Sep 18, 2025
315d20c
add maf2vcf function
stellaning1120 Sep 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,19 @@ workflows:
subclass: WDL
primaryDescriptorPath: /CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl
testParameterFiles:
- /CleanupFailedSubmissions/Cleanup_Failed_Submissions.inputs.json
- /CleanupFailedSubmissions/Cleanup_Failed_Submissions.inputs.json
# CODEC workflow entries. Each one names a WDL descriptor under /CODEC and the
# inputs JSON listed as its test parameter file.

# codec_bcl2fastq: descriptor /CODEC/codec_bcl2fastq.wdl
- name: codec_bcl2fastq
subclass: WDL
primaryDescriptorPath: /CODEC/codec_bcl2fastq.wdl
testParameterFiles:
- /CODEC/codec_bcl2fastq.inputs.json
# demux_CODEC: descriptor /CODEC/demux_CODEC.wdl
- name: demux_CODEC
subclass: WDL
primaryDescriptorPath: /CODEC/demux_CODEC.wdl
testParameterFiles:
- /CODEC/demux_CODEC.inputs.json
# SingleSampleCODEC: descriptor /CODEC/SingleSampleCODEC.wdl
- name: SingleSampleCODEC
subclass: WDL
primaryDescriptorPath: /CODEC/SingleSampleCODEC.wdl
testParameterFiles:
- /CODEC/SingleSampleCODEC.inputs.json
58 changes: 58 additions & 0 deletions CODEC/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Base image: the amd64 variant of Ubuntu 20.04.
FROM --platform=linux/amd64 ubuntu:20.04

LABEL maintainer="lining@broadinstitute.org"

# key=value form; the space-separated `ENV key value` syntax is deprecated.
# NOTE(review): kept as ENV (not ARG) so the variable is still present at
# container runtime, exactly as before.
ENV DEBIAN_FRONTEND=noninteractive

# Every `apt-get install` below runs in the same layer as its own
# `apt-get update`, so a cached layer can never pair a stale package index with
# a fresh install. The apt lists are removed in that same layer so they are not
# stored in the image.

# Python 3.8 toolchain and the Python packages installed with pip.
# `-y` keeps add-apt-repository from waiting for confirmation;
# `--no-cache-dir` keeps pip's download cache out of the layer.
RUN apt-get update \
    && apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y python3.8 python3.8-dev python3.8-venv python3-pip \
    && pip3 install --no-cache-dir pandas argparse numpy pysam \
    && rm -rf /var/lib/apt/lists/*

# R runtime and development headers.
RUN apt-get update \
    && apt-get install -y r-base r-base-dev \
    && rm -rf /var/lib/apt/lists/*

# Command-line tools, compilers and -dev libraries.
# One package per line, sorted, with duplicates removed: libssl-dev was listed
# twice, and software-properties-common is already installed by the first RUN.
RUN apt-get update \
    && apt-get install -y \
        autoconf \
        build-essential \
        bwa \
        g++ \
        git \
        libbz2-dev \
        libcurl4-gnutls-dev \
        liblzma-dev \
        libssl-dev \
        wget \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Java 11 JDK.
RUN apt-get update -qq \
    && apt-get install -y openjdk-11-jdk \
    && rm -rf /var/lib/apt/lists/*



# Fetch the CODECsuite sources together with their git submodules into /CODECsuite.
RUN git clone --recurse-submodules \
        https://github.com/broadinstitute/CODECsuite.git \
        /CODECsuite
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Separate CODEC Docker from the rest of Docker images that were used in the CODEC WDL.



# Install the prebuilt CMake 3.28.0-rc3 linux-x86_64 release under /opt/cmake-3.28,
# expose cmake/ctest/cpack through /usr/local/bin, then delete the downloaded tarball.
# NOTE(review): this pins a release candidate; confirm whether a final release is preferred.
RUN cmake_release=cmake-3.28.0-rc3-linux-x86_64 \
    && wget "https://github.com/Kitware/CMake/releases/download/v3.28.0-rc3/${cmake_release}.tar.gz" \
    && tar -xzvf "${cmake_release}.tar.gz" \
    && mv "${cmake_release}" /opt/cmake-3.28 \
    && for tool in cmake ctest cpack; do \
           ln -s "/opt/cmake-3.28/bin/${tool}" "/usr/local/bin/${tool}" || exit 1; \
       done \
    && rm "${cmake_release}.tar.gz"

# Configure and compile CODECsuite out-of-tree in /CODECsuite/build.
RUN mkdir /CODECsuite/build \
    && cd /CODECsuite/build \
    && cmake /CODECsuite \
    && make

# Bring the build-context directories into the image.
COPY reference_files/ /reference_files/
COPY dependencies/ /dependencies/

# Put the samtools binary from /dependencies/samtools-1.9 into /usr/bin and
# mark the CODECsuite agg_log.py script as executable.
RUN cp /dependencies/samtools-1.9/samtools /usr/bin/ \
    && chmod +x /CODECsuite/snakemake/script/agg_log.py
63 changes: 63 additions & 0 deletions CODEC/SingleSampleCODEC.inputs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{
"SingleSampleCODEC.AlignMolecularConsensusReads.cpu_cores": "${2}",
"SingleSampleCODEC.AlignMolecularConsensusReads.disk_size": "${}",
"SingleSampleCODEC.AlignMolecularConsensusReads.memory": "${}",
"SingleSampleCODEC.AlignMolecularConsensusReads.threads": "${8}",
"SingleSampleCODEC.AlignRawTrimmed.disk": "${64}",
"SingleSampleCODEC.AlignRawTrimmed.disk_size": "${}",
"SingleSampleCODEC.AlignRawTrimmed.mem": "${}",
"SingleSampleCODEC.AlignRawTrimmed.memory": "${64}",
"SingleSampleCODEC.CDSByProduct.disk_size": "${}",
"SingleSampleCODEC.CDSByProduct.mem": "${}",
"SingleSampleCODEC.CSS_SFC_ErrorMetrics.disk_size": "${800}",
"SingleSampleCODEC.CSS_SFC_ErrorMetrics.memory": "${}",
"SingleSampleCODEC.CollectConsensusWgsMetrics.disk_size": "${160}",
"SingleSampleCODEC.CollectConsensusWgsMetrics.memory": "${}",
"SingleSampleCODEC.CollectInsertSizeMetrics.disk_size": "${100}",
"SingleSampleCODEC.CollectInsertSizeMetrics.memory": "${32}",
"SingleSampleCODEC.CollectRawWgsMetrics.disk_size": "${160}",
"SingleSampleCODEC.CollectRawWgsMetrics.memory": "${}",
"SingleSampleCODEC.FgbioCollapseReadFamilies.disk_size": "${200}",
"SingleSampleCODEC.FgbioCollapseReadFamilies.memory": "${}",
"SingleSampleCODEC.GroupReadByUMI.disk_size": "${400}",
"SingleSampleCODEC.GroupReadByUMI.memory": "${32}",
"SingleSampleCODEC.MarkRawDuplicates.disk_size": "${200}",
"SingleSampleCODEC.MarkRawDuplicates.memory": "${64}",
"SingleSampleCODEC.MergeAndSortMoleculeConsensusReads.disk_size": "${160}",
"SingleSampleCODEC.MergeAndSortMoleculeConsensusReads.memory": "${64}",
"SingleSampleCODEC.MergeLogSplit.disk_size": "${}",
"SingleSampleCODEC.MergeLogSplit.mem": "${}",
"SingleSampleCODEC.MergeSplit.disk_size": "${200}",
"SingleSampleCODEC.MergeSplit.memory": "${32}",
"SingleSampleCODEC.RAW_SFC_ErrorMetrics.disk_size": "${800}",
"SingleSampleCODEC.RAW_SFC_ErrorMetrics.memory": "${32}",
"SingleSampleCODEC.ReplaceRawReadGroup.disk_size": "${200}",
"SingleSampleCODEC.ReplaceRawReadGroup.memory": "${32}",
"SingleSampleCODEC.SortBam.disk_size": "${200}",
"SingleSampleCODEC.SortBam.mem": "${}",
"SingleSampleCODEC.SplitFastq1.disk_size": "${400}",
"SingleSampleCODEC.SplitFastq1.memory": "${32}",
"SingleSampleCODEC.SplitFastq2.disk_size": "${400}",
"SingleSampleCODEC.SplitFastq2.memory": "${32}",
"SingleSampleCODEC.Trim.disk_size": "${64}",
"SingleSampleCODEC.Trim.mem": "${32}",
"SingleSampleCODEC.ZipperBamAlignment.disk_size": "${200}",
"SingleSampleCODEC.ZipperBamAlignment.mem": "${32}",
"SingleSampleCODEC.eval_genome_bed": "gs://gptag/CODEC/ddbtp_codec_easy_regions.hg38.bed",
"SingleSampleCODEC.eval_genome_interval": "gs://gptag/CODEC/ddbtp_codec_easy_regions.hg38.interval_list",
"SingleSampleCODEC.fastq1": "${this.fastq1}",
"SingleSampleCODEC.fastq2": "${this.fastq2}",
"SingleSampleCODEC.germline_bam": "${this.germline_bam}",
"SingleSampleCODEC.germline_bam_index": "${this.germline_bam_index}",
"SingleSampleCODEC.num_parallel": "${40}",
"SingleSampleCODEC.reference_amb": "${workspace.referenceData_hg38_ref_amb}",
"SingleSampleCODEC.reference_ann": "${workspace.referenceData_hg38_ref_ann}",
"SingleSampleCODEC.reference_bwt": "${workspace.referenceData_hg38_ref_bwt}",
"SingleSampleCODEC.reference_dict": "${workspace.referenceData_hg38_ref_dict}",
"SingleSampleCODEC.reference_fasta": "${workspace.referenceData_hg38_ref_fasta}",
"SingleSampleCODEC.reference_fasta_index": "${workspace.referenceData_hg38_ref_fasta_index}",
"SingleSampleCODEC.reference_pac": "${workspace.referenceData_hg38_ref_pac}",
"SingleSampleCODEC.reference_sa": "${workspace.referenceData_hg38_ref_sa}",
"SingleSampleCODEC.sample_id": "${this.sample_id}",
"SingleSampleCODEC.sort_memory": "2G"
}
Loading