diff --git a/README.md b/README.md index f79dbe48a..0429b1b72 100644 --- a/README.md +++ b/README.md @@ -148,7 +148,7 @@ To learn more about the docker pull rate limits and the open source software pro | [Circlator](https://hub.docker.com/r/staphb/circlator)
[![docker pulls](https://badgen.net/docker/pulls/staphb/circlator)](https://hub.docker.com/r/staphb/circlator) |
Click to see all versions
| https://github.com/sanger-pathogens/circlator | | [Circos](https://hub.docker.com/r/staphb/circos)
[![docker pulls](https://badgen.net/docker/pulls/staphb/circos)](https://hub.docker.com/r/staphb/circos) |
Click to see all versions
| https://circos.ca/ | | [CirculoCov](https://hub.docker.com/r/staphb/circulocov)
[![docker pulls](https://badgen.net/docker/pulls/staphb/circulocov)](https://hub.docker.com/r/staphb/circulocov) |
Click to see all versions
| https://github.com/erinyoung/CirculoCov | -| [Clair3](https://hub.docker.com/r/staphb/clair3)
[![docker pulls](https://badgen.net/docker/pulls/staphb/clair3)](https://hub.docker.com/r/staphb/clair3) |
Click to see all versions
| https://github.com/HKU-BAL/Clair3 | +| [Clair3](https://hub.docker.com/r/staphb/clair3)
[![docker pulls](https://badgen.net/docker/pulls/staphb/clair3)](https://hub.docker.com/r/staphb/clair3) |
Click to see all versions
| https://github.com/HKU-BAL/Clair3 | | [Clustalo](https://hub.docker.com/r/staphb/clustalo)
[![docker pulls](https://badgen.net/docker/pulls/staphb/clustalo)](https://hub.docker.com/r/staphb/clustalo) |
Click to see all versions
| http://www.clustal.org/omega/ | | [colorid](https://hub.docker.com/r/staphb/colorid)
[![docker pulls](https://badgen.net/docker/pulls/staphb/colorid)](https://hub.docker.com/r/staphb/colorid) |
Click to see all versions
| https://github.com/hcdenbakker/colorid | | [Core-SNP-filter](https://hub.docker.com/r/staphb/core-snp-filter)
[![docker pulls](https://badgen.net/docker/pulls/staphb/core-snp-filter)](https://hub.docker.com/r/staphb/core-snp-filter) |
Click to see all versions
| https://github.com/rrwick/Core-SNP-filter |
diff --git a/build-files/clair3/2.0.0/Dockerfile b/build-files/clair3/2.0.0/Dockerfile
new file mode 100644
index 000000000..c1595e932
--- /dev/null
+++ b/build-files/clair3/2.0.0/Dockerfile
@@ -0,0 +1,183 @@
+# Builder Stage: fetches the Clair3 sources, compiles them, and downloads pypy3 and the models.
+# Credit to https://github.com/HKU-BAL/Clair3/blob/main/Dockerfile for reference code
+FROM mambaorg/micromamba:2.5.0-ubuntu24.04 AS builder
+ARG CLAIR3_VER="2.0.0"
+USER root
+WORKDIR /opt/bin
+
+# Install base packages for downloading source code.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    wget \
+    ca-certificates \
+    procps && \
+    apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+# Install packages needed for Clair3 and for make files.
+RUN micromamba install \
+    -c conda-forge \
+    -c bioconda \
+    python=3.11 \
+    samtools \
+    whatshap \
+    parallel \
+    zstd \
+    xz \
+    zlib \
+    bzip2 \
+    automake \
+    make \
+    gcc \
+    gxx \
+    curl \
+    pigz \
+    binutils \
+    boost-cpp \
+    -y && \
+    micromamba clean -a -f -y
+
+# Set environment variables for PATH and installation with respect to appropriate micromamba directories.
+ENV CONDA_PREFIX=/opt/conda
+ENV PATH=${CONDA_PREFIX}/bin:/opt/bin:${PATH} LC_ALL=C.UTF-8 LANG=C.UTF-8
+
+# Install uv for faster installation, then install CPU-only pytorch libraries and required packages.
+RUN pip install --no-cache-dir uv && \
+    uv pip install --python ${CONDA_PREFIX}/bin/python \
+    torch torchvision \
+    --index-url https://download.pytorch.org/whl/cpu && \
+    uv pip install --python ${CONDA_PREFIX}/bin/python \
+    numpy \
+    h5py \
+    hdf5plugin \
+    numexpr \
+    tqdm \
+    cffi \
+    torchmetrics
+
+# Get Clair3 source code and extract it into /opt/bin (the WORKDIR), then remove the tarball and unpacked directory.
+RUN wget -q https://github.com/HKU-BAL/Clair3/archive/refs/tags/v${CLAIR3_VER}.tar.gz && \
+    tar -xzf v${CLAIR3_VER}.tar.gz && \
+    cp -a Clair3-${CLAIR3_VER}/. /opt/bin/ && \
+    rm -rf v${CLAIR3_VER}.tar.gz Clair3-${CLAIR3_VER}
+
+# Use makefile to build Clair3, compile the realigner and debruijn_graph shared objects, then remove any samtools-*/longphase-* paths in /opt/bin.
+RUN make PREFIX=${CONDA_PREFIX} PYTHON=${CONDA_PREFIX}/bin/python && \
+    cd /opt/bin/preprocess/realign && \
+    g++ -std=c++14 -O1 -shared -fPIC -o realigner ssw_cpp.cpp ssw.c realigner.cpp && \
+    g++ -std=c++11 -shared -fPIC -o debruijn_graph -O3 debruijn_graph.cpp && \
+    rm -rf /opt/bin/samtools-* /opt/bin/longphase-*
+
+# Download pypy3 into /opt/bin, link it into the conda bin directory, and install mpmath for it.
+RUN wget -q https://downloads.python.org/pypy/pypy3.11-v7.3.20-linux64.tar.bz2 && \
+    tar -xjf pypy3.11-v7.3.20-linux64.tar.bz2 && \
+    rm pypy3.11-v7.3.20-linux64.tar.bz2 && \
+    ln -sf /opt/bin/pypy3.11-v7.3.20-linux64/bin/pypy3 ${CONDA_PREFIX}/bin/pypy3 && \
+    ln -sf /opt/bin/pypy3.11-v7.3.20-linux64/bin/pypy3 ${CONDA_PREFIX}/bin/pypy && \
+    pypy3 -m ensurepip && \
+    pypy3 -m pip install --no-cache-dir mpmath==1.2.1
+
+# Download models for different sequencing approaches (recursive wget of CLAIR3_MODELS_URL into /opt/models).
+ARG CLAIR3_MODELS_URL=https://www.bio8.cs.hku.hk/clair3/clair3_models_pytorch/
+RUN set -eux; \
+    mkdir -p /opt/models /tmp/clair3-models; \
+    base_url="${CLAIR3_MODELS_URL%/}"; \
+    wget -r -np -nH --cut-dirs=2 -R "index.html*" -P /tmp/clair3-models "${base_url}/"; \
+    if [ -d /tmp/clair3-models/clair3_models_pytorch ]; then \
+        cp -a /tmp/clair3-models/clair3_models_pytorch/. /opt/models/; \
+    else \
+        cp -a /tmp/clair3-models/. /opt/models/; \
+    fi; \
+    rm -rf /tmp/clair3-models
+
+
+# App Stage: runtime image; re-creates the conda env and copies /opt/bin and /opt/models from the builder.
+FROM mambaorg/micromamba:2.5.0-ubuntu24.04 AS app
+ARG CLAIR3_VER="2.0.0"
+USER root
+WORKDIR /opt/bin
+
+LABEL base.image="mambaorg/micromamba:2.5.0-ubuntu24.04"
+LABEL dockerfile.version="1"
+LABEL software="CLAIR3"
+LABEL software.version="${CLAIR3_VER}"
+LABEL description="A deep learning-based germline small variant caller for long-reads."
+LABEL website="https://github.com/HKU-BAL/Clair3"
+LABEL license="https://github.com/HKU-BAL/Clair3/blob/master/LICENSE.md"
+LABEL maintainer="Kutluhan Incekara"
+LABEL maintainer.email="kutluhan.incekara@ct.gov"
+LABEL maintainer2="Raheel Ahmed"
+LABEL maintainer2.email="raheelsyedahmed@gmail.com"
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    procps && \
+    apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+# Install packages needed for Clair3 (same list as the builder stage minus the compiler/build tools).
+RUN micromamba install \
+    -c conda-forge \
+    -c bioconda \
+    python=3.11 \
+    samtools \
+    whatshap \
+    parallel \
+    zstd \
+    xz \
+    zlib \
+    bzip2 \
+    pigz \
+    boost-cpp \
+    -y && \
+    micromamba clean -a -f -y
+
+ENV CONDA_PREFIX=/opt/conda
+ENV PATH=${CONDA_PREFIX}/bin:/opt/bin:/clair3:/clair3/models:${PATH} LC_ALL=C.UTF-8 LANG=C.UTF-8
+
+# Copy the Clair3 tree (/opt/bin, including the unpacked pypy3) and the models from builder; the conda env is NOT copied, it is re-created above.
+COPY --from=builder /opt/bin /opt/bin
+COPY --from=builder /opt/models /opt/models
+
+# Install uv for faster installation, then install CPU-only pytorch libraries and required packages (--no-cache keeps uv's cache out of the final image).
+RUN pip install --no-cache-dir uv && \
+    uv pip install --no-cache --python ${CONDA_PREFIX}/bin/python \
+    torch torchvision \
+    --index-url https://download.pytorch.org/whl/cpu && \
+    uv pip install --no-cache --python ${CONDA_PREFIX}/bin/python \
+    numpy \
+    h5py \
+    hdf5plugin \
+    numexpr \
+    tqdm \
+    cffi \
+    torchmetrics
+
+# Relink pypy3: the symlinks made in the builder live in its conda bin directory, which is not copied into this stage.
+RUN ln -sf /opt/bin/pypy3.11-v7.3.20-linux64/bin/pypy3 ${CONDA_PREFIX}/bin/pypy3 && \
+    ln -sf /opt/bin/pypy3.11-v7.3.20-linux64/bin/pypy3 ${CONDA_PREFIX}/bin/pypy
+
+
+# Symlink /clair3 -> /opt so that /clair3/bin and /clair3/models resolve to /opt/bin and /opt/models.
+RUN ln -s /opt /clair3
+
+RUN mkdir /data
+WORKDIR /data
+
+# Test Stage: checks the CLI, runs the ONT quick test, and lists the bundled models and packages.
+FROM app AS test
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends wget && \
+    apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+RUN run_clair3.sh --help && \
+    run_clair3.sh --version
+
+# Download ont_quick_test script and run with prespecified arguments
+RUN wget -q https://raw.githubusercontent.com/StaPH-B/docker-builds/master/build-files/clair3/1.0.9/ont_quick_test.sh && \
+    chmod +x ont_quick_test.sh && \
+    ./ont_quick_test.sh
+
+# list models
+RUN ls /clair3/models
+
+# list tools installed with micromamba
+RUN micromamba list
diff --git a/build-files/clair3/2.0.0/README.md b/build-files/clair3/2.0.0/README.md
new file mode 100644
index 000000000..9157ff830
--- /dev/null
+++ b/build-files/clair3/2.0.0/README.md
@@ -0,0 +1,168 @@
+# Clair3 container
+
+Main tool: [clair3](https://github.com/HKU-BAL/Clair3)
+
+
+
+Additional tools installed via micromamba: + +``` + Name Version Build Channel +──────────────────────────────────────────────────────────────────────── + Jinja2 3.1.6 pypi_0 pypi + MarkupSafe 3.0.2 pypi_0 pypi + _openmp_mutex 4.5 20_gnu conda-forge + amply 0.1.6 pyhd8ed1ab_1 conda-forge + biopython 1.86 py311h49ec1c0_1 conda-forge + boost-cpp 1.85.0 h3c6214e_4 conda-forge + bzip2 1.0.8 hda65f42_9 conda-forge + c-ares 1.34.6 hb03c661_0 conda-forge + ca-certificates 2026.2.25 hbd8a1cb_0 conda-forge + cffi 2.0.0 py311h03d9500_1 conda-forge + coin-or-cbc 2.10.13 h4d16d09_0 conda-forge + coin-or-cgl 0.60.10 hc46dffc_0 conda-forge + coin-or-clp 1.17.11 hc03379b_0 conda-forge + coin-or-osi 0.108.12 hf4fecb4_0 conda-forge + coin-or-utils 2.11.13 hc93afbd_0 conda-forge + docutils 0.22.4 pyhd8ed1ab_0 conda-forge + filelock 3.20.0 pypi_0 pypi + fsspec 2025.12.0 pypi_0 pypi + h5py 3.16.0 pypi_0 pypi + hdf5plugin 6.0.0 pypi_0 pypi + htslib 1.23 h566b1c6_0 bioconda + icu 75.1 he02047a_0 conda-forge + importlib-metadata 8.7.0 pyhe01879c_1 conda-forge + isa-l 2.31.1 hb9d3cd8_1 conda-forge + keyutils 1.6.3 hb9d3cd8_0 conda-forge + krb5 1.22.2 ha1258a1_0 conda-forge + ld_impl_linux-64 2.45.1 default_hbd61a6d_101 conda-forge + libblas 3.11.0 5_h4a7cf45_openblas conda-forge + libboost 1.85.0 h0ccab89_4 conda-forge + libboost-devel 1.85.0 h00ab1b0_4 conda-forge + libboost-headers 1.85.0 ha770c72_4 conda-forge + libcblas 3.11.0 5_h0358290_openblas conda-forge + libcurl 8.19.0 hcf29cc6_0 conda-forge + libdeflate 1.22 hb9d3cd8_0 conda-forge + libedit 3.1.20250104 pl5321h7949ede_0 conda-forge + libev 4.33 hd590300_2 conda-forge + libexpat 2.7.4 hecca717_0 conda-forge + libffi 3.5.2 h3435931_0 conda-forge + libgcc 15.2.0 he0feb66_18 conda-forge + libgcc-ng 15.2.0 h69a702a_18 conda-forge + libgfortran 15.2.0 h69a702a_18 conda-forge + libgfortran5 15.2.0 h68bc16d_18 conda-forge + libgomp 15.2.0 he0feb66_18 conda-forge + liblapack 3.11.0 5_h47877c9_openblas conda-forge + liblapacke 3.11.0 
5_h6ae95b6_openblas conda-forge + liblzma 5.8.2 hb03c661_0 conda-forge + liblzma-devel 5.8.2 hb03c661_0 conda-forge + libnghttp2 1.67.0 had1ee68_0 conda-forge + libnsl 2.0.1 hb9d3cd8_1 conda-forge + libopenblas 0.3.30 pthreads_h94d23a6_4 conda-forge + libsqlite 3.52.0 h0c1763c_0 conda-forge + libssh2 1.11.1 hcf80075_0 conda-forge + libstdcxx 15.2.0 h934c35e_18 conda-forge + libstdcxx-ng 15.2.0 hdf11a46_18 conda-forge + libuuid 2.41.3 h5347b49_0 conda-forge + libxcrypt 4.4.36 hd590300_1 conda-forge + libzlib 1.3.1 hb9d3cd8_2 conda-forge + lightning-utilities 0.15.3 pypi_0 pypi + mpmath 1.3.0 pypi_0 pypi + ncurses 6.5 h2d0b736_3 conda-forge + networkx 3.6.1 pyhcf101f3_0 conda-forge + numexpr 2.14.1 pypi_0 pypi + numpy 2.4.2 py311h2e04523_1 conda-forge + openssl 3.6.1 h35e630c_1 conda-forge + packaging 26.0 pyhcf101f3_0 conda-forge + parallel 20260122 ha770c72_0 conda-forge + pbzip2 1.1.13 h1fcc475_2 conda-forge + perl 5.32.1 7_hd590300_perl5 conda-forge + pigz 2.8 h421ea60_2 conda-forge + pillow 12.0.0 pypi_0 pypi + pip 26.0.1 pyh8b19718_0 conda-forge + pulp 2.8.0 py311h77a8cca_3 conda-forge + pycparser 2.22 pyh29332c3_1 conda-forge + pyfaidx 0.9.0.3 pyhdfd78af_0 bioconda + pyparsing 3.3.2 pyhcf101f3_0 conda-forge + pysam 0.23.3 py311hb456a96_1 bioconda + python 3.11.15 hd63d673_0_cpython conda-forge + python-isal 1.8.0 py311h49ec1c0_1 conda-forge + python-zlib-ng 1.0.0 py311h15805fc_1 conda-forge + python_abi 3.11 8_cp311 conda-forge + pyvcf3 1.0.4 py311haab0aaa_0 bioconda + readline 8.3 h853b02a_0 conda-forge + samtools 1.23 h96c455f_0 bioconda + scipy 1.17.1 py311hbe70eeb_0 conda-forge + setuptools 82.0.1 pyh332efcf_0 conda-forge + six 1.17.0 pyhe01879c_1 conda-forge + sympy 1.14.0 pypi_0 pypi + tk 8.6.13 noxft_h366c992_103 conda-forge + torch 2.10.0+cpu pypi_0 pypi + torchmetrics 1.9.0 pypi_0 pypi + torchvision 0.25.0+cpu pypi_0 pypi + tqdm 4.67.3 pypi_0 pypi + typing_extensions 4.15.0 pypi_0 pypi + tzdata 2025c hc9c84f9_1 conda-forge + uv 0.10.10 pypi_0 pypi + 
whatshap 2.8 py311he264feb_0 bioconda + wheel 0.46.3 pyhd8ed1ab_0 conda-forge + xopen 2.0.2 pyh707e725_2 conda-forge + xz 5.8.2 ha02ee65_0 conda-forge + xz-gpl-tools 5.8.2 ha02ee65_0 conda-forge + xz-tools 5.8.2 hb03c661_0 conda-forge + zipp 3.23.0 pyhcf101f3_1 conda-forge + zlib 1.3.1 hb9d3cd8_2 conda-forge + zlib-ng 2.3.3 hceb46e0_1 conda-forge + zstandard 0.25.0 py311haee01d2_1 conda-forge + zstd 1.5.7 hb78ec9c_6 conda-forge +``` +
+
+ +Code repository: https://github.com/HKU-BAL/Clair3 + +Basic information on how to use this tool: +- executable: run_clair3.sh +- help: `-h`, `--help` +- version: `-v`, `--version` +- description: A germline small variant caller for long-reads. + +Additional information: + +- This container includes the following models in `/clair3/models` and `/opt/models` +- This container has no GPU support! + +List of models: +``` +hifi +hifi_revio +hifi_sequel2 +ilmn +ont +ont_guppy5 +r1041_e82_400bps_hac_v410 +r1041_e82_400bps_hac_v500 +r1041_e82_400bps_hac_with_mv +r1041_e82_400bps_sup_v410 +r1041_e82_400bps_sup_v430_bacteria_finetuned +r1041_e82_400bps_sup_v500 +r941_prom_hac_g360+g422 +r941_prom_sup_g5014 +``` + +Full documentation: https://github.com/HKU-BAL/Clair3 + +## Example Usage + +```bash +run_clair3.sh \ + --bam_fn=${BAM} \ + --ref_fn=${REF} \ + --threads=${THREADS} \ + --platform="ont" \ ## options: {ont,hifi,ilmn} + --model_path=${MODEL_PREFIX} \ ## absolute model path prefix + --output=${OUTPUT_DIR} ## absolute output path prefix +## pileup output file: ${OUTPUT_DIR}/pileup.vcf.gz +## full-alignment output file: ${OUTPUT_DIR}/full_alignment.vcf.gz +## Clair3 final output file: ${OUTPUT_DIR}/merge_output.vcf.gz +```