|
# Application stage: ubuntu:noble image carrying Kaptive and its helper tools.
FROM ubuntu:noble AS app

# Tool versions are pinned here so an upgrade is a one-line change.
# ARG values exist only while the image is being built; they are not set in running containers.
# NOTE(review): MINIMAP2_VER and MASH_VER are both "2.3" - confirm that is the intended minimap2 release.
ARG KAPTIVE_VER=3.2.0
ARG MASH_VER=2.3
ARG MINIMAP2_VER=2.3
ARG VP_GENOMOSEROTYPING_VER=1.1
| 8 | + |
# Image metadata: software identity, upstream project and database links, licences, maintainers.
LABEL base.image="ubuntu:noble" \
      dockerfile.version="1" \
      software="Kaptive" \
      software.version="${KAPTIVE_VER}" \
      description="Report information about surface polysaccharide loci for Klebsiella pneumoniae species complex and Acinetobacter baumannii genome assemblies" \
      website="https://github.com/klebgenomics/Kaptive/" \
      license="https://github.com/klebgenomics/Kaptive/blob/master/LICENSE" \
      website.VPdatabase="https://github.com/aldertzomer/vibrio_parahaemolyticus_genomoserotyping" \
      license.VPdatabase="https://github.com/aldertzomer/vibrio_parahaemolyticus_genomoserotyping/blob/main/LICENSE" \
      maintainer="Tamas Stirling" \
      maintainer.email="stirling.tamas@gmail.com" \
      maintainer2="Curtis Kapsak" \
      maintainer2.email="kapsakcj@gmail.com" \
      maintainer3="Erin Young" \
      maintainer3.email="eriny@utah.gov"
| 24 | + |
# OS-level prerequisites: Python and pip (used to install Kaptive below) plus the download
# and archive tools the later steps call (wget, curl, bzip2).
# The apt package lists are removed in the same layer so they are not stored in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      bzip2 \
      ca-certificates \
      curl \
      procps \
      python3 \
      python3-pip \
      wget \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get autoclean
| 35 | + |
# Mash: download the release tarball, unpack it and delete the archive.
# The files in the tarball belong to UID 1081147385 / GID 1360859114, which does NOT play well
# with systems that limit UIDs and GIDs. Ownership is therefore not restored on extraction
# (--no-same-owner) and the unpacked files are additionally chown'd to root:root.
RUN wget -q https://github.com/marbl/Mash/releases/download/v${MASH_VER}/mash-Linux64-v${MASH_VER}.tar \
 && tar --no-same-owner -xvf mash-Linux64-v${MASH_VER}.tar \
 && rm -rf mash-Linux64-v${MASH_VER}.tar \
 && chown root:root /mash-Linux64-v${MASH_VER}/*
| 42 | + |
# minimap2: install the pre-built x64 Linux binary release.
# The archive is saved to disk before extraction instead of piping curl into tar: with the
# default shell (no pipefail) a pipeline only reports tar's exit status, so a failed download
# would not be reported by curl itself. `-f` makes curl exit non-zero on an HTTP error rather
# than saving the error page; `-sS` hides the progress meter but still prints errors.
# The archive is removed in the same layer so it does not add to the image size.
RUN curl -fsSL -o minimap2-${MINIMAP2_VER}_x64-linux.tar.bz2 \
      https://github.com/lh3/minimap2/releases/download/v${MINIMAP2_VER}/minimap2-${MINIMAP2_VER}_x64-linux.tar.bz2 && \
    tar -jxvf minimap2-${MINIMAP2_VER}_x64-linux.tar.bz2 --no-same-owner && \
    rm -f minimap2-${MINIMAP2_VER}_x64-linux.tar.bz2
| 46 | + |
# Kaptive itself, installed with pip at the pinned version.
#   --break-system-packages : install into the system Python (no virtualenv is created here)
#   --no-cache-dir          : keep pip's download cache out of the image layer
RUN pip install \
      --break-system-packages \
      --no-cache-dir \
      kaptive==${KAPTIVE_VER}
| 49 | + |
# Reference databases: fetch the Kaptive source archive for the same release, keep only its
# reference_database directory (moved to /kaptive/reference_database) and delete the rest.
# This step also creates /data.
# NOTE: in v3.2.1+, reference_database moved to src/kaptive/data/reference_database. For those
# versions use this mv instead:
#   mv /Kaptive-${KAPTIVE_VER}/src/kaptive/data/reference_database /kaptive/reference_database
RUN wget -q https://github.com/klebgenomics/Kaptive/archive/refs/tags/v${KAPTIVE_VER}.tar.gz \
 && tar --no-same-owner -vxf v${KAPTIVE_VER}.tar.gz \
 && mkdir -p /data /kaptive/ \
 && mv /Kaptive-${KAPTIVE_VER}/reference_database /kaptive/reference_database \
 && rm -rf v${KAPTIVE_VER}.tar.gz Kaptive-${KAPTIVE_VER}
| 59 | + |
# Vibrio parahaemolyticus genomoserotyping database: download the tagged release, move its
# *gbk files next to the other Kaptive reference databases, then remove the archive and the
# unpacked source tree.
# --no-same-owner matches the other extraction steps in this file, so the unpacked files are
# owned by the user running the build rather than by the UID/GID recorded in the archive.
RUN wget -q https://github.com/aldertzomer/vibrio_parahaemolyticus_genomoserotyping/archive/refs/tags/${VP_GENOMOSEROTYPING_VER}.tar.gz && \
    tar -xzf ${VP_GENOMOSEROTYPING_VER}.tar.gz --no-same-owner && \
    rm -v ${VP_GENOMOSEROTYPING_VER}.tar.gz && \
    mv -v vibrio_parahaemolyticus_genomoserotyping-${VP_GENOMOSEROTYPING_VER}/*gbk /kaptive/reference_database/. && \
    rm -rf vibrio_parahaemolyticus_genomoserotyping-${VP_GENOMOSEROTYPING_VER}
| 66 | + |
# Containers start in /data
WORKDIR /data

# Make the unpacked mash and minimap2 directories available on PATH
ENV PATH="/mash-Linux64-v${MASH_VER}:/minimap2-${MINIMAP2_VER}_x64-linux:${PATH}"

# When no command is given, print Kaptive's help options
CMD ["kaptive", "--help"]
| 75 | + |
# Test stage: extends the app stage and exercises the installed tools.
FROM app AS test

# Smoke test: kaptive starts, prints its help and reports its version
RUN kaptive --help \
 && kaptive --version
| 80 | + |
# Test 1: type one A. baumannii assembly against both the K locus and the OC locus database,
# printing the first lines of each result file.
WORKDIR /test1

RUN echo "downloading an A. baumannii genome & testing Kaptive..." \
 && wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/486/705/GCA_016486705.1_PDT000751301.1/GCA_016486705.1_PDT000751301.1_genomic.fna.gz \
 && gzip -d GCA_016486705.1_PDT000751301.1_genomic.fna.gz \
 && kaptive assembly /kaptive/reference_database/Acinetobacter_baumannii_k_locus_primary_reference.gbk GCA_016486705.1_PDT000751301.1_genomic.fna -o abau_k.txt \
 && head abau_k.txt \
 && kaptive assembly /kaptive/reference_database/Acinetobacter_baumannii_OC_locus_primary_reference.gbk GCA_016486705.1_PDT000751301.1_genomic.fna -o abau_oc.txt \
 && head abau_oc.txt
| 89 | + |
# Test 2: type one K. pneumoniae assembly against both the K locus and the O locus database
# (given as explicit .gbk paths), then print the first lines of both result files.
WORKDIR /test2

RUN echo "downloading an K. pneumoniae genome & testing Kaptive..." \
 && wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/022/268/055/GCA_022268055.1_PDT000434809.1/GCA_022268055.1_PDT000434809.1_genomic.fna.gz \
 && gzip -d GCA_022268055.1_PDT000434809.1_genomic.fna.gz \
 && kaptive assembly /kaptive/reference_database/Klebsiella_k_locus_primary_reference.gbk GCA_022268055.1_PDT000434809.1_genomic.fna -o kpneu_k.txt \
 && kaptive assembly /kaptive/reference_database/Klebsiella_o_locus_primary_reference.gbk GCA_022268055.1_PDT000434809.1_genomic.fna -o kpneu_o.txt \
 && head kpneu_k.txt kpneu_o.txt
| 98 | + |
WORKDIR /test3
# Test 3: the usage recommended in the documentation, i.e. selecting the database by keyword
# (kpsc_k) instead of by a path to a .gbk file.
# The genome is downloaded and decompressed into the current directory (/test3), so it is
# passed by its file name; no assemblies/ directory exists in this image, so an
# `assemblies/*.fna` glob would match nothing.
RUN echo "downloading an K. pneumoniae genome & testing Kaptive..." && \
    wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/022/268/055/GCA_022268055.1_PDT000434809.1/GCA_022268055.1_PDT000434809.1_genomic.fna.gz && \
    gzip -d GCA_022268055.1_PDT000434809.1_genomic.fna.gz && \
    kaptive assembly kpsc_k GCA_022268055.1_PDT000434809.1_genomic.fna -o kaptive_results.tsv && \
    head kaptive_results.tsv
| 106 | + |
# Test 4: two Vibrio parahaemolyticus genomes with a known serotype, typed against the custom
# K and O databases. Both genomes come from the publication describing the custom database.
#   GCF_001558495.2 (listed as GCA_001558495.2), strain ATCC17802  - expect OL1 and KL1
#     https://www.ncbi.nlm.nih.gov/data-hub/genome/GCF_001558495.2/
#   GCF_001728135.1 (listed as GCA_001728135.1), strain CDC_K5009W - expect OL4 KL53
#     https://www.ncbi.nlm.nih.gov/data-hub/genome/GCF_001728135.1/
WORKDIR /test4

RUN echo "downloading an 2 V. parahaemolyticus genomes & testing Kaptive..." \
 && wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/558/495/GCF_001558495.2_ASM155849v2/GCF_001558495.2_ASM155849v2_genomic.fna.gz \
 && wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/728/135/GCF_001728135.1_ASM172813v1/GCF_001728135.1_ASM172813v1_genomic.fna.gz \
 && gzip -d GCF_001558495.2_ASM155849v2_genomic.fna.gz \
 && gzip -d GCF_001728135.1_ASM172813v1_genomic.fna.gz \
 && kaptive assembly /kaptive/reference_database/VibrioPara_Kaptivedb_K.gbk *.fna -o Vparahaemolyticus_K.txt \
 && kaptive assembly /kaptive/reference_database/VibrioPara_Kaptivedb_O.gbk *.fna -o Vparahaemolyticus_O.txt \
 && head Vparahaemolyticus_K.txt Vparahaemolyticus_O.txt
| 124 | + |
0 commit comments