Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 85 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,96 @@
# limitations under the License.

# Build-time pin for the NVIDIA NGC PyTorch base image tag (YY.MM release).
ARG PYT_VER=25.02
# NOTE(review): the two FROM lines below are diff artifacts — the old
# multi-stage "as builder" form and the new single-stage form are shown
# together without +/- markers; only one belongs in the merged Dockerfile.
FROM nvcr.io/nvidia/pytorch:$PYT_VER-py3 as builder
FROM nvcr.io/nvidia/pytorch:$PYT_VER-py3

# Update pip and setuptools
RUN pip install --upgrade pip setuptools
# OCI label linking this image back to its source repository.
LABEL org.opencontainers.image.source="https://github.com/pangeo-data/ncar-hackathon-xarray-on-gpus"

# Setup git-lfs, graphviz and libgl1 (a VTK dependency).
# NOTE(review): this RUN mixes removed and added diff lines — the trailing
# standalone "apt-get install -y libgl1" appears to be the replacement for the
# longer install chain above it; confirm against the merged Dockerfile.
RUN apt-get update && \
apt-get install -y git-lfs graphviz libgl1 && \
git lfs install && \
pip install torchviz
apt-get install -y libgl1

# Use bash (not /bin/sh) for all subsequent RUN instructions.
SHELL ["/bin/bash", "-c"]
##### START COPY FROM https://github.com/pangeo-data/pangeo-docker-images/blob/302f73984c42d140dee23695e3d3a17fcc951f3d/base-image/Dockerfile #####
# Setup environment to match variables set by repo2docker as much as possible
# The name of the conda environment into which the requested packages are installed
ENV CONDA_ENV=notebook \
# Tell apt-get to not block installs by asking for interactive human input
DEBIAN_FRONTEND=noninteractive \
# Set username, uid and gid (same as uid) of non-root user the container will be run as
NB_USER=jovyan \
NB_UID=1001 \
# Use /bin/bash as shell, not the default /bin/sh (arrow keys, etc don't work then)
SHELL=/bin/bash \
# Setup locale to be UTF-8, avoiding gnarly hard to debug encoding errors
LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \
# Install conda in the same place repo2docker does
CONDA_DIR=/srv/conda

# All env vars that reference other env vars need to be in their own ENV block
# Path to the python environment where the jupyter notebook packages are installed
ENV NB_PYTHON_PREFIX=${CONDA_DIR}/envs/${CONDA_ENV} \
# Home directory of our non-root user
HOME=/home/${NB_USER}

# Add both our notebook env as well as default conda installation to $PATH
# Thus, when we start a `python` process (for kernels, or notebooks, etc),
# it loads the python in the notebook conda environment, as that comes
# first here.
ENV PATH=${NB_PYTHON_PREFIX}/bin:${CONDA_DIR}/bin:${PATH}

# Ask dask to read config from ${CONDA_DIR}/etc rather than
# the default of /etc, since the non-root jovyan user can write
# to ${CONDA_DIR}/etc but not to /etc
ENV DASK_ROOT_CONFIG=${CONDA_DIR}/etc

RUN echo "Creating ${NB_USER} user..." \
# Create a group for the user to be part of, with gid same as uid
&& groupadd --gid ${NB_UID} ${NB_USER} \
# Create non-root user, with given gid, uid and create $HOME
&& useradd --create-home --gid ${NB_UID} --no-log-init --uid ${NB_UID} ${NB_USER} \
# Make sure that /srv is owned by non-root user, so we can install things there
&& chown -R ${NB_USER}:${NB_USER} /srv

# Run conda activate each time a bash shell starts, so users don't have to manually type conda activate
# Note this is only read by shell, but not by the jupyter notebook - that relies
# on us starting the correct `python` process, which we do by adding the notebook conda environment's
# bin to PATH earlier ($NB_PYTHON_PREFIX/bin)
RUN echo ". ${CONDA_DIR}/etc/profile.d/conda.sh ; conda activate ${CONDA_ENV}" > /etc/profile.d/init_conda.sh

# Install basic apt packages
RUN echo "Installing Apt-get packages..." \
&& apt-get update --fix-missing > /dev/null \
&& apt-get install -y apt-utils wget zip tzdata > /dev/null \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Install the GPU-codecs fork of zarr-python with its [gpu] extras.
# NOTE(review): this runs as root *before* Miniforge/conda is installed below,
# so pip here targets the base image's Python, not the conda prefix — confirm
# this ordering is intended.
RUN git clone -b gpu-codecs https://github.com/akshaysubr/zarr-python.git /opt/zarr-python && \
cd /opt/zarr-python && \
pip install .[gpu]
# Add TZ configuration - https://github.com/PrefectHQ/prefect/issues/3061
ENV TZ=UTC
# ========================

# Drop root: everything from here on runs as the non-root notebook user,
# created earlier with ownership of /srv (where conda will be installed).
USER ${NB_USER}
WORKDIR ${HOME}

# Install latest Miniforge in ${CONDA_DIR}
RUN echo "Installing Miniforge..." \
&& URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-$(uname -m).sh" \
&& wget --quiet ${URL} -O installer.sh \
&& /bin/bash installer.sh -u -b -p ${CONDA_DIR} \
&& rm installer.sh \
&& mamba install conda-lock -y \
&& mamba clean -afy \
# After installing the packages, we cleanup some unnecessary files
# to try to reduce image size - see https://jcristharif.com/conda-docker-tips.html
# Although we explicitly do *not* delete .pyc files, as that seems to slow down startup
# quite a bit unfortunately - see https://github.com/2i2c-org/infrastructure/issues/2047
&& find ${CONDA_DIR} -follow -type f -name '*.a' -delete

##### END COPY FROM https://github.com/pangeo-data/pangeo-docker-images/blob/302f73984c42d140dee23695e3d3a17fcc951f3d/base-image/Dockerfile #####

# Add conda packages
COPY environment.yml /tmp/environment.yml
# NOTE(review): this updates the *base* prefix ${CONDA_DIR}, not the
# ${CONDA_ENV} ("notebook") env referenced by NB_PYTHON_PREFIX/PATH above —
# confirm that installing into the base prefix is intended.
RUN mamba env update --prefix ${CONDA_DIR} --file /tmp/environment.yml

# NOTE(review): SHELL was already set to this exact value near the top of the
# file; this repetition is a no-op.
SHELL ["/bin/bash", "-c"]

# Timeout (seconds) for the NVIDIA CUDA compat layer — presumably consumed by
# the NGC base image's entrypoint compatibility check; TODO confirm.
ENV _CUDA_COMPAT_TIMEOUT=90
9 changes: 6 additions & 3 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,20 @@ channels:
# NOTE(review): this span is a diff rendering with +/- markers stripped —
# removed and added lines appear together; duplicated entries below are
# old/new variants and only one of each belongs in the merged file.
# (The "channels:" key this list belongs to is cut off by the diff header
# on the preceding line.)
- nodefaults
dependencies:
- cupy~=13.3.0
- cupy-xarray~=0.1.4
- dask-jobqueue~=0.9.0
- git-lfs~=3.6.1
- graphviz~=12.2.1
- rapidsai-nightly::kvikio>=25.04.00a
- jupyterlab~=4.3.5
# NOTE(review): the two nvidia-dali-python lines are the old (~=) and new (>=)
# pins from the diff — keep only one.
- nvidia-dali-python~=1.45.0
- nvidia-dali-python>=1.45.0
- nvtx~=0.2.11
- python=3.12
- pip~=25.0
- pytorch~=2.6.0
- segmentation-models-pytorch~=0.4.0
- xarray>=2025.3.0
- zarr~=3.0.3
- pip:
# NOTE(review): old pip entries (nvidia-nvcomp-cu12, plain zarr fork) and new
# ones (torchviz, zarr[gpu] fork) are interleaved here by the diff view —
# confirm which set belongs in the merged file.
- nvidia-nvcomp-cu12
- zarr @ git+https://github.com/akshaysubr/zarr-python.git@gpu-codecs
- torchviz
- zarr[gpu] @ git+https://github.com/akshaysubr/zarr-python.git@gpu-codecs