diff --git a/1.architectures/5.sagemaker-hyperpod/LifecycleScripts/base-config/utils/install_docker.sh b/1.architectures/5.sagemaker-hyperpod/LifecycleScripts/base-config/utils/install_docker.sh
index 401ad154e..654fb03af 100755
--- a/1.architectures/5.sagemaker-hyperpod/LifecycleScripts/base-config/utils/install_docker.sh
+++ b/1.architectures/5.sagemaker-hyperpod/LifecycleScripts/base-config/utils/install_docker.sh
@@ -56,6 +56,8 @@ sudo usermod -aG docker ubuntu
 # See: https://github.com/aws-samples/awsome-distributed-training/issues/127
 #
 # Docker workdir doesn't like Lustre. Tried with storage driver overlay2, fuse-overlayfs, & vfs.
+# Also, containerd may ship with a commented-out root ("#root = ...") in its config; we need to
+# ensure an uncommented root that points to the fast local volume.
 if [[ $(mount | grep /opt/sagemaker) ]]; then
     cat <<EOL >> /etc/docker/daemon.json
 {
@@ -66,6 +68,14 @@ EOL
     sed -i \
         's|^\[Service\]$|[Service]\nEnvironment="DOCKER_TMPDIR=/opt/sagemaker/docker/tmp"|' \
         /usr/lib/systemd/system/docker.service
+
+    # Ensure containerd config exists, then rewrite its top-level root (commented or not; ERE '#?')
+    if [[ ! -f /etc/containerd/config.toml ]]; then
+        containerd config default | sudo tee /etc/containerd/config.toml >/dev/null
+    fi
+    sudo sed -i -E \
+        -e 's|^#?root *=.*|root = "/opt/sagemaker/docker/containerd"|' \
+        /etc/containerd/config.toml
 elif [[ $(mount | grep /opt/dlami/nvme) ]]; then
     cat <<EOL >> /etc/docker/daemon.json
 {
@@ -76,7 +86,16 @@ EOL
     sed -i \
         's|^\[Service\]$|[Service]\nEnvironment="DOCKER_TMPDIR=/opt/dlami/nvme/docker/tmp"|' \
         /usr/lib/systemd/system/docker.service
+
+    # Ensure containerd config exists, then rewrite its top-level root (commented or not; ERE '#?')
+    if [[ ! -f /etc/containerd/config.toml ]]; then
+        containerd config default | sudo tee /etc/containerd/config.toml >/dev/null
+    fi
+    sudo sed -i -E \
+        -e 's|^#?root *=.*|root = "/opt/dlami/nvme/docker/containerd"|' \
+        /etc/containerd/config.toml
 fi
 
 systemctl daemon-reload
-systemctl restart docker
\ No newline at end of file
+systemctl restart containerd
+systemctl restart docker