2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -98,7 +98,7 @@ workflows:
- deploy:
matrix:
parameters:
docker-variant: ['', '-cuda', '-cuda-tf1', '-cuda-tf2', '-cuda-torch']
docker-variant: ['', '-cuda', '-cuda-tf2', '-cuda-torch']
filters:
branches:
only: master
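For reference, the trimmed matrix now yields one deploy job per remaining variant. A hedged sketch of the corresponding Makefile targets (assuming the empty variant maps to the plain image target, as the variant names and the Makefile targets touched below suggest; -cuda-tf1 disappears together with Dockerfile.cuda-tf1 further down):

    # illustrative mapping of docker-variant values to make targets
    make docker              # ''
    make docker-cuda         # '-cuda'
    make docker-cuda-tf2     # '-cuda-tf2'
    make docker-cuda-torch   # '-cuda-torch'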
20 changes: 10 additions & 10 deletions .github/workflows/docker-image.yml
@@ -4,11 +4,15 @@ on:
push:
branches: [ "master" ]
workflow_dispatch: # run manually
inputs:
platforms:
description: "platform argument for docker buildx"
type: string
default: "linux/amd64,linux/arm/v7,linux/arm64/v8,linux/ppc64le"

env:
# FIXME: linux/arm/v7 disabled as long as scikit-build/cmake-python-distributions#503 is unresolved
# PLATFORMS: linux/amd64,linux/arm/v7,linux/arm64/v8,linux/ppc64le
PLATFORMS: linux/amd64,linux/arm64/v8,linux/ppc64le
#PLATFORMS: linux/amd64,linux/arm/v7,linux/arm64/v8,linux/ppc64le
PLATFORMS: ${{ github.event.inputs.platforms || 'linux/amd64' }}

jobs:

@@ -20,16 +24,12 @@ jobs:
contents: read
env:
DOCKER_BASE_TAG: ghcr.io/ocr-d docker.io/ocrd
# TODO(kba): make the interpolation work correctly
# DOCKER_BUILD: docker buildx build --progress=plain --platform ${{ env.PLATFORMS }} --push
# TODO(kba): Investigate why ppc64le build hangs on "Installing build dependencies"
# TODO(kba): Investigate why arm64 fails with .buildkit_qemu_emulator: /usr/local/bin/conda: Invalid ELF image for this architecture
DOCKER_BUILD: docker buildx build --progress=plain --platform linux/amd64 --push
DOCKER_BUILD: docker buildx build --progress=plain --push --platform
steps:
- name: Export variables
run: |
echo "DOCKER_BASE_TAG=${{ env.DOCKER_BASE_TAG }}" >> $GITHUB_ENV
echo "DOCKER_BUILD=${{ env.DOCKER_BUILD }}" >> $GITHUB_ENV
echo "DOCKER_BUILD=${{ env.DOCKER_BUILD }} ${{ env.PLATFORMS }}" >> $GITHUB_ENV
- name: Checkout
uses: actions/checkout@v4
with:
@@ -55,4 +55,4 @@ jobs:
- name: Build the Docker image with GPU support
run: make docker-cuda
- name: Build the Docker images with GPU support and ML frameworks
run: make docker-cuda-tf1 docker-cuda-tf2 docker-cuda-torch
run: make docker-cuda-tf2 docker-cuda-torch
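With the new workflow_dispatch input, the platform list becomes configurable for manual runs, while pushes to master fall back to linux/amd64 only (per the || fallback above). A hedged example of a manual trigger via the GitHub CLI, using the platforms input name defined in the workflow:

    # re-enable the multi-arch build for a one-off manual run
    gh workflow run docker-image.yml -f platforms="linux/amd64,linux/arm64/v8,linux/ppc64le"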
15 changes: 0 additions & 15 deletions Dockerfile.cuda-tf1

This file was deleted.

98 changes: 12 additions & 86 deletions Makefile
@@ -66,7 +66,8 @@ get-conda: export CONDA_PREFIX ?= /conda
# first part of recipe: see micro.mamba.pm/install.sh
get-conda: OS != uname
get-conda: PLATFORM = $(subst Darwin,osx,$(subst Linux,linux,$(OS)))
get-conda: MACHINE = $(or $(filter aarch64 arm64 ppc64le, $(ARCH)), 64)
get-conda: ARCH != uname -m
get-conda: MACHINE = $(or $(filter aarch64 ppc64le, $(subst arm64,aarch64,$(ARCH))), 64)
get-conda: URL = https://micro.mamba.pm/api/micromamba/$(PLATFORM)-$(MACHINE)/latest
get-conda:
curl --retry 6 -Ls $(URL) | tar -xvj bin/micromamba
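The added lines query uname -m explicitly and normalize arm64 to aarch64 before filtering, so that anything other than aarch64 or ppc64le falls back to micromamba's generic 64 (x86_64) build. Roughly equivalent shell logic, for illustration only:

    ARCH=$(uname -m)
    case "$ARCH" in
      arm64|aarch64) MACHINE=aarch64 ;;
      ppc64le)       MACHINE=ppc64le ;;
      *)             MACHINE=64 ;;   # e.g. x86_64
    esac
    # PLATFORM is resolved analogously from uname -s (Linux -> linux, Darwin -> osx)
    echo "https://micro.mamba.pm/api/micromamba/$PLATFORM-$MACHINE/latest"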
@@ -85,93 +86,18 @@ endif
# Dependencies for CUDA installation via Conda
deps-cuda: PYTHON_PREFIX != $(PYTHON) -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'
deps-cuda: get-conda
# Get CUDA toolkit, including compiler and libraries with dev,
# however, the Nvidia channels do not provide (recent) cudnn (needed for Torch, TF etc):
#MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) \
#conda install -c nvidia/label/cuda-11.8.0 cuda && conda clean -a
#
# The conda-forge channel has cudnn and cudatoolkit but no cudatoolkit-dev anymore (and we need both!),
# so let's combine nvidia and conda-forge (will be same lib versions, no waste of space),
# but omitting cuda-cudart-dev and cuda-libraries-dev (as these will be pulled by pip for torch anyway):
conda install -c nvidia/label/cuda-11.8.0 \
cuda-nvcc \
cuda-cccl \
&& conda clean -a \
&& find $(CONDA_PREFIX) -name "*_static.a" -delete
#conda install -c conda-forge \
# cudatoolkit=11.8.0 \
# cudnn=8.8.* && \
#conda clean -a && \
#find $(CONDA_PREFIX) -name "*_static.a" -delete
#
# Since Torch will pull in the CUDA libraries (as Python pkgs) anyway,
# let's jump the shark and pull these via NGC index directly,
# but then share them with the rest of the system so native compilation/linking
# works, too:
shopt -s nullglob; \
$(PIP) install nvidia-pyindex \
&& $(PIP) install nvidia-cudnn-cu11~=8.7 \
nvidia-cublas-cu11~=11.11 \
nvidia-cusparse-cu11~=11.7 \
nvidia-cusolver-cu11~=11.4 \
nvidia-curand-cu11~=10.3 \
nvidia-cufft-cu11~=10.9 \
nvidia-cuda-runtime-cu11~=11.8 \
nvidia-cuda-cupti-cu11~=11.8 \
nvidia-cuda-nvrtc-cu11 \
&& for pkg in cudnn cublas cusparse cusolver curand cufft cuda_runtime cuda_cupti cuda_nvrtc; do \
for lib in $(PYTHON_PREFIX)/nvidia/$$pkg/lib/lib*.so.*; do \
base=`basename $$lib`; \
ln -s $$lib $(CONDA_PREFIX)/lib/$$base.so; \
ln -s $$lib $(CONDA_PREFIX)/lib/$${base%.so.*}.so; \
done \
&& for inc in $(PYTHON_PREFIX)/nvidia/$$pkg/include/*; do \
base=`basename $$inc`; case $$base in __*) continue; esac; \
ln -s $$inc $(CONDA_PREFIX)/include/; \
done \
done \
&& ldconfig
# gputil/nvidia-smi would be nice, too – but that drags in Python as a conda dependency...

# Workaround for missing prebuilt versions of TF<2 for Python==3.8
# todo: find another solution for 3.9, 3.10 etc
# https://docs.nvidia.com/deeplearning/frameworks/tensorflow-wheel-release-notes/tf-wheel-rel.html
# Nvidia has them, but under a different name, so let's rewrite that:
# (hold at nv22.11, because newer releases require CUDA 12, which is not supported by TF2 (at py38),
# and therefore not in our ocrd/core-cuda base image yet)
# However, at that time no Numpy 1.24 was known, which breaks TF1
# (which is why later nv versions hold it at <1.24 automatically -
# see https://github.com/NVIDIA/tensorflow/blob/r1.15.5%2Bnv22.11/tensorflow/tools/pip_package/setup.py)
deps-tf1:
if $(PYTHON) -c 'import sys; print("%u.%u" % (sys.version_info.major, sys.version_info.minor))' | fgrep 3.8 && \
! $(PIP) show -q tensorflow-gpu; then \
$(PIP) install nvidia-pyindex && \
pushd $$(mktemp -d) && \
$(PIP) download --no-deps nvidia-tensorflow==1.15.5+nv22.11 && \
for name in nvidia_tensorflow-*.whl; do name=$${name%.whl}; done && \
$(PYTHON) -m wheel unpack $$name.whl && \
for name in nvidia_tensorflow-*/; do name=$${name%/}; done && \
newname=$${name/nvidia_tensorflow/tensorflow_gpu} &&\
sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/METADATA && \
sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/RECORD && \
sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/tensorflow_core/tools/pip_package/setup.py && \
pushd $$name && for path in $$name*; do mv $$path $${path/$$name/$$newname}; done && popd && \
$(PYTHON) -m wheel pack $$name && \
$(PIP) install $$newname*.whl && popd && rm -fr $$OLDPWD; \
$(PIP) install "numpy<1.24" -r $$DIRSTACK/requirements.txt; \
else \
$(PIP) install "tensorflow-gpu<2.0" -r requirements.txt; \
fi
# Get CUDA toolkit, including compiler and libraries with dev from NVIDIA channels
# Get CUDNN (needed for Torch, TF etc) from conda-forge.
# CUDA runtime libs will be pulled by `pip` for TF and Torch differently anyway,
# so do _not_ install them here to avoid wasting space.
conda install -c nvidia/label/cuda-12.4.0 cuda-minimal-build \
&& conda clean -a && ldconfig

deps-tf2:
if $(PYTHON) -c 'import sys; print("%u.%u" % (sys.version_info.major, sys.version_info.minor))' | fgrep 3.8; then \
$(PIP) install tensorflow -r requirements.txt; \
else \
$(PIP) install "tensorflow[and-cuda]" -r requirements.txt; \
fi
$(PIP) install "tensorflow[and-cuda]" -r requirements.txt

deps-torch:
$(PIP) install -i https://download.pytorch.org/whl/cu118 torchvision==0.16.2+cu118 torch==2.1.2+cu118 -r requirements.txt
$(PIP) install torch==2.5.1 torchvision==0.20.1 -i https://download.pytorch.org/whl/cu124 -r requirements.txt

# deps-*: always mix core's requirements.txt with additional deps,
# so pip does not ignore the older version reqs,
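The rewritten recipes pin a much slimmer GPU stack: cuda-minimal-build from the nvidia/label/cuda-12.4.0 channel instead of the hand-maintained list of CUDA 11.8 wheels and symlinks, TensorFlow via its own [and-cuda] extra, and torch 2.5.1 / torchvision 0.20.1 from the cu124 wheel index. Some hedged post-install sanity checks (illustrative only; assumes cuda-minimal-build ships nvcc, and exact versions depend on the resolved wheels):

    nvcc --version                                                                         # deps-cuda
    python3 -c 'import tensorflow as tf; print(tf.config.list_physical_devices("GPU"))'    # deps-tf2
    python3 -c 'import torch; print(torch.__version__, torch.cuda.is_available())'         # deps-torch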
@@ -180,11 +106,11 @@ deps-torch:
# Dependencies for deployment in an ubuntu/debian linux
deps-ubuntu:
apt-get update
apt-get install -y python3 imagemagick libgeos-dev libxml2-dev libxslt-dev libssl-dev
apt-get install -y bzip2 python3 imagemagick libgeos-dev libxml2-dev libxslt-dev libssl-dev

# Dependencies for deployment via Conda
deps-conda: get-conda
conda install -c conda-forge python==3.8.* imagemagick geos pkgconfig
conda install -c conda-forge python==3.10.* imagemagick geos pkgconfig

# Install test python deps via pip
deps-test: