diff --git a/.circleci/config.yml b/.circleci/config.yml
index 964bc65ea..58506c55e 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -98,7 +98,7 @@ workflows:
       - deploy:
           matrix:
             parameters:
-              docker-variant: ['', '-cuda', '-cuda-tf1', '-cuda-tf2', '-cuda-torch']
+              docker-variant: ['', '-cuda', '-cuda-tf2', '-cuda-torch']
           filters:
             branches:
               only: master
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index da01c8cb2..cd331a4e0 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -4,11 +4,15 @@
 on:
   push:
     branches: [ "master" ]
   workflow_dispatch: # run manually
+    inputs:
+      platforms:
+        description: "platform argument for docker buildx"
+        type: string
+        default: "linux/amd64,linux/arm/v7,linux/arm64/v8,linux/ppc64le"
 
 env:
-  # FIXME: linux/arm/v7 disabled as long as scikit-build/cmake-python-distributions#503 is unresolved
-  # PLATFORMS: linux/amd64,linux/arm/v7,linux/arm64/v8,linux/ppc64le
-  PLATFORMS: linux/amd64,linux/arm64/v8,linux/ppc64le
+  #PLATFORMS: linux/amd64,linux/arm/v7,linux/arm64/v8,linux/ppc64le
+  PLATFORMS: ${{ github.event.inputs.platforms || 'linux/amd64' }}
 
 jobs:
@@ -20,16 +24,12 @@ jobs:
       contents: read
     env:
       DOCKER_BASE_TAG: ghcr.io/ocr-d docker.io/ocrd
-      # TODO(kba): make the interpolation work correctly
-      # DOCKER_BUILD: docker buildx build --progress=plain --platform ${{ env.PLATFORMS }} --push
-      # TODO(kba): Investigate why ppc64le build hangs on "Installing build dependencies"
-      # TODO(kba): Investigate why arm64 fails with .buildkit_qemu_emulator: /usr/local/bin/conda: Invalid ELF image for this architecture
-      DOCKER_BUILD: docker buildx build --progress=plain --platform linux/amd64 --push
+      DOCKER_BUILD: docker buildx build --progress=plain --push --platform
     steps:
       - name: Export variables
         run: |
           echo "DOCKER_BASE_TAG=${{ env.DOCKER_BASE_TAG }}" >> $GITHUB_ENV
-          echo "DOCKER_BUILD=${{ env.DOCKER_BUILD }}" >> $GITHUB_ENV
+          echo "DOCKER_BUILD=${{ env.DOCKER_BUILD }} ${{ env.PLATFORMS }}" >> $GITHUB_ENV
       - name: Checkout
         uses: actions/checkout@v4
         with:
@@ -55,4 +55,4 @@ jobs:
       - name: Build the Docker image with GPU support
         run: make docker-cuda
       - name: Build the Docker images with GPU support and ML frameworks
-        run: make docker-cuda-tf1 docker-cuda-tf2 docker-cuda-torch
+        run: make docker-cuda-tf2 docker-cuda-torch
diff --git a/Dockerfile.cuda-tf1 b/Dockerfile.cuda-tf1
deleted file mode 100644
index 0b9c2d6a6..000000000
--- a/Dockerfile.cuda-tf1
+++ /dev/null
@@ -1,15 +0,0 @@
-ARG BASE_IMAGE=docker.io/ocrd/core-cuda
-FROM $BASE_IMAGE AS ocrd_core_base
-
-WORKDIR /build/core
-
-COPY Makefile .
-
-RUN make deps-tf1
-# Smoke Test
-RUN ocrd --version
-
-WORKDIR /data
-
-CMD ["/usr/local/bin/ocrd", "--help"]
-
diff --git a/Makefile b/Makefile
index 33f2c97f2..b4e11b2c0 100644
--- a/Makefile
+++ b/Makefile
@@ -66,7 +66,8 @@ get-conda: export CONDA_PREFIX ?= /conda
 # first part of recipe: see micro.mamba.pm/install.sh
 get-conda: OS != uname
 get-conda: PLATFORM = $(subst Darwin,osx,$(subst Linux,linux,$(OS)))
-get-conda: MACHINE = $(or $(filter aarch64 arm64 ppc64le, $(ARCH)), 64)
+get-conda: ARCH != uname -m
+get-conda: MACHINE = $(or $(filter aarch64 ppc64le, $(subst arm64,aarch64,$(ARCH))), 64)
 get-conda: URL = https://micro.mamba.pm/api/micromamba/$(PLATFORM)-$(MACHINE)/latest
 get-conda:
	curl --retry 6 -Ls $(URL) | tar -xvj bin/micromamba
@@ -85,93 +86,18 @@ endif
 # Dependencies for CUDA installation via Conda
 deps-cuda: PYTHON_PREFIX != $(PYTHON) -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'
 deps-cuda: get-conda
-# Get CUDA toolkit, including compiler and libraries with dev,
-# however, the Nvidia channels do not provide (recent) cudnn (needed for Torch, TF etc):
-#MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) \
-#conda install -c nvidia/label/cuda-11.8.0 cuda && conda clean -a
-#
-# The conda-forge channel has cudnn and cudatoolkit but no cudatoolkit-dev anymore (and we need both!),
-# so let's combine nvidia and conda-forge (will be same lib versions, no waste of space),
-# but omitting cuda-cudart-dev and cuda-libraries-dev (as these will be pulled by pip for torch anyway):
-	conda install -c nvidia/label/cuda-11.8.0 \
-	  cuda-nvcc \
-	  cuda-cccl \
-	&& conda clean -a \
-	&& find $(CONDA_PREFIX) -name "*_static.a" -delete
-#conda install -c conda-forge \
-#	cudatoolkit=11.8.0 \
-#	cudnn=8.8.* && \
-#conda clean -a && \
-#find $(CONDA_PREFIX) -name "*_static.a" -delete
-#
-# Since Torch will pull in the CUDA libraries (as Python pkgs) anyway,
-# let's jump the shark and pull these via NGC index directly,
-# but then share them with the rest of the system so native compilation/linking
-# works, too:
-	shopt -s nullglob; \
-	$(PIP) install nvidia-pyindex \
-	&& $(PIP) install nvidia-cudnn-cu11~=8.7 \
-	  nvidia-cublas-cu11~=11.11 \
-	  nvidia-cusparse-cu11~=11.7 \
-	  nvidia-cusolver-cu11~=11.4 \
-	  nvidia-curand-cu11~=10.3 \
-	  nvidia-cufft-cu11~=10.9 \
-	  nvidia-cuda-runtime-cu11~=11.8 \
-	  nvidia-cuda-cupti-cu11~=11.8 \
-	  nvidia-cuda-nvrtc-cu11 \
-	&& for pkg in cudnn cublas cusparse cusolver curand cufft cuda_runtime cuda_cupti cuda_nvrtc; do \
-	  for lib in $(PYTHON_PREFIX)/nvidia/$$pkg/lib/lib*.so.*; do \
-	    base=`basename $$lib`; \
-	    ln -s $$lib $(CONDA_PREFIX)/lib/$$base.so; \
-	    ln -s $$lib $(CONDA_PREFIX)/lib/$${base%.so.*}.so; \
-	  done \
-	  && for inc in $(PYTHON_PREFIX)/nvidia/$$pkg/include/*; do \
-	    base=`basename $$inc`; case $$base in __*) continue; esac; \
-	    ln -s $$inc $(CONDA_PREFIX)/include/; \
-	  done \
-	done \
-	&& ldconfig
-# gputil/nvidia-smi would be nice, too – but that drags in Python as a conda dependency...
-
-# Workaround for missing prebuilt versions of TF<2 for Python==3.8
-# todo: find another solution for 3.9, 3.10 etc
-# https://docs.nvidia.com/deeplearning/frameworks/tensorflow-wheel-release-notes/tf-wheel-rel.html
-# Nvidia has them, but under a different name, so let's rewrite that:
-# (hold at nv22.11, because newer releases require CUDA 12, which is not supported by TF2 (at py38),
-# and therefore not in our ocrd/core-cuda base image yet)
-# However, at that time no Numpy 1.24 was known, which breaks TF1
-# (which is why later nv versions hold it at <1.24 automatically -
-# see https://github.com/NVIDIA/tensorflow/blob/r1.15.5%2Bnv22.11/tensorflow/tools/pip_package/setup.py)
-deps-tf1:
-	if $(PYTHON) -c 'import sys; print("%u.%u" % (sys.version_info.major, sys.version_info.minor))' | fgrep 3.8 && \
-	! $(PIP) show -q tensorflow-gpu; then \
-	  $(PIP) install nvidia-pyindex && \
-	  pushd $$(mktemp -d) && \
-	  $(PIP) download --no-deps nvidia-tensorflow==1.15.5+nv22.11 && \
-	  for name in nvidia_tensorflow-*.whl; do name=$${name%.whl}; done && \
-	  $(PYTHON) -m wheel unpack $$name.whl && \
-	  for name in nvidia_tensorflow-*/; do name=$${name%/}; done && \
-	  newname=$${name/nvidia_tensorflow/tensorflow_gpu} &&\
-	  sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/METADATA && \
-	  sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/RECORD && \
-	  sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/tensorflow_core/tools/pip_package/setup.py && \
-	  pushd $$name && for path in $$name*; do mv $$path $${path/$$name/$$newname}; done && popd && \
-	  $(PYTHON) -m wheel pack $$name && \
-	  $(PIP) install $$newname*.whl && popd && rm -fr $$OLDPWD; \
-	  $(PIP) install "numpy<1.24" -r $$DIRSTACK/requirements.txt; \
-	else \
-	  $(PIP) install "tensorflow-gpu<2.0" -r requirements.txt; \
-	fi
+# Get CUDA toolkit, including compiler and libraries with dev from NVIDIA channels
+# Get CUDNN (needed for Torch, TF etc) from conda-forge.
+# CUDA runtime libs will be pulled by `pip` for TF and Torch differently anyway,
+# so do _not_ install them here to avoid wasting space.
+	conda install -c nvidia/label/cuda-12.4.0 cuda-minimal-build \
+	&& conda clean -a && ldconfig
 
 deps-tf2:
-	if $(PYTHON) -c 'import sys; print("%u.%u" % (sys.version_info.major, sys.version_info.minor))' | fgrep 3.8; then \
-	  $(PIP) install tensorflow -r requirements.txt; \
-	else \
-	  $(PIP) install "tensorflow[and-cuda]" -r requirements.txt; \
-	fi
+	$(PIP) install "tensorflow[and-cuda]" -r requirements.txt
 
 deps-torch:
-	$(PIP) install -i https://download.pytorch.org/whl/cu118 torchvision==0.16.2+cu118 torch==2.1.2+cu118 -r requirements.txt
+	$(PIP) install torch==2.5.1 torchvision==0.20.1 -i https://download.pytorch.org/whl/cu124 -r requirements.txt
 
 # deps-*: always mix core's requirements.txt with additional deps,
 # so pip does not ignore the older version reqs,
@@ -180,11 +106,11 @@ deps-torch:
 # Dependencies for deployment in an ubuntu/debian linux
 deps-ubuntu:
	apt-get update
-	apt-get install -y python3 imagemagick libgeos-dev libxml2-dev libxslt-dev libssl-dev
+	apt-get install -y bzip2 python3 imagemagick libgeos-dev libxml2-dev libxslt-dev libssl-dev
 
 # Dependencies for deployment via Conda
 deps-conda: get-conda
-	conda install -c conda-forge python==3.8.* imagemagick geos pkgconfig
+	conda install -c conda-forge python==3.10.* imagemagick geos pkgconfig
 
 # Install test python deps via pip
 deps-test: