
Commit f59de74

Merge remote-tracking branch 'bertsky/update-cuda'
2 parents: e3da3ac + da14030

File tree

4 files changed: +23, -112 lines


.circleci/config.yml

Lines changed: 1 addition & 1 deletion

@@ -98,7 +98,7 @@ workflows:
       - deploy:
           matrix:
             parameters:
-              docker-variant: ['', '-cuda', '-cuda-tf1', '-cuda-tf2', '-cuda-torch']
+              docker-variant: ['', '-cuda', '-cuda-tf2', '-cuda-torch']
           filters:
             branches:
               only: master

.github/workflows/docker-image.yml

Lines changed: 10 additions & 10 deletions

@@ -4,11 +4,15 @@ on:
   push:
     branches: [ "master" ]
   workflow_dispatch: # run manually
+    inputs:
+      platforms:
+        description: "platform argument for docker buildx"
+        type: string
+        default: "linux/amd64,linux/arm/v7,linux/arm64/v8,linux/ppc64le"

 env:
-  # FIXME: linux/arm/v7 disabled as long as scikit-build/cmake-python-distributions#503 is unresolved
-  # PLATFORMS: linux/amd64,linux/arm/v7,linux/arm64/v8,linux/ppc64le
-  PLATFORMS: linux/amd64,linux/arm64/v8,linux/ppc64le
+  #PLATFORMS: linux/amd64,linux/arm/v7,linux/arm64/v8,linux/ppc64le
+  PLATFORMS: ${{ github.event.inputs.platforms || 'linux/amd64' }}

 jobs:

@@ -20,16 +24,12 @@ jobs:
       contents: read
     env:
       DOCKER_BASE_TAG: ghcr.io/ocr-d docker.io/ocrd
-      # TODO(kba): make the interpolation work correctly
-      # DOCKER_BUILD: docker buildx build --progress=plain --platform ${{ env.PLATFORMS }} --push
-      # TODO(kba): Investigate why ppc64le build hangs on "Installing build dependencies"
-      # TODO(kba): Investigate why arm64 fails with .buildkit_qemu_emulator: /usr/local/bin/conda: Invalid ELF image for this architecture
-      DOCKER_BUILD: docker buildx build --progress=plain --platform linux/amd64 --push
+      DOCKER_BUILD: docker buildx build --progress=plain --push --platform
     steps:
       - name: Export variables
         run: |
           echo "DOCKER_BASE_TAG=${{ env.DOCKER_BASE_TAG }}" >> $GITHUB_ENV
-          echo "DOCKER_BUILD=${{ env.DOCKER_BUILD }}" >> $GITHUB_ENV
+          echo "DOCKER_BUILD=${{ env.DOCKER_BUILD }} ${{ env.PLATFORMS }}" >> $GITHUB_ENV
       - name: Checkout
         uses: actions/checkout@v4
         with:

@@ -55,4 +55,4 @@ jobs:
       - name: Build the Docker image with GPU support
         run: make docker-cuda
       - name: Build the Docker images with GPU support and ML frameworks
-        run: make docker-cuda-tf1 docker-cuda-tf2 docker-cuda-torch
+        run: make docker-cuda-tf2 docker-cuda-torch
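
Note on the hunks above: the buildx invocation is now assembled in two parts. DOCKER_BUILD ends in a bare --platform flag, and the "Export variables" step appends PLATFORMS (the workflow_dispatch input, or 'linux/amd64' on ordinary pushes) before writing the result to GITHUB_ENV. A minimal shell sketch of that assembly, with illustrative values rather than anything taken from an actual workflow run:

# Sketch only: mimics the "Export variables" step outside of GitHub Actions.
DOCKER_BUILD='docker buildx build --progress=plain --push --platform'
PLATFORMS='linux/amd64'   # a manual run could pass e.g. linux/amd64,linux/arm64/v8 via the input
echo "DOCKER_BUILD=$DOCKER_BUILD $PLATFORMS" >> "${GITHUB_ENV:-/dev/stdout}"
# a later step could then expand it as: $DOCKER_BUILD -t <tag> .   (tag is a placeholder here)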

Dockerfile.cuda-tf1

Lines changed: 0 additions & 15 deletions
This file was deleted.

Makefile

Lines changed: 12 additions & 86 deletions

@@ -66,7 +66,8 @@ get-conda: export CONDA_PREFIX ?= /conda
 # first part of recipe: see micro.mamba.pm/install.sh
 get-conda: OS != uname
 get-conda: PLATFORM = $(subst Darwin,osx,$(subst Linux,linux,$(OS)))
-get-conda: MACHINE = $(or $(filter aarch64 arm64 ppc64le, $(ARCH)), 64)
+get-conda: ARCH != uname -m
+get-conda: MACHINE = $(or $(filter aarch64 ppc64le, $(subst arm64,aarch64,$(ARCH))), 64)
 get-conda: URL = https://micro.mamba.pm/api/micromamba/$(PLATFORM)-$(MACHINE)/latest
 get-conda:
 	curl --retry 6 -Ls $(URL) | tar -xvj bin/micromamba
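
The new ARCH/MACHINE logic in the hunk above normalizes macOS's arm64 to aarch64, keeps only the architectures micromamba publishes under their own name, and falls back to "64" (x86_64) otherwise. A rough shell equivalent of those Make functions, as an illustrative sketch only:

# Sketch: what $(MACHINE) and $(URL) resolve to for a given `uname -m`.
ARCH=$(uname -m)
case "$ARCH" in
  aarch64|arm64) MACHINE=aarch64 ;;   # $(subst arm64,aarch64,...) then $(filter aarch64 ppc64le,...)
  ppc64le)       MACHINE=ppc64le ;;
  *)             MACHINE=64 ;;        # x86_64 and anything unrecognized fall back to 64
esac
echo "https://micro.mamba.pm/api/micromamba/linux-$MACHINE/latest"   # PLATFORM hard-coded to linux here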
@@ -85,93 +86,18 @@ endif
 # Dependencies for CUDA installation via Conda
 deps-cuda: PYTHON_PREFIX != $(PYTHON) -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'
 deps-cuda: get-conda
-	# Get CUDA toolkit, including compiler and libraries with dev,
-	# however, the Nvidia channels do not provide (recent) cudnn (needed for Torch, TF etc):
-	#MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) \
-	#conda install -c nvidia/label/cuda-11.8.0 cuda && conda clean -a
-	#
-	# The conda-forge channel has cudnn and cudatoolkit but no cudatoolkit-dev anymore (and we need both!),
-	# so let's combine nvidia and conda-forge (will be same lib versions, no waste of space),
-	# but omitting cuda-cudart-dev and cuda-libraries-dev (as these will be pulled by pip for torch anyway):
-	conda install -c nvidia/label/cuda-11.8.0 \
-	    cuda-nvcc \
-	    cuda-cccl \
-	&& conda clean -a \
-	&& find $(CONDA_PREFIX) -name "*_static.a" -delete
-	#conda install -c conda-forge \
-	#    cudatoolkit=11.8.0 \
-	#    cudnn=8.8.* && \
-	#conda clean -a && \
-	#find $(CONDA_PREFIX) -name "*_static.a" -delete
-	#
-	# Since Torch will pull in the CUDA libraries (as Python pkgs) anyway,
-	# let's jump the shark and pull these via NGC index directly,
-	# but then share them with the rest of the system so native compilation/linking
-	# works, too:
-	shopt -s nullglob; \
-	$(PIP) install nvidia-pyindex \
-	&& $(PIP) install nvidia-cudnn-cu11~=8.7 \
-	    nvidia-cublas-cu11~=11.11 \
-	    nvidia-cusparse-cu11~=11.7 \
-	    nvidia-cusolver-cu11~=11.4 \
-	    nvidia-curand-cu11~=10.3 \
-	    nvidia-cufft-cu11~=10.9 \
-	    nvidia-cuda-runtime-cu11~=11.8 \
-	    nvidia-cuda-cupti-cu11~=11.8 \
-	    nvidia-cuda-nvrtc-cu11 \
-	&& for pkg in cudnn cublas cusparse cusolver curand cufft cuda_runtime cuda_cupti cuda_nvrtc; do \
-	    for lib in $(PYTHON_PREFIX)/nvidia/$$pkg/lib/lib*.so.*; do \
-	        base=`basename $$lib`; \
-	        ln -s $$lib $(CONDA_PREFIX)/lib/$$base.so; \
-	        ln -s $$lib $(CONDA_PREFIX)/lib/$${base%.so.*}.so; \
-	    done \
-	    && for inc in $(PYTHON_PREFIX)/nvidia/$$pkg/include/*; do \
-	        base=`basename $$inc`; case $$base in __*) continue; esac; \
-	        ln -s $$inc $(CONDA_PREFIX)/include/; \
-	    done \
-	done \
-	&& ldconfig
-	# gputil/nvidia-smi would be nice, too – but that drags in Python as a conda dependency...
-
-# Workaround for missing prebuilt versions of TF<2 for Python==3.8
-# todo: find another solution for 3.9, 3.10 etc
-# https://docs.nvidia.com/deeplearning/frameworks/tensorflow-wheel-release-notes/tf-wheel-rel.html
-# Nvidia has them, but under a different name, so let's rewrite that:
-# (hold at nv22.11, because newer releases require CUDA 12, which is not supported by TF2 (at py38),
-# and therefore not in our ocrd/core-cuda base image yet)
-# However, at that time no Numpy 1.24 was known, which breaks TF1
-# (which is why later nv versions hold it at <1.24 automatically -
-# see https://github.com/NVIDIA/tensorflow/blob/r1.15.5%2Bnv22.11/tensorflow/tools/pip_package/setup.py)
-deps-tf1:
-	if $(PYTHON) -c 'import sys; print("%u.%u" % (sys.version_info.major, sys.version_info.minor))' | fgrep 3.8 && \
-	    ! $(PIP) show -q tensorflow-gpu; then \
-	    $(PIP) install nvidia-pyindex && \
-	    pushd $$(mktemp -d) && \
-	    $(PIP) download --no-deps nvidia-tensorflow==1.15.5+nv22.11 && \
-	    for name in nvidia_tensorflow-*.whl; do name=$${name%.whl}; done && \
-	    $(PYTHON) -m wheel unpack $$name.whl && \
-	    for name in nvidia_tensorflow-*/; do name=$${name%/}; done && \
-	    newname=$${name/nvidia_tensorflow/tensorflow_gpu} &&\
-	    sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/METADATA && \
-	    sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/RECORD && \
-	    sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/tensorflow_core/tools/pip_package/setup.py && \
-	    pushd $$name && for path in $$name*; do mv $$path $${path/$$name/$$newname}; done && popd && \
-	    $(PYTHON) -m wheel pack $$name && \
-	    $(PIP) install $$newname*.whl && popd && rm -fr $$OLDPWD; \
-	    $(PIP) install "numpy<1.24" -r $$DIRSTACK/requirements.txt; \
-	else \
-	    $(PIP) install "tensorflow-gpu<2.0" -r requirements.txt; \
-	fi
+	# Get CUDA toolkit, including compiler and libraries with dev from NVIDIA channels
+	# Get CUDNN (needed for Torch, TF etc) from conda-forge.
+	# CUDA runtime libs will be pulled by `pip` for TF and Torch differently anyway,
+	# so do _not_ install them here to avoid wasting space.
+	conda install -c nvidia/label/cuda-12.4.0 cuda-minimal-build \
+	&& conda clean -a && ldconfig

 deps-tf2:
-	if $(PYTHON) -c 'import sys; print("%u.%u" % (sys.version_info.major, sys.version_info.minor))' | fgrep 3.8; then \
-	    $(PIP) install tensorflow -r requirements.txt; \
-	else \
-	    $(PIP) install "tensorflow[and-cuda]" -r requirements.txt; \
-	fi
+	$(PIP) install "tensorflow[and-cuda]" -r requirements.txt

 deps-torch:
-	$(PIP) install -i https://download.pytorch.org/whl/cu118 torchvision==0.16.2+cu118 torch==2.1.2+cu118 -r requirements.txt
+	$(PIP) install torch==2.5.1 torchvision==0.20.1 -i https://download.pytorch.org/whl/cu124 -r requirements.txt

 # deps-*: always mix core's requirements.txt with additional deps,
 # so pip does not ignore the older version reqs,
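
With the hunk above, deps-cuda only installs the CUDA 12.4 build tooling (cuda-minimal-build) from the NVIDIA channel, while the runtime libraries arrive later as pip wheels alongside TensorFlow ("tensorflow[and-cuda]") or Torch (the cu124 index). A quick post-install sanity check, sketched here under the assumption that the conda prefix is on PATH and one of the framework wheels has already been installed:

# Sketch only: verify the compiler from conda and the runtime libs from pip are visible.
command -v nvcc && nvcc --version     # compiler tooling from the nvidia/label/cuda-12.4.0 channel
python3 -c "import torch; print(torch.version.cuda, torch.cuda.is_available())"   # needs the torch cu124 wheel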
@@ -180,11 +106,11 @@ deps-torch:
 # Dependencies for deployment in an ubuntu/debian linux
 deps-ubuntu:
 	apt-get update
-	apt-get install -y python3 imagemagick libgeos-dev libxml2-dev libxslt-dev libssl-dev
+	apt-get install -y bzip2 python3 imagemagick libgeos-dev libxml2-dev libxslt-dev libssl-dev

 # Dependencies for deployment via Conda
 deps-conda: get-conda
-	conda install -c conda-forge python==3.8.* imagemagick geos pkgconfig
+	conda install -c conda-forge python==3.10.* imagemagick geos pkgconfig

 # Install test python deps via pip
 deps-test:
