Skip to content

Commit cf70c86

Browse files
authored
Build from rocky base image (#665)
* build from Rocky linux unstructured base image * add qemu for arm * comment out push while testing * remove quotes * Add arch * bump login action * add ARCH env var to the push step * run only subset of tests on arm image Tests on emulated arm are extremely slow. Likelihood of something breaking in the arm image only is minimal. I say that knowing I likely just jinxed us. * re-enable push from main * add a dnf cleanup * version bump * move from dev to minor version bump
1 parent cd9fd9b commit cf70c86

File tree

4 files changed

+49
-58
lines changed

4 files changed

+49
-58
lines changed

.github/workflows/docker-publish.yml

Lines changed: 33 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ jobs:
2121
id: set_short_sha
2222
run: echo "short_sha=$(echo ${{ github.sha }} | cut -c1-7)" >> $GITHUB_OUTPUT
2323

24-
build-amd:
24+
build-images:
25+
strategy:
26+
matrix:
27+
docker-platform: ["linux/arm64", "linux/amd64"]
2528
runs-on: ubuntu-latest
2629
needs: set-short-sha
2730
env:
@@ -31,64 +34,41 @@ jobs:
3134
- name: Checkout code
3235
uses: actions/checkout@v3
3336
- name: Login to Quay.io
34-
uses: docker/login-action@v1
37+
uses: docker/login-action@v2
3538
with:
3639
registry: quay.io
3740
username: ${{ secrets.QUAY_IO_ROBOT_USERNAME }}
3841
password: ${{ secrets.QUAY_IO_ROBOT_TOKEN }}
39-
- name: Build AMD image
42+
- name: Build images
4043
run: |
41-
DOCKER_BUILDKIT=1 docker buildx build --platform=linux/amd64 --load \
44+
ARCH=$(cut -d "/" -f2 <<< ${{ matrix.docker-platform }})
45+
DOCKER_BUILDKIT=1 docker buildx build --platform=$ARCH --load \
4246
--build-arg PIP_VERSION=$PIP_VERSION \
4347
--build-arg BUILDKIT_INLINE_CACHE=1 \
4448
--progress plain \
45-
--cache-from $DOCKER_BUILD_REPOSITORY:amd \
46-
-t $DOCKER_BUILD_REPOSITORY:amd-$SHORT_SHA .
47-
- name: Test AMD image
48-
run: |
49-
DOCKER_PLATFORM="linux/amd64" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:amd-$SHORT_SHA" make docker-test
50-
DOCKER_IMAGE=$DOCKER_BUILD_REPOSITORY:amd-$SHORT_SHA make docker-smoke-test
51-
- name: Push AMD image
52-
run: |
53-
# write to the build repository to cache for the publish-images job
54-
docker push $DOCKER_BUILD_REPOSITORY:amd-$SHORT_SHA
55-
build-arm:
56-
runs-on: ubuntu-latest
57-
needs: set-short-sha
58-
env:
59-
SHORT_SHA: ${{ needs.set-short-sha.outputs.short_sha }}
60-
steps:
61-
- uses: docker/setup-buildx-action@v1
62-
- name: Checkout code
63-
uses: actions/checkout@v3
64-
- name: Login to Quay.io
65-
uses: docker/login-action@v1
66-
with:
67-
registry: quay.io
68-
username: ${{ secrets.QUAY_IO_ROBOT_USERNAME }}
69-
password: ${{ secrets.QUAY_IO_ROBOT_TOKEN }}
49+
--cache-from $DOCKER_BUILD_REPOSITORY:$ARCH \
50+
-t $DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA .
7051
- name: Set up QEMU
7152
uses: docker/setup-qemu-action@v2
72-
- name: Build ARM image
73-
run: |
74-
DOCKER_BUILDKIT=1 docker buildx build --platform=linux/arm64 --load \
75-
--build-arg PIP_VERSION=$PIP_VERSION \
76-
--build-arg BUILDKIT_INLINE_CACHE=1 \
77-
--progress plain \
78-
--cache-from $DOCKER_BUILD_REPOSITORY:arm \
79-
-t $DOCKER_BUILD_REPOSITORY:arm-$SHORT_SHA .
80-
- name: Test ARM image
53+
- name: Test images
8154
run: |
82-
# only run a subset of tests on ARM, since they take a long time with emulation
83-
DOCKER_PLATFORM="linux/arm64" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:arm-$SHORT_SHA" make docker-test TEST_NAME=partition/test_text.py
84-
DOCKER_IMAGE=$DOCKER_BUILD_REPOSITORY:arm-$SHORT_SHA make docker-smoke-test
85-
- name: Push ARM image
55+
ARCH=$(cut -d "/" -f2 <<< ${{ matrix.docker-platform }})
56+
if [ "$ARCH" = "amd64" ]; then
57+
DOCKER_PLATFORM="${{ matrix.docker-platform }}" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA" \
58+
make docker-test
59+
else
60+
DOCKER_PLATFORM="${{ matrix.docker-platform }}" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA" \
61+
make docker-test TEST_NAME=partition/test_text.py
62+
fi
63+
DOCKER_IMAGE=$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA make docker-smoke-test
64+
- name: Push images
8665
run: |
8766
# write to the build repository to cache for the publish-images job
88-
docker push $DOCKER_BUILD_REPOSITORY:arm-$SHORT_SHA
67+
ARCH=$(cut -d "/" -f2 <<< ${{ matrix.docker-platform }})
68+
docker push "$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA"
8969
publish-images:
9070
runs-on: ubuntu-latest
91-
needs: [set-short-sha, build-amd, build-arm]
71+
needs: [set-short-sha, build-images]
9272
env:
9373
SHORT_SHA: ${{ needs.set-short-sha.outputs.short_sha }}
9474
steps:
@@ -103,25 +83,24 @@ jobs:
10383
password: ${{ secrets.QUAY_IO_ROBOT_TOKEN }}
10484
- name: Pull AMD image
10585
run: |
106-
docker pull $DOCKER_BUILD_REPOSITORY:amd-$SHORT_SHA
86+
docker pull $DOCKER_BUILD_REPOSITORY:amd64-$SHORT_SHA
10787
- name: Pull ARM image
10888
run: |
109-
docker pull $DOCKER_BUILD_REPOSITORY:arm-$SHORT_SHA
89+
docker pull $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA
11090
- name: Push latest build tags for AMD and ARM
11191
run: |
11292
# these are used to construct the final manifest but also cache-from in subsequent runs
113-
docker tag $DOCKER_BUILD_REPOSITORY:amd-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd
114-
docker push $DOCKER_BUILD_REPOSITORY:amd
115-
docker tag $DOCKER_BUILD_REPOSITORY:arm-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:arm
116-
docker push $DOCKER_BUILD_REPOSITORY:arm
93+
docker tag $DOCKER_BUILD_REPOSITORY:amd64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64
94+
docker push $DOCKER_BUILD_REPOSITORY:amd64
95+
docker tag $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:arm64
96+
docker push $DOCKER_BUILD_REPOSITORY:arm64
11797
- name: Push multiarch manifest
11898
run: |
119-
docker manifest create ${DOCKER_REPOSITORY}:latest $DOCKER_BUILD_REPOSITORY:amd $DOCKER_BUILD_REPOSITORY:arm
99+
# the manifest list must reference one image per architecture (amd64 AND arm64)
docker manifest create ${DOCKER_REPOSITORY}:latest $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
120100
docker manifest push $DOCKER_REPOSITORY:latest
121-
docker manifest create ${DOCKER_REPOSITORY}:$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd $DOCKER_BUILD_REPOSITORY:arm
101+
# the manifest list must reference one image per architecture (amd64 AND arm64)
docker manifest create ${DOCKER_REPOSITORY}:$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
122102
docker manifest push $DOCKER_REPOSITORY:$SHORT_SHA
123103
VERSION=$(grep -Po '(?<=__version__ = ")[^"]*' unstructured/__version__.py)
124-
docker manifest create ${DOCKER_REPOSITORY}:$VERSION $DOCKER_BUILD_REPOSITORY:amd $DOCKER_BUILD_REPOSITORY:arm
104+
docker manifest create ${DOCKER_REPOSITORY}:$VERSION $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
125105
docker manifest push $DOCKER_REPOSITORY:$VERSION
126106
127-

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
## 0.7.1
2+
3+
### Enhancements
4+
5+
* Builds from the Unstructured base image, built off of Rocky Linux 8.7; this resolves almost all CVEs in the image.
6+
7+
### Features
8+
9+
### Fixes
10+
111
## 0.7.0
212

313
### Enhancements

Dockerfile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# syntax=docker/dockerfile:experimental
2-
3-
FROM quay.io/unstructured-io/base-images:centos7.9-1
2+
FROM quay.io/unstructured-io/base-images:rocky8.7
43

54
ARG PIP_VERSION
65

@@ -16,6 +15,7 @@ ENV PATH="/home/usr/.local/bin:${PATH}"
1615
COPY requirements requirements
1716

1817
RUN python3.8 -m pip install pip==${PIP_VERSION} && \
18+
dnf -y groupinstall "Development Tools" && \
1919
pip install --no-cache -r requirements/base.txt && \
2020
pip install --no-cache -r requirements/test.txt && \
2121
pip install --no-cache -r requirements/huggingface.txt && \
@@ -29,7 +29,9 @@ RUN python3.8 -m pip install pip==${PIP_VERSION} && \
2929
pip install --no-cache -r requirements/ingest-slack.txt && \
3030
pip install --no-cache -r requirements/ingest-wikipedia.txt && \
3131
pip install --no-cache -r requirements/local-inference.txt && \
32-
scl enable devtoolset-9 bash
32+
pip install --no-cache "detectron2@git+https://github.com/facebookresearch/detectron2.git@e2ce8dc#egg=detectron2" && \
33+
dnf -y groupremove "Development Tools" && \
34+
dnf clean all
3335

3436
COPY example-docs example-docs
3537
COPY unstructured unstructured

unstructured/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.7.0" # pragma: no cover
1+
__version__ = "0.7.1" # pragma: no cover

0 commit comments

Comments
 (0)