Commit 6fa3c6a
feat: Add Dockerfile-arm64 to allow docker builds on Apple M1/M2 architecture (#209)
1 parent 0b40ade commit 6fa3c6a

File tree: 2 files changed, +100 −0 lines


Dockerfile-arm64

Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,92 @@

```
FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef

WORKDIR /usr/src

ENV SCCACHE=0.5.4
ENV RUSTC_WRAPPER=/usr/local/bin/sccache

# Download and configure sccache
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
    chmod +x /usr/local/bin/sccache

FROM chef AS planner

COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./

RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

ARG GIT_SHA
ARG DOCKER_LABEL

# sccache specific variables
ARG ACTIONS_CACHE_URL
ARG ACTIONS_RUNTIME_TOKEN
ARG SCCACHE_GHA_ENABLED

# Shim that makes MKL's runtime CPU check always report an Intel CPU;
# it is loaded via LD_PRELOAD in the final image.
RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \
    gcc -shared -fPIC -o libfakeintel.so fakeintel.c

COPY --from=planner /usr/src/recipe.json recipe.json

RUN cargo chef cook --release --features candle --no-default-features --recipe-path recipe.json && sccache -s

COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./

FROM builder AS http-builder

RUN cargo build --release --bin text-embeddings-router -F candle -F http --no-default-features && sccache -s

FROM builder AS grpc-builder

RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
    rm -f $PROTOC_ZIP

COPY proto proto

RUN cargo build --release --bin text-embeddings-router -F grpc -F candle --no-default-features && sccache -s

FROM debian:bookworm-slim AS base

COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so

ENV HUGGINGFACE_HUB_CACHE=/data \
    PORT=80 \
    MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \
    RAYON_NUM_THREADS=8 \
    LD_PRELOAD=/usr/local/libfakeintel.so \
    LD_LIBRARY_PATH=/usr/local/lib

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    libomp-dev \
    ca-certificates \
    libssl-dev \
    curl \
    && rm -rf /var/lib/apt/lists/*

FROM base AS grpc

COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

ENTRYPOINT ["text-embeddings-router"]
CMD ["--json-output"]

FROM base

COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

ENTRYPOINT ["text-embeddings-router"]
CMD ["--json-output"]
```

README.md

Lines changed: 8 additions & 0 deletions
@@ -40,6 +40,7 @@ length of 512 tokens:
- [gRPC](#grpc)
- [Local Install](#local-install)
- [Docker Build](#docker-build)
  - [Apple M1/M2 Arm](#apple-m1m2-arm64-architectures)
- [Examples](#examples)

@@ -476,6 +477,13 @@ runtime_compute_cap=90
### Apple M1/M2 arm64 architectures
#### DISCLAIMER
As explained in [MPS-Ready, ARM64 Docker Image](https://github.com/pytorch/pytorch/issues/81224), Metal / MPS is not supported via Docker. As such, inference will be CPU-bound and most likely quite slow when using this Docker image on an M1/M2 ARM CPU.
```
docker build . -f Dockerfile-arm64 --platform=linux/arm64
```

## Examples
- [Set up an Inference Endpoint with TEI](https://huggingface.co/learn/cookbook/automatic_embedding_tei_inference_endpoints)
- [RAG containers with TEI](https://github.com/plaggy/rag-containers)
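
Once built, the image runs like the other TEI CPU images. A hedged usage sketch — the host port, cache mount, image tag, and model id below are illustrative assumptions, not part of this commit; the image itself sets `PORT=80` and `HUGGINGFACE_HUB_CACHE=/data`:

```
# Map host port 8080 to the container's default port 80 and persist the
# Hugging Face model cache under ./data. The model id is only an example.
docker run -p 8080:80 -v $PWD/data:/data \
    text-embeddings-inference:arm64-http --model-id BAAI/bge-small-en-v1.5
```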
