Commit 6fa3c6a
feat: Add Dockerfile-arm64 to allow docker builds on Apple M1/M2 architecture (#209)
1 parent 0b40ade commit 6fa3c6a

File tree: 2 files changed, +100 −0 lines


Dockerfile-arm64

Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,92 @@

```
FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef

WORKDIR /usr/src

ENV SCCACHE=0.5.4
ENV RUSTC_WRAPPER=/usr/local/bin/sccache

# Download and configure sccache
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
    chmod +x /usr/local/bin/sccache

FROM chef AS planner

COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./

RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

ARG GIT_SHA
ARG DOCKER_LABEL

# sccache specific variables
ARG ACTIONS_CACHE_URL
ARG ACTIONS_RUNTIME_TOKEN
ARG SCCACHE_GHA_ENABLED

# Shim that makes MKL's runtime CPU check always report an Intel CPU;
# it is loaded via LD_PRELOAD in the final image.
RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \
    gcc -shared -fPIC -o libfakeintel.so fakeintel.c

COPY --from=planner /usr/src/recipe.json recipe.json

RUN cargo chef cook --release --features candle --no-default-features --recipe-path recipe.json && sccache -s

COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./

FROM builder AS http-builder

RUN cargo build --release --bin text-embeddings-router -F candle -F http --no-default-features && sccache -s

FROM builder AS grpc-builder

RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
    rm -f $PROTOC_ZIP

COPY proto proto

RUN cargo build --release --bin text-embeddings-router -F grpc -F candle --no-default-features && sccache -s

FROM debian:bookworm-slim AS base

COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so

ENV HUGGINGFACE_HUB_CACHE=/data \
    PORT=80 \
    MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \
    RAYON_NUM_THREADS=8 \
    LD_PRELOAD=/usr/local/libfakeintel.so \
    LD_LIBRARY_PATH=/usr/local/lib

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    libomp-dev \
    ca-certificates \
    libssl-dev \
    curl \
    && rm -rf /var/lib/apt/lists/*

FROM base AS grpc

COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

ENTRYPOINT ["text-embeddings-router"]
CMD ["--json-output"]

FROM base

COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

ENTRYPOINT ["text-embeddings-router"]
CMD ["--json-output"]
```

README.md

Lines changed: 8 additions & 0 deletions
@@ -40,6 +40,7 @@ length of 512 tokens:
- [gRPC](#grpc)
- [Local Install](#local-install)
- [Docker Build](#docker-build)
  - [Apple M1/M2 Arm](#apple-m1m2-arm64-architectures)
- [Examples](#examples)

@@ -476,6 +477,13 @@ runtime_compute_cap=90
### Apple M1/M2 arm64 architectures
#### DISCLAIMER
As explained in [MPS-Ready, ARM64 Docker Image](https://github.com/pytorch/pytorch/issues/81224), Metal / MPS is not supported via Docker. As such, inference will be CPU-bound and most likely quite slow when using this Docker image on an M1/M2 ARM CPU.
```
docker build . -f Dockerfile-arm64 --platform=linux/arm64
```

## Examples
- [Set up an Inference Endpoint with TEI](https://huggingface.co/learn/cookbook/automatic_embedding_tei_inference_endpoints)
- [RAG containers with TEI](https://github.com/plaggy/rag-containers)
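
Once built, the image runs like the other TEI CPU images. A hedged usage sketch — the host port, cache mount, image tag, and model id below are illustrative assumptions, not part of this commit; the image itself sets `PORT=80` and `HUGGINGFACE_HUB_CACHE=/data`:

```
# Map host port 8080 to the container's default port 80 and persist the
# Hugging Face model cache under ./data. The model id is only an example.
docker run -p 8080:80 -v $PWD/data:/data \
    text-embeddings-inference:arm64-http --model-id BAAI/bge-small-en-v1.5
```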
