|
| 1 | +# This is a multi-stage Dockerfile that can be used to build many different types of |
| 2 | +# bundled dependencies for PySpark projects. |
| 3 | +# The `base` stage installs generic tools necessary for packaging. |
| 4 | +# |
| 5 | +# There are `export-` and `build-` stages for the different types of projects. |
| 6 | +# - python-packages - Generic support for Python projects with pyproject.toml |
| 7 | +# - poetry - Support for Poetry projects |
| 8 | +# |
| 9 | +# This Dockerfile is generated automatically as part of the emr-cli tool. |
| 10 | +# Feel free to modify it for your needs, but leave the `build-` and `export-` |
| 11 | +# stages related to your project. |
| 12 | +# |
| 13 | +# To build manually, you can use the following command, assuming |
| 14 | +# the Docker BuildKit backend is enabled. https://docs.docker.com/build/buildkit/ |
| 15 | +# |
| 16 | +# Example for building a poetry project and saving the output to dist/ folder |
| 17 | +# docker build --target export-poetry --output dist . |
| 18 | + |
| 19 | + |
## ----------------------------------------------------------------------------
## Base stage for python development
## ----------------------------------------------------------------------------
# Shared parent of the later stages: installs Python and the packaging tools,
# installs Poetry, and copies the project into /app.
FROM --platform=linux/amd64 amazonlinux:2 AS base

# Make a piped RUN fail when any command in the pipeline fails (for example
# the curl download below), not only when the last command does.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install the OS packages and drop the yum cache in the same layer.
RUN yum install -y \
        gzip \
        python3 \
        tar \
    && yum clean all \
    && rm -rf /var/cache/yum

# Create a virtual environment and put it first on PATH so that `python3`
# and `pip` resolve to it in the instructions that follow.
ENV VIRTUAL_ENV=/opt/venv
RUN python3 -m venv "$VIRTUAL_ENV"
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# EMR 6.x uses Python 3.7 - limit Poetry version to 1.5.1
# The Poetry installer script reads POETRY_VERSION from the environment.
ENV POETRY_VERSION=1.5.1
RUN python3 -m pip install --no-cache-dir --upgrade pip
# -f makes curl exit non-zero on an HTTP error instead of piping an error
# page into python3.
RUN curl -fsSL https://install.python-poetry.org | python3 -

# Make the `poetry` command available; the installer places it under
# $HOME/.local/bin by default, which is /root/.local/bin here.
ENV PATH="$PATH:/root/.local/bin"

WORKDIR /app

# Copy the whole build context (the project being packaged) into /app.
COPY . .
| 41 | + |
# Test stage - installs test dependencies defined in pyproject.toml
FROM base AS test
# Install the project from /app together with its `test` extra.
# The requirement is quoted so the shell never treats `[test]` as a glob
# pattern, and pip's download cache is not written into the layer.
RUN python3 -m pip install --no-cache-dir ".[test]"
| 45 | + |
| 46 | + |
## ----------------------------------------------------------------------------
## Build and export stages for Poetry Python projects
## ----------------------------------------------------------------------------
# Build stage for poetry
FROM base AS build-poetry
# 1. Add the bundle plugin to Poetry (it provides `poetry bundle`).
# 2. Bundle the project into a virtual environment at dist/bundle.
# 3. Archive the contents of that environment as dist/pyspark_deps.tar.gz;
#    `-C dist/bundle .` keeps the paths inside the archive relative to the
#    environment root.
# 4. Remove the unpacked environment in the same layer so only the archive
#    is left under dist/.
RUN poetry self add poetry-plugin-bundle && \
    poetry bundle venv dist/bundle && \
    tar -czvf dist/pyspark_deps.tar.gz -C dist/bundle . && \
    rm -rf dist/bundle
| 56 | + |
# Export stage for poetry: an otherwise empty image that contains only the
# packaged archive, so that building this target with `--output <dir>`
# writes just pyspark_deps.tar.gz to <dir>.
FROM scratch AS export-poetry
COPY --from=build-poetry /app/dist/pyspark_deps.tar.gz /
0 commit comments