Skip to content

Commit 82f2db9

Browse files
dervoeti and adwk67 authored
feat: build Airflow and Superset from source (#1304)
* wip
* add build of UI assets for 3.x
* wip
* feat: build airflow 2 from source
* fix: airflow build
* feat: build superset from source
* chore: move statsd_exporter to final stage
* chore: removed testpatch
* chore: changelog
* chore: lint fix
* chore: use HOME env consistently

---------

Co-authored-by: Andrew Kenworthy <[email protected]>
1 parent 0a69d06 commit 82f2db9

File tree

16 files changed

+175
-68
lines changed

16 files changed

+175
-68
lines changed

.github/workflows/build_airflow.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ on:
1717
# bake --product PRODUCT -d | grep -v 'docker buildx bake' | jq '.target | keys[]'
1818
- airflow/**
1919
- vector/**
20-
- stackable-base/**
20+
- stackable-devel/**
2121
- .github/actions/**
2222
- .github/workflows/build_airflow.yaml
2323
- .github/workflows/reusable_build_image.yaml

.github/workflows/build_superset.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ on:
1717
# bake --product PRODUCT -d | grep -v 'docker buildx bake' | jq '.target | keys[]'
1818
- superset/**
1919
- vector/**
20-
- stackable-base/**
20+
- stackable-devel/**
2121
- .github/actions/**
2222
- .github/workflows/build_superset.yaml
2323
- .github/workflows/reusable_build_image.yaml

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ All notable changes to this project will be documented in this file.
3131
- nifi: Add `2.6.0` ([#1293]).
3232
- hive: Add `4.1.0` ([#1295]).
3333
- hbase: Add `2.6.3` ([#1296]).
34+
- airflow,superset: Build from source ([#1304]).
3435

3536
### Changed
3637

@@ -95,6 +96,7 @@ All notable changes to this project will be documented in this file.
9596
[#1296]: https://github.com/stackabletech/docker-images/pull/1296
9697
[#1300]: https://github.com/stackabletech/docker-images/pull/1300
9798
[#1301]: https://github.com/stackabletech/docker-images/pull/1301
99+
[#1304]: https://github.com/stackabletech/docker-images/pull/1304
98100

99101
## [25.7.0] - 2025-07-23
100102

airflow/Dockerfile

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@ uv run pytest --disable-warnings
3737
uv build
3838
EOF
3939

40-
FROM local-image/vector AS airflow-build-image
40+
FROM local-image/stackable-devel AS airflow-build-image
4141

4242
ARG PRODUCT_VERSION
43-
ARG SHARED_STATSD_EXPORTER_VERSION
4443
ARG PYTHON_VERSION
4544
ARG TARGETARCH
4645
ARG STACKABLE_USER_UID
46+
ARG NODEJS_VERSION
4747
ARG S3FS_VERSION
4848
ARG CYCLONEDX_BOM_VERSION
4949
ARG UV_VERSION
@@ -54,7 +54,8 @@ ARG UV_VERSION
5454
# Requires implementation of https://github.com/apache/airflow/blob/2.2.5/scripts/docker/install_mysql.sh
5555
ARG AIRFLOW_EXTRAS
5656

57-
RUN microdnf update && \
57+
RUN microdnf module enable -y nodejs:${NODEJS_VERSION} && \
58+
microdnf update && \
5859
microdnf install \
5960
cyrus-sasl-devel \
6061
# Needed by ./configure to build gevent, see snippet [1] at the end of file
@@ -72,6 +73,9 @@ RUN microdnf update && \
7273
python${PYTHON_VERSION}-wheel \
7374
# The airflow odbc provider can compile without the development files (headers and libraries) (see https://github.com/stackabletech/docker-images/pull/683)
7475
unixODBC \
76+
# Needed for Airflow UI assets
77+
npm \
78+
nodejs \
7579
# Needed to modify the SBOM
7680
jq && \
7781
microdnf clean all && \
@@ -81,6 +85,11 @@ COPY airflow/stackable/constraints/${PRODUCT_VERSION}/constraints-python${PYTHON
8185
COPY airflow/stackable/constraints/${PRODUCT_VERSION}/build-constraints-python${PYTHON_VERSION}.txt /tmp/build-constraints.txt
8286
COPY --from=opa-auth-manager-builder /tmp/opa-auth-manager/dist/opa_auth_manager-0.1.0-py3-none-any.whl /tmp/
8387

88+
COPY --chown=${STACKABLE_USER_UID}:0 airflow/stackable/patches/patchable.toml /stackable/src/airflow/stackable/patches/patchable.toml
89+
COPY --chown=${STACKABLE_USER_UID}:0 airflow/stackable/patches/${PRODUCT_VERSION} /stackable/src/airflow/stackable/patches/${PRODUCT_VERSION}
90+
91+
WORKDIR /stackable
92+
8493
RUN <<EOF
8594
python${PYTHON_VERSION} -m venv --system-site-packages /stackable/app
8695

@@ -90,8 +99,44 @@ source /stackable/app/bin/activate
9099
# Also install uv to get support for build constraints
91100
pip install --no-cache-dir --upgrade pip
92101
pip install --no-cache-dir uv==${UV_VERSION}
102+
uv tool install hatch
103+
104+
cd "$(/stackable/patchable --images-repo-root=src checkout airflow ${PRODUCT_VERSION})"
105+
106+
if [ -d "./airflow-core" ]; then
107+
# Airflow 3.x
108+
cd airflow-core/src/airflow/ui
109+
110+
# build front-end assets
111+
npm install -g [email protected]
112+
pnpm install --frozen-lockfile
113+
pnpm run build
114+
115+
# build airflow wheel from airflow root folder
116+
# this picks up the UI assets from the pnpm build, and the dependencies from the root folder
117+
cd ../../..
118+
/root/.local/bin/hatch build -t wheel
119+
# First install the full apache-airflow package to get all dependencies including database drivers
120+
uv pip install --no-cache-dir apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT_VERSION} --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
121+
# Then install the locally built core wheel to override the core package
122+
uv pip install --no-cache-dir dist/apache_airflow_core-${PRODUCT_VERSION}-py3-none-any.whl[${AIRFLOW_EXTRAS}] --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
123+
else
124+
# Airflow 2.x
125+
# build front-end assets
126+
cd airflow/www
127+
npm install -g [email protected]
128+
yarn install --frozen-lockfile
129+
yarn run build
130+
131+
# build airflow wheel from airflow root folder
132+
cd ../..
133+
/root/.local/bin/hatch build -t wheel
134+
# First install the full apache-airflow package to get all dependencies including database drivers
135+
uv pip install --no-cache-dir apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT_VERSION} --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
136+
# Then install the locally built wheel to override with patched version
137+
uv pip install --no-cache-dir dist/apache_airflow-${PRODUCT_VERSION}-py3-none-any.whl[${AIRFLOW_EXTRAS}] --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
138+
fi
93139

94-
uv pip install --no-cache-dir apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT_VERSION} --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
95140
# Needed for pandas S3 integration to e.g. write and read csv and parquet files to/from S3
96141
uv pip install --no-cache-dir s3fs==${S3FS_VERSION} cyclonedx-bom==${CYCLONEDX_BOM_VERSION}
97142
# Needed for OIDC
@@ -102,6 +147,7 @@ uv pip install --no-cache-dir /tmp/opa_auth_manager-0.1.0-py3-none-any.whl
102147
# Create the SBOM for Airflow
103148
# Important: All `pip install` commands must be above this line, otherwise the SBOM will be incomplete
104149
cyclonedx-py environment --schema-version 1.5 --outfile /tmp/sbom.json
150+
uv pip uninstall cyclonedx-bom
105151

106152
# Break circular dependencies by removing the apache-airflow dependency from the providers
107153
jq '.dependencies |= map(if .ref | test("^apache-airflow-providers-") then
@@ -111,10 +157,6 @@ else
111157
end)' /tmp/sbom.json > /stackable/app/airflow-${PRODUCT_VERSION}.cdx.json
112158
EOF
113159

114-
COPY --from=statsd_exporter-builder /statsd_exporter/statsd_exporter /stackable/statsd_exporter
115-
COPY --from=statsd_exporter-builder /statsd_exporter/statsd_exporter-${SHARED_STATSD_EXPORTER_VERSION}.cdx.json /stackable/statsd_exporter-${SHARED_STATSD_EXPORTER_VERSION}.cdx.json
116-
COPY --from=gitsync-image --chown=${STACKABLE_USER_UID}:0 /git-sync /stackable/git-sync
117-
118160
RUN <<EOF
119161
mkdir -pv /stackable/airflow
120162
mkdir -pv /stackable/airflow/dags
@@ -130,6 +172,7 @@ ARG PYTHON_VERSION
130172
ARG RELEASE_VERSION
131173
ARG TINI_VERSION
132174
ARG TARGETARCH
175+
ARG SHARED_STATSD_EXPORTER_VERSION
133176
ARG STACKABLE_USER_UID
134177

135178
LABEL name="Apache Airflow" \
@@ -146,11 +189,14 @@ ENV PATH=$PATH:/bin:$HOME/app/bin
146189
ENV AIRFLOW_HOME=$HOME/airflow
147190

148191
COPY --from=airflow-build-image --chown=${STACKABLE_USER_UID}:0 /stackable/ ${HOME}/
149-
COPY --from=airflow-build-image --chown=${STACKABLE_USER_UID}:0 /stackable/git-sync ${HOME}/git-sync
150192

151193
COPY --chown=${STACKABLE_USER_UID}:0 airflow/stackable/utils/entrypoint.sh /entrypoint.sh
152194
COPY --chown=${STACKABLE_USER_UID}:0 airflow/stackable/utils/run-airflow.sh /run-airflow.sh
153195

196+
COPY --from=statsd_exporter-builder --chown=${STACKABLE_USER_UID}:0 /statsd_exporter/statsd_exporter ${HOME}/statsd_exporter
197+
COPY --from=statsd_exporter-builder --chown=${STACKABLE_USER_UID}:0 /statsd_exporter/statsd_exporter-${SHARED_STATSD_EXPORTER_VERSION}.cdx.json ${HOME}/statsd_exporter-${SHARED_STATSD_EXPORTER_VERSION}.cdx.json
198+
COPY --from=gitsync-image --chown=${STACKABLE_USER_UID}:0 /git-sync ${HOME}/git-sync
199+
154200
COPY airflow/licenses /licenses
155201

156202
# Update image and install needed packages
@@ -185,6 +231,7 @@ curl -o /usr/bin/tini "https://repo.stackable.tech/repository/packages/tini/tini
185231
chmod a+x /entrypoint.sh
186232
chmod a+x /run-airflow.sh
187233
chmod +x /usr/bin/tini
234+
chmod g=u /stackable/statsd_exporter ${HOME}/statsd_exporter-${SHARED_STATSD_EXPORTER_VERSION}.cdx.json ${HOME}/git-sync
188235
EOF
189236

190237
# ----------------------------------------

airflow/boil-config.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
[versions."2.9.3".local-images]
22
"shared/statsd-exporter" = "0.28.0"
33
vector = "0.49.0"
4+
stackable-devel = "1.0.0"
45

56
[versions."2.9.3".build-arguments]
67
python-version = "3.9"
@@ -11,10 +12,12 @@ tini-version = "0.19.0"
1112
uv-version = "0.7.8"
1213
airflow-extras = "async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,google_auth,microsoft.azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv,trino"
1314
opa-auth-manager = "airflow-2"
15+
nodejs-version = "20"
1416

1517
[versions."2.10.5".local-images]
1618
"shared/statsd-exporter" = "0.28.0"
1719
vector = "0.49.0"
20+
stackable-devel = "1.0.0"
1821

1922
[versions."2.10.5".build-arguments]
2023
python-version = "3.12"
@@ -25,10 +28,12 @@ tini-version = "0.19.0"
2528
uv-version = "0.7.8"
2629
airflow-extras = "async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,google_auth,microsoft.azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv,trino"
2730
opa-auth-manager = "airflow-2"
31+
nodejs-version = "20"
2832

2933
[versions."3.0.1".local-images]
3034
"shared/statsd-exporter" = "0.28.0"
3135
vector = "0.49.0"
36+
stackable-devel = "1.0.0"
3237

3338
[versions."3.0.1".build-arguments]
3439
python-version = "3.12"
@@ -39,10 +44,12 @@ tini-version = "0.19.0"
3944
uv-version = "0.7.8"
4045
airflow-extras = "async,amazon,celery,cncf-kubernetes,docker,elasticsearch,fab,ftp,grpc,hashicorp,http,ldap,google,microsoft-azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,trino"
4146
opa-auth-manager = "airflow-3"
47+
nodejs-version = "20"
4248

4349
[versions."3.0.6".local-images]
4450
"shared/statsd-exporter" = "0.28.0"
4551
vector = "0.49.0"
52+
stackable-devel = "1.0.0"
4653

4754
[versions."3.0.6".build-arguments]
4855
python-version = "3.12"
@@ -53,3 +60,4 @@ tini-version = "0.19.0"
5360
uv-version = "0.7.8"
5461
airflow-extras = "async,amazon,celery,cncf-kubernetes,docker,elasticsearch,fab,ftp,grpc,hashicorp,http,ldap,google,microsoft-azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,trino"
5562
opa-auth-manager = "airflow-3"
63+
nodejs-version = "20"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
mirror = "https://github.com/stackabletech/airflow.git"
2+
base = "b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
mirror = "https://github.com/stackabletech/airflow.git"
2+
base = "81845de9d95a733b4eb7826aaabe23ba9813eba3"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
mirror = "https://github.com/stackabletech/airflow.git"
2+
base = "4ecebc2973587ebaa2cb12482de82e93d15c092f"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
mirror = "https://github.com/stackabletech/airflow.git"
2+
base = "e965c2e676d85ced65a485d4b2601addc2fd3e97"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
upstream = "https://github.com/apache/airflow.git"
2+
default-mirror = "https://github.com/stackabletech/airflow.git"

0 commit comments

Comments
 (0)