Skip to content

Commit adbe478

Browse files
committed
docker: add arrow/parquet support
Signed-off-by: Arbin <[email protected]>
1 parent ddfef36 commit adbe478

File tree

3 files changed

+126
-13
lines changed

3 files changed

+126
-13
lines changed

dockerfiles/Dockerfile

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ FROM multiarch/qemu-user-static:x86_64-aarch64 AS qemu-arm64
2121

2222
FROM debian:trixie-slim AS builder-base
2323

24+
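# EXTRA_CMAKE_FLAGS can be used to enable optional features like Parquet.
# Arrow/Parquet packages are installed only when it contains the exact
# (case-sensitive) text "FLB_PARQUET_ENCODER=On".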
25+
ARG EXTRA_CMAKE_FLAGS
26+
ENV EXTRA_CMAKE_FLAGS=${EXTRA_CMAKE_FLAGS}
27+
2428
COPY --from=qemu-arm32 /usr/bin/qemu-arm-static /usr/bin/
2529
COPY --from=qemu-arm64 /usr/bin/qemu-aarch64-static /usr/bin/
2630

@@ -34,7 +38,7 @@ RUN mkdir -p /fluent-bit/bin /fluent-bit/etc /fluent-bit/log
3438

3539
ENV DEBIAN_FRONTEND=noninteractive
3640

37-
# hadolint ignore=DL3008
41+
# hadolint ignore=DL3008,SC2015
3842
RUN apt-get update && \
3943
apt-get install -y --no-install-recommends \
4044
build-essential \
@@ -54,6 +58,13 @@ RUN apt-get update && \
5458
flex \
5559
bison \
5660
libyaml-dev \
61+
&& if echo "${EXTRA_CMAKE_FLAGS}" | grep -q "FLB_PARQUET_ENCODER=On"; then \
62+
curl -fsSL https://packages.apache.org/artifactory/arrow/debian/apache-arrow-apt-source-latest-trixie.deb -o apache-arrow-apt-source.deb && \
63+
apt-get install -y -V ./apache-arrow-apt-source.deb && \
64+
apt-get update && \
65+
apt-get install -y -V libarrow-dev libparquet-dev && \
66+
rm -f apache-arrow-apt-source.deb; \
67+
fi \
5768
&& apt-get satisfy -y cmake "cmake (<< 4.0)" \
5869
&& apt-get clean \
5970
&& rm -rf /var/lib/apt/lists/*
@@ -122,12 +133,23 @@ FROM debian:trixie-slim AS deb-extractor
122133
COPY --from=qemu-arm32 /usr/bin/qemu-arm-static /usr/bin/
123134
COPY --from=qemu-arm64 /usr/bin/qemu-aarch64-static /usr/bin/
124135

136+
ARG EXTRA_CMAKE_FLAGS
137+
ENV EXTRA_CMAKE_FLAGS=${EXTRA_CMAKE_FLAGS}
138+
125139
# We download all debs locally then extract them into a directory we can use as the root for distroless.
126140
# We also include some extra handling for the status files that some tooling uses for scanning, etc.
127141
WORKDIR /tmp
128142
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
143+
# hadolint ignore=SC2015,DL3008
129144
RUN apt-get update && \
130-
apt-get download \
145+
if echo "${EXTRA_CMAKE_FLAGS}" | grep -q "FLB_PARQUET_ENCODER=On"; then \
146+
apt-get install -y --no-install-recommends curl ca-certificates && \
147+
curl -fsSL https://packages.apache.org/artifactory/arrow/debian/apache-arrow-apt-source-latest-trixie.deb -o apache-arrow-apt-source.deb && \
148+
apt-get install -y -V ./apache-arrow-apt-source.deb && \
149+
apt-get update && \
150+
rm -f apache-arrow-apt-source.deb; \
151+
fi \
152+
&& apt-get download \
131153
libssl3t64 \
132154
libcurl4t64 \
133155
libnghttp2-14 \
@@ -167,6 +189,19 @@ RUN apt-get update && \
167189
libyaml-0-2 \
168190
libcap2 \
169191
libldap2 \
192+
# MAINTAINER: Arrow SONAME format: libarrow{major*100+minor} (e.g., 22.0.0 -> libarrow2200)
193+
# Update the versioned package names below only when upgrading the Arrow version or if they become unavailable in the apt source.
194+
&& if echo "${EXTRA_CMAKE_FLAGS}" | grep -q "FLB_PARQUET_ENCODER=On"; then \
195+
apt-get download \
196+
libarrow2200 \
197+
libparquet2200 \
198+
libsnappy1v5 \
199+
libabsl20240722 \
200+
libbz2-1.0 \
201+
libprotobuf32t64 \
202+
libthrift-0.19.0t64 \
203+
libxml2; \
204+
fi \
170205
&& \
171206
mkdir -p /dpkg/var/lib/dpkg/status.d/ && \
172207
for deb in *.deb; do \
@@ -231,9 +266,12 @@ LABEL description="Fluent Bit multi-architecture debug container image" \
231266

232267
COPY --from=qemu-arm32 /usr/bin/qemu-arm-static /usr/bin/
233268
COPY --from=qemu-arm64 /usr/bin/qemu-aarch64-static /usr/bin/
269+
270+
ARG EXTRA_CMAKE_FLAGS
271+
ENV EXTRA_CMAKE_FLAGS=${EXTRA_CMAKE_FLAGS}
234272
ENV DEBIAN_FRONTEND=noninteractive
235273

236-
# hadolint ignore=DL3008
274+
# hadolint ignore=DL3008,SC2015
237275
RUN apt-get update && \
238276
apt-get install -y --no-install-recommends \
239277
libssl3t64 \
@@ -263,6 +301,22 @@ RUN apt-get update && \
263301
htop atop strace iotop sysstat ncdu logrotate hdparm pciutils psmisc tree pv \
264302
make tar flex bison \
265303
libssl-dev libsasl2-dev libsystemd-dev zlib1g-dev libpq-dev libyaml-dev postgresql-server-dev-all \
304+
# MAINTAINER: Keep Arrow packages in sync with deb-extractor stage.
305+
&& if echo "${EXTRA_CMAKE_FLAGS}" | grep -q "FLB_PARQUET_ENCODER=On"; then \
306+
curl -fsSL https://packages.apache.org/artifactory/arrow/debian/apache-arrow-apt-source-latest-trixie.deb -o apache-arrow-apt-source.deb && \
307+
apt-get install -y -V ./apache-arrow-apt-source.deb && \
308+
apt-get update && \
309+
apt-get install -y --no-install-recommends \
310+
libarrow2200 \
311+
libparquet2200 \
312+
libsnappy1v5 \
313+
libabsl20240722 \
314+
libbz2-1.0 \
315+
libprotobuf32t64 \
316+
libthrift-0.19.0t64 \
317+
libxml2 && \
318+
rm -f apache-arrow-apt-source.deb; \
319+
fi \
266320
&& apt-get satisfy -y cmake "cmake (<< 4.0)" \
267321
&& apt-get clean \
268322
&& rm -rf /var/lib/apt/lists/*

dockerfiles/Dockerfile.windows

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ ARG WINDOWS_VERSION=ltsc2025
1616
# Builder Image - Windows Server Core
1717
FROM mcr.microsoft.com/windows/servercore:$WINDOWS_VERSION AS builder-base
1818

19+
# Unified build argument for extra CMake flags (e.g., -DFLB_PARQUET_ENCODER=On)
20+
ARG EXTRA_CMAKE_FLAGS=""
21+
1922
SHELL ["powershell", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"]
2023

2124
# Clean up any existing temp files and create fresh temp directory
@@ -154,6 +157,16 @@ RUN vcpkg install --recurse libyaml --triplet x64-windows-static; `
154157
Remove-Item -Path $env:TEMP\* -Recurse -Force -ErrorAction SilentlyContinue; `
155158
Remove-Item -Path C:\dev\vcpkg\buildtrees -Recurse -Force -ErrorAction SilentlyContinue;
156159

160+
# Install Apache Arrow with Parquet support if enabled via EXTRA_CMAKE_FLAGS
161+
RUN if ($env:EXTRA_CMAKE_FLAGS -like '*-DFLB_PARQUET_ENCODER=On*') { `
162+
Write-Host 'Installing Apache Arrow with Parquet support...'; `
163+
vcpkg install --recurse arrow[parquet] --triplet x64-windows-static; `
164+
Remove-Item -Path $env:TEMP\* -Recurse -Force -ErrorAction SilentlyContinue; `
165+
Remove-Item -Path C:\dev\vcpkg\buildtrees -Recurse -Force -ErrorAction SilentlyContinue; `
166+
} else { `
167+
Write-Host 'Skipping Apache Arrow installation (FLB_PARQUET_ENCODER not in EXTRA_CMAKE_FLAGS)'; `
168+
}
169+
157170
# Final vcpkg cleanup to remove all build artifacts and save space
158171
RUN Remove-Item -Path C:\dev\vcpkg\downloads -Recurse -Force -ErrorAction SilentlyContinue; `
159172
Remove-Item -Path C:\dev\vcpkg\buildtrees -Recurse -Force -ErrorAction SilentlyContinue; `
@@ -167,11 +180,20 @@ WORKDIR /src/build
167180
COPY . /src/
168181

169182
ARG BUILD_PARALLEL=1
183+
ARG EXTRA_CMAKE_FLAGS
184+
185+
# Optional feature detection: Add cmake paths based on EXTRA_CMAKE_FLAGS
170186
SHELL ["cmd", "/S", "/C"]
171187
RUN call "%MSVS_HOME%\VC\Auxiliary\Build\vcvars64.bat" && `
188+
set "OPTIONAL_CMAKE_ARGS=" && `
189+
(echo %EXTRA_CMAKE_FLAGS% | findstr /C:"-DFLB_PARQUET_ENCODER=On" >nul && ( `
190+
set "OPTIONAL_CMAKE_ARGS=%OPTIONAL_CMAKE_ARGS% -DArrow_DIR=C:\dev\vcpkg\packages\arrow_x64-windows-static\share\arrow -DParquet_DIR=C:\dev\vcpkg\packages\arrow_x64-windows-static\share\parquet" `
191+
) || cd .) & `
172192
cmake -G "NMake Makefiles" `
173-
-DOPENSSL_ROOT_DIR='C:\dev\vcpkg\packages\openssl_x64-windows-static' `
174-
-DFLB_LIBYAML_DIR='C:\dev\vcpkg\packages\libyaml_x64-windows-static' `
193+
-DOPENSSL_ROOT_DIR=C:\dev\vcpkg\packages\openssl_x64-windows-static `
194+
-DFLB_LIBYAML_DIR=C:\dev\vcpkg\packages\libyaml_x64-windows-static `
195+
%OPTIONAL_CMAKE_ARGS% `
196+
%EXTRA_CMAKE_FLAGS% `
175197
-DFLB_SIMD=On `
176198
-DCMAKE_BUILD_TYPE=Release `
177199
-DFLB_SHARED_LIB=Off `

dockerfiles/README.md

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,24 +21,31 @@ With QEMU set up and buildkit support, you can build all targets in one simple c
2121
For example, to set up an Ubuntu 20.04 development PC:
2222

2323
1. Add QEMU: https://askubuntu.com/a/1369504
24+
2425
```
2526
sudo add-apt-repository ppa:jacob/virtualisation
2627
sudo apt-get update && sudo apt-get install qemu qemu-user qemu-user-static
2728
```
29+
2830
2. Install buildkit: https://docs.docker.com/buildx/working-with-buildx/#install
31+
2932
```
3033
wget https://github.com/docker/buildx/releases/download/v0.7.1/buildx-v0.7.1.linux-amd64
3134
mv buildx-v0.7.1.linux-amd64 ~/.docker/cli-plugins/docker-buildx
3235
chmod a+x ~/.docker/cli-plugins/docker-buildx
3336
```
37+
3438
3. Configure and use: https://stackoverflow.com/a/60667468
39+
3540
```
3641
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
3742
docker buildx rm builder
3843
docker buildx create --name builder --use
3944
docker buildx inspect --bootstrap
4045
```
46+
4147
4. Build Fluent Bit from the **root of the Git repo (not from this directory)**:
48+
4249
```
4350
docker buildx build --platform "linux/amd64,linux/arm64,linux/arm/v7,linux/s390x" --target=production -f dockerfiles/Dockerfile .
4451
```
@@ -47,10 +54,25 @@ docker buildx build --platform "linux/amd64,linux/arm64,linux/arm/v7,linux/s390x
4754

4855
1. Check out the branch you want, e.g. 1.8 for 1.8.X containers.
4956
2. Build Fluent Bit from the **root of the Git repo (not from this directory)**:
57+
5058
```
5159
$ docker build -t fluent/fluent-bit --target=production -f dockerfiles/Dockerfile .
5260
```
61+
62+
### Optional: Build with Parquet encoder support
63+
64+
To enable Parquet encoding support (requires Apache Arrow/Parquet libraries), use the `EXTRA_CMAKE_FLAGS` build argument:
65+
66+
```
67+
$ docker build -t fluent/fluent-bit --target=production \
68+
--build-arg EXTRA_CMAKE_FLAGS="-DFLB_PARQUET_ENCODER=On" \
69+
-f dockerfiles/Dockerfile .
70+
```
71+
72+
Note: Enabling Parquet support will increase the image size by approximately 49MB due to Apache Arrow dependencies.
73+
5374
3. Test the container.
75+
5476
```
5577
$ docker run --rm -it fluent/fluent-bit:latest
5678
```
@@ -90,10 +112,11 @@ ______ _ _ ______ _ _ ___ _____
90112
## ghcr.io topology
91113

92114
Containers are "staged" prior to release in the following ways to `ghcr.io`:
93-
* `ghcr.io/fluent/fluent-bit` - official releases, identical to DockerHub
94-
* `ghcr.io/fluent/fluent-bit/staging` - all architectures staging images used for testing prior to release
95-
* `ghcr.io/fluent/fluent-bit/master` - x86_64/AMD64 only images built on each push to master, used for integration tests
96-
* `ghcr.io/fluent/fluent-bit/pr-X` - x86_64/AMD64 only PR images where `X` is the PR number
115+
116+
- `ghcr.io/fluent/fluent-bit` - official releases, identical to DockerHub
117+
- `ghcr.io/fluent/fluent-bit/staging` - all architectures staging images used for testing prior to release
118+
- `ghcr.io/fluent/fluent-bit/master` - x86_64/AMD64 only images built on each push to master, used for integration tests
119+
- `ghcr.io/fluent/fluent-bit/pr-X` - x86_64/AMD64 only PR images where `X` is the PR number
97120

98121
## Windows
99122

@@ -109,20 +132,34 @@ More information is available at:
109132
In addition, metadata as defined in the OCI image spec annotations is leveraged in the generated image. This is the reason for the additional `--build-arg` parameters.
110133

111134
### Minimum set of build-args
135+
112136
```powershell
113137
docker build --no-cache `
114138
--build-arg WINDOWS_VERSION=ltsc2019 `
115139
-t fluent/fluent-bit:master-windows -f ./dockerfiles/Dockerfile.windows .
116140
```
117141

142+
### Optional: Build with Parquet encoder support
143+
144+
To enable Parquet encoding support on Windows, add the `EXTRA_CMAKE_FLAGS` build argument:
145+
146+
```powershell
147+
docker build --no-cache `
148+
--build-arg WINDOWS_VERSION=ltsc2019 `
149+
--build-arg EXTRA_CMAKE_FLAGS="-DFLB_PARQUET_ENCODER=On" `
150+
-t fluent/fluent-bit:master-windows -f ./dockerfiles/Dockerfile.windows .
151+
```
152+
153+
Note: Parquet support uses vcpkg to install Apache Arrow with static linking.
154+
118155
## Contact
119156

120157
Feel free to reach us on Slack, our Mailing List, IRC or Twitter:
121158

122-
- Slack: http://slack.fluentd.org / channel #fluent-bit
123-
- Mailing List: https://groups.google.com/forum/#!forum/fluent-bit
124-
- IRC: irc.freenode.net #fluent-bit
125-
- Twitter: http://twitter.com/fluentbit
159+
- Slack: http://slack.fluentd.org / channel #fluent-bit
160+
- Mailing List: https://groups.google.com/forum/#!forum/fluent-bit
161+
- IRC: irc.freenode.net #fluent-bit
162+
- Twitter: http://twitter.com/fluentbit
126163

127164
## License
128165

0 commit comments

Comments
 (0)