Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Fixed

- nifi: reduce docker image size by removing the recursive chown/chmods in the final image ([#1027]).

[#1027]: https://github.com/stackabletech/docker-images/pull/1027

## [25.3.0] - 2025-03-21

### Added
Expand Down
98 changes: 61 additions & 37 deletions nifi/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,38 +28,53 @@ WORKDIR /stackable

COPY --chown=${STACKABLE_USER_UID}:0 nifi/stackable/patches /stackable/patches

RUN curl 'https://repo.stackable.tech/repository/m2/tech/stackable/nifi/stackable-bcrypt/1.0-SNAPSHOT/stackable-bcrypt-1.0-20240508.153334-1-jar-with-dependencies.jar' \
# This used to be located in /bin/stackable-bcrypt.jar. We create a softlink for /bin/stackable-bcrypt.jar in the main container for backwards compatibility.
-o /stackable/stackable-bcrypt.jar && \
# Get the source release from nexus
curl "https://repo.stackable.tech/repository/packages/nifi/nifi-${PRODUCT}-source-release.zip" -o "/stackable/nifi-${PRODUCT}-source-release.zip" && \
unzip "nifi-${PRODUCT}-source-release.zip" && \
# Clean up downloaded source after unzipping
rm -rf "nifi-${PRODUCT}-source-release.zip" && \
# The NiFi "binary" ends up in a folder named "nifi-${PRODUCT}" which should be copied to /stackable
# from /stackable/nifi-${PRODUCT}-src/nifi-assembly/target/nifi-${PRODUCT}-bin/nifi-${PRODUCT} (see later steps)
# Therefore we add the suffix "-src" to be able to copy the binary and remove the unzipped sources afterwards.
mv nifi-${PRODUCT} nifi-${PRODUCT}-src && \
# Apply patches
chmod +x patches/apply_patches.sh && \
patches/apply_patches.sh ${PRODUCT} && \
# Build NiFi
cd /stackable/nifi-${PRODUCT}-src/ && \
# NOTE: Since NiFi 2.0.0 PutIceberg Processor and services were removed, so including the `include-iceberg` profile does nothing.
# Additionally some modules were moved to optional build profiles, so we need to add `include-hadoop` to get `nifi-parquet-nar` for example.
if [[ "${PRODUCT}" != 1.* ]] ; then \
RUN <<EOF
# This used to be located in /bin/stackable-bcrypt.jar. We create a softlink for /bin/stackable-bcrypt.jar in the main container for backwards compatibility.
curl 'https://repo.stackable.tech/repository/m2/tech/stackable/nifi/stackable-bcrypt/1.0-SNAPSHOT/stackable-bcrypt-1.0-20240508.153334-1-jar-with-dependencies.jar' \
-o /stackable/stackable-bcrypt.jar

# Get the source release from nexus
curl "https://repo.stackable.tech/repository/packages/nifi/nifi-${PRODUCT}-source-release.zip" -o "/stackable/nifi-${PRODUCT}-source-release.zip"
unzip "nifi-${PRODUCT}-source-release.zip"

# Clean up downloaded source after unzipping
rm -rf "nifi-${PRODUCT}-source-release.zip"

# The NiFi "binary" ends up in a folder named "nifi-${PRODUCT}" which should be copied to /stackable
# from /stackable/nifi-${PRODUCT}-src/nifi-assembly/target/nifi-${PRODUCT}-bin/nifi-${PRODUCT} (see later steps)
# Therefore we add the suffix "-src" to be able to copy the binary and remove the unzipped sources afterwards.
mv nifi-${PRODUCT} nifi-${PRODUCT}-src

# Apply patches
chmod +x patches/apply_patches.sh
patches/apply_patches.sh ${PRODUCT}

# Build NiFi
cd /stackable/nifi-${PRODUCT}-src/

# NOTE: Since NiFi 2.0.0 PutIceberg Processor and services were removed, so including the `include-iceberg` profile does nothing.
# Additionally some modules were moved to optional build profiles, so we need to add `include-hadoop` to get `nifi-parquet-nar` for example.
if [[ "${PRODUCT}" != 1.* ]] ; then \
mvn --batch-mode --no-transfer-progress clean install -Dmaven.javadoc.skip=true -DskipTests --activate-profiles include-hadoop,include-hadoop-aws,include-hadoop-azure,include-hadoop-gcp ; \
else \
else \
mvn --batch-mode --no-transfer-progress clean install -Dmaven.javadoc.skip=true -DskipTests --activate-profiles include-iceberg,include-hadoop-aws,include-hadoop-azure,include-hadoop-gcp ; \
fi && \
# Copy the binaries to the /stackable folder
mv /stackable/nifi-${PRODUCT}-src/nifi-assembly/target/nifi-${PRODUCT}-bin/nifi-${PRODUCT} /stackable/nifi-${PRODUCT} && \
# Copy the SBOM as well
mv /stackable/nifi-${PRODUCT}-src/nifi-assembly/target/bom.json /stackable/nifi-${PRODUCT}/nifi-${PRODUCT}.cdx.json && \
# Remove the unzipped sources
rm -rf /stackable/nifi-${PRODUCT}-src && \
# Remove generated docs in binary
rm -rf /stackable/nifi-${PRODUCT}/docs
fi

# Copy the binaries to the /stackable folder
mv /stackable/nifi-${PRODUCT}-src/nifi-assembly/target/nifi-${PRODUCT}-bin/nifi-${PRODUCT} /stackable/nifi-${PRODUCT}

# Copy the SBOM as well
mv /stackable/nifi-${PRODUCT}-src/nifi-assembly/target/bom.json /stackable/nifi-${PRODUCT}/nifi-${PRODUCT}.cdx.json

# Remove the unzipped sources
rm -rf /stackable/nifi-${PRODUCT}-src

# Remove generated docs in binary
rm -rf /stackable/nifi-${PRODUCT}/docs

# Set correct permissions
chmod -R g=u /stackable
EOF

FROM stackable/image/java-base AS final

Expand All @@ -81,10 +96,9 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=nifi-builder /stackable/stackable-bc
COPY --chown=${STACKABLE_USER_UID}:0 nifi/stackable/bin /stackable/bin
COPY --chown=${STACKABLE_USER_UID}:0 nifi/licenses /licenses
COPY --chown=${STACKABLE_USER_UID}:0 nifi/python /stackable/python
COPY --chown=${STACKABLE_USER_UID}:0 shared/checks/check-permissions-ownership /tmp/check-permissions-ownership

RUN <<EOF
ln -s /stackable/nifi-${PRODUCT} /stackable/nifi

microdnf update

# python-pip: Required to install Python packages
Expand All @@ -103,14 +117,24 @@ pip install --no-cache-dir \
# This can be removed once older versions / operators using this are no longer supported
ln -s /stackable/stackable-bcrypt.jar /bin/stackable-bcrypt.jar

# All files and folders owned by root group to support running as arbitrary users.
# This is best practice as all container users will belong to the root group (0).
chown -R ${STACKABLE_USER_UID}:0 /stackable
chmod -R g=u /stackable
ln -s /stackable/nifi-${PRODUCT} /stackable/nifi

# Fix missing permissions / ownership without a recursive chown/chmod over all
# of /stackable (a recursive pass would rewrite every file into this layer).
#
# -h: change ownership of the /stackable/nifi symlink itself, not of its target.
chown -h ${STACKABLE_USER_UID}:0 /stackable/nifi
# These directories are copied with COPY --chown, which sets the ownership but
# does not make the group permissions equal to the owner permissions.
chmod -R g=u /stackable/python
chmod -R g=u /stackable/bin
# Only the top-level NiFi directory is adjusted here (non-recursive).
# NOTE(review): presumably its contents already carry g=u from the builder stage - confirm.
# Use ${PRODUCT} rather than a hard-coded version so this works for every NiFi version built.
chmod g=u "/stackable/nifi-${PRODUCT}"
EOF

# Check that permissions are set correctly
RUN <<EOF
chmod +x /tmp/check-permissions-ownership
/tmp/check-permissions-ownership /stackable ${STACKABLE_USER_UID} 0
rm /tmp/check-permissions-ownership
EOF

# ----------------------------------------
# Attention: We are changing the group of all files in /stackable directly above
# Attention:
# If you do any file based actions (copying / creating etc.) below this comment you
# absolutely need to make sure that the correct permissions are applied!
# chown ${STACKABLE_USER_UID}:0
Expand Down
58 changes: 58 additions & 0 deletions shared/checks/check-permissions-ownership
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash
#
# Purpose
#
# Checks that permissions and ownership in the provided directory are set according to:
#
# chown -R ${STACKABLE_USER_UID}:0 /stackable
# chmod -R g=u /stackable
#
# Will error out and print directories / files that do not match the required permissions or ownership.
#
# Usage:
#
# ./check-permissions-ownership <directory> <uid> <gid>
# ./check-permissions-ownership /stackable ${STACKABLE_USER_UID} 0
#
# Exit codes:
#
# 0 - every entry below <directory> (including <directory> itself) matches
# 1 - at least one mismatch was found, or the script was invoked incorrectly
#

if [[ $# -ne 3 ]]; then
    echo "Wrong number of parameters supplied. Usage:"
    echo "$0 <directory> <uid> <gid>"
    echo "$0 /stackable 1000 0"
    # Abort: continuing with missing arguments would compare against empty
    # values, which the numeric comparison below silently treats as 0.
    exit 1
fi

DIRECTORY=$1
EXPECTED_UID=$2
EXPECTED_GID=$3

# A missing path would make find fail inside the process substitution below,
# the loop would see no entries and the check would wrongly report success.
if [[ ! -e "$DIRECTORY" && ! -L "$DIRECTORY" ]]; then
    echo "Directory $DIRECTORY does not exist!"
    exit 1
fi

# The comparison below is numeric, so only numeric ids are meaningful.
if [[ ! "$EXPECTED_UID" =~ ^[0-9]+$ || ! "$EXPECTED_GID" =~ ^[0-9]+$ ]]; then
    echo "Expected uid and gid must be numeric (got uid '$EXPECTED_UID', gid '$EXPECTED_GID')"
    exit 1
fi

error_flag=0

# Walk the tree once. find emits four NUL-terminated fields per entry:
# numeric uid (%U), numeric gid (%G), symbolic mode (%M, e.g. drwxr-xr-x) and path (%p).
# This avoids forking stat several times per file. Symlinks are not followed,
# so a symlink is checked by its own ownership and mode.
while IFS= read -r -d '' uid &&
    IFS= read -r -d '' gid &&
    IFS= read -r -d '' perms &&
    IFS= read -r -d '' file; do

    # Check ownership
    if [[ "$uid" -ne "$EXPECTED_UID" || "$gid" -ne "$EXPECTED_GID" ]]; then
        echo "Ownership mismatch: $file (Expected: $EXPECTED_UID:$EXPECTED_GID, Found: $uid:$gid)"
        error_flag=1
    fi

    # Check permissions: characters 1-3 of the symbolic mode are the owner
    # bits, characters 4-6 the group bits; "g=u" means both must be equal.
    owner_perms="${perms:1:3}"
    group_perms="${perms:4:3}"

    if [[ "$owner_perms" != "$group_perms" ]]; then
        echo "Permission mismatch: $file (Owner: $owner_perms, Group: $group_perms)"
        error_flag=1
    fi
done < <(find "$DIRECTORY" -printf '%U\0%G\0%M\0%p\0')

if [[ $error_flag -ne 0 ]]; then
    echo "Permission and Ownership checks failed for $DIRECTORY!"
    exit 1
fi

echo "Permission and Ownership checks succeeded for $DIRECTORY!"
32 changes: 19 additions & 13 deletions vector/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,22 @@ ARG STACKABLE_USER_UID
# This happens by writing a "shutdown file" in a shared volume
# See https://github.com/stackabletech/airflow-operator/blob/23.4.1/rust/operator-binary/src/airflow_db_controller.rs#L269 for an example
# The Vector container waits for this file to appear and this waiting happens using `inotifywait` which comes from the `inotify-tools` package
RUN ARCH="${TARGETARCH/amd64/x86_64}" ARCH="${ARCH/arm64/aarch64}" && \
rpm --install \
"https://repo.stackable.tech/repository/packages/vector/vector-${PRODUCT}-${RPM_RELEASE}.${ARCH}.rpm" \
"https://repo.stackable.tech/repository/packages/inotify-tools/inotify-tools-${INOTIFY_TOOLS}.${ARCH}.rpm" && \
mkdir /licenses && \
cp /usr/share/licenses/vector-${PRODUCT}/LICENSE /licenses/VECTOR_LICENSE && \
# Create the directory /stackable/vector/var.
# This directory is set by operator-rs in the parameter `data_dir`
# of the Vector configuration. The directory is used for persisting
# Vector state, such as on-disk buffers, file checkpoints, and more.
# Vector needs write permissions.
mkdir --parents /stackable/vector/var && \
chown --recursive ${STACKABLE_USER_UID}:0 /stackable/
RUN <<EOF
# NOTE(review): commands in a heredoc are not implicitly &&-chained; confirm the
# active SHELL enables errexit so that a failing step aborts the build.

# Map Docker's architecture names (amd64 / arm64) to the names used in the
# RPM file names (x86_64 / aarch64). Any other value is passed through as is.
rpm_arch="${TARGETARCH/amd64/x86_64}"
rpm_arch="${rpm_arch/arm64/aarch64}"

# Install Vector and inotify-tools straight from the package repository.
rpm --install \
    "https://repo.stackable.tech/repository/packages/vector/vector-${PRODUCT}-${RPM_RELEASE}.${rpm_arch}.rpm" \
    "https://repo.stackable.tech/repository/packages/inotify-tools/inotify-tools-${INOTIFY_TOOLS}.${rpm_arch}.rpm"

# Ship the Vector license alongside the other licenses.
mkdir /licenses
cp /usr/share/licenses/vector-${PRODUCT}/LICENSE /licenses/VECTOR_LICENSE

# Create the directory /stackable/vector/var.
# operator-rs points the `data_dir` parameter of the Vector configuration
# at this directory. Vector persists its state there (on-disk buffers,
# file checkpoints, and more) and therefore needs write permissions.
mkdir --parents /stackable/vector/var

# Hand everything below /stackable to the Stackable user and the root group,
# then give the group the same permissions as the owner.
chown --recursive ${STACKABLE_USER_UID}:0 /stackable/
chmod --recursive g=u /stackable
EOF