Skip to content

Commit a5ad4bc

Browse files
authored
feat: use kaniko build and add container changes (#14213)
This PR updates the Dockerfile to include the following missing pieces:
- Running the `bazelisk build` command in the builder stage to warm up the Bazel cache.
- Allowing a user to be configured when building the Docker image.
- Using a virtual env to fix the missing `nox` error when running the post processor.
- Creating custom directories for the Bazel cache to be used by `bazelisk build`.
- Adding permissions to the `cli.py` file.

Fixes #14142
1 parent ef5a6d7 commit a5ad4bc

File tree

2 files changed

+116
-33
lines changed

2 files changed

+116
-33
lines changed

.generator/Dockerfile

Lines changed: 101 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,16 @@ RUN apt-get update && \
2222
# Essential for compiling C code
2323
build-essential \
2424
# For downloading secure files
25+
git \
2526
wget \
2627
ca-certificates \
2728
# For running bazelisk commands
2829
openjdk-17-jdk \
30+
zip \
31+
unzip \
32+
# To avoid bazel error
33+
# "python interpreter `python3` not found in PATH"
34+
python3-dev \
2935
# --- Critical libraries for a complete Python build ---
3036
libssl-dev \
3137
zlib1g-dev \
@@ -67,10 +73,39 @@ RUN wget https://github.com/bazelbuild/bazelisk/releases/download/${BAZELISK_VER
6773
# Set the working directory for build-related tasks.
6874
WORKDIR /app
6975

76+
# Create the group and user, but only if they don't already exist.
77+
ARG UID=1000
78+
ARG GID=1000
79+
80+
RUN if ! getent group $GID > /dev/null; then \
81+
groupadd -g $GID myuser; \
82+
fi && \
83+
if ! getent passwd $UID > /dev/null; then \
84+
useradd -u $UID -g $GID -ms /bin/bash myuser; \
85+
fi
86+
87+
# Set ownership of the app directory now, before we copy files into it.
88+
RUN mkdir -p /app && chown $UID:$GID /app
89+
90+
# We'll point both to the /bazel_cache directory which will be mounted as a volume.
91+
ENV BAZELISK_HOME="/bazel_cache/bazelisk"
92+
ENV BAZEL_HOME="/bazel_cache/bazel"
93+
94+
# Ensure the cache directories exist and are writable by the configured UID/GID.
# The bazelisk build in this stage is invoked with
#   --output_base=/bazel_cache/_bazel_ubuntu/output_base
#   --disk_cache=/bazel_cache/_bazel_ubuntu/cache/repos
# which are NOT under ${BAZEL_HOME} (/bazel_cache/bazel), so those paths are
# created explicitly as well. The ${BAZEL_HOME}/_bazel_ubuntu tree is kept for
# backward compatibility. Ownership is applied to the whole /bazel_cache tree so
# that every directory created here is covered.
RUN mkdir -p ${BAZEL_HOME}/_bazel_ubuntu/cache/repos \
             ${BAZEL_HOME}/_bazel_ubuntu/output_base \
             /bazel_cache/_bazel_ubuntu/cache/repos \
             /bazel_cache/_bazel_ubuntu/output_base \
             ${BAZELISK_HOME} && \
    chown -R $UID:$GID /bazel_cache
100+
101+
# Create a virtual env in the builder's working directory.
# NOTE: the former `RUN . bazel_env/bin/activate` step was dropped. Every RUN
# instruction starts a fresh shell, so sourcing the activate script in its own
# instruction has no effect on any later instruction.
RUN /usr/local/bin/python3.9 -m venv bazel_env

# Warm up the bazel cache by building one representative target from googleapis.
# A shallow clone (--depth 1) is enough because only the current tree is built,
# and it avoids downloading the repository's full history.
RUN git clone --depth 1 https://github.com/googleapis/googleapis.git \
    && cd googleapis \
    && bazelisk --output_base=/bazel_cache/_bazel_ubuntu/output_base build --disk_cache=/bazel_cache/_bazel_ubuntu/cache/repos --incompatible_strict_action_env //google/cloud/language/v1:language-v1-py
107+
70108
# TODO(https://github.com/googleapis/librarian/issues/904): Install protoc for gencode.
71-
# TODO(https://github.com/googleapis/librarian/issues/907): Install Python dependencies from requirements.in.
72-
# TODO(https://github.com/googleapis/librarian/issues/905): Install Synthtool by cloning its repo.
73-
# TODO(https://github.com/googleapis/librarian/issues/906): Clone googleapis and run bazelisk build.
74109

75110
# --- Final Stage ---
76111
# This stage creates the lightweight final image, copying only the
@@ -82,21 +117,58 @@ FROM marketplace.gcr.io/google/ubuntu2404
82117
RUN apt-get update && \
83118
apt-get install -y --no-install-recommends \
84119
ca-certificates \
120+
git \
85121
libssl3 \
86122
zlib1g \
87123
libbz2-1.0 \
88124
libffi8 \
89125
libsqlite3-0 \
90126
libreadline8 \
127+
# For running bazelisk commands
128+
openjdk-17-jdk \
129+
# To avoid bazel error
130+
# "python interpreter `python3` not found in PATH"
131+
python3-dev \
132+
# To avoid bazel error
133+
# "Cannot find gcc or CC; either correct your path or set the CC environment variable"
134+
build-essential \
135+
# To avoid bazel error
136+
# unzip command not found
137+
unzip \
91138
&& apt-get clean && \
92139
rm -rf /var/lib/apt/lists/*
93140

94-
# TODO(https://github.com/googleapis/librarian/issues/902): Create a dedicate non-root user and
95-
# switch to the non-root user to run subsequent commands.
96-
# Example:
97-
# RUN groupadd --system --gid 1000 appgroup && \
98-
# useradd --system --uid 1000 --gid appgroup appuser
99-
# USER appuser
141+
# Create the group and user, but only if they don't already exist.
142+
# NOTE: A user needs to be configured by passing down the UID/GID with the
143+
# docker build command to allow the user running the librarian CLI to have
144+
# the permissions within the /app and /bazel_cache directories in the following way:
145+
# DOCKER_BUILDKIT=1 docker build -f .generator/Dockerfile --build-arg UID=$(id -u) --build-arg GID=$(id -g) -t python-librarian-generator:latest .
146+
ARG UID=1000
147+
ARG GID=1000
148+
149+
RUN if ! getent group $GID > /dev/null; then \
150+
groupadd -g $GID myuser; \
151+
fi && \
152+
if ! getent passwd $UID > /dev/null; then \
153+
useradd -u $UID -g $GID -ms /bin/bash myuser; \
154+
fi
155+
156+
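# The cache locations below are set explicitly because bazelisk otherwise fails
# when it cannot write to its default cache directory, as in this run:
# 2025/08/05 08:27:44 could not create directory /.cache/bazelisk: mkdir /.cache: permission denied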
157+
# Traceback (most recent call last):
158+
# File "/app/./cli.py", line 147, in _build_bazel_target
159+
# subprocess.run(
160+
# File "/usr/local/lib/python3.9/subprocess.py", line 528, in run
161+
# raise CalledProcessError(retcode, process.args,
162+
# subprocess.CalledProcessError: Command '['bazelisk', '--output_base=/bazel_cache/_bazel_ubuntu/output_base', 'build', '--disk_cache=/bazel_cache/_bazel_ubuntu/cache/repos', '--incompatible_strict_action_env', '//google/cloud/language/v1beta2:language-v1beta2-py']' returned non-zero exit status 1
163+
# We'll point both to the /bazel_cache directory which will be mounted as a volume.
164+
ENV BAZELISK_HOME="/bazel_cache/bazelisk"
165+
ENV BAZEL_HOME="/bazel_cache/bazel"
166+
# Create the cache directories while still root (before USER) and hand the whole
# /bazel_cache tree to the configured UID/GID.
# cli.py invokes bazelisk with /bazel_cache/_bazel_ubuntu/output_base and
# /bazel_cache/_bazel_ubuntu/cache/repos (see the command in the traceback
# above). Those paths are not under ${BAZEL_HOME}, so they are created here
# explicitly; the ${BAZEL_HOME}/_bazel_ubuntu tree is kept for backward
# compatibility.
RUN mkdir -p ${BAZEL_HOME}/_bazel_ubuntu/cache/repos \
             ${BAZEL_HOME}/_bazel_ubuntu/output_base \
             /bazel_cache/_bazel_ubuntu/cache/repos \
             /bazel_cache/_bazel_ubuntu/output_base \
             ${BAZELISK_HOME} && \
    chown -R $UID:$GID /bazel_cache
170+
171+
USER $UID
100172

101173
# Copy all Python interpreters, their pip executables, and their standard libraries from the builder.
102174
COPY --from=builder /usr/local/bin/python3.9 /usr/local/bin/
@@ -114,11 +186,28 @@ COPY --from=builder /usr/local/lib/python3.12 /usr/local/lib/python3.12
114186
COPY --from=builder /usr/local/bin/python3.13 /usr/local/bin/
115187
COPY --from=builder /usr/local/lib/python3.13 /usr/local/lib/python3.13
116188

189+
# Copy the bazelisk executable from the builder.
190+
COPY --from=builder /usr/local/bin/bazelisk /usr/local/bin/
191+
192+
# Copy bazel cache from the builder.
193+
COPY --from=builder --chown=$UID:$GID /bazel_cache /bazel_cache
194+
117195
# Set the working directory in the container.
118196
WORKDIR /app
119197

198+
# Create a virtual env and put it first on PATH to fix the missing nox error
# when running the post processor changes.
# Prepending the venv's bin directory to PATH is what makes the venv effective
# for later instructions and at container runtime. A separate
# `RUN . bazel_env/bin/activate` has no lasting effect (each RUN starts a new
# shell), so that step is omitted.
RUN /usr/local/bin/python3.9 -m venv bazel_env

ENV PATH=/app/bazel_env/bin:$PATH

# Install synthtool from a shallow clone plus nox into the venv, then remove the
# clone in the same layer. --no-cache-dir keeps pip's download cache out of the
# image.
RUN git clone --depth 1 https://github.com/googleapis/synthtool.git /tmp/synthtool && \
    bazel_env/bin/python3.9 -m pip install --no-cache-dir /tmp/synthtool nox && \
    rm -rf /tmp/synthtool
208+
120209
# Copy the CLI script into the container.
121-
COPY .generator/cli.py .
210+
COPY --chown=$UID:$GID .generator/cli.py .
211+
RUN chmod a+rx ./cli.py
122212

123-
# Set the entrypoint for the container to run the script.
124-
ENTRYPOINT ["python3.11", "./cli.py"]
213+
ENTRYPOINT ["python3.9", "./cli.py"]

cloudbuild.yaml

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,28 +16,22 @@
1616
# Reduce this timeout by moving the installation of Python runtimes to a separate base image
1717
timeout: 7200s # 2 hours for the first uncached run, can be lowered later.
1818
steps:
19-
# STEP 1: Pull the latest version of the image to use as a cache.
20-
# The '|| exit 0' part ensures the build doesn't fail if the image
21-
# doesn't exist yet (e.g., on the very first run).
22-
- name: 'gcr.io/cloud-builders/docker'
23-
entrypoint: 'bash'
24-
args: ['-c', 'docker pull gcr.io/$PROJECT_ID/python-librarian-generator:latest || exit 0']
25-
26-
# STEP 2: Build the new image, using the pulled image as a cache source.
27-
- name: 'gcr.io/cloud-builders/docker'
19+
# A single step using the Kaniko executor to build and cache.
# The executor image is pinned to a release tag instead of `latest` so that the
# build tooling does not change silently between runs; bump the tag deliberately
# when upgrading Kaniko.
- name: 'gcr.io/kaniko-project/executor:v1.23.2'
  args:
    # Specifies the Dockerfile path
    - '--dockerfile=.generator/Dockerfile'
    # Specifies the build context directory
    - '--context=.'
    # The final destination for the image
    - '--destination=gcr.io/$PROJECT_ID/python-librarian-generator:latest'
    # Enables Kaniko's remote registry caching
    - '--cache=true'
    # (Optional but recommended) Sets a time-to-live for cache layers
    - '--cache-ttl=24h'
3432

35-
# STEP 3: Push the newly built image to the registry so it can be used
36-
# as the cache for the next run. Cloud Build does this automatically if
37-
# the image is listed in the 'images' section.
38-
images:
39-
- 'gcr.io/$PROJECT_ID/python-librarian-generator:latest'
33+
# The 'images' section is no longer needed because Kaniko pushes the image itself.
4034

41-
# This section automatically creates a storage bucket for storing docker build logs.
4235
options:
43-
default_logs_bucket_behavior: REGIONAL_USER_OWNED_BUCKET
36+
default_logs_bucket_behavior: REGIONAL_USER_OWNED_BUCKET
37+
machineType: E2_HIGHCPU_32

0 commit comments

Comments
 (0)