Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
5bdb746
chore(librarian): add ability to parse BUILD.bazel
parthea Sep 9, 2025
472c280
update docstring
parthea Sep 9, 2025
31dbb8e
add copyright header
parthea Sep 9, 2025
b59962a
address review feedback
parthea Sep 9, 2025
9e100a5
trim BUILD.bazel
parthea Sep 10, 2025
86c72b9
address feedback
parthea Sep 10, 2025
b8814ff
lint
parthea Sep 10, 2025
950350a
chore(librarian): migrate off of bazel for generate command
parthea Sep 3, 2025
aa6d60c
Merge branch 'main' into migrate-off-bazel
parthea Sep 10, 2025
5ce78cd
add requirements.in
parthea Sep 10, 2025
8b20163
copy requirements.in
parthea Sep 10, 2025
3c21b0d
Add missing file
parthea Sep 10, 2025
4578e14
refactor
parthea Sep 10, 2025
44817e0
lint
parthea Sep 10, 2025
62a5e1a
revert
parthea Sep 10, 2025
9718839
Move protoc download to builder step
parthea Sep 11, 2025
3938cef
move pandoc to builder step
parthea Sep 11, 2025
f499595
remove git from container
parthea Sep 11, 2025
2a8c9fd
remove apt-get install pandoc
parthea Sep 11, 2025
d34acca
add support for optional arguments
parthea Sep 16, 2025
65f3c28
migrate to shutil.copytree
parthea Sep 16, 2025
93c1a0b
Add comment
parthea Sep 16, 2025
3ed45fc
add comment
parthea Sep 16, 2025
234ae23
address feedback
parthea Sep 16, 2025
4da3e46
move env; remove obsolete code
parthea Sep 16, 2025
959d499
coverage
parthea Sep 16, 2025
d32ee67
add comment
parthea Sep 16, 2025
9bb0e3c
run black/isort
parthea Sep 16, 2025
c6a9b7f
remove erroneous import
parthea Sep 16, 2025
2ec1ce7
address review feedback
parthea Sep 16, 2025
8b182cf
address review feedback
parthea Sep 16, 2025
fcb444c
Merge branch 'main' into migrate-off-bazel
parthea Sep 16, 2025
097a54c
address review feedback
parthea Sep 16, 2025
fe18bf6
add tests
parthea Sep 16, 2025
faccd69
address review feedback
parthea Sep 16, 2025
2665fc6
add assert
parthea Sep 17, 2025
76c4061
add assert
parthea Sep 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 33 additions & 81 deletions .generator/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
# This stage installs all build dependencies and compiles all Python versions.
FROM marketplace.gcr.io/google/ubuntu2404 AS builder

# TODO(https://github.com/googleapis/librarian/issues/901): Install the necessary dependencies and build tools.
RUN apt-get update && \
apt-get install -y --no-install-recommends \
# Essential for compiling C code
Expand All @@ -25,13 +24,6 @@ RUN apt-get update && \
git \
wget \
ca-certificates \
# For running bazelisk commands
openjdk-17-jdk \
zip \
unzip \
# To avoid bazel error
# "python interpreter `python3` not found in PATH"
python3-dev \
# --- Critical libraries for a complete Python build ---
libssl-dev \
zlib1g-dev \
Expand All @@ -43,9 +35,6 @@ RUN apt-get update && \
&& apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Set up environment variables for tool versions to make updates easier.
ENV BAZELISK_VERSION=v1.26.0

# Install multiple Python versions from source. `make altinstall` is used to
# prevent replacing the system's default python binary.
# TODO(http://github.com/googleapis/gapic-generator-python/issues/2435): Remove `3.10.18` when the linked issue is resolved.
Expand All @@ -68,47 +57,9 @@ RUN wget --no-check-certificate -O /tmp/get-pip.py 'https://bootstrap.pypa.io/ge
done && \
rm /tmp/get-pip.py

# Install Bazelisk
RUN wget https://github.com/bazelbuild/bazelisk/releases/download/${BAZELISK_VERSION}/bazelisk-linux-amd64 -O /usr/local/bin/bazelisk && \
chmod +x /usr/local/bin/bazelisk

# Set the working directory for build-related tasks.
WORKDIR /app

# Create the group and user, but only if they don't already exist.
ARG UID=1000
ARG GID=1000

RUN if ! getent group $GID > /dev/null; then \
groupadd -g $GID myuser; \
fi && \
if ! getent passwd $UID > /dev/null; then \
useradd -u $UID -g $GID -ms /bin/bash myuser; \
fi

# Set ownership of the app directory now, before we copy files into it.
RUN mkdir -p /app && chown $UID:$GID /app

# We'll point both to the /bazel_cache directory which will be mounted as a volume.
ENV BAZELISK_HOME="/bazel_cache/bazelisk"
ENV BAZEL_HOME="/bazel_cache/bazel"

# Ensure the cache directories within the non-root user's context exist and are writable.
# This is crucial as Bazel creates subdirectories under BAZEL_HOME.
RUN mkdir -p ${BAZEL_HOME}/_bazel_ubuntu/cache/repos \
${BAZEL_HOME}/_bazel_ubuntu/output_base \
${BAZELISK_HOME} && \
chown -R $UID:$GID ${BAZEL_HOME} ${BAZELISK_HOME}

RUN /usr/local/bin/python3.9 -m venv bazel_env
RUN . bazel_env/bin/activate

RUN git clone https://github.com/googleapis/googleapis.git \
&& cd googleapis \
&& bazelisk --output_base=/bazel_cache/_bazel_ubuntu/output_base build --disk_cache=/bazel_cache/_bazel_ubuntu/cache/repos --incompatible_strict_action_env //google/cloud/language/v1:language-v1-py

# TODO(https://github.com/googleapis/librarian/issues/904): Install protoc for gencode.

# --- Final Stage ---
# This stage creates the lightweight final image, copying only the
# necessary artifacts from the builder stage.
Expand All @@ -120,25 +71,15 @@ RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
git \
libssl3 \
zlib1g \
libbz2-1.0 \
libffi8 \
libsqlite3-0 \
libreadline8 \
# For running bazelisk commands
openjdk-17-jdk \
# To avoid bazel error
# "python interpreter `python3` not found in PATH"
python3-dev \
# To avoid bazel error
# "Cannot find gcc or CC; either correct your path or set the CC environment variable"
build-essential \
# To avoid bazel error
# unzip command not found
unzip \
&& apt-get clean && \
rm -rf /var/lib/apt/lists/*
pandoc \
wget \
unzip \
zip \
&& apt-get clean autoclean \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/* \
&& rm -f /var/cache/apt/archives/*.deb

# Copy all Python interpreters, their pip executables, and their standard libraries from the builder.
COPY --from=builder /usr/local/bin/python3.9 /usr/local/bin/
Expand All @@ -151,29 +92,40 @@ COPY --from=builder /usr/local/lib/python3.10 /usr/local/lib/python3.10
COPY --from=builder /usr/local/bin/python3.13 /usr/local/bin/
COPY --from=builder /usr/local/lib/python3.13 /usr/local/lib/python3.13

# Copy the bazelisk executable from the builder.
COPY --from=builder /usr/local/bin/bazelisk /usr/local/bin/

# Copy bazel cache from the builder.
COPY --from=builder /bazel_cache /bazel_cache
RUN chmod -R 777 /bazel_cache
# Download/install protoc
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v25.3/protoc-25.3-linux-x86_64.zip
RUN unzip protoc-25.3-linux-x86_64.zip -d protoc
RUN mv protoc/bin/* /usr/local/bin/
RUN mv protoc/include/* /usr/local/include/
RUN chmod +x /usr/local/bin/protoc
ENV PATH="/usr/bin:${PATH}"

# Set the working directory in the container.
WORKDIR /app

# Create a virtual env and set the Path to fix the missing nox error
# when running the post processor changes.
RUN /usr/local/bin/python3.9 -m venv bazel_env
RUN . bazel_env/bin/activate

ENV PATH=/app/bazel_env/bin:$PATH
RUN python3.9 -m pip install -r requirements.in

RUN git clone --depth 1 https://github.com/googleapis/synthtool.git /tmp/synthtool && \
bazel_env/bin/python3.9 -m pip install /tmp/synthtool nox && \
rm -rf /tmp/synthtool
python3.9 -m pip install /tmp/synthtool

# Download/install pandoc
RUN wget https://github.com/jgm/pandoc/releases/download/3.7.0.2/pandoc-3.7.0.2-linux-amd64.tar.gz
RUN tar -xvf pandoc-3.7.0.2-linux-amd64.tar.gz
RUN mv pandoc-3.7.0.2/bin/* /usr/local/bin/

# Copy the CLI script into the container.
COPY .generator/cli.py .
RUN chmod a+rx ./cli.py

COPY .generator/parse_googleapis_content.py .
RUN chmod a+rx ./parse_googleapis_content.py

RUN mkdir -p /.cache/synthtool/synthtool
RUN find /.cache -type d -exec chmod a+x {} \;

# Tell synthtool to pull templates from this docker image instead of from
# the live repo.
ENV SYNTHTOOL_TEMPLATES="/tmp/synthtool/synthtool/gcp/templates"


ENTRYPOINT ["python3.9", "./cli.py"]
191 changes: 45 additions & 146 deletions .generator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
import json
import logging
import os
import parse_googleapis_content
import re
import shutil
import subprocess
import sys
import yaml
from datetime import datetime
import tempfile
from pathlib import Path
from typing import Dict, List

Expand Down Expand Up @@ -116,46 +118,6 @@ def handle_configure():
logger.info("'configure' command executed.")


def _determine_bazel_rule(api_path: str, source: str) -> str:
    """Finds a Bazel rule by parsing the BUILD.bazel file directly.

    Args:
        api_path (str): The API path, e.g., 'google/cloud/language/v1'.
        source (str): The path to the root of the Bazel workspace.

    Returns:
        str: The discovered Bazel rule, e.g.,
            '//google/cloud/language/v1:language-v1-py'.

    Raises:
        ValueError: If the BUILD.bazel file can't be read, or if no rule
            with a name ending in '-py' is found in it.
    """
    logger.info(f"Determining Bazel rule for api_path: '{api_path}' by parsing file.")
    build_file_path = os.path.join(source, api_path, "BUILD.bazel")
    try:
        # Keep the try body minimal: only the file read can fail
        # unexpectedly (missing file, bad permissions, etc.).
        with open(build_file_path, "r") as f:
            content = f.read()
    except Exception as e:
        raise ValueError(
            f"Failed to determine Bazel rule for '{api_path}' by parsing."
        ) from e

    # Python GAPIC rules follow the '<api>-<version>-py' naming convention.
    match = re.search(r'name\s*=\s*"([^"]+-py)"', content)
    if not match:
        # Raised OUTSIDE the try block above: previously this specific
        # error was caught by the broad `except Exception` and re-wrapped
        # with the generic "by parsing" message, burying the useful detail.
        raise ValueError(
            f"No Bazel rule with a name ending in '-py' found in {build_file_path}"
        )

    bazel_rule = f"//{api_path}:{match.group(1)}"
    logger.info(f"Found Bazel rule: {bazel_rule}")
    return bazel_rule


def _get_library_id(request_data: Dict) -> str:
"""Retrieve the library id from the given request dictionary

Expand All @@ -174,107 +136,6 @@ def _get_library_id(request_data: Dict) -> str:
return library_id


def _build_bazel_target(bazel_rule: str, source: str):
    """Executes `bazelisk build` on a given Bazel rule.

    Args:
        bazel_rule(str): The Bazel rule to build.
        source(str): The path to the root of the Bazel workspace.

    Raises:
        ValueError: If the subprocess call fails.
    """
    logger.info(f"Executing build for rule: {bazel_rule}")
    # The docker image ships with a prewarmed bazel cache to speed up the
    # bazelisk invocation. Previously built artifacts live under
    # `/bazel_cache/_bazel_ubuntu/output_base` and are reused here;
    # `disk_cache` serves as the 'remote cache' and is also prewarmed as
    # part of the docker image.
    # See https://bazel.build/remote/caching#disk-cache which explains using a file system as a 'remote cache'.
    build_command = [
        "bazelisk",
        "--output_base=/bazel_cache/_bazel_ubuntu/output_base",
        "build",
        "--disk_cache=/bazel_cache/_bazel_ubuntu/cache/repos",
        "--incompatible_strict_action_env",
        bazel_rule,
    ]
    try:
        subprocess.run(
            build_command,
            cwd=source,
            text=True,
            check=True,
        )
        logger.info(f"Bazel build for {bazel_rule} rule completed successfully.")
    except Exception as e:
        raise ValueError(f"Bazel build for {bazel_rule} rule failed.") from e


def _locate_and_extract_artifact(
    bazel_rule: str,
    library_id: str,
    source: str,
    output: str,
    api_path: str,
) -> None:
    """Finds and extracts the tarball artifact from a Bazel build.

    Locates the `bazel-bin` directory via `bazelisk info`, derives the
    tarball path from the rule name, and extracts it into an
    owl-bot-staging directory under `output`.

    Args:
        bazel_rule(str): The Bazel rule that was built, in the form
            '//path/to/package:rule-name'.
        library_id(str): The ID of the library being generated; used as a
            path component of the staging directory.
        source(str): The path to the root of the Bazel workspace.
        output(str): The path to the location where generated output
            should be stored.
        api_path(str): The API path for the artifact; its last path
            segment (e.g. 'v1') names the staging subdirectory.

    Raises:
        ValueError: If failed to locate or extract artifact.
    """
    try:
        # 1. Find the bazel-bin output directory.
        logger.info("Locating Bazel output directory...")
        # Previously built artifacts are stored in `/bazel_cache/_bazel_ubuntu/output_base`.
        # See `--output_base` in `_build_bazel_target` — the same output
        # base must be used here or `bazel info` would report a different,
        # empty workspace.
        info_command = [
            "bazelisk",
            "--output_base=/bazel_cache/_bazel_ubuntu/output_base",
            "info",
            "bazel-bin",
        ]
        result = subprocess.run(
            info_command,
            cwd=source,
            text=True,
            check=True,
            capture_output=True,
        )
        # `bazel info` prints the path followed by a newline; strip it.
        bazel_bin_path = result.stdout.strip()

        # 2. Construct the path to the generated tarball. Bazel places it
        # at <bazel-bin>/<package path>/<rule name>.tar.gz.
        rule_path, rule_name = bazel_rule.split(":")
        tarball_name = f"{rule_name}.tar.gz"
        tarball_path = os.path.join(bazel_bin_path, rule_path.strip("/"), tarball_name)
        logger.info(f"Found artifact at: {tarball_path}")

        # 3. Create a staging directory:
        # <output>/owl-bot-staging/<library_id>/<api version>.
        api_version = api_path.split("/")[-1]
        staging_dir = os.path.join(output, "owl-bot-staging", library_id, api_version)
        os.makedirs(staging_dir, exist_ok=True)
        logger.info(f"Preparing staging directory: {staging_dir}")

        # 4. Extract the artifact into the staging directory (tar runs
        # with cwd=staging_dir). `--strip-components=1` drops the
        # top-level directory inside the tarball.
        extract_command = ["tar", "-xvf", tarball_path, "--strip-components=1"]
        subprocess.run(
            extract_command, cwd=staging_dir, capture_output=True, text=True, check=True
        )
        logger.info(f"Artifact {tarball_path} extracted successfully.")

    except Exception as e:
        raise ValueError(
            f"Failed to locate or extract artifact for {bazel_rule} rule"
        ) from e


def _run_post_processor(output: str, library_id: str):
"""Runs the synthtool post-processor on the output directory.

Expand Down Expand Up @@ -402,11 +263,49 @@ def handle_generate(
for api in request_data.get("apis", []):
api_path = api.get("path")
if api_path:
bazel_rule = _determine_bazel_rule(api_path, source)
_build_bazel_target(bazel_rule, source)
_locate_and_extract_artifact(
bazel_rule, library_id, source, output, api_path
)
generator_options = []
with open(f"{source}/{api_path}/BUILD.bazel", "r") as f:
content = f.read()
result = parse_googleapis_content.parse_content(content)
py_gapic_entry = [
key for key in result.keys() if key.endswith("_py_gapic")
][0]

config_keys = [
"grpc_service_config",
"rest_numeric_enums",
"service_yaml",
"transport",
]

for key in config_keys:
config_value = result[py_gapic_entry].get(key, None)
if config_value is not None:
new_key = key.replace("_", "-")
if key == "grpc_service_config":
new_key = "retry-config"
if new_key == "service-yaml" or new_key == "retry-config":
generator_options.append(
f"{new_key}={api_path}/{config_value},"
)
else:
generator_options.append(f"{new_key}={config_value},")
with tempfile.TemporaryDirectory() as tmp_dir:
generator_command = (
f"protoc {api_path}/*.proto --python_gapic_out={tmp_dir}"
)
if len(generator_options):
generator_command += f" --python_gapic_opt=metadata,"
for generator_option in generator_options:
generator_command += generator_option
subprocess.run([generator_command], cwd=source, shell=True)
api_version = api_path.split("/")[-1]
staging_dir = os.path.join(
output, "owl-bot-staging", library_id, api_version
)
os.makedirs(staging_dir, exist_ok=True)
logger.info(f"Preparing staging directory: {staging_dir}")
subprocess.run(f"cp -r {tmp_dir}/. {staging_dir}", shell=True)

_copy_files_needed_for_post_processing(output, input, library_id)
_run_post_processor(output, library_id)
Expand Down
Loading
Loading