diff --git a/.generator/Dockerfile b/.generator/Dockerfile
index 375aaabd9941..16ba96210b31 100644
--- a/.generator/Dockerfile
+++ b/.generator/Dockerfile
@@ -82,12 +82,24 @@ FROM marketplace.gcr.io/google/ubuntu2404
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     ca-certificates \
+    git \
     libssl3 \
     zlib1g \
     libbz2-1.0 \
     libffi8 \
     libsqlite3-0 \
     libreadline8 \
+    # For running bazelisk commands
+    openjdk-17-jdk \
+    # To avoid bazel error
+    # "python interpreter `python3` not found in PATH"
+    python3-dev \
+    # To avoid bazel error
+    # "Cannot find gcc or CC; either correct your path or set the CC environment variable"
+    build-essential \
+    # To avoid bazel error
+    # unzip command not found
+    unzip \
     && apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
@@ -114,11 +126,75 @@ COPY --from=builder /usr/local/lib/python3.12 /usr/local/lib/python3.12
 COPY --from=builder /usr/local/bin/python3.13 /usr/local/bin/
 COPY --from=builder /usr/local/lib/python3.13 /usr/local/lib/python3.13
 
+COPY --from=builder /usr/local/bin/bazelisk /usr/local/bin/
+
 # Set the working directory in the container.
 WORKDIR /app
 
-# Copy the CLI script into the container.
-COPY .generator/cli.py .
+
+# RUN find /app -exec chmod a+rw {} \;
+# RUN find /app -type d -exec chmod a+x {} \;
+
+# # For bazel
+# RUN mkdir -p /.cache
+# RUN find /.cache -exec chmod a+rw {} \;
+# RUN find /.cache -type d -exec chmod a+x {} \;
+
+# --- Create a dedicated non-root user ---
+# Use arguments to make the user and group IDs configurable from the build command.
+ARG UID=1000
+ARG GID=1000
+
+# Create the group and user, but only if they don't already exist.
+RUN if ! getent group $GID > /dev/null; then \
+        groupadd -g $GID myuser; \
+    fi && \
+    if ! getent passwd $UID > /dev/null; then \
+        useradd -u $UID -g $GID -ms /bin/bash myuser; \
+    fi
+
+# Set ownership of the app and cache directory now, before we copy files into it.
+RUN mkdir -p /app /.cache && chown $UID:$GID /app /.cache
+
+# Set ownership of the app and cache directory now, before we copy files into it.
+# Bazel's default cache locations are often under $HOME/.cache/bazel or /tmp.
+# We'll use /bazel_cache as a dedicated mount point for the cache to ensure persistence.
+# Also, set a symlink from the user's home to this cache directory to ensure Bazelisk picks it up.
+RUN mkdir -p /app /bazel_cache && \
+    chown -R $UID:$GID /app /bazel_cache
+
+# Switch to the non-root user. All subsequent commands will run as this user.
+USER $UID
+
+# Set environment variables for Bazelisk/Bazel cache locations for the non-root user.
+# BAZELISK_HOME is where Bazelisk itself caches Bazel binaries.
+# BAZEL_HOME (or --output_user_root) controls where Bazel stores its output and repository caches.
+# We'll point both to the /bazel_cache directory which will be mounted as a volume.
+ENV BAZELISK_HOME="/bazel_cache/bazelisk"
+ENV BAZEL_HOME="/bazel_cache/bazel"
+ENV XDG_CACHE_HOME="/bazel_cache"
+
+# Ensure the cache directories within the non-root user's context exist and are writable.
+# This is crucial as Bazel creates subdirectories under BAZEL_HOME.
+RUN mkdir -p ${BAZEL_HOME}/_bazel_ubuntu/cache/repos \
+    ${BAZEL_HOME}/_bazel_ubuntu/output_base \
+    ${BAZELISK_HOME} && \
+    chown -R $UID:$GID ${BAZEL_HOME} ${BAZELISK_HOME}
+
+RUN /usr/local/bin/python3.9 -m venv bazel_env
+RUN . bazel_env/bin/activate
+
+RUN git clone https://github.com/googleapis/googleapis.git \
+    && cd googleapis \
+    && bazelisk build //google/cloud/language/v1:language-v1-py
+
+# Make sure this is at the end so that changes to cli.py do not require
+# re-running bazelisk build
+# Copy your CLI script into the container and make it executable.
+COPY --chown=$UID:$GID /.generator/cli.py .
+RUN chmod a+rx ./cli.py
+
+RUN chown $UID:$GID /app /.cache
 
 # Set the entrypoint for the container to run the script.
-ENTRYPOINT ["python3.11", "./cli.py"]
\ No newline at end of file
+ENTRYPOINT ["/app/bazel_env/bin/python3.9", "./cli.py"]
diff --git a/.generator/cli.py b/.generator/cli.py
index 417ef2d0a922..b9f8d1f13b26 100644
--- a/.generator/cli.py
+++ b/.generator/cli.py
@@ -18,7 +18,6 @@
 import os
 import subprocess
 import sys
-import subprocess
 from typing import Dict, List
 
 try:
@@ -35,6 +34,7 @@
 
 LIBRARIAN_DIR = "librarian"
 GENERATE_REQUEST_FILE = "generate-request.json"
+BUILD_REQUEST_FILE = "build-request.json"
 SOURCE_DIR = "source"
 OUTPUT_DIR = "output"
 REPO_DIR = "repo"
@@ -63,11 +63,12 @@ def handle_configure():
     logger.info("'configure' command executed.")
 
 
-def _determine_bazel_rule(api_path: str) -> str:
+def _determine_bazel_rule(api_path: str, source_path: str = SOURCE_DIR) -> str:
     """Executes a `bazelisk query` to find a Bazel rule.
 
     Args:
         api_path (str): The API path to query for.
+        source_path (str): The path to the root of the Bazel workspace.
 
     Returns:
         str: The discovered Bazel rule.
@@ -81,7 +82,7 @@
         command = ["bazelisk", "query", query]
         result = subprocess.run(
             command,
-            cwd=f"{SOURCE_DIR}/googleapis",
+            cwd=source_path,
             capture_output=True,
             text=True,
             check=True,
@@ -114,11 +115,12 @@
     return library_id
 
 
-def _build_bazel_target(bazel_rule: str):
+def _build_bazel_target(bazel_rule: str, source: str = SOURCE_DIR):
     """Executes `bazelisk build` on a given Bazel rule.
 
     Args:
         bazel_rule (str): The Bazel rule to build.
+        source (str): The path to the root of the Bazel workspace.
 
     Raises:
         ValueError: If the subprocess call fails.
@@ -128,7 +130,7 @@ def _build_bazel_target(bazel_rule: str):
         command = ["bazelisk", "build", bazel_rule]
         subprocess.run(
             command,
-            cwd=f"{SOURCE_DIR}/googleapis",
+            cwd=source,
             text=True,
             check=True,
         )
@@ -137,12 +139,20 @@
         raise ValueError(f"Bazel build for {bazel_rule} rule failed.") from e
 
 
-def _locate_and_extract_artifact(bazel_rule: str, library_id: str):
+def _locate_and_extract_artifact(
+    bazel_rule: str,
+    library_id: str,
+    source_path: str = SOURCE_DIR,
+    output_path: str = OUTPUT_DIR,
+):
     """Finds and extracts the tarball artifact from a Bazel build.
 
     Args:
         bazel_rule (str): The Bazel rule that was built.
         library_id (str): The ID of the library being generated.
+        source_path (str): The path to the root of the Bazel workspace.
+        output_path (str): The path to the location where generated output
+            should be stored.
 
     Raises:
         ValueError: If failed to locate or extract artifact.
@@ -153,7 +163,7 @@
         info_command = ["bazelisk", "info", "bazel-bin"]
         result = subprocess.run(
             info_command,
-            cwd=f"{SOURCE_DIR}/googleapis",
+            cwd=source_path,
             text=True,
             check=True,
             capture_output=True,
@@ -167,7 +177,7 @@
     logger.info(f"Found artifact at: {tarball_path}")
 
     # 3. Create a staging directory.
-    staging_dir = os.path.join(OUTPUT_DIR, "owl-bot-staging", library_id)
+    staging_dir = os.path.join(output_path, "owl-bot-staging", library_id)
     os.makedirs(staging_dir, exist_ok=True)
     logger.info(f"Preparing staging directory: {staging_dir}")
 
@@ -184,19 +194,24 @@
         ) from e
 
 
-def _run_post_processor():
+def _run_post_processor(output_path: str = OUTPUT_DIR):
     """Runs the synthtool post-processor on the output directory.
+
+    Args:
+        output_path(str): path to the output directory
     """
     logger.info("Running Python post-processor...")
     if SYNTHTOOL_INSTALLED:
         command = ["python3", "-m", "synthtool.languages.python_mono_repo"]
-        subprocess.run(command, cwd=OUTPUT_DIR, text=True, check=True)
+        subprocess.run(command, cwd=output_path, text=True, check=True)
     else:
         raise SYNTHTOOL_IMPORT_ERROR
     logger.info("Python post-processor ran successfully.")
 
 
-def handle_generate():
+def handle_generate(
+    librarian: str = LIBRARIAN_DIR, source: str = SOURCE_DIR, output: str = OUTPUT_DIR
+):
     """The main coordinator for the code generation process.
 
     This function orchestrates the generation of a client library by reading a
@@ -209,16 +224,15 @@
 
     try:
         # Read a generate-request.json file
-        request_data = _read_json_file(f"{LIBRARIAN_DIR}/{GENERATE_REQUEST_FILE}")
+        request_data = _read_json_file(f"{librarian}/{GENERATE_REQUEST_FILE}")
         library_id = _get_library_id(request_data)
-
         for api in request_data.get("apis", []):
             api_path = api.get("path")
             if api_path:
-                bazel_rule = _determine_bazel_rule(api_path)
-                _build_bazel_target(bazel_rule)
-                _locate_and_extract_artifact(bazel_rule, library_id)
-        _run_post_processor()
+                bazel_rule = _determine_bazel_rule(api_path, source)
+                _build_bazel_target(bazel_rule, source)
+                _locate_and_extract_artifact(bazel_rule, library_id, source, output)
+        _run_post_processor(output)
     except Exception as e:
         raise ValueError("Generation failed.") from e
 
@@ -227,16 +241,17 @@
     logger.info("'generate' command executed.")
 
 
-def _run_nox_sessions(sessions: List[str]):
+def _run_nox_sessions(sessions: List[str], librarian_path: str = LIBRARIAN_DIR):
     """Calls nox for all specified sessions.
 
     Args:
         path(List[str]): The list of nox sessions to run.
+        librarian_path(str): The path to the librarian build configuration directory
     """
-    # Read a generate-request.json file
+    # Read a build-request.json file
     current_session = None
     try:
-        request_data = _read_json_file(f"{LIBRARIAN_DIR}/{GENERATE_REQUEST_FILE}")
+        request_data = _read_json_file(f"{librarian_path}/{BUILD_REQUEST_FILE}")
         library_id = _get_library_id(request_data)
         for nox_session in sessions:
             _run_individual_session(nox_session, library_id)
@@ -263,7 +278,7 @@
     logger.info(result)
 
 
-def handle_build():
+def handle_build(librarian: str = LIBRARIAN_DIR):
     """The main coordinator for validating client library generation."""
     sessions = [
         "unit-3.9",
@@ -278,7 +293,7 @@
         "mypy",
         "check_lower_bounds",
     ]
-    _run_nox_sessions(sessions)
+    _run_nox_sessions(sessions, librarian)
 
     logger.info("'build' command executed.")
 
@@ -303,10 +318,43 @@
     ]:
         parser_cmd = subparsers.add_parser(command_name, help=help_text)
         parser_cmd.set_defaults(func=handler_map[command_name])
+        parser_cmd.add_argument(
+            "--librarian",
+            type=str,
+            help="Path to the directory in the container which contains the librarian configuration",
+            default=LIBRARIAN_DIR,
+        )
+        parser_cmd.add_argument(
+            "--input",
+            type=str,
+            help="Path to the directory in the container which contains additional generator input",
+            default="/input",
+        )
+        parser_cmd.add_argument(
+            "--output",
+            type=str,
+            help="Path to the directory in the container where code should be generated",
+            default=OUTPUT_DIR,
+        )
+        parser_cmd.add_argument(
+            "--source",
+            type=str,
+            help="Path to the directory in the container which contains API protos",
+            default=SOURCE_DIR,
+        )
 
     if len(sys.argv) == 1:
         parser.print_help(sys.stderr)
         sys.exit(1)
 
     args = parser.parse_args()
-    args.func()
+
+    # Pass specific arguments to the handler functions for generate/build
+    if args.command == "generate":
+        args.func(
+            librarian=args.librarian, source=args.source, output=args.output
+        )
+    elif args.command == "build":
+        args.func(librarian=args.librarian)
+    else:
+        args.func()
diff --git a/.generator/test_cli.py b/.generator/test_cli.py
index 552ebaee024e..e972acc3734c 100644
--- a/.generator/test_cli.py
+++ b/.generator/test_cli.py
@@ -248,7 +248,7 @@ def test_handle_generate_success(caplog, mock_generate_request_file, mocker):
 
     handle_generate()
 
-    mock_determine_rule.assert_called_once_with("google/cloud/language/v1")
+    mock_determine_rule.assert_called_once_with("google/cloud/language/v1", "source")
 
 
 def test_handle_generate_fail(caplog):
diff --git a/librarian/generate-request.json b/librarian/generate-request.json
new file mode 100644
index 000000000000..a853d417ee7f
--- /dev/null
+++ b/librarian/generate-request.json
@@ -0,0 +1,18 @@
+{
+    "id": "google-cloud-language",
+    "apis": [
+        {
+            "path": "google/cloud/language/v1",
+            "service_config": "language.yaml"
+        }
+    ],
+    "source_paths": [
+        "packages/google-cloud-languages"
+    ],
+    "preserve_regex": [
+        ""
+    ],
+    "remove_regex": [
+        ""
+    ]
+}