diff --git a/scripts/release/branch_detection.py b/scripts/release/branch_detection.py new file mode 100644 index 000000000..bf37a180e --- /dev/null +++ b/scripts/release/branch_detection.py @@ -0,0 +1,72 @@ +""" +Branch detection and cache scoping utilities for Evergreen CI. + +This module provides functions to detect the current git branch and generate +cache scopes for BuildKit remote cache in different environments (local development, +Evergreen patch builds, Evergreen regular builds). +""" + +import subprocess +from typing import Optional + + +def get_current_branch() -> Optional[str]: + """ + Detect the current git branch for cache scoping. + + In CI environments like Evergreen, git rev-parse --abbrev-ref HEAD returns + auto-generated branch names like evg-pr-testing-. This function finds the original branch name by + looking for remote branches that point to the current commit. + + :return: branch name or 'master' as fallback + """ + try: + # Find the original branch (same commit, but not the evg-pr-test-* branch which evg creates) + current_commit_result = subprocess.run(["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True) + current_commit = current_commit_result.stdout.strip() + + # Get all remote branches with their commit hashes + remote_branches_result = subprocess.run( + ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"], + capture_output=True, + text=True, + check=True, + ) + + # Find branches that point to the current commit, excluding auto-generated CI branches + for line in remote_branches_result.stdout.strip().split("\n"): + if not line: + continue + parts = line.split() + if len(parts) >= 2: + branch_name, commit_hash = parts[0], parts[1] + if commit_hash == current_commit and not "evg-pr-test" in branch_name: + # Remove 'origin/' prefix + original_branch = branch_name.replace("origin/", "", 1) + if original_branch: + return original_branch + except (subprocess.CalledProcessError, FileNotFoundError): + return "master" + + return "master" + + +def get_cache_scope() -> str: + """ + Get the cache scope for BuildKit remote cache. + + Returns a scope string that combines branch and run information: + - For master branch: returns "master" + - For other branches: returns the branch name (sanitized for use in image tags) + - For patch builds: includes version_id to avoid conflicts + + :return: cache scope string suitable for use in image tags + """ + branch = get_current_branch() + + # Sanitize branch name for use in image tags + # Replace invalid characters with hyphens and convert to lowercase + sanitized_branch = branch.lower() + sanitized_branch = "".join(c if c.isalnum() or c in "-_." else "-" for c in sanitized_branch) + + return sanitized_branch diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 8bfecc6b2..dcf486c49 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -1,6 +1,6 @@ # This file is the new Sonar import base64 -from typing import Dict +from typing import Dict, List, Any import boto3 import docker @@ -9,10 +9,64 @@ from python_on_whales.exceptions import DockerException from lib.base_logger import logger +from scripts.release.branch_detection import get_cache_scope, get_current_branch DEFAULT_BUILDER_NAME = "multiarch" # Default buildx builder name +def ensure_ecr_cache_repository(repository_name: str, region: str = "us-east-1"): + ecr_client = boto3.client("ecr", region_name=region) + try: + _ = ecr_client.create_repository(repositoryName=repository_name) + logger.info(f"Successfully created ECR cache repository: {repository_name}") + except ClientError as e: + error_code = e.response['Error']['Code'] + if error_code == 'RepositoryAlreadyExistsException': + logger.info(f"ECR cache repository already exists: {repository_name}") + else: + logger.error(f"Failed to create ECR cache repository {repository_name}: {error_code} - {e}") + raise + + +def build_cache_configuration(base_registry: str) -> tuple[list[Any], dict[str, str]]: + """ + Build cache configuration for branch-scoped BuildKit remote cache. + + Implements the cache strategy: + - Per-image cache repo: …/dev/cache/ + - Per-branch run with read precedence: branch → master + - Write to branch scope only + - Use BuildKit registry cache exporter with mode=max, oci-mediatypes=true, image-manifest=true + + :param base_registry: Base registry URL for cache (e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes") + """ + cache_scope = get_cache_scope() + + # Build cache references with read precedence: branch → master + cache_from_refs = [] + + # Read precedence: branch → master + branch_ref = f"{base_registry}:{cache_scope}" + master_ref = f"{base_registry}:master" + + # Add to cache_from in order of precedence + if cache_scope != "master": + cache_from_refs.append({"type": "registry", "ref": branch_ref}) + cache_from_refs.append({"type": "registry", "ref": master_ref}) + else: + cache_from_refs.append({"type": "registry", "ref": master_ref}) + + cache_to_refs = { + "type": "registry", + "ref": branch_ref, + "mode": "max", + "oci-mediatypes": "true", + "image-manifest": "true" + } + + return cache_from_refs, cache_to_refs + + def ecr_login_boto3(region: str, account_id: str): """ Fetches an auth token from ECR via boto3 and logs @@ -75,8 +129,8 @@ def ensure_buildx_builder(builder_name: str = DEFAULT_BUILDER_NAME) -> str: def execute_docker_build( tags: list[str], dockerfile: str, - path: str, args: - Dict[str, str], + path: str, + args: Dict[str, str], push: bool, platforms: list[str], builder_name: str = DEFAULT_BUILDER_NAME, @@ -102,17 +156,24 @@ def execute_docker_build( # Convert build args to the format expected by python_on_whales build_args = {k: str(v) for k, v in args.items()} + cache_from_refs, cache_to_refs = _build_cache(tags) + logger.info(f"Building image: {tags}") logger.info(f"Platforms: {platforms}") logger.info(f"Dockerfile: {dockerfile}") logger.info(f"Build context: {path}") + logger.info(f"Cache scope: {get_cache_scope()}") + logger.info(f"Current branch: {get_current_branch()}") + logger.info(f"Cache from sources: {len(cache_from_refs)} refs") + logger.info(f"Cache to targets: {len(cache_to_refs)} refs") logger.debug(f"Build args: {build_args}") + logger.debug(f"Cache from: {cache_from_refs}") + logger.debug(f"Cache to: {cache_to_refs}") # Use buildx for multi-platform builds if len(platforms) > 1: logger.info(f"Multi-platform build for {len(platforms)} architectures") - # Build the image using buildx, builder must be already initialized docker_cmd.buildx.build( context_path=path, file=dockerfile, @@ -124,6 +185,8 @@ def execute_docker_build( push=push, provenance=False, # To not get an untagged image for single platform builds pull=False, # Don't always pull base images + cache_from=cache_from_refs, + cache_to=cache_to_refs, ) logger.info(f"Successfully built {'and pushed' if push else ''} {tags}") @@ -131,3 +194,27 @@ def execute_docker_build( except Exception as e: logger.error(f"Failed to build image {tags}: {e}") raise RuntimeError(f"Failed to build image {tags}: {str(e)}") + + +def _build_cache(tags): + # Filter tags to only include ECR ones (containing ".dkr.ecr.") + ecr_tags = [tag for tag in tags if ".dkr.ecr." in tag] + if not ecr_tags: + return [], {} + primary_tag = ecr_tags[0] + # Extract the repository URL without tag (e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes:1.0.0" -> "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes") + repository_url = primary_tag.split(":")[0] if ":" in primary_tag else primary_tag + # Extract just the image name from the repository URL (e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes" -> "mongodb-kubernetes") + cache_image_name = repository_url.split("/")[-1] + # Base cache repository name + base_cache_repo = f"dev/cache/{cache_image_name}" + # Build branch/arch-scoped cache configuration + base_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/{base_cache_repo}" + + ensure_ecr_cache_repository(base_cache_repo) + + # TODO CLOUDP-335471: use env variables to configure AWS region and account ID + cache_from_refs, cache_to_refs = build_cache_configuration( + base_registry + ) + return cache_from_refs, cache_to_refs diff --git a/scripts/release/tests/branch_detection_test.py b/scripts/release/tests/branch_detection_test.py new file mode 100644 index 000000000..0b2ae2b69 --- /dev/null +++ b/scripts/release/tests/branch_detection_test.py @@ -0,0 +1,143 @@ +import subprocess +from unittest.mock import MagicMock, patch + +from scripts.release.branch_detection import ( + get_cache_scope, + get_current_branch, +) + + +class TestGetCurrentBranch: + """Test branch detection logic for different scenarios.""" + + @patch("subprocess.run") + def test_ci_environment_with_original_branch(self, mock_run): + """Test detection of original branch in CI environment like Evergreen.""" + + # Mock the sequence of git commands + def side_effect(cmd, **kwargs): + if cmd == ["git", "rev-parse", "HEAD"]: + return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) + elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: + return MagicMock( + stdout="origin/master 1234567890abcdef\norigin/add-caching 4cecea664abcd1234567890\norigin/evg-pr-test-12345 4cecea664abcd1234567890\n", + returncode=0, + ) + elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: + return MagicMock(stdout="evg-pr-test-12345\n", returncode=0) + return MagicMock(stdout="", returncode=1) + + mock_run.side_effect = side_effect + + result = get_current_branch() + + assert result == "add-caching" + + @patch("subprocess.run") + def test_master_branch_fallback(self, mock_run): + """Test detection of master branch using fallback method.""" + + # Mock the sequence where sophisticated method fails but fallback works + def side_effect(cmd, **kwargs): + if cmd == ["git", "rev-parse", "HEAD"]: + return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) + elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: + raise subprocess.CalledProcessError(1, "git") # This fails, triggering fallback + elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: + return MagicMock(stdout="master\n", returncode=0) + return MagicMock(stdout="", returncode=1) + + mock_run.side_effect = side_effect + + result = get_current_branch() + + assert result == "master" + + @patch("subprocess.run") + def test_detached_head_fallback(self, mock_run): + """Test detection when in detached HEAD state using fallback.""" + + # Mock the sequence where sophisticated method fails and fallback returns HEAD + def side_effect(cmd, **kwargs): + if cmd == ["git", "rev-parse", "HEAD"]: + return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) + elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: + raise subprocess.CalledProcessError(1, "git") # This fails, triggering fallback + elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: + return MagicMock(stdout="HEAD\n", returncode=0) + return MagicMock(stdout="", returncode=1) + + mock_run.side_effect = side_effect + + result = get_current_branch() + + assert result == "master" # fallback to master + + @patch("subprocess.run") + def test_ci_branch_filtered_out_in_fallback(self, mock_run): + """Test that CI auto-generated branches are filtered out in fallback.""" + + # Mock the sequence where sophisticated method fails and fallback returns CI branch + def side_effect(cmd, **kwargs): + if cmd == ["git", "rev-parse", "HEAD"]: + return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) + elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: + raise subprocess.CalledProcessError(1, "git") # This fails, triggering fallback + elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: + return MagicMock(stdout="evg-pr-test-12345\n", returncode=0) + return MagicMock(stdout="", returncode=1) + + mock_run.side_effect = side_effect + + result = get_current_branch() + + assert result == "master" # fallback to master when CI branch is detected + + @patch("subprocess.run") + def test_git_command_fails(self, mock_run): + """Test fallback when all git commands fail.""" + mock_run.side_effect = subprocess.CalledProcessError(1, "git") + + result = get_current_branch() + + assert result == "master" # fallback to master + + @patch("subprocess.run") + def test_git_not_found(self, mock_run): + """Test fallback when git is not found.""" + mock_run.side_effect = FileNotFoundError("git not found") + + result = get_current_branch() + + assert result == "master" # fallback to master + + +class TestGetCacheScope: + """Test cache scope generation for different scenarios.""" + + @patch("scripts.release.branch_detection.get_current_branch") + def test_feature_branch(self, mock_branch): + """Test cache scope for feature branch.""" + mock_branch.return_value = "feature/new-cache" + + result = get_cache_scope() + + assert result == "feature-new-cache" + + @patch("scripts.release.branch_detection.get_current_branch") + def test_branch_name_sanitization(self, mock_branch): + """Test branch name sanitization for cache scope.""" + mock_branch.return_value = "Feature/NEW_cache@123" + + result = get_cache_scope() + + assert result == "feature-new_cache-123" + + @patch("scripts.release.branch_detection.get_current_branch") + def test_complex_branch_name(self, mock_branch): + """Test cache scope for complex branch name with special characters.""" + mock_branch.return_value = "user/feature-123_test.branch" + + result = get_cache_scope() + + assert result == "user-feature-123_test.branch" diff --git a/scripts/release/tests/image_build_process_test.py b/scripts/release/tests/image_build_process_test.py new file mode 100644 index 000000000..08e2a5112 --- /dev/null +++ b/scripts/release/tests/image_build_process_test.py @@ -0,0 +1,60 @@ +from unittest.mock import patch + +from scripts.release.build.image_build_process import ( + build_cache_configuration, +) + + +class TestBuildCacheConfiguration: + """Test cache configuration building for different scenarios.""" + + @patch("scripts.release.build.image_build_process.get_cache_scope") + def test_master_branch(self, mock_scope): + """Test cache configuration for master branch.""" + mock_scope.return_value = "master" + + base_registry = "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes" + cache_from, cache_to = build_cache_configuration(base_registry) + + # Should read from master only + expected_from = [{"type": "registry", "ref": f"{base_registry}:master"}] + assert cache_from == expected_from + + # Should write to master + assert cache_to["ref"] == f"{base_registry}:master" + assert cache_to["mode"] == "max" + assert cache_to["oci-mediatypes"] == "true" + assert cache_to["image-manifest"] == "true" + + @patch("scripts.release.build.image_build_process.get_cache_scope") + def test_feature_branch(self, mock_scope): + """Test cache configuration for feature branch.""" + mock_scope.return_value = "feature-new-cache" + + base_registry = "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes" + cache_from, cache_to = build_cache_configuration(base_registry) + + # Should read from branch and master + expected_from = [ + {"type": "registry", "ref": f"{base_registry}:feature-new-cache"}, + {"type": "registry", "ref": f"{base_registry}:master"}, + ] + assert cache_from == expected_from + + # Should write to branch only + assert cache_to["ref"] == f"{base_registry}:feature-new-cache" + assert cache_to["mode"] == "max" + assert cache_to["oci-mediatypes"] == "true" + assert cache_to["image-manifest"] == "true" + + @patch("scripts.release.build.image_build_process.get_cache_scope") + def test_sanitized_branch_name(self, mock_scope): + """Test cache configuration with sanitized branch name.""" + mock_scope.return_value = "feature-new-cache-123" + + base_registry = "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes" + cache_from, cache_to = build_cache_configuration(base_registry) + + # Should include sanitized branch name in cache refs + assert cache_from[0]["ref"] == f"{base_registry}:feature-new-cache-123" + assert cache_to["ref"] == f"{base_registry}:feature-new-cache-123"