From cc47ed8184b2187a962524849a11787c23b92da5 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Thu, 21 Aug 2025 16:19:38 +0200 Subject: [PATCH 01/23] add docker caching --- scripts/release/build/image_build_process.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 93c3a53d1..026835a4b 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -102,10 +102,19 @@ def execute_docker_build( # Convert build args to the format expected by python_on_whales build_args = {k: str(v) for k, v in args.items()} + # Extract registry name from tag for cache configuration + # tag format is "registry:version", we need the registry part + registry_name = tag.split(":")[0] if ":" in tag else tag + # Extract just the image name from the full registry path + # e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes" -> "mongodb-kubernetes" + cache_image_name = registry_name.split("/")[-1] + cache_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/{cache_image_name}" + logger.info(f"Building image: {tag}") logger.info(f"Platforms: {platforms}") logger.info(f"Dockerfile: {dockerfile}") logger.info(f"Build context: {path}") + logger.info(f"Cache registry: {cache_registry}") logger.debug(f"Build args: {build_args}") # Use buildx for multi-platform builds @@ -124,6 +133,8 @@ def execute_docker_build( push=push, provenance=False, # To not get an untagged image for single platform builds pull=False, # Don't always pull base images + cache_from=[f"type=registry,ref={cache_registry}"], + cache_to=[f"type=registry,ref={cache_registry},mode=max"], ) logger.info(f"Successfully built {'and pushed' if push else ''} {tag}") From 75e930870fbef0b73ce0c9790dc23fb81ba2fc5b Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Thu, 21 Aug 2025 16:23:52 +0200 Subject: [PATCH 02/23] add docker caching --- scripts/release/build/image_build_process.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 026835a4b..54df7d1cd 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -102,10 +102,7 @@ def execute_docker_build( # Convert build args to the format expected by python_on_whales build_args = {k: str(v) for k, v in args.items()} - # Extract registry name from tag for cache configuration - # tag format is "registry:version", we need the registry part registry_name = tag.split(":")[0] if ":" in tag else tag - # Extract just the image name from the full registry path # e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes" -> "mongodb-kubernetes" cache_image_name = registry_name.split("/")[-1] cache_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/{cache_image_name}" From b4fa9fed3fdedad72d8541180ec67eb18f1ccdae Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Thu, 21 Aug 2025 16:24:32 +0200 Subject: [PATCH 03/23] add docker caching --- scripts/release/build/image_build_process.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 54df7d1cd..5f6a118a9 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -105,6 +105,7 @@ def execute_docker_build( registry_name = tag.split(":")[0] if ":" in tag else tag # e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes" -> "mongodb-kubernetes" cache_image_name = registry_name.split("/")[-1] + # TODO CLOUDP-335471: use env variables to configure AWS region and account ID cache_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/{cache_image_name}" logger.info(f"Building image: {tag}") From 285c77dd5071716fd4f912ff0810bb3ad3fdf6c0 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Thu, 21 Aug 2025 16:40:09 +0200 Subject: [PATCH 04/23] fix --- scripts/release/build/image_build_process.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 5f6a118a9..5805b4614 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -75,8 +75,8 @@ def ensure_buildx_builder(builder_name: str = DEFAULT_BUILDER_NAME) -> str: def execute_docker_build( tag: str, dockerfile: str, - path: str, args: - Dict[str, str], + path: str, + args: Dict[str, str], push: bool, platforms: list[str], builder_name: str = DEFAULT_BUILDER_NAME, @@ -131,8 +131,8 @@ def execute_docker_build( push=push, provenance=False, # To not get an untagged image for single platform builds pull=False, # Don't always pull base images - cache_from=[f"type=registry,ref={cache_registry}"], - cache_to=[f"type=registry,ref={cache_registry},mode=max"], + cache_from=f"type=registry,ref={cache_registry}", + cache_to=f"type=registry,ref={cache_registry},mode=max", ) logger.info(f"Successfully built {'and pushed' if push else ''} {tag}") From f65be04509f277cc690697e9b1607b1fc40fda80 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Thu, 21 Aug 2025 16:55:02 +0200 Subject: [PATCH 05/23] short term ecr cache creation --- scripts/release/build/image_build_process.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 5805b4614..3d8957604 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -13,6 +13,19 @@ DEFAULT_BUILDER_NAME = "multiarch" # Default buildx builder name +def ensure_ecr_cache_repository(repository_name: str, region: str = "us-east-1"): + """Create ECR repository for cache if it doesn't exist - TEMPORARY FUNCTION""" + ecr_client = boto3.client("ecr", region_name=region) + try: + ecr_client.create_repository(repositoryName=repository_name) + logger.info(f"Created ECR cache repository: {repository_name}") + except ClientError as e: + if e.response['Error']['Code'] == 'RepositoryAlreadyExistsException': + logger.debug(f"ECR cache repository already exists: {repository_name}") + else: + logger.warning(f"Failed to create ECR cache repository {repository_name}: {e}") + + def ecr_login_boto3(region: str, account_id: str): """ Fetches an auth token from ECR via boto3 and logs @@ -108,6 +121,9 @@ def execute_docker_build( # TODO CLOUDP-335471: use env variables to configure AWS region and account ID cache_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/{cache_image_name}" + # TEMPORARY: Create cache repository if it doesn't exist + ensure_ecr_cache_repository(f"dev/cache/{cache_image_name}") + logger.info(f"Building image: {tag}") logger.info(f"Platforms: {platforms}") logger.info(f"Dockerfile: {dockerfile}") From e1614d23a3954d6ff0684b1111ec3d0b4d166020 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Thu, 21 Aug 2025 17:05:54 +0200 Subject: [PATCH 06/23] finish renaming --- scripts/release/build/image_build_process.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 3d8957604..5805b4614 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -13,19 +13,6 @@ DEFAULT_BUILDER_NAME = "multiarch" # Default buildx builder name -def ensure_ecr_cache_repository(repository_name: str, region: str = "us-east-1"): - """Create ECR repository for cache if it doesn't exist - TEMPORARY FUNCTION""" - ecr_client = boto3.client("ecr", region_name=region) - try: - ecr_client.create_repository(repositoryName=repository_name) - logger.info(f"Created ECR cache repository: {repository_name}") - except ClientError as e: - if e.response['Error']['Code'] == 'RepositoryAlreadyExistsException': - logger.debug(f"ECR cache repository already exists: {repository_name}") - else: - logger.warning(f"Failed to create ECR cache repository {repository_name}: {e}") - - def ecr_login_boto3(region: str, account_id: str): """ Fetches an auth token from ECR via boto3 and logs @@ -121,9 +108,6 @@ def execute_docker_build( # TODO CLOUDP-335471: use env variables to configure AWS region and account ID cache_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/{cache_image_name}" - # TEMPORARY: Create cache repository if it doesn't exist - ensure_ecr_cache_repository(f"dev/cache/{cache_image_name}") - logger.info(f"Building image: {tag}") logger.info(f"Platforms: {platforms}") logger.info(f"Dockerfile: {dockerfile}") From fac670cc9d914281d96b615cad46e4875c827a49 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Thu, 21 Aug 2025 17:14:43 +0200 Subject: [PATCH 07/23] try cache repo creation --- scripts/release/build/image_build_process.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 5805b4614..c89415011 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -13,6 +13,18 @@ DEFAULT_BUILDER_NAME = "multiarch" # Default buildx builder name +def ensure_ecr_cache_repository(repository_name: str, region: str = "us-east-1"): + ecr_client = boto3.client("ecr", region_name=region) + try: + ecr_client.create_repository(repositoryName=repository_name) + logger.info(f"Created ECR repository: {repository_name}") + except ClientError as e: + if e.response['Error']['Code'] == 'RepositoryAlreadyExistsException': + logger.debug(f"ECR cache repository already exists: {repository_name}") + else: + logger.warning(f"Failed to create ECR cache repository {repository_name}: {e}") + + def ecr_login_boto3(region: str, account_id: str): """ Fetches an auth token from ECR via boto3 and logs @@ -108,6 +120,8 @@ def execute_docker_build( # TODO CLOUDP-335471: use env variables to configure AWS region and account ID cache_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/{cache_image_name}" + ensure_ecr_cache_repository(cache_registry) + logger.info(f"Building image: {tag}") logger.info(f"Platforms: {platforms}") logger.info(f"Dockerfile: {dockerfile}") From 3a454606861af3bee5dda0dde9d4de1652008748 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 10:54:01 +0200 Subject: [PATCH 08/23] try cache repo creation --- scripts/release/build/image_build_process.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index c89415011..c4075cdd1 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -14,15 +14,18 @@ def ensure_ecr_cache_repository(repository_name: str, region: str = "us-east-1"): + logger.info(f"Attempting to create ECR cache repository: {repository_name}") ecr_client = boto3.client("ecr", region_name=region) try: - ecr_client.create_repository(repositoryName=repository_name) - logger.info(f"Created ECR repository: {repository_name}") + _ = ecr_client.create_repository(repositoryName=repository_name) + logger.info(f"TEMP: Successfully created ECR cache repository: {repository_name}") except ClientError as e: - if e.response['Error']['Code'] == 'RepositoryAlreadyExistsException': - logger.debug(f"ECR cache repository already exists: {repository_name}") + error_code = e.response['Error']['Code'] + if error_code == 'RepositoryAlreadyExistsException': + logger.info(f"TEMP: ECR cache repository already exists: {repository_name}") else: - logger.warning(f"Failed to create ECR cache repository {repository_name}: {e}") + logger.error(f"TEMP: Failed to create ECR cache repository {repository_name}: {error_code} - {e}") + raise # Re-raise to see if this is causing the build to fail def ecr_login_boto3(region: str, account_id: str): @@ -120,7 +123,8 @@ def execute_docker_build( # TODO CLOUDP-335471: use env variables to configure AWS region and account ID cache_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/{cache_image_name}" - ensure_ecr_cache_repository(cache_registry) + cache_repo_name = f"dev/cache/{cache_image_name}" + ensure_ecr_cache_repository(cache_repo_name) logger.info(f"Building image: {tag}") logger.info(f"Platforms: {platforms}") From 6ddc8ca1f4d28597ad945cd7b6e9c0d22a0dcc3d Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 13:05:26 +0200 Subject: [PATCH 09/23] update comments --- scripts/release/build/image_build_process.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index c4075cdd1..d920c88d4 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -14,18 +14,17 @@ def ensure_ecr_cache_repository(repository_name: str, region: str = "us-east-1"): - logger.info(f"Attempting to create ECR cache repository: {repository_name}") ecr_client = boto3.client("ecr", region_name=region) try: _ = ecr_client.create_repository(repositoryName=repository_name) - logger.info(f"TEMP: Successfully created ECR cache repository: {repository_name}") + logger.info(f"Successfully created ECR cache repository: {repository_name}") except ClientError as e: error_code = e.response['Error']['Code'] if error_code == 'RepositoryAlreadyExistsException': - logger.info(f"TEMP: ECR cache repository already exists: {repository_name}") + logger.info(f"ECR cache repository already exists: {repository_name}") else: - logger.error(f"TEMP: Failed to create ECR cache repository {repository_name}: {error_code} - {e}") - raise # Re-raise to see if this is causing the build to fail + logger.error(f"Failed to create ECR cache repository {repository_name}: {error_code} - {e}") + raise def ecr_login_boto3(region: str, account_id: str): From 288b31b9e39c950c32719759ccaf97415c41dbf0 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 13:53:04 +0200 Subject: [PATCH 10/23] split by arch --- scripts/release/build/image_build_process.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index d920c88d4..ba52f4bfc 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -120,7 +120,18 @@ def execute_docker_build( # e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes" -> "mongodb-kubernetes" cache_image_name = registry_name.split("/")[-1] # TODO CLOUDP-335471: use env variables to configure AWS region and account ID + cache_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/{cache_image_name}" + cache_from_sources = [] + cache_to_sources = [] + + for platform in platforms: + # Use multiple cache sources for better cache hit rate, and write to all platform-specific caches + # to avoid race conditions between concurrent multi-arch builds + arch = platform.split('/')[-1] + platform_cache = f"{cache_registry}:{arch}" + cache_from_sources.append(f"type=registry,ref={platform_cache}") + cache_to_sources.append(f"type=registry,ref={platform_cache},mode=max") cache_repo_name = f"dev/cache/{cache_image_name}" ensure_ecr_cache_repository(cache_repo_name) @@ -130,7 +141,11 @@ def execute_docker_build( logger.info(f"Dockerfile: {dockerfile}") logger.info(f"Build context: {path}") logger.info(f"Cache registry: {cache_registry}") + logger.info(f"Cache from sources: {len(cache_from_sources)} sources") + logger.info(f"Cache to sources: {len(cache_to_sources)} sources") logger.debug(f"Build args: {build_args}") + logger.debug(f"Cache from: {cache_from_sources}") + logger.debug(f"Cache to: {cache_to_sources}") # Use buildx for multi-platform builds if len(platforms) > 1: @@ -148,8 +163,8 @@ def execute_docker_build( push=push, provenance=False, # To not get an untagged image for single platform builds pull=False, # Don't always pull base images - cache_from=f"type=registry,ref={cache_registry}", - cache_to=f"type=registry,ref={cache_registry},mode=max", + cache_from=cache_from_sources, + cache_to=cache_to_sources, ) logger.info(f"Successfully built {'and pushed' if push else ''} {tag}") From 0e2f0d9e1b8f3c55edf3590c358fc49dba473120 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 14:03:30 +0200 Subject: [PATCH 11/23] split by arch --- scripts/release/build/image_build_process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index ba52f4bfc..a36b409d1 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -131,7 +131,7 @@ def execute_docker_build( arch = platform.split('/')[-1] platform_cache = f"{cache_registry}:{arch}" cache_from_sources.append(f"type=registry,ref={platform_cache}") - cache_to_sources.append(f"type=registry,ref={platform_cache},mode=max") + cache_to_sources = [f"type=registry,ref={cache_registry}:{arch},mode=max,oci-mediatypes=true,image-manifest=true"] cache_repo_name = f"dev/cache/{cache_image_name}" ensure_ecr_cache_repository(cache_repo_name) From f3b3a756f3ad927bd4355f6b19eee26dc697724d Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 14:09:56 +0200 Subject: [PATCH 12/23] split by arch --- scripts/release/build/image_build_process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index a36b409d1..902d6124a 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -131,7 +131,7 @@ def execute_docker_build( arch = platform.split('/')[-1] platform_cache = f"{cache_registry}:{arch}" cache_from_sources.append(f"type=registry,ref={platform_cache}") - cache_to_sources = [f"type=registry,ref={cache_registry}:{arch},mode=max,oci-mediatypes=true,image-manifest=true"] + cache_to_sources.append(f"type=registry,ref={cache_registry}:{arch},mode=max,oci-mediatypes=true,image-manifest=true") cache_repo_name = f"dev/cache/{cache_image_name}" ensure_ecr_cache_repository(cache_repo_name) From effb404af5f8a140cf3a3f7aa20b57ee9a8589ae Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 15:03:28 +0200 Subject: [PATCH 13/23] split by arch --- scripts/release/build/image_build_process.py | 33 +++++++++----------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 902d6124a..2ccc4dfd8 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -120,20 +120,18 @@ def execute_docker_build( # e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes" -> "mongodb-kubernetes" cache_image_name = registry_name.split("/")[-1] # TODO CLOUDP-335471: use env variables to configure AWS region and account ID + cache_repo_name = f"dev/cache/{cache_image_name}" + cache_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/{cache_repo_name}" + cache_from = [f"type=registry,ref={f"{cache_registry}:cache"}"] - cache_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/{cache_image_name}" - cache_from_sources = [] - cache_to_sources = [] - - for platform in platforms: - # Use multiple cache sources for better cache hit rate, and write to all platform-specific caches - # to avoid race conditions between concurrent multi-arch builds - arch = platform.split('/')[-1] - platform_cache = f"{cache_registry}:{arch}" - cache_from_sources.append(f"type=registry,ref={platform_cache}") - cache_to_sources.append(f"type=registry,ref={cache_registry}:{arch},mode=max,oci-mediatypes=true,image-manifest=true") + cache_to = { + "type": "registry", + "ref": f"{cache_registry}:cache", + "mode": "max", + "oci-mediatypes": "true", + "image-manifest": "true" + } - cache_repo_name = f"dev/cache/{cache_image_name}" ensure_ecr_cache_repository(cache_repo_name) logger.info(f"Building image: {tag}") @@ -141,11 +139,10 @@ def execute_docker_build( logger.info(f"Dockerfile: {dockerfile}") logger.info(f"Build context: {path}") logger.info(f"Cache registry: {cache_registry}") - logger.info(f"Cache from sources: {len(cache_from_sources)} sources") - logger.info(f"Cache to sources: {len(cache_to_sources)} sources") + logger.info(f"Cache to: {cache_to}") logger.debug(f"Build args: {build_args}") - logger.debug(f"Cache from: {cache_from_sources}") - logger.debug(f"Cache to: {cache_to_sources}") + logger.debug(f"Cache from: {cache_from}") + logger.debug(f"Cache to: {cache_to}") # Use buildx for multi-platform builds if len(platforms) > 1: @@ -163,8 +160,8 @@ def execute_docker_build( push=push, provenance=False, # To not get an untagged image for single platform builds pull=False, # Don't always pull base images - cache_from=cache_from_sources, - cache_to=cache_to_sources, + cache_from=cache_from, + cache_to=cache_to, ) logger.info(f"Successfully built {'and pushed' if push else ''} {tag}") From 8748149426661aaec3ec79ceedde449ce6b35a1b Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 15:14:06 +0200 Subject: [PATCH 14/23] split by arch --- scripts/release/build/image_build_process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 2ccc4dfd8..fec1c927d 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -122,7 +122,7 @@ def execute_docker_build( # TODO CLOUDP-335471: use env variables to configure AWS region and account ID cache_repo_name = f"dev/cache/{cache_image_name}" cache_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/{cache_repo_name}" - cache_from = [f"type=registry,ref={f"{cache_registry}:cache"}"] + cache_from = f"type=registry,ref={f"{cache_registry}:cache"}" cache_to = { "type": "registry", From 05fd38737edf277ccf8834eb488c93240ff3875b Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 17:15:59 +0200 Subject: [PATCH 15/23] split by branch --- scripts/release/branch_detection.py | 113 +++++++++ scripts/release/build/image_build_process.py | 95 +++++-- .../release/tests/branch_detection_test.py | 237 ++++++++++++++++++ .../release/tests/image_build_process_test.py | 209 +++++++++++++++ 4 files changed, 635 insertions(+), 19 deletions(-) create mode 100644 scripts/release/branch_detection.py create mode 100644 scripts/release/tests/branch_detection_test.py create mode 100644 scripts/release/tests/image_build_process_test.py diff --git a/scripts/release/branch_detection.py b/scripts/release/branch_detection.py new file mode 100644 index 000000000..f0b819b4a --- /dev/null +++ b/scripts/release/branch_detection.py @@ -0,0 +1,113 @@ +""" +Branch detection and cache scoping utilities for Evergreen CI. + +This module provides functions to detect the current git branch and generate +cache scopes for BuildKit remote cache in different environments (local development, +Evergreen patch builds, Evergreen regular builds). +""" + +import os +import subprocess +from typing import Optional + +from scripts.release.constants import is_running_in_evg, is_evg_patch, get_version_id + + +def get_current_branch() -> Optional[str]: + """ + Detect the current git branch for cache scoping. + + In Evergreen CI: + - For patch builds: tries to detect the original branch (not evg-pr-test-* branches) + - For master builds: returns the actual branch name + - Falls back to 'master' if detection fails + + :return: branch name or 'master' as fallback + """ + if not is_running_in_evg(): + # Local development - use git directly + try: + result = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + capture_output=True, + text=True, + check=True + ) + branch = result.stdout.strip() + return branch if branch != "HEAD" else "master" + except (subprocess.CalledProcessError, FileNotFoundError): + return "master" + + # Running in Evergreen + if is_evg_patch(): + # For patch builds, try to detect the original branch + # This logic is based on scripts/evergreen/precommit_bump.sh + try: + # Get all remote refs with their commit hashes + result = subprocess.run( + ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"], + capture_output=True, + text=True, + check=True + ) + + # Get current commit hash + current_commit = subprocess.run( + ["git", "rev-parse", "HEAD"], + capture_output=True, + text=True, + check=True + ).stdout.strip() + + # Find branches that point to the current commit, excluding evg-pr-test-* branches + for line in result.stdout.strip().split('\n'): + if not line: + continue + parts = line.split() + if len(parts) >= 2: + ref_name, commit_hash = parts[0], parts[1] + if commit_hash == current_commit and not ref_name.startswith('origin/evg-pr-test-'): + # Remove 'origin/' prefix + branch = ref_name.replace('origin/', '', 1) + return branch + + except (subprocess.CalledProcessError, FileNotFoundError): + pass + else: + # For non-patch builds, try to get the branch from git + try: + result = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + capture_output=True, + text=True, + check=True + ) + branch = result.stdout.strip() + if branch and branch != "HEAD": + return branch + except (subprocess.CalledProcessError, FileNotFoundError): + pass + + # Fallback to master + return "master" + + +def get_cache_scope() -> str: + """ + Get the cache scope for BuildKit remote cache. + + Returns a scope string that combines branch and run information: + - For master branch: returns "master" + - For other branches: returns the branch name (sanitized for use in image tags) + - For patch builds: includes version_id to avoid conflicts + + :return: cache scope string suitable for use in image tags + """ + branch = get_current_branch() + + # Sanitize branch name for use in image tags + # Replace invalid characters with hyphens and convert to lowercase + sanitized_branch = branch.lower() + sanitized_branch = ''.join(c if c.isalnum() or c in '-_.' else '-' for c in sanitized_branch) + + return sanitized_branch diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index fec1c927d..de4f62cdb 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -1,6 +1,6 @@ # This file is the new Sonar import base64 -from typing import Dict +from typing import Dict, List import boto3 import docker @@ -9,6 +9,7 @@ from python_on_whales.exceptions import DockerException from lib.base_logger import logger +from scripts.release.branch_detection import get_cache_scope, get_current_branch DEFAULT_BUILDER_NAME = "multiarch" # Default buildx builder name @@ -27,6 +28,61 @@ def ensure_ecr_cache_repository(repository_name: str, region: str = "us-east-1") raise +def build_cache_configuration(base_registry: str): + """ + Build cache configuration for branch/arch-scoped BuildKit remote cache. + + Implements the cache strategy: + - Per-image cache repo: …/dev/cache/ + - Per-branch run with read precedence: branch → master → shared + - Write to branch scope, and also to master if on master branch + - Use BuildKit registry cache exporter with mode=max, oci-mediatypes=true, image-manifest=true + + :param base_registry: + :return: tuple of (cache_from_list, cache_to_list, repositories_to_ensure) + """ + cache_scope = get_cache_scope() + logger.info(f"Building cache configuration for {cache_scope}") + current_branch = get_current_branch() + + # Build cache references with read precedence: branch → master → shared + cache_from_refs = [] + cache_to_refs = [] + + # Read precedence: branch-arch → master-arch → shared-arch + branch_arch_ref = f"{base_registry}:{cache_scope}" + master_arch_ref = f"{base_registry}:master" + shared_arch_ref = f"{base_registry}:shared" + + # Add to cache_from in order of precedence + if cache_scope != "master": + cache_from_refs.append(f"type=registry,ref={branch_arch_ref}") + cache_from_refs.append(f"type=registry,ref={master_arch_ref}") + else: + cache_from_refs.append(f"type=registry,ref={master_arch_ref}") + cache_from_refs.append(f"type=registry,ref={shared_arch_ref}") + + cache_to_refs.append({ + "type": "registry", + "ref": branch_arch_ref, + "mode": "max", + "oci-mediatypes": "true", + "image-manifest": "true" + }) + + if current_branch == "master" and branch_arch_ref != master_arch_ref: + cache_to_refs.append({ + "type": "registry", + "ref": master_arch_ref, + "mode": "max", + "oci-mediatypes": "true", + "image-manifest": "true" + }) + + + return cache_from_refs, cache_to_refs + + def ecr_login_boto3(region: str, account_id: str): """ Fetches an auth token from ECR via boto3 and logs @@ -119,30 +175,31 @@ def execute_docker_build( registry_name = tag.split(":")[0] if ":" in tag else tag # e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes" -> "mongodb-kubernetes" cache_image_name = registry_name.split("/")[-1] - # TODO CLOUDP-335471: use env variables to configure AWS region and account ID - cache_repo_name = f"dev/cache/{cache_image_name}" - cache_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/{cache_repo_name}" - cache_from = f"type=registry,ref={f"{cache_registry}:cache"}" - cache_to = { - "type": "registry", - "ref": f"{cache_registry}:cache", - "mode": "max", - "oci-mediatypes": "true", - "image-manifest": "true" - } + # Base cache repository name + base_cache_repo = f"dev/cache/{cache_image_name}" + + # Build branch/arch-scoped cache configuration + base_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/{base_cache_repo}" + + # TODO CLOUDP-335471: use env variables to configure AWS region and account ID + cache_from_refs, cache_to_refs = build_cache_configuration( + base_registry + ) - ensure_ecr_cache_repository(cache_repo_name) + # ensure_ecr_cache_repository(base_cache_repo) logger.info(f"Building image: {tag}") logger.info(f"Platforms: {platforms}") logger.info(f"Dockerfile: {dockerfile}") logger.info(f"Build context: {path}") - logger.info(f"Cache registry: {cache_registry}") - logger.info(f"Cache to: {cache_to}") + logger.info(f"Cache scope: {get_cache_scope()}") + logger.info(f"Current branch: {get_current_branch()}") + logger.info(f"Cache from sources: {len(cache_from_refs)} refs") + logger.info(f"Cache to targets: {len(cache_to_refs)} refs") logger.debug(f"Build args: {build_args}") - logger.debug(f"Cache from: {cache_from}") - logger.debug(f"Cache to: {cache_to}") + logger.debug(f"Cache from: {cache_from_refs}") + logger.debug(f"Cache to: {cache_to_refs}") # Use buildx for multi-platform builds if len(platforms) > 1: @@ -160,8 +217,8 @@ def execute_docker_build( push=push, provenance=False, # To not get an untagged image for single platform builds pull=False, # Don't always pull base images - cache_from=cache_from, - cache_to=cache_to, + cache_from=cache_from_refs, + cache_to=cache_to_refs, ) logger.info(f"Successfully built {'and pushed' if push else ''} {tag}") diff --git a/scripts/release/tests/branch_detection_test.py b/scripts/release/tests/branch_detection_test.py new file mode 100644 index 000000000..711ea34e7 --- /dev/null +++ b/scripts/release/tests/branch_detection_test.py @@ -0,0 +1,237 @@ +import os +import subprocess +from unittest.mock import patch, MagicMock + +import pytest + +from scripts.release.branch_detection import ( + get_current_branch, + get_cache_scope, + is_evg_patch, + is_running_in_evg, + get_version_id, +) + + +class TestGetCurrentBranch: + """Test branch detection logic for different scenarios.""" + + @patch('scripts.release.branch_detection.is_running_in_evg') + @patch('subprocess.run') + def test_local_development_master_branch(self, mock_run, mock_is_evg): + """Test local development on master branch.""" + mock_is_evg.return_value = False + mock_run.return_value = MagicMock(stdout="master\n", returncode=0) + + result = get_current_branch() + + assert result == "master" + mock_run.assert_called_once_with( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + capture_output=True, + text=True, + check=True + ) + + @patch('scripts.release.branch_detection.is_running_in_evg') + @patch('subprocess.run') + def test_local_development_feature_branch(self, mock_run, mock_is_evg): + """Test local development on feature branch.""" + mock_is_evg.return_value = False + mock_run.return_value = MagicMock(stdout="feature/new-cache\n", returncode=0) + + result = get_current_branch() + + assert result == "feature/new-cache" + + @patch('scripts.release.branch_detection.is_running_in_evg') + @patch('subprocess.run') + def test_local_development_detached_head(self, mock_run, mock_is_evg): + """Test local development in detached HEAD state.""" + mock_is_evg.return_value = False + mock_run.return_value = MagicMock(stdout="HEAD\n", returncode=0) + + result = get_current_branch() + + assert result == "master" # fallback to master + + @patch('scripts.release.branch_detection.is_running_in_evg') + @patch('subprocess.run') + def test_local_development_git_error(self, mock_run, mock_is_evg): + """Test local development when git command fails.""" + mock_is_evg.return_value = False + mock_run.side_effect = subprocess.CalledProcessError(1, "git") + + result = get_current_branch() + + assert result == "master" # fallback to master + + @patch('scripts.release.branch_detection.is_running_in_evg') + @patch('scripts.release.branch_detection.is_evg_patch') + @patch('subprocess.run') + def test_evergreen_non_patch_build(self, mock_run, mock_is_patch, mock_is_evg): + """Test Evergreen non-patch build.""" + mock_is_evg.return_value = True + mock_is_patch.return_value = False + mock_run.return_value = MagicMock(stdout="master\n", returncode=0) + + result = get_current_branch() + + assert result == "master" + + @patch('scripts.release.branch_detection.is_running_in_evg') + @patch('scripts.release.branch_detection.is_evg_patch') + @patch('subprocess.run') + def test_evergreen_patch_build_branch_detection(self, mock_run, mock_is_patch, mock_is_evg): + """Test Evergreen patch build with successful branch detection.""" + mock_is_evg.return_value = True + mock_is_patch.return_value = True + + # Mock git for-each-ref output + mock_run.side_effect = [ + MagicMock(stdout="origin/feature/cache-improvement abc123\norigin/evg-pr-test-123 abc123\norigin/main def456\n", returncode=0), + MagicMock(stdout="abc123\n", returncode=0) + ] + + result = get_current_branch() + + assert result == "feature/cache-improvement" + + @patch('scripts.release.branch_detection.is_running_in_evg') + @patch('scripts.release.branch_detection.is_evg_patch') + @patch('subprocess.run') + def test_evergreen_patch_build_fallback(self, mock_run, mock_is_patch, mock_is_evg): + """Test Evergreen patch build fallback when branch detection fails.""" + mock_is_evg.return_value = True + mock_is_patch.return_value = True + mock_run.side_effect = subprocess.CalledProcessError(1, "git") + + result = get_current_branch() + + assert result == "master" # fallback to master + + +class TestGetCacheScope: + """Test cache scope generation for different scenarios.""" + + @patch('scripts.release.branch_detection.get_current_branch') + @patch('scripts.release.branch_detection.is_evg_patch') + @patch('scripts.release.branch_detection.get_version_id') + def test_master_branch_non_patch(self, mock_version_id, mock_is_patch, mock_branch): + """Test cache scope for master branch non-patch build.""" + mock_branch.return_value = "master" + mock_is_patch.return_value = False + mock_version_id.return_value = None + + result = get_cache_scope() + + assert result == "master" + + @patch('scripts.release.branch_detection.get_current_branch') + @patch('scripts.release.branch_detection.is_evg_patch') + @patch('scripts.release.branch_detection.get_version_id') + def test_feature_branch_non_patch(self, mock_version_id, mock_is_patch, mock_branch): + """Test cache scope for feature branch non-patch build.""" + mock_branch.return_value = "feature/new-cache" + mock_is_patch.return_value = False + mock_version_id.return_value = None + + result = get_cache_scope() + + assert result == "feature-new-cache" + + @patch('scripts.release.branch_detection.get_current_branch') + @patch('scripts.release.branch_detection.is_evg_patch') + @patch('scripts.release.branch_detection.get_version_id') + def test_patch_build_with_version_id(self, mock_version_id, mock_is_patch, mock_branch): + """Test cache scope for patch build with version ID.""" + mock_branch.return_value = "feature/new-cache" + mock_is_patch.return_value = True + mock_version_id.return_value = "6899b7e35bfaee00077db986" + + result = get_cache_scope() + + assert result == "feature-new-cache-6899b7e3" + + @patch('scripts.release.branch_detection.get_current_branch') + @patch('scripts.release.branch_detection.is_evg_patch') + @patch('scripts.release.branch_detection.get_version_id') + def test_patch_build_without_version_id(self, mock_version_id, mock_is_patch, mock_branch): + """Test cache scope for patch build without version ID.""" + mock_branch.return_value = "feature/new-cache" + mock_is_patch.return_value = True + mock_version_id.return_value = None + + result = get_cache_scope() + + assert result == "feature-new-cache" + + @patch('scripts.release.branch_detection.get_current_branch') + @patch('scripts.release.branch_detection.is_evg_patch') + @patch('scripts.release.branch_detection.get_version_id') + def test_branch_name_sanitization(self, mock_version_id, mock_is_patch, mock_branch): + """Test branch name sanitization for cache scope.""" + mock_branch.return_value = "Feature/NEW_cache@123" + mock_is_patch.return_value = False + mock_version_id.return_value = None + + result = get_cache_scope() + + assert result == "feature-new_cache-123" + + @patch('scripts.release.branch_detection.get_current_branch') + @patch('scripts.release.branch_detection.is_evg_patch') + @patch('scripts.release.branch_detection.get_version_id') + def test_dependabot_branch(self, mock_version_id, mock_is_patch, mock_branch): + """Test cache scope for dependabot branch.""" + mock_branch.return_value = "dependabot/npm_and_yarn/lodash-4.17.21" + mock_is_patch.return_value = False + mock_version_id.return_value = None + + result = get_cache_scope() + + assert result == "dependabot-npm_and_yarn-lodash-4.17.21" + + +class TestEnvironmentDetection: + """Test environment detection functions.""" + + def test_is_evg_patch_true(self): + """Test is_evg_patch returns True when is_patch is 'true'.""" + with patch.dict(os.environ, {"is_patch": "true"}): + assert is_evg_patch() is True + + def test_is_evg_patch_false(self): + """Test is_evg_patch returns False when is_patch is 'false'.""" + with patch.dict(os.environ, {"is_patch": "false"}): + assert is_evg_patch() is False + + def test_is_evg_patch_default(self): + """Test is_evg_patch returns False when is_patch is not set.""" + with patch.dict(os.environ, {}, clear=True): + assert is_evg_patch() is False + + def test_is_running_in_evg_true(self): + """Test is_running_in_evg returns True when RUNNING_IN_EVG is 'true'.""" + with patch.dict(os.environ, {"RUNNING_IN_EVG": "true"}): + assert is_running_in_evg() is True + + def test_is_running_in_evg_false(self): + """Test is_running_in_evg returns False when RUNNING_IN_EVG is 'false'.""" + with patch.dict(os.environ, {"RUNNING_IN_EVG": "false"}): + assert is_running_in_evg() is False + + def test_is_running_in_evg_default(self): + """Test is_running_in_evg returns False when RUNNING_IN_EVG is not set.""" + with patch.dict(os.environ, {}, clear=True): + assert is_running_in_evg() is False + + def test_get_version_id_set(self): + """Test get_version_id returns value when version_id is set.""" + with patch.dict(os.environ, {"version_id": "6899b7e35bfaee00077db986"}): + assert get_version_id() == "6899b7e35bfaee00077db986" + + def test_get_version_id_not_set(self): + """Test get_version_id returns None when version_id is not set.""" + with patch.dict(os.environ, {}, clear=True): + assert get_version_id() is None diff --git a/scripts/release/tests/image_build_process_test.py b/scripts/release/tests/image_build_process_test.py new file mode 100644 index 000000000..1b766e1ef --- /dev/null +++ b/scripts/release/tests/image_build_process_test.py @@ -0,0 +1,209 @@ +from unittest.mock import patch, MagicMock + +import pytest + +from scripts.release.build.image_build_process import ( + build_cache_configuration, + ensure_all_cache_repositories, +) + + +class TestBuildCacheConfiguration: + """Test cache configuration building for different scenarios.""" + + @patch('scripts.release.build.image_build_process.get_cache_scope') + @patch('scripts.release.build.image_build_process.get_current_branch') + def test_single_platform_master_branch(self, mock_branch, mock_scope): + """Test cache configuration for single platform build on master branch.""" + mock_branch.return_value = "master" + mock_scope.return_value = "master" + + cache_from, cache_to, repos = build_cache_configuration( + "mongodb-kubernetes" + ) + + # Should read from master-arch and shared-arch + expected_from = [ + "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-amd64", + "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:shared-linux-amd64" + ] + assert cache_from == expected_from + + # Should write to master-arch + assert len(cache_to) == 1 + assert cache_to[0]["ref"] == "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-amd64" + assert cache_to[0]["mode"] == "max" + assert cache_to[0]["oci-mediatypes"] == "true" + assert cache_to[0]["image-manifest"] == "true" + + # Should ensure one repository + assert repos == {"dev/cache/mongodb-kubernetes"} + + @patch('scripts.release.build.image_build_process.get_cache_scope') + @patch('scripts.release.build.image_build_process.get_current_branch') + def test_single_platform_feature_branch(self, mock_branch, mock_scope): + """Test cache configuration for single platform build on feature branch.""" + mock_branch.return_value = "feature/new-cache" + mock_scope.return_value = "feature-new-cache" + + cache_from, cache_to, repos = build_cache_configuration( + "mongodb-kubernetes" + ) + + # Should read from branch-arch, master-arch, and shared-arch + expected_from = [ + "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-linux-amd64", + "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-amd64", + "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:shared-linux-amd64" + ] + assert cache_from == expected_from + + # Should write to branch-arch only (not master since we're not on master) + assert len(cache_to) == 1 + assert cache_to[0]["ref"] == "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-linux-amd64" + + @patch('scripts.release.build.image_build_process.get_cache_scope') + @patch('scripts.release.build.image_build_process.get_current_branch') + def test_multi_platform_master_branch(self, mock_branch, mock_scope): + """Test cache configuration for multi-platform build on master branch.""" + mock_branch.return_value = "master" + mock_scope.return_value = "master" + + cache_from, cache_to, repos = build_cache_configuration( + "mongodb-kubernetes" + ) + + # Should have cache entries for both architectures + assert len(cache_from) == 4 # 2 platforms * 2 refs each (master-arch + shared-arch) + assert len(cache_to) == 2 # 2 platforms * 1 ref each (master-arch) + + # Check amd64 cache from + assert "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-amd64" in cache_from + assert "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:shared-linux-amd64" in cache_from + + # Check arm64 cache from + assert "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-arm64" in cache_from + assert "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:shared-linux-arm64" in cache_from + + # Check cache to targets + cache_to_refs = [entry["ref"] for entry in cache_to] + assert "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-amd64" in cache_to_refs + assert "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-arm64" in cache_to_refs + + @patch('scripts.release.build.image_build_process.get_cache_scope') + @patch('scripts.release.build.image_build_process.get_current_branch') + def test_multi_platform_feature_branch(self, mock_branch, mock_scope): + """Test cache configuration for multi-platform build on feature branch.""" + mock_branch.return_value = "feature/new-cache" + mock_scope.return_value = "feature-new-cache" + + cache_from, cache_to, repos = build_cache_configuration( + "mongodb-kubernetes" + ) + + # Should have cache entries for both architectures with 3 precedence levels each + assert len(cache_from) == 6 # 2 platforms * 3 refs each (branch-arch + master-arch + shared-arch) + assert len(cache_to) == 2 # 2 platforms * 1 ref each (branch-arch only) + + # Check amd64 cache from precedence + expected_amd64_from = [ + "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-linux-amd64", + "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-amd64", + "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:shared-linux-amd64" + ] + for ref in expected_amd64_from: + assert ref in cache_from + + # Check cache to targets (should only write to branch-specific cache) + cache_to_refs = [entry["ref"] for entry in cache_to] + assert "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-linux-amd64" in cache_to_refs + assert "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-linux-arm64" in cache_to_refs + + # Should not write to master cache when not on master branch + master_refs = [ref for ref in cache_to_refs if ":master-" in ref] + assert len(master_refs) == 0 + + @patch('scripts.release.build.image_build_process.get_cache_scope') + @patch('scripts.release.build.image_build_process.get_current_branch') + def test_patch_build_with_version_id(self, mock_branch, mock_scope): + """Test cache configuration for patch build with version ID.""" + mock_branch.return_value = "feature/new-cache" + mock_scope.return_value = "feature-new-cache-6899b7e3" + + cache_from, cache_to, repos = build_cache_configuration( + "mongodb-kubernetes" + ) + + # Should include version ID in cache refs + assert "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-6899b7e3-linux-amd64" in cache_from + assert cache_to[0]["ref"] == "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-6899b7e3-linux-amd64" + + def test_custom_account_and_region(self): + """Test cache configuration with custom AWS account and region.""" + with patch('scripts.release.build.image_build_process.get_cache_scope') as mock_scope, \ + patch('scripts.release.build.image_build_process.get_current_branch') as mock_branch: + + mock_branch.return_value = "master" + mock_scope.return_value = "master" + + cache_from, cache_to, repos = build_cache_configuration( + "test-image", account_id="123456789012", region="us-west-2" + ) + + # Should use custom account and region + expected_registry = "123456789012.dkr.ecr.us-west-2.amazonaws.com/dev/cache/test-image" + assert cache_from[0].startswith(f"type=registry,ref={expected_registry}") + assert cache_to[0]["ref"].startswith(expected_registry) + + def test_empty_platforms_fallback(self): + """Test cache configuration with empty platforms list.""" + with patch('scripts.release.build.image_build_process.get_cache_scope') as mock_scope, \ + patch('scripts.release.build.image_build_process.get_current_branch') as mock_branch: + + mock_branch.return_value = "master" + mock_scope.return_value = "master" + + cache_from, cache_to, repos = build_cache_configuration( + "test-image" + ) + + # Should fallback to linux-amd64 + assert "linux-amd64" in cache_from[0] + assert "linux-amd64" in cache_to[0]["ref"] + + +class TestEnsureAllCacheRepositories: + """Test cache repository management.""" + + @patch('scripts.release.build.image_build_process.ensure_ecr_cache_repository') + def test_ensure_multiple_repositories(self, mock_ensure): + """Test ensuring multiple cache repositories.""" + image_names = ["mongodb-kubernetes", "init-database", "init-ops-manager"] + + ensure_all_cache_repositories(image_names) + + # Should call ensure_ecr_cache_repository for each image + assert mock_ensure.call_count == 3 + expected_calls = [ + "dev/cache/mongodb-kubernetes", + "dev/cache/init-database", + "dev/cache/init-ops-manager" + ] + actual_calls = [call[0][0] for call in mock_ensure.call_args_list] + assert actual_calls == expected_calls + + @patch('scripts.release.build.image_build_process.ensure_ecr_cache_repository') + def test_ensure_with_custom_region(self, mock_ensure): + """Test ensuring repositories with custom region.""" + image_names = ["test-image"] + + ensure_all_cache_repositories(image_names, region="eu-west-1") + + mock_ensure.assert_called_once_with("dev/cache/test-image", "eu-west-1") + + @patch('scripts.release.build.image_build_process.ensure_ecr_cache_repository') + def test_ensure_empty_list(self, mock_ensure): + """Test ensuring repositories with empty image list.""" + ensure_all_cache_repositories([]) + + mock_ensure.assert_not_called() From fa47edd6eee495211a4cbe5a4998417001858e0c Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 17:33:41 +0200 Subject: [PATCH 16/23] per branch --- scripts/release/branch_detection.py | 27 +-- scripts/release/build/image_build_process.py | 63 +++--- .../release/tests/branch_detection_test.py | 88 ++++---- .../release/tests/image_build_process_test.py | 203 +++--------------- 4 files changed, 116 insertions(+), 265 deletions(-) diff --git a/scripts/release/branch_detection.py b/scripts/release/branch_detection.py index f0b819b4a..1f5e01e62 100644 --- a/scripts/release/branch_detection.py +++ b/scripts/release/branch_detection.py @@ -10,7 +10,7 @@ import subprocess from typing import Optional -from scripts.release.constants import is_running_in_evg, is_evg_patch, get_version_id +from scripts.release.constants import get_version_id, is_evg_patch, is_running_in_evg def get_current_branch() -> Optional[str]: @@ -28,10 +28,7 @@ def get_current_branch() -> Optional[str]: # Local development - use git directly try: result = subprocess.run( - ["git", "rev-parse", "--abbrev-ref", "HEAD"], - capture_output=True, - text=True, - check=True + ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, check=True ) branch = result.stdout.strip() return branch if branch != "HEAD" else "master" @@ -48,27 +45,24 @@ def get_current_branch() -> Optional[str]: ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"], capture_output=True, text=True, - check=True + check=True, ) # Get current commit hash current_commit = subprocess.run( - ["git", "rev-parse", "HEAD"], - capture_output=True, - text=True, - check=True + ["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True ).stdout.strip() # Find branches that point to the current commit, excluding evg-pr-test-* branches - for line in result.stdout.strip().split('\n'): + for line in result.stdout.strip().split("\n"): if not line: continue parts = line.split() if len(parts) >= 2: ref_name, commit_hash = parts[0], parts[1] - if commit_hash == current_commit and not ref_name.startswith('origin/evg-pr-test-'): + if commit_hash == current_commit and not ref_name.startswith("origin/evg-pr-test-"): # Remove 'origin/' prefix - branch = ref_name.replace('origin/', '', 1) + branch = ref_name.replace("origin/", "", 1) return branch except (subprocess.CalledProcessError, FileNotFoundError): @@ -77,10 +71,7 @@ def get_current_branch() -> Optional[str]: # For non-patch builds, try to get the branch from git try: result = subprocess.run( - ["git", "rev-parse", "--abbrev-ref", "HEAD"], - capture_output=True, - text=True, - check=True + ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, check=True ) branch = result.stdout.strip() if branch and branch != "HEAD": @@ -108,6 +99,6 @@ def get_cache_scope() -> str: # Sanitize branch name for use in image tags # Replace invalid characters with hyphens and convert to lowercase sanitized_branch = branch.lower() - sanitized_branch = ''.join(c if c.isalnum() or c in '-_.' else '-' for c in sanitized_branch) + sanitized_branch = "".join(c if c.isalnum() or c in "-_." else "-" for c in sanitized_branch) return sanitized_branch diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index de4f62cdb..0ece52870 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -1,6 +1,6 @@ # This file is the new Sonar import base64 -from typing import Dict, List +from typing import Dict, List, Any import boto3 import docker @@ -28,59 +28,57 @@ def ensure_ecr_cache_repository(repository_name: str, region: str = "us-east-1") raise -def build_cache_configuration(base_registry: str): +def build_cache_configuration(base_registry: str) -> tuple[list[Any], dict[str, str]]: """ - Build cache configuration for branch/arch-scoped BuildKit remote cache. + Build cache configuration for branch-scoped BuildKit remote cache. Implements the cache strategy: - Per-image cache repo: …/dev/cache/ - - Per-branch run with read precedence: branch → master → shared - - Write to branch scope, and also to master if on master branch + - Per-branch run with read precedence: branch → master + - Write to branch scope only - Use BuildKit registry cache exporter with mode=max, oci-mediatypes=true, image-manifest=true - :param base_registry: - :return: tuple of (cache_from_list, cache_to_list, repositories_to_ensure) + :param base_registry: Base registry URL for cache (e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes") """ cache_scope = get_cache_scope() - logger.info(f"Building cache configuration for {cache_scope}") - current_branch = get_current_branch() - # Build cache references with read precedence: branch → master → shared + # Build cache references with read precedence: branch → master cache_from_refs = [] - cache_to_refs = [] - # Read precedence: branch-arch → master-arch → shared-arch - branch_arch_ref = f"{base_registry}:{cache_scope}" - master_arch_ref = f"{base_registry}:master" - shared_arch_ref = f"{base_registry}:shared" + # Read precedence: branch → master + branch_ref = f"{base_registry}:{cache_scope}" + master_ref = f"{base_registry}:master" # Add to cache_from in order of precedence if cache_scope != "master": - cache_from_refs.append(f"type=registry,ref={branch_arch_ref}") - cache_from_refs.append(f"type=registry,ref={master_arch_ref}") + cache_from_refs.append({"type": "registry", "ref": branch_ref}) + cache_from_refs.append({"type": "registry", "ref": master_ref}) else: - cache_from_refs.append(f"type=registry,ref={master_arch_ref}") - cache_from_refs.append(f"type=registry,ref={shared_arch_ref}") + cache_from_refs.append({"type": "registry", "ref": master_ref}) - cache_to_refs.append({ + cache_to_refs = { "type": "registry", - "ref": branch_arch_ref, + "ref": branch_ref, "mode": "max", "oci-mediatypes": "true", "image-manifest": "true" - }) + } - if current_branch == "master" and branch_arch_ref != master_arch_ref: - cache_to_refs.append({ - "type": "registry", - "ref": master_arch_ref, - "mode": "max", - "oci-mediatypes": "true", - "image-manifest": "true" - }) + return cache_from_refs, cache_to_refs - return cache_from_refs, cache_to_refs +def ensure_all_cache_repositories(cache_image_names: List[str], region: str = "us-east-1"): + """ + Ensure all cache repositories exist for the given image names. + + This is useful for pre-creating repositories before builds start. + + :param cache_image_names: List of image names (e.g., ["mongodb-kubernetes", "init-database"]) + :param region: AWS region for ECR + """ + for image_name in cache_image_names: + cache_repo_name = f"dev/cache/{image_name}" + ensure_ecr_cache_repository(cache_repo_name, region) def ecr_login_boto3(region: str, account_id: str): @@ -187,7 +185,7 @@ def execute_docker_build( base_registry ) - # ensure_ecr_cache_repository(base_cache_repo) + ensure_ecr_cache_repository(base_cache_repo) logger.info(f"Building image: {tag}") logger.info(f"Platforms: {platforms}") @@ -205,7 +203,6 @@ def execute_docker_build( if len(platforms) > 1: logger.info(f"Multi-platform build for {len(platforms)} architectures") - # Build the image using buildx, builder must be already initialized docker_cmd.buildx.build( context_path=path, file=dockerfile, diff --git a/scripts/release/tests/branch_detection_test.py b/scripts/release/tests/branch_detection_test.py index 711ea34e7..6fad9db4a 100644 --- a/scripts/release/tests/branch_detection_test.py +++ b/scripts/release/tests/branch_detection_test.py @@ -1,23 +1,23 @@ import os import subprocess -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch import pytest from scripts.release.branch_detection import ( - get_current_branch, get_cache_scope, + get_current_branch, + get_version_id, is_evg_patch, is_running_in_evg, - get_version_id, ) class TestGetCurrentBranch: """Test branch detection logic for different scenarios.""" - @patch('scripts.release.branch_detection.is_running_in_evg') - @patch('subprocess.run') + @patch("scripts.release.branch_detection.is_running_in_evg") + @patch("subprocess.run") def test_local_development_master_branch(self, mock_run, mock_is_evg): """Test local development on master branch.""" mock_is_evg.return_value = False @@ -27,14 +27,11 @@ def test_local_development_master_branch(self, mock_run, mock_is_evg): assert result == "master" mock_run.assert_called_once_with( - ["git", "rev-parse", "--abbrev-ref", "HEAD"], - capture_output=True, - text=True, - check=True + ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, check=True ) - @patch('scripts.release.branch_detection.is_running_in_evg') - @patch('subprocess.run') + @patch("scripts.release.branch_detection.is_running_in_evg") + @patch("subprocess.run") def test_local_development_feature_branch(self, mock_run, mock_is_evg): """Test local development on feature branch.""" mock_is_evg.return_value = False @@ -44,8 +41,8 @@ def test_local_development_feature_branch(self, mock_run, mock_is_evg): assert result == "feature/new-cache" - @patch('scripts.release.branch_detection.is_running_in_evg') - @patch('subprocess.run') + @patch("scripts.release.branch_detection.is_running_in_evg") + @patch("subprocess.run") def test_local_development_detached_head(self, mock_run, mock_is_evg): """Test local development in detached HEAD state.""" mock_is_evg.return_value = False @@ -55,8 +52,8 @@ def test_local_development_detached_head(self, mock_run, mock_is_evg): assert result == "master" # fallback to master - @patch('scripts.release.branch_detection.is_running_in_evg') - @patch('subprocess.run') + @patch("scripts.release.branch_detection.is_running_in_evg") + @patch("subprocess.run") def test_local_development_git_error(self, mock_run, mock_is_evg): """Test local development when git command fails.""" mock_is_evg.return_value = False @@ -66,9 +63,9 @@ def test_local_development_git_error(self, mock_run, mock_is_evg): assert result == "master" # fallback to master - @patch('scripts.release.branch_detection.is_running_in_evg') - @patch('scripts.release.branch_detection.is_evg_patch') - @patch('subprocess.run') + @patch("scripts.release.branch_detection.is_running_in_evg") + @patch("scripts.release.branch_detection.is_evg_patch") + @patch("subprocess.run") def test_evergreen_non_patch_build(self, mock_run, mock_is_patch, mock_is_evg): """Test Evergreen non-patch build.""" mock_is_evg.return_value = True @@ -79,9 +76,9 @@ def test_evergreen_non_patch_build(self, mock_run, mock_is_patch, mock_is_evg): assert result == "master" - @patch('scripts.release.branch_detection.is_running_in_evg') - @patch('scripts.release.branch_detection.is_evg_patch') - @patch('subprocess.run') + @patch("scripts.release.branch_detection.is_running_in_evg") + @patch("scripts.release.branch_detection.is_evg_patch") + @patch("subprocess.run") def test_evergreen_patch_build_branch_detection(self, mock_run, mock_is_patch, mock_is_evg): """Test Evergreen patch build with successful branch detection.""" mock_is_evg.return_value = True @@ -89,17 +86,20 @@ def test_evergreen_patch_build_branch_detection(self, mock_run, mock_is_patch, m # Mock git for-each-ref output mock_run.side_effect = [ - MagicMock(stdout="origin/feature/cache-improvement abc123\norigin/evg-pr-test-123 abc123\norigin/main def456\n", returncode=0), - MagicMock(stdout="abc123\n", returncode=0) + MagicMock( + stdout="origin/feature/cache-improvement abc123\norigin/evg-pr-test-123 abc123\norigin/main def456\n", + returncode=0, + ), + MagicMock(stdout="abc123\n", returncode=0), ] result = get_current_branch() assert result == "feature/cache-improvement" - @patch('scripts.release.branch_detection.is_running_in_evg') - @patch('scripts.release.branch_detection.is_evg_patch') - @patch('subprocess.run') + @patch("scripts.release.branch_detection.is_running_in_evg") + @patch("scripts.release.branch_detection.is_evg_patch") + @patch("subprocess.run") def test_evergreen_patch_build_fallback(self, mock_run, mock_is_patch, mock_is_evg): """Test Evergreen patch build fallback when branch detection fails.""" mock_is_evg.return_value = True @@ -114,9 +114,9 @@ def test_evergreen_patch_build_fallback(self, mock_run, mock_is_patch, mock_is_e class TestGetCacheScope: """Test cache scope generation for different scenarios.""" - @patch('scripts.release.branch_detection.get_current_branch') - @patch('scripts.release.branch_detection.is_evg_patch') - @patch('scripts.release.branch_detection.get_version_id') + @patch("scripts.release.branch_detection.get_current_branch") + @patch("scripts.release.branch_detection.is_evg_patch") + @patch("scripts.release.branch_detection.get_version_id") def test_master_branch_non_patch(self, mock_version_id, mock_is_patch, mock_branch): """Test cache scope for master branch non-patch build.""" mock_branch.return_value = "master" @@ -127,9 +127,9 @@ def test_master_branch_non_patch(self, mock_version_id, mock_is_patch, mock_bran assert result == "master" - @patch('scripts.release.branch_detection.get_current_branch') - @patch('scripts.release.branch_detection.is_evg_patch') - @patch('scripts.release.branch_detection.get_version_id') + @patch("scripts.release.branch_detection.get_current_branch") + @patch("scripts.release.branch_detection.is_evg_patch") + @patch("scripts.release.branch_detection.get_version_id") def test_feature_branch_non_patch(self, mock_version_id, mock_is_patch, mock_branch): """Test cache scope for feature branch non-patch build.""" mock_branch.return_value = "feature/new-cache" @@ -140,9 +140,9 @@ def test_feature_branch_non_patch(self, mock_version_id, mock_is_patch, mock_bra assert result == "feature-new-cache" - @patch('scripts.release.branch_detection.get_current_branch') - @patch('scripts.release.branch_detection.is_evg_patch') - @patch('scripts.release.branch_detection.get_version_id') + @patch("scripts.release.branch_detection.get_current_branch") + @patch("scripts.release.branch_detection.is_evg_patch") + @patch("scripts.release.branch_detection.get_version_id") def test_patch_build_with_version_id(self, mock_version_id, mock_is_patch, mock_branch): """Test cache scope for patch build with version ID.""" mock_branch.return_value = "feature/new-cache" @@ -153,9 +153,9 @@ def test_patch_build_with_version_id(self, mock_version_id, mock_is_patch, mock_ assert result == "feature-new-cache-6899b7e3" - @patch('scripts.release.branch_detection.get_current_branch') - @patch('scripts.release.branch_detection.is_evg_patch') - @patch('scripts.release.branch_detection.get_version_id') + @patch("scripts.release.branch_detection.get_current_branch") + @patch("scripts.release.branch_detection.is_evg_patch") + @patch("scripts.release.branch_detection.get_version_id") def test_patch_build_without_version_id(self, mock_version_id, mock_is_patch, mock_branch): """Test cache scope for patch build without version ID.""" mock_branch.return_value = "feature/new-cache" @@ -166,9 +166,9 @@ def test_patch_build_without_version_id(self, mock_version_id, mock_is_patch, mo assert result == "feature-new-cache" - @patch('scripts.release.branch_detection.get_current_branch') - @patch('scripts.release.branch_detection.is_evg_patch') - @patch('scripts.release.branch_detection.get_version_id') + @patch("scripts.release.branch_detection.get_current_branch") + @patch("scripts.release.branch_detection.is_evg_patch") + @patch("scripts.release.branch_detection.get_version_id") def test_branch_name_sanitization(self, mock_version_id, mock_is_patch, mock_branch): """Test branch name sanitization for cache scope.""" mock_branch.return_value = "Feature/NEW_cache@123" @@ -179,9 +179,9 @@ def test_branch_name_sanitization(self, mock_version_id, mock_is_patch, mock_bra assert result == "feature-new_cache-123" - @patch('scripts.release.branch_detection.get_current_branch') - @patch('scripts.release.branch_detection.is_evg_patch') - @patch('scripts.release.branch_detection.get_version_id') + @patch("scripts.release.branch_detection.get_current_branch") + @patch("scripts.release.branch_detection.is_evg_patch") + @patch("scripts.release.branch_detection.get_version_id") def test_dependabot_branch(self, mock_version_id, mock_is_patch, mock_branch): """Test cache scope for dependabot branch.""" mock_branch.return_value = "dependabot/npm_and_yarn/lodash-4.17.21" diff --git a/scripts/release/tests/image_build_process_test.py b/scripts/release/tests/image_build_process_test.py index 1b766e1ef..23162b8b3 100644 --- a/scripts/release/tests/image_build_process_test.py +++ b/scripts/release/tests/image_build_process_test.py @@ -1,209 +1,72 @@ -from unittest.mock import patch, MagicMock - -import pytest +from unittest.mock import patch from scripts.release.build.image_build_process import ( build_cache_configuration, - ensure_all_cache_repositories, ) class TestBuildCacheConfiguration: """Test cache configuration building for different scenarios.""" - @patch('scripts.release.build.image_build_process.get_cache_scope') - @patch('scripts.release.build.image_build_process.get_current_branch') - def test_single_platform_master_branch(self, mock_branch, mock_scope): - """Test cache configuration for single platform build on master branch.""" + @patch("scripts.release.build.image_build_process.get_cache_scope") + @patch("scripts.release.build.image_build_process.get_current_branch") + def test_master_branch(self, mock_branch, mock_scope): + """Test cache configuration for master branch.""" mock_branch.return_value = "master" mock_scope.return_value = "master" - cache_from, cache_to, repos = build_cache_configuration( - "mongodb-kubernetes" - ) + base_registry = "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes" + cache_from, cache_to = build_cache_configuration(base_registry) - # Should read from master-arch and shared-arch + # Should read from master and shared expected_from = [ - "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-amd64", - "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:shared-linux-amd64" + {"type": "registry", "ref": f"{base_registry}:master"}, + {"type": "registry", "ref": f"{base_registry}:shared"}, ] assert cache_from == expected_from - # Should write to master-arch + # Should write to master assert len(cache_to) == 1 - assert cache_to[0]["ref"] == "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-amd64" + assert cache_to[0]["ref"] == f"{base_registry}:master" assert cache_to[0]["mode"] == "max" assert cache_to[0]["oci-mediatypes"] == "true" assert cache_to[0]["image-manifest"] == "true" - # Should ensure one repository - assert repos == {"dev/cache/mongodb-kubernetes"} - - @patch('scripts.release.build.image_build_process.get_cache_scope') - @patch('scripts.release.build.image_build_process.get_current_branch') - def test_single_platform_feature_branch(self, mock_branch, mock_scope): - """Test cache configuration for single platform build on feature branch.""" + @patch("scripts.release.build.image_build_process.get_cache_scope") + @patch("scripts.release.build.image_build_process.get_current_branch") + def test_feature_branch(self, mock_branch, mock_scope): + """Test cache configuration for feature branch.""" mock_branch.return_value = "feature/new-cache" mock_scope.return_value = "feature-new-cache" - cache_from, cache_to, repos = build_cache_configuration( - "mongodb-kubernetes" - ) + base_registry = "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes" + cache_from, cache_to = build_cache_configuration(base_registry) - # Should read from branch-arch, master-arch, and shared-arch + # Should read from branch, master, and shared expected_from = [ - "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-linux-amd64", - "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-amd64", - "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:shared-linux-amd64" + {"type": "registry", "ref": f"{base_registry}:feature-new-cache"}, + {"type": "registry", "ref": f"{base_registry}:master"}, + {"type": "registry", "ref": f"{base_registry}:shared"}, ] assert cache_from == expected_from - # Should write to branch-arch only (not master since we're not on master) + # Should write to branch only (not master since we're not on master) assert len(cache_to) == 1 - assert cache_to[0]["ref"] == "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-linux-amd64" - - @patch('scripts.release.build.image_build_process.get_cache_scope') - @patch('scripts.release.build.image_build_process.get_current_branch') - def test_multi_platform_master_branch(self, mock_branch, mock_scope): - """Test cache configuration for multi-platform build on master branch.""" - mock_branch.return_value = "master" - mock_scope.return_value = "master" - - cache_from, cache_to, repos = build_cache_configuration( - "mongodb-kubernetes" - ) - - # Should have cache entries for both architectures - assert len(cache_from) == 4 # 2 platforms * 2 refs each (master-arch + shared-arch) - assert len(cache_to) == 2 # 2 platforms * 1 ref each (master-arch) - - # Check amd64 cache from - assert "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-amd64" in cache_from - assert "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:shared-linux-amd64" in cache_from - - # Check arm64 cache from - assert "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-arm64" in cache_from - assert "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:shared-linux-arm64" in cache_from - - # Check cache to targets - cache_to_refs = [entry["ref"] for entry in cache_to] - assert "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-amd64" in cache_to_refs - assert "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-arm64" in cache_to_refs - - @patch('scripts.release.build.image_build_process.get_cache_scope') - @patch('scripts.release.build.image_build_process.get_current_branch') - def test_multi_platform_feature_branch(self, mock_branch, mock_scope): - """Test cache configuration for multi-platform build on feature branch.""" - mock_branch.return_value = "feature/new-cache" - mock_scope.return_value = "feature-new-cache" - - cache_from, cache_to, repos = build_cache_configuration( - "mongodb-kubernetes" - ) - - # Should have cache entries for both architectures with 3 precedence levels each - assert len(cache_from) == 6 # 2 platforms * 3 refs each (branch-arch + master-arch + shared-arch) - assert len(cache_to) == 2 # 2 platforms * 1 ref each (branch-arch only) - - # Check amd64 cache from precedence - expected_amd64_from = [ - "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-linux-amd64", - "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:master-linux-amd64", - "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:shared-linux-amd64" - ] - for ref in expected_amd64_from: - assert ref in cache_from - - # Check cache to targets (should only write to branch-specific cache) - cache_to_refs = [entry["ref"] for entry in cache_to] - assert "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-linux-amd64" in cache_to_refs - assert "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-linux-arm64" in cache_to_refs - - # Should not write to master cache when not on master branch - master_refs = [ref for ref in cache_to_refs if ":master-" in ref] - assert len(master_refs) == 0 + assert cache_to[0]["ref"] == f"{base_registry}:feature-new-cache" + assert cache_to[0]["mode"] == "max" + assert cache_to[0]["oci-mediatypes"] == "true" + assert cache_to[0]["image-manifest"] == "true" - @patch('scripts.release.build.image_build_process.get_cache_scope') - @patch('scripts.release.build.image_build_process.get_current_branch') + @patch("scripts.release.build.image_build_process.get_cache_scope") + @patch("scripts.release.build.image_build_process.get_current_branch") def test_patch_build_with_version_id(self, mock_branch, mock_scope): """Test cache configuration for patch build with version ID.""" mock_branch.return_value = "feature/new-cache" mock_scope.return_value = "feature-new-cache-6899b7e3" - cache_from, cache_to, repos = build_cache_configuration( - "mongodb-kubernetes" - ) + base_registry = "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes" + cache_from, cache_to = build_cache_configuration(base_registry) # Should include version ID in cache refs - assert "type=registry,ref=268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-6899b7e3-linux-amd64" in cache_from - assert cache_to[0]["ref"] == "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes:feature-new-cache-6899b7e3-linux-amd64" - - def test_custom_account_and_region(self): - """Test cache configuration with custom AWS account and region.""" - with patch('scripts.release.build.image_build_process.get_cache_scope') as mock_scope, \ - patch('scripts.release.build.image_build_process.get_current_branch') as mock_branch: - - mock_branch.return_value = "master" - mock_scope.return_value = "master" - - cache_from, cache_to, repos = build_cache_configuration( - "test-image", account_id="123456789012", region="us-west-2" - ) - - # Should use custom account and region - expected_registry = "123456789012.dkr.ecr.us-west-2.amazonaws.com/dev/cache/test-image" - assert cache_from[0].startswith(f"type=registry,ref={expected_registry}") - assert cache_to[0]["ref"].startswith(expected_registry) - - def test_empty_platforms_fallback(self): - """Test cache configuration with empty platforms list.""" - with patch('scripts.release.build.image_build_process.get_cache_scope') as mock_scope, \ - patch('scripts.release.build.image_build_process.get_current_branch') as mock_branch: - - mock_branch.return_value = "master" - mock_scope.return_value = "master" - - cache_from, cache_to, repos = build_cache_configuration( - "test-image" - ) - - # Should fallback to linux-amd64 - assert "linux-amd64" in cache_from[0] - assert "linux-amd64" in cache_to[0]["ref"] - - -class TestEnsureAllCacheRepositories: - """Test cache repository management.""" - - @patch('scripts.release.build.image_build_process.ensure_ecr_cache_repository') - def test_ensure_multiple_repositories(self, mock_ensure): - """Test ensuring multiple cache repositories.""" - image_names = ["mongodb-kubernetes", "init-database", "init-ops-manager"] - - ensure_all_cache_repositories(image_names) - - # Should call ensure_ecr_cache_repository for each image - assert mock_ensure.call_count == 3 - expected_calls = [ - "dev/cache/mongodb-kubernetes", - "dev/cache/init-database", - "dev/cache/init-ops-manager" - ] - actual_calls = [call[0][0] for call in mock_ensure.call_args_list] - assert actual_calls == expected_calls - - @patch('scripts.release.build.image_build_process.ensure_ecr_cache_repository') - def test_ensure_with_custom_region(self, mock_ensure): - """Test ensuring repositories with custom region.""" - image_names = ["test-image"] - - ensure_all_cache_repositories(image_names, region="eu-west-1") - - mock_ensure.assert_called_once_with("dev/cache/test-image", "eu-west-1") - - @patch('scripts.release.build.image_build_process.ensure_ecr_cache_repository') - def test_ensure_empty_list(self, mock_ensure): - """Test ensuring repositories with empty image list.""" - ensure_all_cache_repositories([]) - - mock_ensure.assert_not_called() + assert cache_from[0]["ref"] == f"{base_registry}:feature-new-cache-6899b7e3" + assert cache_to[0]["ref"] == f"{base_registry}:feature-new-cache-6899b7e3" From 4594db5a4da6e3cebb82cbaed484a0b4087ee3ba Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 17:34:47 +0200 Subject: [PATCH 17/23] per branch --- scripts/release/branch_detection.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/release/branch_detection.py b/scripts/release/branch_detection.py index 1f5e01e62..155aab86e 100644 --- a/scripts/release/branch_detection.py +++ b/scripts/release/branch_detection.py @@ -19,8 +19,7 @@ def get_current_branch() -> Optional[str]: In Evergreen CI: - For patch builds: tries to detect the original branch (not evg-pr-test-* branches) - - For master builds: returns the actual branch name - - Falls back to 'master' if detection fails + - For master builds: returns master :return: branch name or 'master' as fallback """ From 3599dfb945efd6bc45bd5e05a8291ab6a4543a8e Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 18:03:16 +0200 Subject: [PATCH 18/23] per branch --- scripts/release/branch_detection.py | 74 ++++------------------------- 1 file changed, 9 insertions(+), 65 deletions(-) diff --git a/scripts/release/branch_detection.py b/scripts/release/branch_detection.py index 155aab86e..8beabdd44 100644 --- a/scripts/release/branch_detection.py +++ b/scripts/release/branch_detection.py @@ -6,81 +6,25 @@ Evergreen patch builds, Evergreen regular builds). """ -import os import subprocess from typing import Optional -from scripts.release.constants import get_version_id, is_evg_patch, is_running_in_evg - def get_current_branch() -> Optional[str]: """ Detect the current git branch for cache scoping. - In Evergreen CI: - - For patch builds: tries to detect the original branch (not evg-pr-test-* branches) - - For master builds: returns master - :return: branch name or 'master' as fallback """ - if not is_running_in_evg(): - # Local development - use git directly - try: - result = subprocess.run( - ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, check=True - ) - branch = result.stdout.strip() - return branch if branch != "HEAD" else "master" - except (subprocess.CalledProcessError, FileNotFoundError): - return "master" - - # Running in Evergreen - if is_evg_patch(): - # For patch builds, try to detect the original branch - # This logic is based on scripts/evergreen/precommit_bump.sh - try: - # Get all remote refs with their commit hashes - result = subprocess.run( - ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"], - capture_output=True, - text=True, - check=True, - ) - - # Get current commit hash - current_commit = subprocess.run( - ["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True - ).stdout.strip() - - # Find branches that point to the current commit, excluding evg-pr-test-* branches - for line in result.stdout.strip().split("\n"): - if not line: - continue - parts = line.split() - if len(parts) >= 2: - ref_name, commit_hash = parts[0], parts[1] - if commit_hash == current_commit and not ref_name.startswith("origin/evg-pr-test-"): - # Remove 'origin/' prefix - branch = ref_name.replace("origin/", "", 1) - return branch - - except (subprocess.CalledProcessError, FileNotFoundError): - pass - else: - # For non-patch builds, try to get the branch from git - try: - result = subprocess.run( - ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, check=True - ) - branch = result.stdout.strip() - if branch and branch != "HEAD": - return branch - except (subprocess.CalledProcessError, FileNotFoundError): - pass - - # Fallback to master - return "master" - + try: + result = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, check=True + ) + branch = result.stdout.strip() + if branch and branch != "HEAD": + return branch + except (subprocess.CalledProcessError, FileNotFoundError): + return "master" def get_cache_scope() -> str: """ From 25f0a80f62e18b9214c0622bc1bb0bedf140a010 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 18:15:26 +0200 Subject: [PATCH 19/23] make things cleaner --- scripts/release/branch_detection.py | 5 +- scripts/release/build/image_build_process.py | 2 +- .../release/tests/branch_detection_test.py | 195 +++--------------- .../release/tests/image_build_process_test.py | 53 ++--- 4 files changed, 61 insertions(+), 194 deletions(-) diff --git a/scripts/release/branch_detection.py b/scripts/release/branch_detection.py index 8beabdd44..665e62add 100644 --- a/scripts/release/branch_detection.py +++ b/scripts/release/branch_detection.py @@ -21,10 +21,13 @@ def get_current_branch() -> Optional[str]: ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, check=True ) branch = result.stdout.strip() - if branch and branch != "HEAD": + if branch == "HEAD": + return "master" + if branch != "": return branch except (subprocess.CalledProcessError, FileNotFoundError): return "master" + return "master" def get_cache_scope() -> str: """ diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 0ece52870..a8c2a2cd3 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -185,7 +185,7 @@ def execute_docker_build( base_registry ) - ensure_ecr_cache_repository(base_cache_repo) + # ensure_ecr_cache_repository(base_cache_repo) logger.info(f"Building image: {tag}") logger.info(f"Platforms: {platforms}") diff --git a/scripts/release/tests/branch_detection_test.py b/scripts/release/tests/branch_detection_test.py index 6fad9db4a..f55993a50 100644 --- a/scripts/release/tests/branch_detection_test.py +++ b/scripts/release/tests/branch_detection_test.py @@ -1,110 +1,76 @@ -import os import subprocess from unittest.mock import MagicMock, patch -import pytest - from scripts.release.branch_detection import ( get_cache_scope, get_current_branch, - get_version_id, - is_evg_patch, - is_running_in_evg, ) class TestGetCurrentBranch: """Test branch detection logic for different scenarios.""" - @patch("scripts.release.branch_detection.is_running_in_evg") @patch("subprocess.run") - def test_local_development_master_branch(self, mock_run, mock_is_evg): - """Test local development on master branch.""" - mock_is_evg.return_value = False + def test_master_branch(self, mock_run): + """Test detection of master branch.""" mock_run.return_value = MagicMock(stdout="master\n", returncode=0) result = get_current_branch() assert result == "master" mock_run.assert_called_once_with( - ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, check=True + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + capture_output=True, + text=True, + check=True ) - @patch("scripts.release.branch_detection.is_running_in_evg") @patch("subprocess.run") - def test_local_development_feature_branch(self, mock_run, mock_is_evg): - """Test local development on feature branch.""" - mock_is_evg.return_value = False + def test_feature_branch(self, mock_run): + """Test detection of feature branch.""" mock_run.return_value = MagicMock(stdout="feature/new-cache\n", returncode=0) result = get_current_branch() assert result == "feature/new-cache" + mock_run.assert_called_once_with( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + capture_output=True, + text=True, + check=True + ) - @patch("scripts.release.branch_detection.is_running_in_evg") @patch("subprocess.run") - def test_local_development_detached_head(self, mock_run, mock_is_evg): - """Test local development in detached HEAD state.""" - mock_is_evg.return_value = False + def test_detached_head(self, mock_run): + """Test detection when in detached HEAD state.""" mock_run.return_value = MagicMock(stdout="HEAD\n", returncode=0) result = get_current_branch() assert result == "master" # fallback to master - @patch("scripts.release.branch_detection.is_running_in_evg") @patch("subprocess.run") - def test_local_development_git_error(self, mock_run, mock_is_evg): - """Test local development when git command fails.""" - mock_is_evg.return_value = False - mock_run.side_effect = subprocess.CalledProcessError(1, "git") + def test_empty_output(self, mock_run): + """Test detection when git returns empty output.""" + mock_run.return_value = MagicMock(stdout="\n", returncode=0) result = get_current_branch() assert result == "master" # fallback to master - @patch("scripts.release.branch_detection.is_running_in_evg") - @patch("scripts.release.branch_detection.is_evg_patch") - @patch("subprocess.run") - def test_evergreen_non_patch_build(self, mock_run, mock_is_patch, mock_is_evg): - """Test Evergreen non-patch build.""" - mock_is_evg.return_value = True - mock_is_patch.return_value = False - mock_run.return_value = MagicMock(stdout="master\n", returncode=0) - - result = get_current_branch() - - assert result == "master" - - @patch("scripts.release.branch_detection.is_running_in_evg") - @patch("scripts.release.branch_detection.is_evg_patch") @patch("subprocess.run") - def test_evergreen_patch_build_branch_detection(self, mock_run, mock_is_patch, mock_is_evg): - """Test Evergreen patch build with successful branch detection.""" - mock_is_evg.return_value = True - mock_is_patch.return_value = True - - # Mock git for-each-ref output - mock_run.side_effect = [ - MagicMock( - stdout="origin/feature/cache-improvement abc123\norigin/evg-pr-test-123 abc123\norigin/main def456\n", - returncode=0, - ), - MagicMock(stdout="abc123\n", returncode=0), - ] + def test_git_command_fails(self, mock_run): + """Test fallback when git command fails.""" + mock_run.side_effect = subprocess.CalledProcessError(1, "git") result = get_current_branch() - assert result == "feature/cache-improvement" + assert result == "master" # fallback to master - @patch("scripts.release.branch_detection.is_running_in_evg") - @patch("scripts.release.branch_detection.is_evg_patch") @patch("subprocess.run") - def test_evergreen_patch_build_fallback(self, mock_run, mock_is_patch, mock_is_evg): - """Test Evergreen patch build fallback when branch detection fails.""" - mock_is_evg.return_value = True - mock_is_patch.return_value = True - mock_run.side_effect = subprocess.CalledProcessError(1, "git") + def test_git_not_found(self, mock_run): + """Test fallback when git is not found.""" + mock_run.side_effect = FileNotFoundError("git not found") result = get_current_branch() @@ -114,124 +80,31 @@ def test_evergreen_patch_build_fallback(self, mock_run, mock_is_patch, mock_is_e class TestGetCacheScope: """Test cache scope generation for different scenarios.""" - @patch("scripts.release.branch_detection.get_current_branch") - @patch("scripts.release.branch_detection.is_evg_patch") - @patch("scripts.release.branch_detection.get_version_id") - def test_master_branch_non_patch(self, mock_version_id, mock_is_patch, mock_branch): - """Test cache scope for master branch non-patch build.""" - mock_branch.return_value = "master" - mock_is_patch.return_value = False - mock_version_id.return_value = None - - result = get_cache_scope() - - assert result == "master" @patch("scripts.release.branch_detection.get_current_branch") - @patch("scripts.release.branch_detection.is_evg_patch") - @patch("scripts.release.branch_detection.get_version_id") - def test_feature_branch_non_patch(self, mock_version_id, mock_is_patch, mock_branch): - """Test cache scope for feature branch non-patch build.""" + def test_feature_branch(self, mock_branch): + """Test cache scope for feature branch.""" mock_branch.return_value = "feature/new-cache" - mock_is_patch.return_value = False - mock_version_id.return_value = None result = get_cache_scope() assert result == "feature-new-cache" @patch("scripts.release.branch_detection.get_current_branch") - @patch("scripts.release.branch_detection.is_evg_patch") - @patch("scripts.release.branch_detection.get_version_id") - def test_patch_build_with_version_id(self, mock_version_id, mock_is_patch, mock_branch): - """Test cache scope for patch build with version ID.""" - mock_branch.return_value = "feature/new-cache" - mock_is_patch.return_value = True - mock_version_id.return_value = "6899b7e35bfaee00077db986" - - result = get_cache_scope() - - assert result == "feature-new-cache-6899b7e3" - - @patch("scripts.release.branch_detection.get_current_branch") - @patch("scripts.release.branch_detection.is_evg_patch") - @patch("scripts.release.branch_detection.get_version_id") - def test_patch_build_without_version_id(self, mock_version_id, mock_is_patch, mock_branch): - """Test cache scope for patch build without version ID.""" - mock_branch.return_value = "feature/new-cache" - mock_is_patch.return_value = True - mock_version_id.return_value = None - - result = get_cache_scope() - - assert result == "feature-new-cache" - - @patch("scripts.release.branch_detection.get_current_branch") - @patch("scripts.release.branch_detection.is_evg_patch") - @patch("scripts.release.branch_detection.get_version_id") - def test_branch_name_sanitization(self, mock_version_id, mock_is_patch, mock_branch): + def test_branch_name_sanitization(self, mock_branch): """Test branch name sanitization for cache scope.""" mock_branch.return_value = "Feature/NEW_cache@123" - mock_is_patch.return_value = False - mock_version_id.return_value = None result = get_cache_scope() assert result == "feature-new_cache-123" + @patch("scripts.release.branch_detection.get_current_branch") - @patch("scripts.release.branch_detection.is_evg_patch") - @patch("scripts.release.branch_detection.get_version_id") - def test_dependabot_branch(self, mock_version_id, mock_is_patch, mock_branch): - """Test cache scope for dependabot branch.""" - mock_branch.return_value = "dependabot/npm_and_yarn/lodash-4.17.21" - mock_is_patch.return_value = False - mock_version_id.return_value = None + def test_complex_branch_name(self, mock_branch): + """Test cache scope for complex branch name with special characters.""" + mock_branch.return_value = "user/feature-123_test.branch" result = get_cache_scope() - assert result == "dependabot-npm_and_yarn-lodash-4.17.21" - - -class TestEnvironmentDetection: - """Test environment detection functions.""" - - def test_is_evg_patch_true(self): - """Test is_evg_patch returns True when is_patch is 'true'.""" - with patch.dict(os.environ, {"is_patch": "true"}): - assert is_evg_patch() is True - - def test_is_evg_patch_false(self): - """Test is_evg_patch returns False when is_patch is 'false'.""" - with patch.dict(os.environ, {"is_patch": "false"}): - assert is_evg_patch() is False - - def test_is_evg_patch_default(self): - """Test is_evg_patch returns False when is_patch is not set.""" - with patch.dict(os.environ, {}, clear=True): - assert is_evg_patch() is False - - def test_is_running_in_evg_true(self): - """Test is_running_in_evg returns True when RUNNING_IN_EVG is 'true'.""" - with patch.dict(os.environ, {"RUNNING_IN_EVG": "true"}): - assert is_running_in_evg() is True - - def test_is_running_in_evg_false(self): - """Test is_running_in_evg returns False when RUNNING_IN_EVG is 'false'.""" - with patch.dict(os.environ, {"RUNNING_IN_EVG": "false"}): - assert is_running_in_evg() is False - - def test_is_running_in_evg_default(self): - """Test is_running_in_evg returns False when RUNNING_IN_EVG is not set.""" - with patch.dict(os.environ, {}, clear=True): - assert is_running_in_evg() is False - - def test_get_version_id_set(self): - """Test get_version_id returns value when version_id is set.""" - with patch.dict(os.environ, {"version_id": "6899b7e35bfaee00077db986"}): - assert get_version_id() == "6899b7e35bfaee00077db986" - - def test_get_version_id_not_set(self): - """Test get_version_id returns None when version_id is not set.""" - with patch.dict(os.environ, {}, clear=True): - assert get_version_id() is None + assert result == "user-feature-123_test.branch" diff --git a/scripts/release/tests/image_build_process_test.py b/scripts/release/tests/image_build_process_test.py index 23162b8b3..efa6802d4 100644 --- a/scripts/release/tests/image_build_process_test.py +++ b/scripts/release/tests/image_build_process_test.py @@ -2,6 +2,7 @@ from scripts.release.build.image_build_process import ( build_cache_configuration, + ensure_all_cache_repositories, ) @@ -9,64 +10,54 @@ class TestBuildCacheConfiguration: """Test cache configuration building for different scenarios.""" @patch("scripts.release.build.image_build_process.get_cache_scope") - @patch("scripts.release.build.image_build_process.get_current_branch") - def test_master_branch(self, mock_branch, mock_scope): + def test_master_branch(self, mock_scope): """Test cache configuration for master branch.""" - mock_branch.return_value = "master" mock_scope.return_value = "master" base_registry = "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes" cache_from, cache_to = build_cache_configuration(base_registry) - # Should read from master and shared + # Should read from master only expected_from = [ - {"type": "registry", "ref": f"{base_registry}:master"}, - {"type": "registry", "ref": f"{base_registry}:shared"}, + {"type": "registry", "ref": f"{base_registry}:master"} ] assert cache_from == expected_from # Should write to master - assert len(cache_to) == 1 - assert cache_to[0]["ref"] == f"{base_registry}:master" - assert cache_to[0]["mode"] == "max" - assert cache_to[0]["oci-mediatypes"] == "true" - assert cache_to[0]["image-manifest"] == "true" + assert cache_to["ref"] == f"{base_registry}:master" + assert cache_to["mode"] == "max" + assert cache_to["oci-mediatypes"] == "true" + assert cache_to["image-manifest"] == "true" @patch("scripts.release.build.image_build_process.get_cache_scope") - @patch("scripts.release.build.image_build_process.get_current_branch") - def test_feature_branch(self, mock_branch, mock_scope): + def test_feature_branch(self, mock_scope): """Test cache configuration for feature branch.""" - mock_branch.return_value = "feature/new-cache" mock_scope.return_value = "feature-new-cache" base_registry = "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes" cache_from, cache_to = build_cache_configuration(base_registry) - # Should read from branch, master, and shared + # Should read from branch and master expected_from = [ {"type": "registry", "ref": f"{base_registry}:feature-new-cache"}, - {"type": "registry", "ref": f"{base_registry}:master"}, - {"type": "registry", "ref": f"{base_registry}:shared"}, + {"type": "registry", "ref": f"{base_registry}:master"} ] assert cache_from == expected_from - # Should write to branch only (not master since we're not on master) - assert len(cache_to) == 1 - assert cache_to[0]["ref"] == f"{base_registry}:feature-new-cache" - assert cache_to[0]["mode"] == "max" - assert cache_to[0]["oci-mediatypes"] == "true" - assert cache_to[0]["image-manifest"] == "true" + # Should write to branch only + assert cache_to["ref"] == f"{base_registry}:feature-new-cache" + assert cache_to["mode"] == "max" + assert cache_to["oci-mediatypes"] == "true" + assert cache_to["image-manifest"] == "true" @patch("scripts.release.build.image_build_process.get_cache_scope") - @patch("scripts.release.build.image_build_process.get_current_branch") - def test_patch_build_with_version_id(self, mock_branch, mock_scope): - """Test cache configuration for patch build with version ID.""" - mock_branch.return_value = "feature/new-cache" - mock_scope.return_value = "feature-new-cache-6899b7e3" + def test_sanitized_branch_name(self, mock_scope): + """Test cache configuration with sanitized branch name.""" + mock_scope.return_value = "feature-new-cache-123" base_registry = "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes" cache_from, cache_to = build_cache_configuration(base_registry) - # Should include version ID in cache refs - assert cache_from[0]["ref"] == f"{base_registry}:feature-new-cache-6899b7e3" - assert cache_to[0]["ref"] == f"{base_registry}:feature-new-cache-6899b7e3" + # Should include sanitized branch name in cache refs + assert cache_from[0]["ref"] == f"{base_registry}:feature-new-cache-123" + assert cache_to["ref"] == f"{base_registry}:feature-new-cache-123" From 7fb509c9d657b83b61da1758658406e0c29c085f Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 22 Aug 2025 18:30:07 +0200 Subject: [PATCH 20/23] only do caching on ecr --- scripts/release/build/image_build_process.py | 41 ++++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 7cab9a501..9054cbf22 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -170,22 +170,7 @@ def execute_docker_build( # Convert build args to the format expected by python_on_whales build_args = {k: str(v) for k, v in args.items()} - registry_name = tag.split(":")[0] if ":" in tag else tag - # e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes" -> "mongodb-kubernetes" - cache_image_name = registry_name.split("/")[-1] - - # Base cache repository name - base_cache_repo = f"dev/cache/{cache_image_name}" - - # Build branch/arch-scoped cache configuration - base_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/{base_cache_repo}" - - # TODO CLOUDP-335471: use env variables to configure AWS region and account ID - cache_from_refs, cache_to_refs = build_cache_configuration( - base_registry - ) - - # ensure_ecr_cache_repository(base_cache_repo) + cache_from_refs, cache_to_refs = _build_cache(tags) logger.info(f"Building image: {tags}") logger.info(f"Platforms: {platforms}") @@ -223,3 +208,27 @@ def execute_docker_build( except Exception as e: logger.error(f"Failed to build image {tags}: {e}") raise RuntimeError(f"Failed to build image {tags}: {str(e)}") + + +def _build_cache(tags): + # Filter tags to only include ECR ones (containing ".dkr.ecr.") + ecr_tags = [tag for tag in tags if ".dkr.ecr." in tag] + if not ecr_tags: + return [], {} + primary_tag = ecr_tags[0] + # Extract the repository URL without tag (e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes:1.0.0" -> "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes") + repository_url = primary_tag.split(":")[0] if ":" in primary_tag else primary_tag + # Extract just the image name from the repository URL (e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes" -> "mongodb-kubernetes") + cache_image_name = repository_url.split("/")[-1] + # Base cache repository name + base_cache_repo = f"dev/cache/{cache_image_name}" + # Build branch/arch-scoped cache configuration + base_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/{base_cache_repo}" + + ensure_ecr_cache_repository(base_cache_repo) + + # TODO CLOUDP-335471: use env variables to configure AWS region and account ID + cache_from_refs, cache_to_refs = build_cache_configuration( + base_registry + ) + return cache_from_refs, cache_to_refs From 2b5dc48575cc42c910696eae9f3826d9010b78e2 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Mon, 1 Sep 2025 14:15:41 +0200 Subject: [PATCH 21/23] remove leftover --- scripts/release/build/image_build_process.py | 14 -------------- scripts/release/tests/image_build_process_test.py | 1 - 2 files changed, 15 deletions(-) diff --git a/scripts/release/build/image_build_process.py b/scripts/release/build/image_build_process.py index 9054cbf22..dcf486c49 100644 --- a/scripts/release/build/image_build_process.py +++ b/scripts/release/build/image_build_process.py @@ -67,20 +67,6 @@ def build_cache_configuration(base_registry: str) -> tuple[list[Any], dict[str, return cache_from_refs, cache_to_refs -def ensure_all_cache_repositories(cache_image_names: List[str], region: str = "us-east-1"): - """ - Ensure all cache repositories exist for the given image names. - - This is useful for pre-creating repositories before builds start. - - :param cache_image_names: List of image names (e.g., ["mongodb-kubernetes", "init-database"]) - :param region: AWS region for ECR - """ - for image_name in cache_image_names: - cache_repo_name = f"dev/cache/{image_name}" - ensure_ecr_cache_repository(cache_repo_name, region) - - def ecr_login_boto3(region: str, account_id: str): """ Fetches an auth token from ECR via boto3 and logs diff --git a/scripts/release/tests/image_build_process_test.py b/scripts/release/tests/image_build_process_test.py index 0b464eaf9..08e2a5112 100644 --- a/scripts/release/tests/image_build_process_test.py +++ b/scripts/release/tests/image_build_process_test.py @@ -2,7 +2,6 @@ from scripts.release.build.image_build_process import ( build_cache_configuration, - ensure_all_cache_repositories, ) From 7723a13bab379c944f937f5e5801bf7a824f42b3 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Mon, 1 Sep 2025 19:57:06 +0200 Subject: [PATCH 22/23] fix evg-pr-parsing logic --- scripts/release/branch_detection.py | 37 +++++++-- .../release/tests/branch_detection_test.py | 78 +++++++++++++------ 2 files changed, 85 insertions(+), 30 deletions(-) diff --git a/scripts/release/branch_detection.py b/scripts/release/branch_detection.py index f463de1ae..40255eb8c 100644 --- a/scripts/release/branch_detection.py +++ b/scripts/release/branch_detection.py @@ -14,19 +14,40 @@ def get_current_branch() -> Optional[str]: """ Detect the current git branch for cache scoping. + In CI environments like Evergreen, git rev-parse --abbrev-ref HEAD returns + auto-generated branch names like evg-pr-testing-. This function finds the original branch name by + looking for remote branches that point to the current commit. + :return: branch name or 'master' as fallback """ try: - result = subprocess.run( - ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, check=True + # Find the original branch (same commit, but not the evg-pr-test-* branch which evg creates) + current_commit_result = subprocess.run( + ["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True + ) + current_commit = current_commit_result.stdout.strip() + + # Get all remote branches with their commit hashes + remote_branches_result = subprocess.run( + ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"], + capture_output=True, text=True, check=True ) - branch = result.stdout.strip() - if branch == "HEAD": - return "master" - if branch != "": - return branch + + # Find branches that point to the current commit, excluding auto-generated CI branches + for line in remote_branches_result.stdout.strip().split('\n'): + if not line: + continue + parts = line.split() + if len(parts) >= 2: + branch_name, commit_hash = parts[0], parts[1] + if commit_hash == current_commit and not "evg-pr-test" in branch_name: + # Remove 'origin/' prefix + original_branch = branch_name.replace('origin/', '', 1) + if original_branch: + return original_branch except (subprocess.CalledProcessError, FileNotFoundError): - return "master" + return 'master' + return "master" diff --git a/scripts/release/tests/branch_detection_test.py b/scripts/release/tests/branch_detection_test.py index b52e62456..84c5247cd 100644 --- a/scripts/release/tests/branch_detection_test.py +++ b/scripts/release/tests/branch_detection_test.py @@ -11,50 +11,84 @@ class TestGetCurrentBranch: """Test branch detection logic for different scenarios.""" @patch("subprocess.run") - def test_master_branch(self, mock_run): - """Test detection of master branch.""" - mock_run.return_value = MagicMock(stdout="master\n", returncode=0) + def test_ci_environment_with_original_branch(self, mock_run): + """Test detection of original branch in CI environment like Evergreen.""" + # Mock the sequence of git commands + def side_effect(cmd, **kwargs): + if cmd == ["git", "rev-parse", "HEAD"]: + return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) + elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: + return MagicMock(stdout="origin/master 1234567890abcdef\norigin/add-caching 4cecea664abcd1234567890\norigin/evg-pr-test-12345 4cecea664abcd1234567890\n", returncode=0) + elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: + return MagicMock(stdout="evg-pr-test-12345\n", returncode=0) + return MagicMock(stdout="", returncode=1) + + mock_run.side_effect = side_effect result = get_current_branch() - assert result == "master" - mock_run.assert_called_once_with( - ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, check=True - ) + assert result == "add-caching" @patch("subprocess.run") - def test_feature_branch(self, mock_run): - """Test detection of feature branch.""" - mock_run.return_value = MagicMock(stdout="feature/new-cache\n", returncode=0) + def test_master_branch_fallback(self, mock_run): + """Test detection of master branch using fallback method.""" + # Mock the sequence where sophisticated method fails but fallback works + def side_effect(cmd, **kwargs): + if cmd == ["git", "rev-parse", "HEAD"]: + return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) + elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: + raise subprocess.CalledProcessError(1, "git") # This fails, triggering fallback + elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: + return MagicMock(stdout="master\n", returncode=0) + return MagicMock(stdout="", returncode=1) + + mock_run.side_effect = side_effect result = get_current_branch() - assert result == "feature/new-cache" - mock_run.assert_called_once_with( - ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, check=True - ) + assert result == "master" @patch("subprocess.run") - def test_detached_head(self, mock_run): - """Test detection when in detached HEAD state.""" - mock_run.return_value = MagicMock(stdout="HEAD\n", returncode=0) + def test_detached_head_fallback(self, mock_run): + """Test detection when in detached HEAD state using fallback.""" + # Mock the sequence where sophisticated method fails and fallback returns HEAD + def side_effect(cmd, **kwargs): + if cmd == ["git", "rev-parse", "HEAD"]: + return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) + elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: + raise subprocess.CalledProcessError(1, "git") # This fails, triggering fallback + elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: + return MagicMock(stdout="HEAD\n", returncode=0) + return MagicMock(stdout="", returncode=1) + + mock_run.side_effect = side_effect result = get_current_branch() assert result == "master" # fallback to master @patch("subprocess.run") - def test_empty_output(self, mock_run): - """Test detection when git returns empty output.""" - mock_run.return_value = MagicMock(stdout="\n", returncode=0) + def test_ci_branch_filtered_out_in_fallback(self, mock_run): + """Test that CI auto-generated branches are filtered out in fallback.""" + # Mock the sequence where sophisticated method fails and fallback returns CI branch + def side_effect(cmd, **kwargs): + if cmd == ["git", "rev-parse", "HEAD"]: + return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) + elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: + raise subprocess.CalledProcessError(1, "git") # This fails, triggering fallback + elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: + return MagicMock(stdout="evg-pr-test-12345\n", returncode=0) + return MagicMock(stdout="", returncode=1) + + mock_run.side_effect = side_effect result = get_current_branch() - assert result == "master" # fallback to master + assert result == "master" # fallback to master when CI branch is detected @patch("subprocess.run") def test_git_command_fails(self, mock_run): - """Test fallback when git command fails.""" + """Test fallback when all git commands fail.""" mock_run.side_effect = subprocess.CalledProcessError(1, "git") result = get_current_branch() From d7c9cc1d55bc71733ed2553b9a1e7c716cfc0e15 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Tue, 2 Sep 2025 11:54:51 +0200 Subject: [PATCH 23/23] fix linter --- scripts/release/branch_detection.py | 14 +++++++------- scripts/release/tests/branch_detection_test.py | 9 ++++++++- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/scripts/release/branch_detection.py b/scripts/release/branch_detection.py index 40255eb8c..bf37a180e 100644 --- a/scripts/release/branch_detection.py +++ b/scripts/release/branch_detection.py @@ -22,19 +22,19 @@ def get_current_branch() -> Optional[str]: """ try: # Find the original branch (same commit, but not the evg-pr-test-* branch which evg creates) - current_commit_result = subprocess.run( - ["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True - ) + current_commit_result = subprocess.run(["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True) current_commit = current_commit_result.stdout.strip() # Get all remote branches with their commit hashes remote_branches_result = subprocess.run( ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"], - capture_output=True, text=True, check=True + capture_output=True, + text=True, + check=True, ) # Find branches that point to the current commit, excluding auto-generated CI branches - for line in remote_branches_result.stdout.strip().split('\n'): + for line in remote_branches_result.stdout.strip().split("\n"): if not line: continue parts = line.split() @@ -42,11 +42,11 @@ def get_current_branch() -> Optional[str]: branch_name, commit_hash = parts[0], parts[1] if commit_hash == current_commit and not "evg-pr-test" in branch_name: # Remove 'origin/' prefix - original_branch = branch_name.replace('origin/', '', 1) + original_branch = branch_name.replace("origin/", "", 1) if original_branch: return original_branch except (subprocess.CalledProcessError, FileNotFoundError): - return 'master' + return "master" return "master" diff --git a/scripts/release/tests/branch_detection_test.py b/scripts/release/tests/branch_detection_test.py index 84c5247cd..0b2ae2b69 100644 --- a/scripts/release/tests/branch_detection_test.py +++ b/scripts/release/tests/branch_detection_test.py @@ -13,12 +13,16 @@ class TestGetCurrentBranch: @patch("subprocess.run") def test_ci_environment_with_original_branch(self, mock_run): """Test detection of original branch in CI environment like Evergreen.""" + # Mock the sequence of git commands def side_effect(cmd, **kwargs): if cmd == ["git", "rev-parse", "HEAD"]: return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: - return MagicMock(stdout="origin/master 1234567890abcdef\norigin/add-caching 4cecea664abcd1234567890\norigin/evg-pr-test-12345 4cecea664abcd1234567890\n", returncode=0) + return MagicMock( + stdout="origin/master 1234567890abcdef\norigin/add-caching 4cecea664abcd1234567890\norigin/evg-pr-test-12345 4cecea664abcd1234567890\n", + returncode=0, + ) elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: return MagicMock(stdout="evg-pr-test-12345\n", returncode=0) return MagicMock(stdout="", returncode=1) @@ -32,6 +36,7 @@ def side_effect(cmd, **kwargs): @patch("subprocess.run") def test_master_branch_fallback(self, mock_run): """Test detection of master branch using fallback method.""" + # Mock the sequence where sophisticated method fails but fallback works def side_effect(cmd, **kwargs): if cmd == ["git", "rev-parse", "HEAD"]: @@ -51,6 +56,7 @@ def side_effect(cmd, **kwargs): @patch("subprocess.run") def test_detached_head_fallback(self, mock_run): """Test detection when in detached HEAD state using fallback.""" + # Mock the sequence where sophisticated method fails and fallback returns HEAD def side_effect(cmd, **kwargs): if cmd == ["git", "rev-parse", "HEAD"]: @@ -70,6 +76,7 @@ def side_effect(cmd, **kwargs): @patch("subprocess.run") def test_ci_branch_filtered_out_in_fallback(self, mock_run): """Test that CI auto-generated branches are filtered out in fallback.""" + # Mock the sequence where sophisticated method fails and fallback returns CI branch def side_effect(cmd, **kwargs): if cmd == ["git", "rev-parse", "HEAD"]: