diff --git a/backend/python/plugins/azuredevops/azuredevops/models.py b/backend/python/plugins/azuredevops/azuredevops/models.py index 27c1124296e..73f4d0764a8 100644 --- a/backend/python/plugins/azuredevops/azuredevops/models.py +++ b/backend/python/plugins/azuredevops/azuredevops/models.py @@ -37,6 +37,9 @@ class GitRepositoryConfig(ScopeConfig): refdiff: Optional[RefDiffOptions] deployment_pattern: Optional[re.Pattern] production_pattern: Optional[re.Pattern] + # Optional pattern with capture group to extract environment name from job/stage names + # Example: r'(?:deploy|predeploy)[_-](.+?)(?:[_-](?:helm|terraform))?$' extracts 'xxxx-prod' from 'deploy_xxxx-prod_helm' + environment_pattern: Optional[re.Pattern] class GitRepository(ToolScope, table=True): @@ -146,3 +149,6 @@ def __str__(self) -> str: finish_time: Optional[datetime.datetime] state: JobState result: Optional[JobResult] + identifier: Optional[str] + type: Optional[str] + parent_id: Optional[str] = Field(source='/parentId') diff --git a/backend/python/plugins/azuredevops/azuredevops/streams/builds.py b/backend/python/plugins/azuredevops/azuredevops/streams/builds.py index 5230bffa34e..8bf91d7974c 100644 --- a/backend/python/plugins/azuredevops/azuredevops/streams/builds.py +++ b/backend/python/plugins/azuredevops/azuredevops/streams/builds.py @@ -72,10 +72,16 @@ def convert(self, b: Build, ctx: Context): if ctx.scope_config.deployment_pattern and ctx.scope_config.deployment_pattern.search(b.name): type = devops.CICDType.DEPLOYMENT - environment = devops.CICDEnvironment.PRODUCTION - if ctx.scope_config.production_pattern is not None and ctx.scope_config.production_pattern.search( - b.name) is None: - environment = None + # Determine if this is a production environment + # Match production_pattern against pipeline name + environment = None + if ctx.scope_config.production_pattern is not None: + if ctx.scope_config.production_pattern.search(b.name): + environment = devops.CICDEnvironment.PRODUCTION + 
def extract_environment_name(name: str, identifier: Optional[str], context: Context) -> Optional[str]:
    """
    Extract an environment name from a job/stage name or identifier.

    The scope config's ``environment_pattern`` is expected to contain a capture
    group; the text captured by group 1 is the environment name.
    For example: r'(?:deploy|predeploy)[_-](.+?)(?:[_-](?:helm|terraform))?$'
    extracts 'xxxx-prod' from 'deploy_xxxx-prod_helm'.

    The name is tried first, then the identifier. A candidate is only accepted
    when group 1 captured a non-empty string, so an optional group that did not
    participate in the match on the name no longer prevents the identifier from
    being tried.

    :param name: job/stage display name.
    :param identifier: job/stage identifier, if any.
    :param context: context whose ``scope_config.environment_pattern`` is read.
    :return: the captured environment name, or None when no pattern is
             configured, the pattern has no capture group, or nothing was captured.
    """
    pattern = context.scope_config.environment_pattern
    if not pattern:
        return None

    for candidate in (name, identifier):
        if not candidate:
            continue
        match = pattern.search(candidate)
        # A pattern without any capture group cannot yield an environment name.
        if match is None or not match.groups():
            continue
        env_name = match.group(1)
        # group(1) is None when the group did not participate in the match;
        # treat that (and an empty capture) as "not found" and keep looking.
        if env_name:
            return env_name

    return None
@pytest.fixture
def context_with_environment_pattern():
    """Context whose scope config sets deployment, production and environment patterns."""
    return (
        ContextBuilder(AzureDevOpsPlugin())
        .with_connection(token='token')
        .with_scope_config(
            deployment_pattern='deploy',
            production_pattern='prod',
            # Captures the environment segment of names such as 'deploy_xxxx-prod_helm'.
            environment_pattern=r'(?:deploy|predeploy)[_-](.+?)(?:[_-](?:helm|terraform))?$'
        )
        .with_scope('johndoe/test-repo', url='https://github.com/johndoe/test-repo')
        .build()
    )


def _raw_timeline_record(name, identifier, **overrides):
    """
    Build the raw timeline record fed to the 'jobs' stream in these tests.

    Defaults describe a completed, succeeded record of type 'Job'; any key can
    be replaced through ``overrides`` (e.g. ``type='Stage'``).
    """
    record = {
        'previousAttempts': [],
        'id': 'cfa20e98-6997-523c-4233-f0a7302c929f',
        'parentId': '9ecf18fe-987d-5811-7c63-300aecae35da',
        'type': 'Job',
        'name': name,
        'build_id': 'azuredevops:Build:1:12',
        'start_time': '2023-02-25T06:22:36.8066667Z',
        'finish_time': '2023-02-25T06:22:43.2333333Z',
        'currentOperation': None,
        'percentComplete': None,
        'state': 'completed',
        'result': 'succeeded',
        'resultCode': None,
        'changeId': 18,
        'lastModified': '0001-01-01T00:00:00',
        'workerName': 'Hosted Agent',
        'queueId': 9,
        'order': 1,
        'details': None,
        'errorCount': 0,
        'warningCount': 0,
        'url': None,
        'log': {
            'id': 10,
            'type': 'Container',
            'url': 'https://dev.azure.com/johndoe/7a3fd40e-2aed-4fac-bac9-511bf1a70206/_apis/build/builds/12/logs/10'
        },
        'task': None,
        'attempt': 1,
        'identifier': identifier,
    }
    record.update(overrides)
    return record


def _expected_deployment_task(name, environment, context, task_id='cfa20e98-6997-523c-4233-f0a7302c929f'):
    """Build the CICDTask expected for a record produced by ``_raw_timeline_record``."""
    return devops.CICDTask(
        id=task_id,
        name=name,
        pipeline_id='azuredevops:Build:1:12',
        status=devops.CICDStatus.DONE,
        original_status='Completed',
        original_result='Succeeded',
        created_date='2023-02-25T06:22:36.8066667Z',
        started_date='2023-02-25T06:22:36.8066667Z',
        finished_date='2023-02-25T06:22:43.2333333Z',
        result=devops.CICDResult.SUCCESS,
        type=devops.CICDType.DEPLOYMENT,
        duration_sec=6.426667213439941,
        environment=environment,
        cicd_scope_id=context.scope.domain_id()
    )


def test_jobs_stream_with_environment_pattern(context_with_environment_pattern):
    """A job named 'deploy_xxxx-prod_helm' is converted to a PRODUCTION deployment task."""
    # The environment pattern captures 'xxxx-prod', which the production pattern 'prod' matches.
    raw = _raw_timeline_record('deploy_xxxx-prod_helm', 'deploy_xxxx-prod_helm.__default')
    expected = _expected_deployment_task(
        'deploy_xxxx-prod_helm',
        devops.CICDEnvironment.PRODUCTION,
        context_with_environment_pattern,
    )
    assert_stream_convert(AzureDevOpsPlugin, 'jobs', raw, expected, context_with_environment_pattern)


def test_jobs_stream_with_environment_pattern_non_prod(context_with_environment_pattern):
    """A job named 'deploy_xxxx-dev_helm' is converted to a deployment task with no environment."""
    # The environment pattern captures 'xxxx-dev', which does not contain 'prod'.
    raw = _raw_timeline_record('deploy_xxxx-dev_helm', 'deploy_xxxx-dev_helm.__default')
    expected = _expected_deployment_task(
        'deploy_xxxx-dev_helm',
        None,
        context_with_environment_pattern,
    )
    assert_stream_convert(AzureDevOpsPlugin, 'jobs', raw, expected, context_with_environment_pattern)


def test_stage_record_collected():
    """
    A record of type 'Stage' is converted like a job record.

    NOTE(review): this only goes through assert_stream_convert, so it appears to
    cover conversion of a Stage record, not the record-type filter applied when
    collecting - confirm whether a collection-level test is wanted as well.
    """
    context = (
        ContextBuilder(AzureDevOpsPlugin())
        .with_connection(token='token')
        .with_scope_config(
            deployment_pattern='deploy',
            production_pattern='prod'
        )
        .with_scope('johndoe/test-repo', url='https://github.com/johndoe/test-repo')
        .build()
    )

    # Fields that differ from the default job record for this stage record.
    raw = _raw_timeline_record(
        'deploy_prod_stage',
        'deploy_prod_stage',
        id='stage-id-123',
        parentId=None,
        type='Stage',
        workerName=None,
        queueId=None,
        log=None,
    )
    expected = _expected_deployment_task(
        'deploy_prod_stage',
        devops.CICDEnvironment.PRODUCTION,
        context,
        task_id='stage-id-123',
    )
    assert_stream_convert(AzureDevOpsPlugin, 'jobs', raw, expected, context)