Merge pull request #604 from atlanhq/APP-5600

Aryamanz29 · web-flow · commit 732b23b2c25f · 2025-04-21T23:21:55.000+05:30
APP-5600 : Enhance Python SDK to support workflow-run search by status and interval
diff --git a/pyatlan/client/workflow.py b/pyatlan/client/workflow.py
@@ -24,7 +24,17 @@
 )
 from pyatlan.errors import ErrorCode
 from pyatlan.model.enums import AtlanWorkflowPhase, WorkflowPackage
-from pyatlan.model.search import Bool, NestedQuery, Prefix, Query, Regexp, Term
+from pyatlan.model.search import (
+    Bool,
+    Exists,
+    NestedQuery,
+    Prefix,
+    Query,
+    Range,
+    Regexp,
+    Term,
+    Terms,
+)
 from pyatlan.model.workflow import (
     ReRunRequest,
     ScheduleQueriesSearchRequest,
@@ -149,6 +159,51 @@ def find_run_by_id(self, id: str) -> Optional[WorkflowSearchResult]:
         response = self._find_runs(query, size=1)
         return results[0] if (results := response.hits and response.hits.hits) else None
 
+    @validate_arguments
+    def find_runs_by_status_and_time_range(
+        self,
+        status: List[AtlanWorkflowPhase],
+        started_at: Optional[str] = None,
+        finished_at: Optional[str] = None,
+    ) -> List[WorkflowSearchResult]:
+        """
+        Find workflow runs by phase, optionally bounded by their startedAt and/or finishedAt time.
+
+        :param status: list of workflow phases to match against each run's phase label
+        :param started_at: (optional) inclusive lower bound on 'status.startedAt' (e.g. 'now-2h')
+        :param finished_at: (optional) inclusive lower bound on 'status.finishedAt' (e.g. 'now-1h')
+        :returns: list of matching workflow runs, or an empty list when the response has no hits
+        :raises ValidationError: if inputs are invalid
+        :raises AtlanError: on any API communication issue
+        """
+        time_filters = []
+        # Each supplied bound becomes an inclusive (gte) Range clause; none supplied -> no clause.
+        if started_at:
+            time_filters.append(Range(field="status.startedAt", gte=started_at))
+        if finished_at:
+            time_filters.append(Range(field="status.finishedAt", gte=finished_at))
+
+        run_lookup_query = Bool(
+            must=[
+                NestedQuery(
+                    query=Terms(
+                        field="metadata.labels.workflows.argoproj.io/phase.keyword",
+                        values=[s.value for s in status],
+                    ),
+                    path="metadata",
+                ),
+                *time_filters,
+                NestedQuery(  # additionally require that the run carries a creator label
+                    query=Exists(field="metadata.labels.workflows.argoproj.io/creator"),
+                    path="metadata",
+                ),
+            ],
+        )
+
+        run_lookup_results = self._find_runs(run_lookup_query)
+        # Fall back to [] when the response has no hits container or no hit entries.
+        return run_lookup_results.hits and run_lookup_results.hits.hits or []
+
     @validate_arguments
     def _find_latest_run(self, workflow_name: str) -> Optional[WorkflowSearchResult]:
         """
diff --git a/tests/integration/test_workflow_client.py b/tests/integration/test_workflow_client.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # Copyright 2024 Atlan Pte. Ltd.
 import time
+from datetime import datetime, timedelta, timezone
 from typing import Generator
 
 import pytest
@@ -171,6 +172,25 @@ def test_workflow_get_runs_and_stop(client: AtlanClient, workflow: WorkflowRespo
         and workflow_run.source.status.phase == AtlanWorkflowPhase.FAILED
     )
 
+    # Test find run by status and time range
+    runs_status = client.workflow.find_runs_by_status_and_time_range(
+        [AtlanWorkflowPhase.FAILED], started_at="now-1h"
+    )
+    assert runs_status
+    workflow_run_status = runs_status[0]
+    start_time = workflow_run_status.source.status.startedAt  # type: ignore
+    start_datetime = datetime.strptime(start_time, "%Y-%m-%dT%H:%M:%SZ")  # type: ignore
+    start_datetime = start_datetime.replace(tzinfo=timezone.utc)
+    current_time = datetime.now(timezone.utc)
+    time_diff = current_time - start_datetime
+    assert (
+        workflow_run_status
+        and workflow_run_status.source
+        and workflow_run_status.source.status
+        and workflow_run_status.source.status.phase == AtlanWorkflowPhase.FAILED
+        and time_diff < timedelta(hours=1)
+    )
+
 
 def test_workflow_get_all_scheduled_runs(
     client: AtlanClient, workflow: WorkflowResponse
diff --git a/tests/unit/test_workflow_client.py b/tests/unit/test_workflow_client.py
@@ -272,6 +272,23 @@ def test_find_by_type(client: WorkflowClient, mock_api_caller):
     )
 
 
+def test_find_runs_by_status_and_time_range(client: WorkflowClient, mock_api_caller):
+    raw_json = {"shards": {"dummy": None}, "hits": {"total": {"dummy": None}}}
+    mock_api_caller._call_api.return_value = raw_json
+    # The stubbed response carries no hit entries; two phases and both bounds are supplied.
+    status = [AtlanWorkflowPhase.SUCCESS, AtlanWorkflowPhase.FAILED]
+    started_at = "now-2h"
+    finished_at = "now-1h"
+    # Expect an empty list, exactly one API call, and a WorkflowSearchRequest as its payload.
+    assert (
+        client.find_runs_by_status_and_time_range(status, started_at, finished_at) == []
+    )
+    mock_api_caller._call_api.assert_called_once()
+    assert isinstance(
+        mock_api_caller._call_api.call_args.kwargs["request_obj"], WorkflowSearchRequest
+    )
+
+
 def test_find_by_id(
     client: WorkflowClient, search_response: WorkflowSearchResponse, mock_api_caller
 ):