feat(mcp): Add list_cloud_sync_jobs tool with pagination support (#902)

devin-ai-integration[bot] · aldogonzalez8 · web-flow · commit a4fa90115fdc · 2025-12-10T21:13:33.000Z
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: aldo.gonzalez@airbyte.io <aldo.gonzalez@airbyte.io>
diff --git a/airbyte/_util/api_util.py b/airbyte/_util/api_util.py
@@ -47,6 +47,10 @@
 JOB_WAIT_INTERVAL_SECS = 2.0
 JOB_WAIT_TIMEOUT_SECS_DEFAULT = 60 * 60  # 1 hour
 
+# `order_by` values for the list-jobs API, written as "<field>|<direction>"
+JOB_ORDER_BY_CREATED_AT_DESC = "createdAt|DESC"
+JOB_ORDER_BY_CREATED_AT_ASC = "createdAt|ASC"
+
 
 def status_ok(status_code: int) -> bool:
     """Check if a status code is OK."""
@@ -412,8 +416,24 @@ def get_job_logs(
     api_root: str,
     client_id: SecretString,
     client_secret: SecretString,
+    offset: int | None = None,
+    order_by: str | None = None,
 ) -> list[models.JobResponse]:
-    """Get a job's logs."""
+    """Get a list of jobs for a connection.
+
+    Args:
+        workspace_id: The workspace ID.
+        connection_id: The connection ID.
+        limit: Maximum number of jobs to return. Defaults to 100.
+        api_root: The API root URL.
+        client_id: The client ID for authentication.
+        client_secret: The client secret for authentication.
+        offset: Number of jobs to skip from the beginning. Defaults to None (0).
+        order_by: Field and direction to order by (e.g., "createdAt|DESC"). Defaults to None.
+
+    Returns:
+        A list of JobResponse objects.
+    """
     airbyte_instance = get_airbyte_server_instance(
         client_id=client_id,
         client_secret=client_secret,
@@ -424,6 +444,8 @@ def get_job_logs(
             workspace_ids=[workspace_id],
             connection_id=connection_id,
             limit=limit,
+            offset=offset,
+            order_by=order_by,
         ),
     )
     if status_ok(response.status_code) and response.jobs_response:
diff --git a/airbyte/cloud/connections.py b/airbyte/cloud/connections.py
@@ -208,14 +208,38 @@ def __repr__(self) -> str:
     def get_previous_sync_logs(
         self,
         *,
-        limit: int = 10,
+        limit: int = 20,
+        offset: int | None = None,
+        from_tail: bool = True,
     ) -> list[SyncResult]:
-        """Get the previous sync logs for a connection."""
+        """Get previous sync jobs for a connection with pagination support.
+
+        Returns SyncResult objects containing job metadata (job_id, status, bytes_synced,
+        records_synced, start_time). Full log text can be fetched lazily via
+        `SyncResult.get_full_log_text()`.
+
+        Args:
+            limit: Maximum number of jobs to return. Defaults to 20.
+            offset: Number of jobs to skip from the beginning. Defaults to None (0).
+            from_tail: If True, returns jobs ordered newest-first (createdAt DESC).
+                If False, returns jobs ordered oldest-first (createdAt ASC).
+                Defaults to True.
+
+        Returns:
+            A list of SyncResult objects representing the sync jobs.
+        """
+        order_by = (
+            api_util.JOB_ORDER_BY_CREATED_AT_DESC
+            if from_tail
+            else api_util.JOB_ORDER_BY_CREATED_AT_ASC
+        )
         sync_logs: list[JobResponse] = api_util.get_job_logs(
             connection_id=self.connection_id,
             api_root=self.workspace.api_root,
             workspace_id=self.workspace.workspace_id,
             limit=limit,
+            offset=offset,
+            order_by=order_by,
             client_id=self.workspace.client_id,
             client_secret=self.workspace.client_secret,
         )
diff --git a/airbyte/mcp/cloud_ops.py b/airbyte/mcp/cloud_ops.py
@@ -181,6 +181,36 @@ class LogReadResult(BaseModel):
     """Total number of log lines available, shows if any lines were missed due to the limit."""
 
 
+class SyncJobResult(BaseModel):
+    """Summary of a single sync job, as returned in `SyncJobListResult.jobs`."""
+
+    job_id: int
+    """The job ID."""
+    status: str
+    """The job status (e.g., 'succeeded', 'failed', 'running', 'pending')."""
+    bytes_synced: int
+    """Number of bytes synced in this job."""
+    records_synced: int
+    """Number of records synced in this job."""
+    start_time: str
+    """ISO 8601 timestamp of when the job started."""
+    job_url: str
+    """URL to view the job in Airbyte Cloud."""
+
+class SyncJobListResult(BaseModel):
+    """One page of sync jobs, plus the offset and ordering used to fetch it."""
+
+    jobs: list[SyncJobResult]
+    """List of sync jobs."""
+    jobs_count: int
+    """Number of jobs returned in this response."""
+    jobs_offset: int
+    """Offset used for this request (0 if not specified)."""
+    from_tail: bool
+    """Whether jobs are ordered newest-first (True) or oldest-first (False)."""
+
+
 def _get_cloud_workspace(workspace_id: str | None = None) -> CloudWorkspace:
     """Get an authenticated CloudWorkspace.
 
@@ -601,6 +631,111 @@ def get_cloud_sync_status(
     return result
 
 
+@mcp_tool(
+    domain="cloud",
+    read_only=True,
+    idempotent=True,
+    open_world=True,
+    extra_help_text=CLOUD_AUTH_TIP_TEXT,
+)
+def list_cloud_sync_jobs(
+    connection_id: Annotated[
+        str,
+        Field(description="The ID of the Airbyte Cloud connection."),
+    ],
+    *,
+    workspace_id: Annotated[
+        str | None,
+        Field(
+            description=WORKSPACE_ID_TIP_TEXT,
+            default=None,
+        ),
+    ],
+    max_jobs: Annotated[
+        int,
+        Field(
+            description=(
+                "Maximum number of jobs to return. "
+                "Defaults to 20 if not specified. "
+                "Maximum allowed value is 500."
+            ),
+            default=20,
+        ),
+    ],
+    from_tail: Annotated[
+        bool | None,
+        Field(
+            description=(
+                "When True, jobs are ordered newest-first (createdAt DESC). "
+                "When False, jobs are ordered oldest-first (createdAt ASC). "
+                "Defaults to True if `jobs_offset` is not specified. "
+                "Cannot combine `from_tail=True` with `jobs_offset`."
+            ),
+            default=None,
+        ),
+    ],
+    jobs_offset: Annotated[
+        int | None,
+        Field(
+            description=(
+                "Number of jobs to skip from the beginning. "
+                "Cannot be combined with `from_tail=True`."
+            ),
+            default=None,
+        ),
+    ],
+) -> SyncJobListResult:
+    """List sync jobs for a connection with pagination support.
+
+    This tool allows you to retrieve a list of sync jobs for a connection,
+    with control over ordering and pagination. By default, jobs are returned
+    newest-first (from_tail=True).
+    """
+    # Reject jobs_offset combined with an explicit from_tail=True (None/False are allowed)
+    if jobs_offset is not None and from_tail is True:
+        raise PyAirbyteInputError(
+            message="Cannot specify both 'jobs_offset' and 'from_tail=True' parameters.",
+            context={"jobs_offset": jobs_offset, "from_tail": from_tail},
+        )
+
+    # from_tail unset: newest-first when no offset is given, otherwise oldest-first
+    if from_tail is None and jobs_offset is None:
+        from_tail = True
+    elif from_tail is None:
+        from_tail = False
+
+    workspace: CloudWorkspace = _get_cloud_workspace(workspace_id)
+    connection = workspace.get_connection(connection_id=connection_id)
+
+    # Cap at 500 to avoid overloading agent context; non-positive values fall back to 20
+    effective_limit = min(max_jobs, 500) if max_jobs > 0 else 20
+
+    sync_results = connection.get_previous_sync_logs(
+        limit=effective_limit,
+        offset=jobs_offset,
+        from_tail=from_tail,
+    )
+
+    jobs = [
+        SyncJobResult(
+            job_id=sync_result.job_id,
+            status=str(sync_result.get_job_status()),
+            bytes_synced=sync_result.bytes_synced,
+            records_synced=sync_result.records_synced,
+            start_time=sync_result.start_time.isoformat(),
+            job_url=sync_result.job_url,
+        )
+        for sync_result in sync_results
+    ]
+
+    return SyncJobListResult(
+        jobs=jobs,
+        jobs_count=len(jobs),
+        jobs_offset=jobs_offset or 0,
+        from_tail=from_tail,
+    )
+
+
 @mcp_tool(
     domain="cloud",
     read_only=True,
diff --git a/tests/integration_tests/cloud/test_cloud_workspaces.py b/tests/integration_tests/cloud/test_cloud_workspaces.py
@@ -6,6 +6,8 @@
 
 from __future__ import annotations
 
+import pytest
+
 import airbyte as ab
 from airbyte.cloud import CloudWorkspace
 from airbyte.cloud.connections import CloudConnection
@@ -60,6 +62,10 @@ def test_deploy_dummy_source(
     cloud_workspace.permanently_delete_source(cloud_source)
 
 
+@pytest.mark.skip(
+    "Test is being flaky. TODO: Fix upstream Cloud API issue with missing secrets. "
+    "See: https://github.com/airbytehq/airbyte-internal-issues/issues/15502"
+)
 def test_deploy_connection(
     cloud_workspace: CloudWorkspace,
     deployable_dummy_source: ab.Source,