Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from prometheus_client import Histogram

# Histogram of how long the sandbox client spends discovering its API endpoint
# during context-manager setup (gateway IP assignment or kubectl port-forward).
# Labels:
#   status - "success" or "failure", set by the caller when observing.
#   mode   - discovery strategy used; callers pass e.g. "gateway" or
#            "port_forward" (preconfigured URLs are not recorded here).
# NOTE(review): values are milliseconds; Prometheus convention prefers base
# units (seconds, `_seconds` suffix), but renaming would break existing
# dashboards/callers — left as-is deliberately.
DISCOVERY_LATENCY_MS = Histogram(
    "sandbox_client_discovery_latency_ms",
    "Total time in Gateway IP assignment or kubectl port-forward setup.",
    ["status", "mode"],
    # Bucket upper bounds in milliseconds: 100 ms up to 60 s.
    buckets=[100, 500, 1000, 5000, 10000, 30000, 60000]
)
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
SANDBOX_API_GROUP, SANDBOX_API_VERSION, SANDBOX_PLURAL_NAME,
POD_NAME_ANNOTATION,
)
from .metrics import DISCOVERY_LATENCY_MS

logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
Expand Down Expand Up @@ -335,17 +336,34 @@ def __enter__(self) -> 'SandboxClient':
self._wait_for_sandbox_ready()

# STRATEGY SELECTION
if self.base_url:
# Case 1: API URL provided manually (DNS / Internal) -> Do nothing, just use it.
logging.info(f"Using configured API URL: {self.base_url}")

elif self.gateway_name:
# Case 2: Gateway Name provided -> Production Mode (Discovery)
self._wait_for_gateway_ip()
start_time = time.time()
is_preconfigured = bool(self.base_url)
mode = "unknown"

else:
# Case 3: No Gateway, No URL -> Developer Mode (Port Forward to Router)
self._start_and_wait_for_port_forward()
try:
if is_preconfigured:
# Case 1: API URL provided manually (DNS / Internal) -> Do nothing, just use it.
mode = "preconfigured"
logging.info(f"Using configured API URL: {self.base_url}")
# We do not record discovery latency for pre-configured URL
else:
if self.gateway_name:
# Case 2: Gateway Name provided -> Production Mode (Discovery)
mode = "gateway"
self._wait_for_gateway_ip()
else:
# Case 3: No Gateway, No URL -> Developer Mode (Port Forward to Router)
mode = "port_forward"
self._start_and_wait_for_port_forward()

latency_ms = (time.time() - start_time) * 1000
DISCOVERY_LATENCY_MS.labels(status="success", mode=mode).observe(latency_ms)

except Exception:
if not is_preconfigured:
latency_ms = (time.time() - start_time) * 1000
DISCOVERY_LATENCY_MS.labels(status="failure", mode=mode).observe(latency_ms)
raise

return self

Expand Down
1 change: 1 addition & 0 deletions clients/python/agentic-sandbox-client/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies = [
"kubernetes",
"requests",
"pydantic",
"prometheus-client",
]

[project.urls]
Expand Down
20 changes: 20 additions & 0 deletions clients/python/agentic-sandbox-client/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pydantic import ValidationError
from k8s_agent_sandbox import SandboxClient
from k8s_agent_sandbox.sandbox_client import ExecutionResult, FileEntry
from k8s_agent_sandbox.metrics import DISCOVERY_LATENCY_MS

POD_NAME_ANNOTATION = "agents.x-k8s.io/pod-name"

Expand Down Expand Up @@ -164,6 +165,25 @@ async def main(template_name: str, gateway_name: str | None, api_url: str | None
sandbox._request = original_request
print("--- Pydantic Validation Tests Passed ---")

print("\n--- Testing Metrics ---")

# Count how many successful discovery latency metrics were recorded in total
total_discovery_metrics = 0
# We use collect() to safely access Prometheus metric values across all label combinations
for metric in DISCOVERY_LATENCY_MS.collect():
for sample in metric.samples:
if sample.name == "sandbox_client_discovery_latency_ms_count" and sample.labels.get("status") == "success":
total_discovery_metrics += sample.value

# As long as it's not preconfigured, it should have recorded discovery latency
if not api_url:
print(f"Total discovery latency metrics recorded: {total_discovery_metrics}")
assert total_discovery_metrics > 0, "Expected at least one discovery latency metric to be recorded"
else:
print("Skipping discovery latency check because api_url is preconfigured")

print("--- Metrics Tests Passed ---")

except Exception as e:
print(f"\n--- An error occurred during the test: {e} ---")
# The __exit__ method of the Sandbox class will handle cleanup.
Expand Down
142 changes: 142 additions & 0 deletions clients/python/agentic-sandbox-client/test_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import pytest
from unittest.mock import MagicMock, patch
import time

from k8s_agent_sandbox.sandbox_client import SandboxClient
from k8s_agent_sandbox.metrics import DISCOVERY_LATENCY_MS

@pytest.fixture
def mock_k8s_config():
    """Stub both kubeconfig loaders so no real cluster configuration is read."""
    with patch('k8s_agent_sandbox.sandbox_client.config.load_incluster_config'):
        with patch('k8s_agent_sandbox.sandbox_client.config.load_kube_config'):
            yield

@pytest.fixture
def mock_custom_objects_api():
    """Yield the patched CustomObjectsApi so no real K8s API client is built."""
    patcher = patch('k8s_agent_sandbox.sandbox_client.client.CustomObjectsApi')
    api_mock = patcher.start()
    try:
        yield api_mock
    finally:
        patcher.stop()

@pytest.fixture
def mock_create_claim():
    """Replace SandboxClient._create_claim so no claim reaches the cluster."""
    patcher = patch.object(SandboxClient, '_create_claim')
    stub = patcher.start()
    try:
        yield stub
    finally:
        patcher.stop()

@pytest.fixture
def mock_wait_ready():
    """Short-circuit SandboxClient._wait_for_sandbox_ready during setup."""
    with patch.object(SandboxClient, '_wait_for_sandbox_ready') as stub:
        yield stub

def _metric_sum(status, mode):
    """Return the cumulative observed sum for one (status, mode) label pair.

    ``labels()`` auto-creates the child, so this normally cannot raise; the
    guard (narrowed from the original bare ``except:``) only protects the
    test against internal prometheus_client API changes.
    """
    try:
        return DISCOVERY_LATENCY_MS.labels(status=status, mode=mode)._sum.get()
    except Exception:
        return 0.0


@pytest.mark.parametrize(
    "test_name, setup_kwargs, expected_url, should_fail, expected_mode",
    [
        (
            "dev_mode_success",
            {"template_name": "test-template"},
            "http://127.0.0.1:12345",
            False,
            "port_forward"
        ),
        (
            "dev_mode_failure",
            {"template_name": "test-template"},
            None,
            True,
            "port_forward"
        ),
        (
            "gateway_mode_success",
            {"template_name": "test-template", "gateway_name": "test-gw"},
            "http://10.0.0.1",
            False,
            "gateway"
        ),
        (
            "base_url_mode_no_metric",
            {"template_name": "test-template", "api_url": "http://custom-url"},
            "http://custom-url",
            False,
            "preconfigured"
        )
    ]
)
def test_discovery_latency_modes(
    test_name, setup_kwargs, expected_url, should_fail,
    expected_mode,
    mock_k8s_config, mock_custom_objects_api, mock_create_claim, mock_wait_ready
):
    """Verify DISCOVERY_LATENCY_MS is recorded per discovery mode.

    Success and failure paths must increment only their own label pair, and a
    preconfigured API URL must record nothing at all.
    """
    with patch('k8s_agent_sandbox.sandbox_client.subprocess.Popen') as mock_popen, \
         patch('k8s_agent_sandbox.sandbox_client.socket.socket') as mock_socket, \
         patch('k8s_agent_sandbox.sandbox_client.socket.create_connection'), \
         patch('k8s_agent_sandbox.sandbox_client.time.sleep'), \
         patch('k8s_agent_sandbox.sandbox_client.watch.Watch') as mock_watch:

        # Arrange mode-specific mocks based on the test case.
        if "dev_mode" in test_name:
            # Port-forward path: fake the kubectl subprocess and the local
            # socket used to pick a free port.
            mock_process = MagicMock()
            if should_fail:
                mock_process.poll.return_value = 1
                mock_process.communicate.return_value = (b"", b"Crash")
            else:
                mock_process.poll.return_value = None
            mock_popen.return_value = mock_process

            mock_sock_instance = MagicMock()
            mock_sock_instance.getsockname.return_value = ('0.0.0.0', 12345)
            mock_socket.return_value.__enter__.return_value = mock_sock_instance

        elif "gateway_mode" in test_name:
            # Gateway path: fake the watch stream delivering an assigned IP.
            mock_w_instance = MagicMock()
            mock_w_instance.stream.return_value = [{
                "type": "ADDED",
                "object": {
                    "status": {
                        "addresses": [{"value": "10.0.0.1"}]
                    }
                }
            }]
            mock_watch.return_value = mock_w_instance

        # Snapshot metric sums before the run.
        before_success = _metric_sum("success", expected_mode)
        before_failure = _metric_sum("failure", expected_mode)

        client = SandboxClient(**setup_kwargs)

        if should_fail:
            with pytest.raises(RuntimeError):
                with client:
                    pass
        else:
            with client:
                assert client.base_url == expected_url

        # Capture metrics after.
        after_success = _metric_sum("success", expected_mode)
        after_failure = _metric_sum("failure", expected_mode)

        if expected_mode == "preconfigured":
            # For preconfigured URLs, we never record a metric.
            assert after_success == before_success
            assert after_failure == before_failure
        elif should_fail:
            assert after_failure > before_failure
            assert after_success == before_success
        else:
            assert after_success > before_success
            assert after_failure == before_failure