Skip to content

Commit 2880eef

Browse files
authored
feat: expose endpoint to actor(alibaba#515) alibaba#514 (alibaba#566)
* feat: expose metrics_endpoint to actor (alibaba#515) * add runtime config to abstract operator * fix test case
1 parent 194c7fa commit 2880eef

File tree

7 files changed

+94
-18
lines changed

7 files changed

+94
-18
lines changed

rock/sandbox/base_actor.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class BaseActor:
3535
_user_id: str = "default"
3636
_experiment_id: str = "default"
3737
_namespace = "default"
38+
_metrics_endpoint = ""
3839

3940
def __init__(
4041
self,
@@ -80,7 +81,8 @@ def _init_monitor(self):
8081
role = self._role
8182
self.host = host
8283
logger.info(f"Initializing MetricsCollector with host={host}, port={port}, " f"env={env}, role={role}")
83-
self.otlp_exporter = OTLPMetricExporter(endpoint=f"http://{host}:{port}/v1/metrics")
84+
endpoint = self._metrics_endpoint or f"http://{host}:{port}/v1/metrics"
85+
self.otlp_exporter = OTLPMetricExporter(endpoint=endpoint)
8486
self.metric_reader = PeriodicExportingMetricReader(
8587
self.otlp_exporter,
8688
export_interval_millis=self._export_interval_millis,
@@ -182,6 +184,12 @@ def __del__(self):
182184
logger.error(f"Error stopping monitoring: {e}")
183185
pass
184186

187+
def set_metrics_endpoint(self, metrics_endpoint: str):
188+
self._metrics_endpoint = metrics_endpoint
189+
190+
def get_metrics_endpoint(self) -> str:
191+
return self._metrics_endpoint
192+
185193
async def get_sandbox_statistics(self):
186194
"""Get sandbox statistics - default implementation returns None"""
187195
if isinstance(self._deployment, DockerDeployment):

rock/sandbox/operator/abstract.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from abc import ABC, abstractmethod
22

33
from rock.actions.sandbox.sandbox_info import SandboxInfo
4+
from rock.config import RuntimeConfig
45
from rock.deployments.config import DeploymentConfig
56
from rock.utils.providers.nacos_provider import NacosConfigProvider
67
from rock.utils.providers.redis_provider import RedisProvider
@@ -9,6 +10,7 @@
910
class AbstractOperator(ABC):
1011
_redis_provider: RedisProvider | None = None
1112
_nacos_provider: NacosConfigProvider | None = None
13+
_runtime_config: RuntimeConfig | None = None
1214

1315
@abstractmethod
1416
async def submit(self, config: DeploymentConfig, user_info: dict = {}) -> SandboxInfo:

rock/sandbox/operator/factory.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from typing import Any
55

66
from rock.admin.core.ray_service import RayService
7-
from rock.config import RuntimeConfig, K8sConfig
7+
from rock.config import K8sConfig, RuntimeConfig
88
from rock.logger import init_logger
99
from rock.sandbox.operator.abstract import AbstractOperator
1010
from rock.sandbox.operator.k8s.operator import K8sOperator
@@ -58,7 +58,7 @@ def create_operator(context: OperatorContext) -> AbstractOperator:
5858
if context.ray_service is None:
5959
raise ValueError("RayService is required for RayOperator")
6060
logger.info("Creating RayOperator")
61-
ray_operator = RayOperator(ray_service=context.ray_service)
61+
ray_operator = RayOperator(ray_service=context.ray_service, runtime_config=context.runtime_config)
6262
if context.nacos_provider is not None:
6363
ray_operator.set_nacos_provider(context.nacos_provider)
6464
return ray_operator

rock/sandbox/operator/ray.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from rock.admin.core.ray_service import RayService
99
from rock.admin.core.redis_key import alive_sandbox_key
1010
from rock.common.constants import GET_STATUS_SWITCH
11+
from rock.config import RuntimeConfig
1112
from rock.deployments.config import DockerDeploymentConfig
1213
from rock.deployments.constants import Port
1314
from rock.deployments.docker import DockerDeployment
@@ -25,8 +26,9 @@
2526

2627

2728
class RayOperator(AbstractOperator):
28-
def __init__(self, ray_service: RayService):
29+
def __init__(self, ray_service: RayService, runtime_config: RuntimeConfig):
2930
self._ray_service = ray_service
31+
self._runtime_config = runtime_config
3032

3133
def _get_actor_name(self, sandbox_id: str) -> str:
3234
return f"sandbox-{sandbox_id}"
@@ -54,6 +56,7 @@ async def submit(self, config: DockerDeploymentConfig, user_info: dict = {}) ->
5456
sandbox_id = config.container_name
5557
logger.info(f"[{sandbox_id}] start_async params:{json.dumps(config.model_dump(), indent=2)}")
5658
sandbox_actor: SandboxActor = await self.create_actor(config)
59+
sandbox_actor.set_metrics_endpoint.remote(self._runtime_config.metrics_endpoint)
5760
sandbox_actor.start.remote()
5861
user_id = user_info.get("user_id", "default")
5962
experiment_id = user_info.get("experiment_id", "default")

tests/unit/conftest.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
import asyncio
22
import time
33
from pathlib import Path
4+
from unittest.mock import MagicMock
45

56
import pytest
67
import ray
78
from fakeredis import aioredis
8-
from ray.util.state import list_actors
9-
10-
from unittest.mock import MagicMock
11-
129
from kubernetes import client
10+
from ray.util.state import list_actors
1311

1412
from rock.admin.core.ray_service import RayService
1513
from rock.config import K8sConfig, RockConfig
@@ -69,8 +67,13 @@ def ray_service(rock_config: RockConfig, ray_init_shutdown):
6967

7068

7169
@pytest.fixture
72-
def ray_operator(ray_service):
73-
ray_operator = RayOperator(ray_service)
70+
def runtime_config(rock_config: RockConfig):
71+
return rock_config.runtime
72+
73+
74+
@pytest.fixture
75+
def ray_operator(ray_service, runtime_config):
76+
ray_operator = RayOperator(ray_service, runtime_config)
7477
ray_operator.set_nacos_provider(None)
7578
return ray_operator
7679

tests/unit/sandbox/operator/test_ray_operator.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,17 @@
99

1010

1111
@pytest.mark.need_ray
12-
def test_use_rocklet_returns_false_when_nacos_provider_is_none(ray_service):
12+
def test_use_rocklet_returns_false_when_nacos_provider_is_none(ray_service, runtime_config):
1313
"""When _nacos_provider is None, use_rocklet should return False"""
14-
operator = RayOperator(ray_service=ray_service)
14+
operator = RayOperator(ray_service=ray_service, runtime_config=runtime_config)
1515
operator.set_nacos_provider(None)
1616
assert operator.use_rocklet() is False
1717

1818

1919
@pytest.mark.need_ray
20-
def test_use_rocklet_returns_false_when_switch_is_off(ray_service):
20+
def test_use_rocklet_returns_false_when_switch_is_off(ray_service, runtime_config):
2121
"""When switch status is False, use_rocklet should return False"""
22-
operator = RayOperator(ray_service=ray_service)
22+
operator = RayOperator(ray_service=ray_service, runtime_config=runtime_config)
2323
mock_nacos_provider = MagicMock()
2424
mock_nacos_provider.get_switch_status.return_value = False
2525
operator.set_nacos_provider(mock_nacos_provider)
@@ -28,20 +28,21 @@ def test_use_rocklet_returns_false_when_switch_is_off(ray_service):
2828

2929

3030
@pytest.mark.need_ray
31-
def test_use_rocklet_returns_true_when_switch_is_on(ray_service):
31+
def test_use_rocklet_returns_true_when_switch_is_on(ray_service, runtime_config):
3232
"""When switch status is True, use_rocklet should return True"""
33-
operator = RayOperator(ray_service=ray_service)
33+
operator = RayOperator(ray_service=ray_service, runtime_config=runtime_config)
3434
mock_nacos_provider = MagicMock()
3535
mock_nacos_provider.get_switch_status.return_value = True
3636
operator.set_nacos_provider(mock_nacos_provider)
3737
assert operator.use_rocklet() is True
3838
mock_nacos_provider.get_switch_status.assert_called_once_with(GET_STATUS_SWITCH)
3939

40+
4041
@pytest.mark.need_docker
4142
@pytest.mark.need_ray
4243
@pytest.mark.asyncio
43-
async def test_ray_operator(ray_service):
44-
operator = RayOperator(ray_service=ray_service)
44+
async def test_ray_operator(ray_service, runtime_config):
45+
operator = RayOperator(ray_service=ray_service, runtime_config=runtime_config)
4546
operator.set_nacos_provider(None)
4647
start_response: SandboxInfo = await operator.submit(DockerDeploymentConfig(container_name="test"))
4748
assert start_response.get("sandbox_id") == "test"

tests/unit/test_base_actor.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import pytest
2+
import ray
3+
4+
from rock.deployments.config import LocalDeploymentConfig
5+
from rock.logger import init_logger
6+
from rock.sandbox.sandbox_actor import SandboxActor
7+
8+
logger = init_logger(__name__)
9+
10+
11+
@pytest.mark.need_ray
12+
@pytest.mark.asyncio
13+
async def test_set_and_get_metrics_endpoint(ray_init_shutdown):
14+
"""Test setting and getting metrics endpoint together using Ray actor"""
15+
sandbox_config = LocalDeploymentConfig(role="test", env="dev")
16+
actor_name = "test-set-and-get-metrics-endpoint"
17+
18+
# Create SandboxActor using Ray
19+
sandbox_actor = SandboxActor.options(name=actor_name, lifetime="detached").remote(
20+
sandbox_config, sandbox_config.get_deployment()
21+
)
22+
23+
try:
24+
# Test initial setting
25+
test_endpoint = "http://test-host:9090/v1/metrics"
26+
ray.get(sandbox_actor.set_metrics_endpoint.remote(test_endpoint))
27+
result = ray.get(sandbox_actor.get_metrics_endpoint.remote())
28+
assert result == test_endpoint
29+
logger.info(f"Initial endpoint set successfully: {result}")
30+
31+
# Test updating the endpoint
32+
new_endpoint = "http://new-host:5000/v1/metrics"
33+
ray.get(sandbox_actor.set_metrics_endpoint.remote(new_endpoint))
34+
result = ray.get(sandbox_actor.get_metrics_endpoint.remote())
35+
assert result == new_endpoint
36+
logger.info(f"Updated endpoint successfully: {result}")
37+
finally:
38+
ray.kill(sandbox_actor)
39+
40+
41+
@pytest.mark.need_ray
42+
@pytest.mark.asyncio
43+
async def test_get_metrics_endpoint_default(ray_init_shutdown):
44+
"""Test getting metrics endpoint with default empty value using Ray actor"""
45+
sandbox_config = LocalDeploymentConfig(role="test", env="dev")
46+
actor_name = "test-get-metrics-endpoint-default"
47+
48+
# Create SandboxActor using Ray
49+
sandbox_actor = SandboxActor.options(name=actor_name, lifetime="detached").remote(
50+
sandbox_config, sandbox_config.get_deployment()
51+
)
52+
53+
try:
54+
# Get the default endpoint (should be empty string)
55+
result = ray.get(sandbox_actor.get_metrics_endpoint.remote())
56+
assert result == ""
57+
logger.info(f"Default metrics endpoint is empty as expected: '{result}'")
58+
finally:
59+
ray.kill(sandbox_actor)

0 commit comments

Comments
 (0)