Skip to content

Commit 455a00c

Browse files
committed
🔨 Use ARM host for unit test runner and update monitoring auto-disable
1 parent 07de4fd commit 455a00c

File tree

2 files changed

+59
-24
lines changed

2 files changed

+59
-24
lines changed

.github/workflows/auto-unit-test.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ on:
1010
runner_label_json:
1111
description: 'runner array in json format (e.g. ["ubuntu-latest"] or ["self-hosted"])'
1212
required: false
13-
default: '["ubuntu-latest"]'
13+
default: '["ubuntu-24.04-arm"]'
1414
pull_request:
1515
branches: [develop]
1616
paths:
@@ -28,7 +28,7 @@ on:
2828

2929
jobs:
3030
test:
31-
runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
31+
runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-24.04-arm"]') }}
3232
steps:
3333
- name: Checkout code
3434
uses: actions/checkout@v4
@@ -45,7 +45,7 @@ jobs:
4545
run: |
4646
cd backend
4747
uv sync --extra data-process --extra test
48-
uv pip install -e ../sdk
48+
uv pip install -e "../sdk[dev]"
4949
cd ..
5050
5151
- name: Run all tests and collect coverage

sdk/nexent/monitor/monitoring.py

Lines changed: 56 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,29 @@
66
integration with OpenTelemetry, Jaeger, Prometheus, and Grafana.
77
88
This module uses a singleton pattern for consistent monitoring across the SDK.
9+
When OpenTelemetry dependencies are not available, the module gracefully degrades
10+
and disables monitoring functionality without breaking the application.
11+
12+
Installation:
13+
- Basic: pip install nexent
14+
- With monitoring: pip install nexent[performance]
915
"""
1016

11-
from opentelemetry.trace.status import Status, StatusCode
12-
from opentelemetry.exporter.prometheus import PrometheusMetricReader
13-
from opentelemetry.sdk.metrics import MeterProvider
14-
from opentelemetry.sdk.trace.export import BatchSpanProcessor
15-
from opentelemetry.sdk.trace import TracerProvider
16-
from opentelemetry.instrumentation.requests import RequestsInstrumentor
17-
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
18-
from opentelemetry.exporter.jaeger.thrift import JaegerExporter
19-
from opentelemetry import trace, metrics
20-
from opentelemetry.sdk.resources import Resource
17+
# Optional OpenTelemetry imports - gracefully handle missing dependencies
18+
try:
19+
from opentelemetry.trace.status import Status, StatusCode
20+
from opentelemetry.exporter.prometheus import PrometheusMetricReader
21+
from opentelemetry.sdk.metrics import MeterProvider
22+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
23+
from opentelemetry.sdk.trace import TracerProvider
24+
from opentelemetry.instrumentation.requests import RequestsInstrumentor
25+
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
26+
from opentelemetry.exporter.jaeger.thrift import JaegerExporter
27+
from opentelemetry import trace, metrics
28+
from opentelemetry.sdk.resources import Resource
29+
OPENTELEMETRY_AVAILABLE = True
30+
except ImportError:
31+
OPENTELEMETRY_AVAILABLE = False
2132
import logging
2233
import time
2334
import functools
@@ -27,11 +38,13 @@
2738

2839
logger = logging.getLogger(__name__)
2940

30-
# Import OpenTelemetry dependencies (assumed to be available in SDK)
31-
3241
F = TypeVar('F', bound=Callable[..., Any])
3342

3443

44+
def is_opentelemetry_available() -> bool:
45+
"""Check if OpenTelemetry dependencies are available."""
46+
return OPENTELEMETRY_AVAILABLE
47+
3548
@dataclass
3649
class MonitoringConfig:
3750
"""Configuration for monitoring system."""
@@ -42,6 +55,15 @@ class MonitoringConfig:
4255
telemetry_sample_rate: float = 1.0
4356
llm_slow_request_threshold_seconds: float = 5.0
4457
llm_slow_token_rate_threshold: float = 10.0
58+
59+
def __post_init__(self):
60+
"""Validate configuration and adjust based on OpenTelemetry availability."""
61+
if self.enable_telemetry and not OPENTELEMETRY_AVAILABLE:
62+
logger.warning(
63+
"OpenTelemetry dependencies not available. Disabling telemetry. "
64+
"Install with: pip install nexent[performance]"
65+
)
66+
self.enable_telemetry = False
4567

4668

4769
class MonitoringManager:
@@ -90,6 +112,13 @@ def _init_telemetry(self) -> None:
90112
logger.info("Telemetry is disabled by configuration")
91113
return
92114

115+
if not OPENTELEMETRY_AVAILABLE:
116+
logger.warning(
117+
"OpenTelemetry dependencies not available. Telemetry initialization skipped. "
118+
"Install with: pip install nexent[performance]"
119+
)
120+
return
121+
93122
try:
94123
# Setup tracing with proper service name resource
95124
resource = Resource.create({
@@ -164,7 +193,9 @@ def _init_telemetry(self) -> None:
164193
@property
165194
def is_enabled(self) -> bool:
166195
"""Check if monitoring is enabled."""
167-
return self._config is not None and self._config.enable_telemetry
196+
return (self._config is not None and
197+
self._config.enable_telemetry and
198+
OPENTELEMETRY_AVAILABLE)
168199

169200
@property
170201
def tracer(self):
@@ -174,11 +205,16 @@ def tracer(self):
174205
def setup_fastapi_app(self, app) -> bool:
175206
"""Setup monitoring for a FastAPI application."""
176207
try:
177-
if self.is_enabled and app:
208+
if self.is_enabled and app and OPENTELEMETRY_AVAILABLE:
178209
FastAPIInstrumentor.instrument_app(app)
179210
logger.info(
180211
"FastAPI application monitoring initialized successfully")
181212
return True
213+
elif not OPENTELEMETRY_AVAILABLE:
214+
logger.warning(
215+
"OpenTelemetry not available. FastAPI monitoring skipped. "
216+
"Install with: pip install nexent[performance]"
217+
)
182218
return False
183219
except Exception as e:
184220
logger.error(f"Failed to initialize FastAPI monitoring: {e}")
@@ -187,9 +223,7 @@ def setup_fastapi_app(self, app) -> bool:
187223
@contextmanager
188224
def trace_llm_request(self, operation_name: str, model_name: str, **attributes: Any) -> Iterator[Optional[Any]]:
189225
"""Context manager for tracing LLM requests with comprehensive metrics."""
190-
if not self.is_enabled or not self._tracer:
191-
logger.warning(
192-
f"⚠️ trace_llm_request returning None: is_enabled={self.is_enabled}, has_tracer={self._tracer is not None}")
226+
if not self.is_enabled or not OPENTELEMETRY_AVAILABLE or not self._tracer:
193227
yield None
194228
return
195229

@@ -218,13 +252,13 @@ def trace_llm_request(self, operation_name: str, model_name: str, **attributes:
218252

219253
def get_current_span(self) -> Optional[Any]:
220254
"""Get the current active span."""
221-
if not self.is_enabled:
255+
if not self.is_enabled or not OPENTELEMETRY_AVAILABLE:
222256
return None
223257
return trace.get_current_span()
224258

225259
def add_span_event(self, name: str, attributes: Optional[Dict[str, Any]] = None) -> None:
226260
"""Add an event to the current span."""
227-
if not self.is_enabled:
261+
if not self.is_enabled or not OPENTELEMETRY_AVAILABLE:
228262
return
229263

230264
span = trace.get_current_span()
@@ -233,7 +267,7 @@ def add_span_event(self, name: str, attributes: Optional[Dict[str, Any]] = None)
233267

234268
def set_span_attributes(self, **attributes: Any) -> None:
235269
"""Set attributes on the current span."""
236-
if not self.is_enabled:
270+
if not self.is_enabled or not OPENTELEMETRY_AVAILABLE:
237271
return
238272

239273
span = trace.get_current_span()
@@ -246,7 +280,7 @@ def create_token_tracker(self, model_name: str, span: Optional[Any] = None) -> '
246280

247281
def record_llm_metrics(self, metric_type: str, value: float, attributes: Dict[str, Any]) -> None:
248282
"""Record LLM-specific metrics."""
249-
if not self.is_enabled:
283+
if not self.is_enabled or not OPENTELEMETRY_AVAILABLE:
250284
return
251285

252286
if metric_type == "ttft" and self._llm_ttft_duration:
@@ -499,4 +533,5 @@ async def my_function():
499533
'MonitoringManager',
500534
'LLMTokenTracker',
501535
'get_monitoring_manager',
536+
'is_opentelemetry_available',
502537
]

0 commit comments

Comments
 (0)