@@ -57,11 +57,12 @@ def __init__(self, raw_req: Request, perf_metrics_collector: DisaggPerfMetricsCo
5757 self .raw_req = raw_req
5858 self .ctx_server = ""
5959 self .gen_server = ""
60+ self .request_arrival_time = raw_req .state .server_arrival_time
6061 self .server_first_token_time = 0
6162 self .perf_metrics_collector = perf_metrics_collector
6263
def on_req_begin(self, request: UCompletionRequest):
    """Report the request's queue latency to the perf-metrics collector.

    The latency is the current steady-clock reading minus
    ``self.request_arrival_time``; it is passed to the collector's
    ``queue_latency_seconds.observe``. ``request`` is accepted but not
    read by this hook.
    """
    record_latency = self.perf_metrics_collector.queue_latency_seconds.observe
    waited_seconds = get_steady_clock_now_in_seconds() - self.request_arrival_time
    record_latency(waited_seconds)
6566
6667 def on_ctx_resp (self , ctx_server : str , response : UCompletionResponse ):
6768 self .ctx_server = ctx_server
@@ -93,8 +94,8 @@ def __init__(self,
9394 self ._metrics_interval_secs = metrics_interval_secs
9495
9596 self ._ctx_servers , self ._gen_servers = get_ctx_gen_server_addrs (config .server_configs )
96- self ._ctx_router = create_router (config .ctx_router_config , self ._ctx_servers , metadata_server_cfg , create_metadata_server (metadata_server_cfg ))
97- self ._gen_router = create_router (config .gen_router_config , self ._gen_servers , metadata_server_cfg , create_metadata_server (metadata_server_cfg ))
97+ self ._ctx_router = create_router (config .ctx_router_config , self ._ctx_servers , metadata_server_cfg , create_metadata_server (metadata_server_cfg ), self . _sync_server_clock )
98+ self ._gen_router = create_router (config .gen_router_config , self ._gen_servers , metadata_server_cfg , create_metadata_server (metadata_server_cfg ), self . _sync_server_clock )
9899 self ._metadata_server = create_metadata_server (metadata_server_cfg )
99100 self ._perf_metrics_collector = DisaggPerfMetricsCollector (config .perf_metrics_max_requests )
100101
@@ -122,8 +123,10 @@ def __init__(self,
122123
@asynccontextmanager
async def lifespan(app) -> None:
    """Run startup work before yielding to the app and teardown afterwards.

    Startup prepares the ctx router, then the gen router, then sets up the
    service. After the ``yield`` the service is torn down.
    """
    # Prepare servers (sync server clock) when static ctx/gen server list is used
    for router in (self._ctx_router, self._gen_router):
        await router.prepare_servers()
    await self._service.setup()
    yield
    await self._service.teardown()
129132
@@ -133,6 +136,7 @@ async def lifespan(app) -> None:
133136
@self.app.exception_handler(RequestValidationError)
async def validation_exception_handler(_, exc):
    """Count a request-validation failure and reply with HTTP 400.

    The response body is a JSON object whose ``"error"`` field holds
    ``str(exc)``. The first argument is ignored.
    """
    self._perf_metrics_collector.validation_exceptions.inc()
    error_body = {"error": str(exc)}
    return JSONResponse(status_code=400, content=error_body)
137141
138142 self .register_routes ()
@@ -158,8 +162,14 @@ def register_routes(self):
158162 def _wrap_entry_point (self , entry_point : Callable ) -> Callable :
159163 async def wrapper (req : UCompletionRequest , raw_req : Request ) -> Response :
160164 try :
165+ self ._perf_metrics_collector .total_requests .inc ()
166+ if req .stream :
167+ self ._perf_metrics_collector .stream_requests .inc ()
168+ else :
169+ self ._perf_metrics_collector .nonstream_requests .inc ()
161170 hooks = RawRequestResponseHooks (raw_req , self ._perf_metrics_collector )
162171 response_or_generator = await entry_point (req , hooks )
172+ self ._perf_metrics_collector .total_responses .inc ()
163173 if req .stream :
164174 return StreamingResponse (content = response_or_generator , media_type = "text/event-stream" )
165175 else :
@@ -173,9 +183,11 @@ def _handle_exception(self, exception):
173183 logger .error ("CppExecutorError: " , traceback .format_exc ())
174184 signal .raise_signal (signal .SIGINT )
175185 elif isinstance (exception , HTTPException ):
186+ self ._perf_metrics_collector .http_exceptions .inc ()
176187 logger .error (f"HTTPException { exception .status_code } { exception .detail } : " , traceback .format_exc ())
177188 raise exception
178189 else :
190+ self ._perf_metrics_collector .internal_errors .inc ()
179191 logger .error ("Internal server error: " , traceback .format_exc ())
180192 raise HTTPException (status_code = 500 , detail = f"Internal server error { str (exception )} " )
181193
@@ -199,13 +211,12 @@ async def __call__(self, host: str, port: int, sockets: list[socket.socket] | No
199211 timeout_keep_alive = TIMEOUT_KEEP_ALIVE )
200212 await uvicorn .Server (config ).serve (sockets = sockets )
201213
202- # TODO: rework this for service discovery, now it's only for static server list
203- async def _set_steady_clock_offsets (self ):
204- STEADY_CLOCK_OFFSET_ENDPOINT = "/steady_clock_offset"
214+ async def _sync_server_clock (self , server : str ):
215+ """ Sync the ctx/gen server's steady clock with the disagg-server's steady clock (in case NTP service is not running). """
205216 async def query_steady_clock_offset (session : aiohttp .ClientSession , server_url : str ) -> tuple [Optional [float ], Optional [float ]]:
206217 try :
207218 originate_ts = get_steady_clock_now_in_seconds ()
208- async with session .get (server_url + STEADY_CLOCK_OFFSET_ENDPOINT ) as response :
219+ async with session .get (server_url ) as response :
209220 destination_ts = get_steady_clock_now_in_seconds ()
210221 if response .status == 200 :
211222 response_content = await response .json ()
@@ -222,12 +233,11 @@ async def query_steady_clock_offset(session: aiohttp.ClientSession, server_url:
222233
async def set_steady_clock_offset(session: aiohttp.ClientSession, server_url: str, offset: float) -> None:
    """POST ``{"offset": offset}`` as JSON to ``server_url``.

    A response status other than 200 is not raised; it is only logged as
    a warning.
    """
    async with session.post(server_url, json={"offset": offset}) as response:
        if response.status == 200:
            return
        logger.warning(f"Cannot set disagg server steady clock offset for server { server_url } , the perf metrics timestamps could be mis-aligned")
228239
229240 async def align_steady_clock_offset (session : aiohttp .ClientSession , server_url : str ) -> None :
230- server_url = f"http://{ server_url } " if not server_url .startswith ("http://" ) else server_url
231241 delay , offset = await query_steady_clock_offset (session , server_url )
232242 if delay is None or offset is None :
233243 logger .warning (f"Unable to measure steady clock offset for { server_url } ; skipping adjustment" )
@@ -236,7 +246,13 @@ async def align_steady_clock_offset(session: aiohttp.ClientSession, server_url:
236246 # Negate the offset so that worker servers can adjust their steady clock by adding the new offset
237247 await set_steady_clock_offset (session , server_url , - offset )
238248
239- async with aiohttp .ClientSession (
240- connector = aiohttp .TCPConnector (limit = 0 , limit_per_host = 0 , force_close = True ),
241- timeout = aiohttp .ClientTimeout (total = self ._req_timeout_secs )) as session :
242- await asyncio .gather (* [align_steady_clock_offset (session , server_url ) for server_url in self ._ctx_servers + self ._gen_servers ])
249+ server_scheme = "http://" if not server .startswith ("http://" ) else ""
250+ server_url = f"{ server_scheme } { server } /steady_clock_offset"
251+
252+ try :
253+ async with aiohttp .ClientSession (
254+ connector = aiohttp .TCPConnector (limit = 0 , limit_per_host = 0 , force_close = True ),
255+ timeout = aiohttp .ClientTimeout (total = self ._req_timeout_secs )) as session :
256+ await align_steady_clock_offset (session , server_url )
257+ except (aiohttp .ClientError , OSError ) as e :
258+ logger .warning (f"Unable to align steady clock offset for { server_url } : { e } ; skipping adjustment" )
0 commit comments