|
1 | 1 | """LLM Model Endpoint routes for the hosted model inference service. |
2 | 2 | """ |
| 3 | + |
3 | 4 | import traceback |
4 | 5 | from datetime import datetime |
5 | 6 | from typing import Optional |
@@ -169,6 +170,7 @@ async def create_model_endpoint( |
169 | 170 | create_llm_model_bundle_use_case=create_llm_model_bundle_use_case, |
170 | 171 | model_endpoint_service=external_interfaces.model_endpoint_service, |
171 | 172 | docker_repository=external_interfaces.docker_repository, |
| 173 | + llm_artifact_gateway=external_interfaces.llm_artifact_gateway, |
172 | 174 | ) |
173 | 175 | return await use_case.execute(user=auth, request=request) |
174 | 176 | except ObjectAlreadyExistsException as exc: |
@@ -331,9 +333,9 @@ async def create_completion_sync_task( |
331 | 333 | external_interfaces.monitoring_metrics_gateway.emit_token_count_metrics, |
332 | 334 | TokenUsage( |
333 | 335 | num_prompt_tokens=response.output.num_prompt_tokens if response.output else None, |
334 | | - num_completion_tokens=response.output.num_completion_tokens |
335 | | - if response.output |
336 | | - else None, |
| 336 | + num_completion_tokens=( |
| 337 | + response.output.num_completion_tokens if response.output else None |
| 338 | + ), |
337 | 339 | total_duration=use_case_timer.duration, |
338 | 340 | ), |
339 | 341 | metric_metadata, |
@@ -401,9 +403,9 @@ async def event_generator(): |
401 | 403 | external_interfaces.monitoring_metrics_gateway.emit_token_count_metrics, |
402 | 404 | TokenUsage( |
403 | 405 | num_prompt_tokens=message.output.num_prompt_tokens if message.output else None, |
404 | | - num_completion_tokens=message.output.num_completion_tokens |
405 | | - if message.output |
406 | | - else None, |
| 406 | + num_completion_tokens=( |
| 407 | + message.output.num_completion_tokens if message.output else None |
| 408 | + ), |
407 | 409 | total_duration=use_case_timer.duration, |
408 | 410 | time_to_first_token=time_to_first_token, |
409 | 411 | ), |
@@ -593,6 +595,7 @@ async def create_batch_completions( |
593 | 595 | docker_image_batch_job_gateway=external_interfaces.docker_image_batch_job_gateway, |
594 | 596 | docker_repository=external_interfaces.docker_repository, |
595 | 597 | docker_image_batch_job_bundle_repo=external_interfaces.docker_image_batch_job_bundle_repository, |
| 598 | + llm_artifact_gateway=external_interfaces.llm_artifact_gateway, |
596 | 599 | ) |
597 | 600 | return await use_case.execute(user=auth, request=request) |
598 | 601 | except (ObjectNotFoundException, ObjectNotAuthorizedException) as exc: |
|
0 commit comments