Commit ba68b8d

Infer hardware from model name (#515)
* Infer hardware from model name
* fix
* fix lint
* fix
* Use formula instead of hardcode
* tests
* remove print and cache
* fixes
1 parent fbe7417 commit ba68b8d
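
The gist of the change: when a create-endpoint request leaves its hardware fields unset, the server derives them from the model's name and config instead of requiring callers to hardcode them. Below is a minimal sketch of that idea, assuming the parameter count can be parsed from names like "llama-2-70b" and an 80 GB card; the helper name, regex, and constants are illustrative and are not taken from this commit.

```python
import math
import re


# Hypothetical sketch, not the commit's actual formula: estimate how many GPUs
# a model needs from the parameter count embedded in a name like "llama-2-70b".
def infer_num_gpus(model_name: str, gpu_memory_gb: int = 80) -> int:
    match = re.search(r"(\d+(?:\.\d+)?)b", model_name.lower())
    if match is None:
        raise ValueError(f"cannot infer parameter count from '{model_name}'")
    num_params_billion = float(match.group(1))
    # ~2 bytes per parameter for fp16 weights, plus ~20% headroom for KV cache.
    required_gb = num_params_billion * 2 * 1.2
    return max(1, math.ceil(required_gb / gpu_memory_gb))


print(infer_num_gpus("llama-2-70b"))  # -> 3 under the assumptions above
```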

10 files changed: 487 additions, 139 deletions (three of the changed files are shown below)

model-engine/model_engine_server/api/llms_v1.py

9 additions, 6 deletions

```diff
@@ -1,5 +1,6 @@
 """LLM Model Endpoint routes for the hosted model inference service.
 """
+
 import traceback
 from datetime import datetime
 from typing import Optional
@@ -169,6 +170,7 @@ async def create_model_endpoint(
             create_llm_model_bundle_use_case=create_llm_model_bundle_use_case,
             model_endpoint_service=external_interfaces.model_endpoint_service,
             docker_repository=external_interfaces.docker_repository,
+            llm_artifact_gateway=external_interfaces.llm_artifact_gateway,
         )
         return await use_case.execute(user=auth, request=request)
     except ObjectAlreadyExistsException as exc:
@@ -331,9 +333,9 @@ async def create_completion_sync_task(
             external_interfaces.monitoring_metrics_gateway.emit_token_count_metrics,
             TokenUsage(
                 num_prompt_tokens=response.output.num_prompt_tokens if response.output else None,
-                num_completion_tokens=response.output.num_completion_tokens
-                if response.output
-                else None,
+                num_completion_tokens=(
+                    response.output.num_completion_tokens if response.output else None
+                ),
                 total_duration=use_case_timer.duration,
             ),
             metric_metadata,
@@ -401,9 +403,9 @@ async def event_generator():
             external_interfaces.monitoring_metrics_gateway.emit_token_count_metrics,
             TokenUsage(
                 num_prompt_tokens=message.output.num_prompt_tokens if message.output else None,
-                num_completion_tokens=message.output.num_completion_tokens
-                if message.output
-                else None,
+                num_completion_tokens=(
+                    message.output.num_completion_tokens if message.output else None
+                ),
                 total_duration=use_case_timer.duration,
                 time_to_first_token=time_to_first_token,
             ),
@@ -593,6 +595,7 @@ async def create_batch_completions(
             docker_image_batch_job_gateway=external_interfaces.docker_image_batch_job_gateway,
             docker_repository=external_interfaces.docker_repository,
             docker_image_batch_job_bundle_repo=external_interfaces.docker_image_batch_job_bundle_repository,
+            llm_artifact_gateway=external_interfaces.llm_artifact_gateway,
         )
         return await use_case.execute(user=auth, request=request)
     except (ObjectNotFoundException, ObjectNotAuthorizedException) as exc:
```
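
The edits above only inject external_interfaces.llm_artifact_gateway into the create-endpoint and batch-completions use cases (the remaining hunks are formatting of the existing ternaries). A rough sketch, with assumed class, attribute, and path names, of how a use case could consult that gateway when a request leaves hardware unset:

```python
# Illustrative sketch only; names other than llm_artifact_gateway and
# get_model_config are assumptions, not taken from this commit.
class CreateLLMModelEndpointUseCase:
    def __init__(self, llm_artifact_gateway, model_endpoint_service, docker_repository):
        self.llm_artifact_gateway = llm_artifact_gateway
        self.model_endpoint_service = model_endpoint_service
        self.docker_repository = docker_repository

    async def execute(self, user, request):
        if request.gpus is None or request.gpu_type is None:
            # Pull the model config (hidden size, layer count, etc.) and use it
            # to decide how many GPUs and which GPU type the endpoint needs.
            config = self.llm_artifact_gateway.get_model_config(
                path=f"{user.team_id}/{request.model_name}"  # hypothetical path layout
            )
            ...  # apply the sizing formula here
        ...
```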

model-engine/model_engine_server/common/dtos/llms.py

4 additions, 4 deletions

```diff
@@ -51,10 +51,10 @@ class CreateLLMModelEndpointV1Request(BaseModel):
     metadata: Dict[str, Any]  # TODO: JSON type
     post_inference_hooks: Optional[List[str]]
     endpoint_type: ModelEndpointType = ModelEndpointType.SYNC
-    cpus: CpuSpecificationType
-    gpus: int
-    memory: StorageSpecificationType
-    gpu_type: GpuType
+    cpus: Optional[CpuSpecificationType]
+    gpus: Optional[int]
+    memory: Optional[StorageSpecificationType]
+    gpu_type: Optional[GpuType]
     storage: Optional[StorageSpecificationType]
     optimize_costs: Optional[bool]
     min_workers: int
```
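
With cpus, gpus, memory, and gpu_type now Optional, a caller can simply omit them and let the service infer the hardware. An abbreviated, illustrative request payload; the field values are examples, and only the optionality of the hardware keys comes from the diff above:

```python
# Hardware keys left out entirely -> the service infers them from the model name.
payload = {
    "name": "llama-2-70b-endpoint",
    "model_name": "llama-2-70b",
    "endpoint_type": "streaming",
    "metadata": {},
    "min_workers": 1,
    "max_workers": 1,
    # no "cpus", "gpus", "memory", or "gpu_type"
}
```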

model-engine/model_engine_server/domain/gateways/llm_artifact_gateway.py

11 additions, 1 deletion

```diff
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import List
+from typing import Any, Dict, List
 
 
 class LLMArtifactGateway(ABC):
@@ -39,3 +39,13 @@ def get_model_weights_urls(self, owner: str, model_name: str, **kwargs) -> List[
         model_name (str): name of the model
         """
         pass
+
+    @abstractmethod
+    def get_model_config(self, path: str, **kwargs) -> Dict[str, Any]:
+        """
+        Gets the model config from the model files live at given folder.
+
+        Args:
+            path (str): path to model files
+        """
+        pass
```
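
Concrete gateways have to satisfy the new get_model_config contract. A minimal local-filesystem sketch, assuming the folder follows the usual Hugging Face layout with a config.json; the real implementations most likely read from blob storage instead:

```python
import json
import os
from typing import Any, Dict


def get_model_config(path: str, **kwargs) -> Dict[str, Any]:
    # Read the model's config.json (parameter count, hidden size, etc.) from
    # the folder of model files; a blob-storage-backed gateway would download
    # or stream the same file rather than opening it locally.
    with open(os.path.join(path, "config.json")) as f:
        return json.load(f)
```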
