@@ -250,7 +250,7 @@ class ModelBuilder(Triton, DJL, JumpStart, TGI, Transformers, TensorflowServing,
         default=None, metadata={"help": "Define sagemaker session for execution"}
     )
     name: Optional[str] = field(
-        default="model-name-" + uuid.uuid1().hex,
+        default_factory=lambda: "model-name-" + uuid.uuid1().hex,
         metadata={"help": "Define the model name"},
     )
     mode: Optional[Mode] = field(
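The switch to default_factory above matters because a plain dataclass default is evaluated once, when the class body is executed, so every ModelBuilder instance would share the same generated name; default_factory defers the UUID call to instance creation. A minimal standalone sketch (the Shared/Fresh classes are illustrative, not part of the SDK):

import uuid
from dataclasses import dataclass, field

@dataclass
class Shared:
    # evaluated once at class definition -> every instance gets the same hex
    name: str = "model-name-" + uuid.uuid1().hex

@dataclass
class Fresh:
    # evaluated per instance -> each instance gets its own hex
    name: str = field(default_factory=lambda: "model-name-" + uuid.uuid1().hex)

assert Shared().name == Shared().name
assert Fresh().name != Fresh().name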
@@ -1130,7 +1130,7 @@ def build(
     def _get_processing_unit(self):
         """Detects if the resource requirements are intended for a CPU or GPU instance."""
         # Assume custom orchestrator will be deployed as an endpoint to a CPU instance
-        if not self.resource_requirements:
+        if not self.resource_requirements or not self.resource_requirements.num_accelerators:
             return "cpu"
         for ic in self.modelbuilder_list or []:
             if ic.resource_requirements.num_accelerators > 0:
@@ -1171,10 +1171,10 @@ def _get_ic_resource_requirements(self, mb: ModelBuilder = None) -> ModelBuilder

     @_capture_telemetry("build_custom_orchestrator")
     def _get_smd_image_uri(self, processing_unit: str = None) -> str:
-        """Gets the SMD Inference URI.
+        """Gets the SMD Inference Image URI.

         Returns:
-            str: Pytorch DLC URI.
+            str: SMD Inference Image URI.
         """
         from sagemaker import image_uris
         import sys
@@ -1183,10 +1183,10 @@ def _get_smd_image_uri(self, processing_unit: str = None) -> str:
         from packaging.version import Version

         formatted_py_version = f"py{sys.version_info.major}{sys.version_info.minor}"
-        if Version(f"{sys.version_info.major}.{sys.version_info.minor}") < Version("3.11.11"):
+        if Version(f"{sys.version_info.major}.{sys.version_info.minor}") < Version("3.12"):
             raise ValueError(
                 f"Found Python version {formatted_py_version} but "
-                f"custom orchestrator deployment requires Python version >= 3.11.11."
+                f"custom orchestrator deployment requires Python version >= 3.12."
             )

         INSTANCE_TYPES = {"cpu": "ml.c5.xlarge", "gpu": "ml.g5.4xlarge"}
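The gate above leans on packaging.version.Version rather than plain string comparison; a small standalone sketch of the behavior it depends on (the printed values are only for illustration):

import sys
from packaging.version import Version

# Numeric, not lexicographic: "3.10" sorts below "3.12" even though it is
# "greater" as a plain string. The dot separator is what keeps the check
# meaningful -- Version("312") would compare as greater than Version("3.12").
current = Version(f"{sys.version_info.major}.{sys.version_info.minor}")
print(current < Version("3.12"))           # True on Python 3.11, False on 3.12+
print(Version("3.10") < Version("3.12"))   # True
print("3.9" < "3.12")                      # False as strings, which is why Version is used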
@@ -1956,7 +1956,7 @@ def deploy(
             ]
         ] = None,
         custom_orchestrator_instance_type: str = None,
-        custom_orchestrator_initial_instance_count: int = 1,
+        custom_orchestrator_initial_instance_count: int = None,
         **kwargs,
     ) -> Union[Predictor, Transformer, List[Predictor]]:
19621962 """Deploys the built Model.
@@ -1977,7 +1977,7 @@ def deploy(
         """
         if not hasattr(self, "built_model") and not hasattr(self, "_deployables"):
             raise ValueError("Model needs to be built before deploying")
-        endpoint_name = unique_name_from_base("endpoint-name")
+        endpoint_name = endpoint_name or unique_name_from_base("endpoint-name")

         if not hasattr(self, "_deployables"):
             if not inference_config:  # Real-time Deployment
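The `or` fallback above keeps a caller-supplied endpoint name and only mints a unique one when none was passed; a tiny sketch of the pattern (resolve_endpoint_name is a hypothetical helper, unique_name_from_base is the real sagemaker.utils utility):

from sagemaker.utils import unique_name_from_base

def resolve_endpoint_name(endpoint_name=None):
    # keep an explicit name; otherwise generate a unique one derived from the base
    return endpoint_name or unique_name_from_base("endpoint-name")

print(resolve_endpoint_name("my-endpoint"))   # my-endpoint
print(resolve_endpoint_name())                # a unique generated name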
@@ -2038,13 +2038,14 @@ def deploy(
                 )
         if self._deployables.get("CustomOrchestrator", None):
             custom_orchestrator = self._deployables.get("CustomOrchestrator")
+            if not custom_orchestrator_instance_type and not instance_type:
+                logger.warning(
+                    "Deploying custom orchestrator as an endpoint but no instance type was "
+                    "set. Defaulting to `ml.c5.xlarge`."
+                )
+                custom_orchestrator_instance_type = "ml.c5.xlarge"
+                custom_orchestrator_initial_instance_count = 1
             if custom_orchestrator["Mode"] == "Endpoint":
-                if not custom_orchestrator_instance_type:
-                    logger.warning(
-                        "Deploying custom orchestrator as an endpoint but no instance type was "
-                        "set. Defaulting to `ml.c5.xlarge`."
-                    )
-                    custom_orchestrator_instance_type = "ml.c5.xlarge"
                 logger.info(
                     "Deploying custom orchestrator on instance type %s.",
                     custom_orchestrator_instance_type,
@@ -2057,13 +2058,18 @@ def deploy(
                     )
                 )
             elif custom_orchestrator["Mode"] == "InferenceComponent":
+                logger.info(
+                    "Deploying custom orchestrator as an inference component "
+                    f"to endpoint {endpoint_name}"
+                )
                 predictors.append(
                     self._deploy_for_ic(
                         ic_data=custom_orchestrator,
                         container_timeout_in_seconds=container_timeout_in_second,
                         instance_type=custom_orchestrator_instance_type or instance_type,
                         initial_instance_count=custom_orchestrator_initial_instance_count
                         or initial_instance_count,
+                        endpoint_name=endpoint_name,
                         **kwargs,
                     )
                 )
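Taken together, the deploy() changes let one call fan out the inference components and the custom orchestrator onto a single endpoint; a hedged usage sketch (the builder setup, names, and instance types are placeholders, not values from this change):

# mb is assumed to be a ModelBuilder whose build() has already produced
# deployables (inference components plus a custom orchestrator).
predictors = mb.deploy(
    endpoint_name="my-multi-model-endpoint",           # now honored instead of regenerated
    instance_type="ml.g5.4xlarge",                     # default for the inference components
    initial_instance_count=1,
    custom_orchestrator_instance_type="ml.c5.xlarge",  # falls back to ml.c5.xlarge if neither this nor instance_type is set
)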