Skip to content

Commit 68cede1

Browse files
author
Bryannah Hernandez
committed
In_Process mode for TGI transformers, edits
1 parent d3b8e9b commit 68cede1

File tree

3 files changed

+21
-4
lines changed

3 files changed

+21
-4
lines changed

src/sagemaker/serve/builder/model_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -857,7 +857,7 @@ def build( # pylint: disable=R0911
857 857

858 858
def _build_validations(self):
859 859
"""Validations needed for model server overrides, or auto-detection or fallback"""
860-
if self.mode == Mode.IN_PROCESS:
860+
if self.mode == Mode.IN_PROCESS and self.model_server is not ModelServer.MMS:
861 861
raise ValueError("IN_PROCESS mode is not supported yet!")
862 862

863 863
if self.inference_spec and self.model:

src/sagemaker/serve/builder/transformers_builder.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def _get_hf_metadata_create_model(self) -> Type[Model]:
161 161
vpc_config=self.vpc_config,
162 162
)
163 163

164-
if self.mode == Mode.LOCAL_CONTAINER:
164+
if self.mode == Mode.LOCAL_CONTAINER or self.mode == Mode.IN_PROCESS:
165 165
self.image_uri = pysdk_model.serving_image_uri(
166 166
self.sagemaker_session.boto_region_name, "local"
167 167
)
@@ -226,6 +226,23 @@ def _transformers_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[Pr
226 226
jumpstart=False,
227 227
)
228 228
return predictor
229+
230+
if self.mode == Mode.IN_PROCESS:
231+
timeout = kwargs.get("model_data_download_timeout")
232+
233+
predictor = TransformersLocalModePredictor(
234+
self.modes[str(Mode.IN_PROCESS)], serializer, deserializer
235+
)
236+
237+
self.modes[str(Mode.IN_PROCESS)].create_server(
238+
self.image_uri,
239+
timeout if timeout else DEFAULT_TIMEOUT,
240+
None,
241+
predictor,
242+
self.pysdk_model.env,
243+
jumpstart=False,
244+
)
245+
return predictor
229 246

230 247
if "mode" in kwargs:
231 248
del kwargs["mode"]
@@ -276,7 +293,7 @@ def _build_transformers_env(self):
276 293

277 294
self.pysdk_model = self._create_transformers_model()
278 295

279-
if self.mode == Mode.LOCAL_CONTAINER:
296+
if self.mode == Mode.LOCAL_CONTAINER or self.mode == Mode.IN_PROCESS:
280 297
self._prepare_for_mode()
281 298

282 299
return self.pysdk_model

src/sagemaker/serve/model_server/multi_model_server/server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def _invoke_multi_model_server_serving(self, request: object, content_type: str,
56 56
"""Placeholder docstring"""
57 57
try:
58 58
response = requests.post(
59-
f"http://{get_docker_host()}:8080/invocations",
59+
f"http://{0.0.0.0}:8080/invocations",
60 60
data=request,
61 61
headers={"Content-Type": content_type, "Accept": accept},
62 62
timeout=600,

0 commit comments

Comments
 (0)