Skip to content

Commit cc0ca14

Browse files
author
Bryannah Hernandez
committed
changes to support in_process
1 parent f39cca6 commit cc0ca14

File tree

6 files changed

+104
-67
lines changed

6 files changed

+104
-67
lines changed

src/sagemaker/serve/builder/model_builder.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from sagemaker.serve.mode.function_pointers import Mode
3737
from sagemaker.serve.mode.sagemaker_endpoint_mode import SageMakerEndpointMode
3838
from sagemaker.serve.mode.local_container_mode import LocalContainerMode
39+
from sagemaker.serve.mode.in_process_mode import InProcessMode
3940
from sagemaker.serve.detector.pickler import save_pkl, save_xgboost
4041
from sagemaker.serve.builder.serve_settings import _ServeSettings
4142
from sagemaker.serve.builder.djl_builder import DJL
@@ -410,7 +411,7 @@ def _prepare_for_mode(
410411
)
411412
self.env_vars.update(env_vars_sagemaker)
412413
return self.s3_upload_path, env_vars_sagemaker
413-
if self.mode == Mode.LOCAL_CONTAINER:
414+
elif self.mode == Mode.LOCAL_CONTAINER:
414415
# init the LocalContainerMode object
415416
self.modes[str(Mode.LOCAL_CONTAINER)] = LocalContainerMode(
416417
inference_spec=self.inference_spec,
@@ -422,9 +423,21 @@ def _prepare_for_mode(
422423
)
423424
self.modes[str(Mode.LOCAL_CONTAINER)].prepare()
424425
return None
426+
elif self.mode == Mode.IN_PROCESS:
427+
# init the InProcessMode object
428+
self.modes[str(Mode.IN_PROCESS)] = InProcessMode(
429+
inference_spec=self.inference_spec,
430+
schema_builder=self.schema_builder,
431+
session=self.sagemaker_session,
432+
model_path=self.model_path,
433+
env_vars=self.env_vars,
434+
model_server=self.model_server,
435+
)
436+
self.modes[str(Mode.IN_PROCESS)].prepare()
437+
return None
425438

426439
raise ValueError(
427-
"Please specify mode in: %s, %s" % (Mode.LOCAL_CONTAINER, Mode.SAGEMAKER_ENDPOINT)
440+
"Please specify mode in: %s, %s, %s" % (Mode.LOCAL_CONTAINER, Mode.SAGEMAKER_ENDPOINT, Mode.IN_PROCESS)
428441
)
429442

430443
def _get_client_translators(self):
@@ -796,9 +809,10 @@ def _initialize_for_mlflow(self, artifact_path: str) -> None:
796809
self.dependencies.update({"requirements": mlflow_model_dependency_path})
797810

798811
# Model Builder is a class to build the model for deployment.
799-
# It supports two modes of deployment
812+
# It supports three modes of deployment
800813
# 1/ SageMaker Endpoint
801814
# 2/ Local launch with container
815+
# 3/ In process mode with Transformers server in beta release
802816
def build( # pylint: disable=R0911
803817
self,
804818
mode: Type[Mode] = None,
@@ -897,7 +911,10 @@ def build( # pylint: disable=R0911
897911
def _build_validations(self):
898912
"""Validations needed for model server overrides, or auto-detection or fallback"""
899913
if self.mode == Mode.IN_PROCESS and self.model_server is not ModelServer.MMS:
900-
raise ValueError("IN_PROCESS mode is not supported yet!")
914+
raise ValueError(
915+
"IN_PROCESS mode is only supported for the MMS/Transformers "
916+
"model server in beta release"
917+
)
901918

902919
if self.inference_spec and self.model:
903920
raise ValueError("Can only set one of the following: model, inference_spec.")

src/sagemaker/serve/builder/transformers_builder.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@
3737
)
3838
from sagemaker.serve.detector.pickler import save_pkl
3939
from sagemaker.serve.utils.optimize_utils import _is_optimized
40-
from sagemaker.serve.utils.predictors import TransformersLocalModePredictor
40+
from sagemaker.serve.utils.predictors import (
41+
TransformersLocalModePredictor,
42+
TransformersInProcessModePredictor,
43+
)
4144
from sagemaker.serve.utils.types import ModelServer
4245
from sagemaker.serve.mode.function_pointers import Mode
4346
from sagemaker.serve.utils.telemetry_logger import _capture_telemetry
@@ -232,7 +235,7 @@ def _transformers_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[Pr
232235
if self.mode == Mode.IN_PROCESS:
233236
timeout = kwargs.get("model_data_download_timeout")
234237

235-
predictor = TransformersLocalModePredictor(
238+
predictor = TransformersInProcessModePredictor(
236239
self.modes[str(Mode.IN_PROCESS)], serializer, deserializer
237240
)
238241

@@ -242,7 +245,6 @@ def _transformers_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[Pr
242245
None,
243246
predictor,
244247
self.pysdk_model.env,
245-
jumpstart=False,
246248
)
247249
return predictor
248250

src/sagemaker/serve/mode/in_process_mode.py

Lines changed: 6 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
"""Module that defines the LocalContainerMode class"""
1+
"""Module that defines the InProcessMode class"""
22

33
from __future__ import absolute_import
44
from pathlib import Path
55
import logging
6-
from datetime import datetime, timedelta
76
from typing import Dict, Type
87
import base64
98
import time
@@ -13,7 +12,6 @@
1312
from sagemaker.base_predictor import PredictorBase
1413
from sagemaker.serve.spec.inference_spec import InferenceSpec
1514
from sagemaker.serve.builder.schema_builder import SchemaBuilder
16-
from sagemaker.serve.utils.logging_agent import pull_logs
1715
from sagemaker.serve.utils.types import ModelServer
1816
from sagemaker.serve.utils.exceptions import LocalDeepPingException
1917
from sagemaker.serve.model_server.multi_model_server.server import InProcessMultiModelServer
@@ -32,7 +30,7 @@
3230
class InProcessMode(
3331
InProcessMultiModelServer,
3432
):
35-
"""A class that holds methods to deploy model to a container in local environment"""
33+
"""A class that holds methods to deploy a model in-process, without a container"""
3634

3735
def __init__(
3836
self,
@@ -83,9 +81,9 @@ def create_server(
8381
):
8482
"""Placeholder docstring"""
8583

86-
self._pull_image(image=image)
84+
# self._pull_image(image=image)
8785

88-
self.destroy_server()
86+
# self.destroy_server()
8987

9088
logger.info("Waiting for model server %s to start up...", self.model_server)
9189

@@ -97,28 +95,10 @@ def create_server(
9795
secret_key=secret_key,
9896
env_vars=env_vars if env_vars else self.env_vars,
9997
)
98+
logger.info("Starting deep ping health check for the in-process model server")
10099
self._ping_container = self._multi_model_server_deep_ping
101100

102-
# allow some time for container to be ready
103-
time.sleep(10)
104-
105-
log_generator = self.container.logs(follow=True, stream=True)
106-
time_limit = datetime.now() + timedelta(seconds=container_timeout_seconds)
107-
healthy = False
108101
while True:
109-
now = datetime.now()
110-
final_pull = now > time_limit
111-
pull_logs(
112-
(x.decode("UTF-8").rstrip() for x in log_generator),
113-
log_generator.close,
114-
datetime.now() + timedelta(seconds=_PING_HEALTH_CHECK_INTERVAL_SEC),
115-
now > time_limit,
116-
)
117-
118-
if final_pull:
119-
break
120-
121-
# allow some time for container to be ready
122102
time.sleep(10)
123103

124104
healthy, response = self._ping_container(predictor)
@@ -128,37 +108,4 @@ def create_server(
128108

129109
if not healthy:
130110
raise LocalDeepPingException(_PING_HEALTH_CHECK_FAIL_MSG)
131-
132-
def destroy_server(self):
133-
"""Placeholder docstring"""
134-
if self.container:
135-
try:
136-
logger.debug("Stopping currently running container...")
137-
self.container.kill()
138-
except docker.errors.APIError as exc:
139-
if exc.response.status_code < 400 or exc.response.status_code > 499:
140-
raise Exception("Error encountered when cleaning up local container") from exc
141-
self.container = None
142-
143-
def _pull_image(self, image: str):
144-
"""Placeholder docstring"""
145-
try:
146-
encoded_token = (
147-
self.ecr.get_authorization_token()
148-
.get("authorizationData")[0]
149-
.get("authorizationToken")
150-
)
151-
decoded_token = base64.b64decode(encoded_token).decode("utf-8")
152-
username, password = decoded_token.split(":")
153-
ecr_uri = image.split("/")[0]
154-
login_command = ["docker", "login", "-u", username, "-p", password, ecr_uri]
155-
subprocess.run(login_command, check=True, capture_output=True)
156-
except subprocess.CalledProcessError as e:
157-
logger.warning("Unable to login to ecr: %s", e)
158-
159-
self.client = docker.from_env()
160-
try:
161-
logger.info("Pulling image %s from repository...", image)
162-
self.client.images.pull(image)
163-
except docker.errors.NotFound as e:
164-
raise ValueError("Could not find remote image to pull") from e
111+

src/sagemaker/serve/model_server/multi_model_server/server.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,23 @@ def _invoke_multi_model_server_serving(self, request: object, content_type: str,
6868
raise Exception("Unable to send request to the local container server") from e
6969

7070
return (True, response)
71+
72+
def _multi_model_server_deep_ping(self, predictor: PredictorBase):
73+
"""Placeholder docstring"""
74+
response = None
75+
logger.debug("Performing multi model server deep ping")
76+
77+
# try:
78+
# response = predictor.predict(self.schema_builder.sample_input)
79+
# return True, response
80+
# # pylint: disable=broad-except
81+
# except Exception as e:
82+
# if "422 Client Error: Unprocessable Entity for url" in str(e):
83+
# raise LocalModelInvocationException(str(e))
84+
# return False, response
85+
86+
return (True, response)
87+
7188

7289
class LocalMultiModelServer:
7390
"""Local Multi Model server instance"""

src/sagemaker/serve/utils/exceptions.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Placeholder Docstring"""
1+
"""Exceptions used across different model builder invocations"""
22

33
from __future__ import absolute_import
44

@@ -22,6 +22,16 @@ class LocalDeepPingException(ModelBuilderException):
2222

2323
def __init__(self, message):
2424
super().__init__(message=message)
25+
26+
27+
class InProcessDeepPingException(ModelBuilderException):
28+
"""Raise when in process model serving does not pass the deep ping check"""
29+
30+
fmt = "Error Message: {message}"
31+
model_builder_error_code = 1
32+
33+
def __init__(self, message):
34+
super().__init__(message=message)
2535

2636

2737
class LocalModelOutOfMemoryException(ModelBuilderException):

src/sagemaker/serve/utils/predictors.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from sagemaker import Session
88
from sagemaker.serve.mode.local_container_mode import LocalContainerMode
9+
from sagemaker.serve.mode.in_process_mode import InProcessMode
910
from sagemaker.serve.builder.schema_builder import SchemaBuilder
1011
from sagemaker.serializers import IdentitySerializer, JSONSerializer
1112
from sagemaker.deserializers import BytesDeserializer, JSONDeserializer
@@ -209,6 +210,49 @@ def delete_predictor(self):
209210
self._mode_obj.destroy_server()
210211

211212

213+
class TransformersInProcessModePredictor(PredictorBase):
214+
"""Lightweight Transformers predictor for in-process deployment"""
215+
216+
def __init__(
217+
self,
218+
mode_obj: Type[InProcessMode],
219+
serializer=JSONSerializer(),
220+
deserializer=JSONDeserializer(),
221+
):
222+
self._mode_obj = mode_obj
223+
self.serializer = serializer
224+
self.deserializer = deserializer
225+
226+
def predict(self, data):
227+
"""Placeholder docstring"""
228+
return [
229+
self.deserializer.deserialize(
230+
io.BytesIO(
231+
self._mode_obj._invoke_multi_model_server_serving(
232+
self.serializer.serialize(data),
233+
self.content_type,
234+
self.deserializer.ACCEPT[0],
235+
)
236+
),
237+
self.content_type,
238+
)
239+
]
240+
241+
@property
242+
def content_type(self):
243+
"""The MIME type of the data sent to the inference endpoint."""
244+
return self.serializer.CONTENT_TYPE
245+
246+
@property
247+
def accept(self):
248+
"""The content type(s) that are expected from the inference endpoint."""
249+
return self.deserializer.ACCEPT
250+
251+
def delete_predictor(self):
252+
"""Shut down the in-process model server that you created in IN_PROCESS mode"""
253+
self._mode_obj.destroy_server()
254+
255+
212256
class TeiLocalModePredictor(PredictorBase):
213257
"""Lightweight Tei predictor for local deployment in IN_PROCESS and LOCAL_CONTAINER modes"""
214258

0 commit comments

Comments
 (0)