Skip to content

Commit 0719f52

Browse files
authored
Merge branch 'master' into pre-post-processing-hotfix
2 parents f95837f + beb23ec commit 0719f52

File tree

5 files changed: 14 additions (+14) and 14 deletions (-14)

5 files changed

+14
-14
lines changed

src/sagemaker/serve/builder/model_builder.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1433,15 +1433,15 @@ def _model_builder_optimize_wrapper(
14331433

14341434
# HF Model ID format = "meta-llama/Meta-Llama-3.1-8B"
14351435
# JS Model ID format = "meta-textgeneration-llama-3-1-8b"
1436-
llama_3_1_keywords = ["llama-3.1", "llama-3-1"]
1437-
is_llama_3_1 = self.model and any(
1438-
keyword in self.model.lower() for keyword in llama_3_1_keywords
1436+
is_llama_3_plus = self.model and bool(
1437+
re.search(r"llama-3[\.\-][1-9]\d*", self.model.lower())
14391438
)
14401439

14411440
if is_gpu_instance and self.model and self.is_compiled:
1442-
if is_llama_3_1:
1441+
if is_llama_3_plus:
14431442
raise ValueError(
1444-
"Compilation is not supported for Llama-3.1 with a GPU instance."
1443+
"Compilation is not supported for models greater "
1444+
"than Llama-3.0 with a GPU instance."
14451445
)
14461446
if speculative_decoding_config:
14471447
raise ValueError(

src/sagemaker/serve/model_server/multi_model_server/inference.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,11 +45,11 @@ def input_fn(input_data, content_type, context=None):
4545
try:
4646
if hasattr(schema_builder, "custom_input_translator"):
4747
deserialized_data = schema_builder.custom_input_translator.deserialize(
48-
io.BytesIO(input_data), content_type
48+
io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type
4949
)
5050
else:
5151
deserialized_data = schema_builder.input_deserializer.deserialize(
52-
io.BytesIO(input_data), content_type[0]
52+
io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type[0]
5353
)
5454

5555
# Check if preprocess method is defined and call it

src/sagemaker/serve/model_server/torchserve/inference.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -67,11 +67,11 @@ def input_fn(input_data, content_type):
6767
try:
6868
if hasattr(schema_builder, "custom_input_translator"):
6969
deserialized_data = schema_builder.custom_input_translator.deserialize(
70-
io.BytesIO(input_data), content_type
70+
io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type
7171
)
7272
else:
7373
deserialized_data = schema_builder.input_deserializer.deserialize(
74-
io.BytesIO(input_data), content_type[0]
74+
io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type[0]
7575
)
7676

7777
# Check if preprocess method is defined and call it

src/sagemaker/serve/model_server/torchserve/xgboost_inference.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,11 +70,11 @@ def input_fn(input_data, content_type):
7070
try:
7171
if hasattr(schema_builder, "custom_input_translator"):
7272
return schema_builder.custom_input_translator.deserialize(
73-
io.BytesIO(input_data), content_type
73+
io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type
7474
)
7575
else:
7676
return schema_builder.input_deserializer.deserialize(
77-
io.BytesIO(input_data), content_type[0]
77+
io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type[0]
7878
)
7979
except Exception as e:
8080
raise Exception("Encountered error in deserialize_request.") from e

tests/unit/sagemaker/serve/builder/test_model_builder.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3270,7 +3270,7 @@ def test_optimize_with_gpu_instance_and_llama_3_1_and_compilation(
32703270

32713271
mock_pysdk_model = Mock()
32723272
mock_pysdk_model.model_data = None
3273-
mock_pysdk_model.env = {"HF_MODEL_ID": "meta-llama/Meta-Llama-3-1-8B-Instruct"}
3273+
mock_pysdk_model.env = {"HF_MODEL_ID": "meta-llama/Meta-Llama-3-2-8B-Instruct"}
32743274

32753275
sample_input = {"inputs": "dummy prompt", "parameters": {}}
32763276

@@ -3279,7 +3279,7 @@ def test_optimize_with_gpu_instance_and_llama_3_1_and_compilation(
32793279
dummy_schema_builder = SchemaBuilder(sample_input, sample_output)
32803280

32813281
model_builder = ModelBuilder(
3282-
model="meta-llama/Meta-Llama-3-1-8B-Instruct",
3282+
model="meta-llama/Meta-Llama-3-2-8B-Instruct",
32833283
schema_builder=dummy_schema_builder,
32843284
env_vars={"HF_TOKEN": "token"},
32853285
model_metadata={
@@ -3293,7 +3293,7 @@ def test_optimize_with_gpu_instance_and_llama_3_1_and_compilation(
32933293

32943294
self.assertRaisesRegex(
32953295
ValueError,
3296-
"Compilation is not supported for Llama-3.1 with a GPU instance.",
3296+
"Compilation is not supported for models greater than Llama-3.0 with a GPU instance.",
32973297
lambda: model_builder.optimize(
32983298
job_name="job_name-123",
32993299
instance_type="ml.g5.24xlarge",

0 commit comments

Comments
 (0)