Merge branch 'master' into pre-post-processing-hotfix

gwang111 · web-flow · commit 0719f52b093c · 2024-12-19T10:20:28.000-08:00
diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py
@@ -1433,15 +1433,15 @@ def _model_builder_optimize_wrapper(
 
             # HF Model ID format = "meta-llama/Meta-Llama-3.1-8B"
             # JS Model ID format = "meta-textgeneration-llama-3-1-8b"
-            llama_3_1_keywords = ["llama-3.1", "llama-3-1"]
-            is_llama_3_1 = self.model and any(
-                keyword in self.model.lower() for keyword in llama_3_1_keywords
+            is_llama_3_plus = self.model and bool(
+                re.search(r"llama-3[\.\-][1-9]\d*", self.model.lower())
             )
 
             if is_gpu_instance and self.model and self.is_compiled:
-                if is_llama_3_1:
+                if is_llama_3_plus:
                     raise ValueError(
-                        "Compilation is not supported for Llama-3.1 with a GPU instance."
+                        "Compilation is not supported for models greater "
+                        "than Llama-3.0 with a GPU instance."
                     )
                 if speculative_decoding_config:
                     raise ValueError(
diff --git a/src/sagemaker/serve/model_server/multi_model_server/inference.py b/src/sagemaker/serve/model_server/multi_model_server/inference.py
@@ -45,11 +45,11 @@ def input_fn(input_data, content_type, context=None):
     try:
         if hasattr(schema_builder, "custom_input_translator"):
             deserialized_data = schema_builder.custom_input_translator.deserialize(
-                io.BytesIO(input_data), content_type
+                io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type
             )
         else:
             deserialized_data = schema_builder.input_deserializer.deserialize(
-                io.BytesIO(input_data), content_type[0]
+                io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type[0]
             )
 
         # Check if preprocess method is defined and call it
diff --git a/src/sagemaker/serve/model_server/torchserve/inference.py b/src/sagemaker/serve/model_server/torchserve/inference.py
@@ -67,11 +67,11 @@ def input_fn(input_data, content_type):
     try:
         if hasattr(schema_builder, "custom_input_translator"):
             deserialized_data = schema_builder.custom_input_translator.deserialize(
-                io.BytesIO(input_data), content_type
+                io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type
             )
         else:
             deserialized_data = schema_builder.input_deserializer.deserialize(
-                io.BytesIO(input_data), content_type[0]
+                io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type[0]
             )
 
         # Check if preprocess method is defined and call it
diff --git a/src/sagemaker/serve/model_server/torchserve/xgboost_inference.py b/src/sagemaker/serve/model_server/torchserve/xgboost_inference.py
@@ -70,11 +70,11 @@ def input_fn(input_data, content_type):
     try:
         if hasattr(schema_builder, "custom_input_translator"):
             return schema_builder.custom_input_translator.deserialize(
-                io.BytesIO(input_data), content_type
+                io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type
             )
         else:
             return schema_builder.input_deserializer.deserialize(
-                io.BytesIO(input_data), content_type[0]
+                io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type[0]
             )
     except Exception as e:
         raise Exception("Encountered error in deserialize_request.") from e
diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py
@@ -3270,7 +3270,7 @@ def test_optimize_with_gpu_instance_and_llama_3_1_and_compilation(
 
         mock_pysdk_model = Mock()
         mock_pysdk_model.model_data = None
-        mock_pysdk_model.env = {"HF_MODEL_ID": "meta-llama/Meta-Llama-3-1-8B-Instruct"}
+        mock_pysdk_model.env = {"HF_MODEL_ID": "meta-llama/Meta-Llama-3-2-8B-Instruct"}
 
         sample_input = {"inputs": "dummy prompt", "parameters": {}}
 
@@ -3279,7 +3279,7 @@ def test_optimize_with_gpu_instance_and_llama_3_1_and_compilation(
         dummy_schema_builder = SchemaBuilder(sample_input, sample_output)
 
         model_builder = ModelBuilder(
-            model="meta-llama/Meta-Llama-3-1-8B-Instruct",
+            model="meta-llama/Meta-Llama-3-2-8B-Instruct",
             schema_builder=dummy_schema_builder,
             env_vars={"HF_TOKEN": "token"},
             model_metadata={
@@ -3293,7 +3293,7 @@ def test_optimize_with_gpu_instance_and_llama_3_1_and_compilation(
 
         self.assertRaisesRegex(
             ValueError,
-            "Compilation is not supported for Llama-3.1 with a GPU instance.",
+            "Compilation is not supported for models greater than Llama-3.0 with a GPU instance.",
             lambda: model_builder.optimize(
                 job_name="job_name-123",
                 instance_type="ml.g5.24xlarge",

Original file line number	Diff line number	Diff line change
`@@ -45,11 +45,11 @@ def input_fn(input_data, content_type, context=None):`
`45`	`45`	`try:`
`46`	`46`	`if hasattr(schema_builder, "custom_input_translator"):`
`47`	`47`	`deserialized_data = schema_builder.custom_input_translator.deserialize(`
`48`		`- io.BytesIO(input_data), content_type`
	`48`	`+ io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type`
`49`	`49`	`)`
`50`	`50`	`else:`
`51`	`51`	`deserialized_data = schema_builder.input_deserializer.deserialize(`
`52`		`- io.BytesIO(input_data), content_type[0]`
	`52`	`+ io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type[0]`
`53`	`53`	`)`
`54`	`54`
`55`	`55`	`# Check if preprocess method is defined and call it`
Original file line number	Diff line number	Diff line change
`@@ -67,11 +67,11 @@ def input_fn(input_data, content_type):`
`67`	`67`	`try:`
`68`	`68`	`if hasattr(schema_builder, "custom_input_translator"):`
`69`	`69`	`deserialized_data = schema_builder.custom_input_translator.deserialize(`
`70`		`- io.BytesIO(input_data), content_type`
	`70`	`+ io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type`
`71`	`71`	`)`
`72`	`72`	`else:`
`73`	`73`	`deserialized_data = schema_builder.input_deserializer.deserialize(`
`74`		`- io.BytesIO(input_data), content_type[0]`
	`74`	`+ io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type[0]`
`75`	`75`	`)`
`76`	`76`
`77`	`77`	`# Check if preprocess method is defined and call it`
Original file line number	Diff line number	Diff line change
`@@ -70,11 +70,11 @@ def input_fn(input_data, content_type):`
`70`	`70`	`try:`
`71`	`71`	`if hasattr(schema_builder, "custom_input_translator"):`
`72`	`72`	`return schema_builder.custom_input_translator.deserialize(`
`73`		`- io.BytesIO(input_data), content_type`
	`73`	`+ io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type`
`74`	`74`	`)`
`75`	`75`	`else:`
`76`	`76`	`return schema_builder.input_deserializer.deserialize(`
`77`		`- io.BytesIO(input_data), content_type[0]`
	`77`	`+ io.BytesIO(input_data) if type(input_data)== bytes else io.BytesIO(input_data.encode('utf-8')), content_type[0]`
`78`	`78`	`)`
`79`	`79`	`except Exception as e:`
`80`	`80`	`raise Exception("Encountered error in deserialize_request.") from e`