Merge branch 'master' into processing-job-codeartifact-support

akuma12 · web-flow · commit 6851e777f78b · 2024-05-02T10:41:59.000-05:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Changelog
 
+## v2.218.0 (2024-05-01)
+
+### Features
+
+ * set default allow_pickle param to False
+
+### Bug Fixes and Other Changes
+
+ * properly close files in lineage queries and tests
+
 ## v2.217.0 (2024-04-24)
 
 ### Features
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.217.1.dev0
+2.218.1.dev0
diff --git a/src/sagemaker/base_deserializers.py b/src/sagemaker/base_deserializers.py
@@ -196,14 +196,14 @@ class NumpyDeserializer(SimpleBaseDeserializer):
     single array.
     """
 
-    def __init__(self, dtype=None, accept="application/x-npy", allow_pickle=True):
+    def __init__(self, dtype=None, accept="application/x-npy", allow_pickle=False):
         """Initialize a ``NumpyDeserializer`` instance.
 
         Args:
             dtype (str): The dtype of the data (default: None).
             accept (union[str, tuple[str]]): The MIME type (or tuple of allowable MIME types) that
                 is expected from the inference endpoint (default: "application/x-npy").
-            allow_pickle (bool): Allow loading pickled object arrays (default: True).
+            allow_pickle (bool): Allow loading pickled object arrays (default: False).
         """
         super(NumpyDeserializer, self).__init__(accept=accept)
         self.dtype = dtype
@@ -227,10 +227,21 @@ def deserialize(self, stream, content_type):
             if content_type == "application/json":
                 return np.array(json.load(codecs.getreader("utf-8")(stream)), dtype=self.dtype)
             if content_type == "application/x-npy":
-                return np.load(io.BytesIO(stream.read()), allow_pickle=self.allow_pickle)
+                try:
+                    return np.load(io.BytesIO(stream.read()), allow_pickle=self.allow_pickle)
+                except ValueError as ve:
+                    raise ValueError(
+                        "Please set the param allow_pickle=True \
+                        to deserialize pickle objects in NumpyDeserializer"
+                    ).with_traceback(ve.__traceback__)
             if content_type == "application/x-npz":
                 try:
                     return np.load(io.BytesIO(stream.read()), allow_pickle=self.allow_pickle)
+                except ValueError as ve:
+                    raise ValueError(
+                        "Please set the param allow_pickle=True \
+                        to deserialize pickle objectsin NumpyDeserializer"
+                    ).with_traceback(ve.__traceback__)
                 finally:
                     stream.close()
         finally:
diff --git a/src/sagemaker/jumpstart/estimator.py b/src/sagemaker/jumpstart/estimator.py
@@ -734,7 +734,12 @@ def attach(
 
         model_version = model_version or "*"
 
-        additional_kwargs = {"model_id": model_id, "model_version": model_version}
+        additional_kwargs = {
+            "model_id": model_id,
+            "model_version": model_version,
+            "tolerate_vulnerable_model": True,  # model is already trained
+            "tolerate_deprecated_model": True,  # model is already trained
+        }
 
         model_specs = verify_model_region_and_return_specs(
             model_id=model_id,
diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py
@@ -1064,9 +1064,8 @@ def from_json(self, json_obj: Dict[str, Any]) -> None:
                 Dictionary representation of the config component.
         """
         for field in json_obj.keys():
-            if field not in self.__slots__:
-                raise ValueError(f"Invalid component field: {field}")
-            setattr(self, field, json_obj[field])
+            if field in self.__slots__:
+                setattr(self, field, json_obj[field])
 
 
 class JumpStartMetadataConfig(JumpStartDataHolderType):
diff --git a/tests/integ/sagemaker/jumpstart/constants.py b/tests/integ/sagemaker/jumpstart/constants.py
@@ -48,6 +48,7 @@ def _to_s3_path(filename: str, s3_prefix: Optional[str]) -> str:
     ("meta-textgeneration-llama-2-7b", "*"): ("training-datasets/sec_amazon/"),
     ("meta-textgeneration-llama-2-7b", "2.*"): ("training-datasets/sec_amazon/"),
     ("meta-textgeneration-llama-2-7b", "3.*"): ("training-datasets/sec_amazon/"),
+    ("meta-textgeneration-llama-2-7b", "4.*"): ("training-datasets/sec_amazon/"),
     ("meta-textgenerationneuron-llama-2-7b", "*"): ("training-datasets/sec_amazon/"),
 }
 
diff --git a/tests/integ/sagemaker/jumpstart/estimator/test_jumpstart_estimator.py b/tests/integ/sagemaker/jumpstart/estimator/test_jumpstart_estimator.py
@@ -140,7 +140,7 @@ def test_gated_model_training_v1(setup):
 def test_gated_model_training_v2(setup):
 
     model_id = "meta-textgeneration-llama-2-7b"
-    model_version = "3.*"  # model artifacts retrieved from jumpstart-private-cache-* buckets
+    model_version = "4.*"  # model artifacts retrieved from jumpstart-private-cache-* buckets
 
     estimator = JumpStartEstimator(
         model_id=model_id,
@@ -150,6 +150,7 @@ def test_gated_model_training_v2(setup):
         tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
         environment={"accept_eula": "true"},
         max_run=259200,  # avoid exceeding resource limits
+        tolerate_vulnerable_model=True,  # tolerate old version of model
     )
 
     # uses ml.g5.12xlarge instance
diff --git a/tests/unit/sagemaker/deserializers/test_deserializers.py b/tests/unit/sagemaker/deserializers/test_deserializers.py
@@ -142,7 +142,8 @@ def test_numpy_deserializer_from_npy(numpy_deserializer):
     assert np.array_equal(array, result)
 
 
-def test_numpy_deserializer_from_npy_object_array(numpy_deserializer):
+def test_numpy_deserializer_from_npy_object_array():
+    numpy_deserializer = NumpyDeserializer(allow_pickle=True)
     array = np.array([{"a": "", "b": ""}, {"c": "", "d": ""}])
     stream = io.BytesIO()
     np.save(stream, array)
diff --git a/tests/unit/sagemaker/jumpstart/estimator/test_estimator.py b/tests/unit/sagemaker/jumpstart/estimator/test_estimator.py
@@ -1010,6 +1010,8 @@ def test_jumpstart_estimator_attach_eula_model(
                 "model_id": "gemma-model",
                 "model_version": "*",
                 "environment": {"accept_eula": "true"},
+                "tolerate_vulnerable_model": True,
+                "tolerate_deprecated_model": True,
             },
         )
 
@@ -1053,6 +1055,8 @@ def test_jumpstart_estimator_attach_no_model_id_happy_case(
             additional_kwargs={
                 "model_id": "js-trainable-model-prepacked",
                 "model_version": "1.0.0",
+                "tolerate_vulnerable_model": True,
+                "tolerate_deprecated_model": True,
             },
         )
 
diff --git a/tests/unit/sagemaker/jumpstart/test_types.py b/tests/unit/sagemaker/jumpstart/test_types.py
@@ -1052,6 +1052,14 @@ def test_inference_configs_parsing():
     )
     assert list(config.config_components.keys()) == ["neuron-inference"]
 
+    spec = {
+        **BASE_SPEC,
+        **INFERENCE_CONFIGS,
+        **INFERENCE_CONFIG_RANKINGS,
+        "unrecognized-field": "blah",  # Unrecognized metadata fields should be ignored
+    }
+    specs1 = JumpStartModelSpecs(spec)
+
 
 def test_set_inference_configs():
     spec = {**BASE_SPEC, **INFERENCE_CONFIGS, **INFERENCE_CONFIG_RANKINGS}

Original file line number	Diff line number	Diff line change
`@@ -48,6 +48,7 @@ def _to_s3_path(filename: str, s3_prefix: Optional[str]) -> str:`
`48`	`48`	`("meta-textgeneration-llama-2-7b", "*"): ("training-datasets/sec_amazon/"),`
`49`	`49`	`("meta-textgeneration-llama-2-7b", "2.*"): ("training-datasets/sec_amazon/"),`
`50`	`50`	`("meta-textgeneration-llama-2-7b", "3.*"): ("training-datasets/sec_amazon/"),`
	`51`	`+ ("meta-textgeneration-llama-2-7b", "4.*"): ("training-datasets/sec_amazon/"),`
`51`	`52`	`("meta-textgenerationneuron-llama-2-7b", "*"): ("training-datasets/sec_amazon/"),`
`52`	`53`	`}`
`53`	`54`