Merge branch 'master' into master

tlian25 · web-flow · commit b9f456f8c6eb · 2024-08-20T09:03:47.000-04:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,36 @@
 # Changelog
 
+## v2.229.0 (2024-08-15)
+
+### Features
+
+ * Support for ModelBuilder In_Process Mode (1/2)
+ * Pulling in dependencies (in_process mode) using conda environment
+ * Add optional CodeArtifact login to FrameworkProcessing job script
+ * implemented security-monitoring to send metrics to CW #1510
+
+### Bug Fixes and Other Changes
+
+ * alt configs model deployment and training issues
+ * fix keras extension in integ test
+ * update image_uri_configs  08-13-2024 07:17:54 PST
+ * trn1 instance family does not support volume size
+ * Update model.py
+ * removed log statement
+ * update image_uri_configs  08-09-2024 07:18:00 PST
+ * Added torchrun compatibility for distributed training across multiple GPUs in a single node (single instance)
+ * BiasConfig type hint
+ * add model monitor image accounts for ap-southeast-5 and eu-central-2
+ * aligned UTC times with PST
+ * ensure hpt jobs inherit tags from config
+ * add JumpStart PDT and OSU regions
+ * chore(deps): bump certifi in /src/sagemaker/serve/utils
+ * Updates for DJL 0.29.0 release
+ * chore(deps): bump apache-airflow from 2.9.2 to 2.9.3 in /requirements/extras
+ * chore(deps): bump torch from 2.0.1 to 2.2.0 in /tests/data/serve_resources/mlflow/pytorch
+ * avoided printing stack trace and escaped input
+ * removing kwargs as this is breaking predictor_cls param for mode…
+
 ## v2.228.0 (2024-08-06)
 
 ### Features
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.228.1.dev0
+2.229.1.dev0
diff --git a/tests/data/tensorflow_mnist/mnist_v2.py b/tests/data/tensorflow_mnist/mnist_v2.py
@@ -198,7 +198,7 @@ def main(args):
 
         if args.current_host == args.hosts[0]:
             ckpt_manager.save()
-            net.save("/opt/ml/model/1.keras")
+            net.save("/opt/ml/model/1")
 
 
 if __name__ == "__main__":
diff --git a/tests/integ/test_tf.py b/tests/integ/test_tf.py
@@ -85,6 +85,11 @@ def test_mnist_with_checkpoint_config(
     tensorflow_training_latest_version,
     tensorflow_training_latest_py_version,
 ):
+    if Version(tensorflow_training_latest_version) >= Version("2.16"):
+        pytest.skip(
+            "This test is failing in TensorFlow 2.16 beacuse of an upstream bug: "
+            "https://github.com/tensorflow/io/issues/2039"
+        )
     checkpoint_s3_uri = "s3://{}/checkpoints/tf-{}".format(
         sagemaker_session.default_bucket(), sagemaker_timestamp()
     )
@@ -235,6 +240,11 @@ def test_mnist_distributed_cpu(
     tensorflow_training_latest_version,
     tensorflow_training_latest_py_version,
 ):
+    if Version(tensorflow_training_latest_version) >= Version("2.16"):
+        pytest.skip(
+            "This test is failing in TensorFlow 2.16 beacuse of an upstream bug: "
+            "https://github.com/tensorflow/io/issues/2039"
+        )
     _create_and_fit_estimator(
         sagemaker_session,
         tensorflow_training_latest_version,
diff --git a/tests/integ/test_tuner.py b/tests/integ/test_tuner.py
@@ -19,6 +19,7 @@
 import numpy as np
 import pytest
 from botocore.exceptions import ClientError
+from packaging.version import Version
 
 import tests.integ
 from sagemaker import KMeans, LDA, RandomCutForest, image_uris
@@ -691,6 +692,11 @@ def test_tuning_tf(
     tensorflow_training_latest_version,
     tensorflow_training_latest_py_version,
 ):
+    if Version(tensorflow_training_latest_version) >= Version("2.16"):
+        pytest.skip(
+            "This test is failing in TensorFlow 2.16 beacuse of an upstream bug: "
+            "https://github.com/tensorflow/io/issues/2039"
+        )
     resource_path = os.path.join(DATA_DIR, "tensorflow_mnist")
     script_path = "mnist.py"
 
@@ -735,6 +741,11 @@ def test_tuning_tf_vpc_multi(
     tensorflow_training_latest_py_version,
 ):
     """Test Tensorflow multi-instance using the same VpcConfig for training and inference"""
+    if Version(tensorflow_training_latest_version) >= Version("2.16"):
+        pytest.skip(
+            "This test is failing in TensorFlow 2.16 beacuse of an upstream bug: "
+            "https://github.com/tensorflow/io/issues/2039"
+        )
     instance_type = cpu_instance_type
     instance_count = 2