Skip to content

Commit 351be13

Browse files
authored
Merge branch 'master' into fix-ssh-policy
2 parents 1444c87 + 10b64f6 commit 351be13

File tree

14 files changed

+511
-10
lines changed

14 files changed

+511
-10
lines changed

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,28 @@
11
# Changelog
22

3+
## v2.238.0 (2025-01-29)
4+
5+
### Features
6+
7+
* use jumpstart deployment config image as default optimization image
8+
9+
### Bug Fixes and Other Changes
10+
11+
* chore: add new images for HF TGI
12+
* update image_uri_configs 01-29-2025 06:18:08 PST
13+
* skip TF tests for unsupported versions
14+
* Merge branch 'master-rba' into local_merge
15+
* Add missing attributes to local resourceconfig
16+
* update image_uri_configs 01-27-2025 06:18:13 PST
17+
* update image_uri_configs 01-24-2025 06:18:11 PST
18+
* add missing schema definition in docs
19+
* Omegaconf upgrade
20+
* SageMaker @remote function: Added multi-node functionality
21+
* remove option
22+
* fix typo
23+
* fix tests
24+
* Add an option for user to remove inputs and container artifacts when using local model trainer
25+
326
## v2.237.3 (2025-01-09)
427

528
### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.237.4.dev0
1+
2.238.1.dev0

src/sagemaker/fw_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@
152152
"2.1.0",
153153
"2.1.2",
154154
"2.2.0",
155+
"2.3.0",
155156
"2.3.1",
156157
"2.4.1",
157158
]

src/sagemaker/image_uri_config/huggingface-llm.json

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
"1.2": "1.2.0",
1313
"1.3": "1.3.3",
1414
"1.4": "1.4.5",
15-
"2.0": "2.3.1"
15+
"2.0": "2.4.0",
16+
"3.0": "3.0.1"
1617
},
1718
"versions": {
1819
"0.6.0": {
@@ -766,6 +767,100 @@
766767
"container_version": {
767768
"gpu": "cu124-ubuntu22.04"
768769
}
770+
},
771+
"2.4.0": {
772+
"py_versions": [
773+
"py311"
774+
],
775+
"registries": {
776+
"af-south-1": "626614931356",
777+
"il-central-1": "780543022126",
778+
"ap-east-1": "871362719292",
779+
"ap-northeast-1": "763104351884",
780+
"ap-northeast-2": "763104351884",
781+
"ap-northeast-3": "364406365360",
782+
"ap-south-1": "763104351884",
783+
"ap-south-2": "772153158452",
784+
"ap-southeast-1": "763104351884",
785+
"ap-southeast-2": "763104351884",
786+
"ap-southeast-3": "907027046896",
787+
"ap-southeast-4": "457447274322",
788+
"ca-central-1": "763104351884",
789+
"cn-north-1": "727897471807",
790+
"cn-northwest-1": "727897471807",
791+
"eu-central-1": "763104351884",
792+
"eu-central-2": "380420809688",
793+
"eu-north-1": "763104351884",
794+
"eu-west-1": "763104351884",
795+
"eu-west-2": "763104351884",
796+
"eu-west-3": "763104351884",
797+
"eu-south-1": "692866216735",
798+
"eu-south-2": "503227376785",
799+
"me-south-1": "217643126080",
800+
"me-central-1": "914824155844",
801+
"sa-east-1": "763104351884",
802+
"us-east-1": "763104351884",
803+
"us-east-2": "763104351884",
804+
"us-gov-east-1": "446045086412",
805+
"us-gov-west-1": "442386744353",
806+
"us-iso-east-1": "886529160074",
807+
"us-isob-east-1": "094389454867",
808+
"us-west-1": "763104351884",
809+
"us-west-2": "763104351884",
810+
"ca-west-1": "204538143572"
811+
},
812+
"tag_prefix": "2.4.0-tgi2.4.0",
813+
"repository": "huggingface-pytorch-tgi-inference",
814+
"container_version": {
815+
"gpu": "cu124-ubuntu22.04-v2.2"
816+
}
817+
},
818+
"3.0.1": {
819+
"py_versions": [
820+
"py311"
821+
],
822+
"registries": {
823+
"af-south-1": "626614931356",
824+
"il-central-1": "780543022126",
825+
"ap-east-1": "871362719292",
826+
"ap-northeast-1": "763104351884",
827+
"ap-northeast-2": "763104351884",
828+
"ap-northeast-3": "364406365360",
829+
"ap-south-1": "763104351884",
830+
"ap-south-2": "772153158452",
831+
"ap-southeast-1": "763104351884",
832+
"ap-southeast-2": "763104351884",
833+
"ap-southeast-3": "907027046896",
834+
"ap-southeast-4": "457447274322",
835+
"ca-central-1": "763104351884",
836+
"cn-north-1": "727897471807",
837+
"cn-northwest-1": "727897471807",
838+
"eu-central-1": "763104351884",
839+
"eu-central-2": "380420809688",
840+
"eu-north-1": "763104351884",
841+
"eu-west-1": "763104351884",
842+
"eu-west-2": "763104351884",
843+
"eu-west-3": "763104351884",
844+
"eu-south-1": "692866216735",
845+
"eu-south-2": "503227376785",
846+
"me-south-1": "217643126080",
847+
"me-central-1": "914824155844",
848+
"sa-east-1": "763104351884",
849+
"us-east-1": "763104351884",
850+
"us-east-2": "763104351884",
851+
"us-gov-east-1": "446045086412",
852+
"us-gov-west-1": "442386744353",
853+
"us-iso-east-1": "886529160074",
854+
"us-isob-east-1": "094389454867",
855+
"us-west-1": "763104351884",
856+
"us-west-2": "763104351884",
857+
"ca-west-1": "204538143572"
858+
},
859+
"tag_prefix": "2.4.0-tgi3.0.1",
860+
"repository": "huggingface-pytorch-tgi-inference",
861+
"container_version": {
862+
"gpu": "cu124-ubuntu22.04-v2.1"
863+
}
769864
}
770865
}
771866
}

src/sagemaker/image_uri_config/huggingface.json

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
"4.17": "4.17.0",
1414
"4.26": "4.26.0",
1515
"4.28": "4.28.1",
16-
"4.36": "4.36.0"
16+
"4.36": "4.36.0",
17+
"4.46": "4.46.1"
1718
},
1819
"versions": {
1920
"4.4.2": {
@@ -1018,6 +1019,53 @@
10181019
"gpu": "cu121-ubuntu20.04"
10191020
}
10201021
}
1022+
},
1023+
"4.46.1": {
1024+
"version_aliases": {
1025+
"pytorch2.3": "pytorch2.3.0"
1026+
},
1027+
"pytorch2.3.0": {
1028+
"py_versions": [
1029+
"py311"
1030+
],
1031+
"registries": {
1032+
"af-south-1": "626614931356",
1033+
"il-central-1": "780543022126",
1034+
"ap-east-1": "871362719292",
1035+
"ap-northeast-1": "763104351884",
1036+
"ap-northeast-2": "763104351884",
1037+
"ap-northeast-3": "364406365360",
1038+
"ap-south-1": "763104351884",
1039+
"ap-southeast-1": "763104351884",
1040+
"ap-southeast-2": "763104351884",
1041+
"ap-southeast-3": "907027046896",
1042+
"ca-central-1": "763104351884",
1043+
"cn-north-1": "727897471807",
1044+
"cn-northwest-1": "727897471807",
1045+
"eu-central-1": "763104351884",
1046+
"eu-north-1": "763104351884",
1047+
"eu-west-1": "763104351884",
1048+
"eu-west-2": "763104351884",
1049+
"eu-west-3": "763104351884",
1050+
"eu-south-1": "692866216735",
1051+
"me-south-1": "217643126080",
1052+
"me-central-1": "914824155844",
1053+
"sa-east-1": "763104351884",
1054+
"us-east-1": "763104351884",
1055+
"us-east-2": "763104351884",
1056+
"us-gov-east-1": "446045086412",
1057+
"us-gov-west-1": "442386744353",
1058+
"us-iso-east-1": "886529160074",
1059+
"us-isob-east-1": "094389454867",
1060+
"us-west-1": "763104351884",
1061+
"us-west-2": "763104351884",
1062+
"ca-west-1": "204538143572"
1063+
},
1064+
"repository": "huggingface-pytorch-training",
1065+
"container_version": {
1066+
"gpu": "cu121-ubuntu20.04"
1067+
}
1068+
}
10211069
}
10221070
}
10231071
},

src/sagemaker/image_uri_config/sagemaker-base-python.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"ap-southeast-1": "492261229750",
1212
"ap-southeast-2": "452832661640",
1313
"ap-southeast-3": "276181064229",
14+
"ap-southeast-5": "148761635175",
1415
"ca-central-1": "310906938811",
1516
"cn-north-1": "390048526115",
1617
"cn-northwest-1": "390780980154",

src/sagemaker/serve/builder/jumpstart_builder.py

Lines changed: 111 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import re
1818
from abc import ABC, abstractmethod
1919
from datetime import datetime, timedelta
20-
from typing import Type, Any, List, Dict, Optional
20+
from typing import Type, Any, List, Dict, Optional, Tuple
2121
import logging
2222

2323
from botocore.exceptions import ClientError
@@ -82,6 +82,7 @@
8282
ModelServer.DJL_SERVING,
8383
ModelServer.TGI,
8484
}
85+
_JS_MINIMUM_VERSION_IMAGE = "{}:0.31.0-lmi13.0.0-cu124"
8586

8687
logger = logging.getLogger(__name__)
8788

@@ -829,7 +830,13 @@ def _optimize_for_jumpstart(
829830
self.pysdk_model._enable_network_isolation = False
830831

831832
if quantization_config or sharding_config or is_compilation:
832-
return create_optimization_job_args
833+
# only apply default image for vLLM usecases.
834+
# vLLM does not support compilation for now so skip on compilation
835+
return (
836+
create_optimization_job_args
837+
if is_compilation
838+
else self._set_optimization_image_default(create_optimization_job_args)
839+
)
833840
return None
834841

835842
def _is_gated_model(self, model=None) -> bool:
@@ -986,3 +993,105 @@ def _get_neuron_model_env_vars(
986993
)
987994
return job_model.env
988995
return None
996+
997+
def _set_optimization_image_default(
    self, create_optimization_job_args: Dict[str, Any]
) -> Dict[str, Any]:
    """Defaults the optimization image to the JumpStart deployment config default.

    Picks the newest LMI image among the JumpStart default and any user-provided
    images, then applies that single image to every quantization/sharding config
    so the optimization job runs with one consistent container.

    Args:
        create_optimization_job_args (Dict[str, Any]): create optimization job request

    Returns:
        Dict[str, Any]: create optimization job request with image uri default
    """
    default_image = self._get_default_vllm_image(self.pysdk_model.init_kwargs["image_uri"])

    # Both config types carry an optional "Image" field and are treated identically.
    _CONFIG_KEYS = ("ModelQuantizationConfig", "ModelShardingConfig")

    # find the latest vLLM image version among the default and any provided images
    for optimization_config in create_optimization_job_args.get("OptimizationConfigs"):
        for config_key in _CONFIG_KEYS:
            config = optimization_config.get(config_key)
            if config:
                provided_image = config.get("Image")
                if provided_image and self._get_latest_lmi_version_from_list(
                    default_image, provided_image
                ):
                    default_image = provided_image

    # default every config to the latest vLLM version found above
    for optimization_config in create_optimization_job_args.get("OptimizationConfigs"):
        for config_key in _CONFIG_KEYS:
            if optimization_config.get(config_key) is not None:
                optimization_config[config_key]["Image"] = default_image

    logger.info("Defaulting to %s image for optimization job", default_image)

    return create_optimization_job_args
1037+
1038+
def _get_default_vllm_image(self, image: str) -> str:
    """Ensures the minimum working image version for vLLM enabled optimization techniques.

    Args:
        image (str): JumpStart provided default image

    Returns:
        str: the given image if it already meets the minimum LMI major version,
            otherwise the minimum supported image built for the same DLC repository
    """
    # Repository part of "repo:tag"; reused when formatting the fallback image.
    dlc_name, _ = image.split(":")
    major_version_number, _, _ = self._parse_lmi_version(image)

    # Fall back to the minimum supported LMI image when the JumpStart default
    # predates the first vLLM-capable release (compare major versions only).
    if major_version_number < self._parse_lmi_version(_JS_MINIMUM_VERSION_IMAGE)[0]:
        minimum_version_default = _JS_MINIMUM_VERSION_IMAGE.format(dlc_name)
        return minimum_version_default
    return image
1054+
1055+
def _get_latest_lmi_version_from_list(self, version: str, version_to_compare: str) -> bool:
1056+
"""LMI version comparator
1057+
1058+
Args:
1059+
version (str): current version
1060+
version_to_compare (str): version to compare to
1061+
1062+
Returns:
1063+
bool: if version_to_compare larger or equal to version
1064+
"""
1065+
parse_lmi_version = self._parse_lmi_version(version)
1066+
parse_lmi_version_to_compare = self._parse_lmi_version(version_to_compare)
1067+
1068+
# Check major version
1069+
if parse_lmi_version_to_compare[0] > parse_lmi_version[0]:
1070+
return True
1071+
# Check minor version
1072+
if parse_lmi_version_to_compare[0] == parse_lmi_version[0]:
1073+
if parse_lmi_version_to_compare[1] > parse_lmi_version[1]:
1074+
return True
1075+
if parse_lmi_version_to_compare[1] == parse_lmi_version[1]:
1076+
# Check patch version
1077+
if parse_lmi_version_to_compare[2] >= parse_lmi_version[2]:
1078+
return True
1079+
return False
1080+
return False
1081+
return False
1082+
1083+
def _parse_lmi_version(self, image: str) -> Tuple[int, int, int]:
1084+
"""Parse out LMI version
1085+
1086+
Args:
1087+
image (str): image to parse version out of
1088+
1089+
Returns:
1090+
Tuple[int, int, int]: LMI version split into major, minor, patch
1091+
"""
1092+
_, dlc_tag = image.split(":")
1093+
_, lmi_version, _ = dlc_tag.split("-")
1094+
major_version, minor_version, patch_version = lmi_version.split(".")
1095+
major_version_number = major_version[3:]
1096+
1097+
return (int(major_version_number), int(minor_version), int(patch_version))

src/sagemaker/serve/model_format/mlflow/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
"py38": "1.12.1",
1919
"py39": "1.13.1",
2020
"py310": "2.2.0",
21+
"py311": "2.3.0",
2122
}
2223
MODEL_PACKAGE_ARN_REGEX = (
2324
r"^arn:aws:sagemaker:[a-z0-9\-]+:[0-9]{12}:model-package\/(.*?)(?:/(\d+))?$"

tests/conftest.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,8 @@ def huggingface_pytorch_training_version(huggingface_training_version):
293293

294294
@pytest.fixture(scope="module")
295295
def huggingface_pytorch_training_py_version(huggingface_pytorch_training_version):
296+
if Version(huggingface_pytorch_training_version) >= Version("2.3"):
297+
return "py311"
296298
if Version(huggingface_pytorch_training_version) >= Version("2.0"):
297299
return "py310"
298300
elif Version(huggingface_pytorch_training_version) >= Version("1.13"):
@@ -355,6 +357,8 @@ def huggingface_training_compiler_pytorch_py_version(
355357
def huggingface_pytorch_latest_training_py_version(
356358
huggingface_training_pytorch_latest_version,
357359
):
360+
if Version(huggingface_training_pytorch_latest_version) >= Version("2.3"):
361+
return "py311"
358362
if Version(huggingface_training_pytorch_latest_version) >= Version("2.0"):
359363
return "py310"
360364
elif Version(huggingface_training_pytorch_latest_version) >= Version("1.13"):

tests/integ/sagemaker/jumpstart/private_hub/model/test_jumpstart_private_hub_model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ def test_jumpstart_hub_gated_model(setup, add_model_references):
105105
assert response is not None
106106

107107

108+
@pytest.mark.skip(reason="blocking PR checks and release pipeline.")
108109
def test_jumpstart_gated_model_inference_component_enabled(setup, add_model_references):
109110

110111
model_id = "meta-textgeneration-llama-2-7b"

0 commit comments

Comments
 (0)