diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000000..32c8d1f255 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,35 @@ +name: "CodeQL" +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + schedule: + - cron: '30 8 * * *' +jobs: + analyze: + name: Analyze (${{ matrix.language }}) + runs-on: ${{ 'ubuntu-latest' }} + permissions: + security-events: write + packages: read + + strategy: + matrix: + include: + - language: python + build-mode: none + - language: java-kotlin + build-mode: none + steps: + - name: Checkout repository + uses: actions/checkout@6ccd57f4c5d15bdc2fef309bd9fb6cc9db2ef1c6 + - name: Initialize CodeQL + uses: github/codeql-action/init@4b1d7da102ff94aca014c0245062b1a463356d72 + with: + languages: ${{ matrix.language }} + build-mode: ${{ matrix.build-mode }} + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@4b1d7da102ff94aca014c0245062b1a463356d72 + with: + category: "/language:${{matrix.language}}" diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b74f91c21..44c9cac322 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## v2.226.1 (2024-07-17) + +## v2.226.0 (2024-07-12) + +### Features + + * Curated hub improvements + * InferenceSpec support for MMS and testing + +### Bug Fixes and Other Changes + + * ModelBuilder not passing HF_TOKEN to model. 
+ * update image_uri_configs 07-10-2024 07:18:04 PST + ## v2.225.0 (2024-07-10) ### Features diff --git a/VERSION b/VERSION index d3dc17b647..e7c5debeda 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.225.1.dev0 +2.226.2.dev0 diff --git a/src/sagemaker/config/config_utils.py b/src/sagemaker/config/config_utils.py index 70f2764529..0991c39b9e 100644 --- a/src/sagemaker/config/config_utils.py +++ b/src/sagemaker/config/config_utils.py @@ -18,8 +18,10 @@ from collections import deque import logging +import re import sys from typing import Callable +from copy import deepcopy def get_sagemaker_config_logger(): @@ -67,6 +69,19 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config """ logger = get_sagemaker_config_logger() + source_value_log_copy = deepcopy(source_value) + config_value_log_copy = deepcopy(config_value) + + if isinstance(source_value_log_copy, dict): + for key in source_value_log_copy.keys(): + if re.search(r'(secret|password|key|token)', key, re.IGNORECASE): + source_value_log_copy[key] = '***' + + if isinstance(config_value_log_copy, dict): + for key in config_value_log_copy.keys(): + if re.search(r'(secret|password|key|token)', key, re.IGNORECASE): + config_value_log_copy[key] = '***' + if config_value is not None: if source_value is None: @@ -79,7 +94,7 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config logger.debug( "Applied value\n config key = %s\n config value that will be used = %s", config_key_path, - config_value, + config_value_log_copy, ) else: logger.info( @@ -102,8 +117,8 @@ def _log_sagemaker_config_single_substitution(source_value, config_value, config " source value that will be used = %s" ), config_key_path, - config_value, - source_value, + config_value_log_copy, + source_value_log_copy, ) elif source_value is not None and config_value != source_value: # Sagemaker Config had a value defined that is NOT going to be used @@ -117,8 +132,8 @@ def 
_log_sagemaker_config_single_substitution(source_value, config_value, config " source value that will be used = %s", ), config_key_path, - config_value, - source_value, + config_value_log_copy, + source_value_log_copy, ) else: # nothing was specified in the config and nothing is being automatically applied diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index 4831db11ee..66b746b1b0 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -68,6 +68,7 @@ from sagemaker.interactive_apps import SupportedInteractiveAppTypes from sagemaker.interactive_apps.tensorboard import TensorBoardApp from sagemaker.instance_group import InstanceGroup +from sagemaker.model_card.model_card import ModelCard, TrainingDetails from sagemaker.utils import instance_supports_kms from sagemaker.job import _Job from sagemaker.jumpstart.utils import ( @@ -1797,8 +1798,17 @@ def register( else: if "model_kms_key" not in kwargs: kwargs["model_kms_key"] = self.output_kms_key - model = self.create_model(image_uri=image_uri, **kwargs) + model = self.create_model(image_uri=image_uri, name=model_name, **kwargs) model.name = model_name + if self.model_data is not None and model_card is None: + training_details = TrainingDetails.from_model_s3_artifacts( + model_artifacts=[self.model_data], sagemaker_session=self.sagemaker_session + ) + model_card = ModelCard( + name="estimator_card", + training_details=training_details, + sagemaker_session=self.sagemaker_session, + ) return model.register( content_types, response_types, diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index be3658365a..9a0e46d1a0 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -152,7 +152,6 @@ "2.1.0", "2.1.2", "2.2.0", - "2.3.0", "2.3.1", ] diff --git a/src/sagemaker/image_uri_config/huggingface-llm.json b/src/sagemaker/image_uri_config/huggingface-llm.json index 3e3f450d23..b3988362fd 100644 --- a/src/sagemaker/image_uri_config/huggingface-llm.json 
+++ b/src/sagemaker/image_uri_config/huggingface-llm.json @@ -12,7 +12,7 @@ "1.2": "1.2.0", "1.3": "1.3.3", "1.4": "1.4.5", - "2.0": "2.0.2" + "2.0": "2.2.0" }, "versions": { "0.6.0": { @@ -672,6 +672,53 @@ "container_version": { "gpu": "cu121-ubuntu22.04" } + }, + "2.2.0": { + "py_versions": [ + "py310" + ], + "registries": { + "af-south-1": "626614931356", + "il-central-1": "780543022126", + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-northeast-3": "364406365360", + "ap-south-1": "763104351884", + "ap-south-2": "772153158452", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ap-southeast-3": "907027046896", + "ap-southeast-4": "457447274322", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-central-2": "380420809688", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "eu-south-1": "692866216735", + "eu-south-2": "503227376785", + "me-south-1": "217643126080", + "me-central-1": "914824155844", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-east-1": "446045086412", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-isob-east-1": "094389454867", + "us-west-1": "763104351884", + "us-west-2": "763104351884", + "ca-west-1": "204538143572" + }, + "tag_prefix": "2.3.0-tgi2.2.0", + "repository": "huggingface-pytorch-tgi-inference", + "container_version": { + "gpu": "cu121-ubuntu22.04-v2.0" + } } } } diff --git a/src/sagemaker/image_uri_config/pytorch-smp.json b/src/sagemaker/image_uri_config/pytorch-smp.json index 61971e5128..ab1398666b 100644 --- a/src/sagemaker/image_uri_config/pytorch-smp.json +++ b/src/sagemaker/image_uri_config/pytorch-smp.json @@ -8,7 +8,7 @@ "2.1": "2.1.2", "2.2": "2.3.1", "2.2.0": "2.3.1", - "2.3": "2.4.0" + 
"2.3.1": "2.4.0" }, "versions": { "2.0.1": { diff --git a/src/sagemaker/image_uri_config/pytorch.json b/src/sagemaker/image_uri_config/pytorch.json index f3e2b14888..f2b19a1431 100644 --- a/src/sagemaker/image_uri_config/pytorch.json +++ b/src/sagemaker/image_uri_config/pytorch.json @@ -1008,6 +1008,8 @@ "us-gov-west-1": "442386744353", "us-iso-east-1": "886529160074", "us-isob-east-1": "094389454867", + "us-isof-east-1": "303241398832", + "us-isof-south-1": "454834333376", "us-west-1": "763104351884", "us-west-2": "763104351884" }, @@ -1051,6 +1053,8 @@ "us-gov-west-1": "442386744353", "us-iso-east-1": "886529160074", "us-isob-east-1": "094389454867", + "us-isof-east-1": "303241398832", + "us-isof-south-1": "454834333376", "us-west-1": "763104351884", "us-west-2": "763104351884" }, @@ -2329,6 +2333,8 @@ "us-gov-west-1": "442386744353", "us-iso-east-1": "886529160074", "us-isob-east-1": "094389454867", + "us-isof-east-1": "303241398832", + "us-isof-south-1": "454834333376", "us-west-1": "763104351884", "us-west-2": "763104351884" }, @@ -2372,6 +2378,7 @@ "us-gov-west-1": "442386744353", "us-iso-east-1": "886529160074", "us-isob-east-1": "094389454867", + "us-isof-south-1": "454834333376", "us-west-1": "763104351884", "us-west-2": "763104351884" }, @@ -2415,6 +2422,8 @@ "us-gov-west-1": "442386744353", "us-iso-east-1": "886529160074", "us-isob-east-1": "094389454867", + "us-isof-east-1": "303241398832", + "us-isof-south-1": "454834333376", "us-west-1": "763104351884", "us-west-2": "763104351884" }, diff --git a/src/sagemaker/image_uri_config/tensorflow.json b/src/sagemaker/image_uri_config/tensorflow.json index 8aaeb05b6e..07cc6f0a56 100644 --- a/src/sagemaker/image_uri_config/tensorflow.json +++ b/src/sagemaker/image_uri_config/tensorflow.json @@ -2140,6 +2140,8 @@ "us-gov-west-1": "442386744353", "us-iso-east-1": "886529160074", "us-isob-east-1": "094389454867", + "us-isof-east-1": "303241398832", + "us-isof-south-1": "454834333376", "us-west-1": "763104351884", 
"us-west-2": "763104351884" }, @@ -2180,6 +2182,8 @@ "us-gov-west-1": "442386744353", "us-iso-east-1": "886529160074", "us-isob-east-1": "094389454867", + "us-isof-east-1": "303241398832", + "us-isof-south-1": "454834333376", "us-west-1": "763104351884", "us-west-2": "763104351884" }, @@ -4352,6 +4356,8 @@ "us-gov-west-1": "442386744353", "us-iso-east-1": "886529160074", "us-isob-east-1": "094389454867", + "us-isof-east-1": "303241398832", + "us-isof-south-1": "454834333376", "us-west-1": "763104351884", "us-west-2": "763104351884" }, @@ -4395,6 +4401,7 @@ "us-gov-west-1": "442386744353", "us-iso-east-1": "886529160074", "us-isob-east-1": "094389454867", + "us-isof-south-1": "454834333376", "us-west-1": "763104351884", "us-west-2": "763104351884" }, diff --git a/src/sagemaker/jumpstart/hub/hub.py b/src/sagemaker/jumpstart/hub/hub.py index 69d1dbb5c1..bc42eebea0 100644 --- a/src/sagemaker/jumpstart/hub/hub.py +++ b/src/sagemaker/jumpstart/hub/hub.py @@ -233,7 +233,7 @@ def list_sagemaker_public_hub_models( f"arn:{info.partition}:" f"sagemaker:{info.region}:" f"aws:hub-content/{info.hub_name}/" - f"{HubContentType.MODEL}/{model[0]}" + f"{HubContentType.MODEL.value}/{model[0]}" ) hub_content_summary = { "hub_content_name": model[0], diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index 5d7ee5b378..9188acd437 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -549,7 +549,10 @@ def register( model_package_group_name = utils.base_name_from_image( self.image_uri, default_base_name=ModelPackage.__name__ ) - if model_package_group_name is not None: + if ( + model_package_group_name is not None + and model_type is not JumpStartModelType.PROPRIETARY + ): container_def = self.prepare_container_def(accept_eula=accept_eula) container_def = update_container_with_inference_params( framework=framework, @@ -2466,32 +2469,55 @@ def update_model_card(self, model_card: Union[ModelCard, ModelPackageModelCard]) desc_model_package = 
sagemaker_session.sagemaker_client.describe_model_package( ModelPackageName=self.model_package_arn ) + if hasattr(model_card, "model_package_details"): + model_card.model_package_details = None update_model_card_req = model_card._create_request_args() - if update_model_card_req["ModelCardStatus"] is not None: - if ( - desc_model_package["ModelCard"]["ModelCardStatus"] - == update_model_card_req["ModelCardStatus"] - ): - del update_model_card_req["ModelCardStatus"] - if update_model_card_req.get("ModelCardName") is not None: del update_model_card_req["ModelCardName"] - if update_model_card_req.get("Content") is not None: - previous_content_hash = _hash_content_str( - desc_model_package["ModelCard"]["ModelCardContent"] - ) - current_content_hash = _hash_content_str(update_model_card_req["Content"]) - if ( - previous_content_hash == current_content_hash - or update_model_card_req.get("Content") == "{}" - or update_model_card_req.get("Content") == "null" - ): - del update_model_card_req["Content"] - else: - update_model_card_req["ModelCardContent"] = update_model_card_req["Content"] - del update_model_card_req["Content"] - update_model_package_args = { - "ModelPackageArn": self.model_package_arn, - "ModelCard": update_model_card_req, - } - sagemaker_session.sagemaker_client.update_model_package(**update_model_package_args) + if update_model_card_req["Content"] is not None: + if "model_package_details" in update_model_card_req["Content"]: + update_model_card_req["Content"].pop("model_package_details", None) + update_model_card_req["ModelCardContent"] = update_model_card_req["Content"] + del update_model_card_req["Content"] + + if "ModelCard" in desc_model_package: + if update_model_card_req["ModelCardStatus"] is not None: + if ( + desc_model_package["ModelCard"]["ModelCardStatus"] + != update_model_card_req["ModelCardStatus"] + ): + new_mc_mp_req = update_model_card_req + del new_mc_mp_req["ModelCardContent"] + update_model_package_args = { + "ModelPackageArn": 
self.model_package_arn, + "ModelCard": new_mc_mp_req, + } + sagemaker_session.sagemaker_client.update_model_package( + **update_model_package_args + ) + + if update_model_card_req.get("ModelCardContent") is not None: + previous_content_hash = _hash_content_str( + desc_model_package["ModelCard"]["ModelCardContent"] + ) + current_content_hash = _hash_content_str(update_model_card_req["ModelCardContent"]) + if not ( + previous_content_hash == current_content_hash + or update_model_card_req.get("ModelCardContent") == "{}" + or update_model_card_req.get("ModelCardContent") == "null" + ): + new_mc_mp_req = update_model_card_req + del new_mc_mp_req["ModelCardStatus"] + update_model_package_args = { + "ModelPackageArn": self.model_package_arn, + "ModelCard": new_mc_mp_req, + } + sagemaker_session.sagemaker_client.update_model_package( + **update_model_package_args + ) + else: + update_model_package_args = { + "ModelPackageArn": self.model_package_arn, + "ModelCard": update_model_card_req, + } + sagemaker_session.sagemaker_client.update_model_package(**update_model_package_args) diff --git a/src/sagemaker/model_card/helpers.py b/src/sagemaker/model_card/helpers.py index a8d9e7940e..925d9ae0e0 100644 --- a/src/sagemaker/model_card/helpers.py +++ b/src/sagemaker/model_card/helpers.py @@ -503,12 +503,12 @@ def _read_s3_json(session: Session, bucket: str, key: str): raise result = {} - if data["ContentType"] == "application/json" or data["ContentType"] == "binary/octet-stream": + content_types = ["application/json", "binary/octet-stream", "application/octet-stream"] + if data["ContentType"] in content_types: result = json.loads(data["Body"].read().decode("utf-8")) else: logger.warning( - "Invalid file type %s. application/json or binary/octet-stream is expected.", - data["ContentType"], + "Invalid file type %s. 
%s is expected.", data["ContentType"], ", ".join(content_types) ) return result diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index 07885792d2..eb57dec1fa 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -669,7 +669,6 @@ def _optimize_for_jumpstart( self, output_path: Optional[str] = None, instance_type: Optional[str] = None, - role_arn: Optional[str] = None, tags: Optional[Tags] = None, job_name: Optional[str] = None, accept_eula: Optional[bool] = None, @@ -685,9 +684,7 @@ def _optimize_for_jumpstart( Args: output_path (Optional[str]): Specifies where to store the compiled/quantized model. - instance_type (Optional[str]): Target deployment instance type that - the model is optimized for. - role_arn (Optional[str]): Execution role. Defaults to ``None``. + instance_type (str): Target deployment instance type that the model is optimized for. tags (Optional[Tags]): Tags for labeling a model optimization job. Defaults to ``None``. job_name (Optional[str]): The name of the model optimization job. Defaults to ``None``. accept_eula (bool): For models that require a Model Access Config, specify True or @@ -715,7 +712,7 @@ def _optimize_for_jumpstart( f"Model '{self.model}' requires accepting end-user license agreement (EULA)." 
) - is_compilation = (quantization_config is None) and ( + is_compilation = (not quantization_config) and ( (compilation_config is not None) or _is_inferentia_or_trainium(instance_type) ) @@ -758,7 +755,6 @@ def _optimize_for_jumpstart( else None ) self.instance_type = instance_type or deployment_config_instance_type or _get_nb_instance() - self.role_arn = role_arn or self.role_arn create_optimization_job_args = { "OptimizationJobName": job_name, @@ -787,10 +783,10 @@ def _optimize_for_jumpstart( "AcceptEula": True } + optimization_env_vars = _update_environment_variables(optimization_env_vars, override_env) + if optimization_env_vars: + self.pysdk_model.env.update(optimization_env_vars) if quantization_config or is_compilation: - self.pysdk_model.env = _update_environment_variables( - optimization_env_vars, override_env - ) return create_optimization_job_args return None diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py index 01b2b96f68..fb6f60b9d0 100644 --- a/src/sagemaker/serve/builder/model_builder.py +++ b/src/sagemaker/serve/builder/model_builder.py @@ -73,7 +73,6 @@ _generate_model_source, _extract_optimization_config_and_env, _is_s3_uri, - _normalize_local_model_path, _custom_speculative_decoding, _extract_speculative_draft_model_provider, ) @@ -833,6 +832,8 @@ def build( # pylint: disable=R0911 # until we deprecate HUGGING_FACE_HUB_TOKEN. 
if self.env_vars.get("HUGGING_FACE_HUB_TOKEN") and not self.env_vars.get("HF_TOKEN"): self.env_vars["HF_TOKEN"] = self.env_vars.get("HUGGING_FACE_HUB_TOKEN") + elif self.env_vars.get("HF_TOKEN") and not self.env_vars.get("HUGGING_FACE_HUB_TOKEN"): + self.env_vars["HUGGING_FACE_HUB_TOKEN"] = self.env_vars.get("HF_TOKEN") self.sagemaker_session.settings._local_download_dir = self.model_path @@ -851,7 +852,9 @@ def build( # pylint: disable=R0911 self._build_validations() - if not self._is_jumpstart_model_id() and self.model_server: + if ( + not (isinstance(self.model, str) and self._is_jumpstart_model_id()) + ) and self.model_server: return self._build_for_model_server() if isinstance(self.model, str): @@ -1216,18 +1219,15 @@ def _model_builder_optimize_wrapper( raise ValueError("Quantization config and compilation config are mutually exclusive.") self.sagemaker_session = sagemaker_session or self.sagemaker_session or Session() - self.instance_type = instance_type or self.instance_type self.role_arn = role_arn or self.role_arn - self.build(mode=self.mode, sagemaker_session=self.sagemaker_session) job_name = job_name or f"modelbuilderjob-{uuid.uuid4().hex}" - if self._is_jumpstart_model_id(): + self.build(mode=self.mode, sagemaker_session=self.sagemaker_session) input_args = self._optimize_for_jumpstart( output_path=output_path, instance_type=instance_type, - role_arn=self.role_arn, tags=tags, job_name=job_name, accept_eula=accept_eula, @@ -1240,10 +1240,13 @@ def _model_builder_optimize_wrapper( max_runtime_in_sec=max_runtime_in_sec, ) else: + if self.model_server != ModelServer.DJL_SERVING: + logger.info("Overriding model server to DJL_SERVING.") + self.model_server = ModelServer.DJL_SERVING + + self.build(mode=self.mode, sagemaker_session=self.sagemaker_session) input_args = self._optimize_for_hf( output_path=output_path, - instance_type=instance_type, - role_arn=self.role_arn, tags=tags, job_name=job_name, quantization_config=quantization_config, @@ -1269,8 +1272,6 
@@ def _model_builder_optimize_wrapper( def _optimize_for_hf( self, output_path: str, - instance_type: Optional[str] = None, - role_arn: Optional[str] = None, tags: Optional[Tags] = None, job_name: Optional[str] = None, quantization_config: Optional[Dict] = None, @@ -1285,9 +1286,6 @@ def _optimize_for_hf( Args: output_path (str): Specifies where to store the compiled/quantized model. - instance_type (Optional[str]): Target deployment instance type that - the model is optimized for. - role_arn (Optional[str]): Execution role. Defaults to ``None``. tags (Optional[Tags]): Tags for labeling a model optimization job. Defaults to ``None``. job_name (Optional[str]): The name of the model optimization job. Defaults to ``None``. quantization_config (Optional[Dict]): Quantization configuration. Defaults to ``None``. @@ -1305,13 +1303,6 @@ def _optimize_for_hf( Returns: Optional[Dict[str, Any]]: Model optimization job input arguments. """ - if self.model_server != ModelServer.DJL_SERVING: - logger.info("Overwriting model server to DJL.") - self.model_server = ModelServer.DJL_SERVING - - self.role_arn = role_arn or self.role_arn - self.instance_type = instance_type or self.instance_type - self.pysdk_model = _custom_speculative_decoding( self.pysdk_model, speculative_decoding_config, False ) @@ -1371,13 +1362,12 @@ def _optimize_prepare_for_hf(self): ) else: if not custom_model_path: - custom_model_path = f"/tmp/sagemaker/model-builder/{self.model}/code" + custom_model_path = f"/tmp/sagemaker/model-builder/{self.model}" download_huggingface_model_metadata( self.model, - custom_model_path, + os.path.join(custom_model_path, "code"), self.env_vars.get("HUGGING_FACE_HUB_TOKEN"), ) - custom_model_path = _normalize_local_model_path(custom_model_path) self.pysdk_model.model_data, env = self._prepare_for_mode( model_path=custom_model_path, diff --git a/src/sagemaker/serve/utils/optimize_utils.py b/src/sagemaker/serve/utils/optimize_utils.py index 35a937407e..5781c0bade 100644 --- 
a/src/sagemaker/serve/utils/optimize_utils.py +++ b/src/sagemaker/serve/utils/optimize_utils.py @@ -282,26 +282,6 @@ def _extract_optimization_config_and_env( return None, None -def _normalize_local_model_path(local_model_path: Optional[str]) -> Optional[str]: - """Normalizes the local model path. - - Args: - local_model_path (Optional[str]): The local model path. - - Returns: - Optional[str]: The normalized model path. - """ - if local_model_path is None: - return local_model_path - - # Removes /code or /code/ path at the end of local_model_path, - # as it is appended during artifacts upload. - pattern = r"/code/?$" - if re.search(pattern, local_model_path): - return re.sub(pattern, "", local_model_path) - return local_model_path - - def _custom_speculative_decoding( model: Model, speculative_decoding_config: Optional[Dict], diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 4209f37ae6..fb65c9f2cf 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -8348,7 +8348,9 @@ def _logs_for_job( # noqa: C901 - suppress complexity warning for this method """ sagemaker_client = sagemaker_session.sagemaker_client request_end_time = time.time() + timeout if timeout else None - description = sagemaker_client.describe_training_job(TrainingJobName=job_name) + description = _wait_until( + lambda: sagemaker_client.describe_training_job(TrainingJobName=job_name) + ) print(secondary_training_status_message(description, None), end="") instance_count, stream_names, positions, client, log_group, dot, color_wrap = _logs_init( diff --git a/tests/data/marketplace/iris/scoring_logic.py b/tests/data/marketplace/iris/scoring_logic.py index f9e2f1bb35..48c16032dc 100644 --- a/tests/data/marketplace/iris/scoring_logic.py +++ b/tests/data/marketplace/iris/scoring_logic.py @@ -3,7 +3,7 @@ import logging import re from flask import Flask -from flask import request +from flask import request, escape from joblib import dump, load import numpy as np import os @@ 
-106,4 +106,4 @@ def endpoint_invocations(): return response except Exception as e: - return f"Error during model invocation: {str(e)} for input: {request.get_data()}" + return f"Error during model invocation: {str(e)} for input: {escape(request.get_data())}" diff --git a/tests/integ/test_byo_estimator.py b/tests/integ/test_byo_estimator.py index a504b974a9..fd3823f469 100644 --- a/tests/integ/test_byo_estimator.py +++ b/tests/integ/test_byo_estimator.py @@ -12,14 +12,20 @@ # language governing permissions and limitations under the License. from __future__ import absolute_import +import io import json import os +import numpy as np + import pytest +import sagemaker.amazon.common as smac + import sagemaker from sagemaker import image_uris from sagemaker.estimator import Estimator +from sagemaker.s3 import S3Uploader from sagemaker.serializers import SimpleBaseSerializer from sagemaker.utils import unique_name_from_base from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES, datasets @@ -102,6 +108,60 @@ def test_byo_estimator(sagemaker_session, region, cpu_instance_type, training_se assert prediction["score"] is not None +@pytest.mark.release +def test_estimator_register_publish_training_details(sagemaker_session, region): + + bucket = sagemaker_session.default_bucket() + prefix = "model-card-sample-notebook" + + raw_data = ( + (0.5, 0), + (0.75, 0), + (1.0, 0), + (1.25, 0), + (1.50, 0), + (1.75, 0), + (2.0, 0), + (2.25, 1), + (2.5, 0), + (2.75, 1), + (3.0, 0), + (3.25, 1), + (3.5, 0), + (4.0, 1), + (4.25, 1), + (4.5, 1), + (4.75, 1), + (5.0, 1), + (5.5, 1), + ) + training_data = np.array(raw_data).astype("float32") + labels = training_data[:, 1] + + # upload data to S3 bucket + buf = io.BytesIO() + smac.write_numpy_to_dense_tensor(buf, training_data, labels) + buf.seek(0) + s3_train_data = f"s3://{bucket}/{prefix}/train" + S3Uploader.upload_bytes(b=buf, s3_uri=s3_train_data, sagemaker_session=sagemaker_session) + output_location = 
f"s3://{bucket}/{prefix}/output" + container = image_uris.retrieve("linear-learner", region) + estimator = Estimator( + container, + role="SageMakerRole", + instance_count=1, + instance_type="ml.m4.xlarge", + output_path=output_location, + sagemaker_session=sagemaker_session, + ) + estimator.set_hyperparameters( + feature_dim=2, mini_batch_size=10, predictor_type="binary_classifier" + ) + estimator.fit({"train": s3_train_data}) + print(f"Training job name: {estimator.latest_training_job.name}") + estimator.register() + + def test_async_byo_estimator(sagemaker_session, region, cpu_instance_type, training_set): image_uri = image_uris.retrieve("factorization-machines", region) endpoint_name = unique_name_from_base("byo") diff --git a/tests/integ/test_sklearn.py b/tests/integ/test_sklearn.py index 839e601d34..ff5b466b3f 100644 --- a/tests/integ/test_sklearn.py +++ b/tests/integ/test_sklearn.py @@ -159,8 +159,6 @@ def test_deploy_model( def test_deploy_model_with_serverless_inference_config( sklearn_training_job, sagemaker_session, - sklearn_latest_version, - sklearn_latest_py_version, ): endpoint_name = unique_name_from_base("test-sklearn-deploy-model-serverless") with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): @@ -173,7 +171,7 @@ def test_deploy_model_with_serverless_inference_config( model_data, ROLE, entry_point=script_path, - framework_version=sklearn_latest_version, + framework_version="1.0-1", sagemaker_session=sagemaker_session, ) predictor = model.deploy( diff --git a/tests/integ/test_xgboost.py b/tests/integ/test_xgboost.py index 7b4db837fd..1c06c6b5c6 100644 --- a/tests/integ/test_xgboost.py +++ b/tests/integ/test_xgboost.py @@ -121,11 +121,9 @@ def test_training_with_network_isolation( ] -@pytest.mark.skip(reason="re:Invent keynote3 blocker. 
Revisit after release") def test_xgboost_serverless_inference( xgboost_training_job, sagemaker_session, - xgboost_latest_version, ): endpoint_name = unique_name_from_base("test-xgboost-deploy-model-serverless") with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): @@ -139,7 +137,7 @@ def test_xgboost_serverless_inference( model_data=model_data, role=ROLE, entry_point=os.path.join(DATA_DIR, "xgboost_abalone", "abalone.py"), - framework_version=xgboost_latest_version, + framework_version="1.5-1", ) xgboost.deploy( diff --git a/tests/unit/sagemaker/image_uris/expected_uris.py b/tests/unit/sagemaker/image_uris/expected_uris.py index 094323ef0b..3cf900565b 100644 --- a/tests/unit/sagemaker/image_uris/expected_uris.py +++ b/tests/unit/sagemaker/image_uris/expected_uris.py @@ -17,6 +17,8 @@ "cn-northwest-1": "amazonaws.com.cn", "us-iso-east-1": "c2s.ic.gov", "us-isob-east-1": "sc2s.sgov.gov", + "us-isof-south-1": "csp.hci.ic.gov", + "us-isof-east-1": "csp.hci.ic.gov", } DOMAIN = "amazonaws.com" IMAGE_URI_FORMAT = "{}.dkr.ecr.{}.{}/{}:{}" diff --git a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py index efcdbfc201..e753687018 100644 --- a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py +++ b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py @@ -42,6 +42,7 @@ "2.0.0": "2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04", "2.0.1": "2.1.1-tgi2.0.1-gpu-py310-cu121-ubuntu22.04", "2.0.2": "2.3.0-tgi2.0.2-gpu-py310-cu121-ubuntu22.04", + "2.2.0": "2.3.0-tgi2.2.0-gpu-py310-cu121-ubuntu22.04-v2.0", }, "inf2": { "0.0.16": "1.13.1-optimum0.0.16-neuronx-py310-ubuntu22.04", diff --git a/tests/unit/sagemaker/image_uris/test_smp_v2.py b/tests/unit/sagemaker/image_uris/test_smp_v2.py index e9c8cec292..4fd1cc6179 100644 --- a/tests/unit/sagemaker/image_uris/test_smp_v2.py +++ b/tests/unit/sagemaker/image_uris/test_smp_v2.py @@ -27,6 +27,7 @@ def test_smp_v2(load_config): "torch_distributed": 
{"enabled": True}, "smdistributed": {"modelparallel": {"enabled": True}}, } + for processor in PROCESSORS: for version in VERSIONS: ACCOUNTS = load_config["training"]["versions"][version]["registries"] @@ -38,6 +39,11 @@ def test_smp_v2(load_config): if "2.1" in version or "2.2" in version or "2.3" in version: cuda_vers = "cu121" + if "2.3.1" == version: + py_version = "py311" + + print(version, py_version) + uri = image_uris.get_training_image_uri( region, framework="pytorch", diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py index 81d57243ea..4818b9d8b6 100644 --- a/tests/unit/sagemaker/serve/builder/test_model_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_model_builder.py @@ -13,12 +13,11 @@ from __future__ import absolute_import from unittest.mock import MagicMock, patch, Mock, mock_open -import pytest - import unittest from pathlib import Path from copy import deepcopy +from sagemaker.serve import SchemaBuilder from sagemaker.serve.builder.model_builder import ModelBuilder from sagemaker.serve.mode.function_pointers import Mode from sagemaker.serve.model_format.mlflow.constants import MLFLOW_TRACKING_ARN @@ -2328,22 +2327,52 @@ def test_build_tensorflow_serving_non_mlflow_case( mock_session, ) - @pytest.mark.skip(reason="Implementation not completed") + @patch.object(ModelBuilder, "_prepare_for_mode") + @patch.object(ModelBuilder, "_build_for_djl") + @patch.object(ModelBuilder, "_is_jumpstart_model_id", return_value=False) @patch.object(ModelBuilder, "_get_serve_setting", autospec=True) @patch("sagemaker.serve.utils.telemetry_logger._send_telemetry") - def test_optimize(self, mock_send_telemetry, mock_get_serve_setting): + def test_optimize( + self, + mock_send_telemetry, + mock_get_serve_setting, + mock_is_jumpstart_model_id, + mock_build_for_djl, + mock_prepare_for_mode, + ): mock_sagemaker_session = Mock() mock_settings = Mock() mock_settings.telemetry_opt_out = False 
mock_get_serve_setting.return_value = mock_settings + pysdk_model = Mock() + pysdk_model.env = {"key": "val"} + pysdk_model.add_tags.side_effect = lambda *arg, **kwargs: None + + mock_build_for_djl.side_effect = lambda **kwargs: pysdk_model + mock_prepare_for_mode.side_effect = lambda *args, **kwargs: ( + { + "S3DataSource": { + "S3Uri": "s3://uri", + "S3DataType": "S3Prefix", + "CompressionType": "None", + } + }, + {"key": "val"}, + ) + builder = ModelBuilder( - model_path=MODEL_PATH, - schema_builder=schema_builder, - model=mock_fw_model, + schema_builder=SchemaBuilder( + sample_input={"inputs": "Hello", "parameters": {}}, + sample_output=[{"generated_text": "Hello"}], + ), + model="meta-llama/Meta-Llama-3-8B", sagemaker_session=mock_sagemaker_session, + env_vars={"HF_TOKEN": "token"}, + model_metadata={"CUSTOM_MODEL_PATH": "/tmp/modelbuilders/code"}, ) + builder.pysdk_model = pysdk_model job_name = "my-optimization-job" instance_type = "ml.inf1.xlarge" @@ -2352,10 +2381,6 @@ def test_optimize(self, mock_send_telemetry, mock_get_serve_setting): "Image": "quantization-image-uri", "OverrideEnvironment": {"ENV_VAR": "value"}, } - compilation_config = { - "Image": "compilation-image-uri", - "OverrideEnvironment": {"ENV_VAR": "value"}, - } env_vars = {"Var1": "value", "Var2": "value"} kms_key = "arn:aws:kms:us-west-2:123456789012:key/my-key-id" max_runtime_in_sec = 3600 @@ -2368,36 +2393,17 @@ def test_optimize(self, mock_send_telemetry, mock_get_serve_setting): "Subnets": ["subnet-01234567", "subnet-89abcdef"], } - expected_create_optimization_job_args = { - "ModelSource": {"S3": {"S3Uri": MODEL_PATH, "ModelAccessConfig": {"AcceptEula": True}}}, - "DeploymentInstanceType": instance_type, - "OptimizationEnvironment": env_vars, - "OptimizationConfigs": [ - {"ModelQuantizationConfig": quantization_config}, - {"ModelCompilationConfig": compilation_config}, - ], - "OutputConfig": {"S3OutputLocation": output_path, "KmsKeyId": kms_key}, - "RoleArn": mock_role_arn, - 
"OptimizationJobName": job_name, - "StoppingCondition": {"MaxRuntimeInSeconds": max_runtime_in_sec}, - "Tags": [ - {"Key": "Project", "Value": "my-project"}, - {"Key": "Environment", "Value": "production"}, - ], - "VpcConfig": vpc_config, - } - - mock_sagemaker_session.sagemaker_client.create_optimization_job.return_value = { - "OptimizationJobArn": "arn:aws:sagemaker:us-west-2:123456789012:optimization-job/my-optimization-job" + mock_sagemaker_session.wait_for_optimization_job.side_effect = lambda *args, **kwargs: { + "OptimizationJobArn": "arn:aws:sagemaker:us-west-2:123456789012:optimization-job/my-optimization-job", + "OptimizationJobName": "my-optimization-job", } builder.optimize( instance_type=instance_type, output_path=output_path, - role=mock_role_arn, + role_arn=mock_role_arn, job_name=job_name, quantization_config=quantization_config, - compilation_config=compilation_config, env_vars=env_vars, kms_key=kms_key, max_runtime_in_sec=max_runtime_in_sec, @@ -2405,9 +2411,37 @@ def test_optimize(self, mock_send_telemetry, mock_get_serve_setting): vpc_config=vpc_config, ) + self.assertEqual(builder.env_vars["HUGGING_FACE_HUB_TOKEN"], "token") + self.assertEqual(builder.model_server, ModelServer.DJL_SERVING) + mock_send_telemetry.assert_called_once() mock_sagemaker_session.sagemaker_client.create_optimization_job.assert_called_once_with( - **expected_create_optimization_job_args + OptimizationJobName="my-optimization-job", + DeploymentInstanceType="ml.inf1.xlarge", + RoleArn="arn:aws:iam::123456789012:role/SageMakerRole", + OptimizationEnvironment={"Var1": "value", "Var2": "value"}, + ModelSource={"S3": {"S3Uri": "s3://uri"}}, + OptimizationConfigs=[ + { + "ModelQuantizationConfig": { + "Image": "quantization-image-uri", + "OverrideEnvironment": {"ENV_VAR": "value"}, + } + } + ], + OutputConfig={ + "S3OutputLocation": "s3://my-bucket/output", + "KmsKeyId": "arn:aws:kms:us-west-2:123456789012:key/my-key-id", + }, + StoppingCondition={"MaxRuntimeInSeconds": 3600}, 
+ Tags=[ + {"Key": "Project", "Value": "my-project"}, + {"Key": "Environment", "Value": "production"}, + ], + VpcConfig={ + "SecurityGroupIds": ["sg-01234567890abcdef", "sg-fedcba9876543210"], + "Subnets": ["subnet-01234567", "subnet-89abcdef"], + }, ) def test_handle_mlflow_input_without_mlflow_model_path(self): @@ -2649,26 +2683,25 @@ def test_optimize_for_hf_with_custom_s3_path( model_builder = ModelBuilder( model="meta-llama/Meta-Llama-3-8B-Instruct", - env_vars={"HUGGING_FACE_HUB_TOKEN": "token"}, + env_vars={"HF_TOKEN": "token"}, model_metadata={ "CUSTOM_MODEL_PATH": "s3://bucket/path/", }, + role_arn="role-arn", + instance_type="ml.g5.2xlarge", ) model_builder.pysdk_model = mock_pysdk_model out_put = model_builder._optimize_for_hf( job_name="job_name-123", - instance_type="ml.g5.2xlarge", - role_arn="role-arn", quantization_config={ "OverrideEnvironment": {"OPTION_QUANTIZE": "awq"}, }, output_path="s3://bucket/code/", ) - print(out_put) - + self.assertEqual(model_builder.env_vars["HF_TOKEN"], "token") self.assertEqual(model_builder.role_arn, "role-arn") self.assertEqual(model_builder.instance_type, "ml.g5.2xlarge") self.assertEqual(model_builder.pysdk_model.env["OPTION_QUANTIZE"], "awq") @@ -2715,14 +2748,14 @@ def test_optimize_for_hf_without_custom_s3_path( model_builder = ModelBuilder( model="meta-llama/Meta-Llama-3-8B-Instruct", env_vars={"HUGGING_FACE_HUB_TOKEN": "token"}, + role_arn="role-arn", + instance_type="ml.g5.2xlarge", ) model_builder.pysdk_model = mock_pysdk_model out_put = model_builder._optimize_for_hf( job_name="job_name-123", - instance_type="ml.g5.2xlarge", - role_arn="role-arn", quantization_config={ "OverrideEnvironment": {"OPTION_QUANTIZE": "awq"}, }, diff --git a/tests/unit/sagemaker/serve/utils/test_optimize_utils.py b/tests/unit/sagemaker/serve/utils/test_optimize_utils.py index 712382f068..a8dc6d74f4 100644 --- a/tests/unit/sagemaker/serve/utils/test_optimize_utils.py +++ b/tests/unit/sagemaker/serve/utils/test_optimize_utils.py @@ 
-28,7 +28,6 @@ _generate_additional_model_data_sources, _generate_channel_name, _extract_optimization_config_and_env, - _normalize_local_model_path, _is_optimized, _custom_speculative_decoding, _is_inferentia_or_trainium, @@ -312,19 +311,6 @@ def test_extract_optimization_config_and_env( ) -@pytest.mark.parametrize( - "my_path, expected_path", - [ - ("local/path/llama/code", "local/path/llama"), - ("local/path/llama/code/", "local/path/llama"), - ("local/path/llama/", "local/path/llama/"), - ("local/path/llama", "local/path/llama"), - ], -) -def test_normalize_local_model_path(my_path, expected_path): - assert _normalize_local_model_path(my_path) == expected_path - - class TestCustomSpeculativeDecodingConfig(unittest.TestCase): @patch("sagemaker.model.Model") diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py index b557a9c9f0..73006ae7cd 100644 --- a/tests/unit/test_estimator.py +++ b/tests/unit/test_estimator.py @@ -4402,7 +4402,7 @@ def test_register_default_image_without_instance_type_args(sagemaker_session): framework = "TENSORFLOW" framework_version = "2.9" nearest_model_name = "resnet50" - + model_card = {"ModelCardStatus": ModelCardStatusEnum.DRAFT, "ModelCardContent": "{}"} estimator.register( content_types=content_types, response_types=response_types, @@ -4425,6 +4425,7 @@ def test_register_default_image_without_instance_type_args(sagemaker_session): "marketplace_cert": False, "sample_payload_url": sample_payload_url, "task": task, + "model_card": model_card, } sagemaker_session.create_model_package_from_containers.assert_called_with( **expected_create_model_package_request @@ -4454,6 +4455,7 @@ def test_register_inference_image(sagemaker_session): framework = "TENSORFLOW" framework_version = "2.9" nearest_model_name = "resnet50" + model_card = {"ModelCardStatus": ModelCardStatusEnum.DRAFT, "ModelCardContent": "{}"} estimator.register( content_types=content_types, @@ -4480,6 +4482,7 @@ def 
test_register_inference_image(sagemaker_session): "marketplace_cert": False, "sample_payload_url": sample_payload_url, "task": task, + "model_card": model_card, } sagemaker_session.create_model_package_from_containers.assert_called_with( **expected_create_model_package_request diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index deb295e6e1..d031102129 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -15,6 +15,7 @@ from __future__ import absolute_import import copy +import logging import shutil import tarfile from datetime import datetime @@ -59,6 +60,8 @@ _validate_new_tags, remove_tag_with_key, ) + +from src.sagemaker.config.config_utils import _log_sagemaker_config_single_substitution from tests.unit.sagemaker.workflow.helpers import CustomStep from sagemaker.workflow.parameters import ParameterString, ParameterInteger @@ -1279,6 +1282,91 @@ def test_resolve_value_from_config(): mock_info_logger.reset_mock() +class TestLogSagemakerConfig(TestCase): + + def test_sensitive_info_masking(self): + logger = logging.getLogger('sagemaker.config') + logger.setLevel(logging.DEBUG) + + stream_handler = logging.StreamHandler() + logger.addHandler(stream_handler) + + # source value is None + with self.assertLogs(logger, level='DEBUG') as log: + _log_sagemaker_config_single_substitution( + None, + {"apiKey": "topsecretkey"}, + "config/path" + ) + + self.assertIn("config value that will be used = {'apiKey': '***'}", log.output[0]) + + # source value is not None and config_value == source_value + with self.assertLogs(logger, level='DEBUG') as log: + _log_sagemaker_config_single_substitution( + {"secretword": "topsecretword"}, + {"secretword": "topsecretword"}, + "config/path" + ) + + self.assertIn("Skipped value", log.output[0]) + self.assertIn("source value that will be used = {'secretword': '***'}", log.output[0]) + self.assertIn("config value = {'secretword': '***'}", log.output[0]) + + # source value is not None and config_value != 
source_value + with self.assertLogs(logger, level='DEBUG') as log: + _log_sagemaker_config_single_substitution( + {"password": "supersecretpassword"}, + {"apiKey": "topsecretkey"}, + "config/path" + ) + + self.assertIn("Skipped value", log.output[0]) + self.assertIn("source value that will be used = {'password': '***'}", log.output[0]) + self.assertIn("config value = {'apiKey': '***'}", log.output[0]) + + def test_non_sensitive_info_masking(self): + logger = logging.getLogger('sagemaker.config') + logger.setLevel(logging.DEBUG) + + stream_handler = logging.StreamHandler() + logger.addHandler(stream_handler) + + # source value is None + with self.assertLogs(logger, level='DEBUG') as log: + _log_sagemaker_config_single_substitution( + None, + {"username": "randomvalue"}, + "config/path" + ) + + self.assertIn("config value that will be used = {'username': 'randomvalue'}", log.output[0]) + + # source value is not None and config_value == source_value + with self.assertLogs(logger, level='DEBUG') as log: + _log_sagemaker_config_single_substitution( + {"nonsensitivevalue": "randomvalue"}, + {"nonsensitivevalue": "randomvalue"}, + "config/path" + ) + + self.assertIn("Skipped value", log.output[0]) + self.assertIn("source value that will be used = {'nonsensitivevalue': 'randomvalue'}", log.output[0]) + self.assertIn("config value = {'nonsensitivevalue': 'randomvalue'}", log.output[0]) + + # source value is not None and config_value != source_value + with self.assertLogs(logger, level='DEBUG') as log: + _log_sagemaker_config_single_substitution( + {"username": "nonsensitiveinfo"}, + {"configvalue": "nonsensitivevalue"}, + "config/path/non_sensitive" + ) + + self.assertIn("Skipped value", log.output[0]) + self.assertIn("source value that will be used = {'username': 'nonsensitiveinfo'}", log.output[0]) + self.assertIn("config value = {'configvalue': 'nonsensitivevalue'}", log.output[0]) + + def test_get_sagemaker_config_value(): mock_config_logger = Mock()