Skip to content

Commit 78a87a9

Browse files
authored
Merge branch 'aws:master' into master
2 parents 2b4efd0 + c176134 commit 78a87a9

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

66 files changed

+1111
-526
lines changed

.pylintrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ max-returns=6
384384
max-branches=12
385385

386386
# Maximum number of statements in function / method body
387-
max-statements=100
387+
max-statements=105
388388

389389
# Maximum number of parents for a class (see R0901).
390390
max-parents=7

CHANGELOG.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,33 @@
11
# Changelog
22

3+
## v2.231.0 (2024-08-30)
4+
5+
### Features
6+
7+
* Add SageMaker Core to the dependency
8+
9+
### Bug Fixes and Other Changes
10+
11+
* Disable test_mnist_async
12+
* SMP v2.5
13+
* update image_uri_configs 08-29-2024 07:17:59 PST
14+
15+
## v2.230.0 (2024-08-28)
16+
17+
### Features
18+
19+
* FastAPI integration for In_Process Mode (2/2)
20+
21+
### Bug Fixes and Other Changes
22+
23+
* chore: add HF LLM neuronx 0.0.24 image
24+
* TF-2.16 test modification and handling
25+
* fix test fail
26+
* Add troubleshooting links to exceptions
27+
* cross account private hub model fine-tuning
28+
* chore: cleanup jumpstart factory
29+
* disable failing integration tests
30+
331
## v2.229.0 (2024-08-15)
432

533
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.229.1.dev0
1+
2.231.1.dev0

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ dependencies = [
4747
"psutil",
4848
"PyYAML~=6.0",
4949
"requests",
50+
"sagemaker-core>=1.0.0,<2.0.0",
5051
"schema",
5152
"smdebug_rulesconfig==1.0.1",
5253
"tblib>=1.7.0,<4",
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
accelerate>=0.24.1,<=0.27.0
22
sagemaker_schema_inference_artifacts>=0.0.5
3+
uvicorn>=0.30.1
4+
fastapi>=0.111.0
5+
nest-asyncio

requirements/extras/test_requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
tox==3.24.5
2+
numpy>=1.24.0
23
build[virtualenv]==1.2.1
34
flake8==4.0.1
45
pytest==6.2.5
@@ -40,3 +41,6 @@ schema==0.7.5
4041
tensorflow>=2.1,<=2.16
4142
mlflow>=2.12.2,<2.13
4243
huggingface_hub>=0.23.4
44+
uvicorn>=0.30.1
45+
fastapi>=0.111.0
46+
nest-asyncio

src/sagemaker/algorithm.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,20 @@ def __init__(
157157
available (default: ``None``).
158158
**kwargs: Additional kwargs. This is unused. It's only added for AlgorithmEstimator
159159
to ignore the irrelevant arguments.
160+
161+
Raises:
162+
ValueError:
163+
- If an AWS IAM Role is not provided.
164+
- Bad value for instance type.
165+
RuntimeError:
166+
- When setting up custom VPC, only one of subnets and security_group_ids is provided
167+
- If instance_count > 1 (distributed training) with instance type local or local gpu
168+
- If LocalSession is not used with instance type local or local gpu
169+
- file:// output path used outside of local mode
170+
botocore.exceptions.ClientError:
171+
- algorithm arn is incorrect
172+
- insufficient permission to access/describe algorithm
173+
- algorithm is in a different region
160174
"""
161175
self.algorithm_arn = algorithm_arn
162176
super(AlgorithmEstimator, self).__init__(

src/sagemaker/base_predictor.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,8 @@ def update_endpoint(
430430
- If ``initial_instance_count``, ``instance_type``, or ``accelerator_type`` is
431431
specified and either ``model_name`` is ``None`` or there are multiple models
432432
associated with the endpoint.
433+
botocore.exceptions.ClientError: If SageMaker throws an error while creating
434+
endpoint config, describing endpoint or updating endpoint
433435
"""
434436
production_variants = None
435437
current_model_names = self._get_model_names()

src/sagemaker/environment_variables.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from sagemaker.jumpstart import utils as jumpstart_utils
2121
from sagemaker.jumpstart import artifacts
2222
from sagemaker.jumpstart.constants import DEFAULT_JUMPSTART_SAGEMAKER_SESSION
23-
from sagemaker.jumpstart.enums import JumpStartScriptScope
23+
from sagemaker.jumpstart.enums import JumpStartModelType, JumpStartScriptScope
2424
from sagemaker.session import Session
2525

2626
logger = logging.getLogger(__name__)
@@ -38,6 +38,7 @@ def retrieve_default(
3838
instance_type: Optional[str] = None,
3939
script: JumpStartScriptScope = JumpStartScriptScope.INFERENCE,
4040
config_name: Optional[str] = None,
41+
model_type: JumpStartModelType = JumpStartModelType.OPEN_WEIGHTS,
4142
) -> Dict[str, str]:
4243
"""Retrieves the default container environment variables for the model matching the arguments.
4344
@@ -70,6 +71,8 @@ def retrieve_default(
7071
script (JumpStartScriptScope): The JumpStart script for which to retrieve environment
7172
variables.
7273
config_name (Optional[str]): Name of the JumpStart Model config to apply. (Default: None).
74+
model_type (JumpStartModelType): The type of the model, can be open weights model
75+
or proprietary model. (Default: JumpStartModelType.OPEN_WEIGHTS).
7376
Returns:
7477
dict: The variables to use for the model.
7578
@@ -94,4 +97,5 @@ def retrieve_default(
9497
instance_type=instance_type,
9598
script=script,
9699
config_name=config_name,
100+
model_type=model_type,
97101
)

src/sagemaker/estimator.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -590,25 +590,36 @@ def __init__(
590590
self.dependencies = dependencies or []
591591
self.uploaded_code: Optional[UploadedCode] = None
592592

593-
# Check that the user properly sets both subnet and secutiry_groupe_ids
593+
# Check that the user properly sets both subnet and security_group_ids
594594
if (
595595
subnets is not None
596596
and security_group_ids is None
597597
or security_group_ids is not None
598598
and subnets is None
599599
):
600+
troubleshooting = (
601+
"Refer to this documentation on using custom VPC: "
602+
"https://sagemaker.readthedocs.io/en/v2.24.0/overview.html"
603+
"#secure-training-and-inference-with-vpc"
604+
)
605+
logger.error("Check troubleshooting guide for common errors: %s", troubleshooting)
606+
600607
raise RuntimeError(
601608
"When setting up custom VPC, both subnets and security_group_ids must be set"
602609
)
603610

604611
if self.instance_type in ("local", "local_gpu"):
605612
if self.instance_type == "local_gpu" and self.instance_count > 1:
606-
raise RuntimeError("Distributed Training in Local GPU is not supported")
613+
raise RuntimeError(
614+
"Distributed Training in Local GPU is not supported."
615+
" Set instance_count to 1."
616+
)
607617
self.sagemaker_session = sagemaker_session or LocalSession()
608618
if not isinstance(self.sagemaker_session, sagemaker.local.LocalSession):
609619
raise RuntimeError(
610620
"instance_type local or local_gpu is only supported with an"
611-
"instance of LocalSession"
621+
"instance of LocalSession. More details on local mode: "
622+
"https://sagemaker.readthedocs.io/en/stable/overview.html#local-mode"
612623
)
613624
else:
614625
self.sagemaker_session = sagemaker_session or Session()
@@ -631,7 +642,11 @@ def __init__(
631642
and not is_pipeline_variable(output_path)
632643
and output_path.startswith("file://")
633644
):
634-
raise RuntimeError("file:// output paths are only supported in Local Mode")
645+
raise RuntimeError(
646+
"The 'file://' output paths are only supported when using Local Mode. "
647+
"To resolve this issue, ensure you're running in Local Mode with a LocalSession, "
648+
"or use an 's3://' output path for jobs running on SageMaker instances."
649+
)
635650
self.output_path = output_path
636651
self.latest_training_job = None
637652
self.jobs = []
@@ -646,7 +661,12 @@ def __init__(
646661
# Now we marked that as Optional because we can fetch it from SageMakerConfig
647662
# Because of marking that parameter as optional, we should validate if it is None, even
648663
# after fetching the config.
649-
raise ValueError("An AWS IAM role is required to create an estimator.")
664+
raise ValueError(
665+
"An AWS IAM role is required to create an estimator. "
666+
"Please provide a valid `role` argument with the ARN of an IAM role"
667+
" that has the necessary SageMaker permissions."
668+
)
669+
650670
self.output_kms_key = resolve_value_from_config(
651671
output_kms_key, TRAINING_JOB_KMS_KEY_ID_PATH, sagemaker_session=self.sagemaker_session
652672
)
@@ -1855,6 +1875,8 @@ def model_data(self):
18551875
if compression_type not in {"GZIP", "NONE"}:
18561876
raise ValueError(
18571877
f'Unrecognized training job output data compression type "{compression_type}"'
1878+
'. Please specify either "GZIP" or "NONE" as valid options for '
1879+
"the compression type."
18581880
)
18591881
# model data is in uncompressed form NOTE SageMaker Hosting mandates presence of
18601882
# trailing forward slash in S3 model data URI, so append one if necessary.

0 commit comments

Comments
 (0)