Skip to content

Commit 6831679

Browse files
authored
Merge branch 'master' into add_target_container_hostname_to_predict
2 parents 5e09ce5 + 2310967 commit 6831679

File tree

12 files changed

+207
-21
lines changed

12 files changed

+207
-21
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
# Changelog
22

3+
## v2.247.0 (2025-06-13)
4+
5+
### Features
6+
7+
* Add support for MetricDefinitions in ModelTrainer
8+
9+
### Bug Fixes and Other Changes
10+
11+
* update jumpstart region_config, update image_uri_configs 06-12-2025 07:18:12 PST
12+
* Add ignore_patterns in ModelTrainer to ignore specific files/folders
13+
* Allow import failure for internal _hashlib module
14+
315
## v2.246.0 (2025-06-04)
416

517
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.246.1.dev0
1+
2.247.1.dev0

src/sagemaker/image_uri_config/pytorch.json

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1705,7 +1705,8 @@
17051705
"2.3": "2.3.0",
17061706
"2.4": "2.4.0",
17071707
"2.5": "2.5.1",
1708-
"2.6": "2.6.0"
1708+
"2.6": "2.6.0",
1709+
"2.7": "2.7.1"
17091710
},
17101711
"versions": {
17111712
"0.4.0": {
@@ -2946,6 +2947,51 @@
29462947
"us-west-2": "763104351884"
29472948
},
29482949
"repository": "pytorch-training"
2950+
},
2951+
"2.7.1": {
2952+
"py_versions": [
2953+
"py312"
2954+
],
2955+
"registries": {
2956+
"af-south-1": "626614931356",
2957+
"ap-east-1": "871362719292",
2958+
"ap-east-2": "975050140332",
2959+
"ap-northeast-1": "763104351884",
2960+
"ap-northeast-2": "763104351884",
2961+
"ap-northeast-3": "364406365360",
2962+
"ap-south-1": "763104351884",
2963+
"ap-south-2": "772153158452",
2964+
"ap-southeast-1": "763104351884",
2965+
"ap-southeast-2": "763104351884",
2966+
"ap-southeast-3": "907027046896",
2967+
"ap-southeast-4": "457447274322",
2968+
"ap-southeast-5": "550225433462",
2969+
"ap-southeast-7": "590183813437",
2970+
"ca-central-1": "763104351884",
2971+
"ca-west-1": "204538143572",
2972+
"cn-north-1": "727897471807",
2973+
"cn-northwest-1": "727897471807",
2974+
"eu-central-1": "763104351884",
2975+
"eu-central-2": "380420809688",
2976+
"eu-north-1": "763104351884",
2977+
"eu-south-1": "692866216735",
2978+
"eu-south-2": "503227376785",
2979+
"eu-west-1": "763104351884",
2980+
"eu-west-2": "763104351884",
2981+
"eu-west-3": "763104351884",
2982+
"il-central-1": "780543022126",
2983+
"me-central-1": "914824155844",
2984+
"me-south-1": "217643126080",
2985+
"mx-central-1": "637423239942",
2986+
"sa-east-1": "763104351884",
2987+
"us-east-1": "763104351884",
2988+
"us-east-2": "763104351884",
2989+
"us-gov-east-1": "446045086412",
2990+
"us-gov-west-1": "442386744353",
2991+
"us-west-1": "763104351884",
2992+
"us-west-2": "763104351884"
2993+
},
2994+
"repository": "pytorch-training"
29492995
}
29502996
}
29512997
}

src/sagemaker/jumpstart/region_config.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
"content_bucket": "jumpstart-cache-prod-ap-east-1",
88
"gated_content_bucket": "jumpstart-private-cache-prod-ap-east-1"
99
},
10+
"ap-east-2": {
11+
"content_bucket": "jumpstart-cache-prod-ap-east-2",
12+
"gated_content_bucket": "jumpstart-private-cache-prod-ap-east-2"
13+
},
1014
"ap-northeast-1": {
1115
"content_bucket": "jumpstart-cache-prod-ap-northeast-1",
1216
"gated_content_bucket": "jumpstart-private-cache-prod-ap-northeast-1",

src/sagemaker/modules/configs.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
from __future__ import absolute_import
2323

24-
from typing import Optional, Union
24+
from typing import Optional, Union, List
2525
from pydantic import BaseModel, model_validator, ConfigDict
2626

2727
import sagemaker_core.shapes as shapes
@@ -42,6 +42,7 @@
4242
RemoteDebugConfig,
4343
SessionChainingConfig,
4444
InstanceGroup,
45+
MetricDefinition,
4546
)
4647

4748
from sagemaker.modules.utils import convert_unassigned_to_none
@@ -68,6 +69,7 @@
6869
"Compute",
6970
"Networking",
7071
"InputData",
72+
"MetricDefinition",
7173
]
7274

7375

@@ -96,12 +98,23 @@ class SourceCode(BaseConfig):
9698
command (Optional[str]):
9799
The command(s) to execute in the training job container. Example: "python my_script.py".
98100
If not specified, entry_script must be provided.
101+
ignore_patterns (Optional[List[str]]):
102+
Patterns of files/folders to exclude when uploading to S3. If not specified,
103+
defaults to: ['.env', '.git', '__pycache__', '.DS_Store', '.cache', '.ipynb_checkpoints'].
99104
"""
100105

101106
source_dir: Optional[str] = None
102107
requirements: Optional[str] = None
103108
entry_script: Optional[str] = None
104109
command: Optional[str] = None
110+
ignore_patterns: Optional[List[str]] = [
111+
".env",
112+
".git",
113+
"__pycache__",
114+
".DS_Store",
115+
".cache",
116+
".ipynb_checkpoints",
117+
]
105118

106119

107120
class Compute(shapes.ResourceConfig):

src/sagemaker/modules/train/model_trainer.py

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
RemoteDebugConfig,
6767
SessionChainingConfig,
6868
InputData,
69+
MetricDefinition,
6970
)
7071

7172
from sagemaker.modules.local_core.local_container import _LocalContainer
@@ -119,7 +120,8 @@ class ModelTrainer(BaseModel):
119120
from sagemaker.modules.train import ModelTrainer
120121
from sagemaker.modules.configs import SourceCode, Compute, InputData
121122
122-
source_code = SourceCode(source_dir="source", entry_script="train.py")
123+
ignore_patterns = ['.env', '.git', '__pycache__', '.DS_Store', 'data']
124+
source_code = SourceCode(source_dir="source", entry_script="train.py", ignore_patterns=ignore_patterns)
123125
training_image = "123456789012.dkr.ecr.us-west-2.amazonaws.com/my-training-image"
124126
model_trainer = ModelTrainer(
125127
training_image=training_image,
@@ -238,6 +240,7 @@ class ModelTrainer(BaseModel):
238240
_infra_check_config: Optional[InfraCheckConfig] = PrivateAttr(default=None)
239241
_session_chaining_config: Optional[SessionChainingConfig] = PrivateAttr(default=None)
240242
_remote_debug_config: Optional[RemoteDebugConfig] = PrivateAttr(default=None)
243+
_metric_definitions: Optional[List[MetricDefinition]] = PrivateAttr(default=None)
241244

242245
_temp_recipe_train_dir: Optional[TemporaryDirectory] = PrivateAttr(default=None)
243246

@@ -654,6 +657,7 @@ def train(
654657
channel_name=SM_CODE,
655658
data_source=self.source_code.source_dir,
656659
key_prefix=input_data_key_prefix,
660+
ignore_patterns=self.source_code.ignore_patterns,
657661
)
658662
final_input_data_config.append(source_code_channel)
659663

@@ -675,6 +679,7 @@ def train(
675679
channel_name=SM_DRIVERS,
676680
data_source=tmp_dir.name,
677681
key_prefix=input_data_key_prefix,
682+
ignore_patterns=self.source_code.ignore_patterns,
678683
)
679684
final_input_data_config.append(sm_drivers_channel)
680685

@@ -693,6 +698,7 @@ def train(
693698
training_image_config=self.training_image_config,
694699
container_entrypoint=container_entrypoint,
695700
container_arguments=container_arguments,
701+
metric_definitions=self._metric_definitions,
696702
)
697703

698704
resource_config = self.compute._to_resource_config()
@@ -755,7 +761,11 @@ def train(
755761
local_container.train(wait)
756762

757763
def create_input_data_channel(
758-
self, channel_name: str, data_source: DataSourceType, key_prefix: Optional[str] = None
764+
self,
765+
channel_name: str,
766+
data_source: DataSourceType,
767+
key_prefix: Optional[str] = None,
768+
ignore_patterns: Optional[List[str]] = None,
759769
) -> Channel:
760770
"""Create an input data channel for the training job.
761771
@@ -771,6 +781,10 @@ def create_input_data_channel(
771781
772782
If specified, local data will be uploaded to:
773783
``s3://<default_bucket_path>/<key_prefix>/<channel_name>/``
784+
ignore_patterns (Optional[List[str]]):
785+
Patterns of files/folders to exclude when uploading a local directory to S3.
786+
If not specified (or empty), nothing is excluded and the data source is
787+
uploaded as-is.
774788
"""
775789
channel = None
776790
if isinstance(data_source, str):
@@ -810,11 +824,28 @@ def create_input_data_channel(
810824
)
811825
if self.sagemaker_session.default_bucket_prefix:
812826
key_prefix = f"{self.sagemaker_session.default_bucket_prefix}/{key_prefix}"
813-
s3_uri = self.sagemaker_session.upload_data(
814-
path=data_source,
815-
bucket=self.sagemaker_session.default_bucket(),
816-
key_prefix=key_prefix,
817-
)
827+
if ignore_patterns and _is_valid_path(data_source, path_type="Directory"):
828+
tmp_dir = TemporaryDirectory()
829+
copied_path = os.path.join(
830+
tmp_dir.name, os.path.basename(os.path.normpath(data_source))
831+
)
832+
shutil.copytree(
833+
data_source,
834+
copied_path,
835+
dirs_exist_ok=True,
836+
ignore=shutil.ignore_patterns(*ignore_patterns),
837+
)
838+
s3_uri = self.sagemaker_session.upload_data(
839+
path=copied_path,
840+
bucket=self.sagemaker_session.default_bucket(),
841+
key_prefix=key_prefix,
842+
)
843+
else:
844+
s3_uri = self.sagemaker_session.upload_data(
845+
path=data_source,
846+
bucket=self.sagemaker_session.default_bucket(),
847+
key_prefix=key_prefix,
848+
)
818849
channel = Channel(
819850
channel_name=channel_name,
820851
data_source=DataSource(
@@ -861,7 +892,9 @@ def _get_input_data_config(
861892
channels.append(input_data)
862893
elif isinstance(input_data, InputData):
863894
channel = self.create_input_data_channel(
864-
input_data.channel_name, input_data.data_source, key_prefix=key_prefix
895+
input_data.channel_name,
896+
input_data.data_source,
897+
key_prefix=key_prefix,
865898
)
866899
channels.append(channel)
867900
else:
@@ -1260,3 +1293,33 @@ def with_checkpoint_config(
12601293
"""
12611294
self.checkpoint_config = checkpoint_config or configs.CheckpointConfig()
12621295
return self
1296+
1297+
def with_metric_definitions(
1298+
self, metric_definitions: List[MetricDefinition]
1299+
) -> "ModelTrainer": # noqa: D412
1300+
"""Set the metric definitions for the training job.
1301+
1302+
Example:
1303+
1304+
.. code:: python
1305+
1306+
from sagemaker.modules.train import ModelTrainer
1307+
from sagemaker.modules.configs import MetricDefinition
1308+
1309+
metric_definitions = [
1310+
MetricDefinition(
1311+
name="loss",
1312+
regex="Loss: (.*?);",
1313+
)
1314+
]
1315+
1316+
model_trainer = ModelTrainer(
1317+
...
1318+
).with_metric_definitions(metric_definitions)
1319+
1320+
Args:
1321+
metric_definitions (List[MetricDefinition]):
1322+
The metric definitions for the training job.
1323+
"""
1324+
self._metric_definitions = metric_definitions
1325+
return self

src/sagemaker/session.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7509,7 +7509,7 @@ def get_model_package_args(
75097509
if source_uri is not None:
75107510
model_package_args["source_uri"] = source_uri
75117511
if model_life_cycle is not None:
7512-
model_package_args["model_life_cycle"] = model_life_cycle
7512+
model_package_args["model_life_cycle"] = model_life_cycle._to_request_dict()
75137513
if model_card is not None:
75147514
original_req = model_card._create_request_args()
75157515
if original_req.get("ModelCardName") is not None:

src/sagemaker/workflow/utilities.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,15 @@
2121
import hashlib
2222
from urllib.parse import unquote, urlparse
2323
from contextlib import contextmanager
24-
from _hashlib import HASH as Hash
24+
25+
try:
26+
# _hashlib is an internal python module, and is not present in
27+
# statically linked interpreters.
28+
from _hashlib import HASH as Hash
29+
except ImportError:
30+
import typing
31+
32+
Hash = typing.Any
2533

2634
from sagemaker.utils import base_from_name
2735
from sagemaker.workflow.parameters import Parameter

tests/integ/sagemaker/workflow/test_model_create_and_registration.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from sagemaker.s3 import S3Uploader
4949
from sagemaker.sklearn import SKLearnModel, SKLearnProcessor
5050
from sagemaker.mxnet.model import MXNetModel
51+
from sagemaker.model_life_cycle import ModelLifeCycle
5152
from sagemaker.workflow.condition_step import ConditionStep
5253
from sagemaker.workflow.parameters import ParameterInteger, ParameterString
5354
from sagemaker.workflow.pipeline import Pipeline
@@ -1005,11 +1006,11 @@ def test_model_registration_with_model_life_cycle_object(
10051006
py_version="py3",
10061007
role=role,
10071008
)
1008-
create_model_life_cycle = {
1009-
"Stage": "Development",
1010-
"StageStatus": "In-Progress",
1011-
"StageDescription": "Development In Progress",
1012-
}
1009+
create_model_life_cycle = ModelLifeCycle(
1010+
stage="Development",
1011+
stage_status="In-Progress",
1012+
stage_description="Development In Progress",
1013+
)
10131014

10141015
step_register = RegisterModel(
10151016
name="MyRegisterModelStep",

tests/integ/test_model_package.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def test_update_model_life_cycle_model_package(sagemaker_session):
103103
inference_instances=["ml.m5.large"],
104104
transform_instances=["ml.m5.large"],
105105
model_package_group_name=model_group_name,
106-
model_life_cycle=create_model_life_cycle._to_request_dict(),
106+
model_life_cycle=create_model_life_cycle,
107107
)
108108

109109
desc_model_package = sagemaker_session.sagemaker_client.describe_model_package(

0 commit comments

Comments
 (0)