Skip to content

Commit 1bddf72

Browse files
author
Joseph Zhang
committed
Emit a warning when CPU cores are requested for a sharded model deployment.
1 parent 7c14046 commit 1bddf72

File tree

3 files changed

+65
-0
lines changed

3 files changed

+65
-0
lines changed

src/sagemaker/jumpstart/model.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -817,6 +817,20 @@ def deploy(
817817
f"{EndpointType.INFERENCE_COMPONENT_BASED} is not supported for Proprietary models."
818818
)
819819

820+
# No resources were passed to deploy(), but deploy_kwargs.resources is set, which means the
821+
# default JumpStart resource requirements are being used.
822+
if hasattr(self, "_is_sharded_model") and not resources and deploy_kwargs.resources:
823+
if (
824+
self._is_sharded_model
825+
and deploy_kwargs.resources.num_cpus
826+
and deploy_kwargs.resources.num_cpus > 0
827+
):
828+
JUMPSTART_LOGGER.warning(
829+
"NumOfCpuCoresRequired should be 0 for the best experience with SageMaker Fast "
830+
"Model Loading. Overriding the requested `num_cpus` to 0."
831+
)
832+
deploy_kwargs.resources.num_cpus = 0
833+
820834
self.additional_model_data_sources = _add_model_access_configs_to_model_data_sources(
821835
self.additional_model_data_sources,
822836
deploy_kwargs.model_access_configs,

src/sagemaker/model.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1613,6 +1613,13 @@ def deploy(
16131613
"Loading of model requires network access."
16141614
)
16151615

1616+
if self._is_sharded_model:
1617+
if resources.num_cpus and resources.num_cpus > 0:
1618+
logger.warning(
1619+
"NumberOfCpuCoresRequired should be 0 for the best experience with SageMaker "
1620+
"Fast Model Loading. Configure by setting `num_cpus` to 0 in `resources`."
1621+
)
1622+
16161623
# Support multiple models on same endpoint
16171624
if endpoint_type == EndpointType.INFERENCE_COMPONENT_BASED:
16181625
if endpoint_name:

tests/unit/sagemaker/model/test_model.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,3 +1482,47 @@ def test_model_source(
14821482
)
14831483

14841484
assert model_1._get_model_uri() == "s3://tmybuckaet"
1485+
1486+
1487+
@patch("sagemaker.utils.repack_model")
@patch("sagemaker.fw_utils.tar_and_upload_dir")
def test_deploy_sharded_model_with_cpus_requested_raises_warning(
    tar_and_upload_dir, repack_model, sagemaker_session
):
    """Check that deploying a sharded model with CPU cores requested logs a warning.

    A framework model is flagged as sharded via ``_is_sharded_model`` and deployed
    with ``ResourceRequirements`` whose requests include ``num_cpus=1``. The
    ``sagemaker.model`` logger is patched so the test can assert that exactly one
    warning, with the expected message, is emitted.

    Args:
        tar_and_upload_dir: Mock replacing ``sagemaker.fw_utils.tar_and_upload_dir``.
        repack_model: Mock replacing ``sagemaker.utils.repack_model``.
        sagemaker_session: Session fixture passed to the model under test.
    """
    # Stacked ``patch`` decorators inject their mocks bottom-up: the innermost
    # decorator (``tar_and_upload_dir``) supplies the first positional argument.
    framework_model_classes_to_kwargs = {
        HuggingFaceModel: {
            "pytorch_version": "1.7.1",
            "py_version": "py36",
            "transformers_version": "4.6.1",
        },
    }

    sagemaker_session.settings = SessionSettings(include_jumpstart_tags=False)

    source_dir = "s3://blah/blah/blah"
    for framework_model_class, kwargs in framework_model_classes_to_kwargs.items():
        test_sharded_model = framework_model_class(
            entry_point=ENTRY_POINT_INFERENCE,
            role=ROLE,
            sagemaker_session=sagemaker_session,
            model_data=source_dir,
            **kwargs,
        )
        test_sharded_model._is_sharded_model = True

        # A fresh mock logger per model keeps the "called once" assertion scoped
        # to this single deploy() call.
        with patch("sagemaker.model.logger") as mock_logger:
            test_sharded_model.deploy(
                instance_type="ml.m2.xlarge",
                initial_instance_count=INSTANCE_COUNT,
                endpoint_type=EndpointType.MODEL_BASED,
                resources=ResourceRequirements(
                    requests={"num_accelerators": 1, "memory": 8192, "copies": 1, "num_cpus": 1},
                    limits={},
                ),
            )
            mock_logger.warning.assert_called_once_with(
                "NumberOfCpuCoresRequired should be 0 for the best experience with SageMaker "
                "Fast Model Loading. Configure by setting `num_cpus` to 0 in `resources`."
            )

0 commit comments

Comments
 (0)