4 files changed: +20 -14 lines changed
Original file line number Diff line number Diff line change @@ -34,12 +34,12 @@ def read_version():
3434# Declare minimal set for installation
3535required_packages = [
3636 "attrs" ,
37- "boto3>=1.16.27 " ,
37+ "boto3>=1.16.32 " ,
3838 "google-pasta" ,
3939 "numpy>=1.9.0" ,
4040 "protobuf>=3.1" ,
4141 "protobuf3-to-dict>=0.1.5" ,
42- "smdebug_rulesconfig" ,
42+ "smdebug_rulesconfig>=1.0.0 " ,
4343 "importlib-metadata>=1.4.0" ,
4444 "packaging>=20.0" ,
4545]
Original file line number Diff line number Diff line change @@ -91,9 +91,13 @@ def retrieve(
9191 if _should_auto_select_container_version (instance_type , distribution ):
9292 container_versions = {
9393 "tensorflow-2.3-gpu-py37" : "cu110-ubuntu18.04-v3" ,
94+ "tensorflow-2.3.1-gpu-py37" : "cu110-ubuntu18.04" ,
9495 "tensorflow-1.15-gpu-py37" : "cu110-ubuntu18.04-v8" ,
96+ "tensorflow-1.15.4-gpu-py37" : "cu110-ubuntu18.04" ,
9597 "mxnet-1.8-gpu-py37" : "cu110-ubuntu16.04-v1" ,
98+ "mxnet-1.8.0-gpu-py37" : "cu110-ubuntu16.04" ,
9699 "pytorch-1.6-gpu-py36" : "cu110-ubuntu18.04-v3" ,
100+ "pytorch-1.6.0-gpu-py36" : "cu110-ubuntu18.04" ,
97101 }
98102 key = "-" .join ([framework , tag ])
99103 if key in container_versions :
Original file line number Diff line number Diff line change 1414
1515import os
1616
17- import pytest
1817import sagemaker .utils
1918import tests .integ as integ
2019
2726)
2827
2928
30- @pytest .mark .skip (
31- reason = "SMDistributedDataParallel-enabled DLC isn't publicly released hence not accessible for this test"
32- )
33- def test_smdataparallel_pt_mnist (sagemaker_session ):
29+ def test_smdataparallel_pt_mnist (
30+ sagemaker_session ,
31+ pytorch_training_latest_version ,
32+ pytorch_training_latest_py_version ,
33+ ):
3434 job_name = sagemaker .utils .unique_name_from_base ("pt-sm-distributed-dataparallel" )
3535 estimator = PyTorch (
3636 entry_point = "mnist_pt.py" ,
3737 role = "SageMakerRole" ,
38- image_uri = "redacted" ,
3938 source_dir = smdataparallel_dir ,
4039 instance_count = 2 ,
4140 instance_type = "ml.p3.16xlarge" ,
4241 sagemaker_session = sagemaker_session ,
42+ framework_version = pytorch_training_latest_version ,
43+ py_version = pytorch_training_latest_py_version ,
4344 distribution = {"smdistributed" : {"dataparallel" : {"enabled" : True }}},
4445 )
4546
Original file line number Diff line number Diff line change 1414
1515import os
1616
17- import pytest
1817import sagemaker .utils
1918import tests .integ as integ
2019
2625)
2726
2827
29- @pytest .mark .skip (
30- reason = "SMDistributedDataParallel-enabled DLC isn't publicly released hence not accessible for this test"
31- )
32- def test_smdataparallel_tf_mnist (sagemaker_session ):
28+ def test_smdataparallel_tf_mnist (
29+ sagemaker_session ,
30+ tensorflow_training_latest_version ,
31+ tensorflow_training_latest_py_version ,
32+ ):
3333 job_name = sagemaker .utils .unique_name_from_base ("tf-sm-distributed-dataparallel" )
3434 estimator = TensorFlow (
3535 entry_point = "mnist_tf.py" ,
3636 role = "SageMakerRole" ,
37- image_uri = "redacted" ,
3837 source_dir = smdataparallel_dir ,
3938 instance_count = 2 ,
4039 instance_type = "ml.p3.16xlarge" ,
4140 sagemaker_session = sagemaker_session ,
41+ framework_version = tensorflow_training_latest_version ,
42+ py_version = tensorflow_training_latest_py_version ,
4243 distribution = {"smdistributed" : {"dataparallel" : {"enabled" : True }}},
4344 )
4445
You can’t perform that action at this time.
0 commit comments