Skip to content

TF-2.16 test modification and handling #4830

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
50cf9aa
[DO NOT MERGE] Experimenting TF image_uri configs
shantanutrip Aug 13, 2024
80f7875
Add logic to Tf estimator
shantanutrip Aug 14, 2024
0a2ed3d
Comment previous data
shantanutrip Aug 14, 2024
2a2dfd0
Test with making changes to model.py
shantanutrip Aug 14, 2024
e359e21
Add override FW version
shantanutrip Aug 14, 2024
c95a787
Make changes to prevent non 2.16 from making the change
shantanutrip Aug 14, 2024
dc522e6
Add Print
shantanutrip Aug 14, 2024
5625988
Merge remote-tracking branch 'origin/master' into test-tf-2-16-inf
shantanutrip Aug 19, 2024
3036cce
Use hasattr
shantanutrip Aug 19, 2024
5711a20
Fix print and use getattr
shantanutrip Aug 19, 2024
c053583
Print statement
shantanutrip Aug 19, 2024
0529828
Only define override when needed
shantanutrip Aug 19, 2024
8690405
Change to net.export() for TF2.16
shantanutrip Aug 20, 2024
3ad5961
Skip tests failing due to TF-IO
shantanutrip Aug 20, 2024
a2b424d
Reformatting
shantanutrip Aug 20, 2024
7a13af7
Reformatting pylint
shantanutrip Aug 20, 2024
4147f69
Change tf_full_vesion fixture
shantanutrip Aug 21, 2024
48c2507
Add Version
shantanutrip Aug 21, 2024
d1a3475
Revert unit test changes
shantanutrip Aug 21, 2024
8493136
Preventing re-initialization of Version
shantanutrip Aug 22, 2024
3e55e7d
Merge branch 'master' into test-tf-2-16-inf
shantanutrip Aug 22, 2024
5e6ff38
Revert config JSON changes
shantanutrip Aug 22, 2024
3f11254
Handle in case inf and training have different major.minor
shantanutrip Aug 22, 2024
7c5add1
Introduce return version concept in tests
shantanutrip Aug 22, 2024
3c7135e
Merge branch 'master' into test-tf-2-16-inf
shantanutrip Aug 23, 2024
f7cf9d3
Add TF2.16.1 inf to config
shantanutrip Aug 23, 2024
d88af35
Revert temp changes
shantanutrip Aug 23, 2024
3751a63
Merge branch 'master' into test-tf-2-16-inf
benieric Aug 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/sagemaker/tensorflow/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def __init__(
# patch versions, but end up hosting the model of the same TF version. For example, the upstream
# TFS-2.12.0 release was a bad release and hence a new TFS-2.12.1 release was made to host
# models from TF-2.12.0.
training_inference_version_mismatch_dict = {"2.12.0": "2.12.1"}
training_inference_version_mismatch_dict = {"2.12.0": "2.12.1", "2.16.2": "2.16.1"}
self.inference_framework_version = training_inference_version_mismatch_dict.get(
framework_version, framework_version
)
Expand Down
18 changes: 13 additions & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from botocore.config import Config
from packaging.version import Version
from packaging.specifiers import SpecifierSet

from sagemaker import Session, image_uris, utils, get_execution_role
from sagemaker.local import LocalSession
Expand Down Expand Up @@ -555,11 +556,18 @@ def tf_full_version(tensorflow_training_latest_version, tensorflow_inference_lat
Fixture exists as such, since TF training and TFS have different latest versions.
Otherwise, this would simply be a single latest version.
"""
return str(
min(
Version(tensorflow_training_latest_version),
Version(tensorflow_inference_latest_version),
)
tensorflow_training_latest_version = Version(tensorflow_training_latest_version)
tensorflow_inference_latest_version = Version(tensorflow_inference_latest_version)

return_version = min(
tensorflow_training_latest_version,
tensorflow_inference_latest_version,
)

return (
f"{return_version.major}.{return_version.minor}"
if return_version in SpecifierSet(">=2.16")
else str(return_version)
)


Expand Down
5 changes: 4 additions & 1 deletion tests/data/tensorflow_mnist/mnist_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,10 @@ def main(args):

if args.current_host == args.hosts[0]:
ckpt_manager.save()
net.save("/opt/ml/model/1")
if int(tf_major) > 2 or (int(tf_major) == 2 and int(tf_minor) >= 16):
net.export("/opt/ml/model/1")
else:
net.save("/opt/ml/model/1")


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@

import pytest

from packaging.version import Version
from packaging.specifiers import SpecifierSet

from sagemaker.model_card.model_card import ModelCard, ModelOverview, ModelPackageModelCard
from sagemaker.model_card.schema_constraints import ModelCardStatusEnum
import tests
Expand Down Expand Up @@ -1250,6 +1253,11 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(
pipeline_name,
region_name,
):
if Version(tf_full_version) in SpecifierSet("==2.16.*"):
pytest.skip(
"This test is failing in TensorFlow 2.16 because of an upstream bug: "
"https://github.com/tensorflow/io/issues/2039"
)
base_dir = os.path.join(DATA_DIR, "tensorflow_mnist")
entry_point = os.path.join(base_dir, "mnist_v2.py")
input_path = sagemaker_session_for_pipeline.upload_data(
Expand Down
8 changes: 8 additions & 0 deletions tests/integ/sagemaker/workflow/test_model_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@

import pytest

from packaging.version import Version
from packaging.specifiers import SpecifierSet

from tests.integ.sagemaker.workflow.helpers import wait_pipeline_execution
from sagemaker.workflow.fail_step import FailStep
from sagemaker.workflow.functions import Join
Expand Down Expand Up @@ -589,6 +592,11 @@ def test_model_registration_with_drift_check_baselines_and_model_metrics(
def test_model_registration_with_tensorflow_model_with_pipeline_model(
pipeline_session, role, tf_full_version, tf_full_py_version, pipeline_name
):
if Version(tf_full_version) in SpecifierSet("==2.16.*"):
pytest.skip(
"This test is failing in TensorFlow 2.16 because of an upstream bug: "
"https://github.com/tensorflow/io/issues/2039"
)
base_dir = os.path.join(DATA_DIR, "tensorflow_mnist")
entry_point = os.path.join(base_dir, "mnist_v2.py")
input_path = pipeline_session.upload_data(
Expand Down
9 changes: 9 additions & 0 deletions tests/integ/sagemaker/workflow/test_training_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@

import pytest

from packaging.version import Version
from packaging.specifiers import SpecifierSet

from tests.integ.sagemaker.workflow.helpers import wait_pipeline_execution
from sagemaker import TrainingInput, get_execution_role, utils, image_uris
from sagemaker.debugger import (
Expand Down Expand Up @@ -235,6 +238,12 @@ def test_training_step_with_output_path_as_join(
def test_tensorflow_training_step_with_parameterized_code_input(
pipeline_session, role, tf_full_version, tf_full_py_version, pipeline_name
):
if Version(tf_full_version) in SpecifierSet("==2.16.*"):
pytest.skip(
"This test is failing in TensorFlow 2.16 because of an upstream bug: "
"https://github.com/tensorflow/io/issues/2039"
)

base_dir = os.path.join(DATA_DIR, "tensorflow_mnist")
entry_point1 = "mnist_v2.py"
entry_point2 = "mnist_dummy.py"
Expand Down
9 changes: 9 additions & 0 deletions tests/integ/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@

import pytest

from packaging.version import Version
from packaging.specifiers import SpecifierSet

from sagemaker import KMeans, s3, get_execution_role
from sagemaker.mxnet import MXNet
from sagemaker.pytorch import PyTorchModel
Expand Down Expand Up @@ -553,6 +556,12 @@ def test_transform_mxnet_logs(
def test_transform_tf_kms_network_isolation(
sagemaker_session, cpu_instance_type, tmpdir, tf_full_version, tf_full_py_version
):
if Version(tf_full_version) in SpecifierSet("==2.16.*"):
pytest.skip(
"This test is failing in TensorFlow 2.16 because of an upstream bug: "
"https://github.com/tensorflow/io/issues/2039"
)

data_path = os.path.join(DATA_DIR, "tensorflow_mnist")

tf = TensorFlow(
Expand Down