From 07d278b718ce7caad066ccbd079a5f8646037cfa Mon Sep 17 00:00:00 2001 From: Erick Benitez-Ramos Date: Thu, 12 Sep 2024 08:35:40 -0700 Subject: [PATCH 1/3] chore: add flaky test markers --- tests/integ/test_huggingface.py | 2 ++ tests/integ/test_training_compiler.py | 1 + tests/unit/sagemaker/jumpstart/test_notebook_utils.py | 2 ++ tests/unit/sagemaker/workflow/test_transform_step.py | 1 + 4 files changed, 6 insertions(+) diff --git a/tests/integ/test_huggingface.py b/tests/integ/test_huggingface.py index a8be54c4d4..0f098dc710 100644 --- a/tests/integ/test_huggingface.py +++ b/tests/integ/test_huggingface.py @@ -29,6 +29,7 @@ @pytest.mark.release +@pytest.mark.flaky(reruns=3, reruns_delay=5) def test_framework_processing_job_with_deps( sagemaker_session, huggingface_training_latest_version, @@ -59,6 +60,7 @@ def test_framework_processing_job_with_deps( @pytest.mark.release +@pytest.mark.flaky(reruns=3, reruns_delay=5) def test_huggingface_training( sagemaker_session, huggingface_training_latest_version, diff --git a/tests/integ/test_training_compiler.py b/tests/integ/test_training_compiler.py index 803be0013e..304e11d1b7 100644 --- a/tests/integ/test_training_compiler.py +++ b/tests/integ/test_training_compiler.py @@ -90,6 +90,7 @@ def skip_if_incompatible(gpu_instance_type, request): pytest.param("ml.p3.16xlarge", 2), ], ) +@pytest.mark.flaky(rerun=3, rerun_delay=5) def test_huggingface_pytorch( sagemaker_session, gpu_instance_type, diff --git a/tests/unit/sagemaker/jumpstart/test_notebook_utils.py b/tests/unit/sagemaker/jumpstart/test_notebook_utils.py index a06b48deb7..66e5777a7b 100644 --- a/tests/unit/sagemaker/jumpstart/test_notebook_utils.py +++ b/tests/unit/sagemaker/jumpstart/test_notebook_utils.py @@ -231,6 +231,7 @@ def test_list_jumpstart_models_simple_case( @patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") @patch("sagemaker.jumpstart.notebook_utils.DEFAULT_JUMPSTART_SAGEMAKER_SESSION.read_s3_file") + @pytest.mark.flaky(reruns=5, reruns_delay=1) def test_list_jumpstart_models_script_filter( self, patched_read_s3_file: Mock, patched_get_manifest: Mock ): @@ -583,6 +584,7 @@ def vulnerable_training_model_spec(bucket, key, *args, **kwargs): @patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") @patch("sagemaker.jumpstart.notebook_utils.DEFAULT_JUMPSTART_SAGEMAKER_SESSION.read_s3_file") + @pytest.mark.flaky(reruns=5, reruns_delay=1) def test_list_jumpstart_models_deprecated_models( self, patched_read_s3_file: Mock, diff --git a/tests/unit/sagemaker/workflow/test_transform_step.py b/tests/unit/sagemaker/workflow/test_transform_step.py index d22965dae8..19471228d6 100644 --- a/tests/unit/sagemaker/workflow/test_transform_step.py +++ b/tests/unit/sagemaker/workflow/test_transform_step.py @@ -70,6 +70,7 @@ custom_step.properties.OutputDataConfig.S3OutputPath, ], ) +@pytest.mark.flaky(reruns=5, reruns_delay=1) def test_transform_step_with_transformer(model_name, data, output_path, pipeline_session): transformer = Transformer( model_name=model_name, From d6bf351434ecd4ac78b1057af95d242e0382859b Mon Sep 17 00:00:00 2001 From: Erick Benitez-Ramos Date: Thu, 12 Sep 2024 08:53:41 -0700 Subject: [PATCH 2/3] skip p2 instance tests in eu-west-1 --- tests/integ/__init__.py | 1 + tests/integ/test_huggingface.py | 10 ++++++++-- tests/integ/test_training_compiler.py | 5 ++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/integ/__init__.py b/tests/integ/__init__.py index 434f4dd744..e1c1edb685 100644 --- a/tests/integ/__init__.py +++ b/tests/integ/__init__.py @@ -68,6 +68,7 @@ "me-south-1", "sa-east-1", "us-west-1", + "eu-west-1", # not enough capacity ] TRAINING_NO_P3_REGIONS = [ "af-south-1", diff --git a/tests/integ/test_huggingface.py b/tests/integ/test_huggingface.py index 0f098dc710..991543b4a2 100644 --- a/tests/integ/test_huggingface.py +++ b/tests/integ/test_huggingface.py @@ -29,7 +29,10 @@ @pytest.mark.release -@pytest.mark.flaky(reruns=3, reruns_delay=5) +@pytest.mark.skipif( + tests.integ.test_region() in tests.integ.TRAINING_NO_P2_REGIONS, + reason="No P2 instances or low capacity in this region", +) def test_framework_processing_job_with_deps( sagemaker_session, huggingface_training_latest_version, @@ -60,7 +63,10 @@ def test_framework_processing_job_with_deps( @pytest.mark.release -@pytest.mark.flaky(reruns=3, reruns_delay=5) +@pytest.mark.skipif( + tests.integ.test_region() in tests.integ.TRAINING_NO_P2_REGIONS, + reason="No P2 instances or low capacity in this region", +) def test_huggingface_training( sagemaker_session, huggingface_training_latest_version, diff --git a/tests/integ/test_training_compiler.py b/tests/integ/test_training_compiler.py index 304e11d1b7..f8f195ac0e 100644 --- a/tests/integ/test_training_compiler.py +++ b/tests/integ/test_training_compiler.py @@ -90,7 +90,10 @@ def skip_if_incompatible(gpu_instance_type, request): pytest.param("ml.p3.16xlarge", 2), ], ) -@pytest.mark.flaky(rerun=3, rerun_delay=5) +@pytest.mark.skipif( + integ.test_region() in integ.TRAINING_NO_P2_REGIONS, + reason="No P2 instances or low capacity in this region", +) def test_huggingface_pytorch( sagemaker_session, gpu_instance_type, From 085ac5100c21ce0835e350102822e8eaa41b8cd5 Mon Sep 17 00:00:00 2001 From: Erick Benitez-Ramos Date: Thu, 12 Sep 2024 09:03:37 -0700 Subject: [PATCH 3/3] Fix to check p3 --- tests/integ/__init__.py | 2 +- tests/integ/test_huggingface.py | 8 ++++---- tests/integ/test_training_compiler.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integ/__init__.py b/tests/integ/__init__.py index e1c1edb685..a01223b256 100644 --- a/tests/integ/__init__.py +++ b/tests/integ/__init__.py @@ -68,7 +68,6 @@ "me-south-1", "sa-east-1", "us-west-1", - "eu-west-1", # not enough capacity ] TRAINING_NO_P3_REGIONS = [ "af-south-1", @@ -88,6 +87,7 @@ "ap-south-1", "ap-northeast-2", # it has p3, but not enough "us-east-2", # it has p3, but not enough + "eu-west-1", # it has p3, but not enough ] # EI is currently only supported in the following regions diff --git a/tests/integ/test_huggingface.py b/tests/integ/test_huggingface.py index 991543b4a2..9098d8359a 100644 --- a/tests/integ/test_huggingface.py +++ b/tests/integ/test_huggingface.py @@ -30,8 +30,8 @@ @pytest.mark.release @pytest.mark.skipif( - tests.integ.test_region() in tests.integ.TRAINING_NO_P2_REGIONS, - reason="No P2 instances or low capacity in this region", + tests.integ.test_region() in tests.integ.TRAINING_NO_P3_REGIONS, + reason="No P3 instances or low capacity in this region", ) def test_framework_processing_job_with_deps( sagemaker_session, @@ -64,8 +64,8 @@ def test_framework_processing_job_with_deps( @pytest.mark.release @pytest.mark.skipif( - tests.integ.test_region() in tests.integ.TRAINING_NO_P2_REGIONS, - reason="No P2 instances or low capacity in this region", + tests.integ.test_region() in tests.integ.TRAINING_NO_P3_REGIONS, + reason="No P3 instances or low capacity in this region", ) def test_huggingface_training( sagemaker_session, diff --git a/tests/integ/test_training_compiler.py b/tests/integ/test_training_compiler.py index f8f195ac0e..1251eb0723 100644 --- a/tests/integ/test_training_compiler.py +++ b/tests/integ/test_training_compiler.py @@ -91,8 +91,8 @@ def skip_if_incompatible(gpu_instance_type, request): ], ) @pytest.mark.skipif( - integ.test_region() in integ.TRAINING_NO_P2_REGIONS, - reason="No P2 instances or low capacity in this region", + integ.test_region() in integ.TRAINING_NO_P3_REGIONS, + reason="No P3 instances or low capacity in this region", ) def test_huggingface_pytorch( sagemaker_session,