25 | 25 | from tests.integ import DATA_DIR, PYTHON_VERSION, TUNING_DEFAULT_TIMEOUT_MINUTES |
26 | 26 | from tests.integ.record_set import prepare_record_set_from_local_files |
27 | 27 | from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name |
| 28 | +from tests.integ import vpc_test_utils |
28 | 29 |
29 | 30 | from sagemaker import KMeans, LDA, RandomCutForest |
30 | 31 | from sagemaker.amazon.amazon_estimator import registry |
@@ -491,6 +492,52 @@ def test_tuning_tf(sagemaker_session): |
491 | 492 | assert dict_result == list_result |
492 | 493 |
493 | 494 |
| 495 | +@pytest.mark.skipif(PYTHON_VERSION != 'py2', reason="TensorFlow image supports only python 2.") |
| 496 | +def test_tuning_tf_vpc_multi(sagemaker_session): |
| 497 | +    """Test TensorFlow multi-instance hyperparameter tuning with a VpcConfig and inter-container traffic encryption enabled""" |
| 498 | + instance_type = 'ml.c4.xlarge' |
| 499 | + instance_count = 2 |
| 500 | + |
| 501 | + script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py') |
| 502 | + |
| 503 | + ec2_client = sagemaker_session.boto_session.client('ec2') |
| 504 | + subnet_ids, security_group_id = vpc_test_utils.get_or_create_vpc_resources(ec2_client, |
| 505 | + sagemaker_session.boto_region_name) |
| 506 | + vpc_test_utils.setup_security_group_for_encryption(ec2_client, security_group_id) |
| 507 | + |
| 508 | + estimator = TensorFlow(entry_point=script_path, |
| 509 | + role='SageMakerRole', |
| 510 | + training_steps=1, |
| 511 | + evaluation_steps=1, |
| 512 | + hyperparameters={'input_tensor_name': 'inputs'}, |
| 513 | + train_instance_count=instance_count, |
| 514 | + train_instance_type=instance_type, |
| 515 | + sagemaker_session=sagemaker_session, |
| 516 | + base_job_name='test-vpc-tf', |
| 517 | + subnets=subnet_ids, |
| 518 | + security_group_ids=[security_group_id], |
| 519 | + encrypt_inter_container_traffic=True) |
| 520 | + |
| 521 | + inputs = sagemaker_session.upload_data(path=DATA_PATH, key_prefix='integ-test-data/tf_iris') |
| 522 | + hyperparameter_ranges = {'learning_rate': ContinuousParameter(0.05, 0.2)} |
| 523 | + |
| 524 | + objective_metric_name = 'loss' |
| 525 | + metric_definitions = [{'Name': 'loss', 'Regex': 'loss = ([0-9\\.]+)'}] |
| 526 | + |
| 527 | + tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, |
| 528 | + metric_definitions, |
| 529 | + objective_type='Minimize', max_jobs=2, max_parallel_jobs=2) |
| 530 | + |
| 531 | + tuning_job_name = unique_name_from_base('tune-tf', max_length=32) |
| 532 | + with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES): |
| 533 | + tuner.fit(inputs, job_name=tuning_job_name) |
| 534 | + |
| 535 | +    print('Started hyperparameter tuning job with name: ' + tuning_job_name) |
| 536 | + |
| 537 | +    time.sleep(15)  # brief pause before polling the tuning job for completion |
| 538 | + tuner.wait() |
| 539 | + |
| 540 | + |
494 | 541 | @pytest.mark.continuous_testing |
495 | 542 | def test_tuning_chainer(sagemaker_session): |
496 | 543 | with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES): |
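A possible follow-up check (a sketch only, not part of the commit above): after tuner.wait() returns, the test could describe the best training job and assert that the VpcConfig and inter-container traffic encryption were actually applied. HyperparameterTuner.best_training_job() and the DescribeTrainingJob response fields used below are existing SageMaker SDK/API surface; the assertions themselves are a hypothetical extension appended inside test_tuning_tf_vpc_multi, reusing its local variables.

    # Hypothetical extra assertions for test_tuning_tf_vpc_multi (not in the diff above).
    best_training_job = tuner.best_training_job()
    desc = sagemaker_session.sagemaker_client.describe_training_job(
        TrainingJobName=best_training_job)

    # The winning training job should carry the same VPC settings passed to the estimator
    # and have inter-container traffic encryption turned on.
    assert desc['EnableInterContainerTrafficEncryption'] is True
    assert sorted(desc['VpcConfig']['Subnets']) == sorted(subnet_ids)
    assert desc['VpcConfig']['SecurityGroupIds'] == [security_group_id]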