|
| 1 | +import time |
| 2 | +import uuid |
| 3 | +import pytest |
| 4 | +import boto3 |
| 5 | +import os |
| 6 | +from click.testing import CliRunner |
| 7 | +from sagemaker.hyperpod.cli.commands.inference import ( |
| 8 | + custom_create, |
| 9 | + custom_invoke, |
| 10 | + custom_list, |
| 11 | + custom_describe, |
| 12 | + custom_delete, |
| 13 | + custom_get_operator_logs, |
| 14 | + custom_list_pods |
| 15 | +) |
| 16 | +from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint |
| 17 | + |
# --------- Test Configuration ---------
# Kubernetes namespace, CLI schema version and AWS region passed to the commands.
NAMESPACE = "integration"
VERSION = "1.0"
REGION = "us-east-2"

# Polling budget used while waiting for the endpoint to come up.
TIMEOUT_MINUTES = 15
POLL_INTERVAL_SECONDS = 30

# Per-stage model bucket names and TLS certificate output locations.
BETA_BUCKET = "sagemaker-hyperpod-beta-integ-test-model-bucket-n"
PROD_BUCKET = "sagemaker-hyperpod-prod-integ-test-model-bucket"
BETA_TLS = "s3://sagemaker-hyperpod-certificate-beta-us-east-2"
PROD_TLS = "s3://sagemaker-hyperpod-certificate-prod-us-east-2"

# STAGE defaults to BETA; any value other than "BETA" selects the PROD resources.
stage = os.environ.get("STAGE", "BETA").upper()
if stage == "BETA":
    BUCKET_LOCATION, TLS_LOCATION = BETA_BUCKET, BETA_TLS
else:
    BUCKET_LOCATION, TLS_LOCATION = PROD_BUCKET, PROD_TLS
| 32 | + |
@pytest.fixture(scope="module")
def runner():
    """Provide a single Click ``CliRunner`` shared by the tests in this module."""
    cli_runner = CliRunner()
    return cli_runner
| 36 | + |
@pytest.fixture(scope="module")
def custom_endpoint_name():
    """Return the fixed endpoint name used by the tests in this module."""
    # Plain literal: the previous f-string contained no placeholders.
    return "custom-cli-integration-s3"
| 40 | + |
@pytest.fixture(scope="module")
def sagemaker_client():
    """Build a module-scoped boto3 SageMaker client bound to ``REGION``."""
    client = boto3.client("sagemaker", region_name=REGION)
    return client
| 44 | + |
| 45 | +# --------- Custom Endpoint Tests --------- |
| 46 | + |
def test_custom_create(runner, custom_endpoint_name):
    """Invoke ``custom_create`` with an S3 model source and expect exit code 0."""
    # Environment passed to the container, kept as one JSON string; adjacent
    # literals are concatenated by the parser into the exact original value.
    container_env = (
        '{ "SAGEMAKER_PROGRAM": "inference.py", '
        '"SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code", '
        '"SAGEMAKER_CONTAINER_LOG_LEVEL": "20", '
        '"SAGEMAKER_MODEL_SERVER_TIMEOUT": "3600", '
        '"ENDPOINT_SERVER_TIMEOUT": "3600", '
        '"MODEL_CACHE_ROOT": "/opt/ml/model", '
        '"SAGEMAKER_ENV": "1", '
        '"SAGEMAKER_MODEL_SERVER_WORKERS": "1" }'
    )
    image_uri = (
        "763104351884.dkr.ecr.us-west-2.amazonaws.com/"
        "huggingface-pytorch-inference:2.3.0-transformers4.48.0-cpu-py311-ubuntu22.04"
    )

    cli_args = [
        "--namespace", NAMESPACE,
        "--version", VERSION,
        "--instance-type", "ml.c5.2xlarge",
        "--model-name", "test-model-integration-cli-s3",
        "--model-source-type", "s3",
        "--model-location", "hf-eqa",
        "--s3-bucket-name", BUCKET_LOCATION,
        "--s3-region", REGION,
        "--image-uri", image_uri,
        "--container-port", "8080",
        "--model-volume-mount-name", "model-weights",
        "--endpoint-name", custom_endpoint_name,
        "--resources-requests", '{"cpu": "3200m", "nvidia.com/gpu": 0, "memory": "12Gi"}',
        "--resources-limits", '{"nvidia.com/gpu": 0}',
        "--tls-certificate-output-s3-uri", TLS_LOCATION,
        "--env", container_env,
    ]

    outcome = runner.invoke(custom_create, cli_args)
    # Show the CLI output when the command did not exit cleanly.
    assert outcome.exit_code == 0, outcome.output
| 67 | + |
| 68 | + |
def test_custom_list(runner, custom_endpoint_name):
    """Run ``custom_list`` for ``NAMESPACE`` and expect the endpoint name in its output."""
    result = runner.invoke(custom_list, ["--namespace", NAMESPACE])
    # Attach the CLI output to failures, as test_custom_create does.
    assert result.exit_code == 0, result.output
    assert custom_endpoint_name in result.output, result.output
| 73 | + |
| 74 | + |
def test_custom_describe(runner, custom_endpoint_name):
    """Run ``custom_describe --full`` and expect the endpoint name in its output."""
    result = runner.invoke(custom_describe, [
        "--name", custom_endpoint_name,
        "--namespace", NAMESPACE,
        "--full",
    ])
    # Attach the CLI output to failures, as test_custom_create does.
    assert result.exit_code == 0, result.output
    assert custom_endpoint_name in result.output, result.output
| 83 | + |
| 84 | + |
def test_wait_until_inservice(custom_endpoint_name):
    """Poll the SDK until the custom endpoint's SageMaker state is CreationCompleted.

    The test passes as soon as ``status.endpoints.sagemaker.state`` equals
    ``"CreationCompleted"``. It fails immediately when the overall deployment
    state is ``"DeploymentFailed"``, and fails once ``TIMEOUT_MINUTES`` have
    elapsed without reaching the target state. Exceptions raised while fetching
    or reading the status are printed and polling continues.
    """
    print(f"[INFO] Waiting for custom endpoint '{custom_endpoint_name}' to reach CreationCompleted...")
    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + (TIMEOUT_MINUTES * 60)
    poll_count = 0

    while time.monotonic() < deadline:
        poll_count += 1
        print(f"[DEBUG] Poll #{poll_count}: Checking endpoint status...")

        try:
            ep = HPEndpoint.get(name=custom_endpoint_name, namespace=NAMESPACE)
            state = ep.status.endpoints.sagemaker.state
            print(f"[DEBUG] Current state: {state}")
            if state == "CreationCompleted":
                print("[INFO] Endpoint is in CreationCompleted state.")
                return

            deployment_state = ep.status.deploymentStatus.deploymentObjectOverallState
            if deployment_state == "DeploymentFailed":
                # pytest.fail raises a BaseException-derived outcome, so the
                # handler below does not swallow it.
                pytest.fail("Endpoint deployment failed.")

        except Exception as e:
            # Best effort: a failed status read is printed and retried on the
            # next poll (NOTE(review): presumably status fields can be missing
            # early in the deployment — confirm).
            print(f"[ERROR] Exception during polling: {e}")

        time.sleep(POLL_INTERVAL_SECONDS)

    pytest.fail("[ERROR] Timed out waiting for endpoint to reach CreationCompleted")
| 113 | + |
| 114 | + |
def test_custom_invoke(runner, custom_endpoint_name):
    """Invoke the endpoint with a question/context body and expect an error-free reply."""
    result = runner.invoke(custom_invoke, [
        "--endpoint-name", custom_endpoint_name,
        "--body", '{"question" :"what is the name of the planet?", "context":"mars"}',
        "--content-type", "application/list-text",
    ])
    # Attach the CLI output to failures, as test_custom_create does.
    assert result.exit_code == 0, result.output
    # Case-insensitive check that the output does not contain "error".
    assert "error" not in result.output.lower(), result.output
| 123 | + |
| 124 | + |
def test_custom_get_operator_logs(runner):
    """Run ``custom_get_operator_logs --since-hours 1`` and expect exit code 0."""
    result = runner.invoke(custom_get_operator_logs, ["--since-hours", "1"])
    # Attach the CLI output to failures, as test_custom_create does.
    assert result.exit_code == 0, result.output
| 128 | + |
| 129 | + |
def test_custom_list_pods(runner):
    """Run ``custom_list_pods`` for ``NAMESPACE`` and expect exit code 0."""
    result = runner.invoke(custom_list_pods, ["--namespace", NAMESPACE])
    # Attach the CLI output to failures, as test_custom_create does.
    assert result.exit_code == 0, result.output
| 133 | + |
| 134 | + |
def test_custom_delete(runner, custom_endpoint_name):
    """Run ``custom_delete`` for the test endpoint and expect exit code 0."""
    result = runner.invoke(custom_delete, [
        "--name", custom_endpoint_name,
        "--namespace", NAMESPACE,
    ])
    # Attach the CLI output to failures, as test_custom_create does.
    assert result.exit_code == 0, result.output
0 commit comments