
Commit babb17d

Inference integ test for both beta and prod account (#112)
* All integ tests passed for JumpStart and custom inference; added a stage check to determine the S3/FSx/TLS location
* Minor update
1 parent e780e29 commit babb17d

File tree: 6 files changed, +359 −66 lines changed

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+Subproject commit ce96b513c3033f815d24469f07e2ef0531aaf8d4

src/sagemaker/hyperpod/cli/commands/inference.py

Lines changed: 9 additions & 1 deletion
@@ -69,9 +69,17 @@ def custom_create(namespace, version, custom_endpoint):
     required=True,
     help="Required. The body of the request to invoke.",
 )
+@click.option(
+    "--content-type",
+    type=click.STRING,
+    required=False,
+    default="application/json",
+    help="Optional. The content type of the request to invoke. Default set to 'application/json'",
+)
 def custom_invoke(
     endpoint_name: str,
     body: str,
+    content_type: Optional[str]
 ):
     """
     Invoke a model endpoint.
@@ -105,7 +113,7 @@ def custom_invoke(
     resp = rt.invoke_endpoint(
         EndpointName=endpoint_name,
         Body=payload.encode("utf-8"),
-        ContentType="application/json",
+        ContentType=content_type,
     )
     result = resp["Body"].read().decode("utf-8")
     click.echo(result)
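
Note on the change above: the new --content-type option only changes the ContentType header that the CLI passes to the SageMaker runtime. A minimal sketch of the equivalent boto3 call is below; the endpoint name and payload are placeholder values, not values from this commit, and credentials/region come from the usual boto3 configuration.

import boto3

# Sketch of the call the CLI wraps; "my-endpoint" and the payload are placeholders.
rt = boto3.client("sagemaker-runtime", region_name="us-east-2")
resp = rt.invoke_endpoint(
    EndpointName="my-endpoint",
    Body='{"question": "what is the name of the planet?", "context": "mars"}'.encode("utf-8"),
    ContentType="application/json",  # overridable via --content-type after this commit
)
print(resp["Body"].read().decode("utf-8"))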

test/integration_tests/cli/test_cli_custom_inference.py renamed to test/integration_tests/cli/test_cli_custom_fsx_inference.py

Lines changed: 22 additions & 24 deletions
@@ -2,6 +2,7 @@
 import uuid
 import pytest
 import boto3
+import os
 from click.testing import CliRunner
 from sagemaker.hyperpod.cli.commands.inference import (
     custom_create,
@@ -21,13 +22,21 @@
 TIMEOUT_MINUTES = 15
 POLL_INTERVAL_SECONDS = 30

+BETA_FSX = "fs-0454e783bbb7356fc"
+PROD_FSX = "fs-03c59e2a7e824a22f"
+BETA_TLS = "s3://sagemaker-hyperpod-certificate-beta-us-east-2"
+PROD_TLS = "s3://sagemaker-hyperpod-certificate-prod-us-east-2"
+stage = os.getenv("STAGE", "BETA").upper()
+FSX_LOCATION = BETA_FSX if stage == "BETA" else PROD_FSX
+TLS_LOCATION = BETA_TLS if stage == "BETA" else PROD_TLS
+
 @pytest.fixture(scope="module")
 def runner():
     return CliRunner()

 @pytest.fixture(scope="module")
 def custom_endpoint_name():
-    return f"custom-cli-integration"
+    return f"custom-cli-integration-fsx"

 @pytest.fixture(scope="module")
 def sagemaker_client():
@@ -39,32 +48,20 @@ def test_custom_create(runner, custom_endpoint_name):
     result = runner.invoke(custom_create, [
         "--namespace", NAMESPACE,
         "--version", VERSION,
-        "--instance-type", "ml.g5.8xlarge",
-        "--model-name", "test-model-integration",
-        "--model-source-type", "s3",
-        "--model-location", "deepseek15b",
-        "--s3-bucket-name", "test-model-s3-zhaoqi",
+        "--instance-type", "ml.c5.2xlarge",
+        "--model-name", "test-model-integration-cli-fsx",
+        "--model-source-type", "fsx",
+        "--model-location", "hf-eqa",
+        "--fsx-file-system-id", FSX_LOCATION,
         "--s3-region", REGION,
-        "--image-uri", "763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0",
+        "--image-uri", "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:2.3.0-transformers4.48.0-cpu-py311-ubuntu22.04",
         "--container-port", "8080",
         "--model-volume-mount-name", "model-weights",
         "--endpoint-name", custom_endpoint_name,
-        "--resources-requests", '{"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"}',
-        "--resources-limits", '{"nvidia.com/gpu": 1}',
-        "--tls-certificate-output-s3-uri", "s3://tls-bucket-inf1-beta2",
-        "--metrics-enabled", "true",
-        "--metric-collection-period", "30",
-        "--metric-name", "Invocations",
-        "--metric-stat", "Sum",
-        "--metric-type", "Average",
-        "--min-value", "0.0",
-        "--cloud-watch-trigger-name", "SageMaker-Invocations-new",
-        "--cloud-watch-trigger-namespace", "AWS/SageMaker",
-        "--target-value", "10",
-        "--use-cached-metrics", "true",
-        "--dimensions", '{"EndpointName": "' + custom_endpoint_name + '", "VariantName": "AllTraffic"}',
-        "--env", '{ "HF_MODEL_ID": "/opt/ml/model", "SAGEMAKER_PROGRAM": "inference.py", "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code", "MODEL_CACHE_ROOT": "/opt/ml/model", "SAGEMAKER_ENV": "1" }',
-
+        "--resources-requests", '{"cpu": "3200m", "nvidia.com/gpu": 0, "memory": "12Gi"}',
+        "--resources-limits", '{"nvidia.com/gpu": 0}',
+        "--tls-certificate-output-s3-uri", TLS_LOCATION,
+        "--env", '{ "SAGEMAKER_PROGRAM": "inference.py", "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code", "SAGEMAKER_CONTAINER_LOG_LEVEL": "20", "SAGEMAKER_MODEL_SERVER_TIMEOUT": "3600", "ENDPOINT_SERVER_TIMEOUT": "3600", "MODEL_CACHE_ROOT": "/opt/ml/model", "SAGEMAKER_ENV": "1", "SAGEMAKER_MODEL_SERVER_WORKERS": "1" }'
     ])
     assert result.exit_code == 0, result.output

@@ -118,7 +115,8 @@ def test_wait_until_inservice(custom_endpoint_name):
 def test_custom_invoke(runner, custom_endpoint_name):
     result = runner.invoke(custom_invoke, [
         "--endpoint-name", custom_endpoint_name,
-        "--body", '{"inputs": "What is the capital of USA?"}'
+        "--body", '{"question" :"what is the name of the planet?", "context":"mars"}',
+        "--content-type", "application/list-text"
     ])
     assert result.exit_code == 0
     assert "error" not in result.output.lower()
Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,140 @@
+import time
+import uuid
+import pytest
+import boto3
+import os
+from click.testing import CliRunner
+from sagemaker.hyperpod.cli.commands.inference import (
+    custom_create,
+    custom_invoke,
+    custom_list,
+    custom_describe,
+    custom_delete,
+    custom_get_operator_logs,
+    custom_list_pods
+)
+from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint
+
+# --------- Test Configuration ---------
+NAMESPACE = "integration"
+VERSION = "1.0"
+REGION = "us-east-2"
+TIMEOUT_MINUTES = 15
+POLL_INTERVAL_SECONDS = 30
+
+BETA_BUCKET = "sagemaker-hyperpod-beta-integ-test-model-bucket-n"
+PROD_BUCKET = "sagemaker-hyperpod-prod-integ-test-model-bucket"
+BETA_TLS = "s3://sagemaker-hyperpod-certificate-beta-us-east-2"
+PROD_TLS = "s3://sagemaker-hyperpod-certificate-prod-us-east-2"
+stage = os.getenv("STAGE", "BETA").upper()
+BUCKET_LOCATION = BETA_BUCKET if stage == "BETA" else PROD_BUCKET
+TLS_LOCATION = BETA_TLS if stage == "BETA" else PROD_TLS
+
+@pytest.fixture(scope="module")
+def runner():
+    return CliRunner()
+
+@pytest.fixture(scope="module")
+def custom_endpoint_name():
+    return f"custom-cli-integration-s3"
+
+@pytest.fixture(scope="module")
+def sagemaker_client():
+    return boto3.client("sagemaker", region_name=REGION)
+
+# --------- Custom Endpoint Tests ---------
+
+def test_custom_create(runner, custom_endpoint_name):
+    result = runner.invoke(custom_create, [
+        "--namespace", NAMESPACE,
+        "--version", VERSION,
+        "--instance-type", "ml.c5.2xlarge",
+        "--model-name", "test-model-integration-cli-s3",
+        "--model-source-type", "s3",
+        "--model-location", "hf-eqa",
+        "--s3-bucket-name", BUCKET_LOCATION,
+        "--s3-region", REGION,
+        "--image-uri", "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:2.3.0-transformers4.48.0-cpu-py311-ubuntu22.04",
+        "--container-port", "8080",
+        "--model-volume-mount-name", "model-weights",
+        "--endpoint-name", custom_endpoint_name,
+        "--resources-requests", '{"cpu": "3200m", "nvidia.com/gpu": 0, "memory": "12Gi"}',
+        "--resources-limits", '{"nvidia.com/gpu": 0}',
+        "--tls-certificate-output-s3-uri", TLS_LOCATION,
+        "--env", '{ "SAGEMAKER_PROGRAM": "inference.py", "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code", "SAGEMAKER_CONTAINER_LOG_LEVEL": "20", "SAGEMAKER_MODEL_SERVER_TIMEOUT": "3600", "ENDPOINT_SERVER_TIMEOUT": "3600", "MODEL_CACHE_ROOT": "/opt/ml/model", "SAGEMAKER_ENV": "1", "SAGEMAKER_MODEL_SERVER_WORKERS": "1" }'
+    ])
+    assert result.exit_code == 0, result.output
+
+
+def test_custom_list(runner, custom_endpoint_name):
+    result = runner.invoke(custom_list, ["--namespace", NAMESPACE])
+    assert result.exit_code == 0
+    assert custom_endpoint_name in result.output
+
+
+def test_custom_describe(runner, custom_endpoint_name):
+    result = runner.invoke(custom_describe, [
+        "--name", custom_endpoint_name,
+        "--namespace", NAMESPACE,
+        "--full"
+    ])
+    assert result.exit_code == 0
+    assert custom_endpoint_name in result.output
+
+
+def test_wait_until_inservice(custom_endpoint_name):
+    """Poll SDK until specific JumpStart endpoint reaches DeploymentComplete"""
+    print(f"[INFO] Waiting for JumpStart endpoint '{custom_endpoint_name}' to be DeploymentComplete...")
+    deadline = time.time() + (TIMEOUT_MINUTES * 60)
+    poll_count = 0
+
+    while time.time() < deadline:
+        poll_count += 1
+        print(f"[DEBUG] Poll #{poll_count}: Checking endpoint status...")
+
+        try:
+            ep = HPEndpoint.get(name=custom_endpoint_name, namespace=NAMESPACE)
+            state = ep.status.endpoints.sagemaker.state
+            print(f"[DEBUG] Current state: {state}")
+            if state == "CreationCompleted":
+                print("[INFO] Endpoint is in CreationCompleted state.")
+                return
+
+            deployment_state = ep.status.deploymentStatus.deploymentObjectOverallState
+            if deployment_state == "DeploymentFailed":
+                pytest.fail("Endpoint deployment failed.")
+
+        except Exception as e:
+            print(f"[ERROR] Exception during polling: {e}")
+
+        time.sleep(POLL_INTERVAL_SECONDS)
+
+    pytest.fail("[ERROR] Timed out waiting for endpoint to be DeploymentComplete")
+
+
+def test_custom_invoke(runner, custom_endpoint_name):
+    result = runner.invoke(custom_invoke, [
+        "--endpoint-name", custom_endpoint_name,
+        "--body", '{"question" :"what is the name of the planet?", "context":"mars"}',
+        "--content-type", "application/list-text"
+    ])
+    assert result.exit_code == 0
+    assert "error" not in result.output.lower()
+
+
+def test_custom_get_operator_logs(runner):
+    result = runner.invoke(custom_get_operator_logs, ["--since-hours", "1"])
+    assert result.exit_code == 0
+
+
+def test_custom_list_pods(runner):
+    result = runner.invoke(custom_list_pods, ["--namespace", NAMESPACE])
+    assert result.exit_code == 0
+
+
+def test_custom_delete(runner, custom_endpoint_name):
+    result = runner.invoke(custom_delete, [
+        "--name", custom_endpoint_name,
+        "--namespace", NAMESPACE
+    ])
+    assert result.exit_code == 0
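
The new file above mirrors the FSx suite but stages the model from S3 and runs on a CPU instance type. Because STAGE is read at module import time, it has to be set before pytest collects the tests; a sketch of a prod-account run is below. The module path is hypothetical, since the new file's name is not shown in this view.

import os
import pytest

# Select the prod bucket and TLS location before the test module is imported.
os.environ["STAGE"] = "PROD"

# Hypothetical path for the new S3 suite; adjust to the actual file name in the repo.
pytest.main(["-v", "test/integration_tests/cli/test_cli_custom_s3_inference.py"])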
