
Commit babb17d

Inference integ test for both beta and prod account (#112)
* All integ tests passed for JumpStart and custom inference; added a stage check to determine the S3/FSx/TLS location
* Minor update
1 parent e780e29 commit babb17d

File tree: 6 files changed, +359 −66 lines changed

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+Subproject commit ce96b513c3033f815d24469f07e2ef0531aaf8d4

src/sagemaker/hyperpod/cli/commands/inference.py

Lines changed: 9 additions & 1 deletion
@@ -69,9 +69,17 @@ def custom_create(namespace, version, custom_endpoint):
     required=True,
     help="Required. The body of the request to invoke.",
 )
+@click.option(
+    "--content-type",
+    type=click.STRING,
+    required=False,
+    default="application/json",
+    help="Optional. The content type of the request to invoke. Default set to 'application/json'",
+)
 def custom_invoke(
     endpoint_name: str,
     body: str,
+    content_type: Optional[str]
 ):
     """
     Invoke a model endpoint.
@@ -105,7 +113,7 @@ def custom_invoke(
     resp = rt.invoke_endpoint(
         EndpointName=endpoint_name,
         Body=payload.encode("utf-8"),
-        ContentType="application/json",
+        ContentType=content_type,
     )
     result = resp["Body"].read().decode("utf-8")
     click.echo(result)
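
Note on the change above: the new --content-type option only changes the ContentType header that the CLI passes to the SageMaker runtime. A minimal sketch of the equivalent boto3 call is below; the endpoint name and payload are placeholder values, not values from this commit, and credentials/region come from the usual boto3 configuration.

import boto3

# Sketch of the call the CLI wraps; "my-endpoint" and the payload are placeholders.
rt = boto3.client("sagemaker-runtime", region_name="us-east-2")
resp = rt.invoke_endpoint(
    EndpointName="my-endpoint",
    Body='{"question": "what is the name of the planet?", "context": "mars"}'.encode("utf-8"),
    ContentType="application/json",  # overridable via --content-type after this commit
)
print(resp["Body"].read().decode("utf-8"))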

test/integration_tests/cli/test_cli_custom_inference.py renamed to test/integration_tests/cli/test_cli_custom_fsx_inference.py

Lines changed: 22 additions & 24 deletions
@@ -2,6 +2,7 @@
 import uuid
 import pytest
 import boto3
+import os
 from click.testing import CliRunner
 from sagemaker.hyperpod.cli.commands.inference import (
     custom_create,
@@ -21,13 +22,21 @@
 TIMEOUT_MINUTES = 15
 POLL_INTERVAL_SECONDS = 30

+BETA_FSX = "fs-0454e783bbb7356fc"
+PROD_FSX = "fs-03c59e2a7e824a22f"
+BETA_TLS = "s3://sagemaker-hyperpod-certificate-beta-us-east-2"
+PROD_TLS = "s3://sagemaker-hyperpod-certificate-prod-us-east-2"
+stage = os.getenv("STAGE", "BETA").upper()
+FSX_LOCATION = BETA_FSX if stage == "BETA" else PROD_FSX
+TLS_LOCATION = BETA_TLS if stage == "BETA" else PROD_TLS
+
 @pytest.fixture(scope="module")
 def runner():
     return CliRunner()

 @pytest.fixture(scope="module")
 def custom_endpoint_name():
-    return f"custom-cli-integration"
+    return f"custom-cli-integration-fsx"

 @pytest.fixture(scope="module")
 def sagemaker_client():
@@ -39,32 +48,20 @@ def test_custom_create(runner, custom_endpoint_name):
     result = runner.invoke(custom_create, [
         "--namespace", NAMESPACE,
         "--version", VERSION,
-        "--instance-type", "ml.g5.8xlarge",
-        "--model-name", "test-model-integration",
-        "--model-source-type", "s3",
-        "--model-location", "deepseek15b",
-        "--s3-bucket-name", "test-model-s3-zhaoqi",
+        "--instance-type", "ml.c5.2xlarge",
+        "--model-name", "test-model-integration-cli-fsx",
+        "--model-source-type", "fsx",
+        "--model-location", "hf-eqa",
+        "--fsx-file-system-id", FSX_LOCATION,
         "--s3-region", REGION,
-        "--image-uri", "763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0",
+        "--image-uri", "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:2.3.0-transformers4.48.0-cpu-py311-ubuntu22.04",
         "--container-port", "8080",
         "--model-volume-mount-name", "model-weights",
         "--endpoint-name", custom_endpoint_name,
-        "--resources-requests", '{"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"}',
-        "--resources-limits", '{"nvidia.com/gpu": 1}',
-        "--tls-certificate-output-s3-uri", "s3://tls-bucket-inf1-beta2",
-        "--metrics-enabled", "true",
-        "--metric-collection-period", "30",
-        "--metric-name", "Invocations",
-        "--metric-stat", "Sum",
-        "--metric-type", "Average",
-        "--min-value", "0.0",
-        "--cloud-watch-trigger-name", "SageMaker-Invocations-new",
-        "--cloud-watch-trigger-namespace", "AWS/SageMaker",
-        "--target-value", "10",
-        "--use-cached-metrics", "true",
-        "--dimensions", '{"EndpointName": "' + custom_endpoint_name + '", "VariantName": "AllTraffic"}',
-        "--env", '{ "HF_MODEL_ID": "/opt/ml/model", "SAGEMAKER_PROGRAM": "inference.py", "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code", "MODEL_CACHE_ROOT": "/opt/ml/model", "SAGEMAKER_ENV": "1" }',
-
+        "--resources-requests", '{"cpu": "3200m", "nvidia.com/gpu": 0, "memory": "12Gi"}',
+        "--resources-limits", '{"nvidia.com/gpu": 0}',
+        "--tls-certificate-output-s3-uri", TLS_LOCATION,
+        "--env", '{ "SAGEMAKER_PROGRAM": "inference.py", "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code", "SAGEMAKER_CONTAINER_LOG_LEVEL": "20", "SAGEMAKER_MODEL_SERVER_TIMEOUT": "3600", "ENDPOINT_SERVER_TIMEOUT": "3600", "MODEL_CACHE_ROOT": "/opt/ml/model", "SAGEMAKER_ENV": "1", "SAGEMAKER_MODEL_SERVER_WORKERS": "1" }'
     ])
     assert result.exit_code == 0, result.output

@@ -118,7 +115,8 @@ def test_wait_until_inservice(custom_endpoint_name):
 def test_custom_invoke(runner, custom_endpoint_name):
     result = runner.invoke(custom_invoke, [
         "--endpoint-name", custom_endpoint_name,
-        "--body", '{"inputs": "What is the capital of USA?"}'
+        "--body", '{"question" :"what is the name of the planet?", "context":"mars"}',
+        "--content-type", "application/list-text"
     ])
     assert result.exit_code == 0
     assert "error" not in result.output.lower()
Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,140 @@
+import time
+import uuid
+import pytest
+import boto3
+import os
+from click.testing import CliRunner
+from sagemaker.hyperpod.cli.commands.inference import (
+    custom_create,
+    custom_invoke,
+    custom_list,
+    custom_describe,
+    custom_delete,
+    custom_get_operator_logs,
+    custom_list_pods
+)
+from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint
+
+# --------- Test Configuration ---------
+NAMESPACE = "integration"
+VERSION = "1.0"
+REGION = "us-east-2"
+TIMEOUT_MINUTES = 15
+POLL_INTERVAL_SECONDS = 30
+
+BETA_BUCKET = "sagemaker-hyperpod-beta-integ-test-model-bucket-n"
+PROD_BUCKET = "sagemaker-hyperpod-prod-integ-test-model-bucket"
+BETA_TLS = "s3://sagemaker-hyperpod-certificate-beta-us-east-2"
+PROD_TLS = "s3://sagemaker-hyperpod-certificate-prod-us-east-2"
+stage = os.getenv("STAGE", "BETA").upper()
+BUCKET_LOCATION = BETA_BUCKET if stage == "BETA" else PROD_BUCKET
+TLS_LOCATION = BETA_TLS if stage == "BETA" else PROD_TLS
+
+@pytest.fixture(scope="module")
+def runner():
+    return CliRunner()
+
+@pytest.fixture(scope="module")
+def custom_endpoint_name():
+    return f"custom-cli-integration-s3"
+
+@pytest.fixture(scope="module")
+def sagemaker_client():
+    return boto3.client("sagemaker", region_name=REGION)
+
+# --------- Custom Endpoint Tests ---------
+
+def test_custom_create(runner, custom_endpoint_name):
+    result = runner.invoke(custom_create, [
+        "--namespace", NAMESPACE,
+        "--version", VERSION,
+        "--instance-type", "ml.c5.2xlarge",
+        "--model-name", "test-model-integration-cli-s3",
+        "--model-source-type", "s3",
+        "--model-location", "hf-eqa",
+        "--s3-bucket-name", BUCKET_LOCATION,
+        "--s3-region", REGION,
+        "--image-uri", "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:2.3.0-transformers4.48.0-cpu-py311-ubuntu22.04",
+        "--container-port", "8080",
+        "--model-volume-mount-name", "model-weights",
+        "--endpoint-name", custom_endpoint_name,
+        "--resources-requests", '{"cpu": "3200m", "nvidia.com/gpu": 0, "memory": "12Gi"}',
+        "--resources-limits", '{"nvidia.com/gpu": 0}',
+        "--tls-certificate-output-s3-uri", TLS_LOCATION,
+        "--env", '{ "SAGEMAKER_PROGRAM": "inference.py", "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code", "SAGEMAKER_CONTAINER_LOG_LEVEL": "20", "SAGEMAKER_MODEL_SERVER_TIMEOUT": "3600", "ENDPOINT_SERVER_TIMEOUT": "3600", "MODEL_CACHE_ROOT": "/opt/ml/model", "SAGEMAKER_ENV": "1", "SAGEMAKER_MODEL_SERVER_WORKERS": "1" }'
+    ])
+    assert result.exit_code == 0, result.output
+
+
+def test_custom_list(runner, custom_endpoint_name):
+    result = runner.invoke(custom_list, ["--namespace", NAMESPACE])
+    assert result.exit_code == 0
+    assert custom_endpoint_name in result.output
+
+
+def test_custom_describe(runner, custom_endpoint_name):
+    result = runner.invoke(custom_describe, [
+        "--name", custom_endpoint_name,
+        "--namespace", NAMESPACE,
+        "--full"
+    ])
+    assert result.exit_code == 0
+    assert custom_endpoint_name in result.output
+
+
+def test_wait_until_inservice(custom_endpoint_name):
+    """Poll SDK until specific JumpStart endpoint reaches DeploymentComplete"""
+    print(f"[INFO] Waiting for JumpStart endpoint '{custom_endpoint_name}' to be DeploymentComplete...")
+    deadline = time.time() + (TIMEOUT_MINUTES * 60)
+    poll_count = 0
+
+    while time.time() < deadline:
+        poll_count += 1
+        print(f"[DEBUG] Poll #{poll_count}: Checking endpoint status...")
+
+        try:
+            ep = HPEndpoint.get(name=custom_endpoint_name, namespace=NAMESPACE)
+            state = ep.status.endpoints.sagemaker.state
+            print(f"[DEBUG] Current state: {state}")
+            if state == "CreationCompleted":
+                print("[INFO] Endpoint is in CreationCompleted state.")
+                return
+
+            deployment_state = ep.status.deploymentStatus.deploymentObjectOverallState
+            if deployment_state == "DeploymentFailed":
+                pytest.fail("Endpoint deployment failed.")
+
+        except Exception as e:
+            print(f"[ERROR] Exception during polling: {e}")
+
+        time.sleep(POLL_INTERVAL_SECONDS)
+
+    pytest.fail("[ERROR] Timed out waiting for endpoint to be DeploymentComplete")
+
+
+def test_custom_invoke(runner, custom_endpoint_name):
+    result = runner.invoke(custom_invoke, [
+        "--endpoint-name", custom_endpoint_name,
+        "--body", '{"question" :"what is the name of the planet?", "context":"mars"}',
+        "--content-type", "application/list-text"
+    ])
+    assert result.exit_code == 0
+    assert "error" not in result.output.lower()
+
+
+def test_custom_get_operator_logs(runner):
+    result = runner.invoke(custom_get_operator_logs, ["--since-hours", "1"])
+    assert result.exit_code == 0
+
+
+def test_custom_list_pods(runner):
+    result = runner.invoke(custom_list_pods, ["--namespace", NAMESPACE])
+    assert result.exit_code == 0
+
+
+def test_custom_delete(runner, custom_endpoint_name):
+    result = runner.invoke(custom_delete, [
+        "--name", custom_endpoint_name,
+        "--namespace", NAMESPACE
+    ])
+    assert result.exit_code == 0
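
The new file above mirrors the FSx suite but stages the model from S3 and runs on a CPU instance type. Because STAGE is read at module import time, it has to be set before pytest collects the tests; a sketch of a prod-account run is below. The module path is hypothetical, since the new file's name is not shown in this view.

import os
import pytest

# Select the prod bucket and TLS location before the test module is imported.
os.environ["STAGE"] = "PROD"

# Hypothetical path for the new S3 suite; adjust to the actual file name in the repo.
pytest.main(["-v", "test/integration_tests/cli/test_cli_custom_s3_inference.py"])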
