Skip to content

Commit 6e1ac1b

Browse files
author
Roja Reddy Sareddy
committed
Enable Hyperpod telemetry
1 parent 0dedc06 commit 6e1ac1b

File tree

8 files changed

+122
-13
lines changed

8 files changed

+122
-13
lines changed

setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@
8989
"hyperpod-pytorch-job-template>=1.0.0, <2.0.0",
9090
"hyperpod-custom-inference-template>=1.0.0, <2.0.0",
9191
"hyperpod-jumpstart-inference-template>=1.0.0, <2.0.0",
92-
"sagemaker",
9392
],
9493
entry_points={
9594
"console_scripts": [

src/sagemaker/hyperpod/cli/commands/cluster.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
TEMP_KUBE_CONFIG_FILE,
4343
OutputFormat,
4444
)
45-
from sagemaker.hyperpod.cli.telemetry.user_agent import (
45+
from sagemaker.hyperpod.common.telemetry.user_agent import (
4646
get_user_agent_extra_suffix,
4747
)
4848
from sagemaker.hyperpod.cli.service.list_pods import (
src/sagemaker/hyperpod/common/telemetry/constants.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from __future__ import absolute_import
2+
from enum import Enum
3+
4+
5+
class Feature(Enum):
    """Enumeration of feature names used in telemetry.

    Each member's integer value is the code reported for that feature;
    ``str(member)`` yields the bare member name (e.g. ``"HYPERPOD"``).
    """

    SDK_DEFAULTS = 1
    LOCAL_MODE = 2
    REMOTE_FUNCTION = 3
    MODEL_TRAINER = 4
    ESTIMATOR = 5
    HYPERPOD = 6  # Added to support telemetry in sagemaker-hyperpod-cli

    def __str__(self):  # pylint: disable=E0307
        """Return the feature name."""
        return self.name
18+
19+
20+
class Status(Enum):
    """Enumeration of status values used in telemetry.

    ``str(member)`` yields the bare member name (``"SUCCESS"`` or
    ``"FAILURE"``); the integer value is the reported status code.
    """

    SUCCESS = 1
    FAILURE = 0

    def __str__(self):  # pylint: disable=E0307
        """Return the status name."""
        return self.name
29+
30+
31+
class Region(str, Enum):
    """Telemetry: List of all supported AWS regions.

    Members are ``str`` subclasses, so they compare equal to the plain region
    name and ``Region("us-east-1")`` looks a member up by its value (raising
    ``ValueError`` for a name that is not listed here).
    """

    # Classic
    US_EAST_1 = "us-east-1"  # IAD
    US_EAST_2 = "us-east-2"  # CMH
    US_WEST_1 = "us-west-1"  # SFO
    US_WEST_2 = "us-west-2"  # PDX
    AP_NORTHEAST_1 = "ap-northeast-1"  # NRT
    AP_NORTHEAST_2 = "ap-northeast-2"  # ICN
    AP_NORTHEAST_3 = "ap-northeast-3"  # KIX
    AP_SOUTH_1 = "ap-south-1"  # BOM
    AP_SOUTHEAST_1 = "ap-southeast-1"  # SIN
    AP_SOUTHEAST_2 = "ap-southeast-2"  # SYD
    CA_CENTRAL_1 = "ca-central-1"  # YUL
    EU_CENTRAL_1 = "eu-central-1"  # FRA
    EU_NORTH_1 = "eu-north-1"  # ARN
    EU_WEST_1 = "eu-west-1"  # DUB
    EU_WEST_2 = "eu-west-2"  # LHR
    EU_WEST_3 = "eu-west-3"  # CDG
    SA_EAST_1 = "sa-east-1"  # GRU
    # Opt-in
    AP_EAST_1 = "ap-east-1"  # HKG
    AP_SOUTHEAST_3 = "ap-southeast-3"  # CGK
    AF_SOUTH_1 = "af-south-1"  # CPT
    EU_SOUTH_1 = "eu-south-1"  # MXP
    ME_SOUTH_1 = "me-south-1"  # BAH
    MX_CENTRAL_1 = "mx-central-1"  # QRO
    AP_SOUTHEAST_7 = "ap-southeast-7"  # BKK
    AP_SOUTH_2 = "ap-south-2"  # HYD
    AP_SOUTHEAST_4 = "ap-southeast-4"  # MEL
    EU_CENTRAL_2 = "eu-central-2"  # ZRH
    EU_SOUTH_2 = "eu-south-2"  # ZAZ
    IL_CENTRAL_1 = "il-central-1"  # TLV
    ME_CENTRAL_1 = "me-central-1"  # DXB

src/sagemaker/hyperpod/common/telemetry/telemetry_logging.py

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,7 @@
1010
import re
1111

1212
import boto3
13-
from sagemaker.telemetry.constants import Feature, Status, Region
14-
from sagemaker.telemetry.telemetry_logging import (
15-
FEATURE_TO_CODE,
16-
STATUS_TO_CODE,
17-
_requests_helper,
18-
_construct_url,
19-
)
13+
from sagemaker.hyperpod.common.telemetry.constants import Feature, Status, Region
2014
import importlib.metadata
2115

2216
SDK_VERSION = importlib.metadata.version("sagemaker-hyperpod")
@@ -28,6 +22,20 @@
2822
sys.version_info.major, sys.version_info.minor, sys.version_info.micro
2923
)
3024

25+
# Maps the string form of each Feature member to the integer code reported in
# telemetry. Keys are built with str(), which for Feature is the member name.
FEATURE_TO_CODE = {
    str(Feature.SDK_DEFAULTS): 1,
    str(Feature.LOCAL_MODE): 2,
    str(Feature.REMOTE_FUNCTION): 3,
    str(Feature.MODEL_TRAINER): 4,
    str(Feature.ESTIMATOR): 5,
    str(Feature.HYPERPOD): 6,  # Added to support telemetry in sagemaker-hyperpod-cli
}

# Maps the string form of each Status member to the integer code reported in
# telemetry.
STATUS_TO_CODE = {
    str(Status.SUCCESS): 1,
    str(Status.FAILURE): 0,
}
38+
3139
logger = logging.getLogger(__name__)
3240

3341

@@ -65,6 +73,43 @@ def get_region_and_account_from_current_context() -> Tuple[str, str]:
6573
return DEFAULT_AWS_REGION, "unknown"
6674

6775

76+
def _requests_helper(url, timeout):
    """Make a best-effort GET request to the given URL.

    Args:
        url: Fully constructed URL to request.
        timeout: Timeout value forwarded to ``requests.get`` as its
            ``timeout`` keyword argument.

    Returns:
        The response object returned by ``requests.get``, or ``None`` when the
        request raised ``requests.exceptions.RequestException`` (the error is
        logged and swallowed so the caller is never interrupted).
    """
    response = None
    try:
        # timeout must be passed by keyword: the second positional parameter
        # of requests.get is ``params``, so a positional value would be
        # appended to the query string and the call would have no timeout.
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        logger.exception("Request exception: %s", str(e))
    return response
85+
86+
87+
def _construct_url(
    accountId: str,
    region: str,
    status: str,
    feature: str,
    failure_reason: str,
    failure_type: str,
    extra_info: str,
) -> str:
    """Construct the URL for the telemetry request.

    Args:
        accountId: Value sent as the ``x-accountId`` query parameter.
        region: Region name; used in both the bucket host name and the S3
            endpoint of the URL.
        status: Value sent as ``x-status``.
        feature: Value sent as ``x-feature``.
        failure_reason: When truthy, sent as ``x-failureReason``.
        failure_type: Sent as ``x-failureType`` together with the reason.
        extra_info: When truthy, appended after ``x-extra=``.

    Returns:
        The assembled URL string.
    """
    # NOTE(review): values are interpolated verbatim, without URL-encoding --
    # confirm callers only pass query-safe text.
    base_url = (
        f"https://sm-pysdk-t-{region}.s3.{region}.amazonaws.com/telemetry?"
        f"x-accountId={accountId}"
        f"&x-status={status}"
        f"&x-feature={feature}"
    )
    logger.debug("Failure reason: %s", failure_reason)
    if failure_reason:
        # NOTE(review): indentation was lost in the extracted diff; grouping
        # x-failureType under the failure_reason check is assumed -- confirm.
        base_url += f"&x-failureReason={failure_reason}"
        base_url += f"&x-failureType={failure_type}"
    if extra_info:
        base_url += f"&x-extra={extra_info}"
    return base_url
111+
112+
68113
def _send_telemetry_request(
69114
status: int,
70115
feature_list: List[int],

src/sagemaker/hyperpod/inference/hp_endpoint.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from sagemaker.hyperpod.common.telemetry.telemetry_logging import (
1515
_hyperpod_telemetry_emitter,
1616
)
17-
from sagemaker.telemetry.constants import Feature
17+
from sagemaker.hyperpod.common.telemetry.constants import Feature
1818
from sagemaker.hyperpod.inference.hp_endpoint_base import HPEndpointBase
1919
from typing import Dict, List, Optional
2020
from sagemaker_core.main.resources import Endpoint

src/sagemaker/hyperpod/inference/hp_endpoint_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from sagemaker.hyperpod.common.telemetry.telemetry_logging import (
1919
_hyperpod_telemetry_emitter,
2020
)
21-
from sagemaker.telemetry.constants import Feature
21+
from sagemaker.hyperpod.common.telemetry.constants import Feature
2222

2323

2424
class HPEndpointBase:

src/sagemaker/hyperpod/inference/hp_jumpstart_endpoint.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from sagemaker.hyperpod.common.telemetry.telemetry_logging import (
2020
_hyperpod_telemetry_emitter,
2121
)
22-
from sagemaker.telemetry.constants import Feature
22+
from sagemaker.hyperpod.common.telemetry.constants import Feature
2323

2424

2525
class HPJumpStartEndpoint(_HPJumpStartEndpoint, HPEndpointBase):

src/sagemaker/hyperpod/training/hyperpod_pytorch_job.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from sagemaker.hyperpod.common.telemetry.telemetry_logging import (
1717
_hyperpod_telemetry_emitter,
1818
)
19-
from sagemaker.telemetry.constants import Feature
19+
from sagemaker.hyperpod.common.telemetry.constants import Feature
2020
import yaml
2121
import logging
2222

0 commit comments

Comments
 (0)