Skip to content

Commit d6c77ad

Browse files
author
Mohamed Zeidan
committed
circular dependency and truncated output
1 parent 8fe8037 commit d6c77ad

File tree

3 files changed

+46
-12
lines changed

3 files changed

+46
-12
lines changed

src/sagemaker/experiments/run.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -757,9 +757,28 @@ def __exit__(self, exc_type, exc_value, exc_traceback):
757757
end_time = datetime.datetime.now(dateutil.tz.tzlocal())
758758
self._trial_component.end_time = end_time
759759
if exc_value:
760+
# Sanitize and truncate error message to comply with AWS API constraints
761+
error_message = str(exc_value)
762+
763+
# Be very restrictive - only keep alphanumeric, spaces, and minimal punctuation
764+
import re
765+
# Replace every character outside the safe set (letters, digits, whitespace, and . - , : ;) with a space
766+
error_message = re.sub(r'[^a-zA-Z0-9\s\.\-\,\:\;]', ' ', error_message)
767+
768+
# Collapse each run of whitespace into a single space and trim both ends
769+
error_message = re.sub(r'\s+', ' ', error_message).strip()
770+
771+
# Truncate to the first 500 characters plus a trailing "..." to stay conservatively within the AWS message length limit
772+
if len(error_message) > 500:
773+
error_message = error_message[:500] + "..."
774+
775+
# Fall back to a generic message if the sanitized text is empty or shorter than 10 characters
776+
if not error_message or len(error_message) < 10:
777+
error_message = "Training job failed with execution error"
778+
760779
self._trial_component.status = _api_types.TrialComponentStatus(
761780
primary_status=_TrialComponentStatusType.Failed.value,
762-
message=str(exc_value),
781+
message=error_message,
763782
)
764783
else:
765784
self._trial_component.status = _api_types.TrialComponentStatus(

src/sagemaker/telemetry/telemetry_logging.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,8 @@
1818
from time import perf_counter
1919
from typing import List
2020
import functools
21-
import requests
2221

2322
import boto3
24-
from sagemaker.session import Session
2523
from sagemaker.utils import resolve_value_from_config
2624
from sagemaker.config.config_schema import TELEMETRY_OPT_OUT_PATH
2725
from sagemaker.telemetry.constants import (
@@ -186,7 +184,7 @@ def wrapper(*args, **kwargs):
186184
def _send_telemetry_request(
187185
status: int,
188186
feature_list: List[int],
189-
session: Session,
187+
session,
190188
failure_reason: str = None,
191189
failure_type: str = None,
192190
extra_info: str = None,
@@ -254,8 +252,12 @@ def _requests_helper(url, timeout):
254252

255253
response = None
256254
try:
255+
# Import requests lazily (at call time) so that importing this module does not fail when requests is not installed
256+
import requests
257257
response = requests.get(url, timeout)
258-
except requests.exceptions.RequestException as e:
258+
except ImportError:
259+
logger.debug("requests module not available, skipping telemetry request")
260+
except Exception as e: # This covers requests.exceptions.RequestException and others
259261
logger.exception("Request exception: %s", str(e))
260262
return response
261263

@@ -281,8 +283,12 @@ def _get_region_or_default(session):
281283

282284
def _get_default_sagemaker_session():
283285
"""Return the default sagemaker session"""
284-
285-
boto_session = boto3.Session(region_name=DEFAULT_AWS_REGION)
286-
sagemaker_session = Session(boto_session=boto_session)
287-
288-
return sagemaker_session
286+
try:
287+
# Import Session lazily (at call time) to avoid a circular import when this module is loaded
288+
from sagemaker.session import Session
289+
boto_session = boto3.Session(region_name=DEFAULT_AWS_REGION)
290+
sagemaker_session = Session(boto_session=boto_session)
291+
return sagemaker_session
292+
except ImportError:
293+
logger.debug("Session module not available, returning None")
294+
return None

tests/data/experiment/train_job_script_for_run_clz.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,18 @@
1414
from __future__ import absolute_import
1515

1616
import os
17+
import glob
1718

18-
sdk_file = "sagemaker-dev-1.0.tar.gz"
19-
os.system(f"pip install {sdk_file}")
19+
# Find the SDK tar file dynamically instead of hardcoding the name
20+
sdk_files = glob.glob("sagemaker-*.tar.gz")
21+
if sdk_files:
22+
sdk_file = sdk_files[0] # Use the first (and likely only) SDK tar file found
23+
print(f"Installing SDK from: {sdk_file}")
24+
os.system(f"pip install {sdk_file}")
25+
else:
26+
print("No SDK tar file found, attempting to install from current directory")
27+
# Fallback: try to install the current directory as editable
28+
os.system("pip install -e .")
2029

2130
import json
2231
import logging

0 commit comments

Comments
 (0)