Skip to content

Commit 8bed9cf

Browse files
Merge branch 'main' into release-3.1.2
2 parents 797203b + 4256f9b commit 8bed9cf

File tree

4 files changed

+36
-10
lines changed

4 files changed

+36
-10
lines changed

template/v2/dirs/etc/sagemaker-ui/network_validation.sh

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -126,14 +126,18 @@ temp_dir=$(mktemp -d)
126126
# Launch all service API checks in parallel background jobs
127127
for service in "${!SERVICE_COMMANDS[@]}"; do
128128
{
129-
# Run command with timeout, discard stdout/stderr
130-
if timeout "${api_time_out_limit}s" bash -c "${SERVICE_COMMANDS[$service]}" > /dev/null 2>&1; then
129+
output_file="$temp_dir/${service}_output"
130+
131+
# Run command with timeout
132+
if timeout "${api_time_out_limit}s" bash -c "${SERVICE_COMMANDS[$service]}" > "$output_file" 2>&1; then
131133
# Success: write OK to temp file
132134
echo "OK" > "$temp_dir/$service"
133135
else
134136
# Get exit code to differentiate timeout or other errors
135137
exit_code=$?
136-
if [ "$exit_code" -eq 124 ]; then
138+
if grep -q "Could not connect to the endpoint URL" "$output_file"; then
139+
echo "UNREACHABLE" > "$temp_dir/$service"
140+
elif [ "$exit_code" -eq 124 ]; then
137141
# Timeout exit code
138142
echo "TIMEOUT" > "$temp_dir/$service"
139143
else
@@ -155,10 +159,13 @@ for service in "${!SERVICE_COMMANDS[@]}"; do
155159
if [[ "$result" == "TIMEOUT" ]]; then
156160
echo "$service API did NOT resolve within ${api_time_out_limit}s. Marking as unreachable."
157161
unreachable_services+=("$service")
162+
elif [[ "$result" == "UNREACHABLE" ]]; then
163+
echo "$service API failed to connect to the endpoint. Marking as unreachable."
164+
unreachable_services+=("$service")
158165
elif [[ "$result" == "OK" ]]; then
159166
echo "$service API is reachable."
160167
else
161-
echo "$service API returned an error (but not a timeout). Ignored for network check."
168+
echo "$service API returned an error (but not a timeout or endpoint reachability failure). Ignored for network check."
162169
fi
163170
else
164171
echo "$service check did not produce a result file. Skipping."

template/v2/dirs/etc/sagemaker-ui/sagemaker_ui_post_startup.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,13 @@ if bash /etc/sagemaker-ui/network_validation.sh "$is_s3_storage_flag" "$network_
241241
# Read unreachable services from JSON file
242242
failed_services=$(jq -r '.UnreachableServices // empty' "$network_validation_file" || echo "")
243243
if [[ -n "$failed_services" ]]; then
244-
error_message="$failed_services are unreachable. Please contact your admin."
244+
# Count number of services by splitting on comma
245+
IFS=',' read -ra failed_array <<< "$failed_services"
246+
count=${#failed_array[@]}
247+
verb="are"
248+
[[ "$count" -eq 1 ]] && verb="is"
249+
250+
error_message="$failed_services $verb unreachable. Please contact your admin."
245251
# Example error message: Redshift Clusters, Athena, STS, Glue are unreachable. Please contact your admin.
246252
write_status_to_file "error" "$error_message"
247253
echo "$error_message"

template/v3/dirs/etc/sagemaker-ui/network_validation.sh

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,14 +126,18 @@ temp_dir=$(mktemp -d)
126126
# Launch all service API checks in parallel background jobs
127127
for service in "${!SERVICE_COMMANDS[@]}"; do
128128
{
129-
# Run command with timeout, discard stdout/stderr
130-
if timeout "${api_time_out_limit}s" bash -c "${SERVICE_COMMANDS[$service]}" > /dev/null 2>&1; then
129+
output_file="$temp_dir/${service}_output"
130+
131+
# Run command with timeout
132+
if timeout "${api_time_out_limit}s" bash -c "${SERVICE_COMMANDS[$service]}" > "$output_file" 2>&1; then
131133
# Success: write OK to temp file
132134
echo "OK" > "$temp_dir/$service"
133135
else
134136
# Get exit code to differentiate timeout or other errors
135137
exit_code=$?
136-
if [ "$exit_code" -eq 124 ]; then
138+
if grep -q "Could not connect to the endpoint URL" "$output_file"; then
139+
echo "UNREACHABLE" > "$temp_dir/$service"
140+
elif [ "$exit_code" -eq 124 ]; then
137141
# Timeout exit code
138142
echo "TIMEOUT" > "$temp_dir/$service"
139143
else
@@ -155,10 +159,13 @@ for service in "${!SERVICE_COMMANDS[@]}"; do
155159
if [[ "$result" == "TIMEOUT" ]]; then
156160
echo "$service API did NOT resolve within ${api_time_out_limit}s. Marking as unreachable."
157161
unreachable_services+=("$service")
162+
elif [[ "$result" == "UNREACHABLE" ]]; then
163+
echo "$service API failed to connect to the endpoint. Marking as unreachable."
164+
unreachable_services+=("$service")
158165
elif [[ "$result" == "OK" ]]; then
159166
echo "$service API is reachable."
160167
else
161-
echo "$service API returned an error (but not a timeout). Ignored for network check."
168+
echo "$service API returned an error (but not a timeout or endpoint reachability failure). Ignored for network check."
162169
fi
163170
else
164171
echo "$service check did not produce a result file. Skipping."

template/v3/dirs/etc/sagemaker-ui/sagemaker_ui_post_startup.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,13 @@ if bash /etc/sagemaker-ui/network_validation.sh "$is_s3_storage_flag" "$network_
241241
# Read unreachable services from JSON file
242242
failed_services=$(jq -r '.UnreachableServices // empty' "$network_validation_file" || echo "")
243243
if [[ -n "$failed_services" ]]; then
244-
error_message="$failed_services are unreachable. Please contact your admin."
244+
# Count number of services by splitting on comma
245+
IFS=',' read -ra failed_array <<< "$failed_services"
246+
count=${#failed_array[@]}
247+
verb="are"
248+
[[ "$count" -eq 1 ]] && verb="is"
249+
250+
error_message="$failed_services $verb unreachable. Please contact your admin."
245251
# Example error message: Redshift Clusters, Athena, STS, Glue are unreachable. Please contact your admin.
246252
write_status_to_file "error" "$error_message"
247253
echo "$error_message"

0 commit comments

Comments
 (0)