Skip to content

Commit 0ff5b82

Browse files
Add changes from #786, #784, #789, #713
1 parent 65c759c commit 0ff5b82

File tree

4 files changed

+56
-18
lines changed

4 files changed

+56
-18
lines changed

build_artifacts/v2/v2.8/v2.8.3/dirs/etc/sagemaker-ui/jupyter/lab/settings/page_config.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
"@amzn/sagemaker-jupyterlab-extensions:spacemenu": true,
99
"@amzn/amazon_sagemaker_sql_editor": true,
1010
"@sagemaker-studio:EmrCluster": true,
11-
"@jupyterlab/scheduler": true,
1211
"@jupyter/collaboration-extension": true
1312
}
1413
}

build_artifacts/v2/v2.8/v2.8.3/dirs/etc/sagemaker-ui/jupyter/server/jupyter_server_config.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
import os
22

3+
from sagemaker_studio_jupyter_scheduler.scheduler import (
4+
SagemakerEnvironmentManager,
5+
SageMakerJobFilesManager,
6+
SageMakerUnifiedStudioScheduler,
7+
)
8+
39
c.ServerApp.terminado_settings = {"shell_command": ["/bin/bash"]}
410
region = os.environ.get("AWS_REGION")
511
csp_rule = os.environ.get("JUPYTERSERVER_CSP_RULE")
@@ -27,4 +33,9 @@
2733
module_location = os.path.dirname(module.__file__)
2834
c.LanguageServerManager.extra_node_roots = [f"{module_location}/sql-language-server"]
2935
except:
36+
3037
pass
38+
39+
c.SchedulerApp.scheduler_class = SageMakerUnifiedStudioScheduler
40+
c.SchedulerApp.environment_manager_class = SagemakerEnvironmentManager
41+
c.SchedulerApp.job_files_manager_class = SageMakerJobFilesManager

build_artifacts/v2/v2.8/v2.8.3/dirs/etc/sagemaker-ui/network_validation.sh

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,14 +126,18 @@ temp_dir=$(mktemp -d)
126126
# Launch all service API checks in parallel background jobs
127127
for service in "${!SERVICE_COMMANDS[@]}"; do
128128
{
129-
# Run command with timeout, discard stdout/stderr
130-
if timeout "${api_time_out_limit}s" bash -c "${SERVICE_COMMANDS[$service]}" > /dev/null 2>&1; then
129+
output_file="$temp_dir/${service}_output"
130+
131+
# Run command with timeout
132+
if timeout "${api_time_out_limit}s" bash -c "${SERVICE_COMMANDS[$service]}" > "$output_file" 2>&1; then
131133
# Success: write OK to temp file
132134
echo "OK" > "$temp_dir/$service"
133135
else
134136
# Get exit code to differentiate timeout or other errors
135137
exit_code=$?
136-
if [ "$exit_code" -eq 124 ]; then
138+
if grep -q "Could not connect to the endpoint URL" "$output_file"; then
139+
echo "UNREACHABLE" > "$temp_dir/$service"
140+
elif [ "$exit_code" -eq 124 ]; then
137141
# Timeout exit code
138142
echo "TIMEOUT" > "$temp_dir/$service"
139143
else
@@ -155,10 +159,13 @@ for service in "${!SERVICE_COMMANDS[@]}"; do
155159
if [[ "$result" == "TIMEOUT" ]]; then
156160
echo "$service API did NOT resolve within ${api_time_out_limit}s. Marking as unreachable."
157161
unreachable_services+=("$service")
162+
elif [[ "$result" == "UNREACHABLE" ]]; then
163+
echo "$service API failed to connect to the endpoint. Marking as unreachable."
164+
unreachable_services+=("$service")
158165
elif [[ "$result" == "OK" ]]; then
159166
echo "$service API is reachable."
160167
else
161-
echo "$service API returned an error (but not a timeout). Ignored for network check."
168+
echo "$service API returned an error (but not a timeout or endpoint reachability failure). Ignored for network check."
162169
fi
163170
else
164171
echo "$service check did not produce a result file. Skipping."

build_artifacts/v2/v2.8/v2.8.3/dirs/etc/sagemaker-ui/sagemaker_ui_post_startup.sh

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -87,22 +87,43 @@ c.Application.logging_config = {
8787
EOT
8888
fi
8989

90-
# Setting this to +x to not log credentials from the response of fetching credentials.
91-
set +x
90+
# Add debug call to get domain info
91+
if [ ! -z "$dataZoneEndPoint" ]; then
92+
domain_response=$(aws datazone get-domain --debug --endpoint-url "$dataZoneEndPoint" --identifier "$dataZoneDomainId" --region "$dataZoneDomainRegion" 2>&1)
9293

93-
# Note: The $? check immediately follows the sagemaker-studio command to ensure we're checking its exit status.
94-
# Adding commands between these lines could lead to incorrect error handling.
95-
response=$(timeout 30 sagemaker-studio credentials get-domain-execution-role-credential-in-space --domain-id "$dataZoneDomainId" --profile default)
96-
responseStatus=$?
94+
else
95+
domain_response=$(aws datazone get-domain --debug --identifier "$dataZoneDomainId" --region "$dataZoneDomainRegion" 2>&1)
96+
fi
97+
98+
# Check if domain is in express mode
99+
response_body=$(echo "$domain_response" | grep -A1 "Response body:" | tail -n1 | sed 's/^b'\''//;s/'\''$//')
100+
# The b'...' wrapper was stripped above; now drop the literal "\n" escape sequences left in the body
101+
cleaned_response=$(echo "$response_body" | sed 's/\\n//g')
102+
is_express_mode=$(echo "$cleaned_response" | jq -r '.preferences.DOMAIN_MODE == "EXPRESS"')
103+
104+
if [ "$is_express_mode" = "true" ]; then
105+
echo "Domain is in express mode. Using default credentials"
106+
# Use default credentials - no additional configuration needed
107+
aws configure set credential_source EcsContainer --profile DomainExecutionRoleCreds
108+
echo "Successfully configured DomainExecutionRoleCreds profile with default credentials"
109+
else
110+
echo "Domain is not in express mode"
111+
# Disable command tracing (set +x) so the credentials in the fetch response are not written to the logs.
112+
set +x
113+
# Note: The $? check immediately follows the sagemaker-studio command to ensure we're checking its exit status.
114+
# Adding commands between these lines could lead to incorrect error handling.
115+
response=$(timeout 30 sagemaker-studio credentials get-domain-execution-role-credential-in-space --domain-id "$dataZoneDomainId" --profile default)
116+
responseStatus=$?
97117

98-
set -x
118+
set -x
99119

100-
if [ $responseStatus -ne 0 ]; then
120+
if [ $responseStatus -ne 0 ]; then
101121
echo "Failed to fetch domain execution role credentials. Will skip adding new credentials profile: DomainExecutionRoleCreds."
102122
write_status_to_file "error" "Network issue detected. Your domain may be using a public subnet, which affects IDE functionality. Please contact your admin."
103-
else
123+
else
104124
aws configure set credential_process "sagemaker-studio credentials get-domain-execution-role-credential-in-space --domain-id $dataZoneDomainId --profile default" --profile DomainExecutionRoleCreds
105125
echo "Successfully configured DomainExecutionRoleCreds profile"
126+
fi
106127
fi
107128

108129
# Run AWS CLI command to get the username from DataZone User Profile.
@@ -179,7 +200,7 @@ mkdir -p "$HOME/.config" # Create config directory if it doesn't exist
179200
jq -n \
180201
--arg smusProjectDirectory "$SMUS_PROJECT_DIR" \
181202
--arg isGitProject "$IS_GIT_PROJECT" \
182-
'{
203+
'{
183204
smusProjectDirectory: $smusProjectDirectory,
184205
isGitProject: ($isGitProject == "true")
185206
}' > "$HOME/.config/smus-storage-metadata.json"
@@ -210,8 +231,8 @@ fi
210231

211232
# Generate sagemaker pysdk intelligent default config
212233
nohup python /etc/sagemaker/sm_pysdk_default_config.py &
213-
# Only run the following commands if SAGEMAKER_APP_TYPE_LOWERCASE is jupyterlab
214-
if [ "${SAGEMAKER_APP_TYPE_LOWERCASE}" = "jupyterlab" ]; then
234+
# Only run the following commands if SAGEMAKER_APP_TYPE_LOWERCASE is jupyterlab and domain is not in express mode
235+
if [ "${SAGEMAKER_APP_TYPE_LOWERCASE}" = "jupyterlab" ] && [ "$is_express_mode" != "true" ]; then
215236
# do not fail immediately for non-zero exit code returned
216237
# by start-workflows-container. An expected non-zero exit
217238
# code will be returned if there is not a minimum of 2
@@ -224,7 +245,7 @@ if [ "${SAGEMAKER_APP_TYPE_LOWERCASE}" = "jupyterlab" ]; then
224245

225246
# write unexpected error to file if any of the remaining scripts fail.
226247
trap 'write_status_to_file "error" "An unexpected error occurred. Please stop and restart your space to retry."' ERR
227-
248+
228249
# Install conda and pip dependencies if a lib mgmt config exists
229250
bash /etc/sagemaker-ui/libmgmt/install-lib.sh
230251

0 commit comments

Comments
 (0)