
Commit cf8f7dd

Append the default_bucket_prefix to s3 paths in sample notebooks if one exists (#4811)
**Description** This change checks the SageMaker session for a default_bucket_prefix. If a default bucket prefix is specified, it is appended to the S3 path. This ensures that sample notebooks that use S3 work in SageMaker Unified Studio. Co-authored-by: Marco Friaz <[email protected]>
1 parent bee1bcf commit cf8f7dd
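
For reference, the pattern this commit applies across the notebooks boils down to the following sketch (assuming an already-configured SageMaker session; the "DEMO-example" prefix is a placeholder, not a value from the commit):

import sagemaker

sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
prefix = "DEMO-example"

# Sessions expose default_bucket_prefix (populated by environments such as
# SageMaker Unified Studio, or via sagemaker_config); it is None when unset.
default_bucket_prefix = sagemaker_session.default_bucket_prefix

# If a default bucket prefix is specified, append it to the s3 path
if default_bucket_prefix:
    prefix = f"{default_bucket_prefix}/{prefix}"

s3_output_path = f"s3://{bucket}/{prefix}"
print(s3_output_path)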

File tree

123 files changed: +1229 -354 lines changed


sagemaker-core/get_started.ipynb

Lines changed: 25 additions & 3 deletions
@@ -43,6 +43,7 @@
 "region = sagemaker_session.boto_region_name\n",
 "role = get_execution_role()\n",
 "bucket = sagemaker_session.default_bucket()\n",
+"default_bucket_prefix = sagemaker_session.default_bucket_prefix\n",
 "\n",
 "print(f\"AWS region: {region}\")\n",
 "print(f\"Execution role: {role}\")\n",
@@ -149,9 +150,17 @@
 "test_data.to_csv(\"test.csv\", header=False, index=False)\n",
 "\n",
 "# Upload each dataset to S3\n",
-"s3_train_input = sagemaker_session.upload_data(\"train.csv\", bucket)\n",
-"s3_validation_input = sagemaker_session.upload_data(\"validation.csv\", bucket)\n",
-"s3_test_input = sagemaker_session.upload_data(\"test.csv\", bucket)\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    s3_train_input = sagemaker_session.upload_data(\"train.csv\", key_prefix=default_bucket_prefix)\n",
+"    s3_validation_input = sagemaker_session.upload_data(\n",
+"        \"validation.csv\", key_prefix=default_bucket_prefix\n",
+"    )\n",
+"    s3_test_input = sagemaker_session.upload_data(\"test.csv\", key_prefix=default_bucket_prefix)\n",
+"else:\n",
+"    s3_train_input = sagemaker_session.upload_data(\"train.csv\", bucket)\n",
+"    s3_validation_input = sagemaker_session.upload_data(\"validation.csv\", bucket)\n",
+"    s3_test_input = sagemaker_session.upload_data(\"test.csv\", bucket)\n",
 "\n",
 "print(\"Datasets uploaded to:\")\n",
 "print(s3_train_input)\n",
@@ -218,6 +227,10 @@
 "max_runtime_in_seconds = 600 # Maximum runtimt. Job exits if it doesn't finish before this\n",
 "s3_output_path = f\"s3://{bucket}\" # bucket and optional prefix where the training job stores output artifacts, like model artifact.\n",
 "\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    s3_output_path = f\"s3://{bucket}/{default_bucket_prefix}\"\n",
+"\n",
 "# Specify hyperparameters\n",
 "hyper_parameters = {\n",
 "    \"max_depth\": \"5\",\n",
@@ -475,6 +488,11 @@
 "    \"%Y-%m-%d-%H-%M-%S\", time.gmtime()\n",
 ") # Name of TranformJob\n",
 "s3_output_path = f\"s3://{bucket}/transform\" # bucket and optional prefix where the TranformJob stores the result.\n",
+"\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    s3_output_path = f\"s3://{bucket}/{default_bucket_prefix}/transform\"\n",
+"\n",
 "instance_type = \"ml.m4.xlarge\" # SageMaker instance type to use for TranformJob\n",
 "instance_count = 1 # Number of instances to use for TranformJob\n",
 "\n",
@@ -522,6 +540,10 @@
 "    f\"{transform_job.transform_output.s3_output_path}/{output_file_name}\" # Create output S3 URI\n",
 ")\n",
 "\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    output_s3_uri = f\"{default_bucket_prefix}/{output_s3_uri}\"\n",
+"\n",
 "\n",
 "def split_s3_path(s3_path):\n",
 "    \"\"\"Lightweight method for extracting bucket and object key from S3 uri\"\"\"\n",

sagemaker-core/inference_and_resource_chaining.ipynb

Lines changed: 12 additions & 0 deletions
@@ -144,6 +144,7 @@
 "region = sagemaker_session.boto_region_name\n",
 "role = get_execution_role()\n",
 "bucket = sagemaker_session.default_bucket()\n",
+"default_bucket_prefix = sagemaker_session.default_bucket_prefix\n",
 "print(role)"
 ]
 },
@@ -213,6 +214,11 @@
 "# Upload Data\n",
 "\n",
 "prefix = \"DEMO-scikit-iris\"\n",
+"\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    prefix = f\"{default_bucket_prefix}/{prefix}\"\n",
+"\n",
 "TRAIN_DATA = \"train.csv\"\n",
 "DATA_DIRECTORY = \"data\"\n",
 "\n",
@@ -560,6 +566,12 @@
 "source": [
 "# Upload the model to S3 bucket\n",
 "bucket_prefix = \"async-inference-demo\"\n",
+"default_bucket_prefix = sagemaker_session.default_bucket_prefix\n",
+"\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    bucket_prefix = f\"{default_bucket_prefix}/{bucket_prefix}\"\n",
+"\n",
 "bucket = sagemaker_session.default_bucket()\n",
 "\n",
 "model_s3_key = f\"{bucket_prefix}/demo-xgboost-model.tar.gz\"\n",

sagemaker-core/intelligent_defaults_and_logging.ipynb

Lines changed: 6 additions & 0 deletions
@@ -176,6 +176,7 @@
 "region = sagemaker_session.boto_region_name\n",
 "role = get_execution_role()\n",
 "bucket = sagemaker_session.default_bucket()\n",
+"default_bucket_prefix = sagemaker_session.default_bucket_prefix\n",
 "print(role)"
 ]
 },
@@ -241,6 +242,11 @@
 "# Upload Data\n",
 "\n",
 "prefix = \"DEMO-scikit-iris\"\n",
+"\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    prefix = f\"{default_bucket_prefix}/{prefix}\"\n",
+"\n",
 "TRAIN_DATA = \"train.csv\"\n",
 "DATA_DIRECTORY = \"data\"\n",
 "\n",

sagemaker-core/sagemaker-core-feature-store/sagemaker_core_feature_store_introduction.ipynb

Lines changed: 6 additions & 1 deletion
@@ -69,7 +69,12 @@
 "REGION_NAME = sagemaker_session._region_name\n",
 "role = get_execution_role()\n",
 "s3_bucket_name = sagemaker_session.default_bucket()\n",
-"prefix = \"sagemaker-featurestore-introduction\""
+"prefix = \"sagemaker-featurestore-introduction\"\n",
+"default_bucket_prefix = sagemaker_session.default_bucket_prefix\n",
+"\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    prefix = f\"{default_bucket_prefix}/{prefix}\""
 ]
 },
 {

sagemaker-core/sagemaker-core-llama-3-8B-speculative-decoding.ipynb

Lines changed: 1 addition & 0 deletions
@@ -77,6 +77,7 @@
 ")\n",
 "\n",
 "default_bucket = SM_SESSION.default_bucket()\n",
+"default_bucket_prefix = SM_SESSION.default_bucket_prefix\n",
 "\n",
 "TARGET_MODEL = \"meta-llama/Meta-Llama-3-8B\"\n",
 "DRAFT_MODEL = \"sagemaker\""

sagemaker-core/sagemaker-core-llama-3-8B.ipynb

Lines changed: 1 addition & 0 deletions
@@ -72,6 +72,7 @@
 ")\n",
 "\n",
 "default_bucket = SM_SESSION.default_bucket()\n",
+"default_bucket_prefix = SM_SESSION.default_bucket_prefix\n",
 "\n",
 "MODEL = \"meta-llama/Meta-Llama-3-8B\""
 ]

sagemaker-core/sagemaker-core-pyspark-processing.ipynb

Lines changed: 32 additions & 7 deletions
@@ -101,7 +101,13 @@
 "sagemaker_session = Session()\n",
 "REGION_NAME = sagemaker_session._region_name\n",
 "role = get_execution_role()\n",
-"s3_bucket_name = sagemaker_session.default_bucket()"
+"s3_bucket_name = sagemaker_session.default_bucket()\n",
+"default_bucket_prefix = sagemaker_session.default_bucket_prefix\n",
+"default_bucket_prefix_path = \"\"\n",
+"\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    default_bucket_prefix_path = f\"/{default_bucket_prefix}\""
 ]
 },
 {
@@ -305,6 +311,11 @@
 "# Upload the raw input dataset to a unique S3 location\n",
 "timestamp_prefix = strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n",
 "prefix = \"sagemaker/spark-preprocess-demo/{}\".format(timestamp_prefix)\n",
+"\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    prefix = f\"{default_bucket_prefix}/{prefix}\"\n",
+"\n",
 "input_prefix_abalone = \"{}/input/raw/abalone\".format(prefix)\n",
 "input_preprocessed_prefix_abalone = \"{}/input/preprocessed/abalone\".format(prefix)\n",
 "\n",
@@ -328,7 +339,7 @@
 "processing_input = ProcessingInput(\n",
 "    input_name=\"code\",\n",
 "    s3_input=ProcessingS3Input(\n",
-"        s3_uri=f\"s3://{s3_bucket_name}/{final_job_name}/input/code/preprocess.py\",\n",
+"        s3_uri=f\"s3://{s3_bucket_name}{default_bucket_prefix_path}/{final_job_name}/input/code/preprocess.py\",\n",
 "        # s3_uri=\"s3://sagemaker-us-east-1-774297356213/sm-spark-2024-08-30-05-25-18-294/input/code/preprocess.py\",\n",
 "        s3_data_type=\"S3Prefix\",\n",
 "        local_path=\"/opt/ml/processing/input/code\",\n",
@@ -512,6 +523,11 @@
 "# Upload the raw input dataset to a unique S3 location\n",
 "timestamp_prefix = strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n",
 "prefix = \"sagemaker/spark-preprocess-demo/{}\".format(timestamp_prefix)\n",
+"\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    prefix = f\"{default_bucket_prefix}/{prefix}\"\n",
+"\n",
 "input_prefix_sales = \"{}/input/sales\".format(prefix)\n",
 "output_prefix_sales = \"{}/output/sales\".format(prefix)\n",
 "input_s3_uri = \"s3://{}/{}\".format(s3_bucket_name, input_prefix_sales)\n",
@@ -544,7 +560,7 @@
 "processing_input_code = ProcessingInput(\n",
 "    input_name=\"code\",\n",
 "    s3_input=ProcessingS3Input(\n",
-"        s3_uri=f\"s3://{s3_bucket_name}/{final_job_name}/input/code/hello_py_spark_app.py\",\n",
+"        s3_uri=f\"s3://{s3_bucket_name}{default_bucket_prefix_path}/{final_job_name}/input/code/hello_py_spark_app.py\",\n",
 "        s3_data_type=\"S3Prefix\",\n",
 "        local_path=\"/opt/ml/processing/input/code\",\n",
 "        s3_input_mode=\"File\",\n",
@@ -555,7 +571,7 @@
 "processing_input_pyfiles = ProcessingInput(\n",
 "    input_name=\"py-files\",\n",
 "    s3_input=ProcessingS3Input(\n",
-"        s3_uri=f\"s3://{s3_bucket_name}/{final_job_name}/input/py-files\",\n",
+"        s3_uri=f\"s3://{s3_bucket_name}{default_bucket_prefix_path}/{final_job_name}/input/py-files\",\n",
 "        s3_data_type=\"S3Prefix\",\n",
 "        local_path=\"/opt/ml/processing/input/py-files\",\n",
 "        s3_input_mode=\"File\",\n",
@@ -650,6 +666,10 @@
 "# Upload the raw input dataset to S3\n",
 "timestamp_prefix = strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n",
 "prefix = \"sagemaker/spark-preprocess-demo/{}\".format(timestamp_prefix)\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    prefix = f\"{default_bucket_prefix}/{prefix}\"\n",
+"\n",
 "input_prefix_sales = \"{}/input/sales\".format(prefix)\n",
 "output_prefix_sales = \"{}/output/sales\".format(prefix)\n",
 "input_s3_uri = \"s3://{}/{}\".format(s3_bucket_name, input_prefix_sales)\n",
@@ -675,7 +695,7 @@
 "processing_input_code = ProcessingInput(\n",
 "    input_name=\"code\",\n",
 "    s3_input=ProcessingS3Input(\n",
-"        s3_uri=f\"s3://{s3_bucket_name}/{final_job_name}/input/code/spark-test-app.jar\",\n",
+"        s3_uri=f\"s3://{s3_bucket_name}{default_bucket_prefix_path}/{final_job_name}/input/code/spark-test-app.jar\",\n",
 "        # s3_uri=\"s3://sagemaker-us-east-1-774297356213/sm-spark-2024-08-30-05-25-18-294/input/code/preprocess.py\",\n",
 "        s3_data_type=\"S3Prefix\",\n",
 "        local_path=\"/opt/ml/processing/input/code\",\n",
@@ -768,6 +788,11 @@
 "# Upload the raw input dataset to a unique S3 location\n",
 "timestamp_prefix = strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n",
 "prefix = \"sagemaker/spark-preprocess-demo/{}\".format(timestamp_prefix)\n",
+"\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    prefix = f\"{default_bucket_prefix}/{prefix}\"\n",
+"\n",
 "input_prefix_abalone = \"{}/input/raw/abalone\".format(prefix)\n",
 "input_preprocessed_prefix_abalone = \"{}/input/preprocessed/abalone\".format(prefix)\n",
 "\n",
@@ -795,7 +820,7 @@
 "processing_input_code = ProcessingInput(\n",
 "    input_name=\"code\",\n",
 "    s3_input=ProcessingS3Input(\n",
-"        s3_uri=f\"s3://{s3_bucket_name}/{final_job_name}/input/code/preprocess.py\",\n",
+"        s3_uri=f\"s3://{s3_bucket_name}{default_bucket_prefix_path}/{final_job_name}/input/code/preprocess.py\",\n",
 "        # s3_uri=\"s3://sagemaker-us-east-1-774297356213/sm-spark-2024-08-30-05-25-18-294/input/code/preprocess.py\",\n",
 "        s3_data_type=\"S3Prefix\",\n",
 "        local_path=\"/opt/ml/processing/input/code\",\n",
@@ -806,7 +831,7 @@
 "processing_input_conf = ProcessingInput(\n",
 "    input_name=\"conf\",\n",
 "    s3_input=ProcessingS3Input(\n",
-"        s3_uri=f\"s3://{s3_bucket_name}/{final_job_name}/input/conf/configuration.json\",\n",
+"        s3_uri=f\"s3://{s3_bucket_name}{default_bucket_prefix_path}/{final_job_name}/input/conf/configuration.json\",\n",
 "        # s3_uri=\"s3://sagemaker-us-east-1-774297356213/sm-spark-2024-08-30-05-25-18-294/input/code/preprocess.py\",\n",
 "        s3_data_type=\"S3Prefix\",\n",
 "        local_path=\"/opt/ml/processing/input/conf\",\n",

sagemaker-core/sagemaker_core_overview.ipynb

Lines changed: 6 additions & 0 deletions
@@ -162,6 +162,7 @@
 "region = sagemaker_session.boto_region_name\n",
 "role = get_execution_role()\n",
 "bucket = sagemaker_session.default_bucket()\n",
+"default_bucket_prefix = sagemaker_session.default_bucket_prefix\n",
 "print(role)"
 ]
 },
@@ -227,6 +228,11 @@
 "# Upload Data\n",
 "\n",
 "prefix = \"DEMO-scikit-iris\"\n",
+"\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    prefix = f\"{default_bucket_prefix}/{prefix}\"\n",
+"\n",
 "TRAIN_DATA = \"train.csv\"\n",
 "DATA_DIRECTORY = \"data\"\n",
 "\n",

end_to_end_ml_lifecycle/sm-autopilot_customer_churn.ipynb

Lines changed: 5 additions & 0 deletions
@@ -88,6 +88,11 @@
 "# You can modify the following to use a bucket of your choosing\n",
 "bucket = session.default_bucket()\n",
 "prefix = \"sagemaker/DEMO-autopilot-churn\"\n",
+"default_bucket_prefix = session.default_bucket_prefix\n",
+"\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    prefix = f\"{default_bucket_prefix}/{prefix}\"\n",
 "\n",
 "role = get_execution_role()\n",
 "\n",

end_to_end_ml_lifecycle/sm-autopilot_linear_regression_california_housing.ipynb

Lines changed: 5 additions & 0 deletions
@@ -98,6 +98,11 @@
 "# You can modify the following to use a bucket of your choosing\n",
 "bucket = session.default_bucket()\n",
 "prefix = \"sagemaker/DEMO-autopilot-housing\"\n",
+"default_bucket_prefix = session.default_bucket_prefix\n",
+"\n",
+"# If a default bucket prefix is specified, append it to the s3 path\n",
+"if default_bucket_prefix:\n",
+"    prefix = f\"{default_bucket_prefix}/{prefix}\"\n",
 "\n",
 "role = get_execution_role()\n",
 "\n",
