Skip to content

Commit fe89ca3

Browse files
authored
[PyTorch][Training][EC2][SageMaker]PyTorch 2.8 Currency Release (#5149)
* PyTorch 2.8 Currency Release
1 parent f34666a commit fe89ca3

20 files changed

+943
-217
lines changed
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# Top-level build parameters. Most values carry an anchor (&NAME) so later
# entries in this file can reuse them through aliases (*NAME).
# The <set-...-in-environment> values are placeholders — presumably substituted
# by the build tooling at run time; confirm against the consumer.
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
2+
# NOTE(review): unquoted all-digit value, so a YAML loader reads it as an
# integer rather than a string — confirm the consumer is fine with that.
prod_account_id: &PROD_ACCOUNT_ID 763104351884
3+
region: &REGION <set-$REGION-in-environment>
4+
framework: &FRAMEWORK pytorch
5+
# Contains two dots, so implicit typing keeps this as a string.
version: &VERSION 2.8.0
6+
# Quoted so the value stays the string "2.8" instead of becoming a float.
short_version: &SHORT_VERSION "2.8"
7+
arch_type: x86
8+
# Setting below is left commented out (inactive).
# autopatch_build: "True"
9+
10+
# Repository settings for training images. The training_repository mapping is
# anchored as TRAINING_REPOSITORY and merged into each entry under `images`.
# `!join` is an application-defined tag, not core YAML — presumably it
# concatenates the listed items into one string; confirm against the loader.
repository_info:
11+
training_repository: &TRAINING_REPOSITORY
12+
image_type: &TRAINING_IMAGE_TYPE training
13+
root: !join [ *FRAMEWORK, "/", *TRAINING_IMAGE_TYPE ]
14+
# Repository name built from the literal "pr" prefix, framework and image type.
repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK, "-", *TRAINING_IMAGE_TYPE ]
15+
# ECR-style URI built from the configurable account_id and region.
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
16+
# Release repository name: same parts as repository_name, without the "pr" prefix.
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK, "-", *TRAINING_IMAGE_TYPE ]
17+
# Release URI uses prod_account_id instead of account_id.
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]
18+
19+
# Build artifacts shared by the images in this file. training_context is
# anchored as TRAINING_CONTEXT and merged into each image's `context`.
# Every entry pairs a `source` path with a `target` file name — presumably the
# file is copied into the Docker build context under that name; confirm.
context:
20+
training_context: &TRAINING_CONTEXT
21+
start_cuda_compat:
22+
source: docker/build_artifacts/start_cuda_compat.sh
23+
target: start_cuda_compat.sh
24+
dockerd_entrypoint:
25+
source: docker/build_artifacts/dockerd_entrypoint.sh
26+
target: dockerd_entrypoint.sh
27+
changehostname:
28+
source: docker/build_artifacts/changehostname.c
29+
target: changehostname.c
30+
start_with_right_hostname:
31+
source: docker/build_artifacts/start_with_right_hostname.sh
32+
target: start_with_right_hostname.sh
33+
example_mnist_file:
34+
source: docker/build_artifacts/mnist.py
35+
target: mnist.py
36+
# The two entries below use ../../ paths, i.e. files two directory levels up
# rather than under docker/build_artifacts.
deep_learning_container:
37+
source: ../../src/deep_learning_container.py
38+
target: deep_learning_container.py
39+
setup_oss_compliance:
40+
source: ../../scripts/setup_oss_compliance.sh
41+
target: setup_oss_compliance.sh
42+
43+
# Image definitions. This first entry describes the CPU training image with
# build target `ec2`.
images:
44+
BuildEC2CPUPTTrainPy3DockerImage:
45+
# Merge key: pulls in the keys of the TRAINING_REPOSITORY mapping.
<<: *TRAINING_REPOSITORY
46+
build: &PYTORCH_CPU_TRAINING_PY3 false
47+
# NOTE(review): unit of the baseline is not stated here — confirm.
image_size_baseline: 7200
48+
# The anchors set on the next four keys feed the tag and docker_file joins below.
device_type: &DEVICE_TYPE cpu
49+
python_version: &DOCKER_PYTHON_VERSION py3
50+
tag_python_version: &TAG_PYTHON_VERSION py312
51+
os_version: &OS_VERSION ubuntu22.04
52+
# Parts: version, device type, tag python version, OS version, "-ec2" suffix
# (2.8.0-cpu-py312-ubuntu22.04-ec2 if !join simply concatenates).
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-ec2" ]
53+
# Built from the same parts as `tag`.
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-ec2" ]
54+
# skip_build: "False"
55+
# Parts give docker/<short_version>/<python_version>/Dockerfile.<device_type>.
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
56+
target: ec2
57+
context:
58+
# Merge key: reuses the shared artifact list anchored as TRAINING_CONTEXT.
<<: *TRAINING_CONTEXT
59+
# GPU training image with build target `ec2`. It differs from the CPU entry by
# device_type, image_size_baseline and the added cuda_version.
BuildEC2GPUPTTrainPy3cu129DockerImage:
60+
# Merge key: pulls in the keys of the TRAINING_REPOSITORY mapping.
<<: *TRAINING_REPOSITORY
61+
build: &PYTORCH_GPU_TRAINING_PY3 false
62+
image_size_baseline: 28000
63+
# DEVICE_TYPE, DOCKER_PYTHON_VERSION, TAG_PYTHON_VERSION and OS_VERSION are
# anchored again here; in YAML an alias refers to the most recent preceding
# anchor of that name, so the aliases below pick up these values.
device_type: &DEVICE_TYPE gpu
64+
python_version: &DOCKER_PYTHON_VERSION py3
65+
tag_python_version: &TAG_PYTHON_VERSION py312
66+
cuda_version: &CUDA_VERSION cu129
67+
os_version: &OS_VERSION ubuntu22.04
68+
# Parts: version, device type, tag python version, CUDA version, OS version,
# "-ec2" suffix (2.8.0-gpu-py312-cu129-ubuntu22.04-ec2 if !join concatenates).
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
69+
# Built from the same parts as `tag`.
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
70+
# skip_build: "False"
71+
# Parts give docker/<short_version>/<python_version>/<cuda_version>/Dockerfile.<device_type>;
# the flow sequence continues onto the following line.
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile.,
72+
*DEVICE_TYPE ]
73+
target: ec2
74+
context:
75+
# Merge key: reuses the shared artifact list anchored as TRAINING_CONTEXT.
<<: *TRAINING_CONTEXT

pytorch/training/buildspec-2-8-sm.yml

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# Top-level parameters for the SageMaker buildspec. Anchors (&NAME) let later
# entries in this file reference these values as aliases (*NAME).
# <set-...-in-environment> values are placeholders — presumably filled in by the
# build tooling; verify against the consumer.
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
2+
# NOTE(review): unquoted digits load as an integer, not a string — confirm the
# consumer handles that.
prod_account_id: &PROD_ACCOUNT_ID 763104351884
3+
region: &REGION <set-$REGION-in-environment>
4+
framework: &FRAMEWORK pytorch
5+
# Three-part version; not a valid number, so it loads as a string.
version: &VERSION 2.8.0
6+
# Quoted to keep "2.8" a string rather than a float.
short_version: &SHORT_VERSION "2.8"
7+
arch_type: x86
8+
# Inactive setting, left commented out.
# autopatch_build: "True"
9+
10+
# Repository settings for training images; training_repository is anchored as
# TRAINING_REPOSITORY and merged into the image entries under `images`.
# `!join` is an application-defined tag — presumably string concatenation of the
# listed items; confirm against the loader that registers it.
repository_info:
11+
training_repository: &TRAINING_REPOSITORY
12+
image_type: &TRAINING_IMAGE_TYPE training
13+
root: !join [ *FRAMEWORK, "/", *TRAINING_IMAGE_TYPE ]
14+
# Name carries a literal "pr" prefix ahead of framework and image type.
repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK, "-", *TRAINING_IMAGE_TYPE ]
15+
# URI assembled from account_id, region and repository_name.
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
16+
# Release name omits the "pr" prefix.
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK, "-", *TRAINING_IMAGE_TYPE ]
17+
# Release URI is assembled with prod_account_id rather than account_id.
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]
18+
19+
# Artifact list shared by the images in this file; anchored as TRAINING_CONTEXT
# and merged into each image's `context`.
# Each entry gives a `source` path and a `target` file name — presumably the
# name the file takes inside the Docker build context; confirm.
context:
20+
training_context: &TRAINING_CONTEXT
21+
start_cuda_compat:
22+
source: docker/build_artifacts/start_cuda_compat.sh
23+
target: start_cuda_compat.sh
24+
dockerd_entrypoint:
25+
source: docker/build_artifacts/dockerd_entrypoint.sh
26+
target: dockerd_entrypoint.sh
27+
changehostname:
28+
source: docker/build_artifacts/changehostname.c
29+
target: changehostname.c
30+
start_with_right_hostname:
31+
source: docker/build_artifacts/start_with_right_hostname.sh
32+
target: start_with_right_hostname.sh
33+
example_mnist_file:
34+
source: docker/build_artifacts/mnist.py
35+
target: mnist.py
36+
# Sources below are referenced through ../../, two directory levels up,
# unlike the docker/build_artifacts entries above.
deep_learning_container:
37+
source: ../../src/deep_learning_container.py
38+
target: deep_learning_container.py
39+
setup_oss_compliance:
40+
source: ../../scripts/setup_oss_compliance.sh
41+
target: setup_oss_compliance.sh
42+
43+
# Image definitions. This first entry describes the CPU training image with
# build target `sagemaker`.
images:
44+
BuildSageMakerCPUPTTrainPy3DockerImage:
45+
# Merge key: brings in the keys of the TRAINING_REPOSITORY mapping.
<<: *TRAINING_REPOSITORY
46+
build: &PYTORCH_CPU_TRAINING_PY3 false
47+
# NOTE(review): the baseline's unit is not stated here — confirm.
image_size_baseline: 7200
48+
# Anchors on the next four keys are consumed by the tag and docker_file joins below.
device_type: &DEVICE_TYPE cpu
49+
python_version: &DOCKER_PYTHON_VERSION py3
50+
tag_python_version: &TAG_PYTHON_VERSION py312
51+
os_version: &OS_VERSION ubuntu22.04
52+
# Parts: version, device type, tag python version, OS version, "-sagemaker"
# suffix (2.8.0-cpu-py312-ubuntu22.04-sagemaker if !join simply concatenates).
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
53+
# Built from the same parts as `tag`.
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
54+
# skip_build: "False"
55+
# Parts give docker/<short_version>/<python_version>/Dockerfile.<device_type>.
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
56+
target: sagemaker
57+
context:
58+
# Merge key: reuses the shared artifact list anchored as TRAINING_CONTEXT.
<<: *TRAINING_CONTEXT
59+
# GPU training image with build target `sagemaker`. It differs from the CPU
# entry by device_type, image_size_baseline and the added cuda_version.
BuildSageMakerGPUPTTrainPy3DockerImage:
60+
# Merge key: brings in the keys of the TRAINING_REPOSITORY mapping.
<<: *TRAINING_REPOSITORY
61+
build: &PYTORCH_GPU_TRAINING_PY3 false
62+
image_size_baseline: 28000
63+
# DEVICE_TYPE, DOCKER_PYTHON_VERSION, TAG_PYTHON_VERSION and OS_VERSION are
# anchored again here; a YAML alias refers to the most recent preceding anchor
# of that name, so the aliases below use these values.
device_type: &DEVICE_TYPE gpu
64+
python_version: &DOCKER_PYTHON_VERSION py3
65+
tag_python_version: &TAG_PYTHON_VERSION py312
66+
cuda_version: &CUDA_VERSION cu129
67+
os_version: &OS_VERSION ubuntu22.04
68+
# Parts: version, device type, tag python version, CUDA version, OS version,
# "-sagemaker" suffix (2.8.0-gpu-py312-cu129-ubuntu22.04-sagemaker if !join
# concatenates).
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
69+
# Built from the same parts as `tag`.
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
70+
# skip_build: "False"
71+
# Parts give docker/<short_version>/<python_version>/<cuda_version>/Dockerfile.<device_type>;
# the flow sequence continues onto the following line.
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile.,
72+
*DEVICE_TYPE ]
73+
target: sagemaker
74+
context:
75+
# Merge key: reuses the shared artifact list anchored as TRAINING_CONTEXT.
<<: *TRAINING_CONTEXT

pytorch/training/buildspec.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
buildspec_pointer: buildspec-2-7-sm.yml
1+
# The pointer now names the 2.8 SageMaker buildspec; the removed line above
# named the 2.7 one. Presumably build tooling follows this pointer to find the
# active spec — confirm against the consumer.
buildspec_pointer: buildspec-2-8-sm.yml

0 commit comments

Comments
 (0)