@@ -158,8 +158,8 @@ hyp create hyp-pytorch-job \
158158 --version 1.0 \
159159 --job-name test-pytorch-job \
160160 --image pytorch/pytorch:latest \
161- --command '[ " python", " train.py" ] ' \
162- --args '[ " --epochs", "10", " --batch-size", "32" ] ' \
161+ --command '[python, train.py]' \
162+ --args '[--epochs=10, --batch-size=32]' \
163163 --environment '{"PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:32"}' \
164164 --pull-policy "IfNotPresent" \
165165 --instance-type ml.p4d.24xlarge \
@@ -170,8 +170,8 @@ hyp create hyp-pytorch-job \
170170 --queue-name "training-queue" \
171171 --priority "high" \
172172 --max-retry 3 \
173- --volumes '[ " data-vol", " model-vol", " checkpoint-vol" ] ' \
174- --persistent-volume-claims '[ " shared-data-pvc", " model-registry-pvc" ] ' \
173+ --volumes '[data-vol, model-vol, checkpoint-vol]' \
174+ --persistent-volume-claims '[shared-data-pvc, model-registry-pvc]' \
175175 --output-s3-uri s3://my-bucket/model-artifacts
176176```
177177
@@ -257,9 +257,10 @@ Along with the CLI, we also have SDKs available that can perform the training an
257257
258258```
259259
260- from sagemaker.hyperpod import HyperPodPytorchJob
261- from sagemaker.hyperpod.job
262- import ReplicaSpec, Template, Spec, Container, Resources, RunPolicy, Metadata
260+ from sagemaker.hyperpod.training import HyperPodPytorchJob
261+ from sagemaker.hyperpod.training \
262+ import ReplicaSpec, Template, Spec, Containers, Resources, RunPolicy
263+ from sagemaker.hyperpod.common.config import Metadata
263264
264265# Define job specifications
265266nproc_per_node = "1" # Number of processes per node
@@ -274,7 +275,7 @@ replica_specs =
274275 (
275276 containers =
276277 [
277- Container
278+ Containers
278279 (
279280 # Container name
280281 name="container-name",
0 commit comments