Skip to content

Commit fd2f944

Browse files
author
Roja Reddy Sareddy
committed
Training CLI & SDK: example notebook and README update
1 parent cc08405 commit fd2f944

File tree

2 files changed

+10
-16
lines changed

2 files changed

+10
-16
lines changed

README.md

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,8 @@ hyp create hyp-pytorch-job \
158158
--version 1.0 \
159159
--job-name test-pytorch-job \
160160
--image pytorch/pytorch:latest \
161-
--command '["python", "train.py"]' \
162-
--args '["--epochs", "10", "--batch-size", "32"]' \
161+
--command '[python, train.py]' \
162+
--args '[--epochs=10, --batch-size=32]' \
163163
--environment '{"PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:32"}' \
164164
--pull-policy "IfNotPresent" \
165165
--instance-type ml.p4d.24xlarge \
@@ -170,8 +170,8 @@ hyp create hyp-pytorch-job \
170170
--queue-name "training-queue" \
171171
--priority "high" \
172172
--max-retry 3 \
173-
--volumes '["data-vol", "model-vol", "checkpoint-vol"]' \
174-
--persistent-volume-claims '["shared-data-pvc", "model-registry-pvc"]' \
173+
--volumes '[data-vol, model-vol, checkpoint-vol]' \
174+
--persistent-volume-claims '[shared-data-pvc, model-registry-pvc]' \
175175
--output-s3-uri s3://my-bucket/model-artifacts
176176
```
177177
@@ -257,9 +257,10 @@ Along with the CLI, we also have SDKs available that can perform the training an
257257
258258
```
259259

260-
from sagemaker.hyperpod import HyperPodPytorchJob
261-
from sagemaker.hyperpod.job
262-
import ReplicaSpec, Template, Spec, Container, Resources, RunPolicy, Metadata
260+
from sagemaker.hyperpod.training import HyperPodPytorchJob
261+
from sagemaker.hyperpod.training \
262+
import ReplicaSpec, Template, Spec, Containers, Resources, RunPolicy
263+
from sagemaker.hyperpod.common.config import Metadata
263264

264265
# Define job specifications
265266
nproc_per_node = "1" # Number of processes per node
@@ -274,7 +275,7 @@ replica_specs =
274275
(
275276
containers =
276277
[
277-
Container
278+
Containers
278279
(
279280
# Container name
280281
name="container-name",

examples/training/CLI/training-e2e-cli.ipynb

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,6 @@
1616
"- hyperpod-pytorchjob-config-schemas"
1717
]
1818
},
19-
{
20-
"cell_type": "code",
21-
"execution_count": null,
22-
"id": "b30debba",
23-
"metadata": {},
24-
"outputs": [],
25-
"source": "!hyperpod get-clusters"
26-
},
2719
{
2820
"metadata": {
2921
"jupyter": {
@@ -46,6 +38,7 @@
4638
"metadata": {},
4739
"outputs": [],
4840
"source": [
41+
"#example command\n",
4942
"!hyp create hyp-pytorch-job \\\n",
5043
" --version 1.0 \\\n",
5144
" --job-name test-pytorch-job-cli \\\n",

0 commit comments

Comments
 (0)