Skip to content

Commit 7d4ec29

Browse files
authored
Update recipes to release-1.3.2 (#65)
* Update recipes to release-1.3.2
* Capture output job name in test
* Update README.md
1 parent 2696940 commit 7d4ec29

File tree

3 files changed

+48
-30
lines changed

3 files changed

+48
-30
lines changed

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
[submodule "src/hyperpod_cli/sagemaker_hyperpod_recipes"]
22
path = src/hyperpod_cli/sagemaker_hyperpod_recipes
33
url = https://github.com/aws/sagemaker-hyperpod-recipes.git
4-
branch = 1.3.1
4+
branch = release-1.3.2

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ This command submits a new training job to the connected SageMaker HyperPod clus
137137
hyperpod start-job --job-name <job-name> [--namespace <namespace>] [--job-kind <kubeflow/PyTorchJob>] [--image <image>] [--command <command>] [--entry-script <script>] [--script-args <arg1 arg2>] [--environment <key=value>] [--pull-policy <Always|IfNotPresent|Never>] [--instance-type <instance-type>] [--node-count <count>] [--tasks-per-node <count>] [--label-selector <key=value>] [--deep-health-check-passed-nodes-only] [--scheduler-type <Kueue|SageMaker|None>] [--queue-name <queue-name>] [--priority <priority>] [--auto-resume] [--max-retry <count>] [--restart-policy <Always|OnFailure|Never|ExitCode>] [--volumes <volume1,volume2>] [--persistent-volume-claims <claim1:/mount/path,claim2:/mount/path>] [--results-dir <dir>] [--service-account-name <account>]
138138
```
139139
140-
* `job-name` (string) - Required. The name of the job.
140+
* `job-name` (string) - Required. The base name of the job. A unique identifier (UUID) is automatically appended to it, so the resulting job name has the form `<job-name>-<UUID>`.
141141
* `job-kind` (string) - Optional. The training job kind. The job type currently supported is `kubeflow/PyTorchJob`.
142142
* `namespace` (string) - Optional. The namespace to use. If not specified, this command will first use the namespace when connecting the cluster. Otherwise if namespace is not configured when connecting to the cluster, a namespace that is managed by SageMaker will be auto discovered.
143143
* `image` (string) - Required. The image used when creating the training job.

test/integration_tests/test_happy_case.py

Lines changed: 46 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,29 @@
2525

2626

2727
class TestHappyCase(AbstractIntegrationTests):
28+
29+
@pytest.fixture(scope="class")
30+
def hyperpod_cli_basic_job_name(self):
31+
"""
32+
Class-level fixture to set the hyperpod_cli_job_name attribute.
33+
"""
34+
config_path = os.path.expanduser("./test/integration_tests/data/basicJob.yaml")
35+
command = [
36+
"hyperpod",
37+
"start-job",
38+
"--config-file",
39+
config_path,
40+
]
41+
42+
result = self._execute_test_command(command)
43+
job_name = self._get_job_name_from_run_output(result.stdout)
44+
45+
# Wait for job to complete creation
46+
time.sleep(240)
47+
assert result.returncode == 0
48+
logger.info(result.stdout)
49+
return job_name
50+
2851

2952
@pytest.fixture(scope="class", autouse=True)
3053
def basic_test(self):
@@ -44,13 +67,20 @@ def _execute_test_command(self, command):
4467
except subprocess.CalledProcessError as e:
4568
raise RuntimeError(f"Failed to execute command: {command} Exception {e}")
4669

70+
def _get_job_name_from_run_output(self, output):
71+
for line in output.splitlines():
72+
if line.startswith('NAME:'):
73+
job_name = line.split('NAME:')[1].strip()
74+
return job_name
75+
return None
76+
4777
@pytest.mark.order(1)
4878
def test_hyperpod_connect_cluster(self):
4979
command = [
5080
"hyperpod",
5181
"connect-cluster",
5282
"--cluster-name",
53-
super().hyperpod_cli_cluster_name,
83+
self.hyperpod_cli_cluster_name,
5484
"--region",
5585
"us-west-2",
5686
"--namespace",
@@ -62,20 +92,8 @@ def test_hyperpod_connect_cluster(self):
6292
logger.info(result.stdout)
6393

6494
@pytest.mark.order(2)
65-
def test_start_job(self):
66-
config_path = os.path.expanduser("./test/integration_tests/data/basicJob.yaml")
67-
command = [
68-
"hyperpod",
69-
"start-job",
70-
"--config-file",
71-
config_path,
72-
]
73-
74-
result = self._execute_test_command(command)
75-
# wait for job to complete creation
76-
time.sleep(240)
77-
assert result.returncode == 0
78-
logger.info(result.stdout)
95+
def test_start_job(self, hyperpod_cli_basic_job_name):
96+
assert hyperpod_cli_basic_job_name is not None
7997

8098
# @pytest.mark.order(2)
8199
# def test_start_job_with_quota(self):
@@ -94,17 +112,17 @@ def test_start_job(self):
94112
# logger.info(result.stdout)
95113

96114
@pytest.mark.order(3)
97-
def test_get_job(self):
115+
def test_get_job(self, hyperpod_cli_basic_job_name):
98116
command = [
99117
"hyperpod",
100118
"get-job",
101119
"--job-name",
102-
self.hyperpod_cli_job_name,
120+
hyperpod_cli_basic_job_name,
103121
]
104122

105123
result = self._execute_test_command(command)
106124
assert result.returncode == 0
107-
assert (self.hyperpod_cli_job_name) in str(result.stdout)
125+
assert (hyperpod_cli_basic_job_name) in str(result.stdout)
108126
logger.info(result.stdout)
109127

110128
# @pytest.mark.order(3)
@@ -122,50 +140,50 @@ def test_get_job(self):
122140
# logger.info(result.stdout)
123141

124142
@pytest.mark.order(4)
125-
def test_list_jobs(self):
143+
def test_list_jobs(self, hyperpod_cli_basic_job_name):
126144
command = ["hyperpod", "list-jobs"]
127145

128146
result = self._execute_test_command(command)
129147
assert result.returncode == 0
130-
assert (self.hyperpod_cli_job_name) in str(result.stdout)
148+
assert (hyperpod_cli_basic_job_name) in str(result.stdout)
131149
logger.info(result.stdout)
132150

133151
@pytest.mark.order(5)
134-
def test_list_pods(self):
152+
def test_list_pods(self, hyperpod_cli_basic_job_name):
135153
command = [
136154
"hyperpod",
137155
"list-pods",
138156
"--job-name",
139-
self.hyperpod_cli_job_name,
157+
hyperpod_cli_basic_job_name,
140158
]
141159

142160
result = self._execute_test_command(command)
143161
assert result.returncode == 0
144-
assert (self.hyperpod_cli_job_name) in str(result.stdout)
162+
assert (hyperpod_cli_basic_job_name) in str(result.stdout)
145163
logger.info(result.stdout)
146164

147165
@pytest.mark.order(6)
148-
def test_get_logs(self):
166+
def test_get_logs(self, hyperpod_cli_basic_job_name):
149167
command = [
150168
"hyperpod",
151169
"get-log",
152170
"--job-name",
153-
self.hyperpod_cli_job_name,
171+
hyperpod_cli_basic_job_name,
154172
"--pod",
155-
self.hyperpod_cli_job_name+"-worker-0",
173+
hyperpod_cli_basic_job_name +"-worker-0",
156174
]
157175

158176
result = self._execute_test_command(command)
159177
assert result.returncode == 0
160178
logger.info(result.stdout)
161179

162180
@pytest.mark.order(7)
163-
def test_cancel_job(self):
181+
def test_cancel_job(self, hyperpod_cli_basic_job_name):
164182
command = [
165183
"hyperpod",
166184
"cancel-job",
167185
"--job-name",
168-
self.hyperpod_cli_job_name,
186+
hyperpod_cli_basic_job_name,
169187
]
170188

171189
result = self._execute_test_command(command)

0 commit comments

Comments
 (0)