Skip to content

Commit e955685

Browse files
fix: show the error message when a job is deleted in Bohrium (#304)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 99e2611 commit e955685

File tree

3 files changed

+29
-19
lines changed

3 files changed

+29
-19
lines changed

dpdispatcher/dp_cloud_server.py

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,19 @@ def do_submit(self, job):
120120
job.job_state = JobStatus.waiting
121121
return job_id
122122

123+
def _get_job_detail(self, job_id, group_id):
124+
check_return = self.api.get_job_detail(job_id)
125+
assert check_return is not None, (
126+
f"Failed to retrieve tasks information. To resubmit this job, please "
127+
f"try again, if this problem still exists please delete the submission "
128+
f"file and try again.\nYou can check submission.submission_hash in the "
129+
f'previous log or type `grep -rl "{job_id}:job_group_id:{group_id}" '
130+
f"~/.dpdispatcher/dp_cloud_server/` to find corresponding file. "
131+
f"You can try with command:\n "
132+
f'rm $(grep -rl "{job_id}:job_group_id:{group_id}" ~/.dpdispatcher/dp_cloud_server/)'
133+
)
134+
return check_return
135+
123136
def check_status(self, job):
124137
if job.job_id == "":
125138
return JobStatus.unsubmitted
@@ -138,26 +151,15 @@ def check_status(self, job):
138151
dlog.debug(
139152
f"debug: check_status; job.job_id:{job_id}; job.job_hash:{job.job_hash}"
140153
)
141-
check_return = None
142-
# print("api",self.api_version,self.input_data.get('job_group_id'),job.job_id)
143-
check_return = self.api.get_tasks(job_id, group_id)
144-
assert check_return is not None, (
145-
f"Failed to retrieve tasks information. To resubmit this job, please "
146-
f"try again, if this problem still exists please delete the submission "
147-
f"file and try again.\nYou can check submission.submission_hash in the "
148-
f'previous log or type `grep -rl "{job_id}:job_group_id:{group_id}" '
149-
f"~/.dpdispatcher/dp_cloud_server/` to find corresponding file. "
150-
f"You can try with command:\n "
151-
f'rm $(grep -rl "{job_id}:job_group_id:{group_id}" ~/.dpdispatcher/dp_cloud_server/)'
152-
)
154+
check_return = self._get_job_detail(job_id, group_id)
153155
try:
154156
dp_job_status = check_return["status"]
155157
except IndexError as e:
156158
dlog.error(
157159
f"cannot find job information in bohrium for job {job.job_id}. check_return:{check_return}; retry one more time after 60 seconds"
158160
)
159161
time.sleep(60)
160-
retry_return = self.api.get_tasks(job_id, group_id)
162+
retry_return = self._get_job_detail(job_id, group_id)
161163
try:
162164
dp_job_status = retry_return["status"]
163165
except IndexError as e:

dpdispatcher/dpcloudserver/client.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -242,10 +242,18 @@ def _is_none(_in):
242242
s = s[0].lower() + s[1:]
243243
return regex.sub(lambda m: m.group(0)[-1].upper(), s)
244244

245-
def get_tasks(self, job_id, group_id, page=1, per_page=10):
246-
ret = self.get(
247-
f"brm/v1/job/{job_id}",
248-
)
245+
def get_job_detail(self, job_id):
246+
try:
247+
ret = self.get(
248+
f"brm/v1/job/{job_id}",
249+
)
250+
except RequestInfoException as e:
251+
if e.args[0] != 200:
252+
raise e
253+
254+
dlog.error(f"get job detail error {e}", stack_info=ENABLE_STACK)
255+
return None
256+
249257
return ret
250258

251259
def get_log(self, job_id):

dpdispatcher/dpcloudserver/temp_test.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -62,14 +62,14 @@ def test_commit_job(self):
6262
job_id = api.job_create(
6363
self.test_data["job_type"], self.test_data["job_resources"], self.test_data
6464
)
65-
tasks = api.get_tasks(job_id)
65+
tasks = api.get_job_detail(job_id)
6666
print(tasks)
6767

6868
def test_get_tasks(self):
6969
print("----------", sys._getframe().f_code.co_name)
7070
jobs = api.get_jobs()
7171
for j in jobs:
72-
tasks = api.get_tasks(j["id"])
72+
tasks = api.get_job_detail(j["id"])
7373
print(tasks)
7474

7575
# def test_download(self):

0 commit comments

Comments
 (0)