@@ -120,6 +120,19 @@ def do_submit(self, job):
120
120
job .job_state = JobStatus .waiting
121
121
return job_id
122
122
123
def _get_job_detail(self, job_id, group_id):
    """Fetch the detail record of a job, failing loudly when none is returned.

    Parameters
    ----------
    job_id
        Identifier forwarded to ``self.api.get_job_detail``.
    group_id
        Job group identifier. It is not sent to the API; it is only
        interpolated into the recovery instructions of the error message.

    Returns
    -------
    object
        Whatever ``self.api.get_job_detail`` returned; never ``None``.

    Raises
    ------
    AssertionError
        If the API call returned ``None``. The message tells the user how
        to locate and delete the stale submission file.
    """
    check_return = self.api.get_job_detail(job_id)
    if check_return is None:
        # Raised explicitly instead of via an ``assert`` statement: asserts
        # are stripped under ``python -O``, which would let ``None`` leak to
        # the caller. The exception type is kept so existing handlers that
        # catch AssertionError keep working.
        raise AssertionError(
            f"Failed to retrieve tasks information. To resubmit this job, please "
            f"try again, if this problem still exists please delete the submission "
            f"file and try again.\n You can check submission.submission_hash in the "
            f'previous log or type `grep -rl "{ job_id } :job_group_id:{ group_id } " '
            f"~/.dpdispatcher/dp_cloud_server/` to find corresponding file. "
            f"You can try with command:\n "
            f'rm $(grep -rl "{ job_id } :job_group_id:{ group_id } " ~/.dpdispatcher/dp_cloud_server/)'
        )
    return check_return
135
+
123
136
def check_status (self , job ):
124
137
if job .job_id == "" :
125
138
return JobStatus .unsubmitted
@@ -138,26 +151,15 @@ def check_status(self, job):
138
151
dlog .debug (
139
152
f"debug: check_status; job.job_id:{ job_id } ; job.job_hash:{ job .job_hash } "
140
153
)
141
- check_return = None
142
- # print("api",self.api_version,self.input_data.get('job_group_id'),job.job_id)
143
- check_return = self .api .get_tasks (job_id , group_id )
144
- assert check_return is not None , (
145
- f"Failed to retrieve tasks information. To resubmit this job, please "
146
- f"try again, if this problem still exists please delete the submission "
147
- f"file and try again.\n You can check submission.submission_hash in the "
148
- f'previous log or type `grep -rl "{ job_id } :job_group_id:{ group_id } " '
149
- f"~/.dpdispatcher/dp_cloud_server/` to find corresponding file. "
150
- f"You can try with command:\n "
151
- f'rm $(grep -rl "{ job_id } :job_group_id:{ group_id } " ~/.dpdispatcher/dp_cloud_server/)'
152
- )
154
+ check_return = self ._get_job_detail (job_id , group_id )
153
155
try :
154
156
dp_job_status = check_return ["status" ]
155
157
except IndexError as e :
156
158
dlog .error (
157
159
f"cannot find job information in bohrium for job { job .job_id } . check_return:{ check_return } ; retry one more time after 60 seconds"
158
160
)
159
161
time .sleep (60 )
160
- retry_return = self .api . get_tasks (job_id , group_id )
162
+ retry_return = self ._get_job_detail (job_id , group_id )
161
163
try :
162
164
dp_job_status = retry_return ["status" ]
163
165
except IndexError as e :
0 commit comments