Skip to content

Commit b5afceb

Browse files
committed
feat: replace DIRACJobID with jobReference in AREX/HTCondorCEs
1 parent 557eb7b commit b5afceb

File tree

2 files changed

+69
-70
lines changed

2 files changed

+69
-70
lines changed

src/DIRAC/Resources/Computing/AREXComputingElement.py

Lines changed: 45 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
Port added to the CE host name to interact with AREX services.
1616
1717
ProxyTimeLeftBeforeRenewal:
18-
Time in seconds before the AREXCE renews proxy of submitted pilots.
18+
Time in seconds before the AREXCE renews proxy of submitted payloads.
1919
2020
RESTVersion:
2121
Version of the REST interface to use.
@@ -105,34 +105,33 @@ def setToken(self, token, valid):
105105
super().setToken(token, valid)
106106
self.headers["Authorization"] = "Bearer " + self.token["access_token"]
107107

108-
def _arcToDiracID(self, arcJobID):
109-
"""Convert an ARC jobID into a DIRAC jobID.
108+
def _arcIDToJobReference(self, arcJobID):
109+
"""Convert an ARC jobID into a job reference.
110110
Example: 1234 becomes https://<ce>:<port>/arex/1234
111111
112112
:param str arcJobID: ARC jobID
113-
:return: DIRAC jobID
113+
:return: job reference, defined as an ARC jobID with additional details
114114
"""
115115
# Add CE and protocol information to arc Job ID
116116
if "://" in arcJobID:
117117
self.log.warn("Identifier already in ARC format", arcJobID)
118118
return arcJobID
119119

120-
diracJobID = "https://" + self.ceHost + ":" + self.port + "/arex/" + arcJobID
121-
return diracJobID
120+
return f"https://{self.ceHost}:{self.port}/arex/{arcJobID}"
122121

123-
def _DiracToArcID(self, diracJobID):
124-
"""Convert a DIRAC jobID into an ARC jobID.
122+
def _jobReferenceToArcID(self, jobReference):
123+
"""Convert a job reference into an ARC jobID.
125124
Example: https://<ce>:<port>/arex/1234 becomes 1234
126125
127-
:param str: DIRAC jobID
126+
:param str jobReference: job reference, defined as an ARC jobID with additional details
128127
:return: ARC jobID
129128
"""
130129
# Remove CE and protocol information from arc Job ID
131-
if "://" in diracJobID:
132-
arcJobID = diracJobID.split("arex/")[-1]
130+
if "://" in jobReference:
131+
arcJobID = jobReference.split("arex/")[-1]
133132
return arcJobID
134-
self.log.warn("Identifier already in REST format?", diracJobID)
135-
return diracJobID
133+
self.log.warn("Identifier already in REST format?", jobReference)
134+
return jobReference
136135

137136
#############################################################################
138137

@@ -483,12 +482,12 @@ def submitJob(self, executableFile, proxy, numberOfJobs=1, inputs=None, outputs=
483482
if not result["OK"]:
484483
break
485484

486-
jobID = self._arcToDiracID(arcJobID)
487-
batchIDList.append(jobID)
488-
stampDict[jobID] = diracStamp
485+
jobReference = self._arcIDToJobReference(arcJobID)
486+
batchIDList.append(jobReference)
487+
stampDict[jobReference] = diracStamp
489488
self.log.debug(
490489
"Successfully submitted job",
491-
f"{jobID} to CE {self.ceHost}",
490+
f"{jobReference} to CE {self.ceHost}",
492491
)
493492

494493
if batchIDList:
@@ -503,16 +502,16 @@ def submitJob(self, executableFile, proxy, numberOfJobs=1, inputs=None, outputs=
503502
def killJob(self, jobIDList):
504503
"""Kill the specified jobs
505504
506-
:param list jobIDList: list of DIRAC Job IDs
505+
:param list jobIDList: list of job references
507506
"""
508507
if not isinstance(jobIDList, list):
509508
jobIDList = [jobIDList]
510509
self.log.debug("Killing jobs", ",".join(jobIDList))
511510

512-
# Convert DIRAC jobs to ARC jobs
513-
# DIRAC Jobs might be stored with a DIRAC stamp (":::XXXXX") that should be removed
514-
jList = [self._DiracToArcID(job.split(":::")[0]) for job in jobIDList]
515-
return self._killJob(jList)
511+
# Convert job references to ARC jobs
512+
# Job references might be stored with a DIRAC stamp (":::XXXXX") that should be removed
513+
arcJobList = [self._jobReferenceToArcID(job.split(":::")[0]) for job in jobIDList]
514+
return self._killJob(arcJobList)
516515

517516
def _killJob(self, arcJobList):
518517
"""Kill the specified jobs
@@ -545,16 +544,16 @@ def _killJob(self, arcJobList):
545544
def cleanJob(self, jobIDList):
546545
"""Clean files related to the specified jobs
547546
548-
:param list jobIDList: list of DIRAC Job IDs
547+
:param list jobIDList: list of job references
549548
"""
550549
if not isinstance(jobIDList, list):
551550
jobIDList = [jobIDList]
552551
self.log.debug("Cleaning jobs", ",".join(jobIDList))
553552

554-
# Convert DIRAC jobs to ARC jobs
555-
# DIRAC Jobs might be stored with a DIRAC stamp (":::XXXXX") that should be removed
556-
jList = [self._DiracToArcID(job.split(":::")[0]) for job in jobIDList]
557-
return self._cleanJob(jList)
553+
# Convert job references to ARC jobs
554+
# Job references might be stored with a DIRAC stamp (":::XXXXX") that should be removed
555+
arcJobList = [self._jobReferenceToArcID(job.split(":::")[0]) for job in jobIDList]
556+
return self._cleanJob(arcJobList)
558557

559558
def _cleanJob(self, arcJobList):
560559
"""Clean files related to the specified jobs
@@ -710,7 +709,7 @@ def _renewDelegation(self, delegationID):
710709
def getJobStatus(self, jobIDList):
711710
"""Get the status information for the given list of jobs.
712711
713-
:param list jobIDList: list of DIRAC Job ID, followed by the DIRAC stamp.
712+
:param list jobIDList: list of job references, followed by the DIRAC stamp.
714713
"""
715714
result = self._checkSession()
716715
if not result["OK"]:
@@ -721,9 +720,9 @@ def getJobStatus(self, jobIDList):
721720
jobIDList = [jobIDList]
722721

723722
self.log.debug("Getting status of jobs:", jobIDList)
724-
# Convert DIRAC jobs to ARC jobs and encapsulate them in a dictionary for the REST query
725-
# DIRAC Jobs might be stored with a DIRAC stamp (":::XXXXX") that should be removed
726-
arcJobsJson = {"job": [{"id": self._DiracToArcID(job.split(":::")[0])} for job in jobIDList]}
723+
# Convert job references to ARC jobs and encapsulate them in a dictionary for the REST query
724+
# Job references might be stored with a DIRAC stamp (":::XXXXX") that should be removed
725+
arcJobsJson = {"job": [{"id": self._jobReferenceToArcID(job.split(":::")[0])} for job in jobIDList]}
727726

728727
# Prepare the command
729728
params = {"action": "status"}
@@ -746,16 +745,16 @@ def getJobStatus(self, jobIDList):
746745
arcJobsInfo = [arcJobsInfo]
747746

748747
for arcJob in arcJobsInfo:
749-
jobID = self._arcToDiracID(arcJob["id"])
748+
jobReference = self._arcIDToJobReference(arcJob["id"])
750749
# Normalise the state reported by the ARC REST interface to capitalised form for the mapStates lookup
751750
arcState = arcJob["state"].capitalize()
752-
self.log.debug("REST ARC status", f"for job {jobID} is {arcState}")
753-
resultDict[jobID] = self.mapStates[arcState]
751+
self.log.debug("REST ARC status", f"for job {jobReference} is {arcState}")
752+
resultDict[jobReference] = self.mapStates[arcState]
754753

755754
# Cancel held jobs so they don't sit in the queue forever
756755
if arcState == "Hold":
757756
jobsToCancel.append(arcJob["id"])
758-
self.log.debug(f"Killing held job {jobID}")
757+
self.log.debug(f"Killing held job {jobReference}")
759758

760759
# Renew delegations to renew the proxies of the jobs
761760
result = self._getDelegationIDs()
@@ -782,7 +781,7 @@ def getJobStatus(self, jobIDList):
782781
def getJobLog(self, jobID):
783782
"""Get job logging info
784783
785-
:param str jobID: DIRAC JobID followed by the DIRAC stamp.
784+
:param str jobID: Job reference followed by the DIRAC stamp.
786785
:return: string representing the logging info of a given jobID
787786
"""
788787
result = self._checkSession()
@@ -791,7 +790,7 @@ def getJobLog(self, jobID):
791790
return result
792791

793792
# Prepare the command: Get output files
794-
arcJob = self._DiracToArcID(jobID.split(":::")[0])
793+
arcJob = self._jobReferenceToArcID(jobID.split(":::")[0])
795794
query = self._urlJoin(os.path.join("jobs", arcJob, "diagnose", "errors"))
796795

797796
# Submit the GET request to retrieve outputs
@@ -810,7 +809,7 @@ def getJobLog(self, jobID):
810809
def _getListOfAvailableOutputs(self, jobID, arcJobID):
811810
"""Request a list of outputs available for a given jobID.
812811
813-
:param str jobID: DIRAC job ID without the DIRAC stamp
812+
:param str jobID: job reference without the DIRAC stamp
814813
:param str arcJobID: ARC job ID
815814
:return list: names of the available outputs
816815
"""
@@ -830,11 +829,11 @@ def _getListOfAvailableOutputs(self, jobID, arcJobID):
830829
return S_OK(response.json()["file"])
831830

832831
def getJobOutput(self, jobID, workingDirectory=None):
833-
"""Get the outputs of the given DIRAC job ID.
832+
"""Get the outputs of the given job reference.
834833
835834
Outputs are stored in workingDirectory if present, else in a new directory named <ARC JobID>.
836835
837-
:param str jobID: DIRAC JobID followed by the DIRAC stamp.
836+
:param str jobID: job reference followed by the DIRAC stamp.
838837
:param str workingDirectory: name of the directory containing the retrieved outputs.
839838
:return: content of stdout and stderr
840839
"""
@@ -848,10 +847,10 @@ def getJobOutput(self, jobID, workingDirectory=None):
848847
jobRef, stamp = jobID.split(":::")
849848
else:
850849
return S_ERROR(f"DIRAC stamp not defined for {jobID}")
851-
job = self._DiracToArcID(jobRef)
850+
arcJob = self._jobReferenceToArcID(jobRef)
852851

853852
# Get the list of available outputs
854-
result = self._getListOfAvailableOutputs(jobRef, job)
853+
result = self._getListOfAvailableOutputs(jobRef, arcJob)
855854
if not result["OK"]:
856855
return result
857856
remoteOutputs = result["Value"]
@@ -860,21 +859,21 @@ def getJobOutput(self, jobID, workingDirectory=None):
860859
if not workingDirectory:
861860
if "WorkingDirectory" in self.ceParameters:
862861
# We assume that workingDirectory exists
863-
workingDirectory = os.path.join(self.ceParameters["WorkingDirectory"], job)
862+
workingDirectory = os.path.join(self.ceParameters["WorkingDirectory"], arcJob)
864863
else:
865-
workingDirectory = job
864+
workingDirectory = arcJob
866865
os.mkdir(workingDirectory)
867866

868867
stdout = None
869868
stderr = None
870869
for remoteOutput in remoteOutputs:
871870
# Prepare the command
872-
query = self._urlJoin(os.path.join("jobs", job, "session", remoteOutput))
871+
query = self._urlJoin(os.path.join("jobs", arcJob, "session", remoteOutput))
873872

874873
# Submit the GET request to retrieve outputs
875874
result = self._request("get", query, stream=True)
876875
if not result["OK"]:
877-
self.log.error("Error downloading", f"{remoteOutput} for {job}: {result['Message']}")
876+
self.log.error("Error downloading", f"{remoteOutput} for {arcJob}: {result['Message']}")
878877
return S_ERROR(f"Error downloading {remoteOutput} for {jobID}")
879878
response = result["Value"]
880879

src/DIRAC/Resources/Computing/HTCondorCEComputingElement.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -109,21 +109,21 @@ def __init__(self, ceUniqueID):
109109

110110
#############################################################################
111111

112-
def _DiracToCondorID(self, diracJobID):
113-
"""Convert a DIRAC jobID into a Condor jobID.
112+
def _jobReferenceToCondorID(self, jobReference):
113+
"""Convert a job reference into a Condor jobID.
114114
Example: htcondorce://<ce>/1234.0 becomes 1234.0
115115
116-
:param str: DIRAC jobID
116+
:param str jobReference: job reference, a condor jobID with additional details
117117
:return: Condor jobID
118118
"""
119119
# Remove CE and protocol information from the job reference to get the Condor job ID
120-
if "://" in diracJobID:
121-
condorJobID = diracJobID.split("/")[-1]
120+
if "://" in jobReference:
121+
condorJobID = jobReference.split("/")[-1]
122122
return condorJobID
123-
return diracJobID
123+
return jobReference
124124

125-
def _condorToDiracID(self, condorJobIDs):
126-
"""Get the references from the condor_submit output.
125+
def _condorIDToJobReference(self, condorJobIDs):
126+
"""Get the job references from the condor job IDs.
127127
Cluster ids look like " 107.0 - 107.0 " or " 107.0 - 107.4 "
128128
129129
:param str condorJobIDs: the output of condor_submit
@@ -318,16 +318,16 @@ def submitJob(self, executableFile, proxy, numberOfJobs=1):
318318
return result
319319

320320
stdout = result["Value"]
321-
pilotJobReferences = self._condorToDiracID(stdout)
322-
if not pilotJobReferences["OK"]:
323-
return pilotJobReferences
324-
pilotJobReferences = pilotJobReferences["Value"]
321+
jobReferences = self._condorIDToJobReference(stdout)
322+
if not jobReferences["OK"]:
323+
return jobReferences
324+
jobReferences = jobReferences["Value"]
325325

326326
self.log.verbose("JobStamps:", jobStamps)
327-
self.log.verbose("pilotRefs:", pilotJobReferences)
327+
self.log.verbose("pilotRefs:", jobReferences)
328328

329-
result = S_OK(pilotJobReferences)
330-
result["PilotStampDict"] = dict(zip(pilotJobReferences, jobStamps))
329+
result = S_OK(jobReferences)
330+
result["PilotStampDict"] = dict(zip(jobReferences, jobStamps))
331331
if self.useLocalSchedd:
332332
# Executable is transferred afterward
333333
# Inform the caller that Condor cannot delete it before the end of the execution
@@ -346,15 +346,15 @@ def killJob(self, jobIDList):
346346
self.log.verbose("KillJob jobIDList", jobIDList)
347347
self.tokenFile = None
348348

349-
for diracJobID in jobIDList:
350-
condorJobID = self._DiracToCondorID(diracJobID.split(":::")[0])
351-
self.log.verbose("Killing pilot", diracJobID)
349+
for jobReference in jobIDList:
350+
condorJobID = self._jobReferenceToCondorID(jobReference.split(":::")[0])
351+
self.log.verbose("Killing pilot", jobReference)
352352
cmd = ["condor_rm"]
353353
cmd.extend(self.remoteScheddOptions.strip().split(" "))
354354
cmd.append(condorJobID)
355355
result = self._executeCondorCommand(cmd, keepTokenFile=True)
356356
if not result["OK"]:
357-
self.log.error("Failed to kill pilot", f"{diracJobID}: {result['Message']}")
357+
self.log.error("Failed to kill pilot", f"{jobReference}: {result['Message']}")
358358
return result
359359

360360
self.tokenFile = None
@@ -403,9 +403,9 @@ def getJobStatus(self, jobIDList):
403403
resultDict = {}
404404
condorIDs = {}
405405
# Get all condorIDs so we can just call condor_q and condor_history once
406-
for diracJobID in jobIDList:
407-
diracJobID = diracJobID.split(":::")[0]
408-
condorIDs[diracJobID] = self._DiracToCondorID(diracJobID)
406+
for jobReference in jobIDList:
407+
jobReference = jobReference.split(":::")[0]
408+
condorIDs[jobReference] = self._jobReferenceToCondorID(jobReference)
409409

410410
self.tokenFile = None
411411

@@ -493,7 +493,7 @@ def __getJobOutput(self, jobID, outTypes):
493493
"""
494494
# Extract stamp from the Job ID
495495
if ":::" in jobID:
496-
diracJobID, stamp = jobID.split(":::")
496+
jobReference, stamp = jobID.split(":::")
497497
else:
498498
return S_ERROR(f"DIRAC stamp not defined for {jobID}")
499499

@@ -503,7 +503,7 @@ def __getJobOutput(self, jobID, outTypes):
503503
return S_ERROR(f"Stamp is not long enough: {stamp}")
504504
pathToResult = os.path.join(self.ceName, stamp[0], stamp[1:3])
505505

506-
condorJobID = self._DiracToCondorID(diracJobID)
506+
condorJobID = self._jobReferenceToCondorID(jobReference)
507507
iwd = os.path.join(self.workingDirectory, pathToResult)
508508

509509
try:

0 commit comments

Comments
 (0)