Skip to content

Commit 54db53b

Browse files
authored
Merge pull request #6566 from fstagni/11_removeDN
[8.1] WMS: Working with usernames instead of OwnerDNs
2 parents e68f939 + a71b16d commit 54db53b

27 files changed

+384
-526
lines changed

docs/source/DeveloperGuide/CodeTesting/index.rst

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -309,23 +309,33 @@ Running the above might take a while. Supposing you are interested in running on
309309
./integration_tests.py prepare-environment [FLAGS]
310310
./integration_tests.py install-server
311311
312-
which (in some minutes) will give you a fully dockerized server setup (`docker container ls` will list the created container, and you can see what's going on inside with the standard `docker exec -it server /bin/bash`). Now, suppose that you want to run `WorkloadManagementSystem/Test_JobDB.py`.
313-
The first thing to do is that you should first login in the docker container, by doing:
312+
which (in some minutes) will give you a fully dockerized server setup
313+
(`docker container ls` will list the created container, and you can see what's going on inside with the standard `docker exec -it server /bin/bash`).
314+
Now, suppose that you want to run `WorkloadManagementSystem/Test_JobDB.py`,
315+
the first thing to do is to log in to the docker container, by doing:
314316

315317
.. code-block:: bash
316318
317319
./integration_tests.py exec-server
318320
319-
The installations automatically pick up external changes to the DIRAC code and tests)
321+
(The docker installation automatically picks up external changes to the DIRAC code and tests)
320322

321323
Now you can run the test with:
322324

323325
.. code-block:: bash
324326
325-
pytest LocalRepo/ALTERNATIVE_MODULES/DIRAC/tests/Integration/WorkloadManagementSystem/Test_JobDB.py
327+
pytest --no-check-dirac-environment LocalRepo/ALTERNATIVE_MODULES/DIRAC/tests/Integration/WorkloadManagementSystem/Test_JobDB.py
326328
327329
You can find the logs of the services in `/home/dirac/ServerInstallDIR/diracos/runit/`
328330

331+
You can also log in to the client and mysql containers with:
332+
333+
.. code-block:: bash
334+
335+
./integration_tests.py exec-client
336+
./integration_tests.py exec-mysql
337+
338+
329339
330340
Validation and System tests
331341
---------------------------

src/DIRAC/Interfaces/API/Job.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -702,18 +702,6 @@ def setOwnerGroup(self, ownerGroup):
702702
self._addParameter(self.workflow, "OwnerGroup", "JDL", ownerGroup, "User specified owner group.")
703703
return S_OK()
704704

705-
#############################################################################
706-
def setOwnerDN(self, ownerDN):
707-
"""Developer function.
708-
709-
Allows to force expected owner DN of proxy.
710-
"""
711-
if not isinstance(ownerDN, str):
712-
return self._reportError("Expected string for job owner DN", **{"ownerGroup": ownerDN})
713-
714-
self._addParameter(self.workflow, "OwnerDN", "JDL", ownerDN, "User specified owner DN.")
715-
return S_OK()
716-
717705
#############################################################################
718706
def setType(self, jobType):
719707
"""Developer function.

src/DIRAC/Interfaces/scripts/dirac_wms_job_attributes.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
'MinorStatus': 'Execution Complete',
2929
'OSandboxReadyFlag': 'False',
3030
'Owner': 'vhamar',
31-
'OwnerDN': '/O=GRID-FR/C=FR/O=CNRS/OU=CPPM/CN=Vanessa Hamar',
3231
'OwnerGroup': 'eela_user',
3332
'RescheduleCounter': '0',
3433
'RescheduleTime': 'None',
@@ -42,7 +41,8 @@
4241
'UserPriority': '1',
4342
'VerifiedFlag': 'True'}
4443
"""
45-
import DIRAC
44+
from DIRAC import exit as dExit
45+
from DIRAC import gLogger
4646
from DIRAC.Core.Base.Script import Script
4747

4848

@@ -65,9 +65,9 @@ def main():
6565
exitCode = 2
6666

6767
for error in errorList:
68-
print("ERROR %s: %s" % error)
68+
gLogger.error(f"{error}")
6969

70-
DIRAC.exit(exitCode)
70+
dExit(exitCode)
7171

7272

7373
if __name__ == "__main__":

src/DIRAC/Interfaces/scripts/dirac_wms_job_get_jdl.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,12 @@
1919
'Executable': '/bin/ls',
2020
'JobID': '1',
2121
'JobName': 'DIRAC_vhamar_602138',
22-
'JobRequirements': '[OwnerDN = /O=GRID-FR/C=FR/O=CNRS/OU=CPPM/CN=Vanessa Hamar;
23-
OwnerGroup = eela_user;
22+
'JobRequirements': '[OwnerGroup = eela_user;
2423
Setup = EELA-Production;
2524
UserPriority = 1;
2625
CPUTime = 0 ]',
2726
'OutputSandbox': ['std.out', 'std.err'],
2827
'Owner': 'vhamar',
29-
'OwnerDN': '/O=GRID-FR/C=FR/O=CNRS/OU=CPPM/CN=Vanessa Hamar',
3028
'OwnerGroup': 'eela_user',
3129
'Priority': '1'}
3230
"""

src/DIRAC/TransformationSystem/Client/WorkflowTasks.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -109,18 +109,17 @@ def prepareTransformationTasks(self, transBody, taskDict, owner="", ownerGroup="
109109
ownerDN = res["Value"][0]
110110

111111
if bulkSubmissionFlag:
112-
return self.__prepareTasksBulk(transBody, taskDict, owner, ownerGroup, ownerDN)
112+
return self.__prepareTasksBulk(transBody, taskDict, owner, ownerGroup)
113113
# not a bulk submission
114-
return self.__prepareTasks(transBody, taskDict, owner, ownerGroup, ownerDN)
114+
return self.__prepareTasks(transBody, taskDict, owner, ownerGroup)
115115

116-
def __prepareTasksBulk(self, transBody, taskDict, owner, ownerGroup, ownerDN):
116+
def __prepareTasksBulk(self, transBody, taskDict, owner, ownerGroup):
117117
"""Prepare transformation tasks with a single job object for bulk submission
118118
119119
:param str transBody: transformation job template
120120
:param dict taskDict: dictionary of per task parameters
121121
:param str owner: owner of the transformation
122122
:param str ownerGroup: group of the owner of the transformation
123-
:param str ownerDN: DN of the owner of the transformation
124123
125124
:return: S_OK/S_ERROR with updated taskDict
126125
"""
@@ -137,7 +136,6 @@ def __prepareTasksBulk(self, transBody, taskDict, owner, ownerGroup, ownerDN):
137136
self._logVerbose(f"Setting job owner:group to {owner}:{ownerGroup}", transID=transID, method=method)
138137
oJob.setOwner(owner)
139138
oJob.setOwnerGroup(ownerGroup)
140-
oJob.setOwnerDN(ownerDN)
141139

142140
try:
143141
site = oJob.workflow.findParameter("Site").getValue()
@@ -253,14 +251,13 @@ def __prepareTasksBulk(self, transBody, taskDict, owner, ownerGroup, ownerDN):
253251
taskDict["BulkJobObject"] = oJob
254252
return S_OK(taskDict)
255253

256-
def __prepareTasks(self, transBody, taskDict, owner, ownerGroup, ownerDN):
254+
def __prepareTasks(self, transBody, taskDict, owner, ownerGroup):
257255
"""Prepare transformation tasks with a job object per task
258256
259257
:param str transBody: transformation job template
260258
:param dict taskDict: dictionary of per task parameters
261259
:param owner: owner of the transformation
262260
:param str ownerGroup: group of the owner of the transformation
263-
:param str ownerDN: DN of the owner of the transformation
264261
265262
:return: S_OK/S_ERROR with updated taskDict
266263
"""
@@ -275,7 +272,6 @@ def __prepareTasks(self, transBody, taskDict, owner, ownerGroup, ownerDN):
275272
oJobTemplate = self.jobClass(transBody)
276273
oJobTemplate.setOwner(owner)
277274
oJobTemplate.setOwnerGroup(ownerGroup)
278-
oJobTemplate.setOwnerDN(ownerDN)
279275

280276
try:
281277
site = oJobTemplate.workflow.findParameter("Site").getValue()

src/DIRAC/WorkloadManagementSystem/Agent/JobAgent.py

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from diraccfg import CFG
1313

1414
from DIRAC import S_OK, S_ERROR, gConfig, rootPath, siteName
15+
from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getDNForUsername
1516
from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
1617
from DIRAC.Core.Base.AgentModule import AgentModule
1718
from DIRAC.Core.Security.ProxyInfo import getProxyInfo
@@ -202,7 +203,7 @@ def execute(self):
202203
self.matchFailedCount = 0
203204

204205
# Check matcher information returned
205-
matcherParams = ["JDL", "DN", "Group"]
206+
matcherParams = ["JDL", "Owner", "Group"]
206207
matcherInfo = jobRequest["Value"]
207208
jobID = matcherInfo["JobID"]
208209
self.jobReport.setJob(jobID)
@@ -217,7 +218,7 @@ def execute(self):
217218

218219
jobJDL = matcherInfo["JDL"]
219220
jobGroup = matcherInfo["Group"]
220-
ownerDN = matcherInfo["DN"]
221+
owner = matcherInfo["Owner"]
221222
ceDict = matcherInfo["CEDict"]
222223
matchTime = matcherInfo["matchTime"]
223224

@@ -242,7 +243,7 @@ def execute(self):
242243
jobType = submissionParams["jobType"]
243244

244245
self.log.verbose("Job request successful: \n", jobRequest["Value"])
245-
self.log.info("Received", f"JobID={jobID}, JobType={jobType}, OwnerDN={ownerDN}, JobGroup={jobGroup}")
246+
self.log.info("Received", f"JobID={jobID}, JobType={jobType}, Owner={owner}, JobGroup={jobGroup}")
246247
self.jobCount += 1
247248
self.jobReport.setJobParameter(par_name="MatcherServiceTime", par_value=str(matchTime), sendFlag=False)
248249
if "BOINC_JOB_ID" in os.environ:
@@ -253,6 +254,7 @@ def execute(self):
253254
)
254255

255256
self.jobReport.setJobStatus(minorStatus="Job Received by Agent", sendFlag=False)
257+
ownerDN = getDNForUsername(owner)["Value"]
256258
result_setupProxy = self._setupProxy(ownerDN, jobGroup)
257259
if not result_setupProxy["OK"]:
258260
result = self._rescheduleFailedJob(jobID, result_setupProxy["Message"])
@@ -472,26 +474,26 @@ def _setupProxy(self, ownerDN, ownerGroup):
472474
self.log.error("Failed to setup proxy", proxyResult["Message"])
473475
return S_ERROR(f"Failed to setup proxy: {proxyResult['Message']}")
474476
return S_OK(proxyResult["Value"])
475-
else:
476-
ret = getProxyInfo(disableVOMS=True)
477-
if not ret["OK"]:
478-
self.log.error("Invalid Proxy", ret["Message"])
479-
return S_ERROR("Invalid Proxy")
480-
481-
proxyChain = ret["Value"]["chain"]
482-
if "groupProperties" not in ret["Value"]:
483-
print(ret["Value"])
484-
print(proxyChain.dumpAllToString())
485-
self.log.error("Invalid Proxy", "Group has no properties defined")
486-
return S_ERROR("Proxy has no group properties defined")
487-
488-
groupProps = ret["Value"]["groupProperties"]
489-
if Properties.GENERIC_PILOT in groupProps or Properties.PILOT in groupProps:
490-
proxyResult = self._requestProxyFromProxyManager(ownerDN, ownerGroup)
491-
if not proxyResult["OK"]:
492-
self.log.error("Invalid Proxy", proxyResult["Message"])
493-
return S_ERROR(f"Failed to setup proxy: {proxyResult['Message']}")
494-
proxyChain = proxyResult["Value"]
477+
478+
ret = getProxyInfo(disableVOMS=True)
479+
if not ret["OK"]:
480+
self.log.error("Invalid Proxy", ret["Message"])
481+
return S_ERROR("Invalid Proxy")
482+
483+
proxyChain = ret["Value"]["chain"]
484+
if "groupProperties" not in ret["Value"]:
485+
print(ret["Value"])
486+
print(proxyChain.dumpAllToString())
487+
self.log.error("Invalid Proxy", "Group has no properties defined")
488+
return S_ERROR("Proxy has no group properties defined")
489+
490+
groupProps = ret["Value"]["groupProperties"]
491+
if Properties.GENERIC_PILOT in groupProps or Properties.PILOT in groupProps:
492+
proxyResult = self._requestProxyFromProxyManager(ownerDN, ownerGroup)
493+
if not proxyResult["OK"]:
494+
self.log.error("Invalid Proxy", proxyResult["Message"])
495+
return S_ERROR(f"Failed to setup proxy: {proxyResult['Message']}")
496+
proxyChain = proxyResult["Value"]
495497

496498
return S_OK(proxyChain)
497499

src/DIRAC/WorkloadManagementSystem/Agent/JobCleaningAgent.py

Lines changed: 26 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@
2525
import datetime
2626
import os
2727

28-
import DIRAC.Core.Utilities.TimeUtilities as TimeUtilities
2928
from DIRAC import S_ERROR, S_OK
3029
from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
3130
from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getDNForUsername
3231
from DIRAC.Core.Base.AgentModule import AgentModule
32+
from DIRAC.Core.Utilities import TimeUtilities
3333
from DIRAC.RequestManagementSystem.Client.File import File
3434
from DIRAC.RequestManagementSystem.Client.Operation import Operation
3535
from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
@@ -57,6 +57,7 @@ def __init__(self, *args, **kwargs):
5757
self.prodTypes = []
5858
self.removeStatusDelay = {}
5959
self.removeStatusDelayHB = {}
60+
self.maxHBJobsAtOnce = 0
6061

6162
#############################################################################
6263
def initialize(self):
@@ -80,7 +81,7 @@ def initialize(self):
8081
self.removeStatusDelayHB[JobStatus.DONE] = self.am_getOption("RemoveStatusDelayHB/Done", -1)
8182
self.removeStatusDelayHB[JobStatus.KILLED] = self.am_getOption("RemoveStatusDelayHB/Killed", -1)
8283
self.removeStatusDelayHB[JobStatus.FAILED] = self.am_getOption("RemoveStatusDelayHB/Failed", -1)
83-
self.maxHBJobsAtOnce = self.am_getOption("MaxHBJobsAtOnce", 0)
84+
self.maxHBJobsAtOnce = self.am_getOption("MaxHBJobsAtOnce", self.maxHBJobsAtOnce)
8485

8586
return S_OK()
8687

@@ -93,7 +94,7 @@ def _getAllowedJobTypes(self):
9394
for jobType in result["Value"]:
9495
if jobType not in self.prodTypes:
9596
cleanJobTypes.append(jobType)
96-
self.log.notice(f"JobTypes to clean {cleanJobTypes}")
97+
self.log.notice("JobTypes to clean", cleanJobTypes)
9798
return S_OK(cleanJobTypes)
9899

99100
def execute(self):
@@ -102,7 +103,7 @@ def execute(self):
102103
# First, fully remove jobs in JobStatus.DELETED state
103104
result = self.removeDeletedJobs()
104105
if not result["OK"]:
105-
self.log.error(f"Failed to remove jobs with status {JobStatus.DELETED}")
106+
self.log.error("Failed to remove jobs with status", JobStatus.DELETED)
106107

107108
# Second: set the status to JobStatus.DELETED for certain jobs
108109

@@ -117,8 +118,7 @@ def execute(self):
117118

118119
baseCond = {"JobType": result["Value"]}
119120
# Delete jobs with final status
120-
for status in self.removeStatusDelay:
121-
delay = self.removeStatusDelay[status]
121+
for status, delay in self.removeStatusDelay.items():
122122
if delay < 0:
123123
# Negative delay means don't delete anything...
124124
continue
@@ -185,26 +185,7 @@ def removeDeletedJobs(self):
185185
if not jobList:
186186
return S_OK()
187187

188-
ownerJobsDict = self._getOwnerJobsDict(jobList)
189-
190-
fail = False
191-
for owner, jobsList in ownerJobsDict.items():
192-
ownerDN = owner.split(";")[0]
193-
ownerGroup = owner.split(";")[1]
194-
self.log.verbose("Attempting to remove jobs", f"(n={len(jobsList)}) for {ownerDN} : {ownerGroup}")
195-
wmsClient = WMSClient(useCertificates=True, delegatedDN=ownerDN, delegatedGroup=ownerGroup)
196-
result = wmsClient.removeJob(jobsList)
197-
if not result["OK"]:
198-
self.log.error(
199-
"Could not remove jobs",
200-
f"for {ownerDN} : {ownerGroup} (n={len(jobsList)}) : {result['Message']}",
201-
)
202-
fail = True
203-
204-
if fail:
205-
return S_ERROR()
206-
207-
return S_OK()
188+
return self._deleteRemoveJobs(jobList, remove=True)
208189

209190
def deleteJobsByStatus(self, condDict, delay=False):
210191
"""Sets the job status to "DELETED" for jobs in condDict.
@@ -234,19 +215,29 @@ def deleteJobsByStatus(self, condDict, delay=False):
234215
if not jobList:
235216
return S_OK()
236217

218+
return self._deleteRemoveJobs(jobList)
219+
220+
def _deleteRemoveJobs(self, jobList, remove=False):
221+
"""Delete or removes a jobList"""
237222
ownerJobsDict = self._getOwnerJobsDict(jobList)
238223

239224
fail = False
240225
for owner, jobsList in ownerJobsDict.items():
241-
ownerDN = owner.split(";")[0]
242-
ownerGroup = owner.split(";")[1]
243-
self.log.verbose("Attempting to delete jobs", f"(n={len(jobsList)}) for {ownerDN} : {ownerGroup}")
244-
wmsClient = WMSClient(useCertificates=True, delegatedDN=ownerDN, delegatedGroup=ownerGroup)
245-
result = wmsClient.deleteJob(jobsList)
226+
user, ownerGroup = owner.split(";", maxsplit=1)
227+
self.log.verbose("Attempting to delete jobs", f"(n={len(jobsList)}) for {user} : {ownerGroup}")
228+
res = getDNForUsername(user)
229+
if not res["OK"]:
230+
self.log.error("No DN found", f"for {user}")
231+
return res
232+
wmsClient = WMSClient(useCertificates=True, delegatedDN=res["Value"][0], delegatedGroup=ownerGroup)
233+
if remove:
234+
result = wmsClient.removeJob(jobsList)
235+
else:
236+
result = wmsClient.deleteJob(jobsList)
246237
if not result["OK"]:
247238
self.log.error(
248-
"Could not delete jobs",
249-
f"for {ownerDN} : {ownerGroup} (n={len(jobsList)}) : {result['Message']}",
239+
"Could not {'remove' if remove else 'delete'} jobs",
240+
f"for {user} : {ownerGroup} (n={len(jobsList)}) : {result['Message']}",
250241
)
251242
fail = True
252243

@@ -279,7 +270,7 @@ def _getOwnerJobsDict(self, jobList):
279270
280271
:returns: a dict with a grouping of them by owner, e.g.{'dn;group': [1, 3, 4], 'dn;group_1': [5], 'dn_1;group': [2]}
281272
"""
282-
res = self.jobDB.getJobsAttributes(jobList, ["OwnerDN", "OwnerGroup"])
273+
res = self.jobDB.getJobsAttributes(jobList, ["Owner", "OwnerGroup"])
283274
if not res["OK"]:
284275
self.log.error("Could not get the jobs attributes", res["Message"])
285276
return res
@@ -327,8 +318,7 @@ def deleteJobOversizedSandbox(self, jobIDList):
327318
else:
328319
successful[jobID] = lfn
329320

330-
result = {"Successful": successful, "Failed": failed}
331-
return S_OK(result)
321+
return S_OK({"Successful": successful, "Failed": failed})
332322

333323
def __setRemovalRequest(self, lfn, owner, ownerGroup):
334324
"""Set removal request with the given credentials"""
@@ -369,4 +359,3 @@ def removeHeartBeatLoggingInfo(self, status, delayDays):
369359
self.log.error("Failed to delete from HeartBeatLoggingInfo", result["Message"])
370360
else:
371361
self.log.info("Deleted HeartBeatLogging info")
372-
return

0 commit comments

Comments
 (0)