Skip to content

Commit 0223483

Browse files
committed
Check the local condor's PER_JOB_HISTORY_DIR against gratia-probe-htcondor-ce's DataFolder (SOFTWARE-5115)
If the local batch system is condor, the routed job goes into the local schedd, so the probe needs to read from that schedd's PER_JOB_HISTORY_DIR.
1 parent 4401c62 commit 0223483

File tree

1 file changed

+21
-3
lines changed

1 file changed

+21
-3
lines changed

osg_configure/configure_modules/gratia.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def __init__(self, *args, **kwargs):
6969
self._old_job_managers = ['pbs', 'sge', 'lsf', 'condor', 'slurm']
7070
self._probe_config = {}
7171
self.grid_group = 'OSG'
72+
self.condor_enabled = False
7273

7374
self.log("GratiaConfiguration.__init__ completed")
7475

@@ -126,6 +127,10 @@ def parse_configuration(self, configuration):
126127
return
127128

128129
self._set_enabled_probe_host(self.options['probes'].value)
130+
131+
if utilities.config_safe_getboolean(configuration, "Condor", "enabled"):
132+
self.condor_enabled = True
133+
129134
self.log('GratiaConfiguration.parse_configuration completed')
130135

131136
def configure(self, attributes):
@@ -429,13 +434,22 @@ def _configure_htcondor_ce_probe(self):
429434
def _verify_gratia_dirs_for_htcondor_ce_probe(self) -> bool:
430435
"""
431436
Verify that the HTCondor-CE PER_JOB_HISTORY_DIR and the DataFolder
432-
directory are the same and warn the admin if the two don't match
437+
directory are the same and warn the admin if the two don't match.
438+
439+
If the batch system is condor, look at its PER_JOB_HISTORY_DIR instead;
440+
we want the routed jobs, but those are moved to the batch system schedd.
441+
433442
"""
434443

435444
if not os.path.exists(CONDOR_CE_CONFIG_VAL):
436445
raise exceptions.ConfigureError(f"{CONDOR_CE_CONFIG_VAL} missing")
437446

438-
history_dir = self._get_condor_ce_history_dir()
447+
if self.condor_enabled:
448+
history_dir = self._get_condor_history_dir()
449+
condor_name = "Condor"
450+
else:
451+
history_dir = self._get_condor_ce_history_dir()
452+
condor_name = "HTCondor-CE"
439453

440454
config_location = GRATIA_CONFIG_FILES['htcondor-ce']
441455
contents = utilities.read_file(config_location, default="")
@@ -445,7 +459,7 @@ def _verify_gratia_dirs_for_htcondor_ce_probe(self) -> bool:
445459

446460
advice_on_error = (
447461
f"Make sure DataFolder in {config_location} ({data_folder or 'missing'})"
448-
f" and PER_JOB_HISTORY_DIR in the HTCondor-CE config ({history_dir or 'missing'})"
462+
f" and PER_JOB_HISTORY_DIR in the {condor_name} config ({history_dir or 'missing'})"
449463
f" exist and are the same, accessible directory."
450464
)
451465

@@ -480,6 +494,10 @@ def _verify_gratia_dirs_for_htcondor_ce_probe(self) -> bool:
480494
self.logger.error(advice_on_error)
481495
return False
482496

497+
def _get_condor_history_dir(self):
    """Return PER_JOB_HISTORY_DIR as configured for the local condor SCHEDD.

    The lookup is delegated to ``utilities.get_condor_config_val`` with
    ``subsystem="SCHEDD"`` and ``quiet_undefined=True``; whatever that
    helper returns is passed back unchanged.

    NOTE(review): the caller formats the result as
    ``history_dir or 'missing'``, so a falsy result is presumably what the
    helper yields when the setting is undefined -- confirm against
    ``utilities.get_condor_config_val``.
    """
    return utilities.get_condor_config_val(
        "PER_JOB_HISTORY_DIR",
        subsystem="SCHEDD",
        quiet_undefined=True,
    )
500+
483501
def _get_condor_ce_history_dir(self):
484502
cmd = [CONDOR_CE_CONFIG_VAL, '-subsystem', 'SCHEDD', 'PER_JOB_HISTORY_DIR']
485503
try:

0 commit comments

Comments
 (0)