Skip to content

Commit 5559257

Browse files
authored
Merge pull request #137 from matyasselmeci/pr/v4/SOFTWARE-5115.gratia-with-htcondor
Check for right PER_JOB_HISTORY_DIR with htcondor-ce probe and a condor batch system
2 parents e948d4b + 2a14f6b commit 5559257

File tree

2 files changed

+44
-27
lines changed

2 files changed

+44
-27
lines changed

osg_configure/configure_modules/gratia.py

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@
2222

2323
CE_PROBE_RPMS = ['gratia-probe-htcondor-ce']
2424

25-
CONDOR_CE_CONFIG_VAL = "/usr/bin/condor_ce_config_val"
26-
2725

2826
def requirements_are_installed():
2927
return (utilities.gateway_installed() and
@@ -69,6 +67,7 @@ def __init__(self, *args, **kwargs):
6967
self._old_job_managers = ['pbs', 'sge', 'lsf', 'condor', 'slurm']
7068
self._probe_config = {}
7169
self.grid_group = 'OSG'
70+
self.condor_enabled = False
7271

7372
self.log("GratiaConfiguration.__init__ completed")
7473

@@ -126,6 +125,10 @@ def parse_configuration(self, configuration):
126125
return
127126

128127
self._set_enabled_probe_host(self.options['probes'].value)
128+
129+
if utilities.config_safe_getboolean(configuration, "Condor", "enabled"):
130+
self.condor_enabled = True
131+
129132
self.log('GratiaConfiguration.parse_configuration completed')
130133

131134
def configure(self, attributes):
@@ -241,7 +244,7 @@ def check_attributes(self, attributes):
241244
self.log("GratiaConfiguration.check_attributes completed")
242245
return True
243246
status = self._check_servers()
244-
if 'htcondor-ce' in self._probe_config:
247+
if 'htcondor-ce' in self._probe_config and requirements_are_installed():
245248
status &= self._verify_gratia_dirs_for_htcondor_ce_probe()
246249
self.log("GratiaConfiguration.check_attributes completed")
247250
return status
@@ -429,13 +432,19 @@ def _configure_htcondor_ce_probe(self):
429432
def _verify_gratia_dirs_for_htcondor_ce_probe(self) -> bool:
430433
"""
431434
Verify that the HTCondor-CE PER_JOB_HISTORY_DIR and the DataFolder
432-
directory are the same and warn the admin if the two don't match
433-
"""
435+
directory are the same and warn the admin if the two don't match.
434436
435-
if not os.path.exists(CONDOR_CE_CONFIG_VAL):
436-
raise exceptions.ConfigureError(f"{CONDOR_CE_CONFIG_VAL} missing")
437+
If the batch system is condor, look at its PER_JOB_HISTORY_DIR instead;
438+
we want the routed jobs, but those are moved to the batch system schedd.
437439
438-
history_dir = self._get_condor_ce_history_dir()
440+
"""
441+
442+
if self.condor_enabled:
443+
history_dir = self._get_condor_history_dir()
444+
condor_name = "Condor"
445+
else:
446+
history_dir = self._get_condor_ce_history_dir()
447+
condor_name = "HTCondor-CE"
439448

440449
config_location = GRATIA_CONFIG_FILES['htcondor-ce']
441450
contents = utilities.read_file(config_location, default="")
@@ -445,7 +454,7 @@ def _verify_gratia_dirs_for_htcondor_ce_probe(self) -> bool:
445454

446455
advice_on_error = (
447456
f"Make sure DataFolder in {config_location} ({data_folder or 'missing'})"
448-
f" and PER_JOB_HISTORY_DIR in the HTCondor-CE config ({history_dir or 'missing'})"
457+
f" and PER_JOB_HISTORY_DIR in the {condor_name} config ({history_dir or 'missing'})"
449458
f" exist and are the same, accessible directory."
450459
)
451460

@@ -480,24 +489,15 @@ def _verify_gratia_dirs_for_htcondor_ce_probe(self) -> bool:
480489
self.logger.error(advice_on_error)
481490
return False
482491

483-
def _get_condor_ce_history_dir(self):
484-
cmd = [CONDOR_CE_CONFIG_VAL, '-subsystem', 'SCHEDD', 'PER_JOB_HISTORY_DIR']
485-
try:
486-
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="latin-1")
487-
history_dir, errtext = process.communicate()
488-
if process.returncode != 0:
489-
self.logger.info("While checking gratia parameters: %s failed. Output follows:\n%s",
490-
CONDOR_CE_CONFIG_VAL, errtext)
491-
return None
492-
except OSError as err:
493-
self.logger.info("While checking gratia parameters: Error running %s: %s",
494-
CONDOR_CE_CONFIG_VAL, err)
495-
return None
496-
history_dir = history_dir.strip()
497-
if history_dir.startswith('Not defined'):
498-
return None
492+
@staticmethod
def _get_condor_history_dir():
    """
    Look up PER_JOB_HISTORY_DIR for the SCHEDD subsystem of the batch
    system's condor via utilities.get_condor_config_val.

    "Variable is undefined" messages are silenced (quiet_undefined=True).

    Returns:
    Whatever utilities.get_condor_config_val returns: the stripped
    output of condor_config_val, or None if it reports an error.
    """
    return utilities.get_condor_config_val(
        "PER_JOB_HISTORY_DIR",
        subsystem="SCHEDD",
        quiet_undefined=True,
    )
500496

497+
@staticmethod
def _get_condor_ce_history_dir():
    """
    Look up PER_JOB_HISTORY_DIR for the SCHEDD subsystem of HTCondor-CE
    via utilities.get_condor_ce_config_val.

    "Variable is undefined" messages are silenced (quiet_undefined=True).

    Returns:
    The value handed back by utilities.get_condor_ce_config_val.
    """
    ce_history_dir = utilities.get_condor_ce_config_val(
        "PER_JOB_HISTORY_DIR",
        subsystem="SCHEDD",
        quiet_undefined=True,
    )
    return ce_history_dir
500+
501501
@staticmethod
502502
def replace_setting(buf, setting, value, xml_file=True):
503503
"""

osg_configure/modules/utilities.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def get_condor_config(default_config='/etc/condor/condor_config'):
296296
return os.path.join(get_condor_location(), 'etc/condor_config')
297297

298298

299-
def get_condor_config_val(variable, executable='condor_config_val', quiet_undefined=False):
299+
def get_condor_config_val(variable, executable=None, quiet_undefined=False, subsystem=None):
300300
"""
301301
Use condor_config_val to return the expanded value of a variable.
302302
@@ -306,12 +306,24 @@ def get_condor_config_val(variable, executable='condor_config_val', quiet_undefi
306306
poll condor_ce_config_val or condor_cron_config_val)
307307
quiet_undefined - set to True if messages from condor_config_val
308308
claiming the variable is undefined should be silenced
309+
subsystem - if passed, query a specific subsystem (SCHEDD, COLLECTOR, etc.)
309310
Returns:
310311
The stripped output of condor_config_val, or None if
311312
condor_config_val reports an error.
312313
"""
314+
if not executable:
315+
condor_location = get_condor_location()
316+
if condor_location:
317+
executable = os.path.join(condor_location, "bin/condor_config_val")
318+
else:
319+
executable = "condor_config_val"
320+
313321
try:
314-
process = subprocess.Popen([executable, variable],
322+
cmd = [executable]
323+
if subsystem:
324+
cmd.extend(["-subsystem", subsystem])
325+
cmd.append(variable)
326+
process = subprocess.Popen(cmd,
315327
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
316328
encoding="latin-1")
317329
output, error = process.communicate()
@@ -324,6 +336,11 @@ def get_condor_config_val(variable, executable='condor_config_val', quiet_undefi
324336
return None
325337

326338

339+
def get_condor_ce_config_val(variable, *args, **kwargs):
    """
    Query a configuration variable through condor_ce_config_val.

    Thin wrapper around get_condor_config_val that always sets the
    executable to /usr/bin/condor_ce_config_val; an "executable" keyword
    supplied by the caller is overridden.

    Arguments:
    variable - name of the variable to look up
    *args, **kwargs - forwarded unchanged to get_condor_config_val
    Returns:
    The return value of get_condor_config_val.
    """
    # Build the forwarded keywords in one step; the explicit keyword wins over
    # any "executable" entry already present in kwargs.
    forwarded = dict(kwargs, executable="/usr/bin/condor_ce_config_val")
    return get_condor_config_val(variable, *args, **forwarded)
342+
343+
327344
def read_file(filename, default=None):
328345
"""
329346
Read the contents of a file, returning default if the file cannot be read.

0 commit comments

Comments
 (0)