reframe-hpc
diff --git a/‎docs/config_reference.rst‎
Lines changed: 9 additions & 4 deletions b/‎docs/config_reference.rst‎
Lines changed: 9 additions & 4 deletions
diff --git a/‎reframe/core/backends.py‎
Lines changed: 1 addition & 2 deletions b/‎reframe/core/backends.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎reframe/core/logging.py‎
Lines changed: 79 additions & 28 deletions b/‎reframe/core/logging.py‎
Lines changed: 79 additions & 28 deletions
diff --git a/‎reframe/core/pipeline.py‎
Lines changed: 11 additions & 0 deletions b/‎reframe/core/pipeline.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎reframe/core/schedulers/pbs.py‎
Lines changed: 108 additions & 16 deletions b/‎reframe/core/schedulers/pbs.py‎
Lines changed: 108 additions & 16 deletions
@@ -695,12 +695,8 @@ All logging handlers share the following set of common attributes:
      If specific formatting is desired, the ``check_job_completion_time`` should be used instead.
    - ``%(check_name)s``: The name of the regression test on behalf of which ReFrame is currently executing.
      If ReFrame is not executing in the context of a regression test, ``reframe`` will be printed instead.
-   - ``%(check_num_tasks)s``: The number of tasks assigned to the regression test.
-   - ``%(check_outputdir)s``: The output directory associated with the currently executing test.
    - ``%(check_partition)s``: The system partition where this test is currently executing.
-   - ``%(check_stagedir)s``: The stage directory associated with the currently executing test.
    - ``%(check_system)s``: The system where this test is currently executing.
-   - ``%(check_tags)s``: The tags associated with this test.
    - ``%(check_perf_lower_thres)s``: The lower threshold of the performance difference from the reference value expressed as a fractional value.
      See the :attr:`reframe.core.pipeline.RegressionTest.reference` attribute of regression tests for more details.
    - ``%(check_perf_ref)s``: The reference performance value of a certain performance variable.
@@ -709,11 +705,20 @@ All logging handlers share the following set of common attributes:
      See the :attr:`reframe.core.pipeline.RegressionTest.reference` attribute of regression tests for more details.
    - ``%(check_perf_value)s``: The performance value obtained for a certain performance variable.
    - ``%(check_perf_var)s``: The name of the `performance variable <tutorial_basic.html#writing-a-performance-test>`__ being logged.
+   - ``%(check_ATTR)s``: This will log the value of the attribute ``ATTR`` of the currently executing regression test.
+     Mappings will be logged as ``k1=v1,k2=v2,..`` and all other iterables, except strings, will be logged as comma-separated lists.
+     If ``ATTR`` is not an attribute of the test, ``%(check_ATTR)s`` will be logged as ``<undefined>``.
+     This allows users to log arbitrary attributes of their tests.
+     For the complete list of test attributes, please refer to :doc:`regression_test_api`.
    - ``%(osuser)s``: The name of the OS user running ReFrame.
    - ``%(osgroup)s``: The name of the OS group running ReFrame.
    - ``%(version)s``: The ReFrame version.
 
 
+.. versionadded:: 3.3
+   The ability to log arbitrary test attributes was added.
+
+
 .. js:attribute:: .logging[].handlers[].datefmt
 
 .. object:: .logging[].handlers_perflog[].datefmt
 
@@ -19,8 +19,7 @@
 _scheduler_backend_modules = [
     'reframe.core.schedulers.local',
     'reframe.core.schedulers.slurm',
-    'reframe.core.schedulers.pbs',
-    'reframe.core.schedulers.torque'
+    'reframe.core.schedulers.pbs'
 ]
 _schedulers = {}
 
 
@@ -9,7 +9,6 @@
 import logging.handlers
 import numbers
 import os
-import pprint
 import re
 import shutil
 import sys
@@ -145,7 +144,7 @@ def emit(self, record):
         except OSError as e:
             raise LoggingError('logging failed') from e
 
-        self.baseFilename = os.path.join(dirname, record.check_name + '.log')
+        self.baseFilename = os.path.join(dirname, record.check.name + '.log')
         self.stream = self._streams.get(self.baseFilename, None)
         super().emit(record)
         self._streams[self.baseFilename] = self.stream
@@ -165,7 +164,65 @@ def _format_time_rfc3339(timestamp, datefmt):
     return re.sub(r'(%)?\:z', tz_rfc3339, time.strftime(datefmt, timestamp))
 
 
-class RFC3339Formatter(logging.Formatter):
+def _xfmt(val):
+    '''Convert ``val`` to a form suitable for logging.
+
+    :arg val: The value to convert.
+    :returns: ``'<undefined>'`` if ``val`` is :class:`None`; the evaluated
+        value if ``val`` is a deferred expression, or ``'<error>'`` if its
+        evaluation fails; ``val`` itself if it is a string; ``k1=v1,k2=v2``
+        for mappings; a comma-separated list of the stringified items for
+        any other iterable; ``val`` unchanged otherwise.
+    '''
+
+    # Imported here and not at the top of the file, as in the original code
+    from reframe.core.deferrable import _DeferredExpression
+
+    if val is None:
+        return '<undefined>'
+
+    if isinstance(val, _DeferredExpression):
+        try:
+            return val.evaluate()
+        except BaseException:
+            return '<error>'
+
+    if isinstance(val, str):
+        return val
+
+    if isinstance(val, collections.abc.Mapping):
+        return ','.join(f'{k}={v}' for k, v in val.items())
+
+    if isinstance(val, collections.abc.Iterable):
+        # Items are not necessarily strings (e.g., a list of integers), in
+        # which case `str.join()` would raise a `TypeError`
+        return ','.join(str(item) for item in val)
+
+    return val
+
+
+class CheckFieldFormatter(logging.Formatter):
+    '''Log formatter that dynamically looks up format specifiers inside a
+    regression test.
+
+    Every ``%(check_ATTR)s`` specifier found in the format string is filled
+    in from the attribute ``ATTR`` of ``record.check`` when a record is
+    formatted, unless the record already carries a value for it.
+
+    :arg fmt: The format string; if :class:`None`, no check-specific
+        specifiers are looked up.
+    :arg datefmt: Passed as is to :class:`logging.Formatter`.
+    :arg style: Passed as is to :class:`logging.Formatter`.
+    '''
+
+    def __init__(self, fmt=None, datefmt=None, style='%'):
+        super().__init__(fmt, datefmt, style)
+
+        # NOTE: This will work only when style='%'
+        #
+        # `fmt` defaults to `None`, which `re.findall()` would reject with a
+        # `TypeError`, so an empty string is scanned in that case
+        self.__extras = {
+            spec: None
+            for spec in re.findall(r'\%\((check_\S+?)\)s', fmt or '')
+        }
+
+        # Set the default value for 'check_name'
+        if 'check_name' in self.__extras:
+            self.__extras['check_name'] = 'reframe'
+
+    def format(self, record):
+        '''Format ``record`` after filling in its check-specific fields.'''
+
+        # Fill in the check-specific record attributes
+        if record.check:
+            for spec in self.__extras:
+                if hasattr(record, spec):
+                    # Attribute set elsewhere
+                    continue
+
+                # Strip the leading `check_` to get the attribute name
+                attr = spec.split('_', maxsplit=1)[1]
+                val = getattr(record.check, attr, None)
+                record.__dict__[spec] = _xfmt(val)
+        else:
+            # Update record with the dynamic extras even if check is not set
+            record.__dict__.update(self.__extras)
+
+        return super().format(record)
+
+
+class RFC3339Formatter(CheckFieldFormatter):
     def formatTime(self, record, datefmt=None):
         datefmt = datefmt or self.default_time_format
         if '%:z' not in datefmt:
@@ -176,7 +233,7 @@ def formatTime(self, record, datefmt=None):
 
     def format(self, record):
         datefmt = self.datefmt or self.default_time_format
-        if record.check_job_completion_time_unix is not None:
+        if record.check_job_completion_time_unix != _xfmt(None):
             ct = self.converter(record.check_job_completion_time_unix)
             record.check_job_completion_time = _format_time_rfc3339(
                 ct, datefmt
@@ -388,26 +445,24 @@ def __init__(self, logger=None, check=None):
         super().__init__(
             logger,
             {
-                'check_name': 'reframe',
-                'check_jobid': '-1',
-                'check_job_completion_time': None,
-                'check_job_completion_time_unix': None,
+                # Here we only set the format specifiers that do not
+                # correspond directly to check attributes
+                'check': check,
+                'check_jobid': _xfmt(None),
+                'check_job_completion_time': _xfmt(None),
+                'check_job_completion_time_unix': _xfmt(None),
                 'check_info': 'reframe',
-                'check_system': None,
-                'check_partition': None,
-                'check_environ': None,
-                'check_outputdir': None,
-                'check_stagedir': None,
-                'check_num_tasks': None,
-                'check_perf_var': None,
-                'check_perf_value': None,
-                'check_perf_ref': None,
-                'check_perf_lower_thres': None,
-                'check_perf_upper_thres': None,
-                'check_perf_unit': None,
-                'osuser':  osext.osuser()  or '<unknown>',
-                'osgroup': osext.osgroup() or '<unknown>',
-                'check_tags': None,
+                'check_system': _xfmt(None),
+                'check_partition': _xfmt(None),
+                'check_environ': _xfmt(None),
+                'check_perf_var': _xfmt(None),
+                'check_perf_value': _xfmt(None),
+                'check_perf_ref': _xfmt(None),
+                'check_perf_lower_thres': _xfmt(None),
+                'check_perf_upper_thres': _xfmt(None),
+                'check_perf_unit': _xfmt(None),
+                'osuser':  _xfmt(osext.osuser()),
+                'osgroup': _xfmt(osext.osgroup()),
                 'version': osext.reframe_version(),
             }
         )
@@ -428,15 +483,11 @@ def std_stream_handlers(self):
 
     def _update_check_extras(self):
         '''Return a dictionary with all the check-specific information.'''
+
         if self.check is None:
             return
 
-        self.extra['check_name'] = self.check.name
         self.extra['check_info'] = self.check.info()
-        self.extra['check_outputdir'] = self.check.outputdir
-        self.extra['check_stagedir'] = self.check.stagedir
-        self.extra['check_num_tasks'] = self.check.num_tasks
-        self.extra['check_tags'] = ','.join(self.check.tags)
         if self.check.current_system:
             self.extra['check_system'] = self.check.current_system.name
 
 
@@ -148,6 +148,17 @@ def disable_hook(cls, hook_name):
         '''
         cls._rfm_disabled_hooks.add(hook_name)
 
+    @classmethod
+    def pipeline_hooks(cls):
+        '''Collect the pipeline hooks of this class and of its bases.
+
+        The classes in the MRO of ``cls`` are visited in order and the
+        contents of their ``_rfm_pipeline_hooks`` mappings are merged.
+
+        :returns: A dictionary mapping each hook kind to the concatenation
+            of the hooks found for that kind.
+        '''
+        merged = {}
+        for klass in cls.mro():
+            if not hasattr(klass, '_rfm_pipeline_hooks'):
+                continue
+
+            for kind, hooks in klass._rfm_pipeline_hooks.items():
+                merged.setdefault(kind, []).extend(hooks)
+
+        return merged
+
     #: The name of the test.
     #:
     #: :type: string that can contain any character except ``/``
 
@@ -20,7 +20,7 @@
 import reframe.utility.osext as osext
 from reframe.core.backends import register_scheduler
 from reframe.core.config import settings
-from reframe.core.exceptions import JobSchedulerError
+from reframe.core.exceptions import (JobError, JobSchedulerError)
 from reframe.utility import seconds_to_hms
 
 
@@ -40,6 +40,18 @@
 _run_strict = functools.partial(osext.run_command, check=True)
 
 
+# Mapping of the single-letter ``job_state`` codes parsed from the output of
+# ``qstat -f`` to the descriptive state names stored in a job's ``_state``
+JOB_STATES = {
+    'Q': 'QUEUED',
+    'H': 'HELD',
+    'R': 'RUNNING',
+    'E': 'EXITING',
+    'T': 'MOVED',
+    'W': 'WAITING',
+    'S': 'SUSPENDED',
+    'C': 'COMPLETED',
+}
+
+
 class _PbsJob(sched.Job):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -156,24 +168,104 @@ def finished(self, job):
 
         return job.completed
 
-    def _poll_job(self, job):
-        if job is None:
+    def _update_nodelist(self, job, nodespec):
+        '''Set the node list of ``job`` from ``nodespec``, if not yet set.
+
+        ``nodespec`` is split on ``+`` and the part of each entry before its
+        first ``/`` is kept; the resulting names are stored sorted.
+        '''
+        if job.nodelist is not None:
             return
 
-        with osext.change_dir(job.workdir):
-            output_ready = (os.path.exists(job.stdout) and
-                            os.path.exists(job.stderr))
+        hosts = (entry.split('/')[0] for entry in nodespec.split('+'))
+        job._nodelist = sorted(hosts)
 
-            done = job.cancelled or output_ready
-            if done:
-                t_now = time.time()
-                if job.completion_time is None:
-                    job._completion_time = t_now
+    def poll(self, *jobs):
+        '''Update the status of ``jobs`` using a single ``qstat -f`` call.
+
+        :class:`None` entries in ``jobs`` are ignored. For every job, the
+        state, the node list and, for completed jobs, the exit code are
+        updated from the ``qstat`` output. Jobs that ``qstat`` does not
+        report are assumed to have completed.
+
+        :raises JobSchedulerError: If ``qstat`` exits with a code other
+            than 0 or 153.
+        '''
+        if jobs:
+            # Filter out non-jobs
+            jobs = [job for job in jobs if job is not None]
 
-                time_from_finish = t_now - job.completion_time
-                if time_from_finish > PBS_OUTPUT_WRITEBACK_WAIT:
-                    job._completed = True
+        if not jobs:
+            return
+
+        completed = osext.run_command(
+            f'qstat -f {" ".join(job.jobid for job in jobs)}'
+        )
+
+        # Depending on the configuration, completed jobs will remain on the job
+        # list for a limited time, or be removed upon completion.
+        # If qstat cannot find any of the job IDs, it will return 153.
+        # Otherwise, it will return with return code 0 and print information
+        # only for the jobs it could find.
+        if completed.returncode == 153:
+            self.log('Return code is 153: jobids not known by scheduler, '
+                     'assuming all jobs completed')
+            for job in jobs:
+                job._state = 'COMPLETED'
+
+                # Flag the job as completed too, exactly as it is done below
+                # for a single job that is not known to the scheduler;
+                # otherwise the job would be left in the 'COMPLETED' state
+                # without ever being marked as completed
+                job._completed = True
+
+            return
+
+        if completed.returncode != 0:
+            raise JobSchedulerError(
+                f'qstat failed with exit code {completed.returncode} '
+                f'(standard error follows):\n{completed.stderr}'
+            )
+
+        # Store information for each job separately
+        jobinfo = {}
+        for job_raw_info in completed.stdout.split('\n\n'):
+            jobid_match = re.search(
+                r'^Job Id:\s*(?P<jobid>\S+)', job_raw_info, re.MULTILINE
+            )
+            if jobid_match:
+                jobid = jobid_match.group('jobid')
+                jobinfo[jobid] = job_raw_info
 
-    def poll(self, *jobs):
         for job in jobs:
-            self._poll_job(job)
+            if job.jobid not in jobinfo:
+                self.log(f'Job {job.jobid} not known to scheduler, '
+                         f'assuming job completed')
+                job._state = 'COMPLETED'
+                job._completed = True
+                continue
+
+            info = jobinfo[job.jobid]
+            state_match = re.search(
+                r'^\s*job_state = (?P<state>[A-Z])', info, re.MULTILINE
+            )
+            if not state_match:
+                self.log(f'Job state not found (job info follows):\n{info}')
+                continue
+
+            state = state_match.group('state')
+            job._state = JOB_STATES[state]
+            nodelist_match = re.search(
+                r'exec_host = (?P<nodespec>[\S\t\n]+)',
+                info, re.MULTILINE
+            )
+            if nodelist_match:
+                nodespec = nodelist_match.group('nodespec')
+                nodespec = re.sub(r'[\n\t]*', '', nodespec)
+                self._update_nodelist(job, nodespec)
+
+            if job.state == 'COMPLETED':
+                exitcode_match = re.search(
+                    r'^\s*exit_status = (?P<code>\d+)',
+                    info, re.MULTILINE,
+                )
+                if exitcode_match:
+                    job._exitcode = int(exitcode_match.group('code'))
+
+                # We report a job as finished only when its stdout/stderr are
+                # written back to the working directory
+                stdout = os.path.join(job.workdir, job.stdout)
+                stderr = os.path.join(job.workdir, job.stderr)
+                out_ready = os.path.exists(stdout) and os.path.exists(stderr)
+                done = job.cancelled or out_ready
+                if done:
+                    job._completed = True
+            elif (job.state in ['QUEUED', 'HELD', 'WAITING'] and
+                  job.max_pending_time):
+                if (time.time() - job.submit_time >= job.max_pending_time):
+                    self.cancel(job)
+                    job._exception = JobError('maximum pending time exceeded')
+
+
+@register_scheduler('torque')
+class TorqueJobScheduler(PbsJobScheduler):
+    '''Scheduler backend registered under the name ``torque``.
+
+    It derives from :class:`PbsJobScheduler` and sets ``TASKS_OPT`` to a
+    ``-l nodes=...:ppn=...`` resource request template.
+    '''
+
+    TASKS_OPT = '-l nodes={num_nodes}:ppn={num_cpus_per_node}'
Original file line number	Diff line number	Diff line change
`@@ -19,8 +19,7 @@`
`19`	`19`	`_scheduler_backend_modules = [`
`20`	`20`	`'reframe.core.schedulers.local',`
`21`	`21`	`'reframe.core.schedulers.slurm',`
`22`		`- 'reframe.core.schedulers.pbs',`
`23`		`- 'reframe.core.schedulers.torque'`
	`22`	`+ 'reframe.core.schedulers.pbs'`
`24`	`23`	`]`
`25`	`24`	`_schedulers = {}`
`26`	`25`