Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 165 additions & 23 deletions mache/discover.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,76 @@
import configparser
import importlib
import os
import re
import socket
import sys
from dataclasses import dataclass
from importlib import resources as importlib_resources
from typing import Iterable, List, Optional


def discover_machine(quiet=False):
"""
Figure out the machine from the host name
def discover_machine(
quiet: bool = False,
package: Optional[str] = None,
path: Optional[str] = None,
):
"""Figure out the machine from the host name.

Parameters
----------
quiet : bool, optional
Whether to print warnings if the machine name is ambiguous

package : str, optional
An additional Python package to search for machine config files
(``*.cfg``) that include a ``[discovery] hostname_re`` entry.

path : str, optional
An additional directory to search for machine config files (``*.cfg``)
that include a ``[discovery] hostname_re`` entry.

Returns
-------
machine : str
The name of the current machine
"""

hostname = socket.gethostname()
machine = None
machines_by_host_re = {
r'^andes': 'andes',
r'^aurora': 'aurora',
r'^x\d{4}c\d{1}s\d{1}b0n0': 'aurora',
r'^blueslogin': 'anvil',
r'^b\d{3}': 'anvil',
r'^ch-fe': 'chicoma-cpu',
r'^chrlogin': 'chrysalis',
r'^chr-\d{4}': 'chrysalis',
r'^compy': 'compy',
r'^n\d{4}': 'anvil',
r'^polaris': 'polaris',
r'^dane\d{1,4}': 'dane',
r'^ruby\d{1,4}': 'ruby',
}
for host_re, mach in machines_by_host_re.items():
p = re.compile(host_re)
if p.match(hostname):
machine = mach
break

rules = _get_discovery_rules(package=package, path=path)
matches: List[_DiscoveryRule] = []
for rule in rules:
try:
pattern = re.compile(rule.hostname_re)
except re.error:
if not quiet:
print(
f'Warning: invalid hostname_re {rule.hostname_re!r} '
f'for machine {rule.machine!r} from {rule.source}',
file=sys.stderr,
)
continue
if pattern.match(hostname):
matches.append(rule)

if matches:
machine = matches[0].machine
if len(matches) > 1 and not quiet:
others = ', '.join(sorted({rule.machine for rule in matches[1:]}))
print(
f'Warning: hostname {hostname!r} matches multiple machines; '
f'choosing {machine!r}. Other matches: {others}',
file=sys.stderr,
)

if machine is None and 'LMOD_SYSTEM_NAME' in os.environ:
hostname = os.environ['LMOD_SYSTEM_NAME']
if hostname == 'frontier':
# frontier's hostname is too generic to detect, so relying on
# LMOD_SYSTEM_NAME
machine = 'frontier'

if machine is None and 'NERSC_HOST' in os.environ:
hostname = os.environ['NERSC_HOST']
if hostname == 'perlmutter':
Expand Down Expand Up @@ -74,3 +100,119 @@ def discover_machine(quiet=False):
machine = fp.read().replace('\n', '').strip()

return machine


@dataclass(frozen=True)
class _DiscoveryRule:
    """One hostname-matching rule that maps a regex to a machine name.

    Instances are immutable because the dataclass is declared with
    ``frozen=True``.
    """

    # name of the machine that is selected when ``hostname_re`` matches
    machine: str
    # regular expression (kept as an uncompiled string) tested against the
    # hostname
    hostname_re: str
    # label describing where the rule was read from (e.g. ``path:...`` or
    # ``package:...``); it is included in warning messages
    source: str


def _parse_hostname_re_value(hostname_re: str) -> List[str]:
"""Parse one or more hostname regex patterns from a config value.

We support comma-separated and/or newline-separated entries.
"""
patterns: List[str] = []
for line in hostname_re.splitlines():
line = line.strip()
if not line:
continue
# Split only on comma+whitespace so patterns like `{1,4}` are safe.
for entry in re.split(r',\s+', line):
entry = entry.strip()
if entry:
patterns.append(entry)
return patterns


def _read_discovery_rules_from_cfg(
    cfg_path: str, machine: str, source: str
) -> List[_DiscoveryRule]:
    """Read the hostname discovery rules defined in one config file.

    Parameters
    ----------
    cfg_path : str
        Path of the config file to read

    machine : str
        The machine name to attach to every rule found in the file

    source : str
        A label describing where the file came from, stored on each rule

    Returns
    -------
    rules : list of _DiscoveryRule
        One rule per pattern in the ``[discovery] hostname_re`` option, or
        an empty list if the option is missing or blank
    """
    section, option = 'discovery', 'hostname_re'

    # RawConfigParser performs no interpolation, so regex metacharacters in
    # the patterns are read verbatim.
    parser = configparser.RawConfigParser()
    parser.read(cfg_path)

    found: List[_DiscoveryRule] = []
    if parser.has_option(section, option):
        value = parser.get(section, option, raw=True, fallback='').strip()
        if value:
            for pattern in _parse_hostname_re_value(value):
                found.append(
                    _DiscoveryRule(
                        machine=machine, hostname_re=pattern, source=source
                    )
                )
    return found


def _iter_cfgs_in_package(package: str) -> Iterable[tuple[str, str]]:
"""Yield (machine_name, cfg_path) from a package containing *.cfg files."""
module = importlib.import_module(package)
root = importlib_resources.files(module)
for child in root.iterdir():
if not child.is_file():
continue
name = child.name
if not name.endswith('.cfg'):
continue
machine = os.path.splitext(name)[0]
yield machine, str(child)


def _iter_cfgs_in_path(path: str) -> Iterable[tuple[str, str]]:
"""Yield (machine_name, cfg_path) from a directory of ``*.cfg`` files."""
if not os.path.isdir(path):
return
for name in sorted(os.listdir(path)):
if not name.endswith('.cfg'):
continue
machine = os.path.splitext(name)[0]
yield machine, os.path.join(path, name)


def _get_discovery_rules(
    *,
    package: Optional[str] = None,
    path: Optional[str] = None,
    builtin_package: str = 'mache.machines',
) -> List[_DiscoveryRule]:
    """Collect hostname discovery rules from all configured locations.

    Rules are returned in order of precedence:

    1) rules from ``path`` (if provided)
    2) rules from ``package`` (if provided)
    3) rules from the built-in machines package

    Parameters
    ----------
    package : str, optional
        An additional Python package to search for ``*.cfg`` files

    path : str, optional
        An additional directory to search for ``*.cfg`` files

    builtin_package : str, optional
        The package holding the built-in machine config files

    Returns
    -------
    rules : list of _DiscoveryRule
        All rules found, highest precedence first
    """
    # Each entry pairs the ``source`` label with a lazy iterator of
    # (machine, cfg_path); nothing is imported or listed until the
    # iterator is consumed below, so the locations are visited in order.
    locations = []
    if path is not None:
        locations.append((f'path:{path}', _iter_cfgs_in_path(path)))
    if package is not None:
        locations.append(
            (f'package:{package}', _iter_cfgs_in_package(package))
        )
    locations.append(
        (
            f'package:{builtin_package}',
            _iter_cfgs_in_package(builtin_package),
        )
    )

    collected: List[_DiscoveryRule] = []
    for label, cfgs in locations:
        for machine, cfg_path in cfgs:
            collected.extend(
                _read_discovery_rules_from_cfg(
                    cfg_path=cfg_path,
                    machine=machine,
                    source=label,
                )
            )
    return collected
6 changes: 6 additions & 0 deletions mache/machines/andes.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,9 @@ partitions = batch

# the full hostname of the machine
hostname = andes.olcf.ornl.gov

# Options related to machine discovery
[discovery]

# a regular expression used to identify this machine from its hostname
hostname_re = ^andes
7 changes: 7 additions & 0 deletions mache/machines/anvil.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,10 @@ public_diags = /lcrc/group/e3sm/public_html/diagnostics

# private diagnostics directory
private_diags = /lcrc/group/e3sm/diagnostics_private


# Options related to machine discovery
[discovery]

# regular expression(s) used to identify this machine from its hostname
hostname_re = ^blueslogin, ^b\d{3}, ^n\d{4}
7 changes: 7 additions & 0 deletions mache/machines/aurora.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,10 @@ queues = prod, debug

# the full hostname of the machine
hostname = aurora.alcf.anl.gov


# Options related to machine discovery
[discovery]

# regular expression(s) used to identify this machine from its hostname
hostname_re = ^aurora, ^x\d{4}c\d{1}s\d{1}b0n0
7 changes: 7 additions & 0 deletions mache/machines/chicoma-cpu.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,10 @@ hostname = wtrw.lanl.gov

# tunnel command
tunnel_hostname = ch-fe


# Options related to machine discovery
[discovery]

# a regular expression used to identify this machine from its hostname
hostname_re = ^ch-fe
7 changes: 7 additions & 0 deletions mache/machines/chrysalis.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,10 @@ public_diags = /lcrc/group/e3sm/public_html/diagnostics

# private diagnostics directory
private_diags = /lcrc/group/e3sm/diagnostics_private


# Options related to machine discovery
[discovery]

# regular expression(s) used to identify this machine from its hostname
hostname_re = ^chrlogin, ^chr-\d{4}
7 changes: 7 additions & 0 deletions mache/machines/compy.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,10 @@ qos = regular

# the full hostname of the machine
hostname = compy.pnl.gov


# Options related to machine discovery
[discovery]

# a regular expression used to identify this machine from its hostname
hostname_re = ^compy
6 changes: 6 additions & 0 deletions mache/machines/dane.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,9 @@ partitions = pbatch, pdebug

# the full hostname of the machine
hostname = dane.llnl.gov

# Options related to machine discovery
[discovery]

# a regular expression used to identify this machine from its hostname
hostname_re = ^dane\d{1,4}
7 changes: 7 additions & 0 deletions mache/machines/frontier.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,10 @@ cray_compilers = True

# the full hostname of the machine
hostname = frontier.olcf.ornl.gov


# Options related to machine discovery
[discovery]

# a regular expression used to identify this machine from its hostname
hostname_re = ^frontier
7 changes: 7 additions & 0 deletions mache/machines/polaris.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,10 @@ partitions = prod

# the full hostname of the machine
hostname = polaris.alcf.anl.gov


# Options related to machine discovery
[discovery]

# a regular expression used to identify this machine from its hostname
hostname_re = ^polaris
7 changes: 7 additions & 0 deletions mache/machines/ruby.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,10 @@ partitions = pbatch, pdebug

# the full hostname of the machine
hostname = ruby.llnl.gov


# Options related to machine discovery
[discovery]

# a regular expression used to identify this machine from its hostname
hostname_re = ^ruby\d{1,4}
65 changes: 65 additions & 0 deletions tests/test_discover_machine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import textwrap

from mache.discover import discover_machine


def test_discover_machine_builtin_rules(monkeypatch):
    """A hostname starting with ``andes`` is discovered as ``andes``.

    No ``package`` or ``path`` is passed to ``discover_machine``.
    """
    monkeypatch.setattr('socket.gethostname', lambda: 'andes001')
    assert discover_machine(quiet=True) == 'andes'


def test_discover_machine_path_overrides_package(monkeypatch, tmp_path):
    """Rules from ``path`` take precedence over rules from ``package``.

    A temporary package and a temporary directory each provide a config
    file whose ``hostname_re`` matches the patched hostname; the machine
    named after the config file in the directory must be returned.
    """
    # Both package and path match, path wins.
    pkg_dir = tmp_path / 'extmachines'
    pkg_dir.mkdir()
    (pkg_dir / '__init__.py').write_text('')
    (pkg_dir / 'pkgmach.cfg').write_text(
        textwrap.dedent(
            """
            [discovery]
            hostname_re = ^testhost$
            """
        ).lstrip()
    )

    machines_dir = tmp_path / 'machines'
    machines_dir.mkdir()
    (machines_dir / 'pathmach.cfg').write_text(
        textwrap.dedent(
            """
            [discovery]
            hostname_re = ^testhost$
            """
        ).lstrip()
    )

    # make the temporary package importable by name
    monkeypatch.syspath_prepend(str(tmp_path))
    monkeypatch.setattr('socket.gethostname', lambda: 'testhost')

    assert (
        discover_machine(
            quiet=True,
            package='extmachines',
            path=str(machines_dir),
        )
        == 'pathmach'
    )


def test_discover_machine_package_rules(monkeypatch, tmp_path):
    """A rule from an extra ``package`` identifies the machine.

    The machine name is taken from the config file name (``mymachine.cfg``)
    inside a temporary package that is made importable for the test.
    """
    pkg_dir = tmp_path / 'extmachines2'
    pkg_dir.mkdir()
    (pkg_dir / '__init__.py').write_text('')
    (pkg_dir / 'mymachine.cfg').write_text(
        textwrap.dedent(
            """
            [discovery]
            hostname_re = ^pkg-host\\d+$
            """
        ).lstrip()
    )

    # make the temporary package importable by name
    monkeypatch.syspath_prepend(str(tmp_path))
    monkeypatch.setattr('socket.gethostname', lambda: 'pkg-host123')

    assert discover_machine(quiet=True, package='extmachines2') == 'mymachine'
Loading