Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion src/lib/scheduler_clients/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,28 @@
# SPDX-License-Identifier: BSD-3-Clause

# models
from enum import Enum
from typing import List, Optional, Dict
from lib.models import CamelModel

from pydantic import Field, AliasChoices


class NodeState(str, Enum):
    """Scheduler-independent node states.

    Each member's value is its own name as an upper-case string. Because the
    enum also derives from ``str``, members compare equal to that string.
    """

    # Availability for work
    IDLE = "IDLE"
    ALLOCATED = "ALLOCATED"
    MIXED = "MIXED"

    # Not usable / being taken out of service
    DOWN = "DOWN"
    DRAIN = "DRAIN"
    OFFLINE = "OFFLINE"

    # Claimed or busy
    RESERVED = "RESERVED"
    COMPLETING = "COMPLETING"
    BUSY = "BUSY"

    # Power transitions
    POWERING_DOWN = "POWERING_DOWN"
    POWERING_UP = "POWERING_UP"

    # Fallback for anything that cannot be classified
    UNKNOWN = "UNKNOWN"


class SchedPing(CamelModel):
hostname: Optional[str] = Field(default=None, nullable=True)
pinged: Optional[str] = Field(default=None, nullable=True)
Expand Down Expand Up @@ -117,7 +133,7 @@ class NodeModel(CamelModel):
name: str
address: Optional[str] = Field(default=None, nullable=True)
hostname: Optional[str] = Field(default=None, nullable=True)
state: str | List[str]
state: List[NodeState]
partitions: Optional[List[str]] = Field(default=None, nullable=True)
weight: Optional[int] = Field(default=None, nullable=True)
alloc_memory: Optional[int] = Field(default=None, nullable=True)
Expand Down
21 changes: 21 additions & 0 deletions src/lib/scheduler_clients/pbs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,28 @@
JobStatus,
JobTime,
NodeModel,
NodeState,
PartitionModel,
ReservationModel,
SchedPing,
)

# Lookup table from lower-case PBS node state names to the common NodeState.
# Names missing from this table are reported as NodeState.UNKNOWN by
# _map_pbs_state().
_PBS_STATE_MAP: dict[str, NodeState] = {
    "free": NodeState.IDLE,
    "job-exclusive": NodeState.ALLOCATED,
    # OpenPBS / PBS Professional name for a vnode whose CPUs are all in use.
    "job-busy": NodeState.ALLOCATED,
    "job-sharing": NodeState.MIXED,
    "time-shared": NodeState.MIXED,
    "down": NodeState.DOWN,
    "offline": NodeState.OFFLINE,
    # Mapped to OFFLINE for consistency with how Slurm "maint" is treated.
    "maintenance": NodeState.OFFLINE,
    "reserve": NodeState.RESERVED,
    # OpenPBS / PBS Professional name for a vnode held by a reservation.
    "resv-exclusive": NodeState.RESERVED,
    "busy": NodeState.BUSY,
    "state-unknown": NodeState.UNKNOWN,
}


def _map_pbs_state(raw: str) -> NodeState:
    """Translate a raw PBS node state name into a ``NodeState``.

    The lookup is case-insensitive. Names that are not present in
    ``_PBS_STATE_MAP`` yield ``NodeState.UNKNOWN``.
    """
    normalized = raw.lower()
    if normalized in _PBS_STATE_MAP:
        return _PBS_STATE_MAP[normalized]
    return NodeState.UNKNOWN


def parse_timestamp(value):
"""
Expand Down Expand Up @@ -152,6 +169,10 @@ def __init__(self, **kwargs):
kwargs["hostname"] = kwargs.get("resources_available", {}).get("host", None)
kwargs["alloc_memory"] = kwargs.get("resources_assigned", {}).get("mem", None)
kwargs["alloc_cpus"] = kwargs.get("resources_assigned", {}).get("ncpus", 0)
state = kwargs.get("state", [])
if isinstance(state, str):
state = [state]
kwargs["state"] = [_map_pbs_state(s) for s in state]

super().__init__(**kwargs)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def parse_output(self, stdout: str, stderr: str, exit_status: int = 0):
"name": node_info[5],
"address": node_info[6],
"hostname": node_info[7],
"state": node_info[8],
"state": [node_info[8]],
"partitions": node_info[9].split(","),
"weight": _int_or_none(node_info[10]),
"slurmd_version": node_info[11],
Expand Down
47 changes: 46 additions & 1 deletion src/lib/scheduler_clients/slurm/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,52 @@
JobTask,
JobTime,
NodeModel,
NodeState,
PartitionModel,
ReservationModel,
SchedPing,
)

# Lookup table from lower-case Slurm node state names (long and abbreviated
# forms) to the common NodeState. Names missing from this table are reported
# as NodeState.UNKNOWN by _map_slurm_state().
_SLURM_STATE_MAP: dict[str, NodeState] = {
    "idle": NodeState.IDLE,
    "allocated": NodeState.ALLOCATED,
    "alloc": NodeState.ALLOCATED,
    "mixed": NodeState.MIXED,
    "mix": NodeState.MIXED,
    "down": NodeState.DOWN,
    "fail": NodeState.DOWN,
    "failing": NodeState.DOWN,
    "failg": NodeState.DOWN,
    "drain": NodeState.DRAIN,
    "drained": NodeState.DRAIN,
    "draining": NodeState.DRAIN,
    "drng": NodeState.DRAIN,
    "completing": NodeState.COMPLETING,
    "comp": NodeState.COMPLETING,
    "maint": NodeState.OFFLINE,
    # Long form of "maint"; kept in sync with the short form above.
    "maintenance": NodeState.OFFLINE,
    "reserved": NodeState.RESERVED,
    "resv": NodeState.RESERVED,
    "power_down": NodeState.POWERING_DOWN,
    "pow_dn": NodeState.POWERING_DOWN,
    # Long form of "pow_dn".
    "powering_down": NodeState.POWERING_DOWN,
    "power_up": NodeState.POWERING_UP,
    "pow_up": NodeState.POWERING_UP,
    # Long form of "pow_up".
    "powering_up": NodeState.POWERING_UP,
    "future": NodeState.UNKNOWN,
    "futr": NodeState.UNKNOWN,
    "planned": NodeState.UNKNOWN,
    "plnd": NodeState.UNKNOWN,
    "blocked": NodeState.UNKNOWN,
    "unknown": NodeState.UNKNOWN,
    "unk": NodeState.UNKNOWN,
    "perfctrs": NodeState.UNKNOWN,
    "npc": NodeState.UNKNOWN,
}


def _map_slurm_state(raw: str) -> NodeState:
    """Translate a raw Slurm node state string into a ``NodeState``.

    Trailing sinfo flag characters are removed and the remainder is
    lower-cased before the lookup in ``_SLURM_STATE_MAP``. Names that are not
    in the table yield ``NodeState.UNKNOWN``.
    """
    # sinfo appends one-character flags to the state name: "*" not responding,
    # "~" powered off, "#" powering up, "!" pending power down, "%" powering
    # down, "$" maintenance reservation, "@" pending reboot, "^" reboot issued,
    # "-" planned by the backfill scheduler. "+" is kept from the previous
    # suffix set for backward compatibility.
    key = raw.rstrip("*+~#!%$@^-").lower()
    return _SLURM_STATE_MAP.get(key, NodeState.UNKNOWN)


def slurm_int_to_int(v) -> Optional[int]:
# starting from v0.0.40 slurm api represents int with a complex object
Expand Down Expand Up @@ -209,7 +250,11 @@ def cast_slurm_jobid_to_str(cls, v):


class SlurmNode(NodeModel):
    """Node model for Slurm that normalizes ``state`` to ``NodeState`` values."""

    def __init__(self, **kwargs):
        """Map the raw Slurm state(s) in ``kwargs`` before model validation.

        ``state`` may be a single string or an iterable of strings. A single
        string is wrapped in a list first, so that it is mapped as one state
        rather than character by character. A missing or ``None`` state is
        passed through untouched and left for the base model to validate.
        """
        state = kwargs.get("state")
        if state is not None:
            if isinstance(state, str):
                state = [state]
            kwargs["state"] = [_map_slurm_state(s) for s in state]
        super().__init__(**kwargs)


class SlurmPing(SchedPing):
Expand Down
3 changes: 2 additions & 1 deletion src/lib/scheduler_clients/slurm/slurm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ async def get_job_metadata(
)

async def get_nodes(self, username: str, jwt_token: str) -> List[SlurmNode] | None:
    """Fetch nodes from the default Slurm client and validate them as ``SlurmNode``.

    Returns ``None`` when the underlying client returns ``None``, which the
    return annotation allows. Otherwise each returned item is passed through
    ``SlurmNode.model_validate``.
    """
    res = await self.slurm_default_client.get_nodes(username, jwt_token)
    if res is None:
        # Nothing to validate; propagate the absence instead of iterating None.
        return None
    return [SlurmNode.model_validate(node) for node in res]

async def get_reservations(
self, username: str, jwt_token: str
Expand Down
Loading