Skip to content

Commit d937532

Browse files
committed
feat: add transformer
a transformer is pretty dumb right now - it takes the matches output, a selection algorithm, and passes the result back to the solver (that knows how to parse the match details for metadata) and then returns a template. We will eventually not require a solver and just use an LLM or similar Signed-off-by: vsoch <[email protected]>
1 parent 6eba410 commit d937532

File tree

14 files changed

+380
-31
lines changed

14 files changed

+380
-31
lines changed

examples/fractale/README.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,45 @@ fractale satisfy ./examples/fractale/jobspec-containment-unsatisfied.yaml
120120
```
121121
We likely want to have a more structured query syntax that can handle AND, OR, and other specifics. The actual search should remain general to support any generic key/value pair of attributes. My database structure and queries are also bad.
122122

123+
## Script Request
124+
125+
A script request is going to:
126+
127+
```console
128+
=> 1. take input (right now jobspec) "what clusters can satisfy this request?"
129+
=> 2. subsystem solver "these clusters can"
130+
=> 3. selection plugin (defaults to random) "how many and how do you want to choose from this set?"
131+
=> 4. I have chosen N, transform them appropriately to submit
132+
=> 5. render the matches based on the subsystem
133+
```
134+
135+
Right now we have this done rather manually, and the idea is that an LLM can eventually more elegantly do it.
136+
Here is an example.
137+
138+
```bash
139+
$ fractale script ./examples/fractale/software-curl.yaml
140+
```
141+
```console
142+
=> 🍇 Loading cluster "a" subsystem "containment"
143+
=> 🍇 Loading cluster "a" subsystem "modules"
144+
=> 🍇 Loading cluster "a" subsystem "spack"
145+
=> Exploring cluster "a" containment subsystem
146+
(1/1) satisfied resource core
147+
Cluster "a" is a match
148+
{
149+
│ 'version': 1,
150+
│ 'resources': [{'type': 'slot', 'count': 1, 'with': [{'type': 'core', 'count': 1}], 'label': 'task'}],
151+
│ 'tasks': [{'command': ['gmx'], 'slot': 'task', 'count': {'per_slot': 1}}],
152+
│ 'attributes': {
153+
│ │ 'system': {
154+
│ │ │ 'duration': 0,
155+
│ │ │ 'requires': {'software': [{'name': 'curl', 'type': 'binary'}]},
156+
│ │ │ 'files': {'batch-script': {'mode': 33216, 'data': '#!/bin/bash\n\nspack load curl\ngmx', 'encoding': 'utf-8'}}
157+
│ │ }
158+
│ }
159+
}
160+
```
161+
123162
## Save
124163

125164
We can save an image of our subsystem for a cluster. E.g.,

fractale/cli/__init__.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,22 @@ def get_parser():
8989
formatter_class=argparse.RawTextHelpFormatter,
9090
description="determine clusters that satisfy a jobspec based on user subsystems",
9191
)
92-
satisfy.add_argument("jobspec", help="jobspec yaml or json file")
93-
for cmd in [satisfy, save]:
92+
# Generate a jobspec script
93+
script = subparsers.add_parser(
94+
"script",
95+
formatter_class=argparse.RawTextHelpFormatter,
96+
description="generate a batch script after satisfy",
97+
)
98+
script.add_argument(
99+
"--selector", help="selection algorithm to use", default="random", choices=["random"]
100+
)
101+
script.add_argument(
102+
"--transformer", help="transformer to use", default="flux", choices=["flux"]
103+
)
104+
for cmd in [satisfy, script]:
105+
cmd.add_argument("jobspec", help="jobspec yaml or json file")
106+
107+
for cmd in [satisfy, save, script]:
94108
cmd.add_argument(
95109
"--solver",
96110
help="subsystem solved backend",
@@ -148,6 +162,8 @@ def help(return_code=0):
148162
from .generate_subsystem import main
149163
elif args.command == "satisfy":
150164
from .satisfy import main
165+
elif args.command == "script":
166+
from .script import main
151167
elif args.command == "save":
152168
from .save import main
153169
else:

fractale/selector/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import fractale.selector.algorithms as algorithms
2+
3+
plugins = {"random": algorithms.random_selection}
4+
5+
6+
def get_selector(name):
    """
    Look up a selection algorithm by its registered plugin name.

    Returns the callable registered under the name, and raises a
    ValueError when no plugin with that name is registered.
    """
    if name in plugins:
        return plugins[name]
    raise ValueError(f"{name} is not a valid selection algorithm.")

fractale/selector/algorithms.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import random
2+
3+
# Selection algorithms for choosing clusters
4+
# Each selection algorithm should return a list
5+
6+
7+
def random_selection(choices):
    """
    Select one candidate at random.

    Selection algorithms return a list, so the single pick is wrapped in a
    one-element list. Any iterable of candidates is accepted. When there is
    nothing to choose from, an empty list is returned.
    """
    # random.choice needs an indexable sequence and raises IndexError when it
    # is empty, so materialize the candidates and guard the empty case.
    candidates = list(choices)
    if not candidates:
        return []
    return [random.choice(candidates)]

fractale/subsystem/match.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import json
2+
from dataclasses import dataclass
3+
4+
5+
@dataclass
class Match:
    """
    A single match is a datum that goes into a match set.

    It ties one cluster and one of its subsystems to what was requested
    and what was found for that request.
    """

    # Name of the cluster that matched.
    cluster: str
    # Name of the subsystem on that cluster.
    subsystem: str
    # The requirements that were matched against.
    # NOTE(review): annotated as dict, but presumably may also be a list of
    # requirement dicts depending on the solver - confirm against callers.
    requires: dict
    # Solver-specific details of the match (presumably the matching node
    # identifiers - confirm against the solver that creates the match).
    details: dict
15+
16+
17+
class MatchSet:
    """
    A MatchSet includes one or more matches of clusters-> subsystems to a spec

    Matches are stored in a nested lookup of the form
    {cluster: {subsystem: [match, ...]}}. An empty MatchSet is falsy, so
    it can be used directly in a condition such as `if matches:`.
    """

    def __init__(self):
        # Lookup by cluster, subsystem, and then requirements and matches.
        # This needs to be enough to return to the caller and generate
        # templates to submit jobs.
        self.matches = {}

    def __bool__(self):
        """
        Return True only if at least one cluster has a match.

        Without this, instances would always be truthy and a check for
        "no matches" could never trigger.
        """
        return bool(self.matches)

    @property
    def count(self):
        """
        Return the number of cluster matches.
        """
        return len(self.matches)

    @property
    def clusters(self):
        """
        Return a list of clusters.
        """
        return list(self.matches)

    def all(self):
        """
        Return every match, across all clusters and subsystems, as a list.
        """
        return list(self.iterset())

    def remove(self, cluster):
        """
        Remove a cluster (and all of its matches) from the set.

        Removing a cluster that is not present is a no-op.
        """
        self.matches.pop(cluster, None)

    def iterset(self):
        """
        Iterate over every match, across all clusters and subsystems.
        """
        for by_subsystem in self.matches.values():
            for matches in by_subsystem.values():
                yield from matches

    def add(self, cluster, subsystem, requires, details):
        """
        Add a match, including cluster, subsystem, requirements, and details.
        """
        new_match = Match(cluster, subsystem, requires, details)
        # This could be given directly, but I don't want to assume
        # that a cluster and subsystem only has one possible match.
        by_subsystem = self.matches.setdefault(cluster, {})
        by_subsystem.setdefault(subsystem, []).append(new_match)

fractale/subsystem/solver/base.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
import json
12
import os
23

34
import fractale.jobspec as jspec
45
import fractale.utils as utils
56
from fractale.logger import LogColors, logger
7+
from fractale.subsystem.match import MatchSet
68

79

810
class Solver:
@@ -32,14 +34,20 @@ def prepare_requirements(self, jobspec):
3234
requires["containment"] = jspec.flatten_jobspec_resources(js)
3335
return requires
3436

35-
def satisfied(self, jobspec):
37+
def render(self, subsystems):
    """
    Take in a set of cluster matches and return what should be rendered
    for them.

    This base implementation renders nothing and returns an empty list.
    NOTE(review): presumably solver backends override this to produce
    lines for the transformer - confirm against the subclasses.
    """
    return []
42+
43+
def satisfied(self, jobspec, return_results=False):
3644
"""
3745
Determine if a jobspec is satisfied by user-space subsystems.
3846
"""
3947
requires = self.prepare_requirements(jobspec)
4048

4149
# These clusters will satisfy the request
42-
matches = set()
50+
matches = MatchSet()
4351

4452
# We don't care about the association with tasks - the requires are matching clusters to entire jobs
4553
# We could optimize this to be fewer queries, but it's likely trivial for now
@@ -71,12 +79,15 @@ def satisfied(self, jobspec):
7179
nodes = self.find_nodes(cluster, name, items)
7280
if not nodes:
7381
continue
74-
matches.add((cluster, name))
82+
# This is adding cluster, subsystem name, match criteria, and node ids
83+
matches.add(cluster, name, items, nodes)
7584

7685
if matches:
77-
print(f"\n{LogColors.OKBLUE}({len(matches)}) Matches {LogColors.ENDC}")
78-
for match in matches:
79-
print(f"cluster ({match[0]}) subsystem ({match[1]})")
86+
print(f"\n{LogColors.OKBLUE}({matches.count}) Matches {LogColors.ENDC}")
87+
for match in matches.iterset():
88+
print(f"cluster ({match.cluster}) subsystem ({match.subsystem})")
89+
if return_results:
90+
return matches
8091
return True
8192
else:
8293
print(f"{LogColors.RED}=> No Matches{LogColors.ENDC}")
@@ -85,9 +96,14 @@ def satisfied(self, jobspec):
8596
def load(self, path):
8697
"""
8798
Load a group of subsystem files, typically json JGF.
99+
100+
We also are careful to store metadata here that might be needed for
101+
rendering.
88102
"""
89103
from fractale.subsystem.subsystem import Subsystem
90104

105+
self.metadata = {}
106+
91107
if not os.path.exists(path):
92108
raise ValueError(f"User subsystem directory {path} does not exist.")
93109
files = utils.recursive_find(path, "graph[.]json")
@@ -96,3 +112,4 @@ def load(self, path):
96112
for filename in files:
97113
new_subsystem = Subsystem(filename)
98114
self.load_subsystem(new_subsystem)
115+
self.metadata[new_subsystem.name] = new_subsystem.metadata

fractale/subsystem/solver/database.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,27 @@ def get_subsystem_nodes(self, cluster, subsystem):
117117
labels = self.query(statement)
118118
return [f"'{x[0]}'" for x in labels]
119119

120+
def render(self, subsystems):
    """
    Yield lines for the transformer.

    The subsystems argument maps a subsystem name to the matches found
    for it. Each match exposes `requires`, an iterable of requirement
    dicts. Every requirement that maps to a known load command yields one
    line (prefixed with a newline):

    - spack: type "binary" with a "name" -> "spack load <name>"
    - environment-modules: type "module" with an "attribute.name"
      -> "module load <name>"

    Other subsystems and non-matching requirements yield nothing.
    """
    for subsystem, items in subsystems.items():
        for item in items:
            # This is actually easier to do than a query!
            if subsystem == "spack":
                for require in item.requires:
                    item_type = require.get("type")
                    item_name = require.get("name")
                    if item_type == "binary" and item_name is not None:
                        yield f"\nspack load {item_name}"

            elif subsystem == "environment-modules":
                # Each requirement has to be inspected on its own, exactly
                # as for spack; there is no requirement bound outside of
                # this loop.
                for require in item.requires:
                    item_type = require.get("type")
                    # TODO we need to test if this will work with <.>
                    item_name = require.get("attribute.name")
                    if item_type == "module" and item_name is not None:
                        yield f"\nmodule load {item_name}"
140+
120141
def find_nodes(self, cluster, name, items):
121142
"""
122143
Given a list of node labels, find children (attributes)

0 commit comments

Comments
 (0)