wip: creation of subsystem solver as a plugin

vsoch · vsoch · commit f5dfdf597888 · 2025-04-19T20:48:39.000-06:00
We want solvers for subsystems (what determines if
something is a valid match) to be an interface. E.g.,
the first approach I took was a database, but I also
want to try a python (C++ backed) library that creates
a graph. Mostly because that way would be really fun.

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
diff --git a/examples/fractale/README.md b/examples/fractale/README.md
@@ -25,6 +25,11 @@ fractale generate --cluster A spack /home/vanessa/Desktop/Code/spack
 
 ## Satisfy Request
 
+Satisfy asks two questions:
+
+1. Which clusters have the subsystem resources that I need?
+2. Which clusters have the job resources that I need?
+
 This is the step where we want to say "Run gromacs on 2-4 nodes with these requirements." Since we haven't formalized a way to do that, I'm going to start with a flux jobspec, and then add attributes that can be used to search our subsystems. For example, I generated [software-gromacs.json](software-gromacs.json) with:
 
 ```bash
diff --git a/fractale/cli/__init__.py b/fractale/cli/__init__.py
@@ -8,6 +8,7 @@
 from compspec.plugin.registry import PluginRegistry
 
 import fractale
+import fractale.defaults as defaults
 from fractale.logger import setup_logger
 
 # Generate the plugin registry to add parsers
@@ -80,6 +81,12 @@ def get_parser():
     )
     for cmd in [satisfy]:
         cmd.add_argument("jobspec", help="jobspec yaml or json file")
+        cmd.add_argument(
+            "--backend",
+            help="subsystem solved backend",
+            default=defaults.solver_backend_default,
+            choices=defaults.solver_backends,
+        )
 
     extractors = generate.add_subparsers(
         title="generate",
diff --git a/fractale/cli/satisfy.py b/fractale/cli/satisfy.py
@@ -3,7 +3,7 @@
 import sys
 
 from fractale.store import FractaleStore
-from fractale.subsystem import get_subsystem_registry
+from fractale.subsystem import get_subsystem_solver
 
 
 def main(args, extra, **kwargs):
@@ -12,6 +12,6 @@ def main(args, extra, **kwargs):
     This is a fairly simple (flat) check.
     """
     store = FractaleStore(args.config_dir)
-    registry = get_subsystem_registry(store.clusters_root)
-    is_satisfied = registry.satisfied(args.jobspec)
+    solver = get_subsystem_solver(store.clusters_root, args.backend)
+    is_satisfied = solver.satisfied(args.jobspec)
     sys.exit(0 if is_satisfied else -1)
diff --git a/fractale/defaults.py b/fractale/defaults.py
@@ -1,3 +1,5 @@
 fractale_dir = ".fractale"
 valid_settings = {"sharedfs", "stage"}
 sharedfs = True
+solver_backends = ["database", "graph"]
+solver_backend_default = "database"
diff --git a/fractale/subsystem/__init__.py b/fractale/subsystem/__init__.py
@@ -1,9 +1,9 @@
 import os
 
-from .subsystem import SubsystemRegistry
+from .subsystem import SubsystemSolver
 
 
-def get_subsystem_registry(path):
+def get_subsystem_solver(path, backend="database"):
     """
     Generate a user subsystem registry, where the structure is expected
     to be a set of <cluster>/<subsystem>. For the FractaleStore, this
@@ -13,4 +13,4 @@ def get_subsystem_registry(path):
         raise ValueError(f"Cluster subsystem root {path} does not exist")
 
     # Generate the subsystem registry
-    return SubsystemRegistry(path)
+    return SubsystemSolver(path, backend)
diff --git a/fractale/subsystem/solver/__init__.py b/fractale/subsystem/solver/__init__.py
@@ -0,0 +1,13 @@
+import os
+
+from .database import DatabaseSolver
+
+
+def load_solver(backend, path):
+    """
+    Instantiate the solver for a named backend.
+
+    Only the "database" backend is implemented here; it is constructed
+    with path. Any other backend name raises a ValueError.
+    """
+    # Guard first so the supported case reads as the straight-line path
+    if backend != "database":
+        raise ValueError(f"Unsupported backend {backend}")
+
+    return DatabaseSolver(path)
diff --git a/fractale/subsystem/solver/database.py b/fractale/subsystem/solver/database.py
@@ -0,0 +1,244 @@
+import os
+import sqlite3
+
+import fractale.subsystem.queries as queries
+import fractale.utils as utils
+from fractale.logger import LogColors, logger
+
+
+class DatabaseSolver:
+    """
+    A database solver solves for a cluster based on a simple database.
+
+    Subsystem graphs are flattened into an in-memory sqlite database: the
+    type and metadata attributes of every graph node become rows of the
+    attributes table, and a jobspec is satisfied when a subsystem has nodes
+    whose attributes match what the jobspec requires.
+
+    The solver can be used as a context manager, in which case the database
+    connection is closed when the block exits.
+
+    TODO: we need to have counters or another strategy for containment.
+    """
+
+    def __init__(self, path):
+        """
+        Create the in-memory database and load the subsystems under path.
+
+        Errors raised while creating tables or loading are propagated after
+        the connection has been closed.
+        """
+        self.systems = {}
+        self.conn = sqlite3.connect(":memory:")
+        try:
+            self.create_tables()
+            self.load(path)
+        except Exception:
+            # Don't leak the connection when setup fails part way through
+            self.close()
+            raise
+
+    def __enter__(self):
+        """
+        Enter a with block, returning the solver itself.
+        """
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """
+        Close the connection on leaving a with block.
+
+        Returns None, so an exception raised in the block is not suppressed.
+        """
+        self.close()
+
+    def close(self):
+        """
+        Close the database connection.
+        """
+        self.conn.close()
+
+    def create_tables(self):
+        """
+        Create tables for subsystems, clusters, nodes, and attributes.
+
+        Note that I'm flattening the graph, so edges become attributes for
+        nodes so it's easy to query. This is a reasonable first shot over
+        implementing an actual graph database.
+        """
+        cursor = self.conn.cursor()
+
+        # Only save metadata we absolutely need
+        # Note I'm not saving edges because we don't use
+        # them for anything - we are going to parse them
+        # into node attributes instead.
+        create_sql = [
+            queries.create_subsystem_sql,
+            queries.create_clusters_sql,
+            queries.create_nodes_sql,
+            queries.create_attributes_sql,
+        ]
+        for sql in create_sql:
+            cursor.execute(sql)
+        self.conn.commit()
+
+    def load(self, path):
+        """
+        Load a group of subsystem files, typically json JGF.
+
+        Every file under path that recursive_find returns for the pattern
+        "graph[.]json" is parsed as a Subsystem and loaded. Raises ValueError
+        if path does not exist or no such file is found.
+        """
+        # Imported here rather than at module level
+        # NOTE(review): presumably to avoid a circular import - confirm
+        from fractale.subsystem.subsystem import Subsystem
+
+        if not os.path.exists(path):
+            raise ValueError(f"User subsystem directory {path} does not exist.")
+        files = utils.recursive_find(path, "graph[.]json")
+        if not files:
+            raise ValueError(f"There are no cluster subsystems defined under root {path}")
+        for filename in files:
+            self.load_subsystem(Subsystem(filename))
+
+    def load_subsystem(self, subsystem):
+        """
+        Load a new subsystem to the memory database
+
+        The cluster is created if it is not known yet, the subsystem is
+        inserted, and each node contributes one "type" attribute row plus one
+        row for each of its metadata attributes. All values are bound as
+        statement parameters (as text) instead of being formatted into the
+        SQL, so quotes in names or values cannot break or alter a statement.
+        """
+        cursor = self.conn.cursor()
+        cluster = str(subsystem.cluster)
+        name = str(subsystem.name)
+
+        # Create the cluster if it doesn't exist
+        statement = 'INSERT OR IGNORE INTO clusters ("name") VALUES (?)'
+        values = (cluster,)
+        logger.debug(f"{statement} {values}")
+        cursor.execute(statement, values)
+        self.conn.commit()
+
+        # Create the subsystem - it should error if already exists
+        statement = 'INSERT INTO subsystems ("name", "cluster", "type") VALUES (?, ?, ?)'
+        values = (name, cluster, str(subsystem.type))
+        logger.debug(f"{statement} {values}")
+        cursor.execute(statement, values)
+        self.conn.commit()
+
+        # TODO: rows are not inserted into the nodes table yet, only
+        # attributes, so get_subsystem_nodes has nothing to return for now.
+
+        # Now all attributes, and also include type because I'm lazy
+        statement = (
+            'INSERT INTO attributes ("cluster", "subsystem", "node", "name", "value") '
+            "VALUES (?, ?, ?, ?, ?)"
+        )
+        rows = []
+        for nid, node in subsystem.iter_nodes():
+            metadata = node["metadata"]
+            rows.append((cluster, name, str(nid), "type", str(metadata["type"])))
+            for key, value in metadata.get("attributes", {}).items():
+                rows.append((cluster, name, str(nid), str(key), str(value)))
+        cursor.executemany(statement, rows)
+
+        # Note that we aren't doing anything with edges currently.
+        self.conn.commit()
+
+    def get_subsystem_nodes(self, cluster, subsystem):
+        """
+        Get nodes of a subsystem and cluster
+
+        Technically we could skip labels, but I'm assuming we eventually want
+        nodes in this query somewhere. Each returned label is wrapped in
+        single quotes.
+        """
+        statement = "SELECT label from nodes WHERE subsystem = ? AND cluster = ?;"
+        labels = self.query(statement, (str(subsystem), str(cluster)))
+        return [f"'{x[0]}'" for x in labels]
+
+    def find_nodes(self, cluster, name, items):
+        """
+        Find nodes of a subsystem whose attributes satisfy the requirements.
+
+        Each item is a set of key/value requirements for one NODE: a node
+        matches an item when it has a matching attribute row for every key.
+        Values are compared with LIKE. Returns the set of nodes matching the
+        items, or an empty set as soon as one item cannot be satisfied, since
+        then the cluster does not match.
+        """
+        # The node column is selected by name so the result does not depend
+        # on the order of columns in the attributes table.
+        statement = (
+            "SELECT node from attributes WHERE cluster = ? AND subsystem = ? "
+            "AND name = ? AND value like ?;"
+        )
+
+        # Final nodes that satisfy all item requirements
+        satisfy = set()
+
+        for item in items:
+            # None means no requirement was checked yet, all are contenders
+            nodes = None
+            for key, value in item.items():
+                params = (str(cluster), str(name), str(key), str(value))
+                found = {row[0] for row in self.query(statement, params)}
+                nodes = found if nodes is None else nodes & found
+
+                # If we don't have nodes left, the cluster isn't a match
+                if not nodes:
+                    return set()
+
+            # If we get down here, we found a matching node for one item requirement
+            if nodes:
+                satisfy.update(nodes)
+        return satisfy
+
+    def query(self, statement, params=()):
+        """
+        Issue a query to the database, returning fetchall.
+
+        params are bound to the "?" placeholders of the statement. The
+        statement (with its parameters, truncated to 150 characters) and
+        the number of results are logged.
+        """
+        cursor = self.conn.cursor()
+        printed = f"{statement} {tuple(params)}" if params else statement
+
+        # Don't overwhelm the output!
+        if len(printed) > 150:
+            printed = printed[:150] + "..."
+        printed = f"{LogColors.OKCYAN}{printed}{LogColors.ENDC}"
+        cursor.execute(statement, params)
+        self.conn.commit()
+
+        # Get results, show query and number of results
+        results = cursor.fetchall()
+        count = (f"{LogColors.PURPLE}({len(results)}){LogColors.ENDC} ").rjust(20)
+        logger.info(count + printed)
+        return results
+
+    def satisfied(self, jobspec):
+        """
+        Determine if a jobspec is satisfied by user-space subsystems.
+
+        Every subsystem type under attributes -> system -> requires is
+        checked, and a (cluster, subsystem) pair matches when find_nodes
+        returns nodes for it. Matches are collected across all required
+        types and printed. Returns True if there is at least one match,
+        False otherwise.
+        """
+        # This handles json or yaml
+        js = utils.load_jobspec(jobspec)
+
+        attributes = js.get("attributes") or {}
+        requires = (attributes.get("system") or {}).get("requires")
+        if not requires:
+            # NOTE(review): logger.exit is expected to end the process - confirm
+            logger.exit("Jobspec has no system requirements.")
+
+        # These clusters will satisfy the request
+        matches = set()
+
+        # We don't care about the association with tasks - the requires are matching clusters to entire jobs
+        # We could optimize this to be fewer queries, but it's likely trivial for now
+        for subsystem_type, items in requires.items():
+
+            # Get one or more matching subsystems (top level) for some number of clusters
+            # The subsystem type is like the category (e.g., software)
+            subsystems = self.get_subsystem_by_type(subsystem_type)
+            if not subsystems:
+                continue
+
+            # For each subsystem, since we don't have a query syntax developed, we just look for nodes
+            # that have matching attributes. Each here is a tuple, (name, cluster, type). The name
+            # identifies the subsystem (e.g., spack) since we might have multiple for a type.
+            for name, cluster, _ in subsystems:
+                if self.find_nodes(cluster, name, items):
+                    matches.add((cluster, name))
+
+        # Report only after every required subsystem type has been checked
+        if not matches:
+            print(f"{LogColors.RED}=> No Matches{LogColors.ENDC}")
+            return False
+
+        print(f"\n{LogColors.OKBLUE}({len(matches)}) Matches {LogColors.ENDC}")
+        for match in sorted(matches):
+            print(f"cluster ({match[0]}) subsystem ({match[1]})")
+        return True
+
+    def get_subsystem_by_type(self, subsystem_type, ignore_missing=True):
+        """
+        Get subsystems based on a type. This will return one or more clusters
+        that will be contenders for matching.
+
+        Rows are (name, cluster, type) tuples. ignore_missing is accepted
+        but not used.
+        """
+        # Columns are named so that the row layout is always (name, cluster, type)
+        statement = "SELECT name, cluster, type from subsystems WHERE type = ?;"
+        return self.query(statement, (str(subsystem_type),))
diff --git a/fractale/subsystem/subsystem.py b/fractale/subsystem/subsystem.py