add support for launcher (#33)

vsoch · web-flow · commit 979b544ac2d1 · 2022-11-21T21:52:50.000-07:00
* add support for launcher

Now that we have flux executors within workflow
managers, launching from a single job (e.g., in
the docker container) is likely not going to work.
We introduce here the idea of a launcher, or a known
command to run on the server, e.g., "nextflow", that
is able to launch flux jobs that are equivalently
owned by the instance. Currently the one issue
with this approach is that the launcher writes to
an output file instead of stdout/stderr, but I have
an issue open to figure out how to handle this.
This PR will also fix a current set of bugs with
parsing the checkbox (boolean) form fields - the
name attribute was in the wrong spot, doh.

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -4,12 +4,25 @@ on:
   pull_request: []
 
 jobs:
+  # Resolve the branch name once and expose it as the "branch" output; the
+  # test job passes it to its container as INSTALL_BRANCH.
+  prepare-container:
+    runs-on: ubuntu-latest
+    outputs:
+      branch: ${{ steps.extract_branch.outputs.branch }}
+    steps:
+    - name: Extract branch name
+      # On pull_request events GITHUB_REF is refs/pull/<number>/merge, so
+      # stripping refs/heads/ from it does not give a branch name. Prefer
+      # GITHUB_HEAD_REF (the pull request source branch) and only fall back
+      # to GITHUB_REF for events where GITHUB_HEAD_REF is unset or empty.
+      run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> "$GITHUB_OUTPUT"
+      id: extract_branch
+
   test:
     runs-on: ubuntu-latest
+    needs: [prepare-container]
     container:
       image: ghcr.io/flux-framework/flux-restful-api:latest
       ports:
        - 5000
+      env:
+        INSTALL_BRANCH: ${{ needs.prepare-container.outputs.branch }}
+        INSTALL_REPO: ${{ github.repository }}
     steps:
     - uses: actions/checkout@v3
     - name: Install Dependencies (in case changes)
diff --git a/app/core/config.py b/app/core/config.py
@@ -41,5 +41,9 @@ class Settings(BaseSettings):
     flux_token: str = os.environ.get("FLUX_TOKEN")
     require_auth: bool = get_bool_envar("FLUX_REQUIRE_AUTH")
 
+    # If the user requests a launcher, be strict.
+    # We only allow nextflow and snakemake, sorry
+    known_launchers: list = ["nextflow", "snakemake"]
+
 
 settings = Settings()
diff --git a/app/forms.py b/app/forms.py
@@ -17,6 +17,8 @@ def __init__(self, request: Request):
         self.cores_per_task: Optional[int] = None
         self.gpus_per_task: Optional[int] = None
         self.exclusive: Optional[bool] = False
+        self.is_launcher: Optional[bool] = False
+        self.exclusive: Optional[bool] = False
 
     # STOPPED HERE - serialize in jquery from form, submit as application/json.
     async def load_data(self):
@@ -28,28 +30,31 @@ async def load_data(self):
         self.runtime = form.get("runtime") or 0
         self.cores_per_task = form.get("cores_per_task")
         self.gpus_per_task = form.get("gpus_per_task")
-        self.exclusive = form.get("exclusive")
+        self.exclusive = True if form.get("exclusive") == "on" else False
+        self.is_launcher = True if form.get("is_launcher") == "on" else False
 
     @property
     def kwargs(self):
         """
         Prepared key value dictionary of items.
+
+        Integer fields are cast with int(), checkbox fields are only
+        included (as True) when they are set, and values that are None
+        or an empty string are skipped entirely.
         """
         kwargs = {}
-        for key in [
-            "command",
-            "num_tasks",
-            "num_nodes",
-            "cores_per_task",
-            "gpus_per_task",
-            "exclusive",
-        ]:
+        as_int = ["num_tasks", "num_nodes", "cores_per_task", "gpus_per_task"]
+        as_bool = ["exclusive", "is_launcher"]
+        for key in as_int + as_bool + ["command"]:
             if getattr(self, key, None) is not None:
                 value = getattr(self, key)
                 # Form could submit an empty value
                 if value == "":
                     continue
-                kwargs[key] = value
+
+                # Parse as integer
+                if key in as_int:
+                    kwargs[key] = int(value)
+                elif key in as_bool:
+                    # load_data stores False for an unchecked box, and False
+                    # passes the "is not None" test above. Only forward flags
+                    # that are actually set, otherwise every submission would
+                    # be exclusive and would be routed to the launcher.
+                    if value:
+                        kwargs[key] = True
+                else:
+                    kwargs[key] = value
         return kwargs
 
     def is_valid(self):
diff --git a/app/library/flux.py b/app/library/flux.py
@@ -23,7 +23,7 @@ def validate_submit_kwargs(kwargs, envars=None, runtime=None):
 
     # We can't ask for more nodes than available!
     num_nodes = kwargs.get("num_nodes")
-    if num_nodes and num_nodes > settings.flux_nodes:
+    if num_nodes and int(num_nodes) > settings.flux_nodes:
         errors.append(
             f"The server only has {settings.flux_nodes} nodes, you requested {num_nodes}"
         )
diff --git a/app/library/launcher.py b/app/library/launcher.py
@@ -0,0 +1,40 @@
+import os
+import shlex
+import subprocess
+
+from app.core.config import settings
+
+
+def launch(kwargs, workdir=None, envars=None):
+    """
+    Launch a job with a known launcher
+
+    The command is not submitted to flux directly. It is started as a
+    subprocess of the server, and only when its first token is listed in
+    settings.known_launchers.
+
+    Arguments:
+      kwargs (dict): submit arguments, only "command" (str or list) is read.
+      workdir (str): working directory for the launcher process (optional).
+      envars (dict): extra environment variables layered over os.environ.
+
+    Returns a message (str) for the user: a description of the problem if
+    the command was rejected or could not be started, otherwise a note that
+    the launcher was started.
+    """
+    envars = envars or {}
+
+    # Split a string command into arguments (no shell is involved)
+    command = kwargs.get("command")
+    if isinstance(command, str):
+        command = shlex.split(command)
+    print(f"⭐️ Command being submit: {command}")
+
+    # A missing or empty command has no executable to check, so report it
+    # instead of failing on command[0] below
+    if not command:
+        return "A command is required to use a launcher."
+
+    # We don't allow commands willy nilly
+    if command[0] not in settings.known_launchers:
+        return f"{command[0]} is not a known launcher. "
+
+    # The caller's kwargs are left untouched: nothing below reads them.
+
+    # Additional envars in the payload?
+    environment = dict(os.environ)
+    environment.update(envars)
+
+    print(f"⭐️ Workdir provided: {workdir}")
+
+    # Submit using subprocess (we can see output in terminal, if any).
+    # The Popen handle is discarded, so this returns without waiting for
+    # the launcher and its exit status is not collected here.
+    try:
+        subprocess.Popen(
+            command, cwd=workdir, env=environment, stdout=None, stderr=None, stdin=None
+        )
+    except Exception as e:
+        return str(e)
+    return "Job submit, see jobs table for spawned jobs."
diff --git a/app/routers/api.py b/app/routers/api.py
@@ -10,6 +10,7 @@
 
 import app.library.flux as flux_cli
 import app.library.helpers as helpers
+import app.library.launcher as launcher
 from app.core.config import settings
 from app.library.auth import alert_auth, check_auth
 
@@ -152,15 +153,17 @@ async def submit_job(request: Request):
         kwargs[required] = payload.get(required)
 
     # Optional arguments
-    for optional in [
-        "num_tasks",
-        "cores_per_task",
-        "gpus_per_task",
-        "num_nodes",
-        "exclusive",
-    ]:
+    as_int = ["num_tasks", "cores_per_task", "gpus_per_task", "num_nodes"]
+    as_bool = ["exclusive"]
+
+    for optional in as_int + as_bool:
         if optional in payload and payload[optional]:
-            kwargs[optional] = payload[optional]
+            if optional in as_bool:
+                kwargs[optional] = bool(payload[optional])
+            elif optional in as_int:
+                kwargs[optional] = int(payload[optional])
+            else:
+                kwargs[optional] = payload[optional]
 
     # One off args not provided to JobspecV1
     envars = payload.get("envars", {})
@@ -177,22 +180,28 @@ async def submit_job(request: Request):
             status_code=400,
         )
 
-    # Prepare the flux job!
-    fluxjob = flux_cli.prepare_job(
-        kwargs, runtime=runtime, workdir=workdir, envars=envars
-    )
-
-    # Submit the job and return the ID, but allow for error
-    try:
-        flux_future = flux.job.submit_async(app.handle, fluxjob)
-    except Exception as e:
-        result = jsonable_encoder(
-            {"Message": "There was an issue submitting that job.", "Error": str(e)}
+    # Are we using a launcher instead?
+    is_launcher = payload.get("is_launcher", False)
+    if is_launcher:
+        message = launcher.launch(kwargs, workdir=workdir, envars=envars)
+        result = jsonable_encoder({"Message": message, "id": "MANY"})
+    else:
+        # Prepare the flux job!
+        fluxjob = flux_cli.prepare_job(
+            kwargs, runtime=runtime, workdir=workdir, envars=envars
         )
-        return JSONResponse(content=result, status_code=400)
-
-    jobid = flux_future.get_id()
-    result = jsonable_encoder({"Message": "Job submit.", "id": jobid})
+        # Submit the job and return the ID, but allow for error
+        try:
+            flux_future = flux.job.submit_async(app.handle, fluxjob)
+        except Exception as e:
+            result = jsonable_encoder(
+                {"Message": "There was an issue submitting that job.", "Error": str(e)}
+            )
+            return JSONResponse(content=result, status_code=400)
+        jobid = flux_future.get_id()
+        result = jsonable_encoder({"Message": "Job submit.", "id": jobid})
+
+    # If we get down here, either launcher derived or submit
     return JSONResponse(content=result, status_code=200)
 
 
diff --git a/app/routers/views.py b/app/routers/views.py
@@ -6,6 +6,7 @@
 
 import app.library.flux as flux_cli
 import app.library.helpers as helpers
+import app.library.launcher as launcher
 from app.core.config import settings
 from app.forms import SubmitForm
 from app.library.auth import check_auth
@@ -41,7 +42,6 @@ async def home(request: Request):
 @auth_views_router.get("/jobs", response_class=HTMLResponse)
 async def jobs_table(request: Request):
     jobs = list(flux_cli.list_jobs_detailed().values())
-    print(jobs)
     return templates.TemplateResponse(
         "jobs/jobs.html",
         {
@@ -71,7 +71,6 @@ async def job_info(request: Request, jobid, msg=None):
     # Otherwise ensure we get all the logs!
     else:
         info = flux_cli.get_job_output(jobid, delay=1)
-
     return templates.TemplateResponse(
         "jobs/job.html",
         {
@@ -111,36 +110,58 @@ async def submit_job_post(request: Request):
     """
     from app.main import app
 
+    messages = []
     form = SubmitForm(request)
     await form.load_data()
     if form.is_valid():
         print("🍦 Submit form is valid!")
         print(form.kwargs)
 
-        # Prepare the flux job! We don't support envars here yet
-        fluxjob = flux_cli.prepare_job(
-            form.kwargs, runtime=form.runtime, workdir=form.workdir
-        )
-
-        # Submit the job and return the ID, but allow for error
-        try:
-            flux_future = flux.job.submit_async(app.handle, fluxjob)
-            jobid = flux_future.get_id()
-            intid = int(jobid)
-            return templates.TemplateResponse(
-                "jobs/submit.html",
-                context={
-                    "request": request,
-                    "form": form,
-                    "messages": [
-                        f"Your job was successfully submit! 🦊 <a target='_blank' style='color:magenta' href='/job/{intid}'>{jobid}</a>"
-                    ],
-                },
-            )
-        except Exception as e:
-            form.errors.append("There was an issue submitting that job: %s" % str(e))
+        if form.kwargs.get("is_launcher") is True:
+            messages.append(launcher.launch(form.kwargs, workdir=form.workdir))
+        else:
+            return submit_job_helper(request, app, form)
     else:
         print("🍒 Submit form is NOT valid!")
+    return templates.TemplateResponse(
+        "jobs/submit.html",
+        context={
+            "request": request,
+            "form": form,
+            "messages": messages,
+            "has_gpus": settings.has_gpus,
+            **form.__dict__,
+        },
+    )
+
+
+def submit_job_helper(request, app, form):
+    """
+    A helper to submit a flux job (not a launcher)
+    """
+
+    # Prepare the flux job! We don't support envars here yet
+    fluxjob = flux_cli.prepare_job(
+        form.kwargs, runtime=form.runtime, workdir=form.workdir
+    )
+
+    # Submit the job and return the ID, but allow for error
+    try:
+        flux_future = flux.job.submit_async(app.handle, fluxjob)
+        jobid = flux_future.get_id()
+        intid = int(jobid)
+        message = f"Your job was successfully submit! 🦊 <a target='_blank' style='color:magenta' href='/job/{intid}'>{jobid}</a>"
+        return templates.TemplateResponse(
+            "jobs/submit.html",
+            context={
+                "request": request,
+                "form": form,
+                "messages": [message],
+            },
+        )
+    except Exception as e:
+        form.errors.append("There was an issue submitting that job: %s" % str(e))
+
     return templates.TemplateResponse(
         "jobs/submit.html",
         context={
diff --git a/clients/python/CHANGELOG.md b/clients/python/CHANGELOG.md
@@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
 The versions coincide with releases on pip. Only major versions will be released as tags on Github.
 
 ## [0.0.x](https://github.com/flux-framework/flux-restful-api/tree/main) (0.0.x)
+ - support for `is_launcher` parameter to indicate a launcher should be used instead (0.0.14)
  - support for streaming job output (0.0.13)
  - ensure logs end with one newline! (0.0.12)
  - support for job info and logs (0.0.11)
diff --git a/clients/python/flux_restful_client/main/client.py b/clients/python/flux_restful_client/main/client.py
@@ -214,6 +214,8 @@ def submit(self, command, **kwargs):
         gpus_per_task (int): Number of gpus per task (defaults to None)
         num_nodes (int): Number of nodes (defaults to None)
         exclusive (bool): is the job exclusive? (defaults to False)
+        is_launcher (bool): the command should be submitted to a launcher.
+        This is currently supported for snakemake and nextflow.
         """
         # Allow the user to provide a list (and stringify everything)
         if isinstance(command, list):
@@ -225,6 +227,7 @@ def submit(self, command, **kwargs):
             "gpus_per_task",
             "num_nodes",
             "exclusive",
+            "is_launcher",
             "workdir",
             "envars",
         ]:
diff --git a/clients/python/flux_restful_client/main/schemas.py b/clients/python/flux_restful_client/main/schemas.py
@@ -46,6 +46,10 @@
         "type": ["boolean", "null"],
         "description": "ask for exclusive nodes for the job.",
     },
+    "is_launcher": {
+        "type": ["boolean", "null"],
+        "description": "indicate the command is for a launcher (e.g., nextflow, snakemake)",
+    },
 }
 
 job_submit_schema = {
diff --git a/clients/python/flux_restful_client/version.py b/clients/python/flux_restful_client/version.py
@@ -1,4 +1,4 @@
-__version__ = "0.0.13"
+__version__ = "0.0.14"
 AUTHOR = "Vanessa Sochat"
 EMAIL = "vsoch@users.noreply.github.com"
 NAME = "flux-restful-client"
diff --git a/docs/getting_started/img/launcher.png b/docs/getting_started/img/launcher.png
diff --git a/docs/getting_started/img/submit.png b/docs/getting_started/img/submit.png
diff --git a/docs/getting_started/interface.md b/docs/getting_started/interface.md
@@ -23,6 +23,13 @@ You likely want to submit jobs, and then see them progress. We first do this via
 
 ![img/submit.png](img/submit.png)
 
+Notice that there is a checkbox for a "launcher" - current launchers supported
+are Nextflow and Snakemake. Essentially, a launcher is going to submit multiple
+Flux jobs for you instead of providing a single command to Flux. When you submit
+via a Nextflow launcher, for example, the one command results in many jobs:
+
+![img/launcher.png](img/launcher.png)
+
 
 ### `GET /jobs`
 
diff --git a/docs/getting_started/user-guide.md b/docs/getting_started/user-guide.md
@@ -123,6 +123,7 @@ optional arguments:
   --num_nodes NUM_NODES
                         number of nodes for the job.
   --exclusive           ask for exclusive nodes for the job.
+  --is_launcher         indicate the command is for a launcher (e.g., nextflow, snakemake)
   --env ENVARS          "key=value pairs to provide to the job in the environment (add as many as you need)
                         flux-restful-cli --env PIPELINE_VAR=one ...
                         flux-restful-cli --env SUPERHERO=batman --env SUPERHERO_NAME=manbat ...
diff --git a/templates/jobs/submit.html b/templates/jobs/submit.html
diff --git a/tests/test_api.py b/tests/test_api.py

Original file line number	Diff line number	Diff line change
`@@ -23,7 +23,7 @@ def validate_submit_kwargs(kwargs, envars=None, runtime=None):`
`23`	`23`
`24`	`24`	`# We can't ask for more nodes than available!`
`25`	`25`	`num_nodes = kwargs.get("num_nodes")`
`26`		`- if num_nodes and num_nodes > settings.flux_nodes:`
	`26`	`+ if num_nodes and int(num_nodes) > settings.flux_nodes:`
`27`	`27`	`errors.append(`
`28`	`28`	`f"The server only has {settings.flux_nodes} nodes, you requested {num_nodes}"`
`29`	`29`	`)`