HTEX handles pre-serialized submissions (#3983)

khk-globus · web-flow · commit a2ca5aded084 · 2025-10-06T21:06:26.000Z
# Description

Allow an interested party to use a custom task executor with HTEX,
rather than the one provided by Parsl. For example, if a task is
pre-serialized in a custom manner, the default `execute_task()` function
will not know how to deserialize and run it. As of this PR, HTEX
implements the context-variable keys `resource_spec` and
`task_executor`. The `resource_spec` key is nominally well-understood at
this point. The `task_executor` key is documented in the `submit_payload`
docstring. An example, borrowed from that documentation:

```python
>>> htex: HighThroughputExecutor  # setup prior to this example
>>> ctxt = {
...   "task_executor": {
...     "f": "full.import.path.of.custom_execute_task",
...     "a": ("additional", "arguments"),
...     "k": {"some": "keyword", "args": "here"}
...   }
... }
>>> fn_buf = custom_serialize(task_func, *task_args, **task_kwargs)
>>> fut = htex.submit_payload(ctxt, fn_buf)
```

The `custom_execute_task` function would be dynamically imported and
invoked within the `process_worker_pool.py` worker as if:

```python
from full.import.path.of import custom_execute_task

args = ("additional", "arguments")
kwargs = {"some": "keyword", "args": "here"}
result = custom_execute_task(fn_buf, *args, **kwargs)
```

# Changed Behaviour

There should be no change to existing workflows, but new workflows may
be able to use `submit_payload` to fine-tune how tasks are executed
within the worker.

## Type of change

- New feature
diff --git a/parsl/executors/high_throughput/executor.py b/parsl/executors/high_throughput/executor.py
@@ -697,20 +697,11 @@ def submit(self, func: Callable, resource_specification: dict, *args, **kwargs)
 
         self.validate_resource_spec(resource_specification)
 
-        if self.bad_state_is_set:
-            raise self.executor_exception
-
-        self._task_counter += 1
-        task_id = self._task_counter
-
         # handle people sending blobs gracefully
         if logger.getEffectiveLevel() <= logging.DEBUG:
             args_to_print = tuple([ar if len(ar := repr(arg)) < 100 else (ar[:100] + '...') for arg in args])
             logger.debug("Pushing function {} to queue with args {}".format(func, args_to_print))
 
-        fut = HTEXFuture(task_id)
-        self.tasks[task_id] = fut
-
         try:
             fn_buf = pack_apply_message(func, args, kwargs, buffer_threshold=1 << 20)
         except TypeError:
@@ -720,12 +711,69 @@ def submit(self, func: Callable, resource_specification: dict, *args, **kwargs)
         if resource_specification:
             context["resource_spec"] = resource_specification
 
-        msg = {"task_id": task_id, "context": context, "buffer": fn_buf}
+        return self.submit_payload(context, fn_buf)
+
+    def submit_payload(self, context: dict, buffer: bytes) -> HTEXFuture:
+        """
+        Submit specially crafted payloads.
+
+        For use-cases where the ``HighThroughputExecutor`` consumer needs the payload
+        handled by the worker in a special way.  For example, if the function is
+        serialized differently than Parsl's default approach, or if the task must
+        be set up more precisely than Parsl's default ``execute_task`` allows.
+
+        An example interaction:
+
+        .. code-block:: python
+
+            >>> htex: HighThroughputExecutor  # setup prior to this example
+            >>> ctxt = {
+            ...   "task_executor": {
+            ...     "f": "full.import.path.of.custom_execute_task",
+            ...     "a": ("additional", "arguments"),
+            ...     "k": {"some": "keyword", "args": "here"}
+            ...   }
+            ... }
+            >>> fn_buf = custom_serialize(task_func, *task_args, **task_kwargs)
+            >>> fut = htex.submit_payload(ctxt, fn_buf)
+
+        The custom ``custom_execute_task`` would be dynamically imported, and
+        invoked as:
+
+        .. code-block:: python
+
+            args = ("additional", "arguments")
+            kwargs = {"some": "keyword", "args": "here"}
+            result = custom_execute_task(fn_buf, *args, **kwargs)
+
+        Parameters
+        ----------
+        context:
+            A task-specific context associated with the function buffer.  Parsl
+            currently implements the keys ``task_executor`` and ``resource_spec``.
+
+        buffer:
+            A serialized function, that will be deserialized and executed by
+            ``execute_task`` (or custom function, if ``task_executor`` is specified)
+
+        Returns
+        -------
+        An HTEXFuture (a normal Future, with the attribute ``.parsl_executor_task_id``
+        set).  The future will be set to done when the associated function buffer has
+        been invoked and completed.
+        """
+        if self.bad_state_is_set:
+            raise self.executor_exception
+
+        self._task_counter += 1
+        task_id = self._task_counter
+
+        fut = HTEXFuture(task_id)
+        self.tasks[task_id] = fut
 
-        # Post task to the outgoing queue
+        msg = {"task_id": task_id, "context": context, "buffer": buffer}
         self.outgoing_q.put(msg)
 
-        # Return the future
         return fut
 
     @property
diff --git a/parsl/executors/high_throughput/process_worker_pool.py b/parsl/executors/high_throughput/process_worker_pool.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 import argparse
+import importlib
 import logging
 import math
 import multiprocessing
@@ -17,7 +18,7 @@
 from multiprocessing.context import SpawnProcess
 from multiprocessing.managers import DictProxy
 from multiprocessing.sharedctypes import Synchronized
-from typing import Dict, List, Optional, Sequence
+from typing import Callable, Dict, List, Optional, Sequence
 
 import psutil
 import zmq
@@ -778,8 +779,20 @@ def manager_is_alive():
 
         _init_mpi_env(mpi_launcher=mpi_launcher, resource_spec=res_spec)
 
+        exec_func: Callable = execute_task
+        exec_args = ()
+        exec_kwargs = {}
+
         try:
-            result = execute_task(req['buffer'])
+            if task_executor := ctxt.get("task_executor", None):
+                mod_name, _, fn_name = task_executor["f"].rpartition(".")
+                exec_mod = importlib.import_module(mod_name)
+                exec_func = getattr(exec_mod, fn_name)
+
+                exec_args = task_executor.get("a", ())
+                exec_kwargs = task_executor.get("k", {})
+
+            result = exec_func(req['buffer'], *exec_args, **exec_kwargs)
             serialized_result = serialize(result, buffer_threshold=1000000)
         except Exception as e:
             logger.info('Caught an exception: {}'.format(e))
diff --git a/parsl/tests/test_htex/test_htex.py b/parsl/tests/test_htex/test_htex.py
@@ -7,6 +7,7 @@
 import pytest
 
 from parsl import HighThroughputExecutor, curvezmq
+from parsl.serialize.facade import pack_apply_message, unpack_apply_message
 
 _MOCK_BASE = "parsl.executors.high_throughput.executor"
 
@@ -19,11 +20,16 @@ def encrypted(request: pytest.FixtureRequest):
 
 
 @pytest.fixture
-def htex(encrypted: bool):
+def htex(encrypted: bool, tmpd_cwd):
     htex = HighThroughputExecutor(encrypted=encrypted)
+    htex.max_workers_per_node = 1
+    htex.run_dir = tmpd_cwd
+    htex.provider.script_dir = tmpd_cwd
 
     yield htex
 
+    if hasattr(htex, "outgoing_q"):
+        htex.scale_in(blocks=1000)
     htex.shutdown()
 
 
@@ -146,3 +152,32 @@ def test_htex_interchange_launch_cmd(cmd: Optional[Sequence[str]]):
     else:
         htex = HighThroughputExecutor()
         assert htex.interchange_launch_cmd == ["interchange.py"]
+
+
+def dyn_exec(buf, *vec_y):
+    f, a, _ = unpack_apply_message(buf)
+    custom_args = [a, vec_y]
+    return f(*custom_args)
+
+
+@pytest.mark.local
+def test_worker_dynamic_import(htex: HighThroughputExecutor):
+    def _dot_prod(vec_x, vec_y):
+        return sum(x * y for x, y in zip(vec_x, vec_y))
+
+    htex.start()
+    htex.scale_out_facade(1)
+
+    num_array = tuple(range(10))
+
+    fn_buf = pack_apply_message(_dot_prod, num_array, {})
+    ctxt = {
+        "task_executor": {
+            "f": f"{dyn_exec.__module__}.{dyn_exec.__name__}",
+            "a": num_array,  # prove "custom" dyn_exec
+        }
+    }
+    val = htex.submit_payload(ctxt, fn_buf).result()
+    exp_val = _dot_prod(num_array, num_array)
+
+    assert val == exp_val
diff --git a/parsl/tests/unit/executors/high_throughput/test_htex.py b/parsl/tests/unit/executors/high_throughput/test_htex.py
@@ -0,0 +1,24 @@
+from unittest import mock
+
+import pytest
+
+from parsl import HighThroughputExecutor
+from parsl.executors.high_throughput import zmq_pipes
+
+
+@pytest.mark.local
+def test_submit_payload():
+    htex = HighThroughputExecutor()
+    htex.outgoing_q = mock.Mock(spec=zmq_pipes.TasksOutgoing)
+    ctxt = {"some": "context"}
+    buf = b'some buffer (function) payload'
+    for task_num in range(1, 20):
+        htex.outgoing_q.reset_mock()
+        fut = htex.submit_payload(ctxt, buf)
+        (msg,), _ = htex.outgoing_q.put.call_args
+
+        assert htex.tasks[fut.parsl_executor_task_id] is fut
+        assert fut.parsl_executor_task_id == task_num, "Expect monotonic increase"
+        assert msg["task_id"] == fut.parsl_executor_task_id
+        assert msg["context"] == ctxt, "Expect no modification"
+        assert msg["buffer"] == buf, "Expect no modification"
diff --git a/parsl/tests/unit/executors/high_throughput/test_process_worker_pool.py b/parsl/tests/unit/executors/high_throughput/test_process_worker_pool.py
@@ -1,9 +1,16 @@
+import os
+import pickle
 import sys
 from argparse import ArgumentError
+from unittest import mock
 
 import pytest
 
+from parsl.app.errors import RemoteExceptionWrapper
 from parsl.executors.high_throughput import process_worker_pool
+from parsl.executors.high_throughput.process_worker_pool import worker
+from parsl.multiprocessing import SpawnContext
+from parsl.serialize.facade import deserialize
 
 if sys.version_info < (3, 12):
     # exit_on_error bug; see https://github.com/python/cpython/issues/121018
@@ -72,3 +79,114 @@ def test_arg_parser_validates_cpu_affinity(valid, val):
         with pytest.raises(ArgumentError) as pyt_exc:
             p.parse_args(reqd_args)
         assert "must be one of" in pyt_exc.value.args[1]
+
+
+def _always_raise(*a, **k):
+    raise ArithmeticError(f"{a=}\n{k=}")
+
+
+@pytest.mark.local
+def test_worker_dynamic_import_happy_path(tmpd_cwd):
+    import_str = f"{_always_raise.__module__}.{_always_raise.__name__}"
+    task_exec = {
+        "f": import_str,
+        "a": (1, 2),
+        "k": {"a": "b"},
+    }
+    req = {
+        "task_id": 15,
+        "context": {"task_executor": task_exec},
+        "buffer": b"some serialized value"
+    }
+
+    try:
+        task_args = [req["buffer"]]
+        task_args.extend(task_exec["a"])
+        _always_raise(*task_args, **task_exec["k"])
+    except Exception as e:
+        exp_exc = e
+    else:
+        raise RuntimeError("Test failure; this branch should not run")
+
+    q = mock.Mock(side_effect=(req, MemoryError("intentional test error")))
+    q.get = q
+
+    block_id = "bid"
+    worker_id = 1
+    pool = 1
+    (tmpd_cwd / f"block-{block_id}/{worker_id}").mkdir(parents=True)
+    with pytest.raises(MemoryError):
+        worker(
+            worker_id,
+            pool_id=str(pool),
+            pool_size=pool,
+            task_queue=q,
+            result_queue=q,
+            monitoring_queue=None,
+            ready_worker_count=SpawnContext.Value("i", 0),
+            tasks_in_progress={},
+            cpu_affinity="none",
+            accelerator=None,
+            block_id=block_id,
+            task_queue_timeout=0,
+            manager_pid=os.getpid(),
+            logdir=str(tmpd_cwd),
+            debug=True,
+            mpi_launcher="",
+        )
+    (result_pkl,), _ = q.put.call_args
+    r = pickle.loads(result_pkl)
+    assert "exception" in r
+    wrapped_exc: RemoteExceptionWrapper = deserialize(r["exception"])
+    exc = wrapped_exc.get_exception()
+    assert isinstance(exc, type(exp_exc)), "Approximate equality"
+    assert str(exp_exc) == str(exc), "Approximate equality; all args, kwargs conveyed"
+
+
+@pytest.mark.local
+def test_worker_bad_dynamic_import(tmpd_cwd):
+    req = {
+        "task_id": 15,
+        "context": {
+            "task_executor": {
+                "f": "parsl.some.not_existing.module.__nope",
+                "a": (1, 2),
+                "k": {"a": "b"},
+            },
+        },
+        "buffer": b"some serialized value"
+    }
+
+    q = mock.Mock(side_effect=(req, MemoryError("intentional test error")))
+    q.get = q
+
+    block_id = "bid"
+    worker_id = 1
+    pool = 1
+    (tmpd_cwd / f"block-{block_id}/{worker_id}").mkdir(parents=True)
+    with pytest.raises(MemoryError):
+        worker(
+            worker_id,
+            pool_id=str(pool),
+            pool_size=pool,
+            task_queue=q,
+            result_queue=q,
+            monitoring_queue=None,
+            ready_worker_count=SpawnContext.Value("i", 0),
+            tasks_in_progress={},
+            cpu_affinity="none",
+            accelerator=None,
+            block_id=block_id,
+            task_queue_timeout=0,
+            manager_pid=os.getpid(),
+            logdir=str(tmpd_cwd),
+            debug=True,
+            mpi_launcher="",
+        )
+    (result_pkl,), _ = q.put.call_args
+    r = pickle.loads(result_pkl)
+    assert "exception" in r
+    wrapped_exc: RemoteExceptionWrapper = deserialize(r["exception"])
+    exc = wrapped_exc.get_exception()
+    assert isinstance(exc, ModuleNotFoundError)
+    assert "No module named" in str(exc)