import math
from collections.abc import Callable, Sequence
from dataclasses import dataclass
from itertools import takewhile
from random import Random
from typing import TYPE_CHECKING, Any, Optional, TypeVar
47
58if TYPE_CHECKING :
69 from hypofuzz .hypofuzz import FuzzTarget
710
# type variable for the generic `_min_values` helper below
T = TypeVar("T")

# in the absence of any knowledge about worker lifetimes, assume a worker lives
# for 5 minutes (value is in seconds).
DEFAULT_EXPECTED_LIFETIME_ESTIMATOR = 60 * 5
17+
@dataclass
class CurrentWorker:
    """The current assignment state of a single worker."""

    # node ids of the targets currently assigned to this worker
    nodeids: Sequence[str]
    # estimator for this worker's total lifetime, in seconds (see
    # e_worker_lifetime)
    e_lifetime: float
23+
@dataclass
class DistributeNodesTarget:
    """A fuzz target to be distributed across workers by `distribute_nodes`."""

    # the target's node id (test identifier)
    nodeid: str
    # behavior-discovery rate estimators for this target
    rates: "BehaviorRates"
    # estimator for the time (seconds) a worker spends starting up this target
    # when it is newly assigned
    e_startup_time: float
30+
@dataclass
class BehaviorRates:
    """Estimators for how quickly new behaviors are discovered."""

    # An estimator for the number of behaviors the next input will discover. This
    # will be between 0 and 1.
    per_input: float
    # An estimator for the number of behaviors discovered per second for a target,
    # assuming one worker is fuzzing this target continuously over that second.
    per_second: float
40+
41+ def _min_values (values : Sequence [T ], key : Any ) -> Sequence [T ]:
42+ candidates = sorted (
43+ [(value , key (value )) for value in values ], key = lambda item : item [1 ]
44+ )
45+ min_value = candidates [0 ][1 ]
46+ return [
47+ item [0 ] for item in takewhile (lambda item : item [1 ] == min_value , candidates )
48+ ]
49+
850
def distribute_nodes(
    targets: Sequence[DistributeNodesTarget],
    *,
    n: int,
    current_workers: Optional[Sequence[CurrentWorker]] = None,
) -> tuple[tuple[str, ...], ...]:
    """
    Distribute ``targets`` across ``n`` workers, returning one tuple of node
    ids per worker, in the same iteration order as ``current_workers`` (or an
    arbitrary worker order when ``current_workers`` is None).
    """
    # We have x nodes node_i, each with an estimator \hat{v}_i for "behaviors per
    # second". We have n bins (workers), and we want to distribute node_i
    # into the n bins such that we maximize the sum of worker_behaviors.
    #
    # The estimator for the number of behaviors for a worker is given by
    # e_worker_rates. Instead of trying for the optimal solution of maximizing
    # the sum of worker_behaviors, we instead maximize the smallest
    # worker_behaviors quantity.
    #
    # This is related to "makespan minimization", and is a classic bin packing
    # problem. Finding the optimum is NP-complete, so we instead approximate
    # the optimal solution with a greedy one, specifically a variant of
    # "longest processing time first scheduling": we first sort the nodes in
    # increasing order of their estimator. Then, for each node, we check which
    # worker has the lowest worker_behaviors, and assign the node to that
    # worker. Since we are iterating in increasing order of estimator, we know
    # that adding a node to a worker will increase that worker's
    # worker_behaviors (unless the worker's scheduling algorithm for targets is
    # literally adversarial, ie adding a higher-than-average value target
    # decreases its expected behaviors per second, which we will assume is not
    # the case).
    #
    # Optionally, the current assignment `current_workers` of node ids to workers
    # can be passed. This incorporates an overhead cost to switching a nodeid to a
    # different worker. The algorithm is the standard bin packing algorithm, but
    # with a penalty to a node being assigned to a worker other than its current
    # worker.
    #
    # This penalty cost of switching a nodeid between workers is
    # worker_behaviors_per_second * node_startup_cost_seconds, ie the number of
    # behaviors we expect to lose by spending time starting up this node.

    random = Random()
    if current_workers is None:
        current_workers = [CurrentWorker(nodeids=[], e_lifetime=0.0) for _ in range(n)]

    assert len(current_workers) == n

    # estimators of 0 are mathematically valid, but can lead to bad/pathological
    # algorithm outcomes
    assert all(target.rates.per_second > 0 for target in targets)
    assert all(target.rates.per_input > 0 for target in targets)

    # with no targets there is nothing to assign: every worker gets an empty
    # partition. (The padding loop below cannot handle an empty list.)
    if not targets:
        return tuple(() for _ in range(n))

    # return partitions in the same iteration order they were passed in current_workers
    workers: list[dict[str, Any]] = [
        {"current_worker": worker, "targets": []} for worker in current_workers
    ]

    # first, we sort the targets in increasing order by their estimator.
    # (sorted also copies, so the padding below never mutates the caller's
    # sequence.)
    targets = sorted(targets, key=lambda target: target.rates.per_second)

    # If we have fewer than `n` targets, we repeat the list of targets in
    # decreasing order of their estimator until we reach `n` targets. This
    # ensures every worker receives at least one target (in fact, in this case,
    # exactly one), and that the most valuable targets are duplicated first.
    num_distinct = len(targets)
    pad = 0
    while len(targets) < n:
        # cycle over the originally-passed targets best-first, i.e. indices
        # num_distinct-1, num_distinct-2, ..., 0, num_distinct-1, ...
        targets.append(targets[num_distinct - 1 - (pad % num_distinct)])
        pad += 1

    # then, we assign each target to the worker with the worst worker_behaviors.
    # Since we're iterating over the targets in increasing order of behaviors
    # per-second, adding a target to a worker will always increase its
    # worker_behaviors.
    def worker_score(
        worker: dict[str, Any], *, target: Optional[DistributeNodesTarget] = None
    ) -> float:
        # Estimated lifetime behaviors for `worker`, plus (when `target` is
        # given) a penalty if that target would have to switch workers.
        e_lifetime: float = worker["current_worker"].e_lifetime
        worker_rates = e_worker_rates(
            target_rates=[target.rates for target in worker["targets"]],
        )
        offset = 0.0
        if target is not None and target.nodeid not in worker["current_worker"].nodeids:
            # Add a penalty for switching nodes between workers. Since the ordering
            # quantity is the e_worker_behaviors estimator of lifetime worker
            # behaviors, we want to allow a node to switch workers if the ev
            # differential is greater than the number of behaviors we expect to
            # lose from spending time starting up this worker.
            #
            # And the number of behaviors we expect to lose is the behaviors per
            # second estimator for the worker, times the estimator for the startup
            # time of this node.
            #
            # We are choosing the worker with the lowest score to add this node to,
            # so if we want to encourage this node to be assigned to its current
            # worker, we want that worker to have a low score, which means we
            # want to increase the score of all other workers. So the offset here
            # should be positive.
            offset = worker_rates.per_second * target.e_startup_time

        # to avoid crazy rebalancing during the initial startup phase, don't
        # work with small lifetime estimators
        e_lifetime = max(e_lifetime, DEFAULT_EXPECTED_LIFETIME_ESTIMATOR)
        return (worker_rates.per_second * e_lifetime) + offset

    for target in targets:
        # find all the workers with the minimum value score, and randomly assign
        # this target to one of them. Normally there won't be ties, and the target
        # simply goes to the worst worker. But when fuzzing for the first time
        # (or after a db wipe) where all targets have the same estimators, we
        # don't want to end in an assignment where one worker is given n - 1 nodes
        # and the other is given just 1.
        smallest_workers = _min_values(
            workers,
            key=lambda worker: worker_score(worker, target=target),
        )
        smallest_worker = random.choice(smallest_workers)

        score_before = worker_score(smallest_worker)
        smallest_worker["targets"].append(target)
        # ignore float rounding errors for our invariant check
        assert worker_score(smallest_worker) - score_before >= -1e-6, (
            score_before,
            worker_score(smallest_worker),
        )

    return tuple(
        tuple(target.nodeid for target in worker["targets"]) for worker in workers
    )
63176
64177
# TODO: for the behaviors estimators, we should incorporate a lookback across
# the history of workers for this test, giving higher weight to newer
# estimators (proportional to their confidence, i.e. sample size).
68181
69182
def e_target_rates(target: "FuzzTarget") -> BehaviorRates:
    """
    Estimate the behavior-discovery rates for ``target`` from the statistics
    tracked on its provider.
    """
    # per_input computation. `since_new_behavior` presumably counts inputs
    # since the last new behavior was found — TODO confirm against the
    # provider; while it is 0 we optimistically estimate 1 behavior per input.
    since = target.provider.since_new_behavior
    # use 1.0 (not 1) so per_input is always a float, matching the declared
    # type of BehaviorRates.per_input
    per_input = (1 / since) if since > 0 else 1.0

    # per_second computation: scale per_input by the observed throughput.
    ninputs = target.provider.ninputs
    elapsed_time = target.provider.elapsed_time

    if elapsed_time == 0:
        # no throughput data yet; fall back to an optimistic default
        per_second = 1.0
    else:
        inputs_per_second = ninputs / elapsed_time
        per_second = per_input * inputs_per_second

    return BehaviorRates(per_input=per_input, per_second=per_second)
199+
200+
def e_worker_lifetime(current_lifetime: float) -> float:
    """
    Estimate a worker's total lifetime from how long it has been alive so far.

    Applies the doomsday-argument heuristic: having observed the worker alive
    for ``current_lifetime`` seconds, expect its total lifetime to be double
    that. Past worker lifetimes could refine this estimate in the future.
    """
    doubled = current_lifetime * 2
    return doubled
209+
210+
def e_worker_rates(*, target_rates: Sequence[BehaviorRates]) -> BehaviorRates:
    """
    An estimator for the total number of behaviors that will be discovered by
    a worker in its lifetime. Derived from the worker_lifetime and
    behaviors_per_second estimators.
    """
    # The worker picks its next target according to bandit_weights, so its
    # expected rate is sum(probability * expected_value) over its targets.
    # Note that this is tightly dependent on the sampling algorithm used in
    # practice by the workers. If that changes (to e.g. thompson sampling), our
    # estimators will need to change to use the same sampling algorithm as well.
    probabilities = bandit_weights(target_rates)
    weighted = [
        (p * rates.per_input, p * rates.per_second)
        for p, rates in zip(probabilities, target_rates)
    ]
    return BehaviorRates(
        per_input=sum(w[0] for w in weighted),
        per_second=sum(w[1] for w in weighted),
    )
88227
89228
90229def softmax (values : list [float ]) -> list [float ]:
@@ -96,3 +235,15 @@ def softmax(values: list[float]) -> list[float]:
96235
97236 total = sum (softmaxed )
98237 return [value / total for value in softmaxed ]
238+
239+
def bandit_weights(behavior_rates: Sequence[BehaviorRates]) -> list[float]:
    """
    Returns the probability that each target should be chosen, as a solution
    to the multi-armed-bandit problem.
    """
    # Boltzmann exploration: pick each target with probability equal to the
    # softmax of its expected value (behaviors per second).
    return softmax([rates.per_second for rates in behavior_rates])
0 commit comments