11import threading
22from collections import defaultdict
3- from functools import lru_cache
3+ from collections . abc import Sequence
44from queue import Empty , Queue
5- from typing import Any , Literal , Optional
5+ from typing import TYPE_CHECKING , Any , Literal , Optional
66
7- from hypothesis .extra ._patching import (
8- get_patch_for as _get_patch_for ,
9- make_patch as _make_patch ,
10- )
7+ from hypothesis .extra ._patching import get_patch_for , make_patch as _make_patch
8+ from sortedcontainers import SortedList
119
1210from hypofuzz import __version__
1311from hypofuzz .database import Observation
1412
15- COVERING_VIA = "covering example"
16- FAILING_VIA = "discovered failure"
13+ if TYPE_CHECKING :
14+ from typing import TypeAlias
15+
16+ # we have a two tiered structure.
17+ # * First, we store the list of test case reprs corresponding to the list of
18+ # @examples.
19+ # * Each time we add a new such input, we compute the new patch for the entire
20+ # list.
21+
1722# nodeid: {
18- # "covering": [(fname, before, after), ... ],
19- # "failing": [(fname, before, after), ... ],
23+ # "covering": list[observation.representation ],
24+ # "failing": list[observation.representation ],
2025# }
21- # TODO this duplicates the test function contents in `before` and `after`,
22- # we probably want a more memory-efficient representation eventually
23- # (and a smaller win: map fname to a list of (before, after), instead of storing
24- # each fname)
25- PATCHES : dict [str , dict [str , list [tuple [str , str , str ]]]] = defaultdict (
26- lambda : {"covering" : [], "failing" : []}
26+ #
27+ # We sort by string length, as a heuristic for putting simpler examples first in
28+ # the patch.
29+ EXAMPLES : dict [str , dict [str , SortedList [str ]]] = defaultdict (
30+ lambda : {"covering" : SortedList (key = len ), "failing" : SortedList (key = len )}
2731)
28- get_patch_for = lru_cache (maxsize = 8192 )(_get_patch_for )
29-
30- _queue : Queue = Queue ()
32+ # nodeid: {
33+ # "covering": patch,
34+ # "failing": patch,
35+ # }
36+ PATCHES : dict [str , dict [str , Optional [str ]]] = defaultdict (
37+ lambda : {"covering" : None , "failing" : None }
38+ )
39+ VIA = {"covering" : "covering example" , "failing" : "discovered failure" }
40+ COMMIT_MESSAGE = {
41+ "covering" : "add covering examples" ,
42+ "failing" : "add failing examples" ,
43+ }
44+
45+ ObservationTypeT : "TypeAlias" = Literal ["covering" , "failing" ]
46+ _queue : Queue [tuple [Any , str , Observation , ObservationTypeT ]] = Queue ()
3147_thread : Optional [threading .Thread ] = None
3248
3349
@@ -36,51 +52,45 @@ def add_patch(
3652 test_function : Any ,
3753 nodeid : str ,
3854 observation : Observation ,
39- observation_type : Literal [ "covering" , "failing" ] ,
55+ observation_type : ObservationTypeT ,
4056) -> None :
4157 _queue .put ((test_function , nodeid , observation , observation_type ))
4258
4359
44- @lru_cache (maxsize = 1024 )
45- def make_patch (triples : tuple [tuple [str , str , str ]], * , msg : str ) -> str :
60+ def make_patch (
61+ function : Any , examples : Sequence [str ], observation_type : ObservationTypeT
62+ ) -> Optional [str ]:
63+ via = VIA [observation_type ]
64+ triple = get_patch_for (function , examples = [(example , via ) for example in examples ])
65+ if triple is None :
66+ return None
67+
68+ commit_message = COMMIT_MESSAGE [observation_type ]
4669 return _make_patch (
47- triples ,
48- msg = msg ,
70+ ( triple ,) ,
71+ msg = commit_message ,
4972 author = f"HypoFuzz { __version__ } <no-reply@hypofuzz.com>" ,
5073 )
5174
5275
53- def failing_patch (nodeid : str ) -> Optional [str ]:
54- failing = PATCHES [nodeid ]["failing" ]
55- return make_patch (tuple (failing ), msg = "add failing examples" ) if failing else None
56-
57-
58- def covering_patch (nodeid : str ) -> Optional [str ]:
59- covering = PATCHES [nodeid ]["covering" ]
60- return (
61- make_patch (tuple (covering ), msg = "add covering examples" ) if covering else None
62- )
63-
64-
6576def _worker () -> None :
77+ # TODO We might optimize this by checking each function ahead of time for known
78+ # reasons why a patch would fail, for instance using st.data in the signature,
79+ # and then early-returning here before calling get_patch_for.
6680 while True :
6781 try :
68- item = _queue .get (timeout = 1.0 )
82+ test_function , nodeid , observation , observation_type = _queue .get (
83+ timeout = 1.0
84+ )
6985 except Empty :
7086 continue
7187
72- test_function , nodeid , observation , observation_type = item
73-
74- via = COVERING_VIA if observation_type == "covering" else FAILING_VIA
75- # If this thread ends up using significant resources, we might optimize
76- # this by checking each function ahead of time for known reasons why a
77- # patch would fail, for instance using st.data in the signature, and then
78- # simply discarding those here entirely.
79- patch = get_patch_for (
80- test_function , ((observation .representation , via ),), strip_via = via
88+ examples = EXAMPLES [nodeid ][observation_type ]
89+ examples .add (observation .representation )
90+ PATCHES [nodeid ][observation_type ] = make_patch (
91+ test_function , examples , observation_type
8192 )
82- if patch is not None :
83- PATCHES [nodeid ][observation_type ].append (patch )
93+
8494 _queue .task_done ()
8595
8696
@@ -90,3 +100,11 @@ def start_patching_thread() -> None:
90100
91101 _thread = threading .Thread (target = _worker , daemon = True )
92102 _thread .start ()
103+
104+
105+ def failing_patch (nodeid : str ) -> Optional [str ]:
106+ return PATCHES [nodeid ]["failing" ]
107+
108+
109+ def covering_patch (nodeid : str ) -> Optional [str ]:
110+ return PATCHES [nodeid ]["covering" ]