From 7347b1d70b668cc1f43fe2cb6478bf28fd4b7a39 Mon Sep 17 00:00:00 2001
From: Henrik Brodin <90325907+hbrodin@users.noreply.github.com>
Date: Fri, 30 Jan 2026 13:59:12 +0100
Subject: [PATCH 1/3] Implement per-file coverage tracking for uncovered lines
 (v2)

This refactors the uncovered lines tracking to properly handle functions
that use macros or have code spanning multiple files.

Key changes:
- Track coverage per file (FileLineCoverage) instead of mixing line numbers
- Identify primary file (where function is defined) by CODE region count
- Track macro call sites separately with call line + uncovered count
- Add UncoveredRanges utility for line set to protobuf conversion
- Add UncoveredLinesMap for Redis storage of uncovered lines data

The old model mixed lines from different files, making it impossible for
an LLM to know which file uncovered line numbers referred to. The new
model stores only primary file lines in uncovered ranges, with macro
call sites tracked separately.

Protobuf changes:
- Add MacroCallSite message
- Add UncoveredLines message with run-length encoding
- Add FunctionUncoveredLines with primary_file_path and macro_sites

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 common/protos/msg.proto                       |  24 ++
 common/src/buttercup/common/coverage_utils.py | 130 +++++++++++
 .../common/datastructures/msg_pb2.py          |  20 +-
 .../common/datastructures/msg_pb2.pyi         |  40 ++++
 common/src/buttercup/common/maps.py           |  50 ++++-
 fuzzer/docs/new-coverage.md                   | 207 +++++++++++++++++
 .../buttercup/fuzzing_infra/coverage_bot.py   |  58 ++++-
 .../fuzzing_infra/coverage_runner.py          | 212 ++++++++++++++++--
 8 files changed, 720 insertions(+), 21 deletions(-)
 create mode 100644 common/src/buttercup/common/coverage_utils.py
 create mode 100644 fuzzer/docs/new-coverage.md

diff --git a/common/protos/msg.proto b/common/protos/msg.proto
index 76dae473..2791523d 100644
--- a/common/protos/msg.proto
+++ b/common/protos/msg.proto
@@ -198,3 +198,27 @@ message POVReproduceResponse {
     POVReproduceRequest request = 1;
     bool did_crash = 2;
 }
+
+// Uncovered lines tracking messages
+message MacroCallSite {
+    uint32 call_line = 1;        // Line in primary file where macro is called
+    string macro_file_path = 2;  // File where macro is defined
+    uint32 uncovered_count = 3;  // Number of uncovered lines inside the macro
+}
+
+message UncoveredLines {
+    repeated uint32 starts = 1 [packed = true];   // Start line of each uncovered range
+    repeated uint32 lengths = 2 [packed = true];  // Length of each uncovered range
+    uint32 function_start_line = 3;
+    uint32 function_end_line = 4;
+}
+
+message FunctionUncoveredLines {
+    string function_name = 1;
+    repeated string function_paths = 2;
+    string primary_file_path = 3;  // The function definition file (container path)
+    uint32 total_lines = 4;
+    uint32 covered_lines = 5;
+    UncoveredLines uncovered = 6;  // Uncovered lines in primary file only
+    repeated MacroCallSite macro_sites = 7;  // Macro call sites with uncovered code
+}
diff --git a/common/src/buttercup/common/coverage_utils.py b/common/src/buttercup/common/coverage_utils.py
new file mode 100644
index 00000000..0e7748dc
--- /dev/null
+++ b/common/src/buttercup/common/coverage_utils.py
@@ -0,0 +1,130 @@
+"""Utilities for working with coverage data and uncovered line tracking."""
+
+from dataclasses import dataclass
+
+from buttercup.common.datastructures.msg_pb2 import UncoveredLines
+
+
+@dataclass
+class LineRange:
+    """A range of consecutive lines."""
+
+    start: int
+    length: int
+
+    @property
+    def end(self) -> int:
+        """Return the end line (inclusive)."""
+        return self.start + self.length - 1
+
+
+@dataclass
+class UncoveredRanges:
+    """Represents uncovered line ranges in a function.
+
+    This class converts between line sets and the protobuf UncoveredLines format,
+    which uses run-length encoding (starts + lengths) for compact storage.
+    """
+
+    ranges: list[LineRange]
+    function_start_line: int
+    function_end_line: int
+
+    @classmethod
+    def from_line_sets(
+        cls,
+        total_lines: set[int],
+        covered_lines: set[int],
+        function_start_line: int,
+        function_end_line: int,
+    ) -> "UncoveredRanges | None":
+        """Create UncoveredRanges from total and covered line sets.
+
+        Args:
+            total_lines: Set of all lines in the function
+            covered_lines: Set of lines that were executed
+            function_start_line: First line of the function
+            function_end_line: Last line of the function
+
+        Returns:
+            UncoveredRanges if there are uncovered lines, None otherwise
+        """
+        uncovered = total_lines - covered_lines
+        if not uncovered:
+            return None
+
+        # Convert to sorted list and group into consecutive ranges
+        sorted_lines = sorted(uncovered)
+        ranges: list[LineRange] = []
+        range_start = sorted_lines[0]
+        range_length = 1
+
+        for i in range(1, len(sorted_lines)):
+            if sorted_lines[i] == sorted_lines[i - 1] + 1:
+                # Consecutive line, extend current range
+                range_length += 1
+            else:
+                # Gap found, save current range and start new one
+                ranges.append(LineRange(range_start, range_length))
+                range_start = sorted_lines[i]
+                range_length = 1
+
+        # Don't forget the last range
+        ranges.append(LineRange(range_start, range_length))
+
+        return cls(
+            ranges=ranges,
+            function_start_line=function_start_line,
+            function_end_line=function_end_line,
+        )
+
+    @classmethod
+    def from_protobuf(cls, proto: UncoveredLines) -> "UncoveredRanges":
+        """Create UncoveredRanges from a protobuf UncoveredLines message.
+
+        Args:
+            proto: The protobuf UncoveredLines message
+
+        Returns:
+            UncoveredRanges instance
+        """
+        ranges = [
+            LineRange(start=start, length=length) for start, length in zip(proto.starts, proto.lengths, strict=True)
+        ]
+        return cls(
+            ranges=ranges,
+            function_start_line=proto.function_start_line,
+            function_end_line=proto.function_end_line,
+        )
+
+    def to_protobuf(self) -> UncoveredLines:
+        """Convert to a protobuf UncoveredLines message.
+
+        Returns:
+            UncoveredLines protobuf message
+        """
+        proto = UncoveredLines()
+        proto.starts.extend(r.start for r in self.ranges)
+        proto.lengths.extend(r.length for r in self.ranges)
+        proto.function_start_line = self.function_start_line
+        proto.function_end_line = self.function_end_line
+        return proto
+
+    def get_uncovered_lines(self) -> set[int]:
+        """Get all uncovered lines as a set.
+
+        Returns:
+            Set of uncovered line numbers
+        """
+        lines: set[int] = set()
+        for r in self.ranges:
+            lines.update(range(r.start, r.start + r.length))
+        return lines
+
+    def total_uncovered_count(self) -> int:
+        """Get total count of uncovered lines.
+
+        Returns:
+            Number of uncovered lines
+        """
+        return sum(r.length for r in self.ranges)
diff --git a/common/src/buttercup/common/datastructures/msg_pb2.py b/common/src/buttercup/common/datastructures/msg_pb2.py
index 8052ece4..2c51fc0d 100644
--- a/common/src/buttercup/common/datastructures/msg_pb2.py
+++ b/common/src/buttercup/common/datastructures/msg_pb2.py
@@ -24,7 +24,7 @@
 
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\tmsg.proto\x12\x06msgpb2\"\xf0\x02\n\x04Task\x12\x12\n\nmessage_id\x18\x01 \x01(\t\x12\x14\n\x0cmessage_time\x18\x02 \x01(\x03\x12\x0f\n\x07task_id\x18\x03 \x01(\t\x12(\n\ttask_type\x18\x04 \x01(\x0e\x32\x15.msgpb2.Task.TaskType\x12%\n\x07sources\x18\x05 \x03(\x0b\x32\x14.msgpb2.SourceDetail\x12\x10\n\x08\x64\x65\x61\x64line\x18\x06 \x01(\x03\x12\x11\n\tcancelled\x18\x07 \x01(\x08\x12\x14\n\x0cproject_name\x18\x08 \x01(\t\x12\r\n\x05\x66ocus\x18\t \x01(\t\x12,\n\x08metadata\x18\n \x03(\x0b\x32\x1a.msgpb2.Task.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"3\n\x08TaskType\x12\x12\n\x0eTASK_TYPE_FULL\x10\x00\x12\x13\n\x0fTASK_TYPE_DELTA\x10\x01\"\xb9\x01\n\x0cSourceDetail\x12\x0e\n\x06sha256\x18\x01 \x01(\t\x12\x34\n\x0bsource_type\x18\x02 \x01(\x0e\x32\x1f.msgpb2.SourceDetail.SourceType\x12\x0b\n\x03url\x18\x03 \x01(\t\"V\n\nSourceType\x12\x14\n\x10SOURCE_TYPE_REPO\x10\x00\x12\x1c\n\x18SOURCE_TYPE_FUZZ_TOOLING\x10\x01\x12\x14\n\x10SOURCE_TYPE_DIFF\x10\x02\"*\n\x0cTaskDownload\x12\x1a\n\x04task\x18\x01 \x01(\x0b\x32\x0c.msgpb2.Task\"\'\n\tTaskReady\x12\x1a\n\x04task\x18\x01 \x01(\x0b\x32\x0c.msgpb2.Task\"T\n\nTaskDelete\x12\x11\n\x07task_id\x18\x01 \x01(\tH\x00\x12\r\n\x03\x61ll\x18\x03 \x01(\x08H\x00\x12\x13\n\x0breceived_at\x18\x02 \x01(\x02\x42\x0f\n\rdelete_option\"\xb9\x01\n\x0c\x42uildRequest\x12\x0e\n\x06\x65ngine\x18\x01 \x01(\t\x12\x11\n\tsanitizer\x18\x02 \x01(\t\x12\x10\n\x08task_dir\x18\x03 \x01(\t\x12\x0f\n\x07task_id\x18\x04 \x01(\t\x12%\n\nbuild_type\x18\x05 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x12\n\napply_diff\x18\x06 \x01(\x08\x12\r\n\x05patch\x18\x07 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x08 \x01(\t\"\xa9\x01\n\x0b\x42uildOutput\x12\x0e\n\x06\x65ngine\x18\x01 \x01(\t\x12\x11\n\tsanitizer\x18\x02 \x01(\t\x12\x10\n\x08task_dir\x18\x03 \x01(\t\x12\x0f\n\x07task_id\x18\x04 \x01(\t\x12%\n\nbuild_type\x18\x05 
\x01(\x0e\x32\x11.msgpb2.BuildType\x12\x12\n\napply_diff\x18\x06 \x01(\x08\x12\x19\n\x11internal_patch_id\x18\x07 \x01(\t\"^\n\x0fWeightedHarness\x12\x0e\n\x06weight\x18\x01 \x01(\x02\x12\x14\n\x0cpackage_name\x18\x02 \x01(\t\x12\x14\n\x0charness_name\x18\x03 \x01(\t\x12\x0f\n\x07task_id\x18\x04 \x01(\t\"\x85\x01\n\x05\x43rash\x12#\n\x06target\x18\x01 \x01(\x0b\x32\x13.msgpb2.BuildOutput\x12\x14\n\x0charness_name\x18\x02 \x01(\t\x12\x18\n\x10\x63rash_input_path\x18\x03 \x01(\t\x12\x12\n\nstacktrace\x18\x04 \x01(\t\x12\x13\n\x0b\x63rash_token\x18\x05 \x01(\t\"F\n\x0bTracedCrash\x12\x1c\n\x05\x63rash\x18\x01 \x01(\x0b\x32\r.msgpb2.Crash\x12\x19\n\x11tracer_stacktrace\x18\x02 \x01(\t\"Y\n\x16\x43onfirmedVulnerability\x12$\n\x07\x63rashes\x18\x01 \x03(\x0b\x32\x13.msgpb2.TracedCrash\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\"B\n\x05Patch\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\x12\r\n\x05patch\x18\x03 \x01(\t\"\x81\x01\n\x0cIndexRequest\x12%\n\nbuild_type\x18\x01 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x14\n\x0cpackage_name\x18\x02 \x01(\t\x12\x11\n\tsanitizer\x18\x03 \x01(\t\x12\x10\n\x08task_dir\x18\x04 \x01(\t\x12\x0f\n\x07task_id\x18\x05 \x01(\t\"\x80\x01\n\x0bIndexOutput\x12%\n\nbuild_type\x18\x01 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x14\n\x0cpackage_name\x18\x02 \x01(\t\x12\x11\n\tsanitizer\x18\x03 \x01(\t\x12\x10\n\x08task_dir\x18\x04 \x01(\t\x12\x0f\n\x07task_id\x18\x05 \x01(\t\"m\n\x10\x46unctionCoverage\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12\x16\n\x0e\x66unction_paths\x18\x02 \x03(\t\x12\x13\n\x0btotal_lines\x18\x03 \x01(\x05\x12\x15\n\rcovered_lines\x18\x04 \x01(\x05\"\xc4\x01\n\x14SubmissionEntryPatch\x12\r\n\x05patch\x18\x01 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\x12\x1c\n\x14\x63ompetition_patch_id\x18\x03 \x01(\t\x12*\n\rbuild_outputs\x18\x04 \x03(\x0b\x32\x13.msgpb2.BuildOutput\x12-\n\x06result\x18\x05 
\x01(\x0e\x32\x18.msgpb2.SubmissionResultH\x00\x88\x01\x01\x42\t\n\x07_result\"\x84\x01\n\x06\x42undle\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x1a\n\x12\x63ompetition_pov_id\x18\x02 \x01(\t\x12\x1c\n\x14\x63ompetition_patch_id\x18\x03 \x01(\t\x12\x1c\n\x14\x63ompetition_sarif_id\x18\x04 \x01(\t\x12\x11\n\tbundle_id\x18\x05 \x01(\t\"\x87\x01\n\x0b\x43rashWithId\x12\"\n\x05\x63rash\x18\x01 \x01(\x0b\x32\x13.msgpb2.TracedCrash\x12\x1a\n\x12\x63ompetition_pov_id\x18\x02 \x01(\t\x12-\n\x06result\x18\x03 \x01(\x0e\x32\x18.msgpb2.SubmissionResultH\x00\x88\x01\x01\x42\t\n\x07_result\"\xcb\x01\n\x0fSubmissionEntry\x12\x0c\n\x04stop\x18\x01 \x01(\x08\x12$\n\x07\x63rashes\x18\x02 \x03(\x0b\x32\x13.msgpb2.CrashWithId\x12\x1f\n\x07\x62undles\x18\x03 \x03(\x0b\x32\x0e.msgpb2.Bundle\x12-\n\x07patches\x18\x04 \x03(\x0b\x32\x1c.msgpb2.SubmissionEntryPatch\x12\x11\n\tpatch_idx\x18\x05 \x01(\x05\x12!\n\x19patch_submission_attempts\x18\x06 \x01(\x05\"|\n\x13POVReproduceRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\x12\x14\n\x0charness_name\x18\x03 \x01(\t\x12\x11\n\tsanitizer\x18\x04 \x01(\t\x12\x10\n\x08pov_path\x18\x05 \x01(\t\"W\n\x14POVReproduceResponse\x12,\n\x07request\x18\x01 \x01(\x0b\x32\x1b.msgpb2.POVReproduceRequest\x12\x11\n\tdid_crash\x18\x02 \x01(\x08*D\n\tBuildType\x12\n\n\x06\x46UZZER\x10\x00\x12\x0c\n\x08\x43OVERAGE\x10\x01\x12\x12\n\x0eTRACER_NO_DIFF\x10\x02\x12\t\n\x05PATCH\x10\x03*x\n\x10SubmissionResult\x12\x08\n\x04NONE\x10\x00\x12\x0c\n\x08\x41\x43\x43\x45PTED\x10\x01\x12\n\n\x06PASSED\x10\x02\x12\n\n\x06\x46\x41ILED\x10\x03\x12\x15\n\x11\x44\x45\x41\x44LINE_EXCEEDED\x10\x04\x12\x0b\n\x07\x45RRORED\x10\x05\x12\x10\n\x0cINCONCLUSIVE\x10\x06\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\tmsg.proto\x12\x06msgpb2\"\xf0\x02\n\x04Task\x12\x12\n\nmessage_id\x18\x01 \x01(\t\x12\x14\n\x0cmessage_time\x18\x02 \x01(\x03\x12\x0f\n\x07task_id\x18\x03 \x01(\t\x12(\n\ttask_type\x18\x04 \x01(\x0e\x32\x15.msgpb2.Task.TaskType\x12%\n\x07sources\x18\x05 \x03(\x0b\x32\x14.msgpb2.SourceDetail\x12\x10\n\x08\x64\x65\x61\x64line\x18\x06 \x01(\x03\x12\x11\n\tcancelled\x18\x07 \x01(\x08\x12\x14\n\x0cproject_name\x18\x08 \x01(\t\x12\r\n\x05\x66ocus\x18\t \x01(\t\x12,\n\x08metadata\x18\n \x03(\x0b\x32\x1a.msgpb2.Task.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"3\n\x08TaskType\x12\x12\n\x0eTASK_TYPE_FULL\x10\x00\x12\x13\n\x0fTASK_TYPE_DELTA\x10\x01\"\xb9\x01\n\x0cSourceDetail\x12\x0e\n\x06sha256\x18\x01 \x01(\t\x12\x34\n\x0bsource_type\x18\x02 \x01(\x0e\x32\x1f.msgpb2.SourceDetail.SourceType\x12\x0b\n\x03url\x18\x03 \x01(\t\"V\n\nSourceType\x12\x14\n\x10SOURCE_TYPE_REPO\x10\x00\x12\x1c\n\x18SOURCE_TYPE_FUZZ_TOOLING\x10\x01\x12\x14\n\x10SOURCE_TYPE_DIFF\x10\x02\"*\n\x0cTaskDownload\x12\x1a\n\x04task\x18\x01 \x01(\x0b\x32\x0c.msgpb2.Task\"\'\n\tTaskReady\x12\x1a\n\x04task\x18\x01 \x01(\x0b\x32\x0c.msgpb2.Task\"T\n\nTaskDelete\x12\x11\n\x07task_id\x18\x01 \x01(\tH\x00\x12\r\n\x03\x61ll\x18\x03 \x01(\x08H\x00\x12\x13\n\x0breceived_at\x18\x02 \x01(\x02\x42\x0f\n\rdelete_option\"\xb9\x01\n\x0c\x42uildRequest\x12\x0e\n\x06\x65ngine\x18\x01 \x01(\t\x12\x11\n\tsanitizer\x18\x02 \x01(\t\x12\x10\n\x08task_dir\x18\x03 \x01(\t\x12\x0f\n\x07task_id\x18\x04 \x01(\t\x12%\n\nbuild_type\x18\x05 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x12\n\napply_diff\x18\x06 \x01(\x08\x12\r\n\x05patch\x18\x07 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x08 \x01(\t\"\xa9\x01\n\x0b\x42uildOutput\x12\x0e\n\x06\x65ngine\x18\x01 \x01(\t\x12\x11\n\tsanitizer\x18\x02 \x01(\t\x12\x10\n\x08task_dir\x18\x03 \x01(\t\x12\x0f\n\x07task_id\x18\x04 \x01(\t\x12%\n\nbuild_type\x18\x05 
\x01(\x0e\x32\x11.msgpb2.BuildType\x12\x12\n\napply_diff\x18\x06 \x01(\x08\x12\x19\n\x11internal_patch_id\x18\x07 \x01(\t\"^\n\x0fWeightedHarness\x12\x0e\n\x06weight\x18\x01 \x01(\x02\x12\x14\n\x0cpackage_name\x18\x02 \x01(\t\x12\x14\n\x0charness_name\x18\x03 \x01(\t\x12\x0f\n\x07task_id\x18\x04 \x01(\t\"\x85\x01\n\x05\x43rash\x12#\n\x06target\x18\x01 \x01(\x0b\x32\x13.msgpb2.BuildOutput\x12\x14\n\x0charness_name\x18\x02 \x01(\t\x12\x18\n\x10\x63rash_input_path\x18\x03 \x01(\t\x12\x12\n\nstacktrace\x18\x04 \x01(\t\x12\x13\n\x0b\x63rash_token\x18\x05 \x01(\t\"F\n\x0bTracedCrash\x12\x1c\n\x05\x63rash\x18\x01 \x01(\x0b\x32\r.msgpb2.Crash\x12\x19\n\x11tracer_stacktrace\x18\x02 \x01(\t\"Y\n\x16\x43onfirmedVulnerability\x12$\n\x07\x63rashes\x18\x01 \x03(\x0b\x32\x13.msgpb2.TracedCrash\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\"B\n\x05Patch\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\x12\r\n\x05patch\x18\x03 \x01(\t\"\x81\x01\n\x0cIndexRequest\x12%\n\nbuild_type\x18\x01 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x14\n\x0cpackage_name\x18\x02 \x01(\t\x12\x11\n\tsanitizer\x18\x03 \x01(\t\x12\x10\n\x08task_dir\x18\x04 \x01(\t\x12\x0f\n\x07task_id\x18\x05 \x01(\t\"\x80\x01\n\x0bIndexOutput\x12%\n\nbuild_type\x18\x01 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x14\n\x0cpackage_name\x18\x02 \x01(\t\x12\x11\n\tsanitizer\x18\x03 \x01(\t\x12\x10\n\x08task_dir\x18\x04 \x01(\t\x12\x0f\n\x07task_id\x18\x05 \x01(\t\"m\n\x10\x46unctionCoverage\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12\x16\n\x0e\x66unction_paths\x18\x02 \x03(\t\x12\x13\n\x0btotal_lines\x18\x03 \x01(\x05\x12\x15\n\rcovered_lines\x18\x04 \x01(\x05\"\xc4\x01\n\x14SubmissionEntryPatch\x12\r\n\x05patch\x18\x01 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\x12\x1c\n\x14\x63ompetition_patch_id\x18\x03 \x01(\t\x12*\n\rbuild_outputs\x18\x04 \x03(\x0b\x32\x13.msgpb2.BuildOutput\x12-\n\x06result\x18\x05 
\x01(\x0e\x32\x18.msgpb2.SubmissionResultH\x00\x88\x01\x01\x42\t\n\x07_result\"\x84\x01\n\x06\x42undle\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x1a\n\x12\x63ompetition_pov_id\x18\x02 \x01(\t\x12\x1c\n\x14\x63ompetition_patch_id\x18\x03 \x01(\t\x12\x1c\n\x14\x63ompetition_sarif_id\x18\x04 \x01(\t\x12\x11\n\tbundle_id\x18\x05 \x01(\t\"\x87\x01\n\x0b\x43rashWithId\x12\"\n\x05\x63rash\x18\x01 \x01(\x0b\x32\x13.msgpb2.TracedCrash\x12\x1a\n\x12\x63ompetition_pov_id\x18\x02 \x01(\t\x12-\n\x06result\x18\x03 \x01(\x0e\x32\x18.msgpb2.SubmissionResultH\x00\x88\x01\x01\x42\t\n\x07_result\"\xcb\x01\n\x0fSubmissionEntry\x12\x0c\n\x04stop\x18\x01 \x01(\x08\x12$\n\x07\x63rashes\x18\x02 \x03(\x0b\x32\x13.msgpb2.CrashWithId\x12\x1f\n\x07\x62undles\x18\x03 \x03(\x0b\x32\x0e.msgpb2.Bundle\x12-\n\x07patches\x18\x04 \x03(\x0b\x32\x1c.msgpb2.SubmissionEntryPatch\x12\x11\n\tpatch_idx\x18\x05 \x01(\x05\x12!\n\x19patch_submission_attempts\x18\x06 \x01(\x05\"|\n\x13POVReproduceRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\x12\x14\n\x0charness_name\x18\x03 \x01(\t\x12\x11\n\tsanitizer\x18\x04 \x01(\t\x12\x10\n\x08pov_path\x18\x05 \x01(\t\"W\n\x14POVReproduceResponse\x12,\n\x07request\x18\x01 \x01(\x0b\x32\x1b.msgpb2.POVReproduceRequest\x12\x11\n\tdid_crash\x18\x02 \x01(\x08\"T\n\rMacroCallSite\x12\x11\n\tcall_line\x18\x01 \x01(\r\x12\x17\n\x0fmacro_file_path\x18\x02 \x01(\t\x12\x17\n\x0funcovered_count\x18\x03 \x01(\r\"q\n\x0eUncoveredLines\x12\x12\n\x06starts\x18\x01 \x03(\rB\x02\x10\x01\x12\x13\n\x07lengths\x18\x02 \x03(\rB\x02\x10\x01\x12\x1b\n\x13\x66unction_start_line\x18\x03 \x01(\r\x12\x19\n\x11\x66unction_end_line\x18\x04 \x01(\r\"\xe5\x01\n\x16\x46unctionUncoveredLines\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12\x16\n\x0e\x66unction_paths\x18\x02 \x03(\t\x12\x19\n\x11primary_file_path\x18\x03 \x01(\t\x12\x13\n\x0btotal_lines\x18\x04 \x01(\r\x12\x15\n\rcovered_lines\x18\x05 \x01(\r\x12)\n\tuncovered\x18\x06 
\x01(\x0b\x32\x16.msgpb2.UncoveredLines\x12*\n\x0bmacro_sites\x18\x07 \x03(\x0b\x32\x15.msgpb2.MacroCallSite*D\n\tBuildType\x12\n\n\x06\x46UZZER\x10\x00\x12\x0c\n\x08\x43OVERAGE\x10\x01\x12\x12\n\x0eTRACER_NO_DIFF\x10\x02\x12\t\n\x05PATCH\x10\x03*x\n\x10SubmissionResult\x12\x08\n\x04NONE\x10\x00\x12\x0c\n\x08\x41\x43\x43\x45PTED\x10\x01\x12\n\n\x06PASSED\x10\x02\x12\n\n\x06\x46\x41ILED\x10\x03\x12\x15\n\x11\x44\x45\x41\x44LINE_EXCEEDED\x10\x04\x12\x0b\n\x07\x45RRORED\x10\x05\x12\x10\n\x0cINCONCLUSIVE\x10\x06\x62\x06proto3')
 
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -33,10 +33,14 @@
   DESCRIPTOR._loaded_options = None
   _globals['_TASK_METADATAENTRY']._loaded_options = None
   _globals['_TASK_METADATAENTRY']._serialized_options = b'8\001'
-  _globals['_BUILDTYPE']._serialized_start=2841
-  _globals['_BUILDTYPE']._serialized_end=2909
-  _globals['_SUBMISSIONRESULT']._serialized_start=2911
-  _globals['_SUBMISSIONRESULT']._serialized_end=3031
+  _globals['_UNCOVEREDLINES'].fields_by_name['starts']._loaded_options = None
+  _globals['_UNCOVEREDLINES'].fields_by_name['starts']._serialized_options = b'\020\001'
+  _globals['_UNCOVEREDLINES'].fields_by_name['lengths']._loaded_options = None
+  _globals['_UNCOVEREDLINES'].fields_by_name['lengths']._serialized_options = b'\020\001'
+  _globals['_BUILDTYPE']._serialized_start=3274
+  _globals['_BUILDTYPE']._serialized_end=3342
+  _globals['_SUBMISSIONRESULT']._serialized_start=3344
+  _globals['_SUBMISSIONRESULT']._serialized_end=3464
   _globals['_TASK']._serialized_start=22
   _globals['_TASK']._serialized_end=390
   _globals['_TASK_METADATAENTRY']._serialized_start=290
@@ -85,4 +89,10 @@
   _globals['_POVREPRODUCEREQUEST']._serialized_end=2750
   _globals['_POVREPRODUCERESPONSE']._serialized_start=2752
   _globals['_POVREPRODUCERESPONSE']._serialized_end=2839
+  _globals['_MACROCALLSITE']._serialized_start=2841
+  _globals['_MACROCALLSITE']._serialized_end=2925
+  _globals['_UNCOVEREDLINES']._serialized_start=2927
+  _globals['_UNCOVEREDLINES']._serialized_end=3040
+  _globals['_FUNCTIONUNCOVEREDLINES']._serialized_start=3043
+  _globals['_FUNCTIONUNCOVEREDLINES']._serialized_end=3272
 # @@protoc_insertion_point(module_scope)
diff --git a/common/src/buttercup/common/datastructures/msg_pb2.pyi b/common/src/buttercup/common/datastructures/msg_pb2.pyi
index e256b878..495735d9 100644
--- a/common/src/buttercup/common/datastructures/msg_pb2.pyi
+++ b/common/src/buttercup/common/datastructures/msg_pb2.pyi
@@ -317,3 +317,43 @@ class POVReproduceResponse(_message.Message):
     request: POVReproduceRequest
     did_crash: bool
     def __init__(self, request: _Optional[_Union[POVReproduceRequest, _Mapping]] = ..., did_crash: bool = ...) -> None: ...
+
+class MacroCallSite(_message.Message):
+    __slots__ = ("call_line", "macro_file_path", "uncovered_count")
+    CALL_LINE_FIELD_NUMBER: _ClassVar[int]
+    MACRO_FILE_PATH_FIELD_NUMBER: _ClassVar[int]
+    UNCOVERED_COUNT_FIELD_NUMBER: _ClassVar[int]
+    call_line: int
+    macro_file_path: str
+    uncovered_count: int
+    def __init__(self, call_line: _Optional[int] = ..., macro_file_path: _Optional[str] = ..., uncovered_count: _Optional[int] = ...) -> None: ...
+
+class UncoveredLines(_message.Message):
+    __slots__ = ("starts", "lengths", "function_start_line", "function_end_line")
+    STARTS_FIELD_NUMBER: _ClassVar[int]
+    LENGTHS_FIELD_NUMBER: _ClassVar[int]
+    FUNCTION_START_LINE_FIELD_NUMBER: _ClassVar[int]
+    FUNCTION_END_LINE_FIELD_NUMBER: _ClassVar[int]
+    starts: _containers.RepeatedScalarFieldContainer[int]
+    lengths: _containers.RepeatedScalarFieldContainer[int]
+    function_start_line: int
+    function_end_line: int
+    def __init__(self, starts: _Optional[_Iterable[int]] = ..., lengths: _Optional[_Iterable[int]] = ..., function_start_line: _Optional[int] = ..., function_end_line: _Optional[int] = ...) -> None: ...
+
+class FunctionUncoveredLines(_message.Message):
+    __slots__ = ("function_name", "function_paths", "primary_file_path", "total_lines", "covered_lines", "uncovered", "macro_sites")
+    FUNCTION_NAME_FIELD_NUMBER: _ClassVar[int]
+    FUNCTION_PATHS_FIELD_NUMBER: _ClassVar[int]
+    PRIMARY_FILE_PATH_FIELD_NUMBER: _ClassVar[int]
+    TOTAL_LINES_FIELD_NUMBER: _ClassVar[int]
+    COVERED_LINES_FIELD_NUMBER: _ClassVar[int]
+    UNCOVERED_FIELD_NUMBER: _ClassVar[int]
+    MACRO_SITES_FIELD_NUMBER: _ClassVar[int]
+    function_name: str
+    function_paths: _containers.RepeatedScalarFieldContainer[str]
+    primary_file_path: str
+    total_lines: int
+    covered_lines: int
+    uncovered: UncoveredLines
+    macro_sites: _containers.RepeatedCompositeFieldContainer[MacroCallSite]
+    def __init__(self, function_name: _Optional[str] = ..., function_paths: _Optional[_Iterable[str]] = ..., primary_file_path: _Optional[str] = ..., total_lines: _Optional[int] = ..., covered_lines: _Optional[int] = ..., uncovered: _Optional[_Union[UncoveredLines, _Mapping]] = ..., macro_sites: _Optional[_Iterable[_Union[MacroCallSite, _Mapping]]] = ...) -> None: ...
diff --git a/common/src/buttercup/common/maps.py b/common/src/buttercup/common/maps.py
index f428489e..08a684f5 100644
--- a/common/src/buttercup/common/maps.py
+++ b/common/src/buttercup/common/maps.py
@@ -5,7 +5,13 @@
 from google.protobuf.message import Message
 from redis import Redis
 
-from buttercup.common.datastructures.msg_pb2 import BuildOutput, BuildType, FunctionCoverage, WeightedHarness
+from buttercup.common.datastructures.msg_pb2 import (
+    BuildOutput,
+    BuildType,
+    FunctionCoverage,
+    FunctionUncoveredLines,
+    WeightedHarness,
+)
 from buttercup.common.sets import RedisSet
 
 # ruff: noqa: UP046
@@ -43,6 +49,7 @@ def __iter__(self) -> Iterator[MsgType]:
 BUILD_MAP_NAME = "build_list"
 BUILD_SAN_MAP_NAME = "build_san_list"
 COVERAGE_MAP_PREFIX = "coverage_map"
+UNCOVERED_MAP_PREFIX = "uncovered_lines_map"
 
 
 # A build map makes it effecient to find for a given task_id + harness a build type
@@ -155,3 +162,44 @@ def get_function_coverage(self, function_name: str, function_paths: list[str]) -
 
     def list_function_coverage(self) -> list[FunctionCoverage]:
         return list(iter(self.mp))
+
+
+class UncoveredLinesMap:
+    """Redis-backed map for storing uncovered lines data per function."""
+
+    def __init__(self, redis: Redis, harness_name: str, package_name: str, task_id: str):
+        self.redis = redis
+        self.harness_name = harness_name
+        self.package_name = package_name
+        self.task_id = task_id
+        hash_name = [
+            UNCOVERED_MAP_PREFIX,
+            harness_name,
+            package_name,
+            task_id,
+        ]
+        hash_name_str = dumps(hash_name, json_options=CANONICAL_JSON_OPTIONS)
+        self.mp: RedisMap[FunctionUncoveredLines] = RedisMap(redis, hash_name_str, FunctionUncoveredLines)
+
+    def set_uncovered_lines(self, uncovered_lines: FunctionUncoveredLines) -> None:
+        """Store uncovered lines data for a function."""
+        function_paths_list = list(uncovered_lines.function_paths)
+        key = [
+            uncovered_lines.function_name,
+            function_paths_list,
+        ]
+        key_str = dumps(key, json_options=CANONICAL_JSON_OPTIONS)
+        self.mp.set(key_str, uncovered_lines)
+
+    def get_uncovered_lines(self, function_name: str, function_paths: list[str]) -> FunctionUncoveredLines | None:
+        """Get uncovered lines data for a function."""
+        key = [
+            function_name,
+            function_paths,
+        ]
+        key_str = dumps(key, json_options=CANONICAL_JSON_OPTIONS)
+        return self.mp.get(key_str)
+
+    def list_uncovered_lines(self) -> list[FunctionUncoveredLines]:
+        """List all stored uncovered lines data."""
+        return list(iter(self.mp))
diff --git a/fuzzer/docs/new-coverage.md b/fuzzer/docs/new-coverage.md
new file mode 100644
index 00000000..36e0bca5
--- /dev/null
+++ b/fuzzer/docs/new-coverage.md
@@ -0,0 +1,207 @@
+# Uncovered Lines Tracking v2
+
+This document describes the changes to coverage tracking introduced in v2, which improves how uncovered lines are tracked for functions that use macros or have code spanning multiple files.
+
+## Problem with Original Implementation
+
+The original implementation mixed line numbers from different files into a single set:
+
+```python
+# Old CoveredFunction
+CoveredFunction:
+    total_line_set: {2, 3, 4, 5, 6, 8, 9, 12, 13}  # Lines from BOTH foo.c AND macros.h
+    covered_line_set: {2, 5, 6, 8, 9}
+    function_start_line: 2   # Could be from macro file!
+    function_end_line: 13
+```
+
+**Issues:**
+1. Line numbers from different files were mixed together
+2. `function_start_line` could come from a macro header, not the actual function
+3. When an LLM saw "uncovered lines 3-4", it couldn't know which file those lines were in
+4. Container paths from LLVM (e.g., `/src/libpng/png.h`) don't map to task paths
+
+## v2 Solution: Per-File Tracking
+
+### New Data Structures
+
+#### FileLineCoverage
+Tracks coverage for a single file:
+
+```python
+@dataclass
+class FileLineCoverage:
+    file_id: int
+    file_path: str          # Container path from coverage
+    total_lines: set[int]
+    covered_lines: set[int]
+    is_primary: bool        # True if this is the function definition file
+```
+
+#### MacroCallSite
+Tracks where macros with uncovered code are called:
+
+```python
+@dataclass
+class MacroCallSite:
+    call_line: int              # Line in primary file where macro is called
+    macro_file_path: str        # File where macro is defined
+    uncovered_line_count: int   # How many lines inside macro are uncovered
+```
+
+#### Updated CoveredFunction
+```python
+@dataclass
+class CoveredFunction:
+    names: str
+    total_lines: int           # Aggregate count (unchanged)
+    covered_lines: int         # Aggregate count (unchanged)
+    function_paths: list[str]
+
+    # Existing fields (for backwards compatibility)
+    total_line_set: set[int] | None
+    covered_line_set: set[int] | None
+    function_start_line: int | None
+    function_end_line: int | None
+
+    # NEW: Per-file tracking
+    file_coverage: list[FileLineCoverage] | None
+    primary_file_id: int | None
+    macro_call_sites: list[MacroCallSite] | None
+```
+
+### Protobuf Changes
+
+New messages in `msg.proto`:
+
+```protobuf
+message MacroCallSite {
+    uint32 call_line = 1;        // Line in primary file where macro is called
+    string macro_file_path = 2;  // File where macro is defined
+    uint32 uncovered_count = 3;  // Lines inside macro that are uncovered
+}
+
+message UncoveredLines {
+    repeated uint32 starts = 1 [packed = true];
+    repeated uint32 lengths = 2 [packed = true];
+    uint32 function_start_line = 3;
+    uint32 function_end_line = 4;
+}
+
+message FunctionUncoveredLines {
+    string function_name = 1;
+    repeated string function_paths = 2;
+    string primary_file_path = 3;      // Renamed from file_path
+    uint32 total_lines = 4;
+    uint32 covered_lines = 5;
+    UncoveredLines uncovered = 6;      // Uncovered lines in PRIMARY file only
+    repeated MacroCallSite macro_sites = 7;  // Macro call sites with uncovered code
+}
+```
+
+### Key Changes
+
+#### 1. Primary File Identification
+
+The primary file (where the function is defined) is identified by:
+- File with the most `REGION_KIND_CODE` regions
+- Preference for source files (`.c`, `.cpp`, `.cc`, `.cxx`) over headers as a tiebreaker
+
+```python
+def find_primary_file(regions: list, filenames: list[str]) -> int:
+    """Find the file_id of the primary file."""
+    code_region_counts: dict[int, int] = {}
+
+    for region in regions:
+        if region[7] == REGION_KIND_CODE:
+            file_id = region[5]
+            code_region_counts[file_id] = code_region_counts.get(file_id, 0) + 1
+
+    def file_sort_key(fid):
+        count = code_region_counts[fid]
+        is_source = filenames[fid].endswith(('.c', '.cpp', '.cc', '.cxx'))
+        return (count, is_source)
+
+    return max(code_region_counts.keys(), key=file_sort_key)
+```
+
+#### 2. Per-File Region Processing
+
+CODE regions are now grouped by their `file_id`:
+
+```python
+for region in regions:
+    if kind == REGION_KIND_CODE:
+        file_id = region[5]
+        # Track in file-specific sets
+        if file_id not in lines_by_file:
+            lines_by_file[file_id] = (set(), set())
+        file_total, file_covered = lines_by_file[file_id]
+        self._add_region_lines(region, file_total, file_covered)
+```
+
+#### 3. Macro Call Site Tracking
+
+EXPANSION regions are processed to track call sites with uncovered code:
+
+```python
+elif kind == REGION_KIND_EXPANSION:
+    # Get expansion lines (still needed for aggregate counts)
+    exp_total, exp_covered = self._get_expansion_lines(...)
+
+    # Track as macro call site if has uncovered code
+    uncovered_count = len(exp_total - exp_covered)
+    if uncovered_count > 0:
+        macro_call_sites.append(MacroCallSite(
+            call_line=region[0],
+            macro_file_path=expansion_file_path,
+            uncovered_line_count=uncovered_count,
+        ))
+```
+
+#### 4. Redis Storage
+
+`FunctionUncoveredLines` now stores:
+- `primary_file_path`: The actual function definition file
+- `uncovered`: Only lines from the primary file
+- `macro_sites`: List of macro call sites with uncovered code
+
+## Example
+
+Consider a function `process_data()` in `foo.c` that calls a macro `CHECK_NULL()` from `macros.h`:
+
+### Old Model
+```
+uncovered: starts=[3, 12], lengths=[2, 2]  <- Lines 3-4 are from macros.h!
+```
+Problem: the LLM looks at lines 3-4 of foo.c, but those line numbers actually refer to macros.h, so they don't match.
+
+### New Model
+```
+primary_file_path: '/src/foo.c'
+uncovered: starts=[12], lengths=[2]  <- Only lines from foo.c (12-13)
+macro_sites: [
+    MacroCallSite(call_line=6, macro_file_path='/src/macros.h', uncovered_count=2)
+]
+```
+Benefit: LLM knows lines 12-13 are in foo.c, and line 6 has a macro with uncovered code.
+
+## Backwards Compatibility
+
+- Aggregate `total_lines` and `covered_lines` counts still include expansion lines
+- `total_line_set` and `covered_line_set` still contain mixed lines (for existing code)
+- New fields (`file_coverage`, `primary_file_id`, `macro_call_sites`) are `None` when coverage is 0% or 100%
+
+## New and Modified Files
+
+- `common/src/buttercup/common/coverage_utils.py`: `UncoveredRanges` class for line set ↔ protobuf conversion
+- `common/src/buttercup/common/maps.py`: Added `UncoveredLinesMap` for Redis storage
+
+## Usage for LLM Guidance
+
+The seed-gen component can now provide the LLM with:
+1. Function source from program model (avoids path mapping issues)
+2. Which lines in the function body are uncovered (from primary file)
+3. Which macro calls have uncovered code (call site + count)
+
+This enables targeted input generation to reach specific uncovered paths.
diff --git a/fuzzer/src/buttercup/fuzzing_infra/coverage_bot.py b/fuzzer/src/buttercup/fuzzing_infra/coverage_bot.py
index a31c5883..b5afc211 100644
--- a/fuzzer/src/buttercup/fuzzing_infra/coverage_bot.py
+++ b/fuzzer/src/buttercup/fuzzing_infra/coverage_bot.py
@@ -10,11 +10,21 @@
 from buttercup.common import node_local
 from buttercup.common.challenge_task import ChallengeTask
 from buttercup.common.corpus import Corpus
+from buttercup.common.coverage_utils import UncoveredRanges
 from buttercup.common.datastructures.aliases import BuildType as BuildTypeHint
-from buttercup.common.datastructures.msg_pb2 import BuildOutput, BuildType, FunctionCoverage, WeightedHarness
+from buttercup.common.datastructures.msg_pb2 import (
+    BuildOutput,
+    BuildType,
+    FunctionCoverage,
+    FunctionUncoveredLines,
+    WeightedHarness,
+)
+from buttercup.common.datastructures.msg_pb2 import (
+    MacroCallSite as MacroCallSiteProto,
+)
 from buttercup.common.default_task_loop import TaskLoop
 from buttercup.common.logger import setup_package_logger
-from buttercup.common.maps import CoverageMap
+from buttercup.common.maps import CoverageMap, UncoveredLinesMap
 from buttercup.common.telemetry import CRSActionCategory, init_telemetry, set_crs_attributes
 from buttercup.common.utils import setup_periodic_zombie_reaper
 from opentelemetry import trace
@@ -195,8 +205,10 @@ def _submit_function_coverage(
 
         """
         coverage_map = CoverageMap(self.redis, harness_name, package_name, task_id)
+        uncovered_map = UncoveredLinesMap(self.redis, harness_name, package_name, task_id)
 
         updated_functions = 0
+        updated_uncovered = 0
         for function in func_coverage:
             function_coverage = FunctionCoverage()
             function_paths_set = set(function.function_paths)
@@ -211,7 +223,49 @@ def _submit_function_coverage(
             if CoverageBot._should_update_function_coverage(coverage_map, function_coverage):
                 coverage_map.set_function_coverage(function_coverage)
                 updated_functions += 1
+
+                # Submit uncovered lines data for partial coverage
+                if function.file_coverage is not None:
+                    # Find primary file coverage
+                    primary_coverage = next(
+                        (fc for fc in function.file_coverage if fc.is_primary),
+                        None,
+                    )
+
+                    if primary_coverage and primary_coverage.total_lines - primary_coverage.covered_lines:
+                        uncovered_ranges = UncoveredRanges.from_line_sets(
+                            primary_coverage.total_lines,
+                            primary_coverage.covered_lines,
+                            min(primary_coverage.total_lines),
+                            max(primary_coverage.total_lines),
+                        )
+
+                        if uncovered_ranges is not None:
+                            # Convert macro call sites to protobuf
+                            macro_sites = [
+                                MacroCallSiteProto(
+                                    call_line=m.call_line,
+                                    macro_file_path=m.macro_file_path,
+                                    uncovered_count=m.uncovered_line_count,
+                                )
+                                for m in (function.macro_call_sites or [])
+                            ]
+
+                            uncovered_data = FunctionUncoveredLines(
+                                function_name=function.names,
+                                function_paths=function_paths,
+                                primary_file_path=primary_coverage.file_path,
+                                total_lines=function.total_lines,
+                                covered_lines=function.covered_lines,
+                                uncovered=uncovered_ranges.to_protobuf(),
+                                macro_sites=macro_sites,
+                            )
+                            uncovered_map.set_uncovered_lines(uncovered_data)
+                            updated_uncovered += 1
+
         logger.info(f"Updated coverage for {updated_functions} functions in Redis")
+        if updated_uncovered > 0:
+            logger.info(f"Updated uncovered lines for {updated_uncovered} functions in Redis")
 
 
 def main() -> None:
diff --git a/fuzzer/src/buttercup/fuzzing_infra/coverage_runner.py b/fuzzer/src/buttercup/fuzzing_infra/coverage_runner.py
index db96272e..be000ad5 100644
--- a/fuzzer/src/buttercup/fuzzing_infra/coverage_runner.py
+++ b/fuzzer/src/buttercup/fuzzing_infra/coverage_runner.py
@@ -44,6 +44,26 @@ class CachedExpansionLines(NamedTuple):
     covered_lines: frozenset[int]
 
 
+@dataclass
+class FileLineCoverage:
+    """Coverage data for lines in a single file."""
+
+    file_id: int
+    file_path: str  # Container path from coverage
+    total_lines: set[int]
+    covered_lines: set[int]
+    is_primary: bool  # True if this is the function definition file
+
+
+@dataclass
+class MacroCallSite:
+    """Location where a macro with uncovered code is called."""
+
+    call_line: int  # Line in primary file where macro is called
+    macro_file_path: str  # File where macro is defined
+    uncovered_line_count: int  # How many lines inside macro are uncovered
+
+
 # Type aliases for complex data structures
 ExpansionMap = dict[ExpansionKey, list[Any]]
 CoordToFilenames = dict[RegionCoords, list[str]]
@@ -65,9 +85,51 @@ class CoveredFunction:
     """Coverage metrics for a single function."""
 
     names: str
-    total_lines: int
-    covered_lines: int
+    total_lines: int  # Aggregate count
+    covered_lines: int  # Aggregate count
     function_paths: list[str]
+    # Line-level data for partial coverage (only populated when 0 < coverage < 100%)
+    total_line_set: set[int] | None = None
+    covered_line_set: set[int] | None = None
+    function_start_line: int | None = None
+    function_end_line: int | None = None
+    # For partial coverage - per-file data
+    file_coverage: list[FileLineCoverage] | None = None
+    primary_file_id: int | None = None
+    # Macro call sites (line in primary file where macro is invoked)
+    macro_call_sites: list[MacroCallSite] | None = None
+
+
+def find_primary_file(regions: list[Any], filenames: list[str]) -> int:
+    """Find the file_id of the primary file (where function is defined).
+
+    Picks the file with the most REGION_KIND_CODE regions, preferring
+    .c/.cpp/.cc/.cxx files on ties; returns 0 if there are no code regions.
+    Short regions default to kind=CODE, file_id=0 (as in the region processing).
+    """
+    code_region_counts: dict[int, int] = {}
+
+    for region in regions:
+        if len(region) < 5:
+            continue
+        kind = region[7] if len(region) > 7 else REGION_KIND_CODE
+        file_id = region[5] if len(region) > 5 else 0
+
+        if kind == REGION_KIND_CODE:
+            code_region_counts[file_id] = code_region_counts.get(file_id, 0) + 1
+
+    if not code_region_counts:
+        return 0
+
+    # Rank by (code region count, is C/C++ source); higher tuple wins
+    def file_sort_key(fid: int) -> tuple[int, bool]:
+        count = code_region_counts[fid]
+        is_source = False
+        if fid < len(filenames):
+            is_source = filenames[fid].endswith((".c", ".cpp", ".cc", ".cxx"))
+        return (count, is_source)
+
+    return max(code_region_counts.keys(), key=file_sort_key)
 
 
 class CoverageRunner:
@@ -127,28 +189,43 @@ def _process_function_coverage(self, coverage_data: dict[str, Any]) -> list[Cove
                 regions = function["regions"]
                 filenames = function.get("filenames", [])
 
-                covered_lines: set[int] = set()
-                total_lines: set[int] = set()
-
-                self._process_regions(
-                    regions,
-                    total_lines,
-                    covered_lines,
-                    expansion_map,
-                    coord_to_filenames,
-                    filenames,
-                    expansion_lines_cache,
+                # Use new per-file tracking method
+                file_coverage, primary_file_id, macro_call_sites, total_lines, covered_lines = (
+                    self._process_regions_with_file_tracking(
+                        regions,
+                        filenames,
+                        expansion_map,
+                        coord_to_filenames,
+                        expansion_lines_cache,
+                    )
                 )
 
                 total_line_count = len(total_lines)
                 covered_line_count = len(covered_lines)
                 if covered_line_count > 0:
+                    # Check if this is partial coverage (0 < coverage < 100%)
+                    is_partial = 0 < covered_line_count < total_line_count
+
+                    # Get primary file coverage for start/end lines
+                    primary_coverage = file_coverage.get(primary_file_id) if primary_file_id is not None else None
+                    func_start = min(primary_coverage.total_lines) if primary_coverage else min(total_lines)
+                    func_end = max(primary_coverage.total_lines) if primary_coverage else max(total_lines)
+
                     function_coverage.append(
                         CoveredFunction(
                             name,
                             total_line_count,
                             covered_line_count,
                             function.get("filenames", []),
+                            # Include line sets for partial coverage only
+                            total_line_set=total_lines.copy() if is_partial else None,
+                            covered_line_set=covered_lines.copy() if is_partial else None,
+                            function_start_line=func_start if is_partial else None,
+                            function_end_line=func_end if is_partial else None,
+                            # New per-file tracking fields
+                            file_coverage=list(file_coverage.values()) if is_partial else None,
+                            primary_file_id=primary_file_id if is_partial else None,
+                            macro_call_sites=macro_call_sites if is_partial and macro_call_sites else None,
                         ),
                     )
 
@@ -357,6 +434,115 @@ def _add_region_lines(
         if execution_count > 0:
             covered_lines.update(lines)
 
+    def _process_regions_with_file_tracking(
+        self,
+        regions: list[Any],
+        filenames: list[str],
+        expansion_map: ExpansionMap,
+        coord_to_filenames: CoordToFilenames,
+        expansion_lines_cache: ExpansionLinesCache,
+    ) -> tuple[dict[int, FileLineCoverage], int, list[MacroCallSite], set[int], set[int]]:
+        """Process regions and group by file, also tracking macro call sites.
+
+        Returns:
+            - Dict of file_id -> FileLineCoverage (for CODE regions only)
+            - Primary file_id
+            - List of macro call sites with uncovered code
+            - Aggregate total_lines set (includes expansion lines)
+            - Aggregate covered_lines set (includes expansion lines)
+        """
+        # Track lines by file for CODE regions
+        lines_by_file: dict[int, tuple[set[int], set[int]]] = {}  # file_id -> (total, covered)
+        macro_call_sites: list[MacroCallSite] = []
+
+        # Aggregate sets (for backwards compatibility, includes expansion lines)
+        total_lines: set[int] = set()
+        covered_lines: set[int] = set()
+
+        filenames_set = set(filenames) if filenames else set()
+        primary_file_id = find_primary_file(regions, filenames)
+
+        for region in regions:
+            if len(region) < 5:
+                continue
+
+            region_kind = region[7] if len(region) > 7 else REGION_KIND_CODE
+            file_id = region[5] if len(region) > 5 else 0
+
+            if region_kind == REGION_KIND_CODE:
+                # Track in file-specific sets
+                if file_id not in lines_by_file:
+                    lines_by_file[file_id] = (set(), set())
+                file_total, file_covered = lines_by_file[file_id]
+                self._add_region_lines(region, file_total, file_covered)
+
+                # Also add to aggregate sets
+                self._add_region_lines(region, total_lines, covered_lines)
+
+            elif region_kind == REGION_KIND_EXPANSION:
+                # Process expansion for aggregate counts
+                coords = RegionCoords(region[0], region[1], region[2], region[3])
+                expansion_filenames = coord_to_filenames.get(coords, [])
+                for fn in expansion_filenames:
+                    if fn in filenames_set:
+                        key = ExpansionKey.from_coords(fn, coords)
+                        if key in expansion_map:
+                            # Get or compute expansion lines
+                            if key in expansion_lines_cache:
+                                cached = expansion_lines_cache[key]
+                                exp_total = cached.total_lines
+                                exp_covered = cached.covered_lines
+                            else:
+                                exp_total_set: set[int] = set()
+                                exp_covered_set: set[int] = set()
+                                self._process_expansion_lines(
+                                    expansion_map[key],
+                                    exp_total_set,
+                                    exp_covered_set,
+                                    expansion_map,
+                                    coord_to_filenames,
+                                    filenames_set,
+                                    expansion_lines_cache,
+                                )
+                                expansion_lines_cache[key] = CachedExpansionLines(
+                                    frozenset(exp_total_set),
+                                    frozenset(exp_covered_set),
+                                )
+                                exp_total = frozenset(exp_total_set)
+                                exp_covered = frozenset(exp_covered_set)
+
+                            # Add to aggregate counts
+                            total_lines.update(exp_total)
+                            covered_lines.update(exp_covered)
+
+                            # Track as macro call site if has uncovered code
+                            uncovered_count = len(exp_total - exp_covered)
+                            if uncovered_count > 0:
+                                call_line = region[0]  # Line where macro is called
+                                # Get the macro file path from the expansion
+                                macro_file_path = fn
+                                macro_call_sites.append(
+                                    MacroCallSite(
+                                        call_line=call_line,
+                                        macro_file_path=macro_file_path,
+                                        uncovered_line_count=uncovered_count,
+                                    )
+                                )
+                            break
+
+        # Convert to FileLineCoverage objects
+        file_coverage: dict[int, FileLineCoverage] = {}
+        for fid, (ftotal, fcovered) in lines_by_file.items():
+            file_coverage[fid] = FileLineCoverage(
+                file_id=fid,
+                file_path=filenames[fid] if fid < len(filenames) else "",
+                total_lines=ftotal,
+                covered_lines=fcovered,
+                is_primary=(fid == primary_file_id),
+            )
+
+        return file_coverage, primary_file_id, macro_call_sites, total_lines, covered_lines
+
     def run(self, harness_name: str, corpus_dir: str) -> list[CoveredFunction] | None:
         lang = ProjectYaml(self.tool, self.tool.project_name).unified_language
         if lang in [Language.C, Language.CPP]:

From e89901fd42840b99e021a06eeb786990cb36cfa3 Mon Sep 17 00:00:00 2001
From: Henrik Brodin <90325907+hbrodin@users.noreply.github.com>
Date: Fri, 30 Jan 2026 15:01:45 +0100
Subject: [PATCH 2/3] Add design doc for hierarchical coverage representation

Documents a tree-based coverage representation that mirrors macro expansion
structure, designed to provide actionable guidance for LLMs generating
targeted fuzzing inputs.

Key concepts:
- CodeBlock: contiguous non-macro code with line-level coverage
- MacroExpansion: call site with recursive children from macro body
- FunctionCoverageHierarchy: complete tree for partial coverage functions

Only functions with partial coverage (0 < covered < total) are processed,
as fully covered or uncovered functions don't benefit from this analysis.

Includes algorithm for building the tree from LLVM coverage JSON,
protobuf message definitions, and LLM prompt generation format.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 fuzzer/docs/coverage-precision.md | 539 ++++++++++++++++++++++++++++++
 1 file changed, 539 insertions(+)
 create mode 100644 fuzzer/docs/coverage-precision.md

diff --git a/fuzzer/docs/coverage-precision.md b/fuzzer/docs/coverage-precision.md
new file mode 100644
index 00000000..d57f97df
--- /dev/null
+++ b/fuzzer/docs/coverage-precision.md
@@ -0,0 +1,539 @@
+# Precise Coverage Hierarchy for LLM Guidance
+
+This document describes a hierarchical coverage representation designed to guide LLMs in generating inputs that target specific uncovered code paths.
+
+## Scope
+
+This feature applies only to **functions with partial coverage** (0 < covered_lines < total_lines); functions with 0% or 100% coverage are excluded. The rationale for each case:
+- **0% coverage**: Function never executed - need to find how to call it first
+- **100% coverage**: Nothing to improve
+- **Partial coverage**: Has uncovered paths we can target with specific inputs
+
+## Problem Statement
+
+The current flat representation of uncovered lines is insufficient for LLM guidance:
+
+```
+Uncovered lines: [48-52, 104-106, 965-970]
+```
+
+Problems:
+1. **No context**: What do these lines do? Why aren't they covered?
+2. **Mixed sources**: Lines may come from different files (macros, includes)
+3. **No structure**: Can't tell if line 104 is inside a macro called from line 50
+4. **No actionability**: How should the LLM trigger these paths?
+
+## Solution: Coverage Hierarchy
+
+Represent coverage as a tree that mirrors the code's macro expansion structure:
+
+```
+Function: png_read_end (pngread.c:912-987)
+│
+├─ CodeBlock [912-915] ✓ covered
+│
+├─ MacroExpansion: PNG_SETJMP() at line 916
+│   ├─ CodeBlock [pngpriv.h:45-47] ✓ covered
+│   └─ CodeBlock [pngpriv.h:48-52] ✗ UNCOVERED
+│
+├─ CodeBlock [917-925] ✓ covered
+│
+├─ MacroExpansion: png_crc_finish() at line 926
+│   ├─ CodeBlock [png.c:230-235] ✓ covered
+│   ├─ MacroExpansion: PNG_CRC_CHECK() at png.c:236
+│   │   ├─ CodeBlock [pngpriv.h:102-103] ✓ covered
+│   │   └─ CodeBlock [pngpriv.h:104-106] ✗ UNCOVERED
+│   └─ CodeBlock [png.c:237-240] ✓ covered
+│
+└─ CodeBlock [927-987] partial
+    └─ Lines 965-970 ✗ UNCOVERED
+```
+
+## LLVM Coverage Background
+
+### Region Format
+```
+[lineStart, colStart, lineEnd, colEnd, execCount, fileID, expandedFileID, kind]
+```
+
+- **fileID**: Index into the function's `filenames` array (per-function, not global)
+- **expandedFileID**: For ExpansionRegions, points to file containing macro body
+- **kind**: 0=Code, 1=Expansion, 2=Skipped, 3=Gap, 4+=Branch/MCDC
+
+### Expansion Hierarchy in LLVM
+
+1. Function's `regions` array contains CodeRegions and ExpansionRegions
+2. ExpansionRegion marks WHERE a macro is called (call site)
+3. `expandedFileID` tells us which file contains the macro body
+4. Actual macro body regions are in `files[].expansions[].target_regions`
+5. Macro bodies can contain nested ExpansionRegions (recursive)
+
+## Data Model
+
+### Python Classes
+
+```python
+@dataclass
+class CodeBlock:
+    """A contiguous block of non-macro code."""
+    file_path: str
+    start_line: int
+    end_line: int
+    covered_lines: set[int]
+    total_lines: set[int]
+
+    @property
+    def uncovered_lines(self) -> set[int]:
+        return self.total_lines - self.covered_lines
+
+    @property
+    def is_fully_covered(self) -> bool:
+        return len(self.uncovered_lines) == 0
+
+    @property
+    def coverage_fraction(self) -> float:
+        if not self.total_lines:
+            return 1.0
+        return len(self.covered_lines) / len(self.total_lines)
+
+
+@dataclass
+class MacroExpansion:
+    """A macro call site with its expansion hierarchy."""
+    call_file: str
+    call_line: int
+    call_column: int
+    macro_file: str  # From expandedFileID
+    children: list["CoverageNode"]
+
+    def total_uncovered_lines(self) -> int:
+        """Recursively count uncovered lines in this expansion tree."""
+        count = 0
+        for child in self.children:
+            if isinstance(child, CodeBlock):
+                count += len(child.uncovered_lines)
+            elif isinstance(child, MacroExpansion):
+                count += child.total_uncovered_lines()
+        return count
+
+    def has_uncovered_code(self) -> bool:
+        return self.total_uncovered_lines() > 0
+
+
+# Type alias for tree nodes
+CoverageNode = CodeBlock | MacroExpansion
+
+
+@dataclass
+class FunctionCoverageHierarchy:
+    """Complete coverage hierarchy for a partially-covered function."""
+    function_name: str
+    primary_file: str
+    start_line: int
+    end_line: int
+    total_lines: int
+    covered_lines: int
+    children: list[CoverageNode]
+
+    @property
+    def coverage_percentage(self) -> float:
+        return (self.covered_lines / self.total_lines * 100) if self.total_lines else 0
+
+    def iter_uncovered_paths(self) -> Iterator[tuple[list[str], CodeBlock]]:
+        """Yield (path, code_block) for each uncovered code block.
+
+        Path is like ["png_read_end", "pngpriv.h at pngread.c:916", "pngpriv.h:48-52"]
+        """
+        yield from self._iter_uncovered(self.children, [self.function_name])
+
+    def _iter_uncovered(self, nodes, path):
+        for node in nodes:
+            if isinstance(node, CodeBlock):
+                if not node.is_fully_covered:
+                    yield (path + [f"{node.file_path}:{node.start_line}-{node.end_line}"], node)
+            elif isinstance(node, MacroExpansion):
+                if node.has_uncovered_code():
+                    macro_path = path + [f"{node.macro_file} at {node.call_file}:{node.call_line}"]
+                    yield from self._iter_uncovered(node.children, macro_path)
+```
+
+### Protobuf Messages
+
+```protobuf
+// Coverage hierarchy for LLM guidance (only for partial coverage functions)
+
+message CodeBlock {
+    string file_path = 1;
+    uint32 start_line = 2;
+    uint32 end_line = 3;
+    // Run-length encoded line coverage within block
+    repeated uint32 covered_starts = 4 [packed = true];
+    repeated uint32 covered_lengths = 5 [packed = true];
+    repeated uint32 uncovered_starts = 6 [packed = true];
+    repeated uint32 uncovered_lengths = 7 [packed = true];
+}
+
+message MacroExpansion {
+    string call_file = 1;
+    uint32 call_line = 2;
+    uint32 call_column = 3;
+    string macro_file = 4;
+    repeated CoverageNode children = 5;
+}
+
+message CoverageNode {
+    oneof node {
+        CodeBlock code = 1;
+        MacroExpansion macro = 2;
+    }
+}
+
+message FunctionCoverageHierarchy {
+    string function_name = 1;
+    string primary_file = 2;
+    uint32 start_line = 3;
+    uint32 end_line = 4;
+    uint32 total_lines = 5;
+    uint32 covered_lines = 6;
+    repeated CoverageNode children = 7;
+}
+```
+
+## Algorithm
+
+### Building the Hierarchy
+
+```python
+def build_coverage_hierarchy(
+    function: dict,
+    file_expansions: dict[str, list],
+) -> FunctionCoverageHierarchy | None:
+    """Build hierarchical coverage for a function.
+
+    Args:
+        function: Function object from LLVM coverage JSON
+        file_expansions: Map of filename -> list of expansion objects
+
+    Returns:
+        FunctionCoverageHierarchy for partial coverage, None otherwise
+    """
+    filenames = function['filenames']
+    regions = function['regions']
+
+    # Calculate aggregate coverage
+    total_lines, covered_lines = count_all_lines(regions, filenames, file_expansions)
+
+    # Skip if not partial coverage
+    if covered_lines == 0 or covered_lines == total_lines:
+        return None
+
+    # Find primary file (where function is defined)
+    primary_file_id = find_primary_file(regions, filenames)
+    primary_file = filenames[primary_file_id]
+
+    # Build the tree starting from primary file regions
+    children = build_node_tree(
+        regions=[r for r in regions if r[5] == primary_file_id],
+        all_regions=regions,
+        filenames=filenames,
+        file_expansions=file_expansions,
+    )
+
+    # Compute function bounds from primary file
+    primary_regions = [r for r in regions if r[5] == primary_file_id and r[7] == CODE_REGION]
+    start_line = min(r[0] for r in primary_regions) if primary_regions else 0
+    end_line = max(r[2] for r in primary_regions) if primary_regions else 0
+
+    return FunctionCoverageHierarchy(
+        function_name=function['name'],
+        primary_file=primary_file,
+        start_line=start_line,
+        end_line=end_line,
+        total_lines=total_lines,
+        covered_lines=covered_lines,
+        children=children,
+    )
+
+
+def build_node_tree(
+    regions: list,
+    all_regions: list,
+    filenames: list[str],
+    file_expansions: dict[str, list],
+) -> list[CoverageNode]:
+    """Build tree of CoverageNodes from regions.
+
+    Regions should be pre-filtered to a specific file_id.
+    """
+    # Sort by start position
+    sorted_regions = sorted(regions, key=lambda r: (r[0], r[1]))
+
+    nodes: list[CoverageNode] = []
+    pending_code_lines: dict[int, bool] = {}  # line -> is_covered
+
+    for region in sorted_regions:
+        kind = region[7] if len(region) > 7 else CODE_REGION
+
+        if kind == CODE_REGION:
+            # Accumulate code lines
+            exec_count = region[4]
+            for line in range(region[0], region[2] + 1):
+                if line not in pending_code_lines:
+                    pending_code_lines[line] = (exec_count > 0)
+                else:
+                    # Line is covered if ANY region covering it is executed
+                    pending_code_lines[line] |= (exec_count > 0)
+
+        elif kind == EXPANSION_REGION:
+            # Flush pending code block before macro
+            if pending_code_lines:
+                nodes.append(make_code_block(filenames[region[5]], pending_code_lines))
+                pending_code_lines = {}
+
+            # Build macro expansion node
+            file_id = region[5]
+            expanded_file_id = region[6]
+
+            # Look up expansion's target regions
+            target_regions = lookup_expansion(
+                file_expansions,
+                filenames[file_id],
+                (region[0], region[1], region[2], region[3])
+            )
+
+            # Recursively build children from target regions
+            macro_children = build_expansion_tree(
+                target_regions,
+                filenames,
+                file_expansions,
+            )
+
+            nodes.append(MacroExpansion(
+                call_file=filenames[file_id],
+                call_line=region[0],
+                call_column=region[1],
+                macro_file=filenames[expanded_file_id] if expanded_file_id < len(filenames) else "unknown",
+                children=macro_children,
+            ))
+
+    # Flush remaining code
+    if pending_code_lines:
+        file_id = sorted_regions[0][5] if sorted_regions else 0
+        nodes.append(make_code_block(filenames[file_id], pending_code_lines))
+
+    return nodes
+
+
+def build_expansion_tree(
+    target_regions: list,
+    filenames: list[str],
+    file_expansions: dict[str, list],
+    visited: set[tuple] | None = None,
+) -> list[CoverageNode]:
+    """Recursively build tree from expansion target regions."""
+    if visited is None:
+        visited = set()
+
+    nodes: list[CoverageNode] = []
+    pending_code_lines: dict[int, bool] = {}
+    current_file_id = None
+
+    for region in target_regions:
+        if len(region) < 5:
+            continue
+
+        kind = region[7] if len(region) > 7 else CODE_REGION
+        file_id = region[5] if len(region) > 5 else 0
+
+        # Track file changes
+        if current_file_id is None:
+            current_file_id = file_id
+        elif file_id != current_file_id:
+            # Flush code block when switching files
+            if pending_code_lines and current_file_id < len(filenames):
+                nodes.append(make_code_block(filenames[current_file_id], pending_code_lines))
+                pending_code_lines = {}
+            current_file_id = file_id
+
+        if kind == CODE_REGION:
+            exec_count = region[4]
+            for line in range(region[0], region[2] + 1):
+                if line not in pending_code_lines:
+                    pending_code_lines[line] = (exec_count > 0)
+                else:
+                    pending_code_lines[line] |= (exec_count > 0)
+
+        elif kind == EXPANSION_REGION:
+            # Prevent infinite recursion
+            region_key = (file_id, region[0], region[1], region[2], region[3])
+            if region_key in visited:
+                continue
+            visited.add(region_key)
+
+            # Flush pending code
+            if pending_code_lines and current_file_id < len(filenames):
+                nodes.append(make_code_block(filenames[current_file_id], pending_code_lines))
+                pending_code_lines = {}
+
+            # Get nested expansion
+            expanded_file_id = region[6]
+            nested_targets = lookup_expansion(
+                file_expansions,
+                filenames[file_id] if file_id < len(filenames) else "",
+                (region[0], region[1], region[2], region[3])
+            )
+
+            nested_children = build_expansion_tree(
+                nested_targets,
+                filenames,
+                file_expansions,
+                visited,
+            )
+
+            if file_id < len(filenames):
+                nodes.append(MacroExpansion(
+                    call_file=filenames[file_id],
+                    call_line=region[0],
+                    call_column=region[1],
+                    macro_file=filenames[expanded_file_id] if expanded_file_id < len(filenames) else "unknown",
+                    children=nested_children,
+                ))
+
+    # Flush remaining code
+    if pending_code_lines and current_file_id is not None and current_file_id < len(filenames):
+        nodes.append(make_code_block(filenames[current_file_id], pending_code_lines))
+
+    return nodes
+
+
+def make_code_block(file_path: str, lines: dict[int, bool]) -> CodeBlock:
+    """Create a CodeBlock from accumulated line coverage data."""
+    sorted_lines = sorted(lines.keys())
+    return CodeBlock(
+        file_path=file_path,
+        start_line=min(sorted_lines),
+        end_line=max(sorted_lines),
+        covered_lines={ln for ln, cov in lines.items() if cov},
+        total_lines=set(sorted_lines),
+    )
+```
+
+## LLM Prompt Generation
+
+### Serialization for LLM
+
+```python
+def format_hierarchy_for_llm(hierarchy: FunctionCoverageHierarchy) -> str:
+    """Render a function's coverage hierarchy as plain text for an LLM prompt.
+
+    Output: a header (name, location, coverage), the indented node tree from
+    `format_nodes`, then a numbered list of uncovered paths when any exist.
+    """
+    out = [f"Function: {hierarchy.function_name}"]
+    out.append(f"Location: {hierarchy.primary_file}:{hierarchy.start_line}-{hierarchy.end_line}")
+    out.append(f"Coverage: {hierarchy.coverage_percentage:.1f}% ({hierarchy.covered_lines}/{hierarchy.total_lines} lines)")
+    out += ["", "Code structure with uncovered paths:", ""]
+    out += format_nodes(hierarchy.children, indent=0)
+
+    # Summary section: one numbered entry per uncovered path, omitted when there are none.
+    targets = list(hierarchy.iter_uncovered_paths())
+    if targets:
+        out += ["", "Uncovered code paths to target:"]
+        for number, (path, block) in enumerate(targets, 1):
+            out += [
+                f"  {number}. {' → '.join(path)}",
+                f"     {len(block.uncovered_lines)} uncovered lines",
+            ]
+
+    return "\n".join(out)
+
+
+def format_nodes(nodes: list[CoverageNode], indent: int) -> list[str]:
+    """Recursively render coverage nodes as text lines indented two spaces per level.
+
+    CodeBlock -> "[start-end] <status>" (✓ full, ✗ none, ◐ partial) plus uncovered ranges;
+    MacroExpansion -> "↳ Macro at line N → file" followed by its children one level deeper.
+    """
+    lines = []
+    prefix = "  " * indent
+
+    for node in nodes:
+        if isinstance(node, CodeBlock):
+            uncovered_info = ""
+            if node.is_fully_covered:
+                status = "✓"
+            else:
+                status = "✗" if node.coverage_fraction == 0 else "◐"
+                ranges = compress_to_ranges(sorted(node.uncovered_lines))
+                uncovered_info = f" [uncovered: {format_ranges(ranges)}]"
+            lines.append(f"{prefix}[{node.start_line}-{node.end_line}] {status}{uncovered_info}")
+        elif isinstance(node, MacroExpansion):
+            # Macro lines carry no status glyph; the uncovered count is the only marker.
+            count = f" ({node.total_uncovered_lines()} uncovered)" if node.has_uncovered_code() else ""
+            lines.append(f"{prefix}↳ Macro at line {node.call_line} → {node.macro_file}{count}")
+            lines.extend(format_nodes(node.children, indent + 1))
+
+    return lines
+```
+
+### Example Output
+
+```
+Function: png_read_end
+Location: pngread.c:912-987
+Coverage: 85.6% (83/97 lines)
+
+Code structure with uncovered paths:
+
+[912-915] ✓
+↳ Macro at line 916 → pngpriv.h (5 uncovered)
+  [45-47] ✓
+  [48-52] ✗ [uncovered: 48-52]
+[917-925] ✓
+↳ Macro at line 926 → png.c (3 uncovered)
+  [230-235] ✓
+  ↳ Macro at line 236 → pngpriv.h (3 uncovered)
+    [102-103] ✓
+    [104-106] ✗ [uncovered: 104-106]
+  [237-240] ✓
+[927-964] ✓
+[965-987] ◐ [uncovered: 965-970]
+
+Uncovered code paths to target:
+  1. png_read_end → pngpriv.h at pngread.c:916 → pngpriv.h:48-52
+     5 uncovered lines
+  2. png_read_end → png.c at pngread.c:926 → pngpriv.h at png.c:236 → pngpriv.h:104-106
+     3 uncovered lines
+  3. png_read_end → pngread.c:965-987
+     6 uncovered lines
+```
+
+## Integration Points
+
+### Coverage Runner
+- Build the hierarchy during `_process_function_coverage` for partially covered functions
+- Store it in a new `coverage_hierarchy` field on `CoveredFunction`
+
+### Coverage Bot
+- Serialize `FunctionCoverageHierarchy` to protobuf
+- Store in Redis via new `CoverageHierarchyMap`
+
+### Seed Generator
+- Fetch hierarchy for target function
+- Include formatted hierarchy in LLM prompt
+- LLM can see exactly which code paths need inputs
+
+## Benefits
+
+1. **Actionable**: LLM sees the structure, not just line numbers
+2. **Contextual**: Macro expansions show where uncovered code comes from
+3. **Hierarchical**: Nested macros are properly represented
+4. **Focused**: Only partially covered functions are processed
+5. **Efficient**: Tree structure avoids redundant information
+
+## Future Enhancements
+
+1. **Source snippets**: Include actual source code for uncovered blocks
+2. **Path conditions**: Annotate what conditions lead to uncovered paths
+3. **Semantic labels**: Detect common patterns (error handlers, bounds checks, etc.)
+4. **Coverage delta**: Track which paths were newly covered by recent inputs

From 46dd740f6d9e53a9c2a4dc4082fec1fddfc32397 Mon Sep 17 00:00:00 2001
From: Henrik Brodin <90325907+hbrodin@users.noreply.github.com>
Date: Wed, 4 Feb 2026 14:51:18 +0100
Subject: [PATCH 3/3] Fix starlette security vulnerabilities (CVE-2025-62727,
 CVE-2025-54121)

Upgrade FastAPI from ~0.115.6 to ~0.128.0 in orchestrator to allow
starlette >= 0.49.1, fixing:

- CVE-2025-62727 (High): O(n^2) DoS via Range header in FileResponse
- CVE-2025-54121 (Medium): DoS via large multipart file parsing

The orchestrator uses FileResponse and StaticFiles (the affected
components) in the competition API UI.

Resolves: Dependabot alerts #33, #34
---
 orchestrator/pyproject.toml |  4 ++--
 orchestrator/uv.lock        | 43 ++++++++++++++++++++++++++++++-------
 2 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/orchestrator/pyproject.toml b/orchestrator/pyproject.toml
index d23f3658..d170dd8d 100644
--- a/orchestrator/pyproject.toml
+++ b/orchestrator/pyproject.toml
@@ -9,7 +9,7 @@ requires-python = ">=3.12,<3.13"
 dependencies = [
     "argon2-cffi ~=21.3.0",
     "common[full]",
-    "fastapi ~=0.115.6",
+    "fastapi ~=0.128.0",
     "pydantic ~=2.11.0",
     "pydantic-settings ~=2.7.1",
     "python-dateutil ~=2.9.0",
@@ -44,7 +44,7 @@ dev = [
     "pytest-asyncio ~=0.25.2",
     "pytest-cov ~=6.0.0",
     "pytest-xdist ~=3.6.1",
-    "fastapi[standard] ~=0.115.6",
+    "fastapi[standard] ~=0.128.0",
     "responses ~=0.25.6",
     "httpx ~=0.28.1",
     # Linting and type checking
diff --git a/orchestrator/uv.lock b/orchestrator/uv.lock
index c17cf5cb..fab0ed44 100644
--- a/orchestrator/uv.lock
+++ b/orchestrator/uv.lock
@@ -2,6 +2,15 @@ version = 1
 revision = 3
 requires-python = "==3.12.*"
 
+[[package]]
+name = "annotated-doc"
+version = "0.0.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" },
+]
+
 [[package]]
 name = "annotated-types"
 version = "0.7.0"
@@ -372,16 +381,17 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.115.14"
+version = "0.128.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
+    { name = "annotated-doc" },
     { name = "pydantic" },
     { name = "starlette" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ca/53/8c38a874844a8b0fa10dd8adf3836ac154082cf88d3f22b544e9ceea0a15/fastapi-0.115.14.tar.gz", hash = "sha256:b1de15cdc1c499a4da47914db35d0e4ef8f1ce62b624e94e0e5824421df99739", size = 296263, upload-time = "2025-06-26T15:29:08.21Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/52/08/8c8508db6c7b9aae8f7175046af41baad690771c9bcde676419965e338c7/fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a", size = 365682, upload-time = "2025-12-27T15:21:13.714Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/53/50/b1222562c6d270fea83e9c9075b8e8600b8479150a18e4516a6138b980d1/fastapi-0.115.14-py3-none-any.whl", hash = "sha256:6c0c8bf9420bd58f565e585036d971872472b4f7d3f6c73b698e10cffdefb3ca", size = 95514, upload-time = "2025-06-26T15:29:06.49Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = "sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094, upload-time = "2025-12-27T15:21:12.154Z" },
 ]
 
 [package.optional-dependencies]
@@ -390,6 +400,8 @@ standard = [
     { name = "fastapi-cli", extra = ["standard"] },
     { name = "httpx" },
     { name = "jinja2" },
+    { name = "pydantic-extra-types" },
+    { name = "pydantic-settings" },
     { name = "python-multipart" },
     { name = "uvicorn", extra = ["standard"] },
 ]
@@ -477,6 +489,7 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f9/c8/9d76a66421d1ae24340dfae7e79c313957f6e3195c144d2c73333b5bfe34/greenlet-3.3.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7e806ca53acf6d15a888405880766ec84721aa4181261cd11a457dfe9a7a4975", size = 276443, upload-time = "2026-01-23T15:30:10.066Z" },
     { url = "https://files.pythonhosted.org/packages/81/99/401ff34bb3c032d1f10477d199724f5e5f6fbfb59816ad1455c79c1eb8e7/greenlet-3.3.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d842c94b9155f1c9b3058036c24ffb8ff78b428414a19792b2380be9cecf4f36", size = 597359, upload-time = "2026-01-23T16:00:57.394Z" },
     { url = "https://files.pythonhosted.org/packages/2b/bc/4dcc0871ed557792d304f50be0f7487a14e017952ec689effe2180a6ff35/greenlet-3.3.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:20fedaadd422fa02695f82093f9a98bad3dab5fcda793c658b945fcde2ab27ba", size = 607805, upload-time = "2026-01-23T16:05:28.068Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/cd/7a7ca57588dac3389e97f7c9521cb6641fd8b6602faf1eaa4188384757df/greenlet-3.3.1-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c620051669fd04ac6b60ebc70478210119c56e2d5d5df848baec4312e260e4ca", size = 622363, upload-time = "2026-01-23T16:15:54.754Z" },
     { url = "https://files.pythonhosted.org/packages/cf/05/821587cf19e2ce1f2b24945d890b164401e5085f9d09cbd969b0c193cd20/greenlet-3.3.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14194f5f4305800ff329cbf02c5fcc88f01886cadd29941b807668a45f0d2336", size = 609947, upload-time = "2026-01-23T15:32:51.004Z" },
     { url = "https://files.pythonhosted.org/packages/a4/52/ee8c46ed9f8babaa93a19e577f26e3d28a519feac6350ed6f25f1afee7e9/greenlet-3.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7b2fe4150a0cf59f847a67db8c155ac36aed89080a6a639e9f16df5d6c6096f1", size = 1567487, upload-time = "2026-01-23T16:04:22.125Z" },
     { url = "https://files.pythonhosted.org/packages/8f/7c/456a74f07029597626f3a6db71b273a3632aecb9afafeeca452cfa633197/greenlet-3.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:49f4ad195d45f4a66a0eb9c1ba4832bb380570d361912fa3554746830d332149", size = 1636087, upload-time = "2026-01-23T15:33:47.486Z" },
@@ -1344,7 +1357,7 @@ dev = [
 requires-dist = [
     { name = "argon2-cffi", specifier = "~=21.3.0" },
     { name = "common", extras = ["full"], editable = "../common" },
-    { name = "fastapi", specifier = "~=0.115.6" },
+    { name = "fastapi", specifier = "~=0.128.0" },
     { name = "pydantic", specifier = "~=2.11.0" },
     { name = "pydantic-settings", specifier = "~=2.7.1" },
     { name = "python-dateutil", specifier = "~=2.9.0" },
@@ -1360,7 +1373,7 @@ requires-dist = [
 
 [package.metadata.requires-dev]
 dev = [
-    { name = "fastapi", extras = ["standard"], specifier = "~=0.115.6" },
+    { name = "fastapi", extras = ["standard"], specifier = "~=0.128.0" },
     { name = "httpx", specifier = "~=0.28.1" },
     { name = "pytest", specifier = "~=8.3.4" },
     { name = "pytest-asyncio", specifier = "~=0.25.2" },
@@ -1484,6 +1497,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894, upload-time = "2025-04-23T18:31:51.609Z" },
 ]
 
+[[package]]
+name = "pydantic-extra-types"
+version = "2.11.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pydantic" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fd/35/2fee58b1316a73e025728583d3b1447218a97e621933fc776fb8c0f2ebdd/pydantic_extra_types-2.11.0.tar.gz", hash = "sha256:4e9991959d045b75feb775683437a97991d02c138e00b59176571db9ce634f0e", size = 157226, upload-time = "2025-12-31T16:18:27.944Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fe/17/fabd56da47096d240dd45ba627bead0333b0cf0ee8ada9bec579287dadf3/pydantic_extra_types-2.11.0-py3-none-any.whl", hash = "sha256:84b864d250a0fc62535b7ec591e36f2c5b4d1325fa0017eb8cda9aeb63b374a6", size = 74296, upload-time = "2025-12-31T16:18:26.38Z" },
+]
+
 [[package]]
 name = "pydantic-settings"
 version = "2.7.1"
@@ -1945,14 +1971,15 @@ wheels = [
 
 [[package]]
 name = "starlette"
-version = "0.46.2"
+version = "0.50.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
+    { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846, upload-time = "2025-04-13T13:56:17.942Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037, upload-time = "2025-04-13T13:56:16.21Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" },
 ]
 
 [[package]]