diff --git a/common/protos/msg.proto b/common/protos/msg.proto index 76dae473..2791523d 100644 --- a/common/protos/msg.proto +++ b/common/protos/msg.proto @@ -198,3 +198,27 @@ message POVReproduceResponse { POVReproduceRequest request = 1; bool did_crash = 2; } + +// Uncovered lines tracking messages +message MacroCallSite { + uint32 call_line = 1; // Line in primary file where macro is called + string macro_file_path = 2; // File where macro is defined + uint32 uncovered_count = 3; // Lines inside macro that are uncovered +} + +message UncoveredLines { + repeated uint32 starts = 1 [packed = true]; // Start line of each uncovered range + repeated uint32 lengths = 2 [packed = true]; // Length of each uncovered range + uint32 function_start_line = 3; + uint32 function_end_line = 4; +} + +message FunctionUncoveredLines { + string function_name = 1; + repeated string function_paths = 2; + string primary_file_path = 3; // The function definition file (container path) + uint32 total_lines = 4; + uint32 covered_lines = 5; + UncoveredLines uncovered = 6; // Uncovered lines in primary file only + repeated MacroCallSite macro_sites = 7; // Macro call sites with uncovered code +} diff --git a/common/src/buttercup/common/coverage_utils.py b/common/src/buttercup/common/coverage_utils.py new file mode 100644 index 00000000..0e7748dc --- /dev/null +++ b/common/src/buttercup/common/coverage_utils.py @@ -0,0 +1,130 @@ +"""Utilities for working with coverage data and uncovered line tracking.""" + +from dataclasses import dataclass + +from buttercup.common.datastructures.msg_pb2 import UncoveredLines + + +@dataclass +class LineRange: + """A range of consecutive lines.""" + + start: int + length: int + + @property + def end(self) -> int: + """Return the end line (inclusive).""" + return self.start + self.length - 1 + + +@dataclass +class UncoveredRanges: + """Represents uncovered line ranges in a function. 
+ + This class converts between line sets and the protobuf UncoveredLines format, + which uses run-length encoding (starts + lengths) for compact storage. + """ + + ranges: list[LineRange] + function_start_line: int + function_end_line: int + + @classmethod + def from_line_sets( + cls, + total_lines: set[int], + covered_lines: set[int], + function_start_line: int, + function_end_line: int, + ) -> "UncoveredRanges | None": + """Create UncoveredRanges from total and covered line sets. + + Args: + total_lines: Set of all lines in the function + covered_lines: Set of lines that were executed + function_start_line: First line of the function + function_end_line: Last line of the function + + Returns: + UncoveredRanges if there are uncovered lines, None otherwise + """ + uncovered = total_lines - covered_lines + if not uncovered: + return None + + # Convert to sorted list and group into consecutive ranges + sorted_lines = sorted(uncovered) + ranges: list[LineRange] = [] + range_start = sorted_lines[0] + range_length = 1 + + for i in range(1, len(sorted_lines)): + if sorted_lines[i] == sorted_lines[i - 1] + 1: + # Consecutive line, extend current range + range_length += 1 + else: + # Gap found, save current range and start new one + ranges.append(LineRange(range_start, range_length)) + range_start = sorted_lines[i] + range_length = 1 + + # Don't forget the last range + ranges.append(LineRange(range_start, range_length)) + + return cls( + ranges=ranges, + function_start_line=function_start_line, + function_end_line=function_end_line, + ) + + @classmethod + def from_protobuf(cls, proto: UncoveredLines) -> "UncoveredRanges": + """Create UncoveredRanges from a protobuf UncoveredLines message. 
+ + Args: + proto: The protobuf UncoveredLines message + + Returns: + UncoveredRanges instance + """ + ranges = [ + LineRange(start=start, length=length) for start, length in zip(proto.starts, proto.lengths, strict=True) + ] + return cls( + ranges=ranges, + function_start_line=proto.function_start_line, + function_end_line=proto.function_end_line, + ) + + def to_protobuf(self) -> UncoveredLines: + """Convert to a protobuf UncoveredLines message. + + Returns: + UncoveredLines protobuf message + """ + proto = UncoveredLines() + proto.starts.extend(r.start for r in self.ranges) + proto.lengths.extend(r.length for r in self.ranges) + proto.function_start_line = self.function_start_line + proto.function_end_line = self.function_end_line + return proto + + def get_uncovered_lines(self) -> set[int]: + """Get all uncovered lines as a set. + + Returns: + Set of uncovered line numbers + """ + lines: set[int] = set() + for r in self.ranges: + lines.update(range(r.start, r.start + r.length)) + return lines + + def total_uncovered_count(self) -> int: + """Get total count of uncovered lines. 
+ + Returns: + Number of uncovered lines + """ + return sum(r.length for r in self.ranges) diff --git a/common/src/buttercup/common/datastructures/msg_pb2.py b/common/src/buttercup/common/datastructures/msg_pb2.py index 8052ece4..2c51fc0d 100644 --- a/common/src/buttercup/common/datastructures/msg_pb2.py +++ b/common/src/buttercup/common/datastructures/msg_pb2.py @@ -24,7 +24,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\tmsg.proto\x12\x06msgpb2\"\xf0\x02\n\x04Task\x12\x12\n\nmessage_id\x18\x01 \x01(\t\x12\x14\n\x0cmessage_time\x18\x02 \x01(\x03\x12\x0f\n\x07task_id\x18\x03 \x01(\t\x12(\n\ttask_type\x18\x04 \x01(\x0e\x32\x15.msgpb2.Task.TaskType\x12%\n\x07sources\x18\x05 \x03(\x0b\x32\x14.msgpb2.SourceDetail\x12\x10\n\x08\x64\x65\x61\x64line\x18\x06 \x01(\x03\x12\x11\n\tcancelled\x18\x07 \x01(\x08\x12\x14\n\x0cproject_name\x18\x08 \x01(\t\x12\r\n\x05\x66ocus\x18\t \x01(\t\x12,\n\x08metadata\x18\n \x03(\x0b\x32\x1a.msgpb2.Task.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"3\n\x08TaskType\x12\x12\n\x0eTASK_TYPE_FULL\x10\x00\x12\x13\n\x0fTASK_TYPE_DELTA\x10\x01\"\xb9\x01\n\x0cSourceDetail\x12\x0e\n\x06sha256\x18\x01 \x01(\t\x12\x34\n\x0bsource_type\x18\x02 \x01(\x0e\x32\x1f.msgpb2.SourceDetail.SourceType\x12\x0b\n\x03url\x18\x03 \x01(\t\"V\n\nSourceType\x12\x14\n\x10SOURCE_TYPE_REPO\x10\x00\x12\x1c\n\x18SOURCE_TYPE_FUZZ_TOOLING\x10\x01\x12\x14\n\x10SOURCE_TYPE_DIFF\x10\x02\"*\n\x0cTaskDownload\x12\x1a\n\x04task\x18\x01 \x01(\x0b\x32\x0c.msgpb2.Task\"\'\n\tTaskReady\x12\x1a\n\x04task\x18\x01 \x01(\x0b\x32\x0c.msgpb2.Task\"T\n\nTaskDelete\x12\x11\n\x07task_id\x18\x01 \x01(\tH\x00\x12\r\n\x03\x61ll\x18\x03 \x01(\x08H\x00\x12\x13\n\x0breceived_at\x18\x02 \x01(\x02\x42\x0f\n\rdelete_option\"\xb9\x01\n\x0c\x42uildRequest\x12\x0e\n\x06\x65ngine\x18\x01 \x01(\t\x12\x11\n\tsanitizer\x18\x02 \x01(\t\x12\x10\n\x08task_dir\x18\x03 \x01(\t\x12\x0f\n\x07task_id\x18\x04 
\x01(\t\x12%\n\nbuild_type\x18\x05 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x12\n\napply_diff\x18\x06 \x01(\x08\x12\r\n\x05patch\x18\x07 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x08 \x01(\t\"\xa9\x01\n\x0b\x42uildOutput\x12\x0e\n\x06\x65ngine\x18\x01 \x01(\t\x12\x11\n\tsanitizer\x18\x02 \x01(\t\x12\x10\n\x08task_dir\x18\x03 \x01(\t\x12\x0f\n\x07task_id\x18\x04 \x01(\t\x12%\n\nbuild_type\x18\x05 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x12\n\napply_diff\x18\x06 \x01(\x08\x12\x19\n\x11internal_patch_id\x18\x07 \x01(\t\"^\n\x0fWeightedHarness\x12\x0e\n\x06weight\x18\x01 \x01(\x02\x12\x14\n\x0cpackage_name\x18\x02 \x01(\t\x12\x14\n\x0charness_name\x18\x03 \x01(\t\x12\x0f\n\x07task_id\x18\x04 \x01(\t\"\x85\x01\n\x05\x43rash\x12#\n\x06target\x18\x01 \x01(\x0b\x32\x13.msgpb2.BuildOutput\x12\x14\n\x0charness_name\x18\x02 \x01(\t\x12\x18\n\x10\x63rash_input_path\x18\x03 \x01(\t\x12\x12\n\nstacktrace\x18\x04 \x01(\t\x12\x13\n\x0b\x63rash_token\x18\x05 \x01(\t\"F\n\x0bTracedCrash\x12\x1c\n\x05\x63rash\x18\x01 \x01(\x0b\x32\r.msgpb2.Crash\x12\x19\n\x11tracer_stacktrace\x18\x02 \x01(\t\"Y\n\x16\x43onfirmedVulnerability\x12$\n\x07\x63rashes\x18\x01 \x03(\x0b\x32\x13.msgpb2.TracedCrash\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\"B\n\x05Patch\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\x12\r\n\x05patch\x18\x03 \x01(\t\"\x81\x01\n\x0cIndexRequest\x12%\n\nbuild_type\x18\x01 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x14\n\x0cpackage_name\x18\x02 \x01(\t\x12\x11\n\tsanitizer\x18\x03 \x01(\t\x12\x10\n\x08task_dir\x18\x04 \x01(\t\x12\x0f\n\x07task_id\x18\x05 \x01(\t\"\x80\x01\n\x0bIndexOutput\x12%\n\nbuild_type\x18\x01 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x14\n\x0cpackage_name\x18\x02 \x01(\t\x12\x11\n\tsanitizer\x18\x03 \x01(\t\x12\x10\n\x08task_dir\x18\x04 \x01(\t\x12\x0f\n\x07task_id\x18\x05 \x01(\t\"m\n\x10\x46unctionCoverage\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12\x16\n\x0e\x66unction_paths\x18\x02 
\x03(\t\x12\x13\n\x0btotal_lines\x18\x03 \x01(\x05\x12\x15\n\rcovered_lines\x18\x04 \x01(\x05\"\xc4\x01\n\x14SubmissionEntryPatch\x12\r\n\x05patch\x18\x01 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\x12\x1c\n\x14\x63ompetition_patch_id\x18\x03 \x01(\t\x12*\n\rbuild_outputs\x18\x04 \x03(\x0b\x32\x13.msgpb2.BuildOutput\x12-\n\x06result\x18\x05 \x01(\x0e\x32\x18.msgpb2.SubmissionResultH\x00\x88\x01\x01\x42\t\n\x07_result\"\x84\x01\n\x06\x42undle\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x1a\n\x12\x63ompetition_pov_id\x18\x02 \x01(\t\x12\x1c\n\x14\x63ompetition_patch_id\x18\x03 \x01(\t\x12\x1c\n\x14\x63ompetition_sarif_id\x18\x04 \x01(\t\x12\x11\n\tbundle_id\x18\x05 \x01(\t\"\x87\x01\n\x0b\x43rashWithId\x12\"\n\x05\x63rash\x18\x01 \x01(\x0b\x32\x13.msgpb2.TracedCrash\x12\x1a\n\x12\x63ompetition_pov_id\x18\x02 \x01(\t\x12-\n\x06result\x18\x03 \x01(\x0e\x32\x18.msgpb2.SubmissionResultH\x00\x88\x01\x01\x42\t\n\x07_result\"\xcb\x01\n\x0fSubmissionEntry\x12\x0c\n\x04stop\x18\x01 \x01(\x08\x12$\n\x07\x63rashes\x18\x02 \x03(\x0b\x32\x13.msgpb2.CrashWithId\x12\x1f\n\x07\x62undles\x18\x03 \x03(\x0b\x32\x0e.msgpb2.Bundle\x12-\n\x07patches\x18\x04 \x03(\x0b\x32\x1c.msgpb2.SubmissionEntryPatch\x12\x11\n\tpatch_idx\x18\x05 \x01(\x05\x12!\n\x19patch_submission_attempts\x18\x06 \x01(\x05\"|\n\x13POVReproduceRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\x12\x14\n\x0charness_name\x18\x03 \x01(\t\x12\x11\n\tsanitizer\x18\x04 \x01(\t\x12\x10\n\x08pov_path\x18\x05 \x01(\t\"W\n\x14POVReproduceResponse\x12,\n\x07request\x18\x01 \x01(\x0b\x32\x1b.msgpb2.POVReproduceRequest\x12\x11\n\tdid_crash\x18\x02 
\x01(\x08*D\n\tBuildType\x12\n\n\x06\x46UZZER\x10\x00\x12\x0c\n\x08\x43OVERAGE\x10\x01\x12\x12\n\x0eTRACER_NO_DIFF\x10\x02\x12\t\n\x05PATCH\x10\x03*x\n\x10SubmissionResult\x12\x08\n\x04NONE\x10\x00\x12\x0c\n\x08\x41\x43\x43\x45PTED\x10\x01\x12\n\n\x06PASSED\x10\x02\x12\n\n\x06\x46\x41ILED\x10\x03\x12\x15\n\x11\x44\x45\x41\x44LINE_EXCEEDED\x10\x04\x12\x0b\n\x07\x45RRORED\x10\x05\x12\x10\n\x0cINCONCLUSIVE\x10\x06\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\tmsg.proto\x12\x06msgpb2\"\xf0\x02\n\x04Task\x12\x12\n\nmessage_id\x18\x01 \x01(\t\x12\x14\n\x0cmessage_time\x18\x02 \x01(\x03\x12\x0f\n\x07task_id\x18\x03 \x01(\t\x12(\n\ttask_type\x18\x04 \x01(\x0e\x32\x15.msgpb2.Task.TaskType\x12%\n\x07sources\x18\x05 \x03(\x0b\x32\x14.msgpb2.SourceDetail\x12\x10\n\x08\x64\x65\x61\x64line\x18\x06 \x01(\x03\x12\x11\n\tcancelled\x18\x07 \x01(\x08\x12\x14\n\x0cproject_name\x18\x08 \x01(\t\x12\r\n\x05\x66ocus\x18\t \x01(\t\x12,\n\x08metadata\x18\n \x03(\x0b\x32\x1a.msgpb2.Task.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"3\n\x08TaskType\x12\x12\n\x0eTASK_TYPE_FULL\x10\x00\x12\x13\n\x0fTASK_TYPE_DELTA\x10\x01\"\xb9\x01\n\x0cSourceDetail\x12\x0e\n\x06sha256\x18\x01 \x01(\t\x12\x34\n\x0bsource_type\x18\x02 \x01(\x0e\x32\x1f.msgpb2.SourceDetail.SourceType\x12\x0b\n\x03url\x18\x03 \x01(\t\"V\n\nSourceType\x12\x14\n\x10SOURCE_TYPE_REPO\x10\x00\x12\x1c\n\x18SOURCE_TYPE_FUZZ_TOOLING\x10\x01\x12\x14\n\x10SOURCE_TYPE_DIFF\x10\x02\"*\n\x0cTaskDownload\x12\x1a\n\x04task\x18\x01 \x01(\x0b\x32\x0c.msgpb2.Task\"\'\n\tTaskReady\x12\x1a\n\x04task\x18\x01 \x01(\x0b\x32\x0c.msgpb2.Task\"T\n\nTaskDelete\x12\x11\n\x07task_id\x18\x01 \x01(\tH\x00\x12\r\n\x03\x61ll\x18\x03 \x01(\x08H\x00\x12\x13\n\x0breceived_at\x18\x02 \x01(\x02\x42\x0f\n\rdelete_option\"\xb9\x01\n\x0c\x42uildRequest\x12\x0e\n\x06\x65ngine\x18\x01 \x01(\t\x12\x11\n\tsanitizer\x18\x02 \x01(\t\x12\x10\n\x08task_dir\x18\x03 
\x01(\t\x12\x0f\n\x07task_id\x18\x04 \x01(\t\x12%\n\nbuild_type\x18\x05 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x12\n\napply_diff\x18\x06 \x01(\x08\x12\r\n\x05patch\x18\x07 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x08 \x01(\t\"\xa9\x01\n\x0b\x42uildOutput\x12\x0e\n\x06\x65ngine\x18\x01 \x01(\t\x12\x11\n\tsanitizer\x18\x02 \x01(\t\x12\x10\n\x08task_dir\x18\x03 \x01(\t\x12\x0f\n\x07task_id\x18\x04 \x01(\t\x12%\n\nbuild_type\x18\x05 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x12\n\napply_diff\x18\x06 \x01(\x08\x12\x19\n\x11internal_patch_id\x18\x07 \x01(\t\"^\n\x0fWeightedHarness\x12\x0e\n\x06weight\x18\x01 \x01(\x02\x12\x14\n\x0cpackage_name\x18\x02 \x01(\t\x12\x14\n\x0charness_name\x18\x03 \x01(\t\x12\x0f\n\x07task_id\x18\x04 \x01(\t\"\x85\x01\n\x05\x43rash\x12#\n\x06target\x18\x01 \x01(\x0b\x32\x13.msgpb2.BuildOutput\x12\x14\n\x0charness_name\x18\x02 \x01(\t\x12\x18\n\x10\x63rash_input_path\x18\x03 \x01(\t\x12\x12\n\nstacktrace\x18\x04 \x01(\t\x12\x13\n\x0b\x63rash_token\x18\x05 \x01(\t\"F\n\x0bTracedCrash\x12\x1c\n\x05\x63rash\x18\x01 \x01(\x0b\x32\r.msgpb2.Crash\x12\x19\n\x11tracer_stacktrace\x18\x02 \x01(\t\"Y\n\x16\x43onfirmedVulnerability\x12$\n\x07\x63rashes\x18\x01 \x03(\x0b\x32\x13.msgpb2.TracedCrash\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\"B\n\x05Patch\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\x12\r\n\x05patch\x18\x03 \x01(\t\"\x81\x01\n\x0cIndexRequest\x12%\n\nbuild_type\x18\x01 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x14\n\x0cpackage_name\x18\x02 \x01(\t\x12\x11\n\tsanitizer\x18\x03 \x01(\t\x12\x10\n\x08task_dir\x18\x04 \x01(\t\x12\x0f\n\x07task_id\x18\x05 \x01(\t\"\x80\x01\n\x0bIndexOutput\x12%\n\nbuild_type\x18\x01 \x01(\x0e\x32\x11.msgpb2.BuildType\x12\x14\n\x0cpackage_name\x18\x02 \x01(\t\x12\x11\n\tsanitizer\x18\x03 \x01(\t\x12\x10\n\x08task_dir\x18\x04 \x01(\t\x12\x0f\n\x07task_id\x18\x05 \x01(\t\"m\n\x10\x46unctionCoverage\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12\x16\n\x0e\x66unction_paths\x18\x02 
\x03(\t\x12\x13\n\x0btotal_lines\x18\x03 \x01(\x05\x12\x15\n\rcovered_lines\x18\x04 \x01(\x05\"\xc4\x01\n\x14SubmissionEntryPatch\x12\r\n\x05patch\x18\x01 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\x12\x1c\n\x14\x63ompetition_patch_id\x18\x03 \x01(\t\x12*\n\rbuild_outputs\x18\x04 \x03(\x0b\x32\x13.msgpb2.BuildOutput\x12-\n\x06result\x18\x05 \x01(\x0e\x32\x18.msgpb2.SubmissionResultH\x00\x88\x01\x01\x42\t\n\x07_result\"\x84\x01\n\x06\x42undle\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x1a\n\x12\x63ompetition_pov_id\x18\x02 \x01(\t\x12\x1c\n\x14\x63ompetition_patch_id\x18\x03 \x01(\t\x12\x1c\n\x14\x63ompetition_sarif_id\x18\x04 \x01(\t\x12\x11\n\tbundle_id\x18\x05 \x01(\t\"\x87\x01\n\x0b\x43rashWithId\x12\"\n\x05\x63rash\x18\x01 \x01(\x0b\x32\x13.msgpb2.TracedCrash\x12\x1a\n\x12\x63ompetition_pov_id\x18\x02 \x01(\t\x12-\n\x06result\x18\x03 \x01(\x0e\x32\x18.msgpb2.SubmissionResultH\x00\x88\x01\x01\x42\t\n\x07_result\"\xcb\x01\n\x0fSubmissionEntry\x12\x0c\n\x04stop\x18\x01 \x01(\x08\x12$\n\x07\x63rashes\x18\x02 \x03(\x0b\x32\x13.msgpb2.CrashWithId\x12\x1f\n\x07\x62undles\x18\x03 \x03(\x0b\x32\x0e.msgpb2.Bundle\x12-\n\x07patches\x18\x04 \x03(\x0b\x32\x1c.msgpb2.SubmissionEntryPatch\x12\x11\n\tpatch_idx\x18\x05 \x01(\x05\x12!\n\x19patch_submission_attempts\x18\x06 \x01(\x05\"|\n\x13POVReproduceRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x19\n\x11internal_patch_id\x18\x02 \x01(\t\x12\x14\n\x0charness_name\x18\x03 \x01(\t\x12\x11\n\tsanitizer\x18\x04 \x01(\t\x12\x10\n\x08pov_path\x18\x05 \x01(\t\"W\n\x14POVReproduceResponse\x12,\n\x07request\x18\x01 \x01(\x0b\x32\x1b.msgpb2.POVReproduceRequest\x12\x11\n\tdid_crash\x18\x02 \x01(\x08\"T\n\rMacroCallSite\x12\x11\n\tcall_line\x18\x01 \x01(\r\x12\x17\n\x0fmacro_file_path\x18\x02 \x01(\t\x12\x17\n\x0funcovered_count\x18\x03 \x01(\r\"q\n\x0eUncoveredLines\x12\x12\n\x06starts\x18\x01 \x03(\rB\x02\x10\x01\x12\x13\n\x07lengths\x18\x02 \x03(\rB\x02\x10\x01\x12\x1b\n\x13\x66unction_start_line\x18\x03 
\x01(\r\x12\x19\n\x11\x66unction_end_line\x18\x04 \x01(\r\"\xe5\x01\n\x16\x46unctionUncoveredLines\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12\x16\n\x0e\x66unction_paths\x18\x02 \x03(\t\x12\x19\n\x11primary_file_path\x18\x03 \x01(\t\x12\x13\n\x0btotal_lines\x18\x04 \x01(\r\x12\x15\n\rcovered_lines\x18\x05 \x01(\r\x12)\n\tuncovered\x18\x06 \x01(\x0b\x32\x16.msgpb2.UncoveredLines\x12*\n\x0bmacro_sites\x18\x07 \x03(\x0b\x32\x15.msgpb2.MacroCallSite*D\n\tBuildType\x12\n\n\x06\x46UZZER\x10\x00\x12\x0c\n\x08\x43OVERAGE\x10\x01\x12\x12\n\x0eTRACER_NO_DIFF\x10\x02\x12\t\n\x05PATCH\x10\x03*x\n\x10SubmissionResult\x12\x08\n\x04NONE\x10\x00\x12\x0c\n\x08\x41\x43\x43\x45PTED\x10\x01\x12\n\n\x06PASSED\x10\x02\x12\n\n\x06\x46\x41ILED\x10\x03\x12\x15\n\x11\x44\x45\x41\x44LINE_EXCEEDED\x10\x04\x12\x0b\n\x07\x45RRORED\x10\x05\x12\x10\n\x0cINCONCLUSIVE\x10\x06\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -33,10 +33,14 @@ DESCRIPTOR._loaded_options = None _globals['_TASK_METADATAENTRY']._loaded_options = None _globals['_TASK_METADATAENTRY']._serialized_options = b'8\001' - _globals['_BUILDTYPE']._serialized_start=2841 - _globals['_BUILDTYPE']._serialized_end=2909 - _globals['_SUBMISSIONRESULT']._serialized_start=2911 - _globals['_SUBMISSIONRESULT']._serialized_end=3031 + _globals['_UNCOVEREDLINES'].fields_by_name['starts']._loaded_options = None + _globals['_UNCOVEREDLINES'].fields_by_name['starts']._serialized_options = b'\020\001' + _globals['_UNCOVEREDLINES'].fields_by_name['lengths']._loaded_options = None + _globals['_UNCOVEREDLINES'].fields_by_name['lengths']._serialized_options = b'\020\001' + _globals['_BUILDTYPE']._serialized_start=3274 + _globals['_BUILDTYPE']._serialized_end=3342 + _globals['_SUBMISSIONRESULT']._serialized_start=3344 + _globals['_SUBMISSIONRESULT']._serialized_end=3464 _globals['_TASK']._serialized_start=22 _globals['_TASK']._serialized_end=390 _globals['_TASK_METADATAENTRY']._serialized_start=290 
@@ -85,4 +89,10 @@ _globals['_POVREPRODUCEREQUEST']._serialized_end=2750 _globals['_POVREPRODUCERESPONSE']._serialized_start=2752 _globals['_POVREPRODUCERESPONSE']._serialized_end=2839 + _globals['_MACROCALLSITE']._serialized_start=2841 + _globals['_MACROCALLSITE']._serialized_end=2925 + _globals['_UNCOVEREDLINES']._serialized_start=2927 + _globals['_UNCOVEREDLINES']._serialized_end=3040 + _globals['_FUNCTIONUNCOVEREDLINES']._serialized_start=3043 + _globals['_FUNCTIONUNCOVEREDLINES']._serialized_end=3272 # @@protoc_insertion_point(module_scope) diff --git a/common/src/buttercup/common/datastructures/msg_pb2.pyi b/common/src/buttercup/common/datastructures/msg_pb2.pyi index e256b878..495735d9 100644 --- a/common/src/buttercup/common/datastructures/msg_pb2.pyi +++ b/common/src/buttercup/common/datastructures/msg_pb2.pyi @@ -317,3 +317,43 @@ class POVReproduceResponse(_message.Message): request: POVReproduceRequest did_crash: bool def __init__(self, request: _Optional[_Union[POVReproduceRequest, _Mapping]] = ..., did_crash: bool = ...) -> None: ... + +class MacroCallSite(_message.Message): + __slots__ = ("call_line", "macro_file_path", "uncovered_count") + CALL_LINE_FIELD_NUMBER: _ClassVar[int] + MACRO_FILE_PATH_FIELD_NUMBER: _ClassVar[int] + UNCOVERED_COUNT_FIELD_NUMBER: _ClassVar[int] + call_line: int + macro_file_path: str + uncovered_count: int + def __init__(self, call_line: _Optional[int] = ..., macro_file_path: _Optional[str] = ..., uncovered_count: _Optional[int] = ...) -> None: ... 
+ +class UncoveredLines(_message.Message): + __slots__ = ("starts", "lengths", "function_start_line", "function_end_line") + STARTS_FIELD_NUMBER: _ClassVar[int] + LENGTHS_FIELD_NUMBER: _ClassVar[int] + FUNCTION_START_LINE_FIELD_NUMBER: _ClassVar[int] + FUNCTION_END_LINE_FIELD_NUMBER: _ClassVar[int] + starts: _containers.RepeatedScalarFieldContainer[int] + lengths: _containers.RepeatedScalarFieldContainer[int] + function_start_line: int + function_end_line: int + def __init__(self, starts: _Optional[_Iterable[int]] = ..., lengths: _Optional[_Iterable[int]] = ..., function_start_line: _Optional[int] = ..., function_end_line: _Optional[int] = ...) -> None: ... + +class FunctionUncoveredLines(_message.Message): + __slots__ = ("function_name", "function_paths", "primary_file_path", "total_lines", "covered_lines", "uncovered", "macro_sites") + FUNCTION_NAME_FIELD_NUMBER: _ClassVar[int] + FUNCTION_PATHS_FIELD_NUMBER: _ClassVar[int] + PRIMARY_FILE_PATH_FIELD_NUMBER: _ClassVar[int] + TOTAL_LINES_FIELD_NUMBER: _ClassVar[int] + COVERED_LINES_FIELD_NUMBER: _ClassVar[int] + UNCOVERED_FIELD_NUMBER: _ClassVar[int] + MACRO_SITES_FIELD_NUMBER: _ClassVar[int] + function_name: str + function_paths: _containers.RepeatedScalarFieldContainer[str] + primary_file_path: str + total_lines: int + covered_lines: int + uncovered: UncoveredLines + macro_sites: _containers.RepeatedCompositeFieldContainer[MacroCallSite] + def __init__(self, function_name: _Optional[str] = ..., function_paths: _Optional[_Iterable[str]] = ..., primary_file_path: _Optional[str] = ..., total_lines: _Optional[int] = ..., covered_lines: _Optional[int] = ..., uncovered: _Optional[_Union[UncoveredLines, _Mapping]] = ..., macro_sites: _Optional[_Iterable[_Union[MacroCallSite, _Mapping]]] = ...) -> None: ... 
diff --git a/common/src/buttercup/common/maps.py b/common/src/buttercup/common/maps.py index f428489e..08a684f5 100644 --- a/common/src/buttercup/common/maps.py +++ b/common/src/buttercup/common/maps.py @@ -5,7 +5,13 @@ from google.protobuf.message import Message from redis import Redis -from buttercup.common.datastructures.msg_pb2 import BuildOutput, BuildType, FunctionCoverage, WeightedHarness +from buttercup.common.datastructures.msg_pb2 import ( + BuildOutput, + BuildType, + FunctionCoverage, + FunctionUncoveredLines, + WeightedHarness, +) from buttercup.common.sets import RedisSet # ruff: noqa: UP046 @@ -43,6 +49,7 @@ def __iter__(self) -> Iterator[MsgType]: BUILD_MAP_NAME = "build_list" BUILD_SAN_MAP_NAME = "build_san_list" COVERAGE_MAP_PREFIX = "coverage_map" +UNCOVERED_MAP_PREFIX = "uncovered_lines_map" # A build map makes it effecient to find for a given task_id + harness a build type @@ -155,3 +162,44 @@ def get_function_coverage(self, function_name: str, function_paths: list[str]) - def list_function_coverage(self) -> list[FunctionCoverage]: return list(iter(self.mp)) + + +class UncoveredLinesMap: + """Redis-backed map for storing uncovered lines data per function.""" + + def __init__(self, redis: Redis, harness_name: str, package_name: str, task_id: str): + self.redis = redis + self.harness_name = harness_name + self.package_name = package_name + self.task_id = task_id + hash_name = [ + UNCOVERED_MAP_PREFIX, + harness_name, + package_name, + task_id, + ] + hash_name_str = dumps(hash_name, json_options=CANONICAL_JSON_OPTIONS) + self.mp: RedisMap[FunctionUncoveredLines] = RedisMap(redis, hash_name_str, FunctionUncoveredLines) + + def set_uncovered_lines(self, uncovered_lines: FunctionUncoveredLines) -> None: + """Store uncovered lines data for a function.""" + function_paths_list = list(uncovered_lines.function_paths) + key = [ + uncovered_lines.function_name, + function_paths_list, + ] + key_str = dumps(key, json_options=CANONICAL_JSON_OPTIONS) + 
self.mp.set(key_str, uncovered_lines) + + def get_uncovered_lines(self, function_name: str, function_paths: list[str]) -> FunctionUncoveredLines | None: + """Get uncovered lines data for a function.""" + key = [ + function_name, + function_paths, + ] + key_str = dumps(key, json_options=CANONICAL_JSON_OPTIONS) + return self.mp.get(key_str) + + def list_uncovered_lines(self) -> list[FunctionUncoveredLines]: + """List all stored uncovered lines data.""" + return list(iter(self.mp)) diff --git a/fuzzer/docs/coverage-precision.md b/fuzzer/docs/coverage-precision.md new file mode 100644 index 00000000..d57f97df --- /dev/null +++ b/fuzzer/docs/coverage-precision.md @@ -0,0 +1,539 @@ +# Precise Coverage Hierarchy for LLM Guidance + +This document describes a hierarchical coverage representation designed to guide LLMs in generating inputs that target specific uncovered code paths. + +## Scope + +This feature applies only to **functions with partial coverage** (0 < covered_lines < total_lines). Functions with 0% or 100% coverage are excluded: +- **0% coverage**: Function never executed - need to find how to call it first +- **100% coverage**: Nothing to improve +- **Partial coverage**: Has uncovered paths we can target with specific inputs + +## Problem Statement + +The current flat representation of uncovered lines is insufficient for LLM guidance: + +``` +Uncovered lines: [48-52, 104-106, 965-970] +``` + +Problems: +1. **No context**: What do these lines do? Why aren't they covered? +2. **Mixed sources**: Lines may come from different files (macros, includes) +3. **No structure**: Can't tell if line 104 is inside a macro called from line 50 +4. **No actionability**: How should the LLM trigger these paths? 
+ +## Solution: Coverage Hierarchy + +Represent coverage as a tree that mirrors the code's macro expansion structure: + +``` +Function: png_read_end (pngread.c:912-987) +│ +├─ CodeBlock [912-915] ✓ covered +│ +├─ MacroExpansion: PNG_SETJMP() at line 916 +│ ├─ CodeBlock [pngpriv.h:45-47] ✓ covered +│ └─ CodeBlock [pngpriv.h:48-52] ✗ UNCOVERED +│ +├─ CodeBlock [917-925] ✓ covered +│ +├─ MacroExpansion: png_crc_finish() at line 926 +│ ├─ CodeBlock [png.c:230-235] ✓ covered +│ ├─ MacroExpansion: PNG_CRC_CHECK() at png.c:236 +│ │ ├─ CodeBlock [pngpriv.h:102-103] ✓ covered +│ │ └─ CodeBlock [pngpriv.h:104-106] ✗ UNCOVERED +│ └─ CodeBlock [png.c:237-240] ✓ covered +│ +└─ CodeBlock [927-987] partial + └─ Lines 965-970 ✗ UNCOVERED +``` + +## LLVM Coverage Background + +### Region Format +``` +[lineStart, colStart, lineEnd, colEnd, execCount, fileID, expandedFileID, kind] +``` + +- **fileID**: Index into the function's `filenames` array (per-function, not global) +- **expandedFileID**: For ExpansionRegions, points to file containing macro body +- **kind**: 0=Code, 1=Expansion, 2=Skipped, 3=Gap, 4+=Branch/MCDC + +### Expansion Hierarchy in LLVM + +1. Function's `regions` array contains CodeRegions and ExpansionRegions +2. ExpansionRegion marks WHERE a macro is called (call site) +3. `expandedFileID` tells us which file contains the macro body +4. Actual macro body regions are in `files[].expansions[].target_regions` +5. 
Macro bodies can contain nested ExpansionRegions (recursive) + +## Data Model + +### Python Classes + +```python +@dataclass +class CodeBlock: + """A contiguous block of non-macro code.""" + file_path: str + start_line: int + end_line: int + covered_lines: set[int] + total_lines: set[int] + + @property + def uncovered_lines(self) -> set[int]: + return self.total_lines - self.covered_lines + + @property + def is_fully_covered(self) -> bool: + return len(self.uncovered_lines) == 0 + + @property + def coverage_fraction(self) -> float: + if not self.total_lines: + return 1.0 + return len(self.covered_lines) / len(self.total_lines) + + +@dataclass +class MacroExpansion: + """A macro call site with its expansion hierarchy.""" + call_file: str + call_line: int + call_column: int + macro_file: str # From expandedFileID + children: list["CoverageNode"] + + def total_uncovered_lines(self) -> int: + """Recursively count uncovered lines in this expansion tree.""" + count = 0 + for child in self.children: + if isinstance(child, CodeBlock): + count += len(child.uncovered_lines) + elif isinstance(child, MacroExpansion): + count += child.total_uncovered_lines() + return count + + def has_uncovered_code(self) -> bool: + return self.total_uncovered_lines() > 0 + + +# Type alias for tree nodes +CoverageNode = CodeBlock | MacroExpansion + + +@dataclass +class FunctionCoverageHierarchy: + """Complete coverage hierarchy for a partially-covered function.""" + function_name: str + primary_file: str + start_line: int + end_line: int + total_lines: int + covered_lines: int + children: list[CoverageNode] + + @property + def coverage_percentage(self) -> float: + return (self.covered_lines / self.total_lines * 100) if self.total_lines else 0 + + def iter_uncovered_paths(self) -> Iterator[tuple[list[str], CodeBlock]]: + """Yield (path, code_block) for each uncovered code block. 
+ + Path is like ["png_read_end", "pngpriv.h at pngread.c:916", "pngpriv.h:48-52"] + """ + yield from self._iter_uncovered(self.children, [self.function_name]) + + def _iter_uncovered(self, nodes, path): + for node in nodes: + if isinstance(node, CodeBlock): + if not node.is_fully_covered: + yield (path + [f"{node.file_path}:{node.start_line}-{node.end_line}"], node) + elif isinstance(node, MacroExpansion): + if node.has_uncovered_code(): + macro_path = path + [f"{node.macro_file} at {node.call_file}:{node.call_line}"] + yield from self._iter_uncovered(node.children, macro_path) +``` + +### Protobuf Messages + +```protobuf +// Coverage hierarchy for LLM guidance (only for partial coverage functions) + +message CodeBlock { + string file_path = 1; + uint32 start_line = 2; + uint32 end_line = 3; + // Run-length encoded line coverage within block + repeated uint32 covered_starts = 4 [packed = true]; + repeated uint32 covered_lengths = 5 [packed = true]; + repeated uint32 uncovered_starts = 6 [packed = true]; + repeated uint32 uncovered_lengths = 7 [packed = true]; +} + +message MacroExpansion { + string call_file = 1; + uint32 call_line = 2; + uint32 call_column = 3; + string macro_file = 4; + repeated CoverageNode children = 5; +} + +message CoverageNode { + oneof node { + CodeBlock code = 1; + MacroExpansion macro = 2; + } +} + +message FunctionCoverageHierarchy { + string function_name = 1; + string primary_file = 2; + uint32 start_line = 3; + uint32 end_line = 4; + uint32 total_lines = 5; + uint32 covered_lines = 6; + repeated CoverageNode children = 7; +} +``` + +## Algorithm + +### Building the Hierarchy + +```python +def build_coverage_hierarchy( + function: dict, + file_expansions: dict[str, list], +) -> FunctionCoverageHierarchy | None: + """Build hierarchical coverage for a function. 
+ + Args: + function: Function object from LLVM coverage JSON + file_expansions: Map of filename -> list of expansion objects + + Returns: + FunctionCoverageHierarchy for partial coverage, None otherwise + """ + filenames = function['filenames'] + regions = function['regions'] + + # Calculate aggregate coverage + total_lines, covered_lines = count_all_lines(regions, filenames, file_expansions) + + # Skip if not partial coverage + if covered_lines == 0 or covered_lines == total_lines: + return None + + # Find primary file (where function is defined) + primary_file_id = find_primary_file(regions, filenames) + primary_file = filenames[primary_file_id] + + # Build the tree starting from primary file regions + children = build_node_tree( + regions=[r for r in regions if r[5] == primary_file_id], + all_regions=regions, + filenames=filenames, + file_expansions=file_expansions, + ) + + # Compute function bounds from primary file + primary_regions = [r for r in regions if r[5] == primary_file_id and r[7] == CODE_REGION] + start_line = min(r[0] for r in primary_regions) if primary_regions else 0 + end_line = max(r[2] for r in primary_regions) if primary_regions else 0 + + return FunctionCoverageHierarchy( + function_name=function['name'], + primary_file=primary_file, + start_line=start_line, + end_line=end_line, + total_lines=total_lines, + covered_lines=covered_lines, + children=children, + ) + + +def build_node_tree( + regions: list, + all_regions: list, + filenames: list[str], + file_expansions: dict[str, list], +) -> list[CoverageNode]: + """Build tree of CoverageNodes from regions. + + Regions should be pre-filtered to a specific file_id. 
+ """ + # Sort by start position + sorted_regions = sorted(regions, key=lambda r: (r[0], r[1])) + + nodes: list[CoverageNode] = [] + pending_code_lines: dict[int, bool] = {} # line -> is_covered + + for region in sorted_regions: + kind = region[7] if len(region) > 7 else CODE_REGION + + if kind == CODE_REGION: + # Accumulate code lines + exec_count = region[4] + for line in range(region[0], region[2] + 1): + if line not in pending_code_lines: + pending_code_lines[line] = (exec_count > 0) + else: + # Line is covered if ANY region covering it is executed + pending_code_lines[line] |= (exec_count > 0) + + elif kind == EXPANSION_REGION: + # Flush pending code block before macro + if pending_code_lines: + nodes.append(make_code_block(filenames[region[5]], pending_code_lines)) + pending_code_lines = {} + + # Build macro expansion node + file_id = region[5] + expanded_file_id = region[6] + + # Look up expansion's target regions + target_regions = lookup_expansion( + file_expansions, + filenames[file_id], + (region[0], region[1], region[2], region[3]) + ) + + # Recursively build children from target regions + macro_children = build_expansion_tree( + target_regions, + filenames, + file_expansions, + ) + + nodes.append(MacroExpansion( + call_file=filenames[file_id], + call_line=region[0], + call_column=region[1], + macro_file=filenames[expanded_file_id] if expanded_file_id < len(filenames) else "unknown", + children=macro_children, + )) + + # Flush remaining code + if pending_code_lines: + file_id = sorted_regions[0][5] if sorted_regions else 0 + nodes.append(make_code_block(filenames[file_id], pending_code_lines)) + + return nodes + + +def build_expansion_tree( + target_regions: list, + filenames: list[str], + file_expansions: dict[str, list], + visited: set[tuple] | None = None, +) -> list[CoverageNode]: + """Recursively build tree from expansion target regions.""" + if visited is None: + visited = set() + + nodes: list[CoverageNode] = [] + pending_code_lines: dict[int, 
bool] = {} + current_file_id = None + + for region in target_regions: + if len(region) < 5: + continue + + kind = region[7] if len(region) > 7 else CODE_REGION + file_id = region[5] if len(region) > 5 else 0 + + # Track file changes + if current_file_id is None: + current_file_id = file_id + elif file_id != current_file_id: + # Flush code block when switching files + if pending_code_lines and current_file_id < len(filenames): + nodes.append(make_code_block(filenames[current_file_id], pending_code_lines)) + pending_code_lines = {} + current_file_id = file_id + + if kind == CODE_REGION: + exec_count = region[4] + for line in range(region[0], region[2] + 1): + if line not in pending_code_lines: + pending_code_lines[line] = (exec_count > 0) + else: + pending_code_lines[line] |= (exec_count > 0) + + elif kind == EXPANSION_REGION: + # Prevent infinite recursion + region_key = (file_id, region[0], region[1], region[2], region[3]) + if region_key in visited: + continue + visited.add(region_key) + + # Flush pending code + if pending_code_lines and current_file_id < len(filenames): + nodes.append(make_code_block(filenames[current_file_id], pending_code_lines)) + pending_code_lines = {} + + # Get nested expansion + expanded_file_id = region[6] + nested_targets = lookup_expansion( + file_expansions, + filenames[file_id] if file_id < len(filenames) else "", + (region[0], region[1], region[2], region[3]) + ) + + nested_children = build_expansion_tree( + nested_targets, + filenames, + file_expansions, + visited, + ) + + if file_id < len(filenames): + nodes.append(MacroExpansion( + call_file=filenames[file_id], + call_line=region[0], + call_column=region[1], + macro_file=filenames[expanded_file_id] if expanded_file_id < len(filenames) else "unknown", + children=nested_children, + )) + + # Flush remaining code + if pending_code_lines and current_file_id is not None and current_file_id < len(filenames): + nodes.append(make_code_block(filenames[current_file_id], pending_code_lines)) 
+ + return nodes + + +def make_code_block(file_path: str, lines: dict[int, bool]) -> CodeBlock: + """Create a CodeBlock from accumulated line coverage data.""" + sorted_lines = sorted(lines.keys()) + return CodeBlock( + file_path=file_path, + start_line=min(sorted_lines), + end_line=max(sorted_lines), + covered_lines={ln for ln, cov in lines.items() if cov}, + total_lines=set(sorted_lines), + ) +``` + +## LLM Prompt Generation + +### Serialization for LLM + +```python +def format_hierarchy_for_llm(hierarchy: FunctionCoverageHierarchy) -> str: + """Format coverage hierarchy as text for LLM consumption.""" + lines = [ + f"Function: {hierarchy.function_name}", + f"Location: {hierarchy.primary_file}:{hierarchy.start_line}-{hierarchy.end_line}", + f"Coverage: {hierarchy.coverage_percentage:.1f}% ({hierarchy.covered_lines}/{hierarchy.total_lines} lines)", + "", + "Code structure with uncovered paths:", + "", + ] + + lines.extend(format_nodes(hierarchy.children, indent=0)) + + # Add summary of uncovered paths + uncovered_paths = list(hierarchy.iter_uncovered_paths()) + if uncovered_paths: + lines.append("") + lines.append("Uncovered code paths to target:") + for i, (path, block) in enumerate(uncovered_paths, 1): + lines.append(f" {i}. 
{' → '.join(path)}") + lines.append(f" {len(block.uncovered_lines)} uncovered lines") + + return "\n".join(lines) + + +def format_nodes(nodes: list[CoverageNode], indent: int) -> list[str]: + """Recursively format nodes as indented text.""" + lines = [] + prefix = " " * indent + + for node in nodes: + if isinstance(node, CodeBlock): + status = "✓" if node.is_fully_covered else "✗" if node.coverage_fraction == 0 else "◐" + uncovered_info = "" + if not node.is_fully_covered: + uncovered = sorted(node.uncovered_lines) + ranges = compress_to_ranges(uncovered) + uncovered_info = f" [uncovered: {format_ranges(ranges)}]" + + lines.append(f"{prefix}[{node.start_line}-{node.end_line}] {status}{uncovered_info}") + + elif isinstance(node, MacroExpansion): + status = "✗" if node.has_uncovered_code() else "✓" + uncovered_info = "" + if node.has_uncovered_code(): + uncovered_info = f" ({node.total_uncovered_lines()} uncovered)" + + lines.append(f"{prefix}↳ Macro at line {node.call_line} → {node.macro_file}{uncovered_info}") + lines.extend(format_nodes(node.children, indent + 1)) + + return lines +``` + +### Example Output + +``` +Function: png_read_end +Location: pngread.c:912-987 +Coverage: 85.2% (184/216 lines) + +Code structure with uncovered paths: + +[912-915] ✓ +↳ Macro at line 916 → pngpriv.h (5 uncovered) + [45-47] ✓ + [48-52] ✗ [uncovered: 48-52] +[917-925] ✓ +↳ Macro at line 926 → png.c (3 uncovered) + [230-235] ✓ + ↳ Macro at line 236 → pngpriv.h (3 uncovered) + [102-103] ✓ + [104-106] ✗ [uncovered: 104-106] + [237-240] ✓ +[927-964] ✓ +[965-987] ◐ [uncovered: 965-970] + +Uncovered code paths to target: + 1. png_read_end → pngpriv.h at pngread.c:916 → pngpriv.h:48-52 + 5 uncovered lines + 2. png_read_end → png.c at pngread.c:926 → pngpriv.h at png.c:236 → pngpriv.h:104-106 + 3 uncovered lines + 3. 
png_read_end → pngread.c:965-987 + 6 uncovered lines +``` + +## Integration Points + +### Coverage Runner +- Build hierarchy during `_process_function_coverage` for partial coverage functions +- Store in new field `coverage_hierarchy` on `CoveredFunction` + +### Coverage Bot +- Serialize `FunctionCoverageHierarchy` to protobuf +- Store in Redis via new `CoverageHierarchyMap` + +### Seed Generator +- Fetch hierarchy for target function +- Include formatted hierarchy in LLM prompt +- LLM can see exactly which code paths need inputs + +## Benefits + +1. **Actionable**: LLM sees the structure, not just line numbers +2. **Contextual**: Macro expansions show where uncovered code comes from +3. **Hierarchical**: Nested macros are properly represented +4. **Focused**: Only partial coverage functions are processed +5. **Efficient**: Tree structure avoids redundant information + +## Future Enhancements + +1. **Source snippets**: Include actual source code for uncovered blocks +2. **Path conditions**: Annotate what conditions lead to uncovered paths +3. **Semantic labels**: Detect common patterns (error handlers, bounds checks, etc.) +4. **Coverage delta**: Track which paths were newly covered by recent inputs diff --git a/fuzzer/docs/new-coverage.md b/fuzzer/docs/new-coverage.md new file mode 100644 index 00000000..36e0bca5 --- /dev/null +++ b/fuzzer/docs/new-coverage.md @@ -0,0 +1,207 @@ +# Uncovered Lines Tracking v2 + +This document describes the changes to coverage tracking introduced in v2, which improves how uncovered lines are tracked for functions that use macros or have code spanning multiple files. + +## Problem with Original Implementation + +The original implementation mixed line numbers from different files into a single set: + +```python +# Old CoveredFunction +CoveredFunction: + total_line_set: {2, 3, 4, 5, 6, 8, 9, 12, 13} # Lines from BOTH foo.c AND macros.h + covered_line_set: {2, 5, 6, 8, 9} + function_start_line: 2 # Could be from macro file! 
+ function_end_line: 13 +``` + +**Issues:** +1. Line numbers from different files were mixed together +2. `function_start_line` could come from a macro header, not the actual function +3. When an LLM saw "uncovered lines 3-4", it couldn't know which file those lines were in +4. Container paths from LLVM (e.g., `/src/libpng/png.h`) don't map to task paths + +## v2 Solution: Per-File Tracking + +### New Data Structures + +#### FileLineCoverage +Tracks coverage for a single file: + +```python +@dataclass +class FileLineCoverage: + file_id: int + file_path: str # Container path from coverage + total_lines: set[int] + covered_lines: set[int] + is_primary: bool # True if this is the function definition file +``` + +#### MacroCallSite +Tracks where macros with uncovered code are called: + +```python +@dataclass +class MacroCallSite: + call_line: int # Line in primary file where macro is called + macro_file_path: str # File where macro is defined + uncovered_line_count: int # How many lines inside macro are uncovered +``` + +#### Updated CoveredFunction +```python +@dataclass +class CoveredFunction: + names: str + total_lines: int # Aggregate count (unchanged) + covered_lines: int # Aggregate count (unchanged) + function_paths: list[str] + + # Existing fields (for backwards compatibility) + total_line_set: set[int] | None + covered_line_set: set[int] | None + function_start_line: int | None + function_end_line: int | None + + # NEW: Per-file tracking + file_coverage: list[FileLineCoverage] | None + primary_file_id: int | None + macro_call_sites: list[MacroCallSite] | None +``` + +### Protobuf Changes + +New messages in `msg.proto`: + +```protobuf +message MacroCallSite { + uint32 call_line = 1; // Line in primary file where macro is called + string macro_file_path = 2; // File where macro is defined + uint32 uncovered_count = 3; // Lines inside macro that are uncovered +} + +message UncoveredLines { + repeated uint32 starts = 1 [packed = true]; + repeated uint32 lengths = 
2 [packed = true]; + uint32 function_start_line = 3; + uint32 function_end_line = 4; +} + +message FunctionUncoveredLines { + string function_name = 1; + repeated string function_paths = 2; + string primary_file_path = 3; // Renamed from file_path + uint32 total_lines = 4; + uint32 covered_lines = 5; + UncoveredLines uncovered = 6; // Uncovered lines in PRIMARY file only + repeated MacroCallSite macro_sites = 7; // Macro call sites with uncovered code +} +``` + +### Key Changes + +#### 1. Primary File Identification + +The primary file (where the function is defined) is identified by: +- File with the most `REGION_KIND_CODE` regions +- Preference for `.c/.cpp` files over `.h/.hpp` as tiebreaker + +```python +def find_primary_file(regions: list, filenames: list[str]) -> int: + """Find the file_id of the primary file.""" + code_region_counts: dict[int, int] = {} + + for region in regions: + if region[7] == REGION_KIND_CODE: + file_id = region[5] + code_region_counts[file_id] = code_region_counts.get(file_id, 0) + 1 + + def file_sort_key(fid): + count = code_region_counts[fid] + is_source = filenames[fid].endswith(('.c', '.cpp', '.cc', '.cxx')) + return (count, is_source) + + return max(code_region_counts.keys(), key=file_sort_key) +``` + +#### 2. Per-File Region Processing + +CODE regions are now grouped by their `file_id`: + +```python +for region in regions: + if kind == REGION_KIND_CODE: + file_id = region[5] + # Track in file-specific sets + if file_id not in lines_by_file: + lines_by_file[file_id] = (set(), set()) + file_total, file_covered = lines_by_file[file_id] + self._add_region_lines(region, file_total, file_covered) +``` + +#### 3. Macro Call Site Tracking + +EXPANSION regions are processed to track call sites with uncovered code: + +```python +elif kind == REGION_KIND_EXPANSION: + # Get expansion lines (still needed for aggregate counts) + exp_total, exp_covered = self._get_expansion_lines(...) 
+ + # Track as macro call site if has uncovered code + uncovered_count = len(exp_total - exp_covered) + if uncovered_count > 0: + macro_call_sites.append(MacroCallSite( + call_line=region[0], + macro_file_path=expansion_file_path, + uncovered_line_count=uncovered_count, + )) +``` + +#### 4. Redis Storage + +`FunctionUncoveredLines` now stores: +- `primary_file_path`: The actual function definition file +- `uncovered`: Only lines from the primary file +- `macro_sites`: List of macro call sites with uncovered code + +## Example + +Consider a function `process_data()` in `foo.c` that calls a macro `CHECK_NULL()` from `macros.h`: + +### Old Model +``` +uncovered.starts: [3, 12] <- Lines 3-4 are from macros.h! +uncovered.lengths: [2, 2] +``` +Problem: The LLM looks at lines 3-4 of foo.c, but those line numbers actually refer to macros.h, so the code it sees does not match. + +### New Model +``` +primary_file_path: '/src/foo.c' +uncovered.starts: [12] <- Only lines from foo.c +uncovered.lengths: [2] +macro_sites: [ + MacroCallSite(call_line=6, macro_file_path='/src/macros.h', uncovered_count=2) +] +``` +Benefit: The LLM knows lines 12-13 are in foo.c, and that the macro called on line 6 contains uncovered code. + +## Backwards Compatibility + +- Aggregate `total_lines` and `covered_lines` counts still include expansion lines +- `total_line_set` and `covered_line_set` still contain mixed lines (for existing code) +- New fields (`file_coverage`, `primary_file_id`, `macro_call_sites`) are `None` when coverage is 0% or 100%; `macro_call_sites` is also `None` when no macro call site has uncovered code + +## New Files + +- `common/src/buttercup/common/coverage_utils.py`: `UncoveredRanges` class for line set ↔ protobuf conversion +- `common/src/buttercup/common/maps.py`: Added `UncoveredLinesMap` for Redis storage + +## Usage for LLM Guidance + +The seed-gen component can now provide the LLM with: +1. Function source from program model (avoids path mapping issues) +2. Which lines in the function body are uncovered (from primary file) +3. Which macro calls have uncovered code (call site + count) + +This enables targeted input generation to reach specific uncovered paths. 
diff --git a/fuzzer/src/buttercup/fuzzing_infra/coverage_bot.py b/fuzzer/src/buttercup/fuzzing_infra/coverage_bot.py index a31c5883..b5afc211 100644 --- a/fuzzer/src/buttercup/fuzzing_infra/coverage_bot.py +++ b/fuzzer/src/buttercup/fuzzing_infra/coverage_bot.py @@ -10,11 +10,21 @@ from buttercup.common import node_local from buttercup.common.challenge_task import ChallengeTask from buttercup.common.corpus import Corpus +from buttercup.common.coverage_utils import UncoveredRanges from buttercup.common.datastructures.aliases import BuildType as BuildTypeHint -from buttercup.common.datastructures.msg_pb2 import BuildOutput, BuildType, FunctionCoverage, WeightedHarness +from buttercup.common.datastructures.msg_pb2 import ( + BuildOutput, + BuildType, + FunctionCoverage, + FunctionUncoveredLines, + WeightedHarness, +) +from buttercup.common.datastructures.msg_pb2 import ( + MacroCallSite as MacroCallSiteProto, +) from buttercup.common.default_task_loop import TaskLoop from buttercup.common.logger import setup_package_logger -from buttercup.common.maps import CoverageMap +from buttercup.common.maps import CoverageMap, UncoveredLinesMap from buttercup.common.telemetry import CRSActionCategory, init_telemetry, set_crs_attributes from buttercup.common.utils import setup_periodic_zombie_reaper from opentelemetry import trace @@ -195,8 +205,10 @@ def _submit_function_coverage( """ coverage_map = CoverageMap(self.redis, harness_name, package_name, task_id) + uncovered_map = UncoveredLinesMap(self.redis, harness_name, package_name, task_id) updated_functions = 0 + updated_uncovered = 0 for function in func_coverage: function_coverage = FunctionCoverage() function_paths_set = set(function.function_paths) @@ -211,7 +223,49 @@ def _submit_function_coverage( if CoverageBot._should_update_function_coverage(coverage_map, function_coverage): coverage_map.set_function_coverage(function_coverage) updated_functions += 1 + + # Submit uncovered lines data for partial coverage + if 
@dataclass
class FileLineCoverage:
    """Coverage data for lines in a single file."""

    file_id: int
    file_path: str  # Container path from coverage
    total_lines: set[int]
    covered_lines: set[int]
    is_primary: bool  # True if this is the function definition file


@dataclass
class MacroCallSite:
    """Location where a macro with uncovered code is called."""

    call_line: int  # Line in primary file where macro is called
    macro_file_path: str  # File where macro is defined
    uncovered_line_count: int  # How many lines inside macro are uncovered


def find_primary_file(regions: list[Any], filenames: list[str]) -> int:
    """Find the file_id of the primary file (where function is defined).

    The primary file is identified by:
    1. File with the most REGION_KIND_CODE regions
    2. Prefer .c/.cpp files over .h/.hpp files as tiebreaker

    Regions are interpreted the same way as in region processing: a region
    with fewer than 5 fields is ignored, a missing kind field means
    REGION_KIND_CODE, and a missing file id means file 0. This keeps the
    returned id consistent with the per-file line sets built from the same
    regions.

    Args:
        regions: Region arrays of one function
        filenames: The function's filename table, indexed by file id

    Returns:
        The primary file id, or 0 when no code region is present
    """
    code_region_counts: dict[int, int] = {}

    for region in regions:
        if len(region) < 5:
            continue
        kind = region[7] if len(region) > 7 else REGION_KIND_CODE
        if kind != REGION_KIND_CODE:
            continue
        file_id = region[5] if len(region) > 5 else 0
        code_region_counts[file_id] = code_region_counts.get(file_id, 0) + 1

    if not code_region_counts:
        return 0

    # Pick file with most code regions, prefer source over header
    def file_sort_key(fid: int) -> tuple[int, bool]:
        is_source = fid < len(filenames) and filenames[fid].endswith((".c", ".cpp", ".cc", ".cxx"))
        return (code_region_counts[fid], is_source)

    return max(code_region_counts, key=file_sort_key)
total_line_count, covered_line_count, function.get("filenames", []), + # Include line sets for partial coverage only + total_line_set=total_lines.copy() if is_partial else None, + covered_line_set=covered_lines.copy() if is_partial else None, + function_start_line=func_start if is_partial else None, + function_end_line=func_end if is_partial else None, + # New per-file tracking fields + file_coverage=list(file_coverage.values()) if is_partial else None, + primary_file_id=primary_file_id if is_partial else None, + macro_call_sites=macro_call_sites if is_partial and macro_call_sites else None, ), ) @@ -357,6 +434,115 @@ def _add_region_lines( if execution_count > 0: covered_lines.update(lines) + def _process_regions_with_file_tracking( + self, + regions: list[Any], + filenames: list[str], + expansion_map: ExpansionMap, + coord_to_filenames: CoordToFilenames, + expansion_lines_cache: ExpansionLinesCache, + ) -> tuple[dict[int, FileLineCoverage], int, list[MacroCallSite], set[int], set[int]]: + """Process regions and group by file, also tracking macro call sites. 
+ + Returns: + - Dict of file_id -> FileLineCoverage (for CODE regions only) + - Primary file_id + - List of macro call sites with uncovered code + - Aggregate total_lines set (includes expansion lines) + - Aggregate covered_lines set (includes expansion lines) + """ + # Track lines by file for CODE regions + lines_by_file: dict[int, tuple[set[int], set[int]]] = {} # file_id -> (total, covered) + macro_call_sites: list[MacroCallSite] = [] + + # Aggregate sets (for backwards compatibility, includes expansion lines) + total_lines: set[int] = set() + covered_lines: set[int] = set() + + filenames_set = set(filenames) if filenames else set() + primary_file_id = find_primary_file(regions, filenames) + + for region in regions: + if len(region) < 5: + continue + + region_kind = region[7] if len(region) > 7 else REGION_KIND_CODE + file_id = region[5] if len(region) > 5 else 0 + + if region_kind == REGION_KIND_CODE: + # Track in file-specific sets + if file_id not in lines_by_file: + lines_by_file[file_id] = (set(), set()) + file_total, file_covered = lines_by_file[file_id] + self._add_region_lines(region, file_total, file_covered) + + # Also add to aggregate sets + self._add_region_lines(region, total_lines, covered_lines) + + elif region_kind == REGION_KIND_EXPANSION: + # Process expansion for aggregate counts + coords = RegionCoords(region[0], region[1], region[2], region[3]) + expansion_filenames = coord_to_filenames.get(coords, []) + for fn in expansion_filenames: + if fn in filenames_set: + key = ExpansionKey.from_coords(fn, coords) + if key in expansion_map: + # Get or compute expansion lines + if key in expansion_lines_cache: + cached = expansion_lines_cache[key] + exp_total = cached.total_lines + exp_covered = cached.covered_lines + else: + exp_total_set: set[int] = set() + exp_covered_set: set[int] = set() + self._process_expansion_lines( + expansion_map[key], + exp_total_set, + exp_covered_set, + expansion_map, + coord_to_filenames, + filenames_set, + 
expansion_lines_cache, + ) + expansion_lines_cache[key] = CachedExpansionLines( + frozenset(exp_total_set), + frozenset(exp_covered_set), + ) + exp_total = frozenset(exp_total_set) + exp_covered = frozenset(exp_covered_set) + + # Add to aggregate counts + total_lines.update(exp_total) + covered_lines.update(exp_covered) + + # Track as macro call site if has uncovered code + uncovered_count = len(exp_total - exp_covered) + if uncovered_count > 0: + call_line = region[0] # Line where macro is called + # Get the macro file path from the expansion + macro_file_path = fn + macro_call_sites.append( + MacroCallSite( + call_line=call_line, + macro_file_path=macro_file_path, + uncovered_line_count=uncovered_count, + ) + ) + break + + # Convert to FileLineCoverage objects + file_coverage: dict[int, FileLineCoverage] = {} + for fid, (ftotal, fcovered) in lines_by_file.items(): + file_coverage[fid] = FileLineCoverage( + file_id=fid, + file_path=filenames[fid] if fid < len(filenames) else "", + total_lines=ftotal, + covered_lines=fcovered, + is_primary=(fid == primary_file_id), + ) + + return file_coverage, primary_file_id, macro_call_sites, total_lines, covered_lines + def run(self, harness_name: str, corpus_dir: str) -> list[CoveredFunction] | None: lang = ProjectYaml(self.tool, self.tool.project_name).unified_language if lang in [Language.C, Language.CPP]: diff --git a/orchestrator/pyproject.toml b/orchestrator/pyproject.toml index d23f3658..d170dd8d 100644 --- a/orchestrator/pyproject.toml +++ b/orchestrator/pyproject.toml @@ -9,7 +9,7 @@ requires-python = ">=3.12,<3.13" dependencies = [ "argon2-cffi ~=21.3.0", "common[full]", - "fastapi ~=0.115.6", + "fastapi ~=0.128.0", "pydantic ~=2.11.0", "pydantic-settings ~=2.7.1", "python-dateutil ~=2.9.0", @@ -44,7 +44,7 @@ dev = [ "pytest-asyncio ~=0.25.2", "pytest-cov ~=6.0.0", "pytest-xdist ~=3.6.1", - "fastapi[standard] ~=0.115.6", + "fastapi[standard] ~=0.128.0", "responses ~=0.25.6", "httpx ~=0.28.1", # Linting and type 
checking diff --git a/orchestrator/uv.lock b/orchestrator/uv.lock index c17cf5cb..fab0ed44 100644 --- a/orchestrator/uv.lock +++ b/orchestrator/uv.lock @@ -2,6 +2,15 @@ version = 1 revision = 3 requires-python = "==3.12.*" +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -372,16 +381,17 @@ wheels = [ [[package]] name = "fastapi" -version = "0.115.14" +version = "0.128.0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "annotated-doc" }, { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ca/53/8c38a874844a8b0fa10dd8adf3836ac154082cf88d3f22b544e9ceea0a15/fastapi-0.115.14.tar.gz", hash = "sha256:b1de15cdc1c499a4da47914db35d0e4ef8f1ce62b624e94e0e5824421df99739", size = 296263, upload-time = "2025-06-26T15:29:08.21Z" } +sdist = { url = "https://files.pythonhosted.org/packages/52/08/8c8508db6c7b9aae8f7175046af41baad690771c9bcde676419965e338c7/fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a", size = 365682, upload-time = "2025-12-27T15:21:13.714Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/53/50/b1222562c6d270fea83e9c9075b8e8600b8479150a18e4516a6138b980d1/fastapi-0.115.14-py3-none-any.whl", hash = "sha256:6c0c8bf9420bd58f565e585036d971872472b4f7d3f6c73b698e10cffdefb3ca", size = 95514, upload-time = "2025-06-26T15:29:06.49Z" }, + { url = "https://files.pythonhosted.org/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = "sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094, upload-time = "2025-12-27T15:21:12.154Z" }, ] [package.optional-dependencies] @@ -390,6 +400,8 @@ standard = [ { name = "fastapi-cli", extra = ["standard"] }, { name = "httpx" }, { name = "jinja2" }, + { name = "pydantic-extra-types" }, + { name = "pydantic-settings" }, { name = "python-multipart" }, { name = "uvicorn", extra = ["standard"] }, ] @@ -477,6 +489,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f9/c8/9d76a66421d1ae24340dfae7e79c313957f6e3195c144d2c73333b5bfe34/greenlet-3.3.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7e806ca53acf6d15a888405880766ec84721aa4181261cd11a457dfe9a7a4975", size = 276443, upload-time = "2026-01-23T15:30:10.066Z" }, { url = "https://files.pythonhosted.org/packages/81/99/401ff34bb3c032d1f10477d199724f5e5f6fbfb59816ad1455c79c1eb8e7/greenlet-3.3.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d842c94b9155f1c9b3058036c24ffb8ff78b428414a19792b2380be9cecf4f36", size = 597359, upload-time = "2026-01-23T16:00:57.394Z" }, { url = "https://files.pythonhosted.org/packages/2b/bc/4dcc0871ed557792d304f50be0f7487a14e017952ec689effe2180a6ff35/greenlet-3.3.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:20fedaadd422fa02695f82093f9a98bad3dab5fcda793c658b945fcde2ab27ba", size = 607805, upload-time = "2026-01-23T16:05:28.068Z" }, + { url = 
"https://files.pythonhosted.org/packages/3b/cd/7a7ca57588dac3389e97f7c9521cb6641fd8b6602faf1eaa4188384757df/greenlet-3.3.1-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c620051669fd04ac6b60ebc70478210119c56e2d5d5df848baec4312e260e4ca", size = 622363, upload-time = "2026-01-23T16:15:54.754Z" }, { url = "https://files.pythonhosted.org/packages/cf/05/821587cf19e2ce1f2b24945d890b164401e5085f9d09cbd969b0c193cd20/greenlet-3.3.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14194f5f4305800ff329cbf02c5fcc88f01886cadd29941b807668a45f0d2336", size = 609947, upload-time = "2026-01-23T15:32:51.004Z" }, { url = "https://files.pythonhosted.org/packages/a4/52/ee8c46ed9f8babaa93a19e577f26e3d28a519feac6350ed6f25f1afee7e9/greenlet-3.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7b2fe4150a0cf59f847a67db8c155ac36aed89080a6a639e9f16df5d6c6096f1", size = 1567487, upload-time = "2026-01-23T16:04:22.125Z" }, { url = "https://files.pythonhosted.org/packages/8f/7c/456a74f07029597626f3a6db71b273a3632aecb9afafeeca452cfa633197/greenlet-3.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:49f4ad195d45f4a66a0eb9c1ba4832bb380570d361912fa3554746830d332149", size = 1636087, upload-time = "2026-01-23T15:33:47.486Z" }, @@ -1344,7 +1357,7 @@ dev = [ requires-dist = [ { name = "argon2-cffi", specifier = "~=21.3.0" }, { name = "common", extras = ["full"], editable = "../common" }, - { name = "fastapi", specifier = "~=0.115.6" }, + { name = "fastapi", specifier = "~=0.128.0" }, { name = "pydantic", specifier = "~=2.11.0" }, { name = "pydantic-settings", specifier = "~=2.7.1" }, { name = "python-dateutil", specifier = "~=2.9.0" }, @@ -1360,7 +1373,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ - { name = "fastapi", extras = ["standard"], specifier = "~=0.115.6" }, + { name = "fastapi", extras = ["standard"], specifier = "~=0.128.0" }, { name = "httpx", specifier = "~=0.28.1" }, { name = "pytest", specifier = 
"~=8.3.4" }, { name = "pytest-asyncio", specifier = "~=0.25.2" }, @@ -1484,6 +1497,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894, upload-time = "2025-04-23T18:31:51.609Z" }, ] +[[package]] +name = "pydantic-extra-types" +version = "2.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fd/35/2fee58b1316a73e025728583d3b1447218a97e621933fc776fb8c0f2ebdd/pydantic_extra_types-2.11.0.tar.gz", hash = "sha256:4e9991959d045b75feb775683437a97991d02c138e00b59176571db9ce634f0e", size = 157226, upload-time = "2025-12-31T16:18:27.944Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/17/fabd56da47096d240dd45ba627bead0333b0cf0ee8ada9bec579287dadf3/pydantic_extra_types-2.11.0-py3-none-any.whl", hash = "sha256:84b864d250a0fc62535b7ec591e36f2c5b4d1325fa0017eb8cda9aeb63b374a6", size = 74296, upload-time = "2025-12-31T16:18:26.38Z" }, +] + [[package]] name = "pydantic-settings" version = "2.7.1" @@ -1945,14 +1971,15 @@ wheels = [ [[package]] name = "starlette" -version = "0.46.2" +version = "0.50.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846, upload-time = "2025-04-13T13:56:17.942Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = 
"sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037, upload-time = "2025-04-13T13:56:16.21Z" }, + { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" }, ] [[package]]