|
| 1 | +package cvelist2osv |
| 2 | + |
| 3 | +import ( |
| 4 | + "fmt" |
| 5 | + "log/slog" |
| 6 | + "slices" |
| 7 | + |
| 8 | + "github.com/google/osv/vulnfeeds/utility/logger" |
| 9 | + "github.com/ossf/osv-schema/bindings/go/osvschema" |
| 10 | + "google.golang.org/protobuf/encoding/protojson" |
| 11 | + "google.golang.org/protobuf/types/known/structpb" |
| 12 | +) |
| 13 | + |
| 14 | +// groupAffectedRanges groups ranges that share the same introduced value, type, and repo. |
| 15 | +// This is because having multiple ranges with the same introduced value would act like an |
| 16 | +// OR condition, rather than AND. |
| 17 | +// This function modifies in-place |
| 18 | +func groupAffectedRanges(affected []*osvschema.Affected) { |
| 19 | + for _, aff := range affected { |
| 20 | + if len(aff.GetRanges()) <= 1 { |
| 21 | + continue |
| 22 | + } |
| 23 | + |
| 24 | + // Key for grouping: Type + Repo + Introduced Value |
| 25 | + type groupKey struct { |
| 26 | + RangeType osvschema.Range_Type |
| 27 | + Repo string |
| 28 | + Introduced string |
| 29 | + } |
| 30 | + |
| 31 | + groups := make(map[groupKey]*osvschema.Range) |
| 32 | + var order []groupKey // To maintain deterministic order of first appearance |
| 33 | + |
| 34 | + for _, r := range aff.GetRanges() { |
| 35 | + // Find the introduced event |
| 36 | + var introduced string |
| 37 | + var introducedCount int |
| 38 | + for _, e := range r.GetEvents() { |
| 39 | + if e.GetIntroduced() != "" { |
| 40 | + introduced = e.GetIntroduced() |
| 41 | + introducedCount++ |
| 42 | + } |
| 43 | + } |
| 44 | + |
| 45 | + if introducedCount > 1 { |
| 46 | + logger.Error("Multiple 'introduced' events found in a single range", slog.Any("range", r)) |
| 47 | + } |
| 48 | + |
| 49 | + // If no introduced event is found, we use an empty string as the introduced value. |
| 50 | + key := groupKey{ |
| 51 | + RangeType: r.GetType(), |
| 52 | + Repo: r.GetRepo(), |
| 53 | + Introduced: introduced, |
| 54 | + } |
| 55 | + |
| 56 | + if _, exists := groups[key]; !exists { |
| 57 | + // Initialize with a deep copy of the first range found for this group |
| 58 | + // We need to be careful about DatabaseSpecific. |
| 59 | + // We want to keep the "versions" from this first range. |
| 60 | + groups[key] = &osvschema.Range{ |
| 61 | + Type: r.GetType(), |
| 62 | + Repo: r.GetRepo(), |
| 63 | + Events: []*osvschema.Event{}, |
| 64 | + DatabaseSpecific: r.GetDatabaseSpecific(), // Start with this one's DS |
| 65 | + } |
| 66 | + order = append(order, key) |
| 67 | + } else { |
| 68 | + // Merge DatabaseSpecific "versions" |
| 69 | + mergeDatabaseSpecificVersions(groups[key], r.GetDatabaseSpecific()) |
| 70 | + } |
| 71 | + |
| 72 | + // Add all events to the group. Deduplication happens later in cleanEvents. |
| 73 | + groups[key].Events = append(groups[key].Events, r.GetEvents()...) |
| 74 | + } |
| 75 | + |
| 76 | + // Reconstruct ranges from groups |
| 77 | + var newRanges []*osvschema.Range |
| 78 | + for _, key := range order { |
| 79 | + r := groups[key] |
| 80 | + r.Events = cleanEvents(r.GetEvents()) |
| 81 | + newRanges = append(newRanges, r) |
| 82 | + } |
| 83 | + aff.Ranges = newRanges |
| 84 | + } |
| 85 | +} |
| 86 | + |
| 87 | +// mergeDatabaseSpecificVersions merges the "versions" field from the source DatabaseSpecific |
| 88 | +// into the target DatabaseSpecific. |
| 89 | +// |
| 90 | +// Examples: |
| 91 | +// 1. Target: nil, Source: {"versions": ["v1", "v2"]} |
| 92 | +// Result: Target becomes {"versions": ["v1", "v2"]} |
| 93 | +// 2. Target: {}, Source: {"versions": ["v1", "v2"]} |
| 94 | +// Result: Target becomes {"versions": ["v1", "v2"]} |
| 95 | +// 3. Target: {"versions": ["v1", "v3"]}, Source: {"versions": ["v1", "v2"]} |
| 96 | +// Result: Target becomes {"versions": ["v1", "v3", "v2"]} (order might vary for new additions, but existing order is preserved) |
| 97 | +// 4. Target: {"other": "data"}, Source: {"versions": ["v1", "v2"]} |
| 98 | +// Result: Target becomes {"other": "data", "versions": ["v1", "v2"]} |
| 99 | +// 5. Target: {"versions": ["v1", "v2"]}, Source: nil |
| 100 | +// Result: Target remains {"versions": ["v1", "v2"]} |
| 101 | +func mergeDatabaseSpecificVersions(target *osvschema.Range, source *structpb.Struct) { |
| 102 | + if source == nil { |
| 103 | + return |
| 104 | + } |
| 105 | + sourceVersions := source.GetFields()["versions"] |
| 106 | + if sourceVersions == nil { |
| 107 | + return |
| 108 | + } |
| 109 | + |
| 110 | + if target.GetDatabaseSpecific() == nil { |
| 111 | + var err error |
| 112 | + target.DatabaseSpecific, err = structpb.NewStruct(nil) |
| 113 | + if err != nil { |
| 114 | + logger.Fatal("Failed to create DatabaseSpecific", slog.Any("error", err)) |
| 115 | + } |
| 116 | + } |
| 117 | + |
| 118 | + targetFields := target.GetDatabaseSpecific().GetFields() |
| 119 | + if targetFields == nil { |
| 120 | + targetFields = make(map[string]*structpb.Value) |
| 121 | + target.DatabaseSpecific.Fields = targetFields |
| 122 | + } |
| 123 | + |
| 124 | + targetVersions := targetFields["versions"] |
| 125 | + if targetVersions == nil { |
| 126 | + targetFields["versions"] = sourceVersions |
| 127 | + return |
| 128 | + } |
| 129 | + |
| 130 | + // Both have versions, merge them |
| 131 | + // Assuming versions is a ListValue |
| 132 | + if targetVersions.GetListValue() != nil && sourceVersions.GetListValue() != nil { |
| 133 | + // Append source versions to target versions |
| 134 | + targetVersions.GetListValue().Values = append(targetVersions.GetListValue().GetValues(), sourceVersions.GetListValue().GetValues()...) |
| 135 | + |
| 136 | + // Deduplicate versions |
| 137 | + uniqueVersions := make([]*structpb.Value, 0, len(targetVersions.GetListValue().GetValues())) |
| 138 | + seenVersions := make(map[string]bool) |
| 139 | + |
| 140 | + for _, v := range targetVersions.GetListValue().GetValues() { |
| 141 | + // Serialize to string for comparison |
| 142 | + // This might be expensive but robust for structpb.Value |
| 143 | + b, _ := protojson.Marshal(v) |
| 144 | + key := string(b) |
| 145 | + if seenVersions[key] { |
| 146 | + continue |
| 147 | + } |
| 148 | + seenVersions[key] = true |
| 149 | + uniqueVersions = append(uniqueVersions, v) |
| 150 | + } |
| 151 | + targetVersions.GetListValue().Values = uniqueVersions |
| 152 | + } |
| 153 | +} |
| 154 | + |
| 155 | +// cleanEvents deduplicates events and ensures there is only one Introduced event per group. |
| 156 | +func cleanEvents(events []*osvschema.Event) []*osvschema.Event { |
| 157 | + uniqueEvents := make([]*osvschema.Event, 0, len(events)) |
| 158 | + seen := make(map[string]bool) |
| 159 | + |
| 160 | + for _, e := range events { |
| 161 | + // Create a unique key for the event to check for duplicates |
| 162 | + key := fmt.Sprintf("%v|%v|%v|%v", e.GetIntroduced(), e.GetFixed(), e.GetLimit(), e.GetLastAffected()) |
| 163 | + if seen[key] { |
| 164 | + continue |
| 165 | + } |
| 166 | + seen[key] = true |
| 167 | + uniqueEvents = append(uniqueEvents, e) |
| 168 | + } |
| 169 | + |
| 170 | + // Sort: Introduced events come first. |
| 171 | + slices.SortStableFunc(uniqueEvents, func(a, b *osvschema.Event) int { |
| 172 | + // Introduced comes before everything else |
| 173 | + if a.GetIntroduced() != "" && b.GetIntroduced() == "" { |
| 174 | + return -1 |
| 175 | + } |
| 176 | + if a.GetIntroduced() == "" && b.GetIntroduced() != "" { |
| 177 | + return 1 |
| 178 | + } |
| 179 | + |
| 180 | + return 0 |
| 181 | + }) |
| 182 | + |
| 183 | + // Ensure only one Introduced event remains. |
| 184 | + // Since we grouped by Introduced value, all Introduced events in this group are identical. |
| 185 | + var finalEvents []*osvschema.Event |
| 186 | + introduced := "" |
| 187 | + for _, e := range uniqueEvents { |
| 188 | + if e.GetIntroduced() != "" { |
| 189 | + if introduced == "" { |
| 190 | + finalEvents = append(finalEvents, e) |
| 191 | + introduced = e.GetIntroduced() |
| 192 | + } else if introduced != e.GetIntroduced() { |
| 193 | + logger.Error("Found multiple introduced values in the same group", slog.Any("introduced", introduced), slog.Any("event", e.GetIntroduced())) |
| 194 | + } |
| 195 | + } else { |
| 196 | + finalEvents = append(finalEvents, e) |
| 197 | + } |
| 198 | + } |
| 199 | + |
| 200 | + return finalEvents |
| 201 | +} |
0 commit comments