Skip to content

Commit 0fedba3

Browse files
authored
fix(cve5): group by introduced (#4390)
The way we were interpreting version ranges that share the same introduced value is wrong, as the schema treats ranges independently, like an AND operator rather than an OR operator. This means that if there are areas of no overlap between the ranges (like branches), all ranges are affected rather than just the ranges specified. This PR groups ranges that have the same Introduced value into one set of events like: ``` { introduced: x, fixed: y, fixed: z } ``` Shoutout to @michaelkedar for spotting this.
1 parent 3296d02 commit 0fedba3

File tree

4 files changed

+635
-63
lines changed

4 files changed

+635
-63
lines changed

vulnfeeds/cvelist2osv/__snapshots__/converter_test.snap

Lines changed: 0 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -161,87 +161,24 @@
161161
},
162162
{
163163
"fixed": "a3e77da9f843e4ab93917d30c314f0283e28c124"
164-
}
165-
],
166-
"repo": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git",
167-
"type": "GIT"
168-
},
169-
{
170-
"events": [
171-
{
172-
"introduced": "1da177e4c3f41524e886b7f1b8a0c1fc7321cac2"
173164
},
174165
{
175166
"fixed": "213ba5bd81b7e97ac6e6190b8f3bc6ba76123625"
176-
}
177-
],
178-
"repo": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git",
179-
"type": "GIT"
180-
},
181-
{
182-
"events": [
183-
{
184-
"introduced": "1da177e4c3f41524e886b7f1b8a0c1fc7321cac2"
185167
},
186168
{
187169
"fixed": "40a35d14f3c0dc72b689061ec72fc9b193f37d1f"
188-
}
189-
],
190-
"repo": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git",
191-
"type": "GIT"
192-
},
193-
{
194-
"events": [
195-
{
196-
"introduced": "1da177e4c3f41524e886b7f1b8a0c1fc7321cac2"
197170
},
198171
{
199172
"fixed": "27a39d006f85e869be68c1d5d2ce05e5d6445bf5"
200-
}
201-
],
202-
"repo": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git",
203-
"type": "GIT"
204-
},
205-
{
206-
"events": [
207-
{
208-
"introduced": "1da177e4c3f41524e886b7f1b8a0c1fc7321cac2"
209173
},
210174
{
211175
"fixed": "92527100be38ede924768f4277450dfe8a40e16b"
212-
}
213-
],
214-
"repo": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git",
215-
"type": "GIT"
216-
},
217-
{
218-
"events": [
219-
{
220-
"introduced": "1da177e4c3f41524e886b7f1b8a0c1fc7321cac2"
221176
},
222177
{
223178
"fixed": "6578717ebca91678131d2b1f4ba4258e60536e9f"
224-
}
225-
],
226-
"repo": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git",
227-
"type": "GIT"
228-
},
229-
{
230-
"events": [
231-
{
232-
"introduced": "1da177e4c3f41524e886b7f1b8a0c1fc7321cac2"
233179
},
234180
{
235181
"fixed": "7fa9706722882f634090bfc9af642bf9ed719e27"
236-
}
237-
],
238-
"repo": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git",
239-
"type": "GIT"
240-
},
241-
{
242-
"events": [
243-
{
244-
"introduced": "1da177e4c3f41524e886b7f1b8a0c1fc7321cac2"
245182
},
246183
{
247184
"fixed": "80e648042e512d5a767da251d44132553fe04ae0"

vulnfeeds/cvelist2osv/converter.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,8 @@ func ConvertAndExportCVEToOSV(cve cves.CVE5, vulnSink io.Writer, metricsSink io.
245245
versionExtractor := GetVersionExtractor(cve.Metadata.AssignerShortName)
246246
versionExtractor.ExtractVersions(cve, v, &metrics, metrics.Repos)
247247

248+
groupAffectedRanges(v.Affected)
249+
248250
determineOutcome(&metrics)
249251

250252
err := v.ToJSON(vulnSink)

vulnfeeds/cvelist2osv/grouping.go

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
package cvelist2osv
2+
3+
import (
4+
"fmt"
5+
"log/slog"
6+
"slices"
7+
8+
"github.com/google/osv/vulnfeeds/utility/logger"
9+
"github.com/ossf/osv-schema/bindings/go/osvschema"
10+
"google.golang.org/protobuf/encoding/protojson"
11+
"google.golang.org/protobuf/types/known/structpb"
12+
)
13+
14+
// groupAffectedRanges groups ranges that share the same introduced value, type, and repo.
15+
// This is because having multiple ranges with the same introduced value would act like an
16+
// OR condition, rather than AND.
17+
// This function modifies in-place
18+
func groupAffectedRanges(affected []*osvschema.Affected) {
19+
for _, aff := range affected {
20+
if len(aff.GetRanges()) <= 1 {
21+
continue
22+
}
23+
24+
// Key for grouping: Type + Repo + Introduced Value
25+
type groupKey struct {
26+
RangeType osvschema.Range_Type
27+
Repo string
28+
Introduced string
29+
}
30+
31+
groups := make(map[groupKey]*osvschema.Range)
32+
var order []groupKey // To maintain deterministic order of first appearance
33+
34+
for _, r := range aff.GetRanges() {
35+
// Find the introduced event
36+
var introduced string
37+
var introducedCount int
38+
for _, e := range r.GetEvents() {
39+
if e.GetIntroduced() != "" {
40+
introduced = e.GetIntroduced()
41+
introducedCount++
42+
}
43+
}
44+
45+
if introducedCount > 1 {
46+
logger.Error("Multiple 'introduced' events found in a single range", slog.Any("range", r))
47+
}
48+
49+
// If no introduced event is found, we use an empty string as the introduced value.
50+
key := groupKey{
51+
RangeType: r.GetType(),
52+
Repo: r.GetRepo(),
53+
Introduced: introduced,
54+
}
55+
56+
if _, exists := groups[key]; !exists {
57+
// Initialize with a deep copy of the first range found for this group
58+
// We need to be careful about DatabaseSpecific.
59+
// We want to keep the "versions" from this first range.
60+
groups[key] = &osvschema.Range{
61+
Type: r.GetType(),
62+
Repo: r.GetRepo(),
63+
Events: []*osvschema.Event{},
64+
DatabaseSpecific: r.GetDatabaseSpecific(), // Start with this one's DS
65+
}
66+
order = append(order, key)
67+
} else {
68+
// Merge DatabaseSpecific "versions"
69+
mergeDatabaseSpecificVersions(groups[key], r.GetDatabaseSpecific())
70+
}
71+
72+
// Add all events to the group. Deduplication happens later in cleanEvents.
73+
groups[key].Events = append(groups[key].Events, r.GetEvents()...)
74+
}
75+
76+
// Reconstruct ranges from groups
77+
var newRanges []*osvschema.Range
78+
for _, key := range order {
79+
r := groups[key]
80+
r.Events = cleanEvents(r.GetEvents())
81+
newRanges = append(newRanges, r)
82+
}
83+
aff.Ranges = newRanges
84+
}
85+
}
86+
87+
// mergeDatabaseSpecificVersions merges the "versions" field from the source DatabaseSpecific
88+
// into the target DatabaseSpecific.
89+
//
90+
// Examples:
91+
// 1. Target: nil, Source: {"versions": ["v1", "v2"]}
92+
// Result: Target becomes {"versions": ["v1", "v2"]}
93+
// 2. Target: {}, Source: {"versions": ["v1", "v2"]}
94+
// Result: Target becomes {"versions": ["v1", "v2"]}
95+
// 3. Target: {"versions": ["v1", "v3"]}, Source: {"versions": ["v1", "v2"]}
96+
// Result: Target becomes {"versions": ["v1", "v3", "v2"]} (order might vary for new additions, but existing order is preserved)
97+
// 4. Target: {"other": "data"}, Source: {"versions": ["v1", "v2"]}
98+
// Result: Target becomes {"other": "data", "versions": ["v1", "v2"]}
99+
// 5. Target: {"versions": ["v1", "v2"]}, Source: nil
100+
// Result: Target remains {"versions": ["v1", "v2"]}
101+
func mergeDatabaseSpecificVersions(target *osvschema.Range, source *structpb.Struct) {
102+
if source == nil {
103+
return
104+
}
105+
sourceVersions := source.GetFields()["versions"]
106+
if sourceVersions == nil {
107+
return
108+
}
109+
110+
if target.GetDatabaseSpecific() == nil {
111+
var err error
112+
target.DatabaseSpecific, err = structpb.NewStruct(nil)
113+
if err != nil {
114+
logger.Fatal("Failed to create DatabaseSpecific", slog.Any("error", err))
115+
}
116+
}
117+
118+
targetFields := target.GetDatabaseSpecific().GetFields()
119+
if targetFields == nil {
120+
targetFields = make(map[string]*structpb.Value)
121+
target.DatabaseSpecific.Fields = targetFields
122+
}
123+
124+
targetVersions := targetFields["versions"]
125+
if targetVersions == nil {
126+
targetFields["versions"] = sourceVersions
127+
return
128+
}
129+
130+
// Both have versions, merge them
131+
// Assuming versions is a ListValue
132+
if targetVersions.GetListValue() != nil && sourceVersions.GetListValue() != nil {
133+
// Append source versions to target versions
134+
targetVersions.GetListValue().Values = append(targetVersions.GetListValue().GetValues(), sourceVersions.GetListValue().GetValues()...)
135+
136+
// Deduplicate versions
137+
uniqueVersions := make([]*structpb.Value, 0, len(targetVersions.GetListValue().GetValues()))
138+
seenVersions := make(map[string]bool)
139+
140+
for _, v := range targetVersions.GetListValue().GetValues() {
141+
// Serialize to string for comparison
142+
// This might be expensive but robust for structpb.Value
143+
b, _ := protojson.Marshal(v)
144+
key := string(b)
145+
if seenVersions[key] {
146+
continue
147+
}
148+
seenVersions[key] = true
149+
uniqueVersions = append(uniqueVersions, v)
150+
}
151+
targetVersions.GetListValue().Values = uniqueVersions
152+
}
153+
}
154+
155+
// cleanEvents deduplicates events and ensures there is only one Introduced event per group.
156+
func cleanEvents(events []*osvschema.Event) []*osvschema.Event {
157+
uniqueEvents := make([]*osvschema.Event, 0, len(events))
158+
seen := make(map[string]bool)
159+
160+
for _, e := range events {
161+
// Create a unique key for the event to check for duplicates
162+
key := fmt.Sprintf("%v|%v|%v|%v", e.GetIntroduced(), e.GetFixed(), e.GetLimit(), e.GetLastAffected())
163+
if seen[key] {
164+
continue
165+
}
166+
seen[key] = true
167+
uniqueEvents = append(uniqueEvents, e)
168+
}
169+
170+
// Sort: Introduced events come first.
171+
slices.SortStableFunc(uniqueEvents, func(a, b *osvschema.Event) int {
172+
// Introduced comes before everything else
173+
if a.GetIntroduced() != "" && b.GetIntroduced() == "" {
174+
return -1
175+
}
176+
if a.GetIntroduced() == "" && b.GetIntroduced() != "" {
177+
return 1
178+
}
179+
180+
return 0
181+
})
182+
183+
// Ensure only one Introduced event remains.
184+
// Since we grouped by Introduced value, all Introduced events in this group are identical.
185+
var finalEvents []*osvschema.Event
186+
introduced := ""
187+
for _, e := range uniqueEvents {
188+
if e.GetIntroduced() != "" {
189+
if introduced == "" {
190+
finalEvents = append(finalEvents, e)
191+
introduced = e.GetIntroduced()
192+
} else if introduced != e.GetIntroduced() {
193+
logger.Error("Found multiple introduced values in the same group", slog.Any("introduced", introduced), slog.Any("event", e.GetIntroduced()))
194+
}
195+
} else {
196+
finalEvents = append(finalEvents, e)
197+
}
198+
}
199+
200+
return finalEvents
201+
}

0 commit comments

Comments
 (0)