Skip to content

Commit 81155be

Browse files
authored
feat(vulnfeeds): Enable git resolution for affected versions field CVEList (#3951)
This PR: (1) fixes the metrics notes file concurrency issue; (2) enables git resolution for the extracted version ranges; (3) stores the original versions of each converted range in that range's `database_specific`, and stores the version ranges that failed conversion in the `affected` entry's `database_specific`; (4) enables conversion for GitHub_M-scoped CVEs.
1 parent b596148 commit 81155be

File tree

5 files changed

+164
-46
lines changed

5 files changed

+164
-46
lines changed

vulnfeeds/cmd/cve-bulk-converter/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ var (
2121
localOutputDir = flag.String("out_dir", "cvelist2osv", "Path to output results.")
2222
years = flag.String("years", "2022,2023,2024,2025", "A comma-separated list of years to process.")
2323
workers = flag.Int("workers", 30, "The number of concurrent workers to use for processing CVEs.")
24-
cnas = flag.String("cnas", "Linux", "A comma-separated list of CNAs to process.")
24+
cnas = flag.String("cnas", "Linux,GitHub_M", "A comma-separated list of CNAs to process.")
2525
)
2626

2727
func main() {
File renamed without changes.

vulnfeeds/cmd/cvelist2osv/converter.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,15 @@ func FromCVE5(cve cves.CVE5, refs []cves.Reference, metrics *ConversionMetrics)
103103
}
104104
v.Modified = modified
105105

106-
// Add affected version information.
107-
AddVersionInfo(cve, &v, metrics)
106+
// Try to extract repository URLs from references.
107+
repos, repoNotes := cves.ReposFromReferencesCVEList(string(cve.Metadata.CVEID), refs, RefTagDenyList)
108+
for _, note := range repoNotes {
109+
metrics.AddNote("%s", note)
110+
}
111+
metrics.Repos = repos
108112

113+
// Add affected version information.
114+
AddVersionInfo(cve, &v, metrics, repos)
109115
// TODO(jesslowe@): Add CWEs.
110116

111117
// Combine severity metrics from both CNA and ADP containers.
@@ -178,19 +184,13 @@ func ConvertAndExportCVEToOSV(cve cves.CVE5, directory string) error {
178184
cveID := cve.Metadata.CVEID
179185
cnaAssigner := cve.Metadata.AssignerShortName
180186
references := identifyPossibleURLs(cve)
181-
metrics := &ConversionMetrics{CVEID: cveID, CNA: cnaAssigner}
187+
metrics := ConversionMetrics{CVEID: cveID, CNA: cnaAssigner}
188+
182189
// Create a base OSV record from the CVE.
183-
v := FromCVE5(cve, references, metrics)
190+
v := FromCVE5(cve, references, &metrics)
184191

185192
// Collect metrics about the conversion.
186-
extractConversionMetrics(cve, v.References, metrics)
187-
188-
// Try to extract repository URLs from references.
189-
repos, repoNotes := cves.ReposFromReferencesCVEList(string(cveID), references, RefTagDenyList)
190-
for _, note := range repoNotes {
191-
metrics.AddNote("%s", note)
192-
}
193-
metrics.Repos = repos
193+
extractConversionMetrics(cve, v.References, &metrics)
194194

195195
vulnDir := filepath.Join(directory, cnaAssigner)
196196

@@ -200,7 +200,7 @@ func ConvertAndExportCVEToOSV(cve cves.CVE5, directory string) error {
200200
}
201201

202202
// Save the conversion metrics to a file.
203-
if err := writeMetricToFile(cveID, vulnDir, metrics); err != nil {
203+
if err := writeMetricToFile(cveID, vulnDir, &metrics); err != nil {
204204
return err
205205
}
206206

vulnfeeds/cmd/cvelist2osv/converter_test.go

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -191,13 +191,18 @@ func TestFromCVE5(t *testing.T) {
191191
},
192192
},
193193
Affected: []osvschema.Affected{{
194-
// DatabaseSpecific: map[string]interface{}{
195-
// "CPE": []string{"cpe:2.3:a:gitlab:gitlab:*:*:*:*:*:*:*:*"},
196-
// },
197-
198-
Ranges: []osvschema.Range{{Type: "ECOSYSTEM",
199-
Events: []osvschema.Event{{Introduced: "18.0"}, {Fixed: "18.0.1"}},
200-
}}}},
194+
Ranges: []osvschema.Range{{
195+
Type: "GIT",
196+
Repo: "https://gitlab.com/gitlab-org/gitlab",
197+
Events: []osvschema.Event{
198+
{Introduced: "504fd9e5236e13d674e051c6b8a1e9892b371c58"},
199+
{Fixed: "3426be1b93852c5358240c5df40970c0ddfbdb2a"},
200+
},
201+
DatabaseSpecific: map[string]any{
202+
"versions": []osvschema.Event{{Introduced: "18.0"}, {Fixed: "18.0.1"}},
203+
},
204+
}},
205+
}},
201206
},
202207
},
203208
},
@@ -226,9 +231,19 @@ func TestFromCVE5(t *testing.T) {
226231
Score: "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H",
227232
},
228233
},
229-
Affected: []osvschema.Affected{{Ranges: []osvschema.Range{{
230-
Type: "ECOSYSTEM",
231-
Events: []osvschema.Event{{Introduced: "0"}, {Fixed: "1.10.5"}}}}}},
234+
Affected: []osvschema.Affected{{
235+
Ranges: []osvschema.Range{{
236+
Type: "GIT",
237+
Repo: "https://github.com/amazon-ion/ion-java",
238+
Events: []osvschema.Event{
239+
{Introduced: "0"},
240+
{Fixed: "019a6117fb99131f74f92ecf462169613234abbf"},
241+
},
242+
DatabaseSpecific: map[string]any{
243+
"versions": []osvschema.Event{{Introduced: "0"}, {Fixed: "1.10.5"}},
244+
},
245+
}},
246+
}},
232247
DatabaseSpecific: nil,
233248
},
234249
},

vulnfeeds/cmd/cvelist2osv/version_extraction.go

Lines changed: 125 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@ import (
44
"cmp"
55
"errors"
66
"fmt"
7+
"log/slog"
78
"strconv"
89
"strings"
910

1011
"slices"
1112

1213
"github.com/google/osv/vulnfeeds/cves"
1314
"github.com/google/osv/vulnfeeds/git"
15+
"github.com/google/osv/vulnfeeds/utility/logger"
1416
"github.com/google/osv/vulnfeeds/vulns"
1517
"github.com/ossf/osv-schema/bindings/go/osvschema"
1618
)
@@ -72,7 +74,7 @@ func toVersionRangeType(s string) VersionRangeType {
7274
// 3. If no versions are found, it falls back to searching for CPEs in the CNA container.
7375
// 4. As a last resort, it attempts to extract version information from the description text (currently not saved).
7476
// It has no return value: extracted ranges are appended to v.Affected and the version source is recorded in metrics.
75-
func AddVersionInfo(cve cves.CVE5, v *vulns.Vulnerability, metrics *ConversionMetrics) {
77+
func AddVersionInfo(cve cves.CVE5, v *vulns.Vulnerability, metrics *ConversionMetrics, repos []string) {
7678
gotVersions := false
7779

7880
// Combine 'affected' entries from both CNA and ADP containers.
@@ -104,31 +106,36 @@ func AddVersionInfo(cve cves.CVE5, v *vulns.Vulnerability, metrics *ConversionMe
104106
hasGit = true
105107
}
106108

107-
aff := osvschema.Affected{}
108-
for _, vr := range versionRanges {
109-
if versionType == VersionRangeTypeGit {
110-
vr.Type = osvschema.RangeGit
111-
vr.Repo = cveAff.Repo
112-
} else {
113-
vr.Type = osvschema.RangeEcosystem
114-
}
115-
aff.Ranges = append(aff.Ranges, vr)
116-
}
117-
109+
var aff osvschema.Affected
118110
// Special handling for Linux kernel CVEs.
119-
if cve.Metadata.AssignerShortName == "Linux" && versionType != VersionRangeTypeGit {
120-
aff.Package = osvschema.Package{
121-
Ecosystem: string(osvschema.EcosystemLinux),
122-
Name: "Kernel",
111+
if cve.Metadata.AssignerShortName == "Linux" {
112+
for _, vr := range versionRanges {
113+
if versionType == VersionRangeTypeGit {
114+
vr.Type = osvschema.RangeGit
115+
vr.Repo = cveAff.Repo
116+
} else {
117+
vr.Type = osvschema.RangeEcosystem
118+
}
119+
aff.Ranges = append(aff.Ranges, vr)
120+
}
121+
if versionType != VersionRangeTypeGit {
122+
aff.Package = osvschema.Package{
123+
Ecosystem: string(osvschema.EcosystemLinux),
124+
Name: "Kernel",
125+
}
126+
}
127+
} else {
128+
var err error
129+
aff, err = gitVersionsToCommits(cve.Metadata.CVEID, versionRanges, repos, make(git.RepoTagsCache))
130+
if err != nil {
131+
logger.Error("Failed to convert git versions to commits", slog.Any("err", err))
132+
} else {
133+
hasGit = true
123134
}
124135
}
125136

126137
v.Affected = append(v.Affected, aff)
127-
if hasGit {
128-
metrics.AddSource(VersionSourceGit)
129-
} else {
130-
metrics.AddSource(VersionSourceAffected)
131-
}
138+
metrics.VersionSources = append(metrics.VersionSources, VersionSourceAffected)
132139
}
133140

134141
// If no versions were found so far, fall back to CPEs.
@@ -166,6 +173,103 @@ func AddVersionInfo(cve cves.CVE5, v *vulns.Vulnerability, metrics *ConversionMe
166173
}
167174
}
168175

176+
// resolveVersionToCommit is a helper to convert a version string to a commit hash.
177+
// It logs the outcome of the conversion attempt and returns an empty string on failure.
178+
func resolveVersionToCommit(cveID cves.CVEID, version, versionType, repo string, normalizedTags map[string]git.NormalizedTag) string {
179+
if version == "" {
180+
return ""
181+
}
182+
logger.Info("Attempting to resolve version to commit", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo))
183+
commit, err := git.VersionToCommit(version, normalizedTags)
184+
if err != nil {
185+
logger.Warn("Failed to get Git commit for version", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo), slog.Any("err", err))
186+
return ""
187+
}
188+
logger.Info("Successfully derived commit for version", slog.String("cve", string(cveID)), slog.String("commit", commit), slog.String("version", version), slog.String("type", versionType))
189+
190+
return commit
191+
}
192+
193+
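// gitVersionsToCommits examines repos in order and tries to convert version ranges to commit ranges by treating versions as Git tags.
194+
// Takes a CVE ID (for logging) and the version ranges to resolve. A range is resolved when its introduced version ("0" is kept as-is)
195+
// and either its fixed or last_affected version map to commits in the same repo; the original events are kept under the range's database_specific "versions" key.
196+
// Ranges no repo could resolve are stored under the affected entry's database_specific "unresolved_versions" key; an error is returned only if none were resolved.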
197+
func gitVersionsToCommits(cveID cves.CVEID, versionRanges []osvschema.Range, repos []string, cache git.RepoTagsCache) (osvschema.Affected, error) {
198+
var newAff osvschema.Affected
199+
var newVersionRanges []osvschema.Range
200+
unresolvedRanges := versionRanges
201+
202+
for _, repo := range repos {
203+
if len(unresolvedRanges) == 0 {
204+
break // All ranges have been resolved.
205+
}
206+
207+
normalizedTags, err := git.NormalizeRepoTags(repo, cache)
208+
if err != nil {
209+
logger.Warn("Failed to normalize tags", slog.String("cve", string(cveID)), slog.String("repo", repo), slog.Any("err", err))
210+
continue
211+
}
212+
213+
var stillUnresolvedRanges []osvschema.Range
214+
for _, vr := range unresolvedRanges {
215+
var introduced, fixed, lastAffected string
216+
for _, e := range vr.Events {
217+
if e.Introduced != "" {
218+
introduced = e.Introduced
219+
}
220+
if e.Fixed != "" {
221+
fixed = e.Fixed
222+
}
223+
if e.LastAffected != "" {
224+
lastAffected = e.LastAffected
225+
}
226+
}
227+
228+
var introducedCommit string
229+
if introduced == "0" {
230+
introducedCommit = "0"
231+
} else {
232+
introducedCommit = resolveVersionToCommit(cveID, introduced, "introduced", repo, normalizedTags)
233+
}
234+
fixedCommit := resolveVersionToCommit(cveID, fixed, "fixed", repo, normalizedTags)
235+
lastAffectedCommit := resolveVersionToCommit(cveID, lastAffected, "last_affected", repo, normalizedTags)
236+
237+
if introducedCommit != "" && (fixedCommit != "" || lastAffectedCommit != "") {
238+
var newVR osvschema.Range
239+
240+
if fixedCommit != "" {
241+
newVR = buildVersionRange(introducedCommit, "", fixedCommit)
242+
} else {
243+
newVR = buildVersionRange(introducedCommit, lastAffectedCommit, "")
244+
}
245+
246+
newVR.Repo = repo
247+
newVR.Type = osvschema.RangeGit
248+
newVR.DatabaseSpecific = make(map[string]any)
249+
newVR.DatabaseSpecific["versions"] = vr.Events
250+
newVersionRanges = append(newVersionRanges, newVR)
251+
} else {
252+
stillUnresolvedRanges = append(stillUnresolvedRanges, vr)
253+
}
254+
}
255+
unresolvedRanges = stillUnresolvedRanges
256+
}
257+
258+
var err error
259+
if len(unresolvedRanges) > 0 {
260+
newAff.DatabaseSpecific = make(map[string]any)
261+
newAff.DatabaseSpecific["unresolved_versions"] = unresolvedRanges
262+
}
263+
264+
if len(newVersionRanges) > 0 {
265+
newAff.Ranges = newVersionRanges
266+
} else if len(unresolvedRanges) > 0 { // Only error if there were ranges to resolve but none were.
267+
err = errors.New("was not able to get git version ranges")
268+
}
269+
270+
return newAff, err
271+
}
272+
169273
// findCPEVersionRanges extracts version ranges and CPE strings from the CNA's
170274
// CPE applicability statements in a CVE record.
171275
func findCPEVersionRanges(cve cves.CVE5) (versionRanges []osvschema.Range, cpes []string, err error) {
@@ -345,7 +449,6 @@ func findNormalAffectedRanges(affected cves.Affected, metrics *ConversionMetrics
345449
// affected, but more likely, it affects up to that version. It could also mean that the range is given
346450
// in one line instead - like "< 1.5.3" or "< 2.45.4, >= 2.0 " or just "before 1.4.7", so check for that.
347451
metrics.AddNote("Only version exists")
348-
// GitHub often encodes the range directly in the version string.
349452

350453
av, err := git.ParseVersionRange(vers.Version)
351454
if err == nil {

0 commit comments

Comments
 (0)