From 360e54b5e0262732f181c99cc9b29c8ad27dac9f Mon Sep 17 00:00:00 2001 From: ashmod Date: Sun, 14 Sep 2025 18:25:33 +0300 Subject: [PATCH 1/2] feat(vulnfeeds): populate package.purl for GIT ranges --- vulnfeeds/cmd/combine-to-osv/main.go | 31 ++++++++++++ vulnfeeds/cmd/combine-to-osv/main_test.go | 37 +++++++++++++++ vulnfeeds/git/purl.go | 33 +++++++++++++ vulnfeeds/git/repository_test.go | 57 +++++++++++++++++++++++ vulnfeeds/go.mod | 2 +- 5 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 vulnfeeds/git/purl.go diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index be44f13f395..07cef66bd25 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -12,6 +12,7 @@ import ( "time" "github.com/google/osv/vulnfeeds/cves" + gitpurl "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -183,6 +184,7 @@ func combineIntoOSV(loadedCves map[cves.CVEID]cves.Vulnerability, allParts map[c if cvePartsModifiedTime[cveID].After(cveModified) { convertedCve.Modified = cvePartsModifiedTime[cveID] } + enrichRepoPURLs(convertedCve) convertedCves[cveID] = convertedCve } logger.Info("Ended writing OSV files", slog.Int("count", len(convertedCves))) @@ -258,3 +260,32 @@ func addReference(cveID string, ecosystem string, convertedCve *vulns.Vulnerabil convertedCve.References = append(convertedCve.References, securityReference) } + +// repoURLFromRanges returns the first repo URL from a GIT-type range, if present. +func repoURLFromRanges(ranges []osvschema.Range) string { + for _, r := range ranges { + if r.Type == "GIT" && r.Repo != "" { + return r.Repo + } + } + + return "" +} + +// enrichRepoPURLs sets affected.package.purl to an unversioned pkg:generic repo pURL +// when a GIT range with a repo URL exists and purl is currently empty. 
+func enrichRepoPURLs(v *vulns.Vulnerability) { + if v == nil || len(v.Affected) == 0 { + return + } + for i := range v.Affected { + if v.Affected[i].Package.Purl != "" { + continue + } + if repo := repoURLFromRanges(v.Affected[i].Ranges); repo != "" { + if p, err := gitpurl.BuildGenericRepoPURL(repo); err == nil && p != "" { + v.Affected[i].Package.Purl = p + } + } + } +} diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index e3cd32e6ca7..d6000654907 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -13,6 +13,7 @@ import ( "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/utility" + "github.com/ossf/osv-schema/bindings/go/osvschema" ) func loadTestData2(cveName string) cves.Vulnerability { @@ -179,3 +180,39 @@ func TestUpdateModifiedDate(t *testing.T) { t.Errorf("Wrong modified time, expected: %s, got: %s", time2, combinedOSV["CVE-2022-32746"].Modified) } } + +func TestRepoURLFromRanges_GIT(t *testing.T) { + t.Parallel() + + ranges := []osvschema.Range{ + { + Type: "GIT", + Repo: "https://github.com/eclipse-openj9/openj9", + Events: []osvschema.Event{ + {Introduced: "0"}, + }, + }, + } + got := repoURLFromRanges(ranges) + want := "https://github.com/eclipse-openj9/openj9" + if got != want { + t.Fatalf("repoURLFromRanges() = %q, want %q", got, want) + } +} + +func TestRepoURLFromRanges_NoGIT(t *testing.T) { + t.Parallel() + + ranges := []osvschema.Range{ + { + Type: "ECOSYSTEM", + Events: []osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.2.3"}, + }, + }, + } + if got := repoURLFromRanges(ranges); got != "" { + t.Fatalf("repoURLFromRanges() = %q, want empty", got) + } +} diff --git a/vulnfeeds/git/purl.go b/vulnfeeds/git/purl.go new file mode 100644 index 00000000000..a576a847057 --- /dev/null +++ b/vulnfeeds/git/purl.go @@ -0,0 +1,33 @@ +package git + +import ( + "fmt" + "net/url" + "strings" + + packageurl 
"github.com/package-url/packageurl-go" +) + +// BuildGenericRepoPURL returns an unversioned generic purl +// Example: pkg:generic/github.com/owner/repo +func BuildGenericRepoPURL(repoURL string) (string, error) { + u, err := url.Parse(repoURL) + if err != nil { + return "", fmt.Errorf("invalid repo url: %w", err) + } + + host := strings.ToLower(u.Hostname()) + path := strings.Trim(strings.TrimSuffix(u.EscapedPath(), ".git"), "/") + parts := strings.Split(path, "/") + if len(parts) < 2 { + return "", fmt.Errorf("invalid repo path in %q", repoURL) + } + + // Namespace is host + all path segments except the last; name is the last segment. + ns := strings.Join(append([]string{host}, parts[:len(parts)-1]...), "/") + name := parts[len(parts)-1] + + p := packageurl.NewPackageURL("generic", ns, name, "", nil, "") + + return p.ToString(), nil +} diff --git a/vulnfeeds/git/repository_test.go b/vulnfeeds/git/repository_test.go index dfe07b704e4..729b7946d42 100644 --- a/vulnfeeds/git/repository_test.go +++ b/vulnfeeds/git/repository_test.go @@ -383,3 +383,60 @@ func TestInvalidRepos(t *testing.T) { t.Errorf("These redundant repos are in InvalidRepos: %s", diff) } } + +func TestBuildGenericRepoPURL(t *testing.T) { + t.Parallel() + + tests := []struct { + desc string + inputURL string + wantPURL string + wantError bool + }{ + { + desc: "GitHub repo", + inputURL: "https://github.com/eclipse-openj9/openj9", + wantPURL: "pkg:generic/github.com/eclipse-openj9/openj9", + }, + { + desc: "GitHub repo with .git suffix", + inputURL: "https://github.com/torvalds/linux.git", + wantPURL: "pkg:generic/github.com/torvalds/linux", + }, + { + desc: "GitLab subgroup repo", + inputURL: "https://gitlab.com/group/subgroup/repo", + wantPURL: "pkg:generic/gitlab.com/group/subgroup/repo", + }, + { + desc: "Self-hosted cgit repo with .git", + inputURL: "https://git.libssh.org/projects/libssh.git", + wantPURL: "pkg:generic/git.libssh.org/projects/libssh", + }, + { + desc: "Insufficient path segments", 
+ inputURL: "https://github.com/onlyowner", + wantError: true, + }, + } + + for _, tc := range tests { + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + got, err := BuildGenericRepoPURL(tc.inputURL) + if tc.wantError { + if err == nil { + t.Fatalf("BuildGenericRepoPURL(%q) = %q, want error", tc.inputURL, got) + } + + return + } + if err != nil { + t.Fatalf("BuildGenericRepoPURL(%q) unexpected error: %v", tc.inputURL, err) + } + if got != tc.wantPURL { + t.Fatalf("BuildGenericRepoPURL(%q) = %q, want %q", tc.inputURL, got, tc.wantPURL) + } + }) + } +} diff --git a/vulnfeeds/go.mod b/vulnfeeds/go.mod index 0dcf5133f68..cf1e03745b5 100644 --- a/vulnfeeds/go.mod +++ b/vulnfeeds/go.mod @@ -11,6 +11,7 @@ require ( github.com/google/osv-scanner v1.9.2 github.com/knqyf263/go-cpe v0.0.0-20230627041855-cb0794d06872 github.com/ossf/osv-schema/bindings/go v0.0.0-20250902063920-695987a6b7da + github.com/package-url/packageurl-go v0.1.3 github.com/sethvargo/go-retry v0.3.0 gopkg.in/dnaeon/go-vcr.v4 v4.0.5 gopkg.in/yaml.v2 v2.4.0 @@ -40,7 +41,6 @@ require ( github.com/googleapis/gax-go/v2 v2.14.2 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect - github.com/package-url/packageurl-go v0.1.3 // indirect github.com/pjbgf/sha1cd v0.3.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect From a6e4d12c7e7750348b3b8120f45e595c6538840c Mon Sep 17 00:00:00 2001 From: ashmod Date: Sun, 14 Sep 2025 18:57:16 +0300 Subject: [PATCH 2/2] feat(vulnfeeds): add versioned repo purls for GIT ranges --- vulnfeeds/cmd/combine-to-osv/main.go | 76 +++++++++++++++++++++-- vulnfeeds/cmd/combine-to-osv/main_test.go | 40 ++++++++++++ 2 files changed, 111 insertions(+), 5 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index 07cef66bd25..d9d8be1eda9 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ 
b/vulnfeeds/cmd/combine-to-osv/main.go @@ -8,6 +8,7 @@ import ( "net/url" "os" "path" + "sort" "strings" "time" @@ -279,13 +280,78 @@ func enrichRepoPURLs(v *vulns.Vulnerability) { return } for i := range v.Affected { - if v.Affected[i].Package.Purl != "" { - continue + aff := &v.Affected[i] + + // Ensure base purl is set (unversioned). + if aff.Package.Purl == "" { + if repo := repoURLFromRanges(aff.Ranges); repo != "" { + if p, err := gitpurl.BuildGenericRepoPURL(repo); err == nil && p != "" { + aff.Package.Purl = p + } + } + } + + // Add versioned repo pURLs when possible. + if repo := repoURLFromRanges(aff.Ranges); repo != "" { + addVersionedRepoPURLs(aff, repo) } - if repo := repoURLFromRanges(v.Affected[i].Ranges); repo != "" { - if p, err := gitpurl.BuildGenericRepoPURL(repo); err == nil && p != "" { - v.Affected[i].Package.Purl = p + } +} + +var repoTagsCache = make(gitpurl.RepoTagsCache) + +// addVersionedRepoPURLs populates affected.database_specific["repo_purls"] +// with pkg:generic/...@VERSION entries, using affected.versions if available. +func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) { + if aff == nil || repo == "" { + return + } + + var tags []string + if len(aff.Versions) > 0 { + tags = append(tags, aff.Versions...) + } else if os.Getenv("ENABLE_REPO_PURL_TAGS") == "1" { + norm, err := gitpurl.NormalizeRepoTags(repo, repoTagsCache) + if err == nil && len(norm) > 0 { + for tag := range norm { + tags = append(tags, tag) } + sort.Strings(tags) + const maxTags = 200 + if len(tags) > maxTags { + tags = tags[:maxTags] + } + } + } + + if len(tags) == 0 { + return + } + + base, err := gitpurl.BuildGenericRepoPURL(repo) + if err != nil || base == "" { + return + } + + // Dedup and format. 
+ seen := make(map[string]struct{}, len(tags)) + vPURLs := make([]string, 0, len(tags)) + for _, t := range tags { + if t == "" { + continue + } + if _, ok := seen[t]; ok { + continue } + seen[t] = struct{}{} + vPURLs = append(vPURLs, base+"@"+t) + } + if len(vPURLs) == 0 { + return + } + + if aff.DatabaseSpecific == nil { + aff.DatabaseSpecific = map[string]any{} } + aff.DatabaseSpecific["repo_purls"] = vPURLs } diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index d6000654907..5fc3db6baf6 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -12,6 +12,7 @@ import ( "maps" "github.com/google/osv/vulnfeeds/cves" + gitpurl "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/utility" "github.com/ossf/osv-schema/bindings/go/osvschema" ) @@ -216,3 +217,42 @@ func TestRepoURLFromRanges_NoGIT(t *testing.T) { t.Fatalf("repoURLFromRanges() = %q, want empty", got) } } + +func TestAddVersionedRepoPURLs_FromVersions(t *testing.T) { + t.Setenv("ENABLE_REPO_PURL_TAGS", "") // ensure derivation path is off + + repo := "https://github.com/chriskohlhoff/asio" + aff := &osvschema.Affected{ + Package: osvschema.Package{Ecosystem: "GIT", Name: "asio"}, + Versions: []string{"asio-1-13-0", "asio-1-12-0"}, + Ranges: []osvschema.Range{{Type: "GIT", Repo: repo, Events: []osvschema.Event{{Introduced: "0"}}}}, + } + + addVersionedRepoPURLs(aff, repo) + + base, err := gitpurl.BuildGenericRepoPURL(repo) + if err != nil || base == "" { + t.Fatalf("failed to build base purl: %v", err) + } + + ds := aff.DatabaseSpecific + list, ok := ds["repo_purls"].([]string) + if !ok || len(list) == 0 { + t.Fatalf("repo_purls missing/empty: %#v", ds) + } + + want1 := base + "@asio-1-13-0" + want2 := base + "@asio-1-12-0" + found1, found2 := false, false + for _, p := range list { + if p == want1 { + found1 = true + } + if p == want2 { + found2 = true + } + } + if !found1 || !found2 { + 
t.Fatalf("missing expected entries, got %#v", list) + } +}