diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go
index be44f13f395..d9d8be1eda9 100644
--- a/vulnfeeds/cmd/combine-to-osv/main.go
+++ b/vulnfeeds/cmd/combine-to-osv/main.go
@@ -8,10 +8,12 @@ import (
 	"net/url"
 	"os"
 	"path"
+	"sort"
 	"strings"
 	"time"
 
 	"github.com/google/osv/vulnfeeds/cves"
+	gitpurl "github.com/google/osv/vulnfeeds/git"
 	"github.com/google/osv/vulnfeeds/utility/logger"
 	"github.com/google/osv/vulnfeeds/vulns"
 	"github.com/ossf/osv-schema/bindings/go/osvschema"
@@ -183,6 +185,7 @@ func combineIntoOSV(loadedCves map[cves.CVEID]cves.Vulnerability, allParts map[c
 		if cvePartsModifiedTime[cveID].After(cveModified) {
 			convertedCve.Modified = cvePartsModifiedTime[cveID]
 		}
+		enrichRepoPURLs(convertedCve)
 		convertedCves[cveID] = convertedCve
 	}
 	logger.Info("Ended writing OSV files", slog.Int("count", len(convertedCves)))
@@ -258,3 +261,113 @@ func addReference(cveID string, ecosystem string, convertedCve *vulns.Vulnerabil
 
 	convertedCve.References = append(convertedCve.References, securityReference)
 }
+
+// repoURLFromRanges returns the repo URL of the first GIT-type range that has one,
+// or "" when no such range exists.
+func repoURLFromRanges(ranges []osvschema.Range) string {
+	for _, r := range ranges {
+		if r.Type == "GIT" && r.Repo != "" {
+			return r.Repo
+		}
+	}
+
+	return ""
+}
+
+// enrichRepoPURLs fills in repo-derived pURLs for every affected entry that has a
+// GIT range with a repo URL: affected.package.purl is set to the unversioned
+// pkg:generic repo pURL when it is currently empty, and versioned repo pURLs are
+// recorded via addVersionedRepoPURLs. Entries without such a range are untouched.
+func enrichRepoPURLs(v *vulns.Vulnerability) {
+	if v == nil {
+		return
+	}
+	for i := range v.Affected {
+		aff := &v.Affected[i]
+
+		// Both steps below need the repo URL, so resolve it once per entry.
+		repo := repoURLFromRanges(aff.Ranges)
+		if repo == "" {
+			continue
+		}
+
+		// Ensure base purl is set (unversioned). Best effort: a repo URL that
+		// cannot be converted leaves the field empty.
+		if aff.Package.Purl == "" {
+			if p, err := gitpurl.BuildGenericRepoPURL(repo); err == nil && p != "" {
+				aff.Package.Purl = p
+			}
+		}
+
+		// Add versioned repo pURLs when possible.
+		addVersionedRepoPURLs(aff, repo)
+	}
+}
+
+// repoTagsCache is the cache shared by all gitpurl.NormalizeRepoTags calls made from this file.
+var repoTagsCache = make(gitpurl.RepoTagsCache)
+
+// addVersionedRepoPURLs populates affected.database_specific["repo_purls"]
+// with pkg:generic/...@VERSION entries. It uses affected.versions if available;
+// otherwise, only when ENABLE_REPO_PURL_TAGS=1, it falls back to the tags returned
+// by gitpurl.NormalizeRepoTags (sorted, capped at maxTags). Nothing is written
+// when no pURL can be produced.
+func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) {
+	if aff == nil || repo == "" {
+		return
+	}
+
+	var tags []string
+	if len(aff.Versions) > 0 {
+		tags = append(tags, aff.Versions...)
+	} else if os.Getenv("ENABLE_REPO_PURL_TAGS") == "1" {
+		norm, err := gitpurl.NormalizeRepoTags(repo, repoTagsCache)
+		if err == nil && len(norm) > 0 {
+			for tag := range norm {
+				tags = append(tags, tag)
+			}
+			// Sort so the cap below always keeps the same subset.
+			sort.Strings(tags)
+			const maxTags = 200
+			if len(tags) > maxTags {
+				tags = tags[:maxTags]
+			}
+		}
+	}
+
+	if len(tags) == 0 {
+		return
+	}
+
+	// Dedup and format.
+	seen := make(map[string]struct{}, len(tags))
+	vPURLs := make([]string, 0, len(tags))
+	for _, t := range tags {
+		if t == "" {
+			continue
+		}
+		if _, ok := seen[t]; ok {
+			continue
+		}
+		seen[t] = struct{}{}
+
+		// Build through the purl helper so the version is percent-encoded;
+		// plain concatenation yields an invalid pURL for versions containing
+		// characters such as "/", "@", "?" or "#".
+		p, err := gitpurl.BuildVersionedRepoPURL(repo, t)
+		if err != nil {
+			// t is non-empty, so the failure comes from the repo URL and
+			// would repeat for every remaining version.
+			return
+		}
+		vPURLs = append(vPURLs, p)
+	}
+	if len(vPURLs) == 0 {
+		return
+	}
+
+	if aff.DatabaseSpecific == nil {
+		aff.DatabaseSpecific = map[string]any{}
+	}
+	aff.DatabaseSpecific["repo_purls"] = vPURLs
+}
diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go
index e3cd32e6ca7..5fc3db6baf6 100644
--- a/vulnfeeds/cmd/combine-to-osv/main_test.go
+++ b/vulnfeeds/cmd/combine-to-osv/main_test.go
@@ -12,7 +12,9 @@ import (
 	"maps"
 
 	"github.com/google/osv/vulnfeeds/cves"
+	gitpurl "github.com/google/osv/vulnfeeds/git"
 	"github.com/google/osv/vulnfeeds/utility"
+	"github.com/ossf/osv-schema/bindings/go/osvschema"
 )
 
 func loadTestData2(cveName string) cves.Vulnerability {
@@ -179,3 +181,78 @@ func TestUpdateModifiedDate(t *testing.T) {
 		t.Errorf("Wrong modified time, expected: %s, got: %s", time2, combinedOSV["CVE-2022-32746"].Modified)
 	}
 }
+
+func TestRepoURLFromRanges_GIT(t *testing.T) {
+	t.Parallel()
+
+	ranges := []osvschema.Range{
+		{
+			Type: "GIT",
+			Repo: "https://github.com/eclipse-openj9/openj9",
+			Events: []osvschema.Event{
+				{Introduced: "0"},
+			},
+		},
+	}
+	got := repoURLFromRanges(ranges)
+	want := "https://github.com/eclipse-openj9/openj9"
+	if got != want {
+		t.Fatalf("repoURLFromRanges() = %q, want %q", got, want)
+	}
+}
+
+func TestRepoURLFromRanges_NoGIT(t *testing.T) {
+	t.Parallel()
+
+	ranges := []osvschema.Range{
+		{
+			Type: "ECOSYSTEM",
+			Events: []osvschema.Event{
+				{Introduced: "0"},
+				{Fixed: "1.2.3"},
+			},
+		},
+	}
+	if got := repoURLFromRanges(ranges); got != "" {
+		t.Fatalf("repoURLFromRanges() = %q, want empty", got)
+	}
+}
+
+func TestAddVersionedRepoPURLs_FromVersions(t *testing.T) {
+	t.Setenv("ENABLE_REPO_PURL_TAGS", "") // ensure derivation path is off
+
+	repo := "https://github.com/chriskohlhoff/asio"
+	aff := &osvschema.Affected{
+		Package:  osvschema.Package{Ecosystem: "GIT", Name: "asio"},
+		Versions: []string{"asio-1-13-0", "asio-1-12-0"},
+		Ranges:   []osvschema.Range{{Type: "GIT", Repo: repo, Events: []osvschema.Event{{Introduced: "0"}}}},
+	}
+
+	addVersionedRepoPURLs(aff, repo)
+
+	base, err := gitpurl.BuildGenericRepoPURL(repo)
+	if err != nil || base == "" {
+		t.Fatalf("failed to build base purl: %v", err)
+	}
+
+	ds := aff.DatabaseSpecific
+	list, ok := ds["repo_purls"].([]string)
+	if !ok || len(list) == 0 {
+		t.Fatalf("repo_purls missing/empty: %#v", ds)
+	}
+
+	want1 := base + "@asio-1-13-0"
+	want2 := base + "@asio-1-12-0"
+	found1, found2 := false, false
+	for _, p := range list {
+		if p == want1 {
+			found1 = true
+		}
+		if p == want2 {
+			found2 = true
+		}
+	}
+	if !found1 || !found2 {
+		t.Fatalf("missing expected entries, got %#v", list)
+	}
+}
diff --git a/vulnfeeds/git/purl.go b/vulnfeeds/git/purl.go
new file mode 100644
index 00000000000..a576a847057
--- /dev/null
+++ b/vulnfeeds/git/purl.go
@@ -0,0 +1,64 @@
+package git
+
+import (
+	"errors"
+	"fmt"
+	"net/url"
+	"strings"
+
+	packageurl "github.com/package-url/packageurl-go"
+)
+
+// BuildGenericRepoPURL returns an unversioned generic purl for a repository URL.
+// Example: https://github.com/owner/repo.git becomes pkg:generic/github.com/owner/repo
+func BuildGenericRepoPURL(repoURL string) (string, error) {
+	p, err := genericRepoPURL(repoURL)
+	if err != nil {
+		return "", err
+	}
+
+	return p.ToString(), nil
+}
+
+// BuildVersionedRepoPURL returns the generic purl for a repository URL pinned to
+// version (typically a tag). The purl library percent-encodes the version.
+// Example: pkg:generic/github.com/owner/repo@v1.2.3
+func BuildVersionedRepoPURL(repoURL string, version string) (string, error) {
+	if version == "" {
+		return "", errors.New("empty version")
+	}
+	p, err := genericRepoPURL(repoURL)
+	if err != nil {
+		return "", err
+	}
+	p.Version = version
+
+	return p.ToString(), nil
+}
+
+// genericRepoPURL maps a repository URL onto unversioned purl components.
+// Namespace is host + all path segments except the last; name is the last segment.
+func genericRepoPURL(repoURL string) (*packageurl.PackageURL, error) {
+	u, err := url.Parse(repoURL)
+	if err != nil {
+		return nil, fmt.Errorf("invalid repo url: %w", err)
+	}
+
+	host := strings.ToLower(u.Hostname())
+	if host == "" {
+		return nil, fmt.Errorf("missing host in repo url %q", repoURL)
+	}
+
+	// Trim slashes before the ".git" suffix so "owner/repo.git/" is handled, and use
+	// the decoded path because the purl library percent-encodes each component itself.
+	repoPath := strings.TrimSuffix(strings.Trim(u.Path, "/"), ".git")
+	parts := strings.Split(repoPath, "/")
+	if len(parts) < 2 || parts[len(parts)-1] == "" {
+		return nil, fmt.Errorf("invalid repo path in %q", repoURL)
+	}
+
+	ns := host + "/" + strings.Join(parts[:len(parts)-1], "/")
+	name := parts[len(parts)-1]
+
+	return packageurl.NewPackageURL("generic", ns, name, "", nil, ""), nil
+}
diff --git a/vulnfeeds/git/repository_test.go b/vulnfeeds/git/repository_test.go
index dfe07b704e4..729b7946d42 100644
--- a/vulnfeeds/git/repository_test.go
+++ b/vulnfeeds/git/repository_test.go
@@ -383,3 +383,87 @@ func TestInvalidRepos(t *testing.T) {
 		t.Errorf("These redundant repos are in InvalidRepos: %s", diff)
 	}
 }
+
+func TestBuildGenericRepoPURL(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		desc      string
+		inputURL  string
+		wantPURL  string
+		wantError bool
+	}{
+		{
+			desc:     "GitHub repo",
+			inputURL: "https://github.com/eclipse-openj9/openj9",
+			wantPURL: "pkg:generic/github.com/eclipse-openj9/openj9",
+		},
+		{
+			desc:     "GitHub repo with .git suffix",
+			inputURL: "https://github.com/torvalds/linux.git",
+			wantPURL: "pkg:generic/github.com/torvalds/linux",
+		},
+		{
+			desc:     "Trailing slash after .git suffix",
+			inputURL: "https://github.com/torvalds/linux.git/",
+			wantPURL: "pkg:generic/github.com/torvalds/linux",
+		},
+		{
+			desc:     "GitLab subgroup repo",
+			inputURL: "https://gitlab.com/group/subgroup/repo",
+			wantPURL: "pkg:generic/gitlab.com/group/subgroup/repo",
+		},
+		{
+			desc:     "Self-hosted cgit repo with .git",
+			inputURL: "https://git.libssh.org/projects/libssh.git",
+			wantPURL: "pkg:generic/git.libssh.org/projects/libssh",
+		},
+		{
+			desc:      "Insufficient path segments",
+			inputURL:  "https://github.com/onlyowner",
+			wantError: true,
+		},
+		{
+			desc:      "Missing host",
+			inputURL:  "owner/repo",
+			wantError: true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.desc, func(t *testing.T) {
+			t.Parallel()
+			got, err := BuildGenericRepoPURL(tc.inputURL)
+			if tc.wantError {
+				if err == nil {
+					t.Fatalf("BuildGenericRepoPURL(%q) = %q, want error", tc.inputURL, got)
+				}
+
+				return
+			}
+			if err != nil {
+				t.Fatalf("BuildGenericRepoPURL(%q) unexpected error: %v", tc.inputURL, err)
+			}
+			if got != tc.wantPURL {
+				t.Fatalf("BuildGenericRepoPURL(%q) = %q, want %q", tc.inputURL, got, tc.wantPURL)
+			}
+		})
+	}
+}
+
+func TestBuildVersionedRepoPURL(t *testing.T) {
+	t.Parallel()
+
+	got, err := BuildVersionedRepoPURL("https://github.com/chriskohlhoff/asio.git", "asio-1-13-0")
+	if err != nil {
+		t.Fatalf("BuildVersionedRepoPURL() unexpected error: %v", err)
+	}
+	if want := "pkg:generic/github.com/chriskohlhoff/asio@asio-1-13-0"; got != want {
+		t.Fatalf("BuildVersionedRepoPURL() = %q, want %q", got, want)
+	}
+
+	_, err = BuildVersionedRepoPURL("https://github.com/chriskohlhoff/asio", "")
+	if err == nil {
+		t.Fatal("BuildVersionedRepoPURL() with empty version succeeded, want error")
+	}
+}
diff --git a/vulnfeeds/go.mod b/vulnfeeds/go.mod
index 0dcf5133f68..cf1e03745b5 100644
--- a/vulnfeeds/go.mod
+++ b/vulnfeeds/go.mod
@@ -11,6 +11,7 @@ require (
 	github.com/google/osv-scanner v1.9.2
 	github.com/knqyf263/go-cpe v0.0.0-20230627041855-cb0794d06872
 	github.com/ossf/osv-schema/bindings/go v0.0.0-20250902063920-695987a6b7da
+	github.com/package-url/packageurl-go v0.1.3
 	github.com/sethvargo/go-retry v0.3.0
 	gopkg.in/dnaeon/go-vcr.v4 v4.0.5
 	gopkg.in/yaml.v2 v2.4.0
@@ -40,7 +41,6 @@ require (
 	github.com/googleapis/gax-go/v2 v2.14.2 // indirect
 	github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
 	github.com/kevinburke/ssh_config v1.2.0 // indirect
-	github.com/package-url/packageurl-go v0.1.3 // indirect
 	github.com/pjbgf/sha1cd v0.3.2 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect