Skip to content

Commit 683b756

Browse files
committed
feat: dedupe snyk API calls for identical PURLs
1 parent f405d1e commit 683b756

File tree

3 files changed: 216 additions and 35 deletions.

lib/snyk/enrich_cyclonedx.go

Lines changed: 46 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,12 @@ import (
3333

3434
type cdxEnricher = func(*Config, *cdx.Component, *packageurl.PackageURL)
3535

36+
type cdxPurlGroup struct {
37+
purl packageurl.PackageURL
38+
components []*cdx.Component
39+
bomRef string
40+
}
41+
3642
var cdxEnrichers = []cdxEnricher{
3743
enrichCDXSnykAdvisorData,
3844
enrichCDXSnykVulnerabilityDBData,
@@ -85,32 +91,51 @@ func enrichCycloneDX(cfg *Config, bom *cdx.BOM, logger *zerolog.Logger) *cdx.BOM
8591
logger.Debug().Str("org_id", orgID.String()).Msg("Inferred Snyk organization ID")
8692

8793
var mutex = &sync.Mutex{}
88-
vulnerabilities := make(map[cdx.Component][]issues.CommonIssueModelVThree)
94+
vulnerabilities := make(map[*cdx.Component][]issues.CommonIssueModelVThree)
8995
wg := sizedwaitgroup.New(20)
9096

9197
comps := utils.DiscoverCDXComponents(bom)
9298
logger.Debug().Msgf("Detected %d packages", len(comps))
9399

100+
// Group components by PURL to deduplicate API calls
101+
purlGroups := make(map[string]*cdxPurlGroup)
94102
for i := range comps {
103+
component := comps[i]
104+
l := logger.With().Str("bom-ref", component.BOMRef).Logger()
105+
106+
purl, err := packageurl.FromString(component.PackageURL)
107+
if err != nil {
108+
l.Debug().
109+
Err(err).
110+
Msg("Could not identify package")
111+
continue
112+
}
113+
for _, enrichFunc := range cdxEnrichers {
114+
enrichFunc(cfg, component, &purl)
115+
}
116+
117+
key := purl.ToString()
118+
group, ok := purlGroups[key]
119+
if !ok {
120+
group = &cdxPurlGroup{purl: purl, bomRef: component.BOMRef}
121+
purlGroups[key] = group
122+
}
123+
group.components = append(group.components, component)
124+
}
125+
126+
// Fetch vulnerabilities for each unique PURL
127+
for _, group := range purlGroups {
95128
wg.Add()
96-
go func(component *cdx.Component) {
129+
go func() {
97130
defer wg.Done()
98-
l := logger.With().Str("bom-ref", component.BOMRef).Logger()
131+
l := logger.With().
132+
Str("bom-ref", group.bomRef).
133+
Str("purl", group.purl.ToString()).
134+
Logger()
99135

100-
purl, err := packageurl.FromString(component.PackageURL)
101-
if err != nil {
102-
l.Debug().
103-
Err(err).
104-
Msg("Could not identify package")
105-
return
106-
}
107-
for _, enrichFunc := range cdxEnrichers {
108-
enrichFunc(cfg, component, &purl)
109-
}
110-
resp, err := GetPackageVulnerabilities(cfg, &purl, auth, orgID, logger)
136+
resp, err := GetPackageVulnerabilities(cfg, &group.purl, auth, orgID, logger)
111137
if err != nil {
112138
l.Err(err).
113-
Str("purl", purl.ToString()).
114139
Msg("Failed to fetch vulnerabilities for package")
115140
return
116141
}
@@ -126,18 +151,20 @@ func enrichCycloneDX(cfg *Config, bom *cdx.BOM, logger *zerolog.Logger) *cdx.BOM
126151

127152
if packageDoc.Data != nil {
128153
mutex.Lock()
129-
vulnerabilities[*component] = *packageDoc.Data
154+
for _, component := range group.components {
155+
vulnerabilities[component] = *packageDoc.Data
156+
}
130157
mutex.Unlock()
131158
}
132-
}(comps[i])
159+
}()
133160
}
134161
wg.Wait()
135162

136163
var vulns []cdx.Vulnerability
137-
for k, v := range vulnerabilities {
164+
for comp, v := range vulnerabilities {
138165
for _, issue := range v {
139166
vuln := cdx.Vulnerability{
140-
BOMRef: k.BOMRef,
167+
BOMRef: comp.BOMRef,
141168
}
142169
if issue.Id != nil {
143170
vuln.ID = *issue.Id

lib/snyk/enrich_spdx.go

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ import (
3434

3535
type spdxEnricher = func(*Config, *spdx_2_3.Package, *packageurl.PackageURL)
3636

37+
type spdxPurlGroup struct {
38+
purl packageurl.PackageURL
39+
packages []*spdx_2_3.Package
40+
spdxID string
41+
}
42+
3743
var spdxEnrichers = []spdxEnricher{
3844
enrichSPDXSnykAdvisorData,
3945
enrichSPDXSnykVulnerabilityDBData,
@@ -97,25 +103,42 @@ func enrichSPDX(cfg *Config, bom *spdx.Document, logger *zerolog.Logger) *spdx.D
97103
packages := bom.Packages
98104
logger.Debug().Msgf("Detected %d packages", len(packages))
99105

100-
for i, pkg := range packages {
101-
wg.Add()
106+
// Group packages by PURL to deduplicate API calls
107+
purlGroups := make(map[string]*spdxPurlGroup)
108+
for _, pkg := range packages {
109+
l := logger.With().Str("SPDXID", string(pkg.PackageSPDXIdentifier)).Logger()
102110

103-
go func(pkg *spdx_2_3.Package, i int) {
111+
purl, err := utils.GetPurlFromSPDXPackage(pkg)
112+
if err != nil || purl == nil {
113+
l.Debug().Msg("Could not identify package")
114+
continue
115+
}
116+
for _, enrichFn := range spdxEnrichers {
117+
enrichFn(cfg, pkg, purl)
118+
}
119+
120+
key := purl.ToString()
121+
group, ok := purlGroups[key]
122+
if !ok {
123+
group = &spdxPurlGroup{purl: *purl, spdxID: string(pkg.PackageSPDXIdentifier)}
124+
purlGroups[key] = group
125+
}
126+
group.packages = append(group.packages, pkg)
127+
}
128+
129+
// Fetch vulnerabilities for each unique PURL
130+
for _, group := range purlGroups {
131+
wg.Add()
132+
go func() {
104133
defer wg.Done()
105-
l := logger.With().Str("SPDXID", string(pkg.PackageSPDXIdentifier)).Logger()
134+
l := logger.With().
135+
Str("SPDXID", group.spdxID).
136+
Str("purl", group.purl.ToString()).
137+
Logger()
106138

107-
purl, err := utils.GetPurlFromSPDXPackage(pkg)
108-
if err != nil || purl == nil {
109-
l.Debug().Msg("Could not identify package")
110-
return
111-
}
112-
for _, enrichFn := range spdxEnrichers {
113-
enrichFn(cfg, pkg, purl)
114-
}
115-
resp, err := GetPackageVulnerabilities(cfg, purl, auth, orgID, logger)
139+
resp, err := GetPackageVulnerabilities(cfg, &group.purl, auth, orgID, logger)
116140
if err != nil {
117141
l.Err(err).
118-
Str("purl", purl.ToString()).
119142
Msg("Failed to fetch vulnerabilities for package")
120143
return
121144
}
@@ -131,10 +154,12 @@ func enrichSPDX(cfg *Config, bom *spdx.Document, logger *zerolog.Logger) *spdx.D
131154

132155
if packageDoc.Data != nil {
133156
mutex.Lock()
134-
vulnerabilities[pkg] = *packageDoc.Data
157+
for _, pkg := range group.packages {
158+
vulnerabilities[pkg] = *packageDoc.Data
159+
}
135160
mutex.Unlock()
136161
}
137-
}(pkg, i)
162+
}()
138163
}
139164

140165
wg.Wait()

lib/snyk/enrich_test.go

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
_ "embed"
55
"net/http"
66
"net/http/httptest"
7+
"sync/atomic"
78
"testing"
89

910
cdx "github.com/CycloneDX/cyclonedx-go"
@@ -56,6 +57,61 @@ func TestEnrichSBOM_CycloneDXWithVulnerabilities(t *testing.T) {
5657
assert.Equal(t, (*vuln.Ratings)[1].Method, cdx.ScoringMethodCVSSv3)
5758
}
5859

60+
func TestEnrichSBOM_CycloneDXDeduplicatesRequests(t *testing.T) {
61+
var numRequests int32
62+
mux := http.NewServeMux()
63+
mux.HandleFunc(
64+
"GET /rest/self",
65+
func(w http.ResponseWriter, r *http.Request) {
66+
respond(w, selfBody)
67+
})
68+
mux.HandleFunc(
69+
"GET /rest/orgs/{org_id}/packages/{purl}/issues",
70+
func(w http.ResponseWriter, r *http.Request) {
71+
atomic.AddInt32(&numRequests, 1)
72+
respond(w, numpyIssues)
73+
})
74+
75+
srv := httptest.NewServer(mux)
76+
t.Cleanup(srv.Close)
77+
78+
cfg := DefaultConfig()
79+
cfg.APIToken = "asdf"
80+
cfg.SnykAPIURL = srv.URL
81+
82+
logger := zerolog.Nop()
83+
svc := NewService(cfg, &logger)
84+
85+
bom := &cdx.BOM{
86+
Components: &[]cdx.Component{
87+
{
88+
BOMRef: "pkg:pypi/numpy@1.16.0",
89+
Name: "numpy",
90+
Version: "1.16.0",
91+
PackageURL: "pkg:pypi/numpy@1.16.0",
92+
},
93+
{
94+
BOMRef: "pkg:pypi/numpy@1.16.0#dup",
95+
Name: "numpy",
96+
Version: "1.16.0",
97+
PackageURL: "pkg:pypi/numpy@1.16.0",
98+
},
99+
},
100+
}
101+
doc := &sbom.SBOMDocument{BOM: bom}
102+
103+
svc.EnrichSBOM(doc)
104+
105+
assert.Equal(t, int32(1), atomic.LoadInt32(&numRequests))
106+
require.NotNil(t, bom.Vulnerabilities)
107+
vulnByRef := map[string]int{}
108+
for _, vuln := range *bom.Vulnerabilities {
109+
vulnByRef[vuln.BOMRef]++
110+
}
111+
assert.Greater(t, vulnByRef["pkg:pypi/numpy@1.16.0"], 0)
112+
assert.Greater(t, vulnByRef["pkg:pypi/numpy@1.16.0#dup"], 0)
113+
}
114+
59115
func TestEnrichSBOM_CycloneDXExternalRefs(t *testing.T) {
60116
svc := setupTestEnv(t)
61117

@@ -199,6 +255,79 @@ func TestEnrichSBOM_SPDXWithVulnerabilities(t *testing.T) {
199255
assert.Equal(t, "Arbitrary Code Execution", vulnRef.ExternalRefComment)
200256
}
201257

258+
func TestEnrichSBOM_SPDXDeduplicatesRequests(t *testing.T) {
259+
var numRequests int32
260+
mux := http.NewServeMux()
261+
mux.HandleFunc(
262+
"GET /rest/self",
263+
func(w http.ResponseWriter, r *http.Request) {
264+
respond(w, selfBody)
265+
})
266+
mux.HandleFunc(
267+
"GET /rest/orgs/{org_id}/packages/{purl}/issues",
268+
func(w http.ResponseWriter, r *http.Request) {
269+
atomic.AddInt32(&numRequests, 1)
270+
respond(w, numpyIssues)
271+
})
272+
273+
srv := httptest.NewServer(mux)
274+
t.Cleanup(srv.Close)
275+
276+
cfg := DefaultConfig()
277+
cfg.APIToken = "asdf"
278+
cfg.SnykAPIURL = srv.URL
279+
280+
logger := zerolog.Nop()
281+
svc := NewService(cfg, &logger)
282+
283+
bom := &spdx_2_3.Document{
284+
Packages: []*spdx_2_3.Package{
285+
{
286+
PackageSPDXIdentifier: "pkg:pypi/numpy@1.16.0",
287+
PackageName: "numpy",
288+
PackageVersion: "1.16.0",
289+
PackageExternalReferences: []*spdx_2_3.PackageExternalReference{
290+
{
291+
Category: spdx.CategoryPackageManager,
292+
RefType: "purl",
293+
Locator: "pkg:pypi/numpy@1.16.0",
294+
},
295+
},
296+
},
297+
{
298+
PackageSPDXIdentifier: "pkg:pypi/numpy@1.16.0-dup",
299+
PackageName: "numpy",
300+
PackageVersion: "1.16.0",
301+
PackageExternalReferences: []*spdx_2_3.PackageExternalReference{
302+
{
303+
Category: spdx.CategoryPackageManager,
304+
RefType: "purl",
305+
Locator: "pkg:pypi/numpy@1.16.0",
306+
},
307+
},
308+
},
309+
},
310+
}
311+
doc := &sbom.SBOMDocument{BOM: bom}
312+
313+
svc.EnrichSBOM(doc)
314+
315+
assert.Equal(t, int32(1), atomic.LoadInt32(&numRequests))
316+
expectedLocator := "https://security.snyk.io/vuln/SNYK-PYTHON-NUMPY-73513"
317+
for _, pkg := range bom.Packages {
318+
hasVulnRef := false
319+
for _, ref := range pkg.PackageExternalReferences {
320+
if ref.Category == spdx.CategorySecurity &&
321+
ref.RefType == "advisory" &&
322+
ref.Locator == expectedLocator {
323+
hasVulnRef = true
324+
break
325+
}
326+
}
327+
assert.Truef(t, hasVulnRef, "expected vulnerability reference for %s", pkg.PackageSPDXIdentifier)
328+
}
329+
}
330+
202331
func TestEnrichSBOM_SPDXExternalRefs(t *testing.T) {
203332
svc := setupTestEnv(t)
204333

0 commit comments

Comments
 (0)