Skip to content

Commit 9c36a12

Browse files
authored
perf(local): avoid loading the entire advisory unless it will actually be used (#2450)
Right now we always parse advisories even if we don't end up loading them into the database, which is relatively expensive at scale. We now use `gjson` to extract just the subset of data we need from the raw bytes to determine whether an advisory is relevant before doing the full advisory parsing. This is especially useful for databases with a large number of MAL advisories, since the packages they affect are very rare; for example, the NPM database has 209647 MAL advisories out of a total of 214057 advisories. Before this change a scan took about 10 seconds, whereas with this optimization it takes about 3 seconds.
1 parent d44e578 commit 9c36a12

File tree

2 files changed

+69
-15
lines changed

2 files changed

+69
-15
lines changed

internal/clients/clientimpl/localmatcher/zip.go

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,15 @@ import (
1212
"net/http"
1313
"os"
1414
"path"
15+
"slices"
1516
"strings"
1617

1718
"github.com/google/osv-scalibr/extractor"
1819
"github.com/google/osv-scanner/v2/internal/cmdlogger"
1920
"github.com/google/osv-scanner/v2/internal/imodels"
2021
"github.com/google/osv-scanner/v2/internal/utility/vulns"
2122
"github.com/ossf/osv-schema/bindings/go/osvschema"
23+
"github.com/tidwall/gjson"
2224
"google.golang.org/protobuf/encoding/protojson"
2325
)
2426

@@ -160,16 +162,22 @@ func (db *ZipDB) fetchZip(ctx context.Context) (*os.File, error) {
160162
return f, nil
161163
}
162164

163-
func mightAffectPackages(v *osvschema.Vulnerability, names []string) bool {
164-
for _, affected := range v.GetAffected() {
165-
for _, name := range names {
166-
if affected.GetPackage().GetName() == name {
167-
return true
168-
}
165+
func mightAffectPackagesBytes(content []byte, names []string) bool {
166+
affected := gjson.GetBytes(content, "affected")
167+
168+
for _, name := range affected.Get("#.package.name").Array() {
169+
if slices.Contains(names, name.String()) {
170+
return true
171+
}
172+
}
169173

170-
// "name" will be the git repository in the case of the GIT ecosystem
171-
for _, ran := range affected.GetRanges() {
172-
if vulns.NormalizeRepo(ran.GetRepo()) == vulns.NormalizeRepo(name) {
174+
for _, repos := range affected.Get("#.ranges.#.repo").Array() {
175+
for _, repo := range repos.Array() {
176+
repoName := vulns.NormalizeRepo(repo.String())
177+
178+
for _, name := range names {
179+
// "name" will be the git repository in the case of the GIT ecosystem
180+
if repoName == vulns.NormalizeRepo(name) {
173181
return true
174182
}
175183
}
@@ -197,18 +205,20 @@ func (db *ZipDB) loadZipFile(zipFile *zip.File, names []string) {
197205
return
198206
}
199207

208+
// if we have been provided a list of package names, only load advisories
209+
// that might actually affect those packages, rather than all advisories
210+
if len(names) > 0 && !mightAffectPackagesBytes(content, names) {
211+
return
212+
}
213+
200214
vulnerability := &osvschema.Vulnerability{}
201215
if err := protojson.Unmarshal(content, vulnerability); err != nil {
202216
cmdlogger.Warnf("%s is not a valid JSON file: %v", zipFile.Name, err)
203217

204218
return
205219
}
206220

207-
// if we have been provided a list of package names, only load advisories
208-
// that might actually affect those packages, rather than all advisories
209-
if len(names) == 0 || mightAffectPackages(vulnerability, names) {
210-
db.Vulnerabilities = append(db.Vulnerabilities, vulnerability)
211-
}
221+
db.Vulnerabilities = append(db.Vulnerabilities, vulnerability)
212222
}
213223

214224
// load fetches a zip archive of the OSV database and loads known vulnerabilities

internal/clients/clientimpl/localmatcher/zip_test.go

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,34 @@ func TestNewZippedDB_WithSpecificPackages(t *testing.T) {
502502
{Package: &osvschema.Package{Name: "pkg-2"}},
503503
},
504504
},
505+
"GHSA-7.json": {
506+
Id: "GHSA-7",
507+
Affected: []*osvschema.Affected{
508+
{
509+
Ranges: []*osvschema.Range{
510+
{Type: osvschema.Range_SEMVER},
511+
{Type: osvschema.Range_GIT, Repo: "https://github.com/org/repo"},
512+
},
513+
},
514+
},
515+
},
516+
"GHSA-8.json": {
517+
Id: "GHSA-8",
518+
Affected: []*osvschema.Affected{
519+
{Ranges: []*osvschema.Range{{Type: osvschema.Range_SEMVER}}},
520+
{Ranges: []*osvschema.Range{{Type: osvschema.Range_GIT, Repo: "git://github.com/org/repo.git"}}},
521+
},
522+
},
523+
"GHSA-9.json": {
524+
Id: "GHSA-9",
525+
Affected: []*osvschema.Affected{
526+
{
527+
Ranges: []*osvschema.Range{
528+
{Type: osvschema.Range_GIT, Repo: "https://github.com/anotherorg/anotherrepo"},
529+
},
530+
},
531+
},
532+
},
505533
})
506534
})
507535

@@ -512,7 +540,7 @@ func TestNewZippedDB_WithSpecificPackages(t *testing.T) {
512540
ts.URL,
513541
userAgent,
514542
false,
515-
[]*extractor.Package{{Name: "pkg-1"}, {Name: "pkg-3"}},
543+
[]*extractor.Package{{Name: "pkg-1"}, {Name: "pkg-3"}, {Name: "https://github.com/org/repo"}},
516544
)
517545

518546
if err != nil {
@@ -545,5 +573,21 @@ func TestNewZippedDB_WithSpecificPackages(t *testing.T) {
545573
{Package: &osvschema.Package{Name: "pkg-2"}},
546574
},
547575
},
576+
{
577+
Id: "GHSA-7",
578+
Affected: []*osvschema.Affected{
579+
{Ranges: []*osvschema.Range{
580+
{Type: osvschema.Range_SEMVER},
581+
{Type: osvschema.Range_GIT, Repo: "https://github.com/org/repo"},
582+
}},
583+
},
584+
},
585+
{
586+
Id: "GHSA-8",
587+
Affected: []*osvschema.Affected{
588+
{Ranges: []*osvschema.Range{{Type: osvschema.Range_SEMVER}}},
589+
{Ranges: []*osvschema.Range{{Type: osvschema.Range_GIT, Repo: "git://github.com/org/repo.git"}}},
590+
},
591+
},
548592
})
549593
}

0 commit comments

Comments
 (0)