Skip to content

Commit ef99b5d

Browse files
committed
clean: Simplify and correct malicious packages scanner implementation
1 parent 0401b2a commit ef99b5d

File tree

3 files changed

+237
-410
lines changed

3 files changed

+237
-410
lines changed
Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
package tool
2+
3+
import (
4+
"compress/gzip"
5+
"encoding/json"
6+
"fmt"
7+
"os"
8+
"slices"
9+
"strings"
10+
11+
ptypes "github.com/aquasecurity/trivy/pkg/types"
12+
codacy "github.com/codacy/codacy-engine-golang-seed/v6"
13+
"github.com/samber/lo"
14+
"golang.org/x/mod/semver"
15+
)
16+
17+
// maliciousPackagesIndexPath is the location on disk of the gzipped JSON index
// of malicious packages that NewMaliciousPackagesScanner loads.
const maliciousPackagesIndexPath = "/dist/cache/codacy-trivy/openssf-malicious-packages-index.json.gz"
18+
19+
// maliciousPackage represents a shallow representation of an Open Source Vulnerability (OSV).
20+
// Although it's schema is generic, it is guaranteed that it is only instantiated for Malicious Package vulnerabilities.
21+
//
22+
// See https://ossf.github.io/osv-schema/
23+
type maliciousPackage struct {
24+
// OpenSSF identifier of the malicious package.
25+
ID string `json:"id"`
26+
// A summary of why the package is malicious.
27+
Summary string `json:"summary"`
28+
// The versions of the malicious package.
29+
// The version syntax is the one defined by the package ecosystem where the malicious package is deployed.
30+
Versions []string `json:"versions"`
31+
// The range of versions considered malicious.
32+
// This is usually defined if `Versions` is empty, but sometimes both are defined.
33+
Ranges []maliciousPackageRange `json:"ranges"`
34+
}
35+
36+
// matchesVersion checks if the reported malicious package versions match version.
37+
//
38+
// `Ranges` is only checked if there is no direct match in `Versions`.
39+
func (o maliciousPackage) matchesVersion(version string) bool {
40+
if slices.Contains(o.Versions, version) {
41+
return true
42+
}
43+
for _, affectedRange := range o.Ranges {
44+
if affectedRange.matchesVersion(version) {
45+
return true
46+
}
47+
}
48+
return false
49+
}
50+
51+
// maliciousPackageRange represents range of versions considered malicious.
52+
//
53+
// See https://ossf.github.io/osv-schema/#affectedranges-field
54+
type maliciousPackageRange struct {
55+
Type string `json:"type"`
56+
Events []maliciousPackageRangeEvent `json:"events"`
57+
}
58+
59+
// matchesVersion checks if version matches any of the range events but only if range is of type '[SEMVER]'.
60+
//
61+
// [SEMVER]: https://ossf.github.io/osv-schema/#affectedrangestype-field
62+
func (r maliciousPackageRange) matchesVersion(version string) bool {
63+
if r.Type != "SEMVER" {
64+
return false
65+
}
66+
67+
for _, event := range r.Events {
68+
if event.matchesVersion(version) {
69+
return true
70+
}
71+
}
72+
return false
73+
}
74+
75+
// maliciousPackageRangeEvent is a single event of a version range: the version
// that introduced a vulnerability or the version that fixed it.
//
// See https://ossf.github.io/osv-schema/#affectedrangesevents-fields
type maliciousPackageRangeEvent struct {
	// Introduced is the version that introduced the vulnerability, if this is an 'introduced' event.
	Introduced string `json:"introduced,omitempty"`
	// Fixed is the version that fixed the vulnerability, if this is a 'fixed' event.
	Fixed string `json:"fixed,omitempty"`
}
82+
83+
// matchesVersion checks if version is after Introduced or before Fixed.
84+
//
85+
// According to [OSV schema], either 'fixed' or 'introduced' are defined in an event, but not both.
86+
//
87+
// [OSV schema]: https://ossf.github.io/osv-schema/#requirements
88+
func (e maliciousPackageRangeEvent) matchesVersion(version string) bool {
89+
if e.Introduced != "" {
90+
return semverCompare(version, e.Fixed) >= 0
91+
}
92+
if e.Fixed != "" {
93+
return semverCompare(version, e.Fixed) < 0
94+
}
95+
return false
96+
}
97+
98+
// maliciousPackagesByEcosystemAndName maps ecosystem names to vulnerable packages.
99+
type maliciousPackagesByEcosystemAndName map[string]maliciousPackagesByName
100+
101+
// maliciousPackagesByName maps malicious package names to their OSV entries.
102+
type maliciousPackagesByName map[string][]maliciousPackage
103+
104+
// MaliciousPackagesScanner handles scanning for malicious packages.
105+
// It expects an index of data in the OSV format.
106+
//
107+
// See https://ossf.github.io/osv-schema/
108+
type MaliciousPackagesScanner struct {
109+
index maliciousPackagesByEcosystemAndName
110+
}
111+
112+
// NewMaliciousPackagesScanner creates a new OpenSSF malicious packages scanner and loads
113+
// malicious data from disk, as defined by the build process of this tool.
114+
func NewMaliciousPackagesScanner() (*MaliciousPackagesScanner, error) {
115+
index, err := loadIndex(maliciousPackagesIndexPath)
116+
if err != nil {
117+
return nil, err
118+
}
119+
120+
return &MaliciousPackagesScanner{index: index}, nil
121+
}
122+
123+
// Scans the given Trivy report for malicious packages.
124+
func (s MaliciousPackagesScanner) Scan(report ptypes.Report, toolExecution codacy.ToolExecution) []codacy.Result {
125+
maliciousPackagesEnabled := lo.SomeBy(*toolExecution.Patterns, func(p codacy.Pattern) bool {
126+
return p.ID == ruleIDMaliciousPackages
127+
})
128+
if !maliciousPackagesEnabled {
129+
return []codacy.Result{}
130+
}
131+
132+
var issues []codacy.Issue
133+
for _, result := range report.Results {
134+
for _, pkg := range result.Packages {
135+
// For now we require PURL to be defined, but in the future we can try to infer it.
136+
if pkg.Identifier.PURL == nil {
137+
continue
138+
}
139+
140+
pkgEcosystem := osvPackageEcosystem(pkg.Identifier.PURL.Type)
141+
maliciousPkgs, ok := s.index[pkgEcosystem]
142+
if !ok {
143+
continue
144+
}
145+
maliciousPkg, ok := maliciousPkgs[strings.ToLower(pkg.Name)]
146+
if !ok {
147+
continue
148+
}
149+
150+
for _, candidate := range maliciousPkg {
151+
if pkg.Version != "" && candidate.matchesVersion(pkg.Version) {
152+
153+
var lineNumber int
154+
if len(pkg.Locations) > 0 {
155+
lineNumber = pkg.Locations[0].StartLine
156+
} else {
157+
lineNumber = fallbackSearchForLineNumber(toolExecution.SourceDir, result.Target, pkg.Name)
158+
}
159+
160+
issue := codacy.Issue{
161+
File: result.Target,
162+
Line: lineNumber,
163+
Message: fmt.Sprintf("%s - %s@%s", candidate.Summary, pkg.Name, pkg.Version),
164+
PatternID: ruleIDMaliciousPackages,
165+
SourceID: candidate.ID,
166+
}
167+
issues = append(issues, issue)
168+
}
169+
}
170+
171+
}
172+
}
173+
174+
return mapIssuesWithoutLineNumber(filterIssuesFromKnownFiles(issues, *toolExecution.Files))
175+
}
176+
177+
// loadIndex attempts to load into memory the gzipped prebuilt index.
178+
func loadIndex(indexPath string) (maliciousPackagesByEcosystemAndName, error) {
179+
f, err := os.Open(indexPath)
180+
if err != nil {
181+
return nil, &ToolError{msg: "Failed to open malicious package index", w: err}
182+
}
183+
defer f.Close()
184+
185+
gz, err := gzip.NewReader(f)
186+
if err != nil {
187+
return nil, &ToolError{msg: "Failed to read malicious package index", w: err}
188+
}
189+
defer gz.Close()
190+
191+
var idx maliciousPackagesByEcosystemAndName
192+
if err := json.NewDecoder(gz).Decode(&idx); err != nil {
193+
return nil, &ToolError{msg: "Failed to decode malicious package index", w: err}
194+
}
195+
return idx, nil
196+
}
197+
198+
// semverCompare compares two versions, handling both with and without "v" prefix.
199+
//
200+
// See [semver.Compare] documentation.
201+
//
202+
// [semver.Compare]: https://pkg.go.dev/golang.org/x/mod/semver#Compare
203+
func semverCompare(v1, v2 string) int {
204+
// Ensure versions have "v" prefix for semver.Compare
205+
normalizeVersion := func(version string) string {
206+
if !strings.HasPrefix(version, "v") {
207+
return "v" + version
208+
}
209+
return version
210+
}
211+
212+
// Ensure both versions have consistent prefix handling
213+
return semver.Compare(normalizeVersion(v1), normalizeVersion(v2))
214+
}
215+
216+
// osvPackageEcosystem returns the lower-cased name of the Ecosystem defined by the OSV schema
// that corresponds to the PURL type of a package identified by Trivy.
//
// PURL types whose name differs from the OSV ecosystem are translated explicitly
// (e.g. "golang" becomes "go" and "composer" becomes "packagist").
// Any other type is returned lower-cased and otherwise unchanged.
//
// See https://ossf.github.io/osv-schema/#affectedpackage-field
func osvPackageEcosystem(purlType string) string {
	switch ecosystem := strings.ToLower(purlType); ecosystem {
	case "golang":
		return "go"
	case "gem":
		return "rubygems"
	case "cargo":
		return "crates.io"
	case "composer":
		// PHP packages use the "composer" PURL type, but OSV names the ecosystem "Packagist".
		return "packagist"
	default:
		return ecosystem
	}
}

0 commit comments

Comments
 (0)