Skip to content

Commit 81f681c

Browse files
authored
refactor(vulnfeeds): Uncouple Alpine vulns (#3954)
1 parent bd0d026 commit 81f681c

File tree

10 files changed

+230
-177
lines changed

10 files changed

+230
-177
lines changed

deployment/clouddeploy/gke-workers/base/alpine-cve-convert.yaml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ spec:
1212
activeDeadlineSeconds: 3600
1313
template:
1414
spec:
15+
tolerations:
16+
- key: workloadType
17+
operator: Equal
18+
value: highend
1519
containers:
1620
- name: alpine-cve-convert
1721
image: alpine-cve-convert
@@ -20,11 +24,11 @@ spec:
2024
privileged: true
2125
resources:
2226
requests:
23-
cpu: "1"
24-
memory: "1G"
27+
cpu: "2"
28+
memory: "16G"
2529
limits:
26-
cpu: "1"
27-
memory: "2G"
30+
cpu: "4"
31+
memory: "16G"
2832
restartPolicy: Never
2933
volumes:
3034
- name: "ssd"

deployment/clouddeploy/gke-workers/environments/oss-vdb-test/alpine-cve-convert.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,7 @@ spec:
1414
value: oss-vdb-test
1515
- name: OUTPUT_GCS_BUCKET
1616
value: osv-test-cve-osv-conversion
17+
- name: INPUT_GCS_BUCKET
18+
value: osv-test-cve-osv-conversion
19+
- name: NUM_WORKERS
20+
value: '256'

deployment/clouddeploy/gke-workers/environments/oss-vdb/alpine-cve-convert.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,7 @@ spec:
1414
value: oss-vdb
1515
- name: OUTPUT_GCS_BUCKET
1616
value: cve-osv-conversion
17+
- name: INPUT_GCS_BUCKET
18+
value: cve-osv-conversion
19+
- name: NUM_WORKERS
20+
value: '256'

vulnfeeds/cmd/alpine/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ COPY ./go.sum /src/go.sum
2222
RUN go mod download
2323

2424
COPY ./ /src/
25-
RUN go build -o alpine-osv ./cmd/alpine/
25+
RUN go build -o alpine ./cmd/alpine/
2626

2727

2828
FROM gcr.io/google.com/cloudsdktool/google-cloud-cli:alpine@sha256:cdac858d976cb0e6bfdc3288fee5a0a7bf6348a009089be130b2009e28463c52
2929

3030
WORKDIR /root/
31-
COPY --from=GO_BUILD /src/alpine-osv ./
31+
COPY --from=GO_BUILD /src/alpine ./
3232
COPY ./cmd/alpine/run_alpine_convert.sh ./
3333

3434
ENTRYPOINT ["/root/run_alpine_convert.sh"]

vulnfeeds/cmd/alpine/main.go

Lines changed: 104 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,34 @@
22
package main
33

44
import (
5+
"context"
56
"encoding/json"
67
"flag"
78
"fmt"
89
"io"
910
"log/slog"
1011
"net/http"
1112
"os"
12-
"path"
1313
"regexp"
14+
"sort"
1415
"strings"
16+
"sync"
17+
"time"
1518

19+
"cloud.google.com/go/storage"
20+
"github.com/google/osv/vulnfeeds/cves"
1621
"github.com/google/osv/vulnfeeds/models"
1722
"github.com/google/osv/vulnfeeds/utility/logger"
1823
"github.com/google/osv/vulnfeeds/vulns"
24+
"github.com/ossf/osv-schema/bindings/go/osvschema"
1925
)
2026

2127
const (
2228
alpineURLBase = "https://secdb.alpinelinux.org/%s/main.json"
2329
alpineIndexURL = "https://secdb.alpinelinux.org/"
24-
alpineOutputPathDefault = "parts/alpine"
30+
alpineOutputPathDefault = "alpine"
31+
defaultCvePath = "cve_jsons"
32+
outputBucketDefault = "osv-test-cve-osv-conversion"
2533
)
2634

2735
func main() {
@@ -31,15 +39,51 @@ func main() {
3139
"alpineOutput",
3240
alpineOutputPathDefault,
3341
"path to output general alpine affected package information")
42+
outputBucketName := flag.String("output_bucket", outputBucketDefault, "The GCS bucket to write to.")
43+
numWorkers := flag.Int("num_workers", 64, "Number of workers to process records")
44+
uploadToGCS := flag.Bool("uploadToGCS", false, "If true, do not write to GCS bucket and instead write to local disk.")
3445
flag.Parse()
3546

3647
err := os.MkdirAll(*alpineOutputPath, 0755)
3748
if err != nil {
3849
logger.Fatal("Can't create output path", slog.Any("err", err))
3950
}
4051

52+
allCVEs := vulns.LoadAllCVEs(defaultCvePath)
4153
allAlpineSecDB := getAlpineSecDBData()
42-
generateAlpineOSV(allAlpineSecDB, *alpineOutputPath)
54+
osvVulnerabilities := generateAlpineOSV(allAlpineSecDB, allCVEs)
55+
56+
ctx := context.Background()
57+
var bkt *storage.BucketHandle
58+
if *uploadToGCS {
59+
storageClient, err := storage.NewClient(ctx)
60+
if err != nil {
61+
logger.Fatal("Failed to create storage client", slog.Any("err", err))
62+
}
63+
bkt = storageClient.Bucket(*outputBucketName)
64+
}
65+
var wg sync.WaitGroup
66+
vulnChan := make(chan *vulns.Vulnerability)
67+
68+
for range *numWorkers {
69+
wg.Add(1)
70+
go func() {
71+
defer wg.Done()
72+
vulns.Worker(ctx, vulnChan, bkt, *alpineOutputPath)
73+
}()
74+
}
75+
76+
for _, v := range osvVulnerabilities {
77+
if len(v.Affected) == 0 {
78+
logger.Warn(fmt.Sprintf("Skipping %s as no affected versions found.", v.ID), slog.String("id", v.ID))
79+
continue
80+
}
81+
vulnChan <- v
82+
}
83+
84+
close(vulnChan)
85+
wg.Wait()
86+
logger.Info("Alpine CVE conversion succeeded.")
4387
}
4488

4589
// getAllAlpineVersions gets the names of all versions available in the Alpine SecDB
@@ -55,7 +99,7 @@ func getAllAlpineVersions() []string {
5599
logger.Fatal("Failed to get alpine index page", slog.Any("err", err))
56100
}
57101

58-
exp := regexp.MustCompile("href=\"(v[\\d.]*)/\"")
102+
exp := regexp.MustCompile(`href="(v[\d.]*)/"`)
59103

60104
searchRes := exp.FindAllStringSubmatch(buf.String(), -1)
61105
alpineVersions := make([]string, 0, len(searchRes))
@@ -87,8 +131,9 @@ func getAlpineSecDBData() map[string][]VersionAndPkg {
87131
cveID = strings.Split(cveID, " ")[0]
88132

89133
if !validVersion(version) {
90-
logger.Warn("Invalid alpine version",
91-
slog.String("version", version),
134+
logger.Warn(fmt.Sprintf("[%s] Invalid alpine version: '%s', on package: '%s', and alpine version: '%s'",
135+
cveID, version, pkg.Pkg.Name,
136+
alpineVer), slog.String("version", version),
92137
slog.String("package", pkg.Pkg.Name),
93138
slog.String("alpine_version", alpineVer),
94139
)
@@ -111,9 +156,53 @@ func getAlpineSecDBData() map[string][]VersionAndPkg {
111156
}
112157

113158
// generateAlpineOSV builds one OSV vulnerability record per Alpine advisory ID that is present in allCVEs, attaching the affected package information given by the Alpine advisory
114-
func generateAlpineOSV(allAlpineSecDb map[string][]VersionAndPkg, alpineOutputPath string) {
115-
for cveID, verPkgs := range allAlpineSecDb {
116-
pkgInfos := make([]vulns.PackageInfo, 0, len(verPkgs))
159+
func generateAlpineOSV(allAlpineSecDb map[string][]VersionAndPkg, allCVEs map[cves.CVEID]cves.Vulnerability) (osvVulnerabilities []*vulns.Vulnerability) {
160+
cveIDs := make([]string, 0, len(allAlpineSecDb))
161+
for cveID := range allAlpineSecDb {
162+
cveIDs = append(cveIDs, cveID)
163+
}
164+
sort.Strings(cveIDs)
165+
166+
for _, cveID := range cveIDs {
167+
verPkgs := allAlpineSecDb[cveID]
168+
sort.Slice(verPkgs, func(i, j int) bool {
169+
if verPkgs[i].Pkg != verPkgs[j].Pkg {
170+
return verPkgs[i].Pkg < verPkgs[j].Pkg
171+
}
172+
if verPkgs[i].AlpineVer != verPkgs[j].AlpineVer {
173+
return verPkgs[i].AlpineVer < verPkgs[j].AlpineVer
174+
}
175+
176+
return verPkgs[i].Ver < verPkgs[j].Ver
177+
})
178+
cve, ok := allCVEs[cves.CVEID(cveID)]
179+
var published time.Time
180+
var details string
181+
if ok {
182+
published = cve.CVE.Published.Time
183+
if len(cve.CVE.Descriptions) > 0 {
184+
details = cve.CVE.Descriptions[0].Value
185+
}
186+
} else {
187+
// TODO: add support for non-CVE reports
188+
logger.Warn(fmt.Sprintf("CVE %s not found in cve_jsons", cveID), slog.String("cveID", cveID))
189+
continue
190+
}
191+
192+
v := &vulns.Vulnerability{
193+
Vulnerability: osvschema.Vulnerability{
194+
ID: "ALPINE-" + cveID,
195+
Upstream: []string{cveID},
196+
Published: published,
197+
Details: details,
198+
References: []osvschema.Reference{
199+
{
200+
Type: "ADVISORY",
201+
URL: "https://security.alpinelinux.org/vuln/" + cveID,
202+
},
203+
},
204+
},
205+
}
117206

118207
for _, verPkg := range verPkgs {
119208
pkgInfo := vulns.PackageInfo{
@@ -124,23 +213,17 @@ func generateAlpineOSV(allAlpineSecDb map[string][]VersionAndPkg, alpineOutputPa
124213
Ecosystem: "Alpine:" + verPkg.AlpineVer,
125214
PURL: "pkg:apk/alpine/" + verPkg.Pkg + "?arch=source",
126215
}
127-
pkgInfos = append(pkgInfos, pkgInfo)
216+
v.AddPkgInfo(pkgInfo)
128217
}
129218

130-
file, err := os.OpenFile(path.Join(alpineOutputPath, cveID+".alpine.json"), os.O_CREATE|os.O_RDWR, 0644)
131-
if err != nil {
132-
logger.Fatal("Failed to create/write osv output file", slog.Any("err", err))
133-
}
134-
encoder := json.NewEncoder(file)
135-
encoder.SetIndent("", " ")
136-
err = encoder.Encode(&pkgInfos)
137-
if err != nil {
138-
logger.Fatal("Failed to encode package info output file", slog.Any("err", err))
219+
if len(v.Affected) == 0 {
220+
logger.Warn(fmt.Sprintf("Skipping %s as no affected versions found.", v.ID), slog.String("cveID", cveID))
221+
continue
139222
}
140-
_ = file.Close()
223+
osvVulnerabilities = append(osvVulnerabilities, v)
141224
}
142225

143-
logger.Info("Finished")
226+
return osvVulnerabilities
144227
}
145228

146229
// downloadAlpine downloads Alpine SecDB data from their API

vulnfeeds/cmd/alpine/run_alpine_convert.sh

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,20 @@
88

99
set -e
1010

11-
OSV_PARTS_OUTPUT="parts/alpine"
12-
OUTPUT_BUCKET="${OUTPUT_GCS_BUCKET:=cve-osv-conversion}"
11+
OSV_OUTPUT_PATH="alpine"
12+
INPUT_BUCKET="${INPUT_GCS_BUCKET:=osv-test-cve-osv-conversion}"
13+
OUTPUT_BUCKET="${OUTPUT_GCS_BUCKET:=osv-test-cve-osv-conversion}"
14+
CVE_OUTPUT="cve_jsons/"
15+
WORKERS="${NUM_WORKERS:=256}"
1316

14-
echo "Setup initial directories"
15-
rm -rf $OSV_PARTS_OUTPUT && mkdir -p $OSV_PARTS_OUTPUT
1617

17-
./alpine-osv
18-
echo "Begin Syncing with cloud"
19-
gsutil -q -m rsync -c -d $OSV_PARTS_OUTPUT "gs://$OUTPUT_BUCKET/$OSV_PARTS_OUTPUT"
20-
echo "Successfully synced with cloud"
18+
echo "Setup initial directories ${OSV_OUTPUT_PATH}"
19+
rm -rf $OSV_OUTPUT_PATH && mkdir -p $OSV_OUTPUT_PATH
20+
rm -rf $CVE_OUTPUT && mkdir -p $CVE_OUTPUT
21+
22+
echo "Begin syncing NVD data from GCS bucket ${INPUT_BUCKET}"
23+
gcloud --no-user-output-enabled storage -q cp "gs://${INPUT_BUCKET}/nvd/*-????.json" "${CVE_OUTPUT}"
24+
echo "Successfully synced from GCS bucket"
25+
26+
./alpine -output_bucket "$OUTPUT_BUCKET" -output_path "$OSV_OUTPUT_PATH" -num_workers "$WORKERS" -uploadToGCS
27+
echo "Successfully converted and uploaded to cloud"

vulnfeeds/cmd/combine-to-osv/main.go

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"encoding/json"
66
"flag"
77
"log/slog"
8-
"net/url"
98
"os"
109
"path"
1110
"strings"
@@ -14,7 +13,6 @@ import (
1413
"github.com/google/osv/vulnfeeds/cves"
1514
"github.com/google/osv/vulnfeeds/utility/logger"
1615
"github.com/google/osv/vulnfeeds/vulns"
17-
"github.com/ossf/osv-schema/bindings/go/osvschema"
1816
)
1917

2018
const (
@@ -23,9 +21,8 @@ const (
2321
defaultOSVOutputPath = "osv_output"
2422
defaultCVEListPath = "."
2523

26-
alpineEcosystem = "Alpine"
27-
debianEcosystem = "Debian"
28-
alpineSecurityTrackerURL = "https://security.alpinelinux.org/vuln"
24+
alpineEcosystem = "Alpine"
25+
debianEcosystem = "Debian"
2926
)
3027

3128
func main() {
@@ -165,17 +162,12 @@ func combineIntoOSV(loadedCves map[cves.CVEID]cves.Vulnerability, allParts map[c
165162
}
166163
}
167164

168-
addedAlpineURL := false
169165
for _, pkgInfo := range allParts[cveID] {
170-
// skip debian parts, but still write out the CVEs.
171-
if strings.HasPrefix(pkgInfo.Ecosystem, debianEcosystem) {
166+
// skip debian and alpine parts, but still write out the CVEs.
167+
if strings.HasPrefix(pkgInfo.Ecosystem, debianEcosystem) || strings.HasPrefix(pkgInfo.Ecosystem, alpineEcosystem) {
172168
continue
173169
}
174170
convertedCve.AddPkgInfo(pkgInfo)
175-
if strings.HasPrefix(pkgInfo.Ecosystem, alpineEcosystem) && !addedAlpineURL {
176-
addReference(string(cveID), alpineEcosystem, convertedCve)
177-
addedAlpineURL = true
178-
}
179171
}
180172

181173
cveModified := convertedCve.Modified
@@ -207,17 +199,3 @@ func writeOSVFile(osvData map[cves.CVEID]*vulns.Vulnerability, osvOutputPath str
207199

208200
logger.Info("Successfully written OSV files", slog.Int("count", len(osvData)))
209201
}
210-
211-
// addReference adds the related security tracker URL to a given vulnerability's references
212-
func addReference(cveID string, ecosystem string, convertedCve *vulns.Vulnerability) {
213-
securityReference := osvschema.Reference{Type: osvschema.ReferenceAdvisory}
214-
if ecosystem == alpineEcosystem {
215-
securityReference.URL, _ = url.JoinPath(alpineSecurityTrackerURL, cveID)
216-
}
217-
218-
if securityReference.URL == "" {
219-
return
220-
}
221-
222-
convertedCve.References = append(convertedCve.References, securityReference)
223-
}

vulnfeeds/cmd/combine-to-osv/main_test.go

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -99,32 +99,6 @@ func TestCombineIntoOSV(t *testing.T) {
9999
if actualCombined != expectedCombined {
100100
t.Errorf("Expected %d in combination, got %d: %#v", expectedCombined, actualCombined, combinedOSV)
101101
}
102-
for cve := range cveStuff {
103-
if len(combinedOSV[cve].Affected) != len(allParts[cve]) {
104-
t.Errorf("Affected lengths for %s do not match", cve)
105-
}
106-
107-
found := false
108-
switch cve {
109-
case "CVE-2022-33745":
110-
for _, reference := range combinedOSV[cve].References {
111-
if reference.Type == "ADVISORY" &&
112-
reference.URL == "https://security.alpinelinux.org/vuln/CVE-2022-33745" {
113-
found = true
114-
}
115-
}
116-
case "CVE-2022-32746":
117-
for _, reference := range combinedOSV[cve].References {
118-
if reference.Type == "ADVISORY" &&
119-
reference.URL == "https://security.alpinelinux.org/vuln/CVE-2022-32746" {
120-
found = true
121-
}
122-
}
123-
}
124-
if !found {
125-
t.Errorf("%s doesn't have all expected references", cve)
126-
}
127-
}
128102
}
129103
func TestGetModifiedTime(t *testing.T) {
130104
_, err := getModifiedTime("../../test_data/parts/debian/CVE-2016-1585.debian.json")

0 commit comments

Comments
 (0)