Skip to content

Commit 08684be

Browse files
authored
fix(vulnfeeds): only upload Debian CVEs if file has changed. (#3987)
Closes #3983
1 parent 74ae487 commit 08684be

File tree

6 files changed

+228
-90
lines changed

6 files changed

+228
-90
lines changed

deployment/clouddeploy/gke-workers/environments/oss-vdb-test/debian-cve-convert.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,5 @@ spec:
1616
value: osv-test-debian-osv
1717
- name: INPUT_GCS_BUCKET
1818
value: osv-test-cve-osv-conversion
19+
- name: NUM_WORKERS
20+
value: "256" # env var values must be strings, so the number is quoted

vulnfeeds/cmd/debian/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ COPY ./go.sum /src/go.sum
2222
RUN go mod download
2323

2424
COPY ./ /src/
25-
RUN go build -o debian-osv ./cmd/debian/
25+
RUN go build -o debian ./cmd/debian/
2626

2727

2828
FROM gcr.io/google.com/cloudsdktool/google-cloud-cli:alpine@sha256:af3dac7ae5f9d28fee1c2ec738f616467a99f9f23bc31a49bb8a297af5f1eabf
2929

3030
WORKDIR /root/
31-
COPY --from=GO_BUILD /src/debian-osv ./
31+
COPY --from=GO_BUILD /src/debian ./
3232
COPY ./cmd/debian/run_debian_convert.sh ./
3333

3434
RUN chmod 755 ./run_debian_convert.sh

vulnfeeds/cmd/debian/main.go

Lines changed: 104 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,25 @@
22
package main
33

44
import (
5+
"context"
6+
"crypto/sha256"
57
"encoding/csv"
8+
"encoding/hex"
69
"encoding/json"
10+
"errors"
711
"flag"
812
"fmt"
913
"log/slog"
10-
1114
"net/http"
1215
"os"
1316
"path"
1417
"sort"
1518
"strconv"
1619
"strings"
20+
"sync"
1721
"time"
1822

23+
"cloud.google.com/go/storage"
1924
"github.com/google/osv/vulnfeeds/cves"
2025
"github.com/google/osv/vulnfeeds/faulttolerant"
2126
"github.com/google/osv/vulnfeeds/models"
@@ -29,12 +34,16 @@ const (
2934
debianOutputPathDefault = "debian-cve-osv"
3035
debianDistroInfoURL = "https://debian.pages.debian.net/distro-info-data/debian.csv"
3136
debianSecurityTrackerURL = "https://security-tracker.debian.org/tracker/data/json"
37+
outputBucketDefault = "debian-osv"
38+
hashMetadataKey = "sha256-hash"
3239
)
3340

3441
func main() {
3542
logger.InitGlobalLogger()
3643

3744
debianOutputPath := flag.String("output_path", debianOutputPathDefault, "Path to output OSV files.")
45+
outputBucketName := flag.String("output_bucket", outputBucketDefault, "The GCS bucket to write to.")
46+
// num_workers must be an integer flag: the worker start-up loop ranges over
// *numWorkers, and ranging over a string would iterate its characters instead.
numWorkers := flag.Int("num_workers", 64, "Number of workers to process records")
3847
flag.Parse()
3948

4049
err := os.MkdirAll(*debianOutputPath, 0755)
@@ -53,15 +62,106 @@ func main() {
5362
}
5463

5564
allCVEs := vulns.LoadAllCVEs(defaultCvePath)
56-
osvCves := generateOSVFromDebianTracker(debianData, debianReleaseMap, allCVEs)
5765

58-
if err = writeToOutput(osvCves, *debianOutputPath); err != nil {
59-
logger.Fatal("Failed to write OSV output file", slog.Any("err", err))
66+
ctx := context.Background()
67+
storageClient, err := storage.NewClient(ctx)
68+
if err != nil {
69+
logger.Fatal("Failed to create storage client", slog.Any("err", err))
70+
}
71+
bkt := storageClient.Bucket(*outputBucketName)
72+
73+
var wg sync.WaitGroup
74+
vulnChan := make(chan *vulns.Vulnerability)
75+
76+
for range *numWorkers {
77+
wg.Add(1)
78+
go func() {
79+
defer wg.Done()
80+
worker(ctx, vulnChan, bkt, *debianOutputPath)
81+
}()
82+
}
83+
84+
osvCVEs := generateOSVFromDebianTracker(debianData, debianReleaseMap, allCVEs)
85+
86+
for _, v := range osvCVEs {
87+
if len(v.Affected) == 0 {
88+
logger.Warn(fmt.Sprintf("Skipping %s as no affected versions found.", v.ID), slog.String("id", v.ID))
89+
continue
90+
}
91+
vulnChan <- v
6092
}
93+
close(vulnChan)
94+
wg.Wait()
6195

6296
logger.Info("Debian CVE conversion succeeded.")
6397
}
6498

99+
func worker(ctx context.Context, vulnChan <-chan *vulns.Vulnerability, bkt *storage.BucketHandle, outputDir string) {
100+
for v := range vulnChan {
101+
debianID := v.ID
102+
if len(v.Affected) == 0 {
103+
logger.Warn(fmt.Sprintf("Skipping %s as no affected versions found.", debianID), slog.String("id", debianID))
104+
continue
105+
}
106+
107+
// Marshal before setting modified time to generate hash.
108+
buf, err := json.MarshalIndent(v, "", " ")
109+
if err != nil {
110+
logger.Error("failed to marshal vulnerability", slog.String("id", debianID), slog.Any("err", err))
111+
continue
112+
}
113+
114+
hash := sha256.Sum256(buf)
115+
hexHash := hex.EncodeToString(hash[:])
116+
117+
objName := path.Join(outputDir, debianID+".json")
118+
obj := bkt.Object(objName)
119+
120+
// Check if object exists and if hash matches.
121+
attrs, err := obj.Attrs(ctx)
122+
if err == nil {
123+
// Object exists, check hash.
124+
if attrs.Metadata != nil && attrs.Metadata[hashMetadataKey] == hexHash {
125+
logger.Info("Skipping upload, hash matches", slog.String("id", debianID))
126+
continue
127+
}
128+
} else if !errors.Is(err, storage.ErrObjectNotExist) {
129+
logger.Error("failed to get object attributes", slog.String("id", debianID), slog.Any("err", err))
130+
continue
131+
}
132+
133+
// Object does not exist or hash differs, upload.
134+
v.Modified = time.Now().UTC()
135+
buf, err = json.MarshalIndent(v, "", " ")
136+
if err != nil {
137+
logger.Error("failed to marshal vulnerability with modified time", slog.String("id", debianID), slog.Any("err", err))
138+
continue
139+
}
140+
141+
logger.Info("Uploading", slog.String("id", debianID))
142+
wc := obj.NewWriter(ctx)
143+
wc.Metadata = map[string]string{
144+
hashMetadataKey: hexHash,
145+
}
146+
wc.ContentType = "application/json"
147+
148+
if _, err := wc.Write(buf); err != nil {
149+
logger.Error("failed to write to GCS object", slog.String("id", debianID), slog.Any("err", err))
150+
// Try to close writer even if write failed.
151+
if closeErr := wc.Close(); closeErr != nil {
152+
logger.Error("failed to close GCS writer after write error", slog.String("id", debianID), slog.Any("err", closeErr))
153+
}
154+
155+
continue
156+
}
157+
158+
if err := wc.Close(); err != nil {
159+
logger.Error("failed to close GCS writer", slog.String("id", debianID), slog.Any("err", err))
160+
continue
161+
}
162+
}
163+
}
164+
65165
// generateOSVFromDebianTracker converts Debian Security Tracker entries to OSV format.
66166
func generateOSVFromDebianTracker(debianData DebianSecurityTrackerData, debianReleaseMap map[string]string, allCVEs map[cves.CVEID]cves.Vulnerability) map[string]*vulns.Vulnerability {
67167
logger.Info("Converting Debian Security Tracker data to OSV.")
@@ -100,7 +200,6 @@ func generateOSVFromDebianTracker(debianData DebianSecurityTrackerData, debianRe
100200
Vulnerability: osvschema.Vulnerability{
101201
ID: "DEBIAN-" + cveID,
102202
Upstream: []string{cveID},
103-
Modified: time.Now().UTC(),
104203
Published: allCVEs[cves.CVEID(cveID)].CVE.Published.Time,
105204
Details: cveData.Description,
106205
References: []osvschema.Reference{
@@ -198,34 +297,6 @@ func getDebianReleaseMap() (map[string]string, error) {
198297
return releaseMap, err
199298
}
200299

201-
func writeToOutput(osvCves map[string]*vulns.Vulnerability, debianOutputPath string) error {
202-
logger.Info("Writing OSV files to the output.")
203-
for cveID, osv := range osvCves {
204-
debianID := "DEBIAN-" + cveID
205-
if len(osv.Affected) == 0 {
206-
logger.Warn(fmt.Sprintf("Skipping %s as no affected versions found.", debianID), slog.String("id", debianID))
207-
continue
208-
}
209-
file, err := os.OpenFile(path.Join(debianOutputPath, debianID+".json"), os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0644)
210-
if err != nil {
211-
return err
212-
}
213-
214-
encoder := json.NewEncoder(file)
215-
encoder.SetIndent("", " ")
216-
err = encoder.Encode(osv)
217-
closeErr := file.Close()
218-
if err != nil {
219-
return err
220-
}
221-
if closeErr != nil {
222-
return closeErr
223-
}
224-
}
225-
226-
return nil
227-
}
228-
229300
// downloadDebianSecurityTracker downloads the Debian Security Tracker JSON file.
230301
func downloadDebianSecurityTracker() (DebianSecurityTrackerData, error) {
231302
res, err := faulttolerant.Get(debianSecurityTrackerURL)

vulnfeeds/cmd/debian/run_debian_convert.sh

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99
set -e
1010

1111
OSV_OUTPUT_PATH="debian-cve-osv"
12-
INPUT_BUCKET="${INPUT_GCS_BUCKET:=cve-osv-conversion}"
13-
OUTPUT_BUCKET="${OUTPUT_GCS_BUCKET:=debian-osv}"
12+
INPUT_BUCKET="${INPUT_GCS_BUCKET:=osv-test-cve-osv-conversion}"
13+
OUTPUT_BUCKET="${OUTPUT_GCS_BUCKET:=osv-test-debian-osv}"
1414
CVE_OUTPUT="cve_jsons/"
15-
15+
WORKERS="${NUM_WORKERS:=256}"
1616

1717
echo "Setup initial directories ${OSV_OUTPUT_PATH}"
1818
rm -rf $OSV_OUTPUT_PATH && mkdir -p $OSV_OUTPUT_PATH
@@ -22,7 +22,5 @@ echo "Begin syncing NVD data from GCS bucket ${INPUT_BUCKET}"
2222
gcloud --no-user-output-enabled storage -q cp "gs://${INPUT_BUCKET}/nvd/*-????.json" "${CVE_OUTPUT}"
2323
echo "Successfully synced from GCS bucket"
2424

25-
./debian-osv
26-
echo "Begin Syncing with cloud, GCS bucket: ${OUTPUT_BUCKET}"
27-
gsutil -q -m rsync -c -d $OSV_OUTPUT_PATH "gs://$OUTPUT_BUCKET/$OSV_OUTPUT_PATH"
28-
echo "Successfully synced with cloud"
25+
# The Dockerfile builds and copies the converter binary as "debian".
./debian -output_bucket "$OUTPUT_BUCKET" -output_path "$OSV_OUTPUT_PATH" -num_workers "$WORKERS"
26+
echo "Successfully converted and uploaded to cloud"

vulnfeeds/go.mod

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ go 1.24.6
44

55
require (
66
cloud.google.com/go/secretmanager v1.15.0
7+
cloud.google.com/go/storage v1.56.2
78
github.com/aquasecurity/go-pep440-version v0.0.1
89
github.com/atombender/go-jsonschema v0.20.0
910
github.com/charmbracelet/lipgloss v1.0.0
@@ -18,29 +19,41 @@ require (
1819
)
1920

2021
require (
21-
cloud.google.com/go/auth v0.16.2 // indirect
22+
cel.dev/expr v0.24.0 // indirect
23+
cloud.google.com/go v0.121.6 // indirect
24+
cloud.google.com/go/auth v0.16.5 // indirect
2225
cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
23-
cloud.google.com/go/compute/metadata v0.7.0 // indirect
26+
cloud.google.com/go/compute/metadata v0.8.0 // indirect
2427
cloud.google.com/go/iam v1.5.2 // indirect
28+
cloud.google.com/go/monitoring v1.24.2 // indirect
2529
dario.cat/mergo v1.0.2 // indirect
30+
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0 // indirect
31+
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0 // indirect
32+
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0 // indirect
2633
github.com/Microsoft/go-winio v0.6.2 // indirect
2734
github.com/ProtonMail/go-crypto v1.1.6 // indirect
2835
github.com/aquasecurity/go-version v0.0.1 // indirect
2936
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
37+
github.com/cespare/xxhash/v2 v2.3.0 // indirect
3038
github.com/charmbracelet/x/ansi v0.4.5 // indirect
3139
github.com/cloudflare/circl v1.6.1 // indirect
40+
github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect
3241
github.com/cyphar/filepath-securejoin v0.4.1 // indirect
3342
github.com/emirpasic/gods v1.18.1 // indirect
43+
github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect
44+
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
3445
github.com/felixge/httpsnoop v1.0.4 // indirect
3546
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
3647
github.com/go-git/go-billy/v5 v5.6.2 // indirect
37-
github.com/go-logr/logr v1.4.2 // indirect
48+
github.com/go-jose/go-jose/v4 v4.0.5 // indirect
49+
github.com/go-logr/logr v1.4.3 // indirect
3850
github.com/go-logr/stdr v1.2.2 // indirect
3951
github.com/goccy/go-yaml v1.18.0 // indirect
4052
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
4153
github.com/google/s2a-go v0.1.9 // indirect
54+
github.com/google/uuid v1.6.0 // indirect
4255
github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect
43-
github.com/googleapis/gax-go/v2 v2.14.2 // indirect
56+
github.com/googleapis/gax-go/v2 v2.15.0 // indirect
4457
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
4558
github.com/kevinburke/ssh_config v1.2.0 // indirect
4659
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
@@ -49,30 +62,36 @@ require (
4962
github.com/package-url/packageurl-go v0.1.3 // indirect
5063
github.com/pjbgf/sha1cd v0.3.2 // indirect
5164
github.com/pkg/errors v0.9.1 // indirect
65+
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
5266
github.com/rivo/uniseg v0.4.7 // indirect
5367
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
5468
github.com/skeema/knownhosts v1.3.1 // indirect
69+
github.com/spiffe/go-spiffe/v2 v2.5.0 // indirect
5570
github.com/xanzy/ssh-agent v0.3.3 // indirect
71+
github.com/zeebo/errs v1.4.0 // indirect
5672
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
73+
go.opentelemetry.io/contrib/detectors/gcp v1.36.0 // indirect
5774
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 // indirect
5875
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
5976
go.opentelemetry.io/otel v1.36.0 // indirect
6077
go.opentelemetry.io/otel/metric v1.36.0 // indirect
78+
go.opentelemetry.io/otel/sdk v1.36.0 // indirect
79+
go.opentelemetry.io/otel/sdk/metric v1.36.0 // indirect
6180
go.opentelemetry.io/otel/trace v1.36.0 // indirect
62-
golang.org/x/crypto v0.39.0 // indirect
81+
golang.org/x/crypto v0.41.0 // indirect
6382
golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b // indirect
64-
golang.org/x/net v0.41.0 // indirect
83+
golang.org/x/net v0.43.0 // indirect
6584
golang.org/x/oauth2 v0.30.0 // indirect
6685
golang.org/x/sync v0.16.0 // indirect
67-
golang.org/x/sys v0.33.0 // indirect
68-
golang.org/x/text v0.26.0 // indirect
86+
golang.org/x/sys v0.35.0 // indirect
87+
golang.org/x/text v0.28.0 // indirect
6988
golang.org/x/time v0.12.0 // indirect
7089
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
71-
google.golang.org/api v0.237.0 // indirect
72-
google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2 // indirect
73-
google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 // indirect
74-
google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect
75-
google.golang.org/grpc v1.73.0 // indirect
76-
google.golang.org/protobuf v1.36.6 // indirect
90+
google.golang.org/api v0.247.0 // indirect
91+
google.golang.org/genproto v0.0.0-20250603155806-513f23925822 // indirect
92+
google.golang.org/genproto/googleapis/api v0.0.0-20250818200422-3122310a409c // indirect
93+
google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c // indirect
94+
google.golang.org/grpc v1.74.2 // indirect
95+
google.golang.org/protobuf v1.36.7 // indirect
7796
gopkg.in/warnings.v0 v0.1.2 // indirect
7897
)

0 commit comments

Comments
 (0)