Skip to content

Commit 0165ba4

Browse files
authored
Merge pull request #51 from puerco/merge
vex.MergeDocuments()
2 parents 308feef + eb8ba79 commit 0165ba4

10 files changed

+648
-313
lines changed

pkg/vex/functions_documents.go

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/*
2+
Copyright 2023 The OpenVEX Authors
3+
SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package vex
7+
8+
import (
9+
"crypto/sha256"
10+
"errors"
11+
"fmt"
12+
"sort"
13+
"strings"
14+
)
15+
16+
// MergeOptions carries the optional settings accepted by
// MergeDocumentsWithOptions.
type MergeOptions struct {
	// DocumentID is the ID to use in the new document.
	DocumentID string

	// Author is the author to use in the new document.
	Author string

	// AuthorRole is the role of the document author.
	AuthorRole string

	// Products lists the product IDs to consider.
	Products []string

	// Vulnerabilities lists the IDs of vulnerabilities to merge.
	Vulnerabilities []string
}
23+
24+
// MergeDocuments is a convenience wrapper over MergeDocumentsWithOptions
25+
// that does not take options.
26+
func MergeDocuments(docs []*VEX) (*VEX, error) {
27+
return MergeDocumentsWithOptions(&MergeOptions{}, docs)
28+
}
29+
30+
// Merge combines the statements from a number of documents into
31+
// a new one, preserving time context from each of them.
32+
func MergeDocumentsWithOptions(mergeOpts *MergeOptions, docs []*VEX) (*VEX, error) {
33+
if len(docs) == 0 {
34+
return nil, fmt.Errorf("at least one vex document is required to merge")
35+
}
36+
37+
docID := mergeOpts.DocumentID
38+
// If no document id is specified we compute a
39+
// deterministic ID using the merged docs
40+
if docID == "" {
41+
ids := []string{}
42+
for i, d := range docs {
43+
if d.ID == "" {
44+
ids = append(ids, fmt.Sprintf("VEX-DOC-%d", i))
45+
} else {
46+
ids = append(ids, d.ID)
47+
}
48+
}
49+
50+
sort.Strings(ids)
51+
h := sha256.New()
52+
h.Write([]byte(strings.Join(ids, ":")))
53+
// Hash the sorted IDs list
54+
docID = fmt.Sprintf("merged-vex-%x", h.Sum(nil))
55+
}
56+
57+
newDoc := New()
58+
59+
newDoc.ID = docID
60+
if author := mergeOpts.Author; author != "" {
61+
newDoc.Author = author
62+
}
63+
if authorRole := mergeOpts.AuthorRole; authorRole != "" {
64+
newDoc.AuthorRole = authorRole
65+
}
66+
67+
ss := []Statement{}
68+
69+
// Create an inverse dict of products and vulnerabilities to filter
70+
// these will only be used if ids to filter on are defined in the options.
71+
iProds := map[string]struct{}{}
72+
iVulns := map[string]struct{}{}
73+
for _, id := range mergeOpts.Products {
74+
iProds[id] = struct{}{}
75+
}
76+
for _, id := range mergeOpts.Vulnerabilities {
77+
iVulns[id] = struct{}{}
78+
}
79+
80+
for _, doc := range docs {
81+
for _, s := range doc.Statements { //nolint:gocritic // this IS supposed to copy
82+
matchesProduct := false
83+
for id := range iProds {
84+
if s.MatchesProduct(id, "") {
85+
matchesProduct = true
86+
break
87+
}
88+
}
89+
if len(iProds) > 0 && !matchesProduct {
90+
continue
91+
}
92+
93+
matchesVuln := false
94+
for id := range iVulns {
95+
if s.Vulnerability.Matches(id) {
96+
matchesVuln = true
97+
break
98+
}
99+
}
100+
if len(iVulns) > 0 && !matchesVuln {
101+
continue
102+
}
103+
104+
// If statement does not have a timestamp, cascade
105+
// the timestamp down from the document.
106+
// See https://github.com/chainguard-dev/vex/issues/49
107+
if s.Timestamp == nil {
108+
if doc.Timestamp == nil {
109+
return nil, errors.New("unable to cascade timestamp from doc to timeless statement")
110+
}
111+
s.Timestamp = doc.Timestamp
112+
}
113+
114+
ss = append(ss, s)
115+
}
116+
}
117+
118+
SortStatements(ss, *newDoc.Metadata.Timestamp)
119+
120+
newDoc.Statements = ss
121+
122+
return &newDoc, nil
123+
}
124+
125+
// SortDocuments sorts and returns a slice of documents based on their date.
126+
// VEXes should be applied sequentially in chronological order as they capture
127+
// knowledge about an artifact as it changes over time.
128+
func SortDocuments(docs []*VEX) []*VEX {
129+
sort.Slice(docs, func(i, j int) bool {
130+
if docs[j].Timestamp == nil {
131+
return true
132+
}
133+
if docs[i].Timestamp == nil {
134+
return false
135+
}
136+
return docs[i].Timestamp.Before(*(docs[j].Timestamp))
137+
})
138+
return docs
139+
}

pkg/vex/functions_documents_test.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
Copyright 2023 The OpenVEX Authors
3+
SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package vex
7+
8+
import (
9+
"testing"
10+
11+
"github.com/stretchr/testify/require"
12+
)
13+
14+
func TestMergeDocumentsWithOptions(t *testing.T) {
15+
doc1, err := Open("testdata/v001-1.vex.json")
16+
require.NoError(t, err)
17+
doc2, err := Open("testdata/v001-2.vex.json")
18+
require.NoError(t, err)
19+
20+
doc3, err := Open("testdata/v020-1.vex.json")
21+
require.NoError(t, err)
22+
doc4, err := Open("testdata/v020-2.vex.json")
23+
require.NoError(t, err)
24+
25+
for _, tc := range []struct {
26+
opts MergeOptions
27+
docs []*VEX
28+
expectedDoc *VEX
29+
shouldErr bool
30+
}{
31+
// Zero docs should fail
32+
{
33+
opts: MergeOptions{},
34+
docs: []*VEX{},
35+
expectedDoc: &VEX{},
36+
shouldErr: true,
37+
},
38+
// One doc results in the same doc
39+
{
40+
opts: MergeOptions{},
41+
docs: []*VEX{doc1},
42+
expectedDoc: doc1,
43+
shouldErr: false,
44+
},
45+
// Two docs, as they are
46+
{
47+
opts: MergeOptions{},
48+
docs: []*VEX{doc1, doc2},
49+
expectedDoc: &VEX{
50+
Metadata: Metadata{},
51+
Statements: []Statement{
52+
doc1.Statements[0],
53+
doc2.Statements[0],
54+
},
55+
},
56+
shouldErr: false,
57+
},
58+
// Two docs, filter product
59+
{
60+
opts: MergeOptions{
61+
Products: []string{"pkg:apk/wolfi/[email protected]"},
62+
},
63+
docs: []*VEX{doc3, doc4},
64+
expectedDoc: &VEX{
65+
Metadata: Metadata{},
66+
Statements: []Statement{
67+
doc4.Statements[0],
68+
},
69+
},
70+
shouldErr: false,
71+
},
72+
// Two docs, filter vulnerability
73+
{
74+
opts: MergeOptions{
75+
Vulnerabilities: []string{"CVE-9876-54321"},
76+
},
77+
docs: []*VEX{doc3, doc4},
78+
expectedDoc: &VEX{
79+
Metadata: Metadata{},
80+
Statements: []Statement{
81+
doc3.Statements[0],
82+
},
83+
},
84+
shouldErr: false,
85+
},
86+
} {
87+
doc, err := MergeDocumentsWithOptions(&tc.opts, tc.docs)
88+
if tc.shouldErr {
89+
require.Error(t, err)
90+
continue
91+
}
92+
93+
// Check doc
94+
require.Len(t, doc.Statements, len(tc.expectedDoc.Statements))
95+
require.Equal(t, doc.Statements, tc.expectedDoc.Statements)
96+
}
97+
}

0 commit comments

Comments
 (0)