openvex
diff --git a/‎pkg/vex/functions_documents.go
Lines changed: 139 additions & 0 deletions b/‎pkg/vex/functions_documents.go
Lines changed: 139 additions & 0 deletions
diff --git a/‎pkg/vex/functions_documents_test.go
Lines changed: 97 additions & 0 deletions b/‎pkg/vex/functions_documents_test.go
Lines changed: 97 additions & 0 deletions
@@ -0,0 +1,139 @@
+/*
+Copyright 2023 The OpenVEX Authors
+SPDX-License-Identifier: Apache-2.0
+*/
+
+package vex
+
+import (
+	"crypto/sha256"
+	"errors"
+	"fmt"
+	"sort"
+	"strings"
+)
+
+// MergeOptions holds the optional settings read by MergeDocumentsWithOptions.
+// Every field may be left at its zero value: an empty DocumentID makes the
+// merge derive an ID from the merged documents, an empty Author or AuthorRole
+// leaves the corresponding field of the new document untouched, and an empty
+// Products or Vulnerabilities list disables that filter.
+type MergeOptions struct {
+	DocumentID      string   // ID to use in the new document (computed when empty)
+	Author          string   // Author to use in the new document
+	AuthorRole      string   // Role of the document author
+	Products        []string // Product IDs to consider; statements must match at least one
+	Vulnerabilities []string // IDs of vulnerabilities to merge; statements must match at least one
+}
+
+// MergeDocuments merges docs with every option left at its zero value.
+// It is shorthand for calling MergeDocumentsWithOptions with an empty
+// MergeOptions.
+func MergeDocuments(docs []*VEX) (*VEX, error) {
+	defaults := MergeOptions{}
+	return MergeDocumentsWithOptions(&defaults, docs)
+}
+
+// MergeDocumentsWithOptions combines the statements from a number of
+// documents into a new one, preserving time context from each of them.
+//
+// mergeOpts may be nil, in which case it behaves like an empty MergeOptions.
+// When mergeOpts.DocumentID is empty, the new document gets a deterministic
+// ID computed by hashing the sorted IDs of the merged documents. Author and
+// AuthorRole are only written to the new document when non-empty. When
+// Products and/or Vulnerabilities are set, only statements matching at least
+// one entry of each non-empty list are kept.
+//
+// Statements without a timestamp inherit the timestamp of the document they
+// come from.
+//
+// An error is returned when docs is empty, when one of the documents is nil,
+// or when a statement has no timestamp and its document has none either.
+func MergeDocumentsWithOptions(mergeOpts *MergeOptions, docs []*VEX) (*VEX, error) {
+	if len(docs) == 0 {
+		return nil, errors.New("at least one vex document is required to merge")
+	}
+
+	// Reject nil documents up front instead of panicking on a field access
+	// further down.
+	for i, d := range docs {
+		if d == nil {
+			return nil, fmt.Errorf("vex document #%d to merge is nil", i)
+		}
+	}
+
+	// Tolerate callers that pass no options at all.
+	if mergeOpts == nil {
+		mergeOpts = &MergeOptions{}
+	}
+
+	docID := mergeOpts.DocumentID
+	// If no document id is specified we compute a
+	// deterministic ID using the merged docs
+	if docID == "" {
+		ids := make([]string, 0, len(docs))
+		for i, d := range docs {
+			if d.ID == "" {
+				// Documents without an ID get a positional placeholder.
+				ids = append(ids, fmt.Sprintf("VEX-DOC-%d", i))
+				continue
+			}
+			ids = append(ids, d.ID)
+		}
+
+		// Sort before hashing so the ID does not depend on the order in
+		// which documents carrying an ID were passed in.
+		sort.Strings(ids)
+		docID = fmt.Sprintf("merged-vex-%x", sha256.Sum256([]byte(strings.Join(ids, ":"))))
+	}
+
+	newDoc := New()
+
+	newDoc.ID = docID
+	if author := mergeOpts.Author; author != "" {
+		newDoc.Author = author
+	}
+	if authorRole := mergeOpts.AuthorRole; authorRole != "" {
+		newDoc.AuthorRole = authorRole
+	}
+
+	// Deliberately non-nil so an empty merge still yields an empty slice.
+	ss := []Statement{}
+
+	// Create an inverse dict of products and vulnerabilities to filter
+	// these will only be used if ids to filter on are defined in the options.
+	iProds := make(map[string]struct{}, len(mergeOpts.Products))
+	iVulns := make(map[string]struct{}, len(mergeOpts.Vulnerabilities))
+	for _, id := range mergeOpts.Products {
+		iProds[id] = struct{}{}
+	}
+	for _, id := range mergeOpts.Vulnerabilities {
+		iVulns[id] = struct{}{}
+	}
+
+	for _, doc := range docs {
+		for _, s := range doc.Statements { //nolint:gocritic // this IS supposed to copy
+			// Product filter: skip the statement unless it matches at
+			// least one of the requested products.
+			matchesProduct := false
+			for id := range iProds {
+				if s.MatchesProduct(id, "") {
+					matchesProduct = true
+					break
+				}
+			}
+			if len(iProds) > 0 && !matchesProduct {
+				continue
+			}
+
+			// Vulnerability filter: same rule as the product filter.
+			matchesVuln := false
+			for id := range iVulns {
+				if s.Vulnerability.Matches(id) {
+					matchesVuln = true
+					break
+				}
+			}
+			if len(iVulns) > 0 && !matchesVuln {
+				continue
+			}
+
+			// If statement does not have a timestamp, cascade
+			// the timestamp down from the document.
+			// See https://github.com/chainguard-dev/vex/issues/49
+			if s.Timestamp == nil {
+				if doc.Timestamp == nil {
+					return nil, errors.New("unable to cascade timestamp from doc to timeless statement")
+				}
+				s.Timestamp = doc.Timestamp
+			}
+
+			ss = append(ss, s)
+		}
+	}
+
+	// NOTE(review): this dereference assumes New() always sets the
+	// document timestamp — confirm against New().
+	SortStatements(ss, *newDoc.Metadata.Timestamp)
+
+	newDoc.Statements = ss
+
+	return &newDoc, nil
+}
+
+// SortDocuments sorts and returns a slice of documents based on their date.
+// VEXes should be applied sequentially in chronological order as they capture
+// knowledge about an artifact as it changes over time.
+//
+// The slice is sorted in place and the same slice is returned. Documents
+// without a timestamp are placed after all documents that have one.
+// Documents that compare equal (same timestamp, or both without one) keep
+// their relative input order.
+func SortDocuments(docs []*VEX) []*VEX {
+	sort.SliceStable(docs, func(i, j int) bool {
+		ti, tj := docs[i].Timestamp, docs[j].Timestamp
+		// A document without a timestamp is never "less" than another one.
+		// Checking i first keeps the comparison consistent when both
+		// timestamps are nil (neither is less than the other).
+		if ti == nil {
+			return false
+		}
+		if tj == nil {
+			return true
+		}
+		return ti.Before(*tj)
+	})
+	return docs
+}
@@ -0,0 +1,97 @@
+/*
+Copyright 2023 The OpenVEX Authors
+SPDX-License-Identifier: Apache-2.0
+*/
+
+package vex
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// TestMergeDocumentsWithOptions merges the documents under testdata/ with
+// different options and checks that the merged statements match the
+// expected ones. Each case runs as its own subtest.
+func TestMergeDocumentsWithOptions(t *testing.T) {
+	doc1, err := Open("testdata/v001-1.vex.json")
+	require.NoError(t, err)
+	doc2, err := Open("testdata/v001-2.vex.json")
+	require.NoError(t, err)
+
+	doc3, err := Open("testdata/v020-1.vex.json")
+	require.NoError(t, err)
+	doc4, err := Open("testdata/v020-2.vex.json")
+	require.NoError(t, err)
+
+	for _, tc := range []struct {
+		name        string
+		opts        MergeOptions
+		docs        []*VEX
+		expectedDoc *VEX
+		shouldErr   bool
+	}{
+		{
+			name:        "zero docs should fail",
+			opts:        MergeOptions{},
+			docs:        []*VEX{},
+			expectedDoc: &VEX{},
+			shouldErr:   true,
+		},
+		{
+			name:        "one doc results in the same doc",
+			opts:        MergeOptions{},
+			docs:        []*VEX{doc1},
+			expectedDoc: doc1,
+			shouldErr:   false,
+		},
+		{
+			name: "two docs, as they are",
+			opts: MergeOptions{},
+			docs: []*VEX{doc1, doc2},
+			expectedDoc: &VEX{
+				Metadata: Metadata{},
+				Statements: []Statement{
+					doc1.Statements[0],
+					doc2.Statements[0],
+				},
+			},
+			shouldErr: false,
+		},
+		{
+			name: "two docs, filter product",
+			opts: MergeOptions{
+				// NOTE(review): this value looks like it was mangled by
+				// e-mail obfuscation; presumably a package URL with an
+				// "@version" part — confirm against testdata/v020-2.vex.json.
+				Products: []string{"pkg:apk/wolfi/[email protected]"},
+			},
+			docs: []*VEX{doc3, doc4},
+			expectedDoc: &VEX{
+				Metadata: Metadata{},
+				Statements: []Statement{
+					doc4.Statements[0],
+				},
+			},
+			shouldErr: false,
+		},
+		{
+			name: "two docs, filter vulnerability",
+			opts: MergeOptions{
+				Vulnerabilities: []string{"CVE-9876-54321"},
+			},
+			docs: []*VEX{doc3, doc4},
+			expectedDoc: &VEX{
+				Metadata: Metadata{},
+				Statements: []Statement{
+					doc3.Statements[0],
+				},
+			},
+			shouldErr: false,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			doc, err := MergeDocumentsWithOptions(&tc.opts, tc.docs)
+			if tc.shouldErr {
+				require.Error(t, err)
+				return
+			}
+			// Fail with the actual error instead of a nil-pointer panic
+			// on doc below.
+			require.NoError(t, err)
+			require.NotNil(t, doc)
+
+			// Check doc
+			require.Len(t, doc.Statements, len(tc.expectedDoc.Statements))
+			require.Equal(t, tc.expectedDoc.Statements, doc.Statements)
+		})
+	}
+}