From 031417afb6c45b2f72691805173f71e6c37cafb8 Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Mon, 23 Sep 2024 14:07:56 -0700 Subject: [PATCH 1/4] bump GitHub library to v65 Signed-off-by: Spencer Schrock --- app/server/post_results.go | 2 +- app/server/post_results_e2e_test.go | 2 +- app/server/verify_workflow.go | 2 +- app/server/verify_workflow_test.go | 2 +- go.mod | 2 +- go.sum | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app/server/post_results.go b/app/server/post_results.go index 441815b4..d5bf22ee 100644 --- a/app/server/post_results.go +++ b/app/server/post_results.go @@ -35,7 +35,7 @@ import ( "github.com/cyberphone/json-canonicalization/go/src/webpki.org/jsoncanonicalizer" "github.com/go-openapi/runtime/middleware" "github.com/go-openapi/strfmt" - "github.com/google/go-github/v42/github" + "github.com/google/go-github/v65/github" merkleproof "github.com/transparency-dev/merkle/proof" "github.com/transparency-dev/merkle/rfc6962" "gocloud.dev/blob" diff --git a/app/server/post_results_e2e_test.go b/app/server/post_results_e2e_test.go index 81dfeac5..85036e57 100644 --- a/app/server/post_results_e2e_test.go +++ b/app/server/post_results_e2e_test.go @@ -20,7 +20,7 @@ import ( "net/http" "os" - "github.com/google/go-github/v42/github" + "github.com/google/go-github/v65/github" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" diff --git a/app/server/verify_workflow.go b/app/server/verify_workflow.go index ac54325a..8041ce1d 100644 --- a/app/server/verify_workflow.go +++ b/app/server/verify_workflow.go @@ -22,7 +22,7 @@ import ( "regexp" "strings" - "github.com/google/go-github/v42/github" + "github.com/google/go-github/v65/github" "github.com/rhysd/actionlint" ) diff --git a/app/server/verify_workflow_test.go b/app/server/verify_workflow_test.go index 1607b131..e4f36dab 100644 --- a/app/server/verify_workflow_test.go +++ b/app/server/verify_workflow_test.go @@ -22,7 +22,7 @@ import ( "testing" "unicode/utf8" - "github.com/google/go-github/v42/github" + "github.com/google/go-github/v65/github" "github.com/stretchr/testify/assert" ) diff --git a/go.mod b/go.mod index 48a3cc6c..5e315f38 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/ossf/scorecard-webapp go 1.23.0 require ( - github.com/google/go-github/v42 v42.0.0 + github.com/google/go-github/v65 v65.0.0 github.com/rhysd/actionlint v1.7.1 github.com/stretchr/testify v1.9.0 ) diff --git a/go.sum b/go.sum index 90bc8d42..fd2ea9da 100644 --- a/go.sum +++ b/go.sum @@ -131,8 +131,8 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-github/v42 v42.0.0 h1:YNT0FwjPrEysRkLIiKuEfSvBPCGKphW5aS5PxwaoLec= -github.com/google/go-github/v42 v42.0.0/go.mod h1:jgg/jvyI0YlDOM1/ps6XYh04HNQ3vKf0CVko62/EhRg= +github.com/google/go-github/v65 v65.0.0 h1:pQ7BmO3DZivvFk92geC0jB0q2m3gyn8vnYPgV7GSLhQ= +github.com/google/go-github/v65 v65.0.0/go.mod h1:DvrqWo5hvsdhJvHd4WyVF9ttANN3BniqjP8uTFMNb60= github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= github.com/google/go-replayers/grpcreplay v1.1.0 h1:S5+I3zYyZ+GQz68OfbURDdt/+cSMqCK1wrvNx7WBzTE= From 5b5289863fcebd0b384770da24315dfb0586eee3 Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Tue, 24 Sep 2024 13:26:10 -0700 Subject: [PATCH 2/4] move githubVerifier to its own file Signed-off-by: Spencer Schrock --- app/server/github_verifier.go | 92 ++++++++++++++++++++++++++++++ app/server/github_verifier_test.go | 76 ++++++++++++++++++++++++ app/server/verify_workflow.go | 72 ----------------------- app/server/verify_workflow_test.go | 55 ------------------ 4 files changed, 168 insertions(+), 127 deletions(-) create mode 100644 app/server/github_verifier.go create mode 100644 app/server/github_verifier_test.go diff --git a/app/server/github_verifier.go b/app/server/github_verifier.go new file mode 100644 index 00000000..6e6bebeb --- /dev/null +++ b/app/server/github_verifier.go @@ -0,0 +1,92 @@ +// Copyright 2024 OpenSSF Scorecard Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package server + +import ( + "context" + "fmt" + "net/http" + + "github.com/google/go-github/v65/github" +) + +type githubVerifier struct { + ctx context.Context + client *github.Client +} + +// contains makes two "core" API calls: one for the default branch, and one to compare the target hash to a branch +// if the repo is "github/codeql-action", also check releases/v1 before failing. +func (g *githubVerifier) contains(owner, repo, hash string) (bool, error) { + defaultBranch, err := g.defaultBranch(owner, repo) + if err != nil { + return false, err + } + contains, err := g.branchContains(defaultBranch, owner, repo, hash) + if err != nil { + return false, err + } + if contains { + return true, nil + } + + switch { + // github/codeql-action has commits from their release branches that don't show up in the default branch + // this isn't the best approach for now, but theres no universal "does this commit belong to this repo" call + case owner == "github" && repo == "codeql-action": + releaseBranches := []string{"releases/v3", "releases/v2", "releases/v1"} + for _, branch := range releaseBranches { + contains, err = g.branchContains(branch, owner, repo, hash) + if err != nil { + return false, err + } + if contains { + return true, nil + } + } + + // add fallback lookup for actions/upload-artifact v3/node20 branch + // https://github.com/actions/starter-workflows/pull/2348#discussion_r1536228344 + case owner == "actions" && repo == "upload-artifact": + contains, err = g.branchContains("v3/node20", owner, repo, hash) + } + return contains, err +} + +func (g *githubVerifier) defaultBranch(owner, repo string) (string, error) { + githubRepository, _, err := g.client.Repositories.Get(g.ctx, owner, repo) + if err != nil { + return "", fmt.Errorf("fetching repository info: %w", err) + } + if githubRepository == nil || githubRepository.DefaultBranch == nil { + return "", errNoDefaultBranch + } + return *githubRepository.DefaultBranch, nil +} + +func (g *githubVerifier) branchContains(branch, owner, repo, hash string) (bool, error) { + opts := &github.ListOptions{PerPage: 1} + diff, resp, err := g.client.Repositories.CompareCommits(g.ctx, owner, repo, branch, hash, opts) + if err != nil { + if resp.StatusCode == http.StatusNotFound { + // NotFound can be returned for some divergent cases: "404 No common ancestor between ..." + return false, nil + } + return false, fmt.Errorf("error comparing revisions: %w", err) + } + + // Target should be behind or at the base ref if it is considered contained. + return diff.GetStatus() == "behind" || diff.GetStatus() == "identical", nil +} diff --git a/app/server/github_verifier_test.go b/app/server/github_verifier_test.go new file mode 100644 index 00000000..29a73881 --- /dev/null +++ b/app/server/github_verifier_test.go @@ -0,0 +1,76 @@ +// Copyright 2024 OpenSSF Scorecard Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package server + +import ( + "context" + "net/http" + "testing" + + "github.com/google/go-github/v65/github" +) + +func Test_githubVerifier_contains_codeql_v1(t *testing.T) { + t.Parallel() + httpClient := http.Client{ + Transport: suffixStubTripper{ + responsePaths: map[string]string{ + "codeql-action": "./testdata/api/github/repository.json", // api call which finds the default branch + "main...somehash": "./testdata/api/github/divergent.json", // doesnt belong to default branch + "v3...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v3 branch + "v2...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v2 branch + "v1...somehash": "./testdata/api/github/containsCommit.json", // belongs to releases/v1 branch + }, + }, + } + client := github.NewClient(&httpClient) + gv := githubVerifier{ + ctx: context.Background(), + client: client, + } + got, err := gv.contains("github", "codeql-action", "somehash") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != true { + t.Errorf("expected to contain hash, but it didnt") + } +} + +func Test_githubVerifier_contains_codeql_v2(t *testing.T) { + t.Parallel() + httpClient := http.Client{ + Transport: suffixStubTripper{ + responsePaths: map[string]string{ + "codeql-action": "./testdata/api/github/repository.json", // api call which finds the default branch + "main...somehash": "./testdata/api/github/divergent.json", // doesnt belong to default branch + "v3...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v3 branch either + "v2...somehash": "./testdata/api/github/containsCommit.json", // belongs to releases/v2 branch + }, + }, + } + client := github.NewClient(&httpClient) + gv := githubVerifier{ + ctx: context.Background(), + client: client, + } + got, err := gv.contains("github", "codeql-action", "somehash") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != true { + t.Errorf("expected to contain hash, but it didnt") + } +} diff --git a/app/server/verify_workflow.go b/app/server/verify_workflow.go index 8041ce1d..0e9155d8 100644 --- a/app/server/verify_workflow.go +++ b/app/server/verify_workflow.go @@ -15,14 +15,11 @@ package server import ( - "context" "errors" "fmt" - "net/http" "regexp" "strings" - "github.com/google/go-github/v65/github" "github.com/rhysd/actionlint" ) @@ -245,75 +242,6 @@ func isCommitHash(s string) bool { return reCommitSHA.MatchString(s) } -type githubVerifier struct { - ctx context.Context - client *github.Client -} - -// contains makes two "core" API calls: one for the default branch, and one to compare the target hash to a branch -// if the repo is "github/codeql-action", also check releases/v1 before failing. -func (g *githubVerifier) contains(owner, repo, hash string) (bool, error) { - defaultBranch, err := g.defaultBranch(owner, repo) - if err != nil { - return false, err - } - contains, err := g.branchContains(defaultBranch, owner, repo, hash) - if err != nil { - return false, err - } - if contains { - return true, nil - } - - switch { - // github/codeql-action has commits from their release branches that don't show up in the default branch - // this isn't the best approach for now, but theres no universal "does this commit belong to this repo" call - case owner == "github" && repo == "codeql-action": - releaseBranches := []string{"releases/v3", "releases/v2", "releases/v1"} - for _, branch := range releaseBranches { - contains, err = g.branchContains(branch, owner, repo, hash) - if err != nil { - return false, err - } - if contains { - return true, nil - } - } - - // add fallback lookup for actions/upload-artifact v3/node20 branch - // https://github.com/actions/starter-workflows/pull/2348#discussion_r1536228344 - case owner == "actions" && repo == "upload-artifact": - contains, err = g.branchContains("v3/node20", owner, repo, hash) - } - return contains, err -} - -func (g *githubVerifier) defaultBranch(owner, repo string) (string, error) { - githubRepository, _, err := g.client.Repositories.Get(g.ctx, owner, repo) - if err != nil { - return "", fmt.Errorf("fetching repository info: %w", err) - } - if githubRepository == nil || githubRepository.DefaultBranch == nil { - return "", errNoDefaultBranch - } - return *githubRepository.DefaultBranch, nil -} - -func (g *githubVerifier) branchContains(branch, owner, repo, hash string) (bool, error) { - opts := &github.ListOptions{PerPage: 1} - diff, resp, err := g.client.Repositories.CompareCommits(g.ctx, owner, repo, branch, hash, opts) - if err != nil { - if resp.StatusCode == http.StatusNotFound { - // NotFound can be returned for some divergent cases: "404 No common ancestor between ..." - return false, nil - } - return false, fmt.Errorf("error comparing revisions: %w", err) - } - - // Target should be behind or at the base ref if it is considered contained. - return diff.GetStatus() == "behind" || diff.GetStatus() == "identical", nil -} - func hasServices(j *actionlint.Job) bool { if j == nil { return false diff --git a/app/server/verify_workflow_test.go b/app/server/verify_workflow_test.go index e4f36dab..c77cb8ca 100644 --- a/app/server/verify_workflow_test.go +++ b/app/server/verify_workflow_test.go @@ -15,14 +15,12 @@ package server import ( - "context" "net/http" "os" "strings" "testing" "unicode/utf8" - "github.com/google/go-github/v65/github" "github.com/stretchr/testify/assert" ) @@ -115,59 +113,6 @@ func (s suffixStubTripper) RoundTrip(r *http.Request) (*http.Response, error) { }, nil } -func Test_githubVerifier_contains_codeql_v1(t *testing.T) { - t.Parallel() - httpClient := http.Client{ - Transport: suffixStubTripper{ - responsePaths: map[string]string{ - "codeql-action": "./testdata/api/github/repository.json", // api call which finds the default branch - "main...somehash": "./testdata/api/github/divergent.json", // doesnt belong to default branch - "v3...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v3 branch - "v2...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v2 branch - "v1...somehash": "./testdata/api/github/containsCommit.json", // belongs to releases/v1 branch - }, - }, - } - client := github.NewClient(&httpClient) - gv := githubVerifier{ - ctx: context.Background(), - client: client, - } - got, err := gv.contains("github", "codeql-action", "somehash") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if got != true { - t.Errorf("expected to contain hash, but it didnt") - } -} - -func Test_githubVerifier_contains_codeql_v2(t *testing.T) { - t.Parallel() - httpClient := http.Client{ - Transport: suffixStubTripper{ - responsePaths: map[string]string{ - "codeql-action": "./testdata/api/github/repository.json", // api call which finds the default branch - "main...somehash": "./testdata/api/github/divergent.json", // doesnt belong to default branch - "v3...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v3 branch either - "v2...somehash": "./testdata/api/github/containsCommit.json", // belongs to releases/v2 branch - }, - }, - } - client := github.NewClient(&httpClient) - gv := githubVerifier{ - ctx: context.Background(), - client: client, - } - got, err := gv.contains("github", "codeql-action", "somehash") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if got != true { - t.Errorf("expected to contain hash, but it didnt") - } -} - func FuzzVerifyWorkflow(f *testing.F) { testfiles := []string{ "testdata/workflow-valid.yml", From 2f11571ae9ae6a25f9d81bcc86ba183ee691aca7 Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Tue, 24 Sep 2024 13:36:42 -0700 Subject: [PATCH 3/4] define a commit type this helps us use a map later. Signed-off-by: Spencer Schrock --- app/server/github_verifier.go | 3 ++- app/server/github_verifier_test.go | 4 ++-- app/server/post_results_e2e_test.go | 4 ++-- app/server/verify_workflow.go | 14 +++++++++++--- app/server/verify_workflow_test.go | 4 ++-- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/app/server/github_verifier.go b/app/server/github_verifier.go index 6e6bebeb..af5a7370 100644 --- a/app/server/github_verifier.go +++ b/app/server/github_verifier.go @@ -29,7 +29,8 @@ type githubVerifier struct { // contains makes two "core" API calls: one for the default branch, and one to compare the target hash to a branch // if the repo is "github/codeql-action", also check releases/v1 before failing. -func (g *githubVerifier) contains(owner, repo, hash string) (bool, error) { +func (g *githubVerifier) contains(c commit) (bool, error) { + owner, repo, hash := c.owner, c.repo, c.hash defaultBranch, err := g.defaultBranch(owner, repo) if err != nil { return false, err diff --git a/app/server/github_verifier_test.go b/app/server/github_verifier_test.go index 29a73881..e013b43f 100644 --- a/app/server/github_verifier_test.go +++ b/app/server/github_verifier_test.go @@ -40,7 +40,7 @@ func Test_githubVerifier_contains_codeql_v1(t *testing.T) { ctx: context.Background(), client: client, } - got, err := gv.contains("github", "codeql-action", "somehash") + got, err := gv.contains(commit{"github", "codeql-action", "somehash"}) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -66,7 +66,7 @@ func Test_githubVerifier_contains_codeql_v2(t *testing.T) { ctx: context.Background(), client: client, } - got, err := gv.contains("github", "codeql-action", "somehash") + got, err := gv.contains(commit{"github", "codeql-action", "somehash"}) if err != nil { t.Fatalf("unexpected error: %v", err) } diff --git a/app/server/post_results_e2e_test.go b/app/server/post_results_e2e_test.go index 85036e57..f8c0dda7 100644 --- a/app/server/post_results_e2e_test.go +++ b/app/server/post_results_e2e_test.go @@ -134,14 +134,14 @@ var _ = Describe("E2E Test: githubVerifier_contains", func() { Context("E2E Test: Validate known good commits", func() { It("can detect actions/upload-artifact v3-node20 commits", func() { gv := getGithubVerifier() - c, err := gv.contains("actions", "upload-artifact", "97a0fba1372883ab732affbe8f94b823f91727db") + c, err := gv.contains(commit{"actions", "upload-artifact", "97a0fba1372883ab732affbe8f94b823f91727db"}) Expect(err).Should(BeNil()) Expect(c).To(BeTrue()) }) It("can detect github/codeql-action backport commits", func() { gv := getGithubVerifier() - c, err := gv.contains("github", "codeql-action", "a82bad71823183e5b120ab52d521460ecb0585fe") + c, err := gv.contains(commit{"github", "codeql-action", "a82bad71823183e5b120ab52d521460ecb0585fe"}) Expect(err).Should(BeNil()) Expect(c).To(BeTrue()) }) diff --git a/app/server/verify_workflow.go b/app/server/verify_workflow.go index 0e9155d8..7b32b3c0 100644 --- a/app/server/verify_workflow.go +++ b/app/server/verify_workflow.go @@ -56,8 +56,12 @@ var ubuntuRunners = map[string]bool{ "ubuntu-18.04": true, } +type commit struct { + owner, repo, hash string +} + type commitVerifier interface { - contains(owner, repo, hash string) (bool, error) + contains(c commit) (bool, error) } type verificationError struct { @@ -169,8 +173,12 @@ func verifyScorecardWorkflow(workflowContent string, verifier commitVerifier) er if isCommitHash(ref) { s := strings.Split(stepName, "/") // no need to length check as the step name is one of the ones above - owner, repo := s[0], s[1] - contains, err := verifier.contains(owner, repo, ref) + c := commit{ + owner: s[0], + repo: s[1], + hash: ref, + } + contains, err := verifier.contains(c) if err != nil { return err } diff --git a/app/server/verify_workflow_test.go b/app/server/verify_workflow_test.go index c77cb8ca..883847f4 100644 --- a/app/server/verify_workflow_test.go +++ b/app/server/verify_workflow_test.go @@ -28,8 +28,8 @@ type allowListVerifier struct { allowed map[string]bool } -func (a *allowListVerifier) contains(owner, repo, hash string) (bool, error) { - return a.allowed[hash], nil +func (a *allowListVerifier) contains(c commit) (bool, error) { + return a.allowed[c.hash], nil } var allowCommitVerifier = &allowListVerifier{ From c67d7065752010ffa640ad676e95036c67a7ba91 Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Tue, 24 Sep 2024 13:39:13 -0700 Subject: [PATCH 4/4] use release and tag APIs to enhance imposter commit verification The old verification process assumed a commit was always in the default branch, or came from a select number of hardcoded release branches. This was error prone whenever new releases branches were used by some of the actions we check. The commit verification workflow now uses dynamic data to determine which branches to check. The steps are now: 1. Check if the commit is one of the latest 100 tags, which should be the most common case. 2. Check the default branch (unchanged from before). 3. Check branches associated with the most recent releases. This removes the hardcoded checks and should require fewer updates in the future. Signed-off-by: Spencer Schrock --- app/server/github_verifier.go | 223 +++++++++++++++--- app/server/github_verifier_test.go | 16 +- app/server/post_results.go | 5 +- app/server/post_results_e2e_test.go | 16 +- .../testdata/api/github/codeqlV3Tags.json | 12 + go.mod | 1 + go.sum | 2 + 7 files changed, 227 insertions(+), 48 deletions(-) create mode 100644 app/server/testdata/api/github/codeqlV3Tags.json diff --git a/app/server/github_verifier.go b/app/server/github_verifier.go index af5a7370..e857c720 100644 --- a/app/server/github_verifier.go +++ b/app/server/github_verifier.go @@ -16,26 +16,62 @@ package server import ( "context" + "errors" "fmt" "net/http" + "strconv" + "strings" "github.com/google/go-github/v65/github" + "golang.org/x/mod/semver" ) +var errInvalidCodeQLVersion = errors.New("codeql version invalid") + type githubVerifier struct { - ctx context.Context - client *github.Client + ctx context.Context + client *github.Client + cachedCommits map[commit]bool + codeqlActionMajor string } -// contains makes two "core" API calls: one for the default branch, and one to compare the target hash to a branch -// if the repo is "github/codeql-action", also check releases/v1 before failing. +// returns a new githubVerifier, with an instantiated map. +// most uses should use this constructor. +func newGitHubVerifier(ctx context.Context, client *github.Client) *githubVerifier { + verifier := githubVerifier{ + ctx: ctx, + client: client, + } + verifier.cachedCommits = map[commit]bool{} + return &verifier +} + +// contains may make several "core" API calls: +// - one to get repository tags (we expect most cases to only need this call) +// - two to get the default branch name and check it +// - up to 10 requests when checking previous release branches func (g *githubVerifier) contains(c commit) (bool, error) { - owner, repo, hash := c.owner, c.repo, c.hash - defaultBranch, err := g.defaultBranch(owner, repo) + // return the cached answer if we've seen this commit before + if contains, ok := g.cachedCommits[c]; ok { + return contains, nil + } + + // fetch 100 most recent tags first, as this should handle the most common scenario + if err := g.getTags(c.owner, c.repo); err != nil { + return false, err + } + + // check cache again now that it's populated with tags + if contains, ok := g.cachedCommits[c]; ok { + return contains, nil + } + + // check default branch + defaultBranch, err := g.defaultBranch(c.owner, c.repo) if err != nil { return false, err } - contains, err := g.branchContains(defaultBranch, owner, repo, hash) + contains, err := g.branchContains(defaultBranch, c.owner, c.repo, c.hash) if err != nil { return false, err } @@ -43,27 +79,9 @@ func (g *githubVerifier) contains(c commit) (bool, error) { return true, nil } - switch { - // github/codeql-action has commits from their release branches that don't show up in the default branch - // this isn't the best approach for now, but theres no universal "does this commit belong to this repo" call - case owner == "github" && repo == "codeql-action": - releaseBranches := []string{"releases/v3", "releases/v2", "releases/v1"} - for _, branch := range releaseBranches { - contains, err = g.branchContains(branch, owner, repo, hash) - if err != nil { - return false, err - } - if contains { - return true, nil - } - } - - // add fallback lookup for actions/upload-artifact v3/node20 branch - // https://github.com/actions/starter-workflows/pull/2348#discussion_r1536228344 - case owner == "actions" && repo == "upload-artifact": - contains, err = g.branchContains("v3/node20", owner, repo, hash) - } - return contains, err + // finally, check the most recent 10 release branches. This limit is arbitrary and can be adjusted in the future. + const lookback = 10 + return g.checkReleaseBranches(c.owner, c.repo, c.hash, defaultBranch, lookback) } func (g *githubVerifier) defaultBranch(owner, repo string) (string, error) { @@ -89,5 +107,152 @@ func (g *githubVerifier) branchContains(branch, owner, repo, hash string) (bool, } // Target should be behind or at the base ref if it is considered contained. - return diff.GetStatus() == "behind" || diff.GetStatus() == "identical", nil + contains := diff.GetStatus() == "behind" || diff.GetStatus() == "identical" + if contains { + g.markContains(owner, repo, hash) + } + return contains, nil +} + +func (g *githubVerifier) getTags(owner, repo string) error { + // 100 releases is the maximum supported by /repos/{owner}/{repo}/tags endpoint + opts := github.ListOptions{PerPage: 100} + tags, _, err := g.client.Repositories.ListTags(g.ctx, owner, repo, &opts) + if err != nil { + return fmt.Errorf("fetch tags: %w", err) + } + isCodeQL := owner == "github" && repo == "codeql-action" + for _, t := range tags { + if t.Commit != nil && t.Commit.SHA != nil { + g.markContains(owner, repo, *t.Commit.SHA) + } + // store the highest major version for github/codeql-action + // this helps check release branches later, due to their release process. + if isCodeQL { + version := t.GetName() + if semver.IsValid(version) && semver.Compare(version, g.codeqlActionMajor) == 1 { + g.codeqlActionMajor = semver.Major(version) + } + } + } + return nil +} + +func (g *githubVerifier) markContains(owner, repo, sha string) { + commit := commit{ + owner: owner, + repo: repo, + hash: strings.ToLower(sha), + } + g.cachedCommits[commit] = true +} + +// check the most recent release branches, ignoring the default branch which was already checked. +func (g *githubVerifier) checkReleaseBranches(owner, repo, hash, defaultBranch string, limit int) (bool, error) { + var ( + analyzedBranches int + branches []string + err error + ) + + switch { + // special case: github/codeql-action releases all come from "main", even though the tags are on different branches + case owner == "github" && repo == "codeql-action": + branches, err = g.getCodeQLBranches() + if err != nil { + return false, err + } + default: + branches, err = g.getBranches(owner, repo) + if err != nil { + return false, err + } + // we may have discovered more commit SHAs from the release process + c := commit{ + owner: owner, + repo: repo, + hash: hash, + } + if contains, ok := g.cachedCommits[c]; ok { + return contains, nil + } + } + + for _, branch := range branches { + if analyzedBranches >= limit { + break + } + if branch == defaultBranch { + continue + } + analyzedBranches++ + contains, err := g.branchContains(branch, owner, repo, hash) + if err != nil { + return false, err + } + if contains { + return true, nil + } + } + + return false, nil +} + +// returns the integer version from the expected format: "v1", "v2", "v3" .. +func parseCodeQLVersion(version string) (int, error) { + if !strings.HasPrefix(version, "v") { + return 0, fmt.Errorf("%w: %s", errInvalidCodeQLVersion, version) + } + major, err := strconv.Atoi(version[1:]) + if major < 1 || err != nil { + return 0, fmt.Errorf("%w: %s", errInvalidCodeQLVersion, version) + } + return major, nil +} + +// these branches follow the releases/v3 pattern, so we can make assumptions about what they're called. +// this should be called after g.getTags(), because it requires g.codeqlActionMajor to be set. +func (g *githubVerifier) getCodeQLBranches() ([]string, error) { + if g.codeqlActionMajor == "" { + return nil, nil + } + version, err := parseCodeQLVersion(g.codeqlActionMajor) + if err != nil { + return nil, err + } + branches := make([]string, version) + // descending order (e..g releases/v5, releases/v4, ... releases/v1) + for i := 0; i < version; i++ { + branches[i] = "releases/v" + strconv.Itoa(version-i) + } + return branches, nil +} + +func (g *githubVerifier) getBranches(owner, repo string) ([]string, error) { + var branches []string + seen := map[string]struct{}{} + + // 100 releases is the maximum supported by /repos/{owner}/{repo}/releases endpoint + opts := github.ListOptions{PerPage: 100} + releases, _, err := g.client.Repositories.ListReleases(g.ctx, owner, repo, &opts) + if err != nil { + return nil, fmt.Errorf("fetch releases: %w", err) + } + + for _, r := range releases { + if r.TargetCommitish != nil { + if isCommitHash(*r.TargetCommitish) { + // if a commit, we know it's in the repo + g.markContains(owner, repo, *r.TargetCommitish) + } else { + // otherwise we have a release branch to check + if _, ok := seen[*r.TargetCommitish]; !ok { + seen[*r.TargetCommitish] = struct{}{} + branches = append(branches, *r.TargetCommitish) + } + } + } + } + + return branches, nil } diff --git a/app/server/github_verifier_test.go b/app/server/github_verifier_test.go index e013b43f..d4bb765a 100644 --- a/app/server/github_verifier_test.go +++ b/app/server/github_verifier_test.go @@ -27,6 +27,7 @@ func Test_githubVerifier_contains_codeql_v1(t *testing.T) { httpClient := http.Client{ Transport: suffixStubTripper{ responsePaths: map[string]string{ + "tags": "./testdata/api/github/codeqlV3Tags.json", // start lookback from v3 "codeql-action": "./testdata/api/github/repository.json", // api call which finds the default branch "main...somehash": "./testdata/api/github/divergent.json", // doesnt belong to default branch "v3...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v3 branch @@ -36,11 +37,8 @@ func Test_githubVerifier_contains_codeql_v1(t *testing.T) { }, } client := github.NewClient(&httpClient) - gv := githubVerifier{ - ctx: context.Background(), - client: client, - } - got, err := gv.contains(commit{"github", "codeql-action", "somehash"}) + gv := newGitHubVerifier(context.Background(), client) + got, err := gv.contains(commit{owner: "github", repo: "codeql-action", hash: "somehash"}) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -54,6 +52,7 @@ func Test_githubVerifier_contains_codeql_v2(t *testing.T) { httpClient := http.Client{ Transport: suffixStubTripper{ responsePaths: map[string]string{ + "tags": "./testdata/api/github/codeqlV3Tags.json", // start lookback from v3 "codeql-action": "./testdata/api/github/repository.json", // api call which finds the default branch "main...somehash": "./testdata/api/github/divergent.json", // doesnt belong to default branch "v3...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v3 branch either @@ -62,11 +61,8 @@ func Test_githubVerifier_contains_codeql_v2(t *testing.T) { }, } client := github.NewClient(&httpClient) - gv := githubVerifier{ - ctx: context.Background(), - client: client, - } - got, err := gv.contains(commit{"github", "codeql-action", "somehash"}) + gv := newGitHubVerifier(context.Background(), client) + got, err := gv.contains(commit{owner: "github", repo: "codeql-action", hash: "somehash"}) if err != nil { t.Fatalf("unexpected error: %v", err) } diff --git a/app/server/post_results.go b/app/server/post_results.go index d5bf22ee..7bbcac99 100644 --- a/app/server/post_results.go +++ b/app/server/post_results.go @@ -226,10 +226,7 @@ func getAndVerifyWorkflowContent(ctx context.Context, return fmt.Errorf("error decoding workflow contents: %w", err) } - verifier := &githubVerifier{ - ctx: ctx, - client: client, - } + verifier := newGitHubVerifier(ctx, client) // Verify scorecard workflow. return verifyScorecardWorkflow(workflowContent, verifier) } diff --git a/app/server/post_results_e2e_test.go b/app/server/post_results_e2e_test.go index f8c0dda7..e9862c64 100644 --- a/app/server/post_results_e2e_test.go +++ b/app/server/post_results_e2e_test.go @@ -116,7 +116,7 @@ var _ = Describe("E2E Test: getAndVerifyWorkflowContent", func() { }) // helper function to setup a github verifier with an appropriately set token. -func getGithubVerifier() githubVerifier { +func getGithubVerifier() *githubVerifier { httpClient := http.DefaultClient token, _ := readGitHubTokens() if token != "" { @@ -124,14 +124,12 @@ func getGithubVerifier() githubVerifier { token: token, } } - return githubVerifier{ - ctx: context.Background(), - client: github.NewClient(httpClient), - } + return newGitHubVerifier(context.Background(), github.NewClient(httpClient)) } var _ = Describe("E2E Test: githubVerifier_contains", func() { Context("E2E Test: Validate known good commits", func() { + // https://github.com/actions/starter-workflows/pull/2348#discussion_r1536228344 It("can detect actions/upload-artifact v3-node20 commits", func() { gv := getGithubVerifier() c, err := gv.contains(commit{"actions", "upload-artifact", "97a0fba1372883ab732affbe8f94b823f91727db"}) @@ -145,5 +143,13 @@ var _ = Describe("E2E Test: githubVerifier_contains", func() { Expect(err).Should(BeNil()) Expect(c).To(BeTrue()) }) + + // https://github.com/ossf/scorecard-action/issues/1367#issuecomment-2333175627 + It("can detect release branch commits", func() { + gv := getGithubVerifier() + c, err := gv.contains(commit{"actions", "upload-artifact", "ff15f0306b3f739f7b6fd43fb5d26cd321bd4de5"}) + Expect(err).Should(BeNil()) + Expect(c).To(BeTrue()) + }) }) }) diff --git a/app/server/testdata/api/github/codeqlV3Tags.json b/app/server/testdata/api/github/codeqlV3Tags.json new file mode 100644 index 00000000..c46fffec --- /dev/null +++ b/app/server/testdata/api/github/codeqlV3Tags.json @@ -0,0 +1,12 @@ +[ + { + "name": "v3.26.9", + "zipball_url": "https://api.github.com/repos/github/codeql-action/zipball/refs/tags/v3.26.9", + "tarball_url": "https://api.github.com/repos/github/codeql-action/tarball/refs/tags/v3.26.9", + "commit": { + "sha": "461ef6c76dfe95d5c364de2f431ddbd31a417628", + "url": "https://api.github.com/repos/github/codeql-action/commits/461ef6c76dfe95d5c364de2f431ddbd31a417628" + }, + "node_id": "MDM6UmVmMjU5NDQ1ODc4OnJlZnMvdGFncy92My4yNi45" + } +] diff --git a/go.mod b/go.mod index 5e315f38..dfe98dc9 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/transparency-dev/merkle v0.0.2 gocloud.dev v0.37.0 + golang.org/x/mod v0.20.0 golang.org/x/net v0.29.0 ) diff --git a/go.sum b/go.sum index fd2ea9da..48c235f4 100644 --- a/go.sum +++ b/go.sum @@ -251,6 +251,8 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91 golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= +golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=