From e263a6b479dfd080214c33fb52adf2597e722aad Mon Sep 17 00:00:00 2001 From: Will Rollason Date: Wed, 31 May 2023 07:51:50 +0100 Subject: [PATCH] feat: add deps.dev API integration --- README.md | 81 +++++- acceptance.bats | 5 + go.mod | 2 - go.sum | 4 - internal/commands/deps/enrich.go | 38 +++ internal/commands/deps/root.go | 1 + lib/deps/enrich.go | 37 +++ lib/deps/enrich_cyclonedx.go | 155 +++++++++++ lib/deps/enrich_spdx.go | 110 ++++++++ lib/deps/enrich_test.go | 397 +++++++++++++++++++++++++++ lib/deps/repo.go | 205 +++++++++++++- lib/ecosystems/enrich_spdx.go | 13 + lib/snyk/package.go | 18 +- testing/sbom-with-vcs.cyclonedx.json | 99 +++++++ 14 files changed, 1143 insertions(+), 22 deletions(-) create mode 100644 internal/commands/deps/enrich.go create mode 100644 lib/deps/enrich.go create mode 100644 lib/deps/enrich_cyclonedx.go create mode 100644 lib/deps/enrich_spdx.go create mode 100644 lib/deps/enrich_test.go create mode 100644 testing/sbom-with-vcs.cyclonedx.json diff --git a/README.md b/README.md index d27b1ad..c79b1ea 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ * [ecosyste.ms](https://ecosyste.ms) * [Snyk](https://snyk.io) * [OpenSSF Scorecard](https://securityscorecards.dev/) +* [deps.dev](https://deps.dev) By enrich, we mean add additional information. You put in an SBOM, and you get a richer SBOM back. In many cases SBOMs have a minimum of information, often just the name and version of a given package. By enriching that with additional information we can make better decisions about the packages we're using. @@ -191,7 +192,7 @@ Snyk will add a new [vulnerability](https://cyclonedx.org/docs/1.4/json/#vulnera } ``` -For SPDX, vulnerability informatio is added as additional `externalRefs`: +For SPDX, vulnerability information is added as additional `externalRefs`: ```json { @@ -246,6 +247,74 @@ This will currently add an external reference to the [Scorecard API](https://api We're currently looking at the best way of encoding some of the scorecard data in the SBOM itself as well. +## Enriching with deps.dev + +The [deps.dev](https://deps.dev) service provides repository insights and security data for open source packages. `parlay` can enrich SBOMs with repository metadata from deps.dev. + +``` +parlay deps enrich testing/sbom-with-vcs.cyclonedx.json +``` + +This will add repository information as properties for components that have VCS external references: + +```json +{ + "bom-ref": "68-subtext@6.0.12", + "type": "library", + "name": "subtext", + "version": "6.0.12", + "purl": "pkg:npm/subtext@6.0.12", + "externalReferences": [ + { + "url": "https://github.com/hapijs/subtext", + "type": "vcs" + } + ], + "properties": [ + { + "name": "deps:open_issues_count", + "value": "7" + }, + { + "name": "deps:stars_count", + "value": "24" + }, + { + "name": "deps:forks_count", + "value": "25" + }, + { + "name": "deps:license", + "value": "non-standard" + }, + { + "name": "deps:description", + "value": "HTTP payload parser" + }, + { + "name": "deps:scorecard", + "value": "4.30" + } + ] +} +``` + +For SPDX format, the same information is added as external references: + +```json +{ + "referenceCategory": "OTHER", + "referenceType": "deps:stars_count", + "referenceLocator": "24", + "comment": "deps.dev deps:stars_count" +} +``` + +You can also return raw JSON information about a specific repository from deps.dev: + +``` +parlay deps repo github.com/hapijs/subtext +``` ## What about enriching with other data sources? @@ -256,10 +325,10 @@ There are lots of other sources of package data, and it would be great to add su `parlay` is a fan of stdin and stdout. You can pipe SBOMs from other tools into `parlay`, and pipe between the separate `enrich` commands too. -Maybe you want to enrich an SBOM with both ecosyste.ms and Snyk data: +Maybe you want to enrich an SBOM with ecosyste.ms, Snyk, and deps.dev data: ``` -cat testing/sbom.cyclonedx.json | ./parlay e enrich - | ./parlay s enrich - | jq +cat testing/sbom.cyclonedx.json | ./parlay e enrich - | ./parlay s enrich - | ./parlay d enrich - | jq ``` Maybe you want to take the output from Syft and add vulnerabilitity data? @@ -268,7 +337,7 @@ Maybe you want to take the output from Syft and add vulnerabilitity data? syft -o cyclonedx-json nginx | parlay s enrich - | jq ``` -Maybe you want to geneate an SBOM with `cdxgen`, enrich that with extra information, and test that with `bomber`: +Maybe you want to generate an SBOM with `cdxgen`, enrich that with extra information, and test that with `bomber`: ``` cdxgen -o | parlay e enrich - | bomber scan --provider snyk - @@ -348,3 +417,7 @@ The various services used to enrich the SBOM data have data for a subset of purl * `pypi` Note that Scorecard data is available only for a subset of projects from supported Git repositories. See the [Scorecard project](https://github.com/ossf/scorecard) for more information. + +### deps.dev + +deps.dev enrichment works with any component that has VCS external references pointing to supported Git repositories (GitHub, GitLab, etc.). diff --git a/acceptance.bats b/acceptance.bats index 73eb6ab..f75eba5 100644 --- a/acceptance.bats +++ b/acceptance.bats @@ -29,3 +29,8 @@ run ./parlay ecosystems enrich not-here [ "$status" -eq 1 ] } + +@test "Not fail when testing deps enrichment" { + run ./parlay deps enrich testing/sbom-with-vcs.cyclonedx.json + [ "$status" -eq 0 ] +} diff --git a/go.mod b/go.mod index 539a21f..bc4fce0 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,6 @@ go 1.23 require ( github.com/CycloneDX/cyclonedx-go v0.9.2 github.com/deepmap/oapi-codegen v1.12.4 - github.com/edoardottt/depsdev v0.0.3 github.com/google/uuid v1.5.0 github.com/hashicorp/go-retryablehttp v0.7.7 github.com/jarcoal/httpmock v1.3.0 @@ -22,7 +21,6 @@ require ( require ( github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect - github.com/avast/retry-go v3.0.0+incompatible // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect diff --git a/go.sum b/go.sum index a1ebd52..c450859 100644 --- a/go.sum +++ b/go.sum @@ -45,8 +45,6 @@ github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 h1:aM1 github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092/go.mod h1:rYqSE9HbjzpHTI74vwPvae4ZVYZd1lue2ta6xHPdblA= github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk= -github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0= -github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w= github.com/bradleyjkemp/cupaloy/v2 v2.8.0 h1:any4BmKE+jGIaMpnU8YgH/I2LPiLBufr6oMMlVBbn9M= github.com/bradleyjkemp/cupaloy/v2 v2.8.0/go.mod h1:bm7JXdkRd4BHJk9HpwqAI8BoAY1lps46Enkdqw6aRX0= @@ -65,8 +63,6 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/deepmap/oapi-codegen v1.12.4 h1:pPmn6qI9MuOtCz82WY2Xaw46EQjgvxednXXrP7g5Q2s= github.com/deepmap/oapi-codegen v1.12.4/go.mod h1:3lgHGMu6myQ2vqbbTXH2H1o4eXFTGnFiDaOaKKl5yas= -github.com/edoardottt/depsdev v0.0.3 h1:QqTZGjdvrq8aZ0qhlPxUHiDrB+LadqUVsHX9a03pWO0= -github.com/edoardottt/depsdev v0.0.3/go.mod h1:IQTpYyqJbheAt6AXD/96CUMSGHha5r6rMLNKD8CXkiY= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= diff --git a/internal/commands/deps/enrich.go b/internal/commands/deps/enrich.go new file mode 100644 index 0000000..747ccaf --- /dev/null +++ b/internal/commands/deps/enrich.go @@ -0,0 +1,38 @@ +package deps + +import ( + "os" + + "github.com/snyk/parlay/internal/utils" + "github.com/snyk/parlay/lib/deps" + "github.com/snyk/parlay/lib/sbom" + + "github.com/rs/zerolog" + "github.com/spf13/cobra" +) + +func NewEnrichCommand(logger *zerolog.Logger) *cobra.Command { + cmd := cobra.Command{ + Use: "enrich ", + Short: "Enrich an SBOM with deps.dev data", + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + b, err := utils.GetUserInput(args[0], os.Stdin) + if err != nil { + logger.Fatal().Err(err).Msg("Failed to read input") + } + + doc, err := sbom.DecodeSBOMDocument(b) + if err != nil { + logger.Fatal().Err(err).Msg("Failed to read SBOM input") + } + + deps.EnrichSBOM(doc, logger) + + if err := doc.Encode(os.Stdout); err != nil { + logger.Fatal().Err(err).Msg("Failed to encode new SBOM") + } + }, + } + return &cmd +} diff --git a/internal/commands/deps/root.go b/internal/commands/deps/root.go index d5f1ab4..c0ecc05 100644 --- a/internal/commands/deps/root.go +++ b/internal/commands/deps/root.go @@ -20,6 +20,7 @@ func NewDepsRootCommand(logger *zerolog.Logger) *cobra.Command { } cmd.AddCommand(NewRepoCommand(logger)) + cmd.AddCommand(NewEnrichCommand(logger)) return &cmd } diff --git a/lib/deps/enrich.go b/lib/deps/enrich.go new file mode 100644 index 0000000..4b9f025 --- /dev/null +++ b/lib/deps/enrich.go @@ -0,0 +1,37 @@ +/* + * © 2023 Snyk Limited All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package deps + +import ( + cdx "github.com/CycloneDX/cyclonedx-go" + "github.com/rs/zerolog" + "github.com/spdx/tools-golang/spdx" + + "github.com/snyk/parlay/lib/sbom" +) + +func EnrichSBOM(doc *sbom.SBOMDocument, logger *zerolog.Logger) *sbom.SBOMDocument { + switch bom := doc.BOM.(type) { + case *cdx.BOM: + enrichCDX(bom, logger) + case *spdx.Document: + enrichSPDX(bom, logger) + default: + logger.Debug().Msg("Unsupported SBOM format for deps.dev enrichment") + } + return doc +} diff --git a/lib/deps/enrich_cyclonedx.go b/lib/deps/enrich_cyclonedx.go new file mode 100644 index 0000000..595b67a --- /dev/null +++ b/lib/deps/enrich_cyclonedx.go @@ -0,0 +1,155 @@ +/* + * © 2023 Snyk Limited All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package deps + +import ( + "fmt" + "strconv" + + cdx "github.com/CycloneDX/cyclonedx-go" + "github.com/remeh/sizedwaitgroup" + "github.com/rs/zerolog" +) + +func enrichOpenIssues(component cdx.Component, project Project) cdx.Component { + if project.OpenIssuesCount != nil { + return enrichProperty(component, "deps:open_issues_count", strconv.Itoa(*project.OpenIssuesCount)) + } + return component +} + +func enrichStars(component cdx.Component, project Project) cdx.Component { + if project.StarsCount != nil { + return enrichProperty(component, "deps:stars_count", strconv.Itoa(*project.StarsCount)) + } + return component +} + +func enrichScorecard(component cdx.Component, project Project) cdx.Component { + if project.Scorecard == nil || project.Scorecard.OverallScore == nil { + return component + } + return enrichProperty(component, "deps:scorecard", fmt.Sprintf("%.2f", *project.Scorecard.OverallScore)) +} + +func enrichForks(component cdx.Component, project Project) cdx.Component { + if project.ForksCount != nil { + return enrichProperty(component, "deps:forks_count", strconv.Itoa(*project.ForksCount)) + } + return component +} + +func enrichLicense(component cdx.Component, project Project) cdx.Component { + if project.License != nil { + return enrichProperty(component, "deps:license", *project.License) + } + return component +} + +func enrichDescription(component cdx.Component, project Project) cdx.Component { + if project.Description != nil { + return enrichProperty(component, "deps:description", *project.Description) + } + return component +} + +func enrichHomepage(component cdx.Component, project Project) cdx.Component { + if project.Homepage != nil { + return enrichProperty(component, "deps:homepage", *project.Homepage) + } + return component +} + +func enrichProperty(component cdx.Component, name string, value string) cdx.Component { + prop := cdx.Property{ + Name: name, + Value: value, + } + if component.Properties == nil { + component.Properties = &[]cdx.Property{prop} + } else { + *component.Properties = append(*component.Properties, prop) + } + return component +} + +func enrichComponents(bom *cdx.BOM, enrichFuncs []func(cdx.Component, Project) cdx.Component, logger *zerolog.Logger, cache *projectCache) { + wg := sizedwaitgroup.New(20) + newComponents := make([]cdx.Component, len(*bom.Components)) + for i, component := range *bom.Components { + wg.Add() + go func(component cdx.Component, i int) { + defer wg.Done() + + newComponents[i] = enrichComponentRecursive(component, enrichFuncs, logger, cache) + }(component, i) + } + wg.Wait() + bom.Components = &newComponents +} + +func enrichComponentRecursive(component cdx.Component, enrichFuncs []func(cdx.Component, Project) cdx.Component, logger *zerolog.Logger, cache *projectCache) cdx.Component { + if component.ExternalReferences != nil { + for _, ref := range *component.ExternalReferences { + if ref.Type == "vcs" { + proj, err := GetRepoDataWithCache(ref.URL, logger, cache) + if err != nil { + logger.Warn(). + Str("component", component.Name). + Str("url", ref.URL). + Err(err). + Msg("Failed to fetch deps.dev data for component") + } else { + for _, enrichFunc := range enrichFuncs { + component = enrichFunc(component, *proj) + } + } + break + } + } + } + + if component.Components != nil { + children := *component.Components + newChildren := make([]cdx.Component, len(children)) + for i, child := range children { + newChildren[i] = enrichComponentRecursive(child, enrichFuncs, logger, cache) + } + component.Components = &newChildren + } + + return component +} + +func enrichCDX(bom *cdx.BOM, logger *zerolog.Logger) { + if bom.Components == nil { + return + } + + enrichFuncs := []func(cdx.Component, Project) cdx.Component{ + enrichOpenIssues, + enrichStars, + enrichForks, + enrichLicense, + enrichDescription, + enrichHomepage, + enrichScorecard, + } + + cache := newProjectCache() + enrichComponents(bom, enrichFuncs, logger, cache) +} diff --git a/lib/deps/enrich_spdx.go b/lib/deps/enrich_spdx.go new file mode 100644 index 0000000..2c1910e --- /dev/null +++ b/lib/deps/enrich_spdx.go @@ -0,0 +1,110 @@ +/* + * © 2023 Snyk Limited All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package deps + +import ( + "fmt" + "strconv" + "strings" + + "github.com/remeh/sizedwaitgroup" + "github.com/rs/zerolog" + spdx "github.com/spdx/tools-golang/spdx" + spdx_2_3 "github.com/spdx/tools-golang/spdx/v2/v2_3" +) + +func enrichSPDX(bom *spdx.Document, logger *zerolog.Logger) { + wg := sizedwaitgroup.New(20) + cache := newProjectCache() + + for i, pkg := range bom.Packages { + wg.Add() + + go func(pkg *spdx_2_3.Package, i int) { + defer wg.Done() + + // Look for VCS external references + var repoURL string + for _, ref := range pkg.PackageExternalReferences { + if ref.RefType == "vcs" || (ref.RefType == "url" && (strings.Contains(ref.Locator, "github.com") || strings.Contains(ref.Locator, "gitlab.com") || strings.Contains(ref.Locator, "bitbucket.org"))) { + repoURL = ref.Locator + break + } + } + + // If no VCS reference found, skip this package + if repoURL == "" { + logger.Debug(). + Str("package", pkg.PackageName). + Msg("No VCS reference found, skipping deps.dev enrichment") + return + } + + proj, err := GetRepoDataWithCache(repoURL, logger, cache) + if err != nil { + logger.Debug(). + Str("package", pkg.PackageName). + Str("url", repoURL). + Err(err). + Msg("Failed to get repository data from deps.dev") + return + } + + // Add external references for each piece of data + if proj.OpenIssuesCount != nil { + enrichSPDXExternalRef(pkg, "deps:open_issues_count", strconv.Itoa(*proj.OpenIssuesCount)) + } + if proj.StarsCount != nil { + enrichSPDXExternalRef(pkg, "deps:stars_count", strconv.Itoa(*proj.StarsCount)) + } + if proj.ForksCount != nil { + enrichSPDXExternalRef(pkg, "deps:forks_count", strconv.Itoa(*proj.ForksCount)) + } + + if proj.License != nil && *proj.License != "" { + enrichSPDXExternalRef(pkg, "deps:license", *proj.License) + } + if proj.Description != nil && *proj.Description != "" { + enrichSPDXExternalRef(pkg, "deps:description", *proj.Description) + } + if proj.Homepage != nil && *proj.Homepage != "" { + enrichSPDXExternalRef(pkg, "deps:homepage", *proj.Homepage) + } + + if proj.Scorecard != nil && proj.Scorecard.OverallScore != nil { + enrichSPDXExternalRef(pkg, "deps:scorecard", fmt.Sprintf("%.2f", *proj.Scorecard.OverallScore)) + } + + logger.Debug(). + Str("package", pkg.PackageName). + Msg("Successfully enriched package with deps.dev data") + }(pkg, i) + } + + wg.Wait() +} + +func enrichSPDXExternalRef(pkg *spdx_2_3.Package, name string, value string) { + ref := &spdx_2_3.PackageExternalReference{ + Category: spdx.CategoryOther, + RefType: name, + Locator: value, + ExternalRefComment: fmt.Sprintf("deps.dev %s", name), + } + + pkg.PackageExternalReferences = append(pkg.PackageExternalReferences, ref) +} diff --git a/lib/deps/enrich_test.go b/lib/deps/enrich_test.go new file mode 100644 index 0000000..f820f57 --- /dev/null +++ b/lib/deps/enrich_test.go @@ -0,0 +1,397 @@ +/* + * © 2023 Snyk Limited All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package deps + +import ( + "net/http" + "regexp" + "testing" + + cdx "github.com/CycloneDX/cyclonedx-go" + "github.com/jarcoal/httpmock" + "github.com/rs/zerolog" + "github.com/spdx/tools-golang/spdx" + spdx_2_3 "github.com/spdx/tools-golang/spdx/v2/v2_3" + "github.com/stretchr/testify/assert" + + "github.com/snyk/parlay/lib/sbom" +) + +const depsDevAPIURL = "https://api.deps.dev/v3/projects/github.com%2Fsnyk%2Fparlay" + +func TestEnrichSBOM_CycloneDX(t *testing.T) { + teardown := setupDepsDevAPIMock(t) + defer teardown() + + bom := &cdx.BOM{ + Components: &[]cdx.Component{ + { + Name: "test-package", + ExternalReferences: &[]cdx.ExternalReference{ + { + Type: "vcs", + URL: "https://github.com/snyk/parlay", + }, + }, + }, + }, + } + doc := &sbom.SBOMDocument{BOM: bom} + logger := zerolog.Nop() + + EnrichSBOM(doc, &logger) + + assert.NotNil(t, bom.Components) + component := (*bom.Components)[0] + + assert.NotNil(t, component.Properties) + + hasOpenIssues := false + hasStars := false + hasScorecard := false + + for _, prop := range *component.Properties { + switch prop.Name { + case "deps:open_issues_count": + hasOpenIssues = true + assert.Equal(t, "42", prop.Value) + case "deps:stars_count": + hasStars = true + assert.Equal(t, "1250", prop.Value) + case "deps:scorecard": + hasScorecard = true + assert.Equal(t, "7.50", prop.Value) + } + } + + assert.True(t, hasOpenIssues, "Should have open issues count") + assert.True(t, hasStars, "Should have stars count") + assert.True(t, hasScorecard, "Should have scorecard score") +} + +func TestEnrichSBOM_SPDX(t *testing.T) { + teardown := setupDepsDevAPIMock(t) + defer teardown() + + pkg := &spdx_2_3.Package{ + PackageName: "test-package", + PackageSPDXIdentifier: "SPDXRef-Package", + PackageExternalReferences: []*spdx_2_3.PackageExternalReference{ + { + Category: "PACKAGE-MANAGER", + RefType: "vcs", + Locator: "https://github.com/snyk/parlay", + }, + }, + } + + doc := &spdx.Document{ + Packages: []*spdx_2_3.Package{pkg}, + } + sbomDoc := &sbom.SBOMDocument{BOM: doc} + logger := zerolog.Nop() + + EnrichSBOM(sbomDoc, &logger) + + assert.Greater(t, len(pkg.PackageExternalReferences), 1, "Should have added external references") + + hasOpenIssues := false + hasStars := false + hasScorecard := false + + for _, ref := range pkg.PackageExternalReferences { + switch ref.RefType { + case "deps:open_issues_count": + hasOpenIssues = true + assert.Equal(t, "42", ref.Locator) + case "deps:stars_count": + hasStars = true + assert.Equal(t, "1250", ref.Locator) + case "deps:scorecard": + hasScorecard = true + assert.Equal(t, "7.50", ref.Locator) + } + } + + assert.True(t, hasOpenIssues, "Should have open issues count") + assert.True(t, hasStars, "Should have stars count") + assert.True(t, hasScorecard, "Should have scorecard score") +} + +func TestEnrichSBOM_UnsupportedFormat(t *testing.T) { + logger := zerolog.Nop() + doc := &sbom.SBOMDocument{BOM: "unsupported"} + + EnrichSBOM(doc, &logger) +} + +func setupDepsDevAPIMock(t *testing.T) func() { + originalGetRetryClient := getRetryClient + + getRetryClient = func(logger *zerolog.Logger) *http.Client { + return &http.Client{Transport: httpmock.DefaultTransport} + } + + httpmock.Activate() + + mockResponse := `{ + "projectKey": { + "id": "github.com/snyk/parlay" + }, + "openIssuesCount": 42, + "starsCount": 1250, + "forksCount": 28, + "license": "Apache-2.0", + "description": "A great tool for SBOMs", + "homepage": "https://github.com/snyk/parlay", + "scorecard": { + "overallScore": 7.5, + "date": "2023-08-05T00:00:00Z" + } + }` + + httpmock.RegisterResponder("GET", depsDevAPIURL, + httpmock.NewStringResponder(http.StatusOK, mockResponse)) + + return func() { + httpmock.DeactivateAndReset() + getRetryClient = originalGetRetryClient + } +} + +func TestNormalizeRepoURL(t *testing.T) { + testCases := []struct { + name string + input string + expected string + }{ + { + name: "https URL", + input: "https://github.com/snyk/parlay", + expected: "github.com/snyk/parlay", + }, + { + name: "http URL", + input: "http://github.com/snyk/parlay", + expected: "github.com/snyk/parlay", + }, + { + name: "git URL", + input: "git://github.com/snyk/parlay", + expected: "github.com/snyk/parlay", + }, + { + name: "SSH URL", + input: "git@github.com:snyk/parlay", + expected: "github.com/snyk/parlay", + }, + { + name: "URL with .git suffix", + input: "https://github.com/snyk/parlay.git", + expected: "github.com/snyk/parlay", + }, + { + name: "URL with trailing slash", + input: "https://github.com/snyk/parlay/", + expected: "github.com/snyk/parlay", + }, + { + name: "Complex case", + input: "git@github.com:snyk/parlay.git/", + expected: "github.com/snyk/parlay", + }, + { + name: "URL with extra path segments", + input: "https://github.com/snyk/parlay/tree/main/lib", + expected: "github.com/snyk/parlay", + }, + { + name: "GitLab URL", + input: "https://gitlab.com/owner/project.git", + expected: "gitlab.com/owner/project", + }, + { + name: "Bitbucket URL", + input: "git@bitbucket.org:owner/project.git", + expected: "bitbucket.org/owner/project", + }, + { + name: "Invalid URL fallback", + input: "not-a-valid-url/but/has/slashes.git/", + expected: "not-a-valid-url/but/has/slashes", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := normalizeRepoURL(tc.input) + assert.Equal(t, tc.expected, result) + }) + } +} + +func TestGetRepoDataWithCache(t *testing.T) { + teardown := setupDepsDevAPIMock(t) + defer teardown() + + logger := zerolog.Nop() + cache := newProjectCache() + + project1, err := GetRepoDataWithCache("https://github.com/snyk/parlay", &logger, cache) + assert.NoError(t, err) + assert.NotNil(t, project1) + assert.NotNil(t, project1.OpenIssuesCount) + assert.Equal(t, 42, *project1.OpenIssuesCount) + + project2, err := GetRepoDataWithCache("https://github.com/snyk/parlay", &logger, cache) + assert.NoError(t, err) + assert.NotNil(t, project2) + assert.Equal(t, project1, project2) + + info := httpmock.GetCallCountInfo() + assert.Equal(t, 1, info["GET "+depsDevAPIURL]) +} + +func TestGetRepoData_NilValues(t *testing.T) { + originalGetRetryClient := getRetryClient + + getRetryClient = func(logger *zerolog.Logger) *http.Client { + return &http.Client{Transport: httpmock.DefaultTransport} + } + defer func() { + getRetryClient = originalGetRetryClient + }() + + httpmock.Activate() + defer httpmock.DeactivateAndReset() + + // Simulate BitBucket response with nil values + mockResponse := `{ + "projectKey": { + "id": "bitbucket.org/owner/repo" + }, + "openIssuesCount": null, + "starsCount": null, + "forksCount": null, + "license": "MIT", + "description": null, + "homepage": null, + "scorecard": null + }` + + httpmock.RegisterResponder("GET", "https://api.deps.dev/v3/projects/bitbucket.org%2Fowner%2Frepo", + httpmock.NewStringResponder(http.StatusOK, mockResponse)) + + logger := zerolog.Nop() + project, err := GetRepoDataWithLogger("https://bitbucket.org/owner/repo", &logger) + + assert.NoError(t, err) + assert.NotNil(t, project) + assert.Nil(t, project.OpenIssuesCount) + assert.Nil(t, project.StarsCount) + assert.Nil(t, project.ForksCount) + assert.NotNil(t, project.License) + assert.Equal(t, "MIT", *project.License) + assert.Nil(t, project.Description) + assert.Nil(t, project.Homepage) + assert.Nil(t, project.Scorecard) +} + +func TestGetRepoData_ServerError(t *testing.T) { + originalGetRetryClient := getRetryClient + + getRetryClient = func(logger *zerolog.Logger) *http.Client { + return &http.Client{Transport: httpmock.DefaultTransport} + } + defer func() { + getRetryClient = originalGetRetryClient + }() + + httpmock.Activate() + defer httpmock.DeactivateAndReset() + + httpmock.RegisterResponder("GET", depsDevAPIURL, + httpmock.NewStringResponder(http.StatusInternalServerError, "Server error")) + + logger := zerolog.Nop() + project, err := GetRepoDataWithLogger("https://github.com/snyk/parlay", &logger) + + assert.Error(t, err) + assert.Nil(t, project) +} + +func TestGetRepoDataNotFound(t *testing.T) { + originalGetRetryClient := getRetryClient + + getRetryClient = func(logger *zerolog.Logger) *http.Client { + return &http.Client{Transport: httpmock.DefaultTransport} + } + defer func() { + getRetryClient = originalGetRetryClient + }() + + httpmock.Activate() + defer httpmock.DeactivateAndReset() + + httpmock.RegisterResponder("GET", "https://api.deps.dev/v3/projects/github.com%2Fnonexistent%2Frepo", + httpmock.NewStringResponder(http.StatusNotFound, "Not found")) + + logger := zerolog.Nop() + project, err := GetRepoDataWithLogger("https://github.com/nonexistent/repo", &logger) + + assert.Error(t, err) + assert.Nil(t, project) + assert.Contains(t, err.Error(), "repository not found") +} + +func TestEnrichSBOM_ErrorHandling(t *testing.T) { + originalGetRetryClient := getRetryClient + + getRetryClient = func(logger *zerolog.Logger) *http.Client { + return &http.Client{Transport: httpmock.DefaultTransport} + } + defer func() { + getRetryClient = originalGetRetryClient + }() + + httpmock.Activate() + defer httpmock.DeactivateAndReset() + + httpmock.RegisterRegexpResponder("GET", regexp.MustCompile(`https://api\.deps\.dev/.*`), + httpmock.NewStringResponder(http.StatusNotFound, "Not found")) + + bom := &cdx.BOM{ + Components: &[]cdx.Component{ + { + Name: "test-package", + ExternalReferences: &[]cdx.ExternalReference{ + { + Type: "vcs", + URL: "https://github.com/nonexistent/repo", + }, + }, + }, + }, + } + doc := &sbom.SBOMDocument{BOM: bom} + logger := zerolog.Nop() + + EnrichSBOM(doc, &logger) + + component := (*bom.Components)[0] + assert.Nil(t, component.Properties) +} diff --git a/lib/deps/repo.go b/lib/deps/repo.go index b0effb3..180669a 100644 --- a/lib/deps/repo.go +++ b/lib/deps/repo.go @@ -17,13 +17,208 @@ package deps import ( - "github.com/edoardottt/depsdev/pkg/depsdev" + "encoding/json" + "fmt" + "net/http" + "net/url" + "strconv" + "strings" + "sync" + "time" + + "github.com/hashicorp/go-retryablehttp" + "github.com/rs/zerolog" ) -func GetRepoData(url string) (*depsdev.Project, error) { - proj, err := depsdev.GetProject(url) +// Project represents a deps.dev project +type Project struct { + ProjectKey ProjectKey `json:"projectKey"` + OpenIssuesCount *int `json:"openIssuesCount"` + StarsCount *int `json:"starsCount"` + ForksCount *int `json:"forksCount"` + License *string `json:"license"` + Description *string `json:"description"` + Homepage *string `json:"homepage"` + Scorecard *Scorecard `json:"scorecard"` +} + +type ProjectKey struct { + ID string `json:"id"` +} + +type Scorecard struct { + OverallScore *float64 `json:"overallScore"` +} + +type projectCache struct { + mu sync.RWMutex + data map[string]*Project +} + +func newProjectCache() *projectCache { + return &projectCache{data: make(map[string]*Project)} +} + +func (c *projectCache) Get(key string) (*Project, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + val, ok := c.data[key] + return val, ok +} + +func (c *projectCache) Set(key string, val *Project) { + c.mu.Lock() + defer c.mu.Unlock() + c.data[key] = val +} + +func normalizeRepoURL(repoURL string) string { + // Handle SSH URLs first + if strings.HasPrefix(repoURL, "git@") { + // Convert git@github.com:owner/repo.git to https://github.com/owner/repo + repoURL = strings.TrimPrefix(repoURL, "git@") + repoURL = strings.Replace(repoURL, ":", "/", 1) + repoURL = "https://" + repoURL + } + + // Remove trailing slashes and .git suffix before parsing + repoURL = strings.TrimSuffix(repoURL, "/") + repoURL = strings.TrimSuffix(repoURL, ".git/") + repoURL = strings.TrimSuffix(repoURL, ".git") + + // Parse the URL + parsedURL, err := url.Parse(repoURL) + if err != nil || parsedURL.Host == "" { + // If parsing fails or no host, return the original URL unchanged + // This handles cases like "not-a-valid-url/but/has/slashes" + return repoURL + } + + // Extract host and path + host := parsedURL.Host + path := strings.TrimPrefix(parsedURL.Path, "/") + + // Extract only the first two path segments (owner/repo) + pathParts := strings.Split(path, "/") + if len(pathParts) >= 2 { + path = pathParts[0] + "/" + pathParts[1] + } + + // Combine host and path + if path == "" { + return host + } + return host + "/" + path +} + +func GetRepoData(repoURL string) (*Project, error) { + return GetRepoDataWithLogger(repoURL, nil) +} + +func GetRepoDataWithLogger(repoURL string, logger *zerolog.Logger) (*Project, error) { + return GetRepoDataWithCache(repoURL, logger, nil) +} + +// getRetryClient is a variable to allow mocking in tests +var getRetryClient = func(logger *zerolog.Logger) *http.Client { + return createRetryClient(logger) +} + +func GetRepoDataWithCache(repoURL string, logger *zerolog.Logger, cache *projectCache) (*Project, error) { + if logger == nil { + nop := zerolog.Nop() + logger = &nop + } + + normalizedURL := normalizeRepoURL(repoURL) + + if cache != nil { + if cached, found := cache.Get(normalizedURL); found { + logger.Debug().Str("repo", normalizedURL).Msg("deps.dev data found in cache") + return cached, nil + } + } + + apiURL := fmt.Sprintf("https://api.deps.dev/v3/projects/%s", url.QueryEscape(normalizedURL)) + + client := getRetryClient(logger) + resp, err := client.Get(apiURL) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to make request to deps.dev: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode == http.StatusNotFound { + logger.Debug().Str("repo", normalizedURL).Msg("Repository not found in deps.dev") + return nil, fmt.Errorf("repository not found in deps.dev") + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("deps.dev API request failed with status %d", resp.StatusCode) + } + + var project Project + if err := json.NewDecoder(resp.Body).Decode(&project); err != nil { + return nil, fmt.Errorf("failed to decode deps.dev response: %w", err) + } + + if cache != nil { + cache.Set(normalizedURL, &project) } - return &proj, nil + logger.Debug().Str("repo", normalizedURL).Msg("Successfully fetched deps.dev data") + + return &project, nil +} + +func createRetryClient(logger *zerolog.Logger) *http.Client { + rc := retryablehttp.NewClient() + rc.RetryMax = 10 + rc.Logger = nil + rc.ErrorHandler = retryablehttp.PassthroughErrorHandler + rc.ResponseLogHook = func(_ retryablehttp.Logger, r *http.Response) { + if r != nil && r.StatusCode >= 400 { + logger.Warn().Msgf("Unexpected status code (%s) for %s %s", r.Status, r.Request.Method, r.Request.URL.String()) + } + } + rc.Backoff = func(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration { + if resp == nil { + // For transport/client errors, don't add delay so tests and callers fail fast + return 0 + } + + // Check for Retry-After header for rate limiting + if retryAfter := resp.Header.Get("Retry-After"); retryAfter != "" { + if sleep, ok := parseRetryAfterHeader(retryAfter); ok { + logger.Warn(). + Dur("Retry-After", sleep). + Msg("Getting rate-limited by deps.dev, waiting...") + return sleep + } + } + + return retryablehttp.DefaultBackoff(min, max, attemptNum, resp) + } + + return rc.StandardClient() +} + +func parseRetryAfterHeader(v string) (time.Duration, bool) { + if v == "" { + return 0, false + } + + // First try to parse as seconds + if sec, err := strconv.ParseInt(v, 10, 64); err == nil { + return time.Duration(sec) * time.Second, true + } + + // Then try to parse as HTTP date + if t, err := http.ParseTime(v); err == nil { + sleep := time.Until(t) + if sleep > 0 { + return sleep, true + } + } + + return 0, false } diff --git a/lib/ecosystems/enrich_spdx.go b/lib/ecosystems/enrich_spdx.go index 7c1dd1c..20630e8 100644 --- a/lib/ecosystems/enrich_spdx.go +++ b/lib/ecosystems/enrich_spdx.go @@ -55,6 +55,7 @@ func enrichSPDX(bom *spdx.Document, logger *zerolog.Logger) { enrichSPDXDescription(pkg, pkgData) enrichSPDXHomepage(pkg, pkgData) + enrichSPDXRepositoryURL(pkg, pkgData) enrichSPDXSupplier(pkg, pkgData) packageVersionResp, err := cache.GetPackageVersionData(*purl) @@ -113,6 +114,18 @@ func enrichSPDXHomepage(pkg *v2_3.Package, data *packages.Package) { pkg.PackageHomePage = *data.Homepage } +func enrichSPDXRepositoryURL(pkg *v2_3.Package, data *packages.Package) { + if data.RepositoryUrl == nil { + return + } + ref := &v2_3.PackageExternalReference{ + Category: spdx.CategoryOther, + RefType: "vcs", + Locator: *data.RepositoryUrl, + } + pkg.PackageExternalReferences = append(pkg.PackageExternalReferences, ref) +} + func enrichSPDXDescription(pkg *v2_3.Package, data *packages.Package) { if data.Description == nil { return diff --git a/lib/snyk/package.go b/lib/snyk/package.go index d499ac0..1cc8696 100644 --- a/lib/snyk/package.go +++ b/lib/snyk/package.go @@ -119,14 +119,18 @@ func getRetryClient(logger *zerolog.Logger) *http.Client { } } rc.Backoff = func(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration { - if resp != nil { - if sleep, ok := parseRateLimitHeader(resp.Header.Get("X-RateLimit-Reset")); ok { - logger.Warn(). - Dur("Retry-After", sleep). - Msg("Getting rate-limited, waiting...") - return sleep - } + if resp == nil { + // For transport/client errors, don't add delay so tests and callers fail fast + return 0 } + + if sleep, ok := parseRateLimitHeader(resp.Header.Get("X-RateLimit-Reset")); ok { + logger.Warn(). + Dur("Retry-After", sleep). + Msg("Getting rate-limited, waiting...") + return sleep + } + return retryablehttp.DefaultBackoff(min, max, attemptNum, resp) } diff --git a/testing/sbom-with-vcs.cyclonedx.json b/testing/sbom-with-vcs.cyclonedx.json new file mode 100644 index 0000000..4ce9905 --- /dev/null +++ b/testing/sbom-with-vcs.cyclonedx.json @@ -0,0 +1,99 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.4", + "version": 1, + "metadata": { + "timestamp": "2024-01-15T10:00:00Z", + "tools": [ + { + "vendor": "Test", + "name": "Test SBOM Generator" + } + ], + "component": { + "bom-ref": "root", + "type": "application", + "name": "test-app", + "version": "1.0.0" + } + }, + "components": [ + { + "bom-ref": "1-express@4.18.1", + "type": "library", + "name": "express", + "version": "4.18.1", + "purl": "pkg:npm/express@4.18.1", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/expressjs/express" + } + ] + }, + { + "bom-ref": "2-react@18.2.0", + "type": "library", + "name": "react", + "version": "18.2.0", + "purl": "pkg:npm/react@18.2.0", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/facebook/react" + } + ] + }, + { + "bom-ref": "3-lodash@4.17.21", + "type": "library", + "name": "lodash", + "version": "4.17.21", + "purl": "pkg:npm/lodash@4.17.21", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/lodash/lodash" + } + ] + }, + { + "bom-ref": "4-axios@1.4.0", + "type": "library", + "name": "axios", + "version": "1.4.0", + "purl": "pkg:npm/axios@1.4.0", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/axios/axios" + } + ] + }, + { + "bom-ref": "5-typescript@5.0.4", + "type": "library", + "name": "typescript", + "version": "5.0.4", + "purl": "pkg:npm/typescript@5.0.4", + "externalReferences": [ + { + "type": "vcs", + "url": "https://github.com/microsoft/TypeScript" + } + ] + } + ], + "dependencies": [ + { + "ref": "root", + "dependsOn": [ + "1-express@4.18.1", + "2-react@18.2.0", + "3-lodash@4.17.21", + "4-axios@1.4.0", + "5-typescript@5.0.4" + ] + } + ] +}