Skip to content

Commit 7f6de14

Browse files
WVerlaek and ona-agent committed
Add root trace span for build command and fix S3 cache timeout
- Add leeway.command span that wraps entire build execution including workspace loading and cache operations
- Move OTel tracing initialization to root command for reuse across commands
- Replace ListObjects with parallel HeadObject calls in ExistingPackages to fix timeout on buckets with millions of objects
- Add tracing spans for cache download operations with size attributes
- Add Bytes field to DownloadResult to track artifact sizes

Co-authored-by: Ona <no-reply@ona.com>
1 parent 74b1482 commit 7f6de14

File tree

15 files changed

+232
-375
lines changed

15 files changed

+232
-375
lines changed

cmd/build.go

Lines changed: 48 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -16,16 +16,15 @@ import (
1616
"github.com/gitpod-io/leeway/pkg/leeway/cache"
1717
"github.com/gitpod-io/leeway/pkg/leeway/cache/local"
1818
"github.com/gitpod-io/leeway/pkg/leeway/cache/remote"
19-
"github.com/gitpod-io/leeway/pkg/leeway/telemetry"
2019
"github.com/gookit/color"
2120
log "github.com/sirupsen/logrus"
2221
"github.com/spf13/cobra"
23-
"go.opentelemetry.io/otel"
24-
sdktrace "go.opentelemetry.io/otel/sdk/trace"
22+
"go.opentelemetry.io/otel/attribute"
23+
"go.opentelemetry.io/otel/codes"
24+
"go.opentelemetry.io/otel/trace"
2525
)
2626

27-
// CleanupFunc is a function that performs cleanup operations and must be deferred
28-
type CleanupFunc func()
27+
2928

3029
// buildCmd represents the build command
3130
var buildCmd = &cobra.Command{
@@ -58,13 +57,45 @@ Examples:
5857
},
5958
}
6059

61-
func build(cmd *cobra.Command, args []string) error {
60+
func build(cmd *cobra.Command, args []string) (buildErr error) {
61+
// Create command span if tracing is enabled
62+
var commandSpan trace.Span
63+
commandCtx := cmd.Context()
64+
if tracer != nil {
65+
var ctx context.Context
66+
ctx, commandSpan = tracer.Start(rootSpanCtx, "leeway.command",
67+
trace.WithSpanKind(trace.SpanKindInternal),
68+
)
69+
commandSpan.SetAttributes(
70+
attribute.String("leeway.version", leeway.Version),
71+
attribute.String("leeway.command", "build"),
72+
)
73+
commandCtx = ctx
74+
75+
defer func() {
76+
if buildErr != nil {
77+
commandSpan.RecordError(buildErr)
78+
commandSpan.SetStatus(codes.Error, buildErr.Error())
79+
} else {
80+
commandSpan.SetStatus(codes.Ok, "")
81+
}
82+
commandSpan.End()
83+
}()
84+
}
85+
6286
_, pkg, _, _ := getTarget(args, false)
6387
if pkg == nil {
6488
return errors.New("build needs a package")
6589
}
66-
opts, localCache, shutdown := getBuildOpts(cmd)
67-
defer shutdown()
90+
91+
// Add target package info to command span
92+
if commandSpan != nil {
93+
commandSpan.SetAttributes(
94+
attribute.String("leeway.target.package", pkg.FullName()),
95+
)
96+
}
97+
98+
opts, localCache := getBuildOpts(cmd, commandCtx)
6899

69100
var (
70101
watch, _ = cmd.Flags().GetBool("watch")
@@ -240,13 +271,9 @@ func addBuildFlags(cmd *cobra.Command) {
240271
cmd.Flags().Bool("report-github", os.Getenv("GITHUB_OUTPUT") != "", "Report package build success/failure to GitHub Actions using the GITHUB_OUTPUT environment variable")
241272
cmd.Flags().Bool("fixed-build-dir", true, "Use a fixed build directory for each package, instead of based on the package version, to better utilize caches based on absolute paths (defaults to true)")
242273
cmd.Flags().Bool("docker-export-to-cache", false, "Export Docker images to cache instead of pushing directly (enables SLSA L3 compliance)")
243-
cmd.Flags().String("otel-endpoint", os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"), "OpenTelemetry OTLP endpoint URL for tracing (defaults to $OTEL_EXPORTER_OTLP_ENDPOINT)")
244-
cmd.Flags().Bool("otel-insecure", os.Getenv("OTEL_EXPORTER_OTLP_INSECURE") == "true", "Disable TLS for OTLP endpoint (for local development only, defaults to $OTEL_EXPORTER_OTLP_INSECURE)")
245-
cmd.Flags().String("trace-parent", os.Getenv("TRACEPARENT"), "W3C Trace Context traceparent header for distributed tracing (defaults to $TRACEPARENT)")
246-
cmd.Flags().String("trace-state", os.Getenv("TRACESTATE"), "W3C Trace Context tracestate header for distributed tracing (defaults to $TRACESTATE)")
247274
}
248275

249-
func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, CleanupFunc) {
276+
func getBuildOpts(cmd *cobra.Command, commandCtx context.Context) ([]leeway.BuildOption, cache.LocalCache) {
250277
// Track if user explicitly set LEEWAY_DOCKER_EXPORT_TO_CACHE before workspace loading.
251278
// This allows us to distinguish:
252279
// - User set explicitly: High priority (overrides package config)
@@ -347,59 +374,9 @@ func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, C
347374
reporter = append(reporter, leeway.NewGitHubReporter())
348375
}
349376

350-
// Initialize OpenTelemetry reporter if endpoint is configured
351-
var tracerProvider *sdktrace.TracerProvider
352-
var otelShutdown func()
353-
if otelEndpoint, err := cmd.Flags().GetString("otel-endpoint"); err != nil {
354-
log.Fatal(err)
355-
} else if otelEndpoint != "" {
356-
// Set leeway version for telemetry
357-
telemetry.SetLeewayVersion(leeway.Version)
358-
359-
// Get insecure flag
360-
otelInsecure, err := cmd.Flags().GetBool("otel-insecure")
361-
if err != nil {
362-
log.Fatal(err)
363-
}
364-
365-
// Initialize tracer with the provided endpoint and TLS configuration
366-
tp, err := telemetry.InitTracer(context.Background(), otelEndpoint, otelInsecure)
367-
if err != nil {
368-
log.WithError(err).Warn("failed to initialize OpenTelemetry tracer")
369-
} else {
370-
tracerProvider = tp
371-
372-
// Parse trace context if provided
373-
traceParent, _ := cmd.Flags().GetString("trace-parent")
374-
traceState, _ := cmd.Flags().GetString("trace-state")
375-
376-
parentCtx := context.Background()
377-
if traceParent != "" {
378-
if err := telemetry.ValidateTraceParent(traceParent); err != nil {
379-
log.WithError(err).Warn("invalid trace-parent format")
380-
} else {
381-
ctx, err := telemetry.ParseTraceContext(traceParent, traceState)
382-
if err != nil {
383-
log.WithError(err).Warn("failed to parse trace context")
384-
} else {
385-
parentCtx = ctx
386-
}
387-
}
388-
}
389-
390-
// Create OTel reporter
391-
tracer := otel.Tracer("leeway")
392-
reporter = append(reporter, leeway.NewOTelReporter(tracer, parentCtx))
393-
394-
// Create shutdown function
395-
otelShutdown = func() {
396-
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
397-
defer cancel()
398-
if err := telemetry.Shutdown(shutdownCtx, tracerProvider); err != nil {
399-
log.WithError(err).Warn("failed to shutdown tracer provider")
400-
}
401-
}
402-
}
377+
// Add OpenTelemetry reporter if tracer was initialized in root command
378+
if tracer != nil {
379+
reporter = append(reporter, leeway.NewOTelReporter(tracer, commandCtx))
403380
}
404381

405382
dontTest, err := cmd.Flags().GetBool("dont-test")
@@ -465,11 +442,6 @@ func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, C
465442
dockerExportSet = true
466443
}
467444

468-
// Create a no-op shutdown function if otelShutdown is nil
469-
if otelShutdown == nil {
470-
otelShutdown = func() {}
471-
}
472-
473445
return []leeway.BuildOption{
474446
leeway.WithLocalCache(localCache),
475447
leeway.WithRemoteCache(remoteCache),
@@ -488,7 +460,7 @@ func getBuildOpts(cmd *cobra.Command) ([]leeway.BuildOption, cache.LocalCache, C
488460
leeway.WithInFlightChecksums(inFlightChecksums),
489461
leeway.WithDockerExportToCache(dockerExportToCache, dockerExportSet),
490462
leeway.WithDockerExportEnv(dockerExportEnvValue, dockerExportEnvSet),
491-
}, localCache, otelShutdown
463+
}, localCache
492464
}
493465

494466
type pushOnlyRemoteCache struct {
@@ -621,6 +593,10 @@ func getRemoteCache(cmd *cobra.Command) cache.RemoteCache {
621593
if err != nil {
622594
log.Fatalf("cannot access remote S3 cache: %v", err)
623595
}
596+
// Set tracer if available
597+
if tracer != nil {
598+
rc.SetTracer(tracer)
599+
}
624600
return rc
625601
default:
626602
if slsaConfig != nil && slsaConfig.Verification {

cmd/build_test.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -242,7 +242,7 @@ func TestGetBuildOptsWithInFlightChecksums(t *testing.T) {
242242
}
243243

244244
// Test getBuildOpts function
245-
opts, localCache, _ := getBuildOpts(cmd)
245+
opts, localCache := getBuildOpts(cmd, cmd.Context())
246246

247247
// We can't directly test the WithInFlightChecksums option since it's internal,
248248
// but we can verify the function doesn't error and returns options

cmd/provenance-assert.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -125,7 +125,7 @@ func getProvenanceTarget(cmd *cobra.Command, args []string) (bundleFN, pkgFN str
125125
log.Fatal("provenance export requires a package")
126126
}
127127

128-
_, cache, _ := getBuildOpts(cmd)
128+
_, cache := getBuildOpts(cmd, cmd.Context())
129129

130130
var ok bool
131131
pkgFN, ok = cache.Location(pkg)

cmd/root.go

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,18 @@ import (
66
"os"
77
"runtime/trace"
88
"strings"
9+
"time"
910

1011
"github.com/gookit/color"
1112
log "github.com/sirupsen/logrus"
1213
"github.com/spf13/cobra"
14+
"go.opentelemetry.io/otel"
15+
sdktrace "go.opentelemetry.io/otel/sdk/trace"
16+
otelTrace "go.opentelemetry.io/otel/trace"
1317
"golang.org/x/xerrors"
1418

1519
"github.com/gitpod-io/leeway/pkg/leeway"
20+
"github.com/gitpod-io/leeway/pkg/leeway/telemetry"
1621
)
1722

1823
const (
@@ -95,6 +100,11 @@ var (
95100
buildArgs []string
96101
verbose bool
97102
variant string
103+
104+
// OpenTelemetry tracing
105+
tracerProvider *sdktrace.TracerProvider
106+
tracer otelTrace.Tracer
107+
rootSpanCtx context.Context
98108
)
99109

100110
// rootCmd represents the base command when called without any subcommands
@@ -134,6 +144,46 @@ variables have an effect on leeway:
134144
if verbose {
135145
log.SetLevel(log.DebugLevel)
136146
}
147+
148+
// Initialize OpenTelemetry tracing if endpoint is configured
149+
otelEndpoint, _ := cmd.Flags().GetString("otel-endpoint")
150+
if otelEndpoint != "" {
151+
telemetry.SetLeewayVersion(leeway.Version)
152+
153+
otelInsecure, _ := cmd.Flags().GetBool("otel-insecure")
154+
tp, err := telemetry.InitTracer(cmd.Context(), otelEndpoint, otelInsecure)
155+
if err != nil {
156+
log.WithError(err).Warn("failed to initialize OpenTelemetry tracer")
157+
} else {
158+
tracerProvider = tp
159+
tracer = otel.Tracer("leeway")
160+
161+
// Parse trace context if provided
162+
traceParent, _ := cmd.Flags().GetString("trace-parent")
163+
traceState, _ := cmd.Flags().GetString("trace-state")
164+
165+
parentCtx := cmd.Context()
166+
if traceParent != "" {
167+
if err := telemetry.ValidateTraceParent(traceParent); err != nil {
168+
log.WithError(err).Warn("invalid trace-parent format")
169+
} else if ctx, err := telemetry.ParseTraceContext(traceParent, traceState); err != nil {
170+
log.WithError(err).Warn("failed to parse trace context")
171+
} else {
172+
parentCtx = ctx
173+
}
174+
}
175+
rootSpanCtx = parentCtx
176+
}
177+
}
178+
},
179+
PersistentPostRun: func(cmd *cobra.Command, args []string) {
180+
if tracerProvider != nil {
181+
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
182+
defer cancel()
183+
if err := telemetry.Shutdown(shutdownCtx, tracerProvider); err != nil {
184+
log.WithError(err).Warn("failed to shutdown tracer provider")
185+
}
186+
}
137187
},
138188
BashCompletionFunction: bashCompletionFunc,
139189
}
@@ -183,6 +233,12 @@ func init() {
183233
rootCmd.PersistentFlags().StringVar(&variant, "variant", "", "selects a package variant")
184234
rootCmd.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "enables verbose logging")
185235
rootCmd.PersistentFlags().Bool("dut", false, "used for testing only - doesn't actually do anything")
236+
237+
// OpenTelemetry tracing flags
238+
rootCmd.PersistentFlags().String("otel-endpoint", os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"), "OpenTelemetry OTLP endpoint URL for tracing (defaults to $OTEL_EXPORTER_OTLP_ENDPOINT)")
239+
rootCmd.PersistentFlags().Bool("otel-insecure", os.Getenv("OTEL_EXPORTER_OTLP_INSECURE") == "true", "Disable TLS for OTLP endpoint (for local development only, defaults to $OTEL_EXPORTER_OTLP_INSECURE)")
240+
rootCmd.PersistentFlags().String("trace-parent", os.Getenv("TRACEPARENT"), "W3C Trace Context traceparent header for distributed tracing (defaults to $TRACEPARENT)")
241+
rootCmd.PersistentFlags().String("trace-state", os.Getenv("TRACESTATE"), "W3C Trace Context tracestate header for distributed tracing (defaults to $TRACESTATE)")
186242
}
187243

188244
func getWorkspace() (leeway.Workspace, error) {

cmd/run.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ Should any of the scripts fail Leeway will exit with an exit code of 1 once all
2727
if script == nil {
2828
return errors.New("run needs a script")
2929
}
30-
opts, _, _ := getBuildOpts(cmd)
30+
opts, _ := getBuildOpts(cmd, cmd.Context())
3131
return script.Run(opts...)
3232
})
3333
}

cmd/sbom-export.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ If no package is specified, the workspace's default target is used.`,
3232
}
3333

3434
// Get build options and cache
35-
_, localCache, _ := getBuildOpts(cmd)
35+
_, localCache := getBuildOpts(cmd, cmd.Context())
3636

3737
// Get output format and file
3838
format, _ := cmd.Flags().GetString("format")

cmd/sbom-scan.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ If no package is specified, the workspace's default target is used.`,
3030
}
3131

3232
// Get cache
33-
_, localCache, _ := getBuildOpts(cmd)
33+
_, localCache := getBuildOpts(cmd, cmd.Context())
3434

3535
// Get output directory
3636
outputDir, _ := cmd.Flags().GetString("output-dir")

0 commit comments

Comments
 (0)