Skip to content

Commit b9d5406

Browse files
craig[bot]herkolategan
andcommitted
Merge #143738
143738: roachprod-microbench: post github issues r=DarrylWong,srosenberg a=herkolategan This PR adds functionality to automatically post GitHub issues for microbenchmark failures in CI (TeamCity). It will only post failures for executions from the master branch. Typically two binaries are interleaved to produce a comparison, but with this change the binary from master (experiment) will be executed first. If it fails, all further executions of that benchmark are cancelled and the failure is posted to GitHub. There is one caveat regarding the posted issues: we retrieve stdout and stderr separately, so the log (on a GitHub issue) will be a concatenation of the two. Failures will be labeled with `O-microbench` as well as `C-test-failure` and `release-blocker`. A timeout (> 20 minutes for one iteration) will also count as a failure. Epic: None Release note: None Co-authored-by: Herko Lategan <[email protected]>
2 parents dae02f8 + 1146ea6 commit b9d5406

File tree

8 files changed

+285
-5
lines changed

8 files changed

+285
-5
lines changed

build/teamcity/cockroach/nightlies/microbenchmark_weekly.sh

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,13 @@ for sha in "${build_sha_arr[@]}"; do
139139
./bin/roachprod-microbench stage --quiet "$ROACHPROD_CLUSTER" "gs://$BENCH_BUCKET/builds/$archive_name" "$remote_dir/$sha"
140140
done
141141

142+
# Post issues to github for triggered builds (triggered builds are always on master)
143+
if [ -n "${TRIGGERED_BUILD:-}" ]; then
144+
GITHUB_BRANCH="master"
145+
GITHUB_SHA="${build_sha_arr[0]}"
146+
GITHUB_BINARY="experiment"
147+
fi
148+
142149
# Execute microbenchmarks
143150
./bin/roachprod-microbench run "$ROACHPROD_CLUSTER" \
144151
--binaries experiment="$remote_dir/${build_sha_arr[0]}" \
@@ -149,7 +156,8 @@ done
149156
${BENCH_TIMEOUT:+--timeout="$BENCH_TIMEOUT"} \
150157
${BENCH_EXCLUDE:+--exclude="$BENCH_EXCLUDE"} \
151158
${BENCH_IGNORE_PACKAGES:+--ignore-package="$BENCH_IGNORE_PACKAGES"} \
152-
--quiet \
159+
${TRIGGERED_BUILD:+--post-issues} \
160+
--quiet \
153161
-- "$TEST_ARGS" \
154162
|| exit_status=$?
155163

pkg/cmd/bazci/githubpost/githubpost.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,3 +758,12 @@ func postGeneralFailureImpl(logs string, fileIssue func(context.Context, Failure
758758
}
759759

760760
}
761+
762+
// MicrobenchmarkFailure creates a Failure struct for a microbenchmark failure.
763+
func MicrobenchmarkFailure(packageName string, benchmarkName string, logs string) Failure {
764+
return Failure{
765+
packageName: packageName,
766+
testName: benchmarkName,
767+
testMessage: logs,
768+
}
769+
}

pkg/cmd/roachprod-microbench/BUILD.bazel

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ go_library(
88
"compress.go",
99
"executor.go",
1010
"export.go",
11+
"github.go",
1112
"main.go",
1213
"metadata.go",
1314
"report.go",
@@ -18,6 +19,8 @@ go_library(
1819
importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench",
1920
visibility = ["//visibility:private"],
2021
deps = [
22+
"//pkg/cmd/bazci/githubpost",
23+
"//pkg/cmd/bazci/githubpost/issues",
2124
"//pkg/cmd/roachprod-microbench/cluster",
2225
"//pkg/cmd/roachprod-microbench/google",
2326
"//pkg/cmd/roachprod-microbench/model",
@@ -54,15 +57,19 @@ go_test(
5457
"compare_test.go",
5558
"executor_test.go",
5659
"export_test.go",
60+
"github_test.go",
5761
],
5862
data = glob(["testdata/**"]),
5963
embed = [":roachprod-microbench_lib"],
6064
deps = [
65+
"//pkg/cmd/bazci/githubpost/issues",
66+
"//pkg/cmd/roachprod-microbench/cluster",
6167
"//pkg/cmd/roachprod-microbench/model",
6268
"//pkg/cmd/roachprod-microbench/parser",
6369
"//pkg/testutils/datapathutils",
6470
"//pkg/util/timeutil",
6571
"@com_github_cockroachdb_datadriven//:datadriven",
72+
"@com_github_cockroachdb_errors//:errors",
6673
"@com_github_stretchr_testify//require",
6774
"@org_golang_x_exp//maps",
6875
],

pkg/cmd/roachprod-microbench/executor.go

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ type executorConfig struct {
4444
affinity bool
4545
quiet bool
4646
recoverable bool
47+
postIssues bool
4748
}
4849

4950
type executor struct {
@@ -52,6 +53,13 @@ type executor struct {
5253
ignorePackages map[string]struct{}
5354
runOptions install.RunOptions
5455
log *logger.Logger
56+
postConfig postConfig
57+
}
58+
59+
// postConfig carries the settings used when posting GitHub issues for failed
// benchmarks. newExecutor fills it from the environment when post-issues is
// enabled (rejecting empty values) and leaves it as the zero value otherwise.
type postConfig struct {
	// branch is taken from the GITHUB_BRANCH environment variable.
	branch string
	// binary is taken from the GITHUB_BINARY environment variable. It is the
	// key of the benchmark binary that is scheduled first and whose failures
	// are posted as issues.
	binary string
	// commitSHA is taken from the GITHUB_SHA environment variable.
	commitSHA string
}
5664

5765
type benchmark struct {
@@ -103,6 +111,18 @@ func newExecutor(config executorConfig) (*executor, error) {
103111
return nil, errors.New("iterations must be greater than 0")
104112
}
105113

114+
var pc postConfig
115+
if config.postIssues {
116+
pc = postConfig{
117+
branch: os.Getenv("GITHUB_BRANCH"),
118+
binary: os.Getenv("GITHUB_BINARY"),
119+
commitSHA: os.Getenv("GITHUB_SHA"),
120+
}
121+
if pc.branch == "" || pc.binary == "" || pc.commitSHA == "" {
122+
return nil, errors.New("GITHUB_BRANCH, GITHUB_BINARY, and GITHUB_SHA environment variables must be set when post-issues is enabled")
123+
}
124+
}
125+
106126
roachprodConfig.Quiet = config.quiet
107127
timestamp := timeutil.Now()
108128
l := InitLogger(filepath.Join(config.outputDir, fmt.Sprintf("roachprod-microbench-%s.log", timestamp.Format(util.TimeFormat))))
@@ -114,6 +134,7 @@ func newExecutor(config executorConfig) (*executor, error) {
114134
ignorePackages: ignorePackages,
115135
runOptions: runOptions,
116136
log: l,
137+
postConfig: pc,
117138
}, nil
118139
}
119140

@@ -265,6 +286,21 @@ func (e *executor) generateBenchmarkCommands(
265286
binaryKeys := maps.Keys(e.binaries)
266287
sort.Strings(binaryKeys)
267288

289+
// If post issues is enabled, move the post config binary key to the front
290+
// of the binary keys list to ensure it runs first. Since we might only run
291+
// one iteration before cancelling the other iterations, we want to report
292+
// the failure as soon as possible.
293+
if e.postIssues {
294+
for i, key := range binaryKeys {
295+
if key == e.postConfig.binary {
296+
// Move the key to front by removing it and inserting at index 0
297+
copy(binaryKeys[1:i+1], binaryKeys[0:i])
298+
binaryKeys[0] = e.postConfig.binary
299+
break
300+
}
301+
}
302+
}
303+
268304
// Generate the commands for each benchmark binary.
269305
for _, bench := range benchmarks {
270306
runCommand := fmt.Sprintf("./run.sh %s -test.benchmem -test.bench=^%s$ -test.run=^$ -test.v",
@@ -283,6 +319,7 @@ func (e *executor) generateBenchmarkCommands(
283319
command := cluster.RemoteCommand{
284320
Args: []string{"sh", "-c", shellCommand},
285321
Metadata: benchmarkKey{bench, key},
322+
GroupID: fmt.Sprintf("%s/%s", bench.pkg, bench.name),
286323
}
287324
benchmarkCommands = append(benchmarkCommands, command)
288325
}
@@ -314,6 +351,7 @@ func (e *executor) generateBenchmarkCommands(
314351
// corresponding microbenchmark. When running in lenient mode errors will not
315352
// fail the execution, and will still be logged to the aforementioned logs.
316353
func (e *executor) executeBenchmarks() error {
354+
var executorError error
317355

318356
// Remote execution Logging is captured and saved to appropriate log files and
319357
// the main logger is used for orchestration logging only. Therefore, we use a
@@ -387,7 +425,6 @@ func (e *executor) executeBenchmarks() error {
387425
}
388426

389427
// Execute commands.
390-
errorCount := 0
391428
logIndex := 0
392429
missingBenchmarks := make(map[benchmark]int, 0)
393430
failedBenchmarks := make(map[benchmark]int, 0)
@@ -410,14 +447,25 @@ func (e *executor) executeBenchmarks() error {
410447
fmt.Println()
411448
}
412449
tag := fmt.Sprintf("%d", logIndex)
450+
timeout := false
413451
if response.ExitStatus == 124 || response.ExitStatus == 137 {
414452
tag = fmt.Sprintf("%d-timeout", logIndex)
453+
timeout = true
415454
}
416455
err = report.writeBenchmarkErrorLogs(response, tag)
417456
if err != nil {
418457
e.log.Errorf("Failed to write error logs - %v", err)
419458
}
420-
errorCount++
459+
460+
if e.postIssues && benchmarkResponse.key == e.postConfig.binary {
461+
artifactsDir := fmt.Sprintf("%s/%s", e.outputDir, benchmarkResponse.key)
462+
formatter, req := createBenchmarkPostRequest(artifactsDir, response, timeout)
463+
err = postBenchmarkIssue(context.Background(), e.log, formatter, req)
464+
if err != nil {
465+
e.log.Errorf("Failed to post benchmark issue - %v", err)
466+
executorError = errors.CombineErrors(executorError, errors.Wrap(err, "failed to post benchmark issue"))
467+
}
468+
}
421469
logIndex++
422470
}
423471
if _, writeErr := report.analyticsOutput[benchmarkResponse.pkg].WriteString(
@@ -456,7 +504,7 @@ func (e *executor) executeBenchmarks() error {
456504
for res, count := range missingBenchmarks {
457505
e.log.Errorf("Missing benchmark: %s/%s in %d iterations", res.pkg, res.name, count)
458506
}
459-
460507
e.log.Printf("Completed benchmarks, results located at %s", e.outputDir)
461-
return nil
508+
509+
return executorError
462510
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
//
6+
7+
package main
8+
9+
import (
10+
"context"
11+
"fmt"
12+
"strings"
13+
"time"
14+
15+
"github.com/cockroachdb/cockroach/pkg/cmd/bazci/githubpost"
16+
"github.com/cockroachdb/cockroach/pkg/cmd/bazci/githubpost/issues"
17+
"github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/cluster"
18+
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
19+
)
20+
21+
// createBenchmarkPostRequest creates a post request for a benchmark failure.
22+
func createBenchmarkPostRequest(
23+
artifactsDir string, response cluster.RemoteResponse, timeout bool,
24+
) (issues.IssueFormatter, issues.PostRequest) {
25+
b := response.Metadata.(benchmarkKey).benchmark
26+
var combinedOutput strings.Builder
27+
if timeout {
28+
combinedOutput.WriteString(fmt.Sprintf("%s timed out after %s\n", b.name, response.Duration.Round(time.Second)))
29+
} else {
30+
combinedOutput.WriteString(response.Stdout)
31+
combinedOutput.WriteString("\n")
32+
combinedOutput.WriteString(response.Stderr)
33+
}
34+
35+
f := githubpost.MicrobenchmarkFailure(
36+
b.pkg,
37+
b.name,
38+
combinedOutput.String(),
39+
)
40+
formatter, req := githubpost.DefaultFormatter(context.Background(), f)
41+
req.Artifacts = artifactsDir
42+
req.Labels = append(req.Labels, "O-microbench")
43+
return formatter, req
44+
}
45+
46+
// postBenchmarkIssue posts a benchmark issue to github.
47+
func postBenchmarkIssue(
48+
ctx context.Context, l *logger.Logger, formatter issues.IssueFormatter, req issues.PostRequest,
49+
) error {
50+
opts := issues.DefaultOptionsFromEnv()
51+
_, err := issues.Post(ctx, l, formatter, req, opts)
52+
return err
53+
}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
//
6+
7+
package main
8+
9+
import (
10+
"fmt"
11+
"net/url"
12+
"strings"
13+
"testing"
14+
"time"
15+
16+
"github.com/cockroachdb/cockroach/pkg/cmd/bazci/githubpost/issues"
17+
"github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/cluster"
18+
"github.com/cockroachdb/cockroach/pkg/testutils/datapathutils"
19+
"github.com/cockroachdb/datadriven"
20+
"github.com/cockroachdb/errors"
21+
)
22+
23+
func TestCreatePostRequest(t *testing.T) {
24+
datadriven.Walk(t, datapathutils.TestDataPath(t, "github"), func(t *testing.T, path string) {
25+
var response cluster.RemoteResponse
26+
var bk benchmarkKey
27+
datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string {
28+
switch d.Cmd {
29+
case "benchmark":
30+
d.ScanArgs(t, "name", &bk.name)
31+
d.ScanArgs(t, "pkg", &bk.pkg)
32+
d.ScanArgs(t, "args", &response.Args)
33+
return ""
34+
case "stdout":
35+
response.Stdout = d.Input
36+
return ""
37+
case "stderr":
38+
response.Stderr = d.Input
39+
return ""
40+
case "post":
41+
response.Err = errors.New("benchmark failed")
42+
response.ExitStatus = 1
43+
response.Duration = time.Second * 10
44+
response.Metadata = bk
45+
formatter, req := createBenchmarkPostRequest("", response, false)
46+
str, err := formatPostRequest(formatter, req)
47+
if err != nil {
48+
t.Fatal(err)
49+
}
50+
return str
51+
}
52+
return ""
53+
})
54+
})
55+
}
56+
57+
// formatPostRequest emulates the behavior of the githubpost package.
58+
func formatPostRequest(formatter issues.IssueFormatter, req issues.PostRequest) (string, error) {
59+
// These fields can vary based on the test env so we set them to arbitrary
60+
// values here.
61+
req.MentionOnCreate = []string{"@test-eng"}
62+
req.ProjectColumnID = 0
63+
64+
data := issues.TemplateData{
65+
PostRequest: req,
66+
Parameters: req.ExtraParams,
67+
CondensedMessage: issues.CondensedMessage(req.Message),
68+
PackageNameShort: req.PackageName,
69+
}
70+
71+
r := &issues.Renderer{}
72+
if err := formatter.Body(r, data); err != nil {
73+
return "", err
74+
}
75+
76+
var post strings.Builder
77+
post.WriteString(r.String())
78+
79+
u, err := url.Parse("https://github.com/cockroachdb/cockroach/issues/new")
80+
if err != nil {
81+
return "", err
82+
}
83+
q := u.Query()
84+
q.Add("title", formatter.Title(data))
85+
q.Add("body", post.String())
86+
// Adding a template parameter is required to be able to view the rendered
87+
// template on GitHub, otherwise it just takes you to the template selection
88+
// page.
89+
q.Add("template", "none")
90+
u.RawQuery = q.Encode()
91+
post.WriteString(fmt.Sprintf("Rendered:\n%s", u.String()))
92+
93+
return post.String(), nil
94+
}

pkg/cmd/roachprod-microbench/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ func makeRunCommand() *cobra.Command {
116116
cmd.Flags().BoolVar(&config.affinity, "affinity", config.affinity, "run benchmarks with each iteration's binaries having affinity to the same node, while different iterations can run on different nodes")
117117
cmd.Flags().BoolVar(&config.quiet, "quiet", config.quiet, "suppress roachprod progress output")
118118
cmd.Flags().BoolVar(&config.recoverable, "recoverable", config.recoverable, "VMs are able to recover from transient failures (e.g., running spot instances on a MIG in GCE)")
119+
cmd.Flags().BoolVar(&config.postIssues, "post-issues", config.postIssues, "post issues to github (requires env vars for github issues to be set)")
119120
return cmd
120121
}
121122

0 commit comments

Comments
 (0)