Skip to content

Commit 0eb1ca1

Browse files
committed
roachtest: extend github issue poster to include ip node mapping
Quality of life improvement, previously to find this information, you would need to find the name of the cluster in test.log, then use that cluster name to find it's cluster creation log in artifacts_dir/_runner-logs/cluster-create. Now this information will be available in the github issue. Resolves #138356 Epic: None Release note: None
1 parent c9eda2b commit 0eb1ca1

16 files changed

+634
-51
lines changed

pkg/cmd/bazci/githubpost/issues/condense.go

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,13 @@ type FatalNodeRoachtest struct {
5555
FatalLogs string
5656
}
5757

58+
// NodeToIpMappingRoachtest contains the node to ip mapping from a roachtest
59+
// cluster
60+
type NodeToIpMappingRoachtest struct {
61+
Message,
62+
NodeToIpMapping string
63+
}
64+
5865
// A CondensedMessage is a test log output garnished with useful helper methods
5966
// that extract concise information for seamless debugging.
6067
type CondensedMessage string
@@ -67,7 +74,16 @@ var fatalRE = regexp.MustCompile(`(?ms)(^F\d{6}.*?\n)(goroutine \d+.*?\n)\n`)
6774
var crasherRE = regexp.MustCompile(`(?s)( *rsg_test.go:\d{3}: Crash detected:.*?\n)(.*?;\n)`)
6875
var reproRE = regexp.MustCompile(`(?s)( *rsg_test.go:\d{3}: To reproduce, use schema:)`)
6976

70-
var roachtestNodeFatalRE = regexp.MustCompile(`(?ms)\A(.*?\n)((?:^F\d{6}\b[^\n]*(?:\n|$))+)`)
77+
var nodeFatalRoachtestRE = regexp.MustCompile(`(?ms)\A(.*?\n)((?:^F\d{6}\b[^\n]*(?:\n|$))+)`)
78+
79+
// nodeToIpRoachtestRE matches an entire Markdown table block
80+
// - including header, separator, and data rows:
81+
// - handles an arbitrary number of columns
82+
//
83+
// | Node | Public IP | Private IP |
84+
// | --- | --- | --- |
85+
// | node-0001 | 1.1.1.0 | 1.1.1.1 |
86+
var nodeToIpRoachtestRE = regexp.MustCompile(`(?m)^[[:space:]]*\|(?:[^|\n]*\|)+[[:space:]]*\n^[[:space:]]*\|(?:[[:space:]]*:?-{3,}:?[[:space:]]*\|)+[[:space:]]*\n(?:^[[:space:]]*\|(?:[^|\n]*\|)+[[:space:]]*(?:\n|$))+`)
7187

7288
// FatalOrPanic constructs a FatalOrPanic. If no fatal or panic occurred in the
7389
// test, ok=false is returned.
@@ -112,17 +128,35 @@ func (s CondensedMessage) RSGCrash(lineLimit int) (c RSGCrash, ok bool) {
112128
// ok=false is returned
113129
func (s CondensedMessage) FatalNodeRoachtest() (fnr FatalNodeRoachtest, ok bool) {
114130
ss := string(s)
115-
if matches := roachtestNodeFatalRE.FindStringSubmatchIndex(ss); matches != nil {
131+
if matches := nodeFatalRoachtestRE.FindStringSubmatchIndex(ss); matches != nil {
116132
if len(matches) != 6 {
117133
return FatalNodeRoachtest{}, false
118134
}
119-
fnr.Message = ss[matches[2] : matches[3]-1]
135+
msg := ss[matches[2]:matches[3]]
136+
fnr.Message = strings.TrimRight(msg, "\n")
120137
fnr.FatalLogs = ss[matches[4]:matches[5]]
121138
return fnr, true
122139
}
123140
return FatalNodeRoachtest{}, false
124141
}
125142

143+
// NodeToIpMappingRoachtest constructs a NodeToIpMappingRoachtest which is
144+
// used to construct an issue that contains cluster information
145+
func (s CondensedMessage) NodeToIpMappingRoachtest() (nodeIpMap NodeToIpMappingRoachtest, ok bool) {
146+
ss := string(s)
147+
if matches := nodeToIpRoachtestRE.FindStringSubmatchIndex(ss); matches != nil {
148+
if len(matches) != 2 {
149+
// only expecting a single match
150+
return NodeToIpMappingRoachtest{}, false
151+
}
152+
msg := ss[0:matches[0]]
153+
nodeIpMap.Message = strings.TrimRight(msg, "\n")
154+
nodeIpMap.NodeToIpMapping = ss[matches[0]:matches[1]]
155+
return nodeIpMap, true
156+
}
157+
return NodeToIpMappingRoachtest{}, false
158+
}
159+
126160
// String calls .Digest(30).
127161
func (s CondensedMessage) String() string {
128162
return s.Digest(30)

pkg/cmd/bazci/githubpost/issues/formatter_unit.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,17 @@ func (unitTestFormatterTyp) Body(r *Renderer, data TemplateData) error {
7474
r.CodeBlock("", fnr.Message)
7575
r.Escaped("Fatal entries found in Cockroach logs:")
7676
r.CodeBlock("", fnr.FatalLogs)
77+
if nodeIpMap, ok := data.CondensedMessage.NodeToIpMappingRoachtest(); ok {
78+
r.Escaped("Cluster Node to Ip Mapping:")
79+
r.nl()
80+
r.Escaped(nodeIpMap.NodeToIpMapping)
81+
}
82+
} else if nodeIpMap, ok := data.CondensedMessage.NodeToIpMappingRoachtest(); ok {
83+
r.Escaped("Failed with:")
84+
r.CodeBlock("", nodeIpMap.Message)
85+
r.Escaped("Cluster Node to Ip Mapping:")
86+
r.nl()
87+
r.Escaped(nodeIpMap.NodeToIpMapping)
7788
} else {
7889
r.CodeBlock("", data.CondensedMessage.Digest(50))
7990
}

pkg/cmd/roachtest/cluster.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,7 @@ type clusterFactory struct {
797797
// counter is incremented with every new cluster. It's used as part of the cluster's name.
798798
// Accessed atomically.
799799
counter atomic.Uint64
800-
// The registry with whom all clustered will be registered.
800+
// The registry with whom all clusters will be registered.
801801
r *clusterRegistry
802802
// artifactsDir is the directory in which the cluster creation log file will be placed.
803803
artifactsDir string
@@ -977,7 +977,7 @@ func (f *clusterFactory) newCluster(
977977
if i > 1 {
978978
retryStr = "-retry" + strconv.Itoa(i-1)
979979
}
980-
logPath := filepath.Join(f.artifactsDir, runnerLogsDir, "cluster-create", genName+retryStr+".log")
980+
logPath := filepath.Join(f.artifactsDir, runnerLogsDir, clusterCreateDir, genName+retryStr+".log")
981981
l, err := logger.RootLogger(logPath, teeOpt)
982982
if err != nil {
983983
log.Fatalf("%v", err)

pkg/cmd/roachtest/github_test.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ func TestCreatePostRequest(t *testing.T) {
121121
type githubIssueOpts struct {
122122
failures []failure
123123
loadTeamsFailed bool
124+
message string
124125
}
125126

126127
datadriven.Walk(t, datapathutils.TestDataPath(t, "github"), func(t *testing.T, path string) {
@@ -168,7 +169,7 @@ func TestCreatePostRequest(t *testing.T) {
168169
// on where this test is run and prone to flaking.
169170
fmt.Fprintf(&b, "%v", f.squashedErr)
170171
}
171-
message := b.String()
172+
message := b.String() + testCase.message
172173

173174
params := getTestParameters(ti, issueInfo.cluster, issueInfo.vmCreateOpts)
174175
req, err := github.createPostRequest(
@@ -225,10 +226,7 @@ func TestCreatePostRequest(t *testing.T) {
225226
// Lose the error object which should make our flake detection fail.
226227
refError = errors.Newf("%s", redact.SafeString(refError.Error()))
227228
case "node-fatal":
228-
refError = errors.Newf(`(monitor.go:267).Wait: monitor failure: dial tcp 127.0.0.1:29000: connect: connection refused
229-
test artifacts and logs in: artifacts/roachtest/manual/monitor/test-failure/node-fatal-explicit-monitor/cpu_arch=arm64/run_1
230-
F250826 19:49:07.194443 3106 sql/sem/builtins/builtins.go:6063 ⋮ [T1,Vsystem,n1,client=127.0.0.1:54552,hostssl,user=‹roachprod›] 250 force_log_fatal(): ‹oops›
231-
`)
229+
refError = errors.Newf(`(monitor.go:267).Wait: monitor failure: dial tcp 127.0.0.1:29000: connect: connection refused`)
232230
}
233231
}
234232
}
@@ -251,6 +249,18 @@ F250826 19:49:07.194443 3106 sql/sem/builtins/builtins.go:6063 ⋮ [T1,Vsystem,n
251249
ti.spec.CockroachBinary = registry.RuntimeAssertionsCockroach
252250
case "set-coverage-enabled-build":
253251
ti.goCoverEnabled = true
252+
case "add-additional-info":
253+
msg_type := d.CmdArgs[0].Vals[0]
254+
switch msg_type {
255+
case "ip-node-info":
256+
testCase.message = fmt.Sprintf("%s\n%s", testCase.message, `| Node | Public IP | Private IP |
257+
| --- | --- | --- |
258+
| teamcity-1758834520-01-n1cpu4-0001 | 34.139.44.53 | 10.142.0.2 |`)
259+
case "fatal-logs":
260+
testCase.message = fmt.Sprintf("%s\n%s", testCase.message, `F250826 19:49:07.194443 3106 sql/sem/builtins/builtins.go:6063 ⋮ [T1,Vsystem,n1,client=127.0.0.1:54552,hostssl,user=‹roachprod›] 250 force_log_fatal(): ‹oops›`)
261+
default:
262+
return fmt.Sprintf("unknown additional info argument: %s", msg_type)
263+
}
254264
}
255265

256266
return "ok"

pkg/cmd/roachtest/main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ const (
5656
// runnerLogsDir is the dir under the artifacts root where the test runner log
5757
// and other runner-related logs (i.e. cluster creation logs) will be written.
5858
runnerLogsDir = "_runner-logs"
59+
60+
// clusterCreateDir is the dir under runnerLogsDir where cluster creation
61+
// related logs will be written
62+
clusterCreateDir = "cluster-create"
5963
)
6064

6165
func main() {

pkg/cmd/roachtest/roachtestutil/utils.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,42 @@ var ClusterSettingRetryOpts = retry.Options{
395395
MaxBackoff: 5 * time.Second,
396396
MaxRetries: 5,
397397
}
398+
399+
var (
400+
errEmptyTableData = errors.New("empty table data")
401+
errMismatchedCols = errors.New("row has mismatched number of columns")
402+
)
403+
404+
// ToMarkdownTable renders a 2D list of strings as a Markdown table. Assumes
405+
// header is the first entry.
406+
//
407+
// Example:
408+
//
409+
// | Name | Age | City |
410+
// | --- | --- | --- |
411+
// | Alice | 30 | New York |
412+
// | Bob | 25 | San Francisco |
413+
// | Charlie | 35 | Chicago |
414+
func ToMarkdownTable(data [][]string) (string, error) {
415+
if len(data) == 0 || len(data[0]) == 0 {
416+
return "", errEmptyTableData
417+
}
418+
var sb strings.Builder
419+
// Write header row
420+
sb.WriteString("| " + strings.Join(data[0], " | ") + " |\n")
421+
// Write separator row (--- under each header)
422+
separators := make([]string, len(data[0]))
423+
for i := range separators {
424+
separators[i] = "---"
425+
}
426+
sb.WriteString("| " + strings.Join(separators, " | ") + " |\n")
427+
// Write remaining rows
428+
for i, row := range data[1:] {
429+
if len(data[0]) != len(row) {
430+
return "", fmt.Errorf("%w: row %d has %d columns, expected %d",
431+
errMismatchedCols, i, len(row), len(data[0]))
432+
}
433+
sb.WriteString("| " + strings.Join(row, " | ") + " |\n")
434+
}
435+
return sb.String(), nil
436+
}

pkg/cmd/roachtest/roachtestutil/utils_test.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
1313
"github.com/stretchr/testify/assert"
14+
"github.com/stretchr/testify/require"
1415
)
1516

1617
func TestCmdLogFileName(t *testing.T) {
@@ -27,3 +28,54 @@ func TestCmdLogFileName(t *testing.T) {
2728
cmdLogFileName(ts, nodes, "./cockroach bla --foo bar"),
2829
)
2930
}
31+
32+
func TestToMarkdownTable(t *testing.T) {
33+
tests := []struct {
34+
name string
35+
input [][]string
36+
expectedOut string
37+
expectedErr error
38+
}{
39+
{
40+
name: "valid table",
41+
input: [][]string{
42+
{"Name", "Age", "City"},
43+
{"Alice", "30", "New York"},
44+
{"Bob", "25", "San Francisco"},
45+
},
46+
expectedOut: `| Name | Age | City |
47+
| --- | --- | --- |
48+
| Alice | 30 | New York |
49+
| Bob | 25 | San Francisco |
50+
`,
51+
expectedErr: nil,
52+
},
53+
{
54+
name: "empty data",
55+
input: [][]string{},
56+
expectedOut: "",
57+
expectedErr: errEmptyTableData,
58+
},
59+
{
60+
name: "row with wrong number of columns",
61+
input: [][]string{
62+
{"Name", "Age", "City"},
63+
{"Alice", "30"},
64+
},
65+
expectedOut: "",
66+
expectedErr: errMismatchedCols,
67+
},
68+
}
69+
70+
for _, tt := range tests {
71+
t.Run(tt.name, func(t *testing.T) {
72+
out, err := ToMarkdownTable(tt.input)
73+
if tt.expectedErr != nil {
74+
require.ErrorIs(t, err, tt.expectedErr, "expected %v, got %v", tt.expectedErr, err)
75+
} else {
76+
require.NoError(t, err)
77+
require.Equal(t, tt.expectedOut, out)
78+
}
79+
})
80+
}
81+
}

pkg/cmd/roachtest/test_impl.go

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,13 @@ type testImpl struct {
141141
// in case github issue posting is disabled.
142142
extraParams map[string]string
143143

144-
// githubMessage contains additional message information that will be
145-
// passed to github.MaybePost
146-
githubMessage string
144+
// githubFatalLogs contains node fatal logs that will be passed to
145+
// github.MaybePost
146+
githubFatalLogs string
147+
148+
// githubIpToNodeMapping contains the ip to node map that will be passed to
149+
// github.MaybePost
150+
githubIpToNodeMapping string
147151
}
148152
// Map from version to path to the cockroach binary to be used when
149153
// mixed-version test wants a binary for that binary. If a particular version
@@ -562,16 +566,45 @@ func (t *testImpl) failureMsg() string {
562566
return b.String()
563567
}
564568

565-
func (t *testImpl) getGithubMessage() string {
569+
func (t *testImpl) getGithubFatalLogs() string {
566570
t.mu.RLock()
567571
defer t.mu.RUnlock()
568-
return t.mu.githubMessage
572+
return t.mu.githubFatalLogs
569573
}
570574

571-
func (t *testImpl) appendGithubMessage(msg string) {
575+
func (t *testImpl) appendGithubFatalLogs(msg string) {
572576
t.mu.Lock()
573577
defer t.mu.Unlock()
574-
t.mu.githubMessage += msg
578+
t.mu.githubFatalLogs += msg
579+
}
580+
581+
func (t *testImpl) getGithubIpToNodeMapping() string {
582+
t.mu.RLock()
583+
defer t.mu.RUnlock()
584+
return t.mu.githubIpToNodeMapping
585+
}
586+
587+
func (t *testImpl) appendGithubIpToNodeMapping(msg string) {
588+
t.mu.Lock()
589+
defer t.mu.Unlock()
590+
t.mu.githubIpToNodeMapping += msg
591+
}
592+
593+
// getGithubMessage abstracts details on how the Github message is constructed
594+
// TODO(wchoe): Currently, all information we want to include in the Github
595+
// message must be passed in as a single string. This is not ideal, especially
596+
// as we add more information or want to format based on the failure or what
597+
// information is available. #157021 aims to address this.
598+
func (t *testImpl) getGithubMessage(failureMsg string) string {
599+
githubMsg := failureMsg
600+
if githubFatalLogs := t.getGithubFatalLogs(); githubFatalLogs != "" {
601+
githubMsg = fmt.Sprintf("%s\n%s", githubMsg, githubFatalLogs)
602+
}
603+
if githubIpToNodeMapping := t.getGithubIpToNodeMapping(); githubIpToNodeMapping != "" {
604+
githubMsg = fmt.Sprintf("%s\n%s", githubMsg, githubIpToNodeMapping)
605+
}
606+
t.L().Printf("github message: %s", githubMsg)
607+
return githubMsg
575608
}
576609

577610
// failuresMatchingError checks whether the first error in trees of

0 commit comments

Comments
 (0)