Skip to content

Commit eb2dbf6

Browse files
craig[bot]williamchoe3kyle-a-wong
committed
153962: roachtest: extend github issue poster to include ip node mapping r=srosenberg,DarrylWong a=williamchoe3 Resolves #138356 #### Motivation Quality of life improvement, previously to find this information, you would need to find the name of the cluster in`test.log`, then use that cluster name to find it's cluster creation log in `{artifacts_dir}/_runner-logs/cluster-create`. #### Approach Get's node ip info from roachprod's `cloud.Cluster` ~~Similar to #151850 ~~In the test cleanup phase in `inspectArtifacts` added a new function `gatherNodeIpMapping` which is best effort to gather the cluster information in the log.~~ * ~~First we need to find the right log file given that there could be retries for cluster creation which creates additional log files. Given the naming convention, I opted to the just sort the log files that contained the cluster name. I realized a bit later though that because lexicographical sorting on numerical strings, this approach only works if retry attempts is <10. I was trying to avoid unnecessary string parsing if not needed. Also I would assume cluster retry attempts >=10 would be rare / too high of a retry limit to be ever set.~~ After finding the correct file, use regex to find the string in the log. Then store it in `test_impl`, then pass to `issues.Post` and I added a new case for parsing out the IP table * The regex is flexible to number of columns so we can change the table fields without having to modify the regex Adds Cluster Node IP information to github issue e.g. ``` | Node | Private IP | Public IP | Machine Type | | --- | --- | --- | --- | | willchoe-1758834520-01-n1cpu4-0001 | 10.142.0.2 | 34.139.44.53 | n2-standard-4 | ``` * Opted for keep the VM name as is vs. replacing with something like `n1`. My thinking was that if people wanted to reference the vm in logs, it would be nice to keep the names consistent, but if folks want this as n1 / the vm name isn't helpful then happy to change to `n1`, etc. Also added a new `node-ips.log` that will also contain this table #### Notes Saw "cluster-create" being used as a magic string so created a const for it `clusterCreateDir = "cluster-create"` #### Verification Added datadriven test Manual Verification Verified the renderer in `issues` was formatting the new code block correctly in debug mode, holding off on generating more debug github issues, but can if folks want to see 156489: sql, sqlstats: Refactor Statement Recording r=kyle-a-wong a=kyle-a-wong A new RecordedStatementStatsBuilder was added to the sql package to make building a RecordedStmtStats struct easier. New interfaces were also added to sqlstats to make make building RecordedStmtStats easier. These new interfaces and builders should help to decouple Recording statement stats from the conn executor, allowing them to be recorded in other places as well Epic: None Release note: None Co-authored-by: William Choe <[email protected]> Co-authored-by: Kyle Wong <[email protected]>
3 parents 1781fc1 + 0eb1ca1 + 5308d41 commit eb2dbf6

24 files changed

+981
-162
lines changed

pkg/cmd/bazci/githubpost/issues/condense.go

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,13 @@ type FatalNodeRoachtest struct {
5555
FatalLogs string
5656
}
5757

58+
// NodeToIpMappingRoachtest contains the node to ip mapping from a roachtest
59+
// cluster
60+
type NodeToIpMappingRoachtest struct {
61+
Message,
62+
NodeToIpMapping string
63+
}
64+
5865
// A CondensedMessage is a test log output garnished with useful helper methods
5966
// that extract concise information for seamless debugging.
6067
type CondensedMessage string
@@ -67,7 +74,16 @@ var fatalRE = regexp.MustCompile(`(?ms)(^F\d{6}.*?\n)(goroutine \d+.*?\n)\n`)
6774
var crasherRE = regexp.MustCompile(`(?s)( *rsg_test.go:\d{3}: Crash detected:.*?\n)(.*?;\n)`)
6875
var reproRE = regexp.MustCompile(`(?s)( *rsg_test.go:\d{3}: To reproduce, use schema:)`)
6976

70-
var roachtestNodeFatalRE = regexp.MustCompile(`(?ms)\A(.*?\n)((?:^F\d{6}\b[^\n]*(?:\n|$))+)`)
77+
var nodeFatalRoachtestRE = regexp.MustCompile(`(?ms)\A(.*?\n)((?:^F\d{6}\b[^\n]*(?:\n|$))+)`)
78+
79+
// nodeToIpRoachtestRE matches an entire Markdown table block
80+
// - including header, separator, and data rows:
81+
// - handles an arbitrary number of columns
82+
//
83+
// | Node | Public IP | Private IP |
84+
// | --- | --- | --- |
85+
// | node-0001 | 1.1.1.0 | 1.1.1.1 |
86+
var nodeToIpRoachtestRE = regexp.MustCompile(`(?m)^[[:space:]]*\|(?:[^|\n]*\|)+[[:space:]]*\n^[[:space:]]*\|(?:[[:space:]]*:?-{3,}:?[[:space:]]*\|)+[[:space:]]*\n(?:^[[:space:]]*\|(?:[^|\n]*\|)+[[:space:]]*(?:\n|$))+`)
7187

7288
// FatalOrPanic constructs a FatalOrPanic. If no fatal or panic occurred in the
7389
// test, ok=false is returned.
@@ -112,17 +128,35 @@ func (s CondensedMessage) RSGCrash(lineLimit int) (c RSGCrash, ok bool) {
112128
// ok=false is returned
113129
func (s CondensedMessage) FatalNodeRoachtest() (fnr FatalNodeRoachtest, ok bool) {
114130
ss := string(s)
115-
if matches := roachtestNodeFatalRE.FindStringSubmatchIndex(ss); matches != nil {
131+
if matches := nodeFatalRoachtestRE.FindStringSubmatchIndex(ss); matches != nil {
116132
if len(matches) != 6 {
117133
return FatalNodeRoachtest{}, false
118134
}
119-
fnr.Message = ss[matches[2] : matches[3]-1]
135+
msg := ss[matches[2]:matches[3]]
136+
fnr.Message = strings.TrimRight(msg, "\n")
120137
fnr.FatalLogs = ss[matches[4]:matches[5]]
121138
return fnr, true
122139
}
123140
return FatalNodeRoachtest{}, false
124141
}
125142

143+
// NodeToIpMappingRoachtest constructs a NodeToIpMappingRoachtest which is
144+
// used to construct an issue that contains cluster information
145+
func (s CondensedMessage) NodeToIpMappingRoachtest() (nodeIpMap NodeToIpMappingRoachtest, ok bool) {
146+
ss := string(s)
147+
if matches := nodeToIpRoachtestRE.FindStringSubmatchIndex(ss); matches != nil {
148+
if len(matches) != 2 {
149+
// only expecting a single match
150+
return NodeToIpMappingRoachtest{}, false
151+
}
152+
msg := ss[0:matches[0]]
153+
nodeIpMap.Message = strings.TrimRight(msg, "\n")
154+
nodeIpMap.NodeToIpMapping = ss[matches[0]:matches[1]]
155+
return nodeIpMap, true
156+
}
157+
return NodeToIpMappingRoachtest{}, false
158+
}
159+
126160
// String calls .Digest(30).
127161
func (s CondensedMessage) String() string {
128162
return s.Digest(30)

pkg/cmd/bazci/githubpost/issues/formatter_unit.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,17 @@ func (unitTestFormatterTyp) Body(r *Renderer, data TemplateData) error {
7474
r.CodeBlock("", fnr.Message)
7575
r.Escaped("Fatal entries found in Cockroach logs:")
7676
r.CodeBlock("", fnr.FatalLogs)
77+
if nodeIpMap, ok := data.CondensedMessage.NodeToIpMappingRoachtest(); ok {
78+
r.Escaped("Cluster Node to Ip Mapping:")
79+
r.nl()
80+
r.Escaped(nodeIpMap.NodeToIpMapping)
81+
}
82+
} else if nodeIpMap, ok := data.CondensedMessage.NodeToIpMappingRoachtest(); ok {
83+
r.Escaped("Failed with:")
84+
r.CodeBlock("", nodeIpMap.Message)
85+
r.Escaped("Cluster Node to Ip Mapping:")
86+
r.nl()
87+
r.Escaped(nodeIpMap.NodeToIpMapping)
7788
} else {
7889
r.CodeBlock("", data.CondensedMessage.Digest(50))
7990
}

pkg/cmd/roachtest/cluster.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,7 @@ type clusterFactory struct {
797797
// counter is incremented with every new cluster. It's used as part of the cluster's name.
798798
// Accessed atomically.
799799
counter atomic.Uint64
800-
// The registry with whom all clustered will be registered.
800+
// The registry with whom all clusters will be registered.
801801
r *clusterRegistry
802802
// artifactsDir is the directory in which the cluster creation log file will be placed.
803803
artifactsDir string
@@ -977,7 +977,7 @@ func (f *clusterFactory) newCluster(
977977
if i > 1 {
978978
retryStr = "-retry" + strconv.Itoa(i-1)
979979
}
980-
logPath := filepath.Join(f.artifactsDir, runnerLogsDir, "cluster-create", genName+retryStr+".log")
980+
logPath := filepath.Join(f.artifactsDir, runnerLogsDir, clusterCreateDir, genName+retryStr+".log")
981981
l, err := logger.RootLogger(logPath, teeOpt)
982982
if err != nil {
983983
log.Fatalf("%v", err)

pkg/cmd/roachtest/github_test.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ func TestCreatePostRequest(t *testing.T) {
121121
type githubIssueOpts struct {
122122
failures []failure
123123
loadTeamsFailed bool
124+
message string
124125
}
125126

126127
datadriven.Walk(t, datapathutils.TestDataPath(t, "github"), func(t *testing.T, path string) {
@@ -168,7 +169,7 @@ func TestCreatePostRequest(t *testing.T) {
168169
// on where this test is run and prone to flaking.
169170
fmt.Fprintf(&b, "%v", f.squashedErr)
170171
}
171-
message := b.String()
172+
message := b.String() + testCase.message
172173

173174
params := getTestParameters(ti, issueInfo.cluster, issueInfo.vmCreateOpts)
174175
req, err := github.createPostRequest(
@@ -225,10 +226,7 @@ func TestCreatePostRequest(t *testing.T) {
225226
// Lose the error object which should make our flake detection fail.
226227
refError = errors.Newf("%s", redact.SafeString(refError.Error()))
227228
case "node-fatal":
228-
refError = errors.Newf(`(monitor.go:267).Wait: monitor failure: dial tcp 127.0.0.1:29000: connect: connection refused
229-
test artifacts and logs in: artifacts/roachtest/manual/monitor/test-failure/node-fatal-explicit-monitor/cpu_arch=arm64/run_1
230-
F250826 19:49:07.194443 3106 sql/sem/builtins/builtins.go:6063 ⋮ [T1,Vsystem,n1,client=127.0.0.1:54552,hostssl,user=‹roachprod›] 250 force_log_fatal(): ‹oops›
231-
`)
229+
refError = errors.Newf(`(monitor.go:267).Wait: monitor failure: dial tcp 127.0.0.1:29000: connect: connection refused`)
232230
}
233231
}
234232
}
@@ -251,6 +249,18 @@ F250826 19:49:07.194443 3106 sql/sem/builtins/builtins.go:6063 ⋮ [T1,Vsystem,n
251249
ti.spec.CockroachBinary = registry.RuntimeAssertionsCockroach
252250
case "set-coverage-enabled-build":
253251
ti.goCoverEnabled = true
252+
case "add-additional-info":
253+
msg_type := d.CmdArgs[0].Vals[0]
254+
switch msg_type {
255+
case "ip-node-info":
256+
testCase.message = fmt.Sprintf("%s\n%s", testCase.message, `| Node | Public IP | Private IP |
257+
| --- | --- | --- |
258+
| teamcity-1758834520-01-n1cpu4-0001 | 34.139.44.53 | 10.142.0.2 |`)
259+
case "fatal-logs":
260+
testCase.message = fmt.Sprintf("%s\n%s", testCase.message, `F250826 19:49:07.194443 3106 sql/sem/builtins/builtins.go:6063 ⋮ [T1,Vsystem,n1,client=127.0.0.1:54552,hostssl,user=‹roachprod›] 250 force_log_fatal(): ‹oops›`)
261+
default:
262+
return fmt.Sprintf("unknown additional info argument: %s", msg_type)
263+
}
254264
}
255265

256266
return "ok"

pkg/cmd/roachtest/main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ const (
5656
// runnerLogsDir is the dir under the artifacts root where the test runner log
5757
// and other runner-related logs (i.e. cluster creation logs) will be written.
5858
runnerLogsDir = "_runner-logs"
59+
60+
// clusterCreateDir is the dir under runnerLogsDir where cluster creation
61+
// related logs will be written
62+
clusterCreateDir = "cluster-create"
5963
)
6064

6165
func main() {

pkg/cmd/roachtest/roachtestutil/utils.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,42 @@ var ClusterSettingRetryOpts = retry.Options{
395395
MaxBackoff: 5 * time.Second,
396396
MaxRetries: 5,
397397
}
398+
399+
var (
400+
errEmptyTableData = errors.New("empty table data")
401+
errMismatchedCols = errors.New("row has mismatched number of columns")
402+
)
403+
404+
// ToMarkdownTable renders a 2D list of strings as a Markdown table. Assumes
405+
// header is the first entry.
406+
//
407+
// Example:
408+
//
409+
// | Name | Age | City |
410+
// | --- | --- | --- |
411+
// | Alice | 30 | New York |
412+
// | Bob | 25 | San Francisco |
413+
// | Charlie | 35 | Chicago |
414+
func ToMarkdownTable(data [][]string) (string, error) {
415+
if len(data) == 0 || len(data[0]) == 0 {
416+
return "", errEmptyTableData
417+
}
418+
var sb strings.Builder
419+
// Write header row
420+
sb.WriteString("| " + strings.Join(data[0], " | ") + " |\n")
421+
// Write separator row (--- under each header)
422+
separators := make([]string, len(data[0]))
423+
for i := range separators {
424+
separators[i] = "---"
425+
}
426+
sb.WriteString("| " + strings.Join(separators, " | ") + " |\n")
427+
// Write remaining rows
428+
for i, row := range data[1:] {
429+
if len(data[0]) != len(row) {
430+
return "", fmt.Errorf("%w: row %d has %d columns, expected %d",
431+
errMismatchedCols, i, len(row), len(data[0]))
432+
}
433+
sb.WriteString("| " + strings.Join(row, " | ") + " |\n")
434+
}
435+
return sb.String(), nil
436+
}

pkg/cmd/roachtest/roachtestutil/utils_test.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
1313
"github.com/stretchr/testify/assert"
14+
"github.com/stretchr/testify/require"
1415
)
1516

1617
func TestCmdLogFileName(t *testing.T) {
@@ -27,3 +28,54 @@ func TestCmdLogFileName(t *testing.T) {
2728
cmdLogFileName(ts, nodes, "./cockroach bla --foo bar"),
2829
)
2930
}
31+
32+
func TestToMarkdownTable(t *testing.T) {
33+
tests := []struct {
34+
name string
35+
input [][]string
36+
expectedOut string
37+
expectedErr error
38+
}{
39+
{
40+
name: "valid table",
41+
input: [][]string{
42+
{"Name", "Age", "City"},
43+
{"Alice", "30", "New York"},
44+
{"Bob", "25", "San Francisco"},
45+
},
46+
expectedOut: `| Name | Age | City |
47+
| --- | --- | --- |
48+
| Alice | 30 | New York |
49+
| Bob | 25 | San Francisco |
50+
`,
51+
expectedErr: nil,
52+
},
53+
{
54+
name: "empty data",
55+
input: [][]string{},
56+
expectedOut: "",
57+
expectedErr: errEmptyTableData,
58+
},
59+
{
60+
name: "row with wrong number of columns",
61+
input: [][]string{
62+
{"Name", "Age", "City"},
63+
{"Alice", "30"},
64+
},
65+
expectedOut: "",
66+
expectedErr: errMismatchedCols,
67+
},
68+
}
69+
70+
for _, tt := range tests {
71+
t.Run(tt.name, func(t *testing.T) {
72+
out, err := ToMarkdownTable(tt.input)
73+
if tt.expectedErr != nil {
74+
require.ErrorIs(t, err, tt.expectedErr, "expected %v, got %v", tt.expectedErr, err)
75+
} else {
76+
require.NoError(t, err)
77+
require.Equal(t, tt.expectedOut, out)
78+
}
79+
})
80+
}
81+
}

pkg/cmd/roachtest/test_impl.go

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,13 @@ type testImpl struct {
141141
// in case github issue posting is disabled.
142142
extraParams map[string]string
143143

144-
// githubMessage contains additional message information that will be
145-
// passed to github.MaybePost
146-
githubMessage string
144+
// githubFatalLogs contains node fatal logs that will be passed to
145+
// github.MaybePost
146+
githubFatalLogs string
147+
148+
// githubIpToNodeMapping contains the ip to node map that will be passed to
149+
// github.MaybePost
150+
githubIpToNodeMapping string
147151
}
148152
// Map from version to path to the cockroach binary to be used when
149153
// mixed-version test wants a binary for that binary. If a particular version
@@ -562,16 +566,45 @@ func (t *testImpl) failureMsg() string {
562566
return b.String()
563567
}
564568

565-
func (t *testImpl) getGithubMessage() string {
569+
func (t *testImpl) getGithubFatalLogs() string {
566570
t.mu.RLock()
567571
defer t.mu.RUnlock()
568-
return t.mu.githubMessage
572+
return t.mu.githubFatalLogs
569573
}
570574

571-
func (t *testImpl) appendGithubMessage(msg string) {
575+
func (t *testImpl) appendGithubFatalLogs(msg string) {
572576
t.mu.Lock()
573577
defer t.mu.Unlock()
574-
t.mu.githubMessage += msg
578+
t.mu.githubFatalLogs += msg
579+
}
580+
581+
func (t *testImpl) getGithubIpToNodeMapping() string {
582+
t.mu.RLock()
583+
defer t.mu.RUnlock()
584+
return t.mu.githubIpToNodeMapping
585+
}
586+
587+
func (t *testImpl) appendGithubIpToNodeMapping(msg string) {
588+
t.mu.Lock()
589+
defer t.mu.Unlock()
590+
t.mu.githubIpToNodeMapping += msg
591+
}
592+
593+
// getGithubMessage abstracts details on how the Github message is constructed
594+
// TODO(wchoe): Currently, all information we want to include in the Github
595+
// message must be passed in as a single string. This is not ideal, especially
596+
// as we add more information or want to format based on the failure or what
597+
// information is available. #157021 aims to address this.
598+
func (t *testImpl) getGithubMessage(failureMsg string) string {
599+
githubMsg := failureMsg
600+
if githubFatalLogs := t.getGithubFatalLogs(); githubFatalLogs != "" {
601+
githubMsg = fmt.Sprintf("%s\n%s", githubMsg, githubFatalLogs)
602+
}
603+
if githubIpToNodeMapping := t.getGithubIpToNodeMapping(); githubIpToNodeMapping != "" {
604+
githubMsg = fmt.Sprintf("%s\n%s", githubMsg, githubIpToNodeMapping)
605+
}
606+
t.L().Printf("github message: %s", githubMsg)
607+
return githubMsg
575608
}
576609

577610
// failuresMatchingError checks whether the first error in trees of

0 commit comments

Comments
 (0)