Skip to content

Commit 52d5277

Browse files
craig[bot]Abhinav1299
andcommitted
Merge #151518
151518: pkg/cli: embed metadata in tsdump files r=Abhinav1299 a=Abhinav1299 This commit introduces functionality for writing and reading embedded metadata in tsdump files. Additionally, the upload process now prioritizes embedded metadata over external YAML configurations for store-to-node mappings, enhancing the tsdump upload workflow. Summary of changes - Current flow of tsdump generation now adds metadata information to it by default. Metadata consist of store-to-node mappings, CRDB version and creation timestamp. - The `tsdump upload` and `roachprod` are now capable of decoding this metadata so no more need to provide store-to-node mapping manually. - Changes are backward compatible for both `tsdump upload` and `roachprod upload` so existing flow of providing store-to-node mapping is not hampered. Recompiling of roachprod is required though. - A small bug fix of leaked go routine when raw tsdump is converted to any other format like csv. Part of: CRDB-51457 Epic: CRDB-52093 Release note: None Co-authored-by: Abhinav1299 <[email protected]>
2 parents 8f5ae52 + 3d76e54 commit 52d5277

File tree

12 files changed

+645
-52
lines changed

12 files changed

+645
-52
lines changed

pkg/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,7 @@ ALL_TESTS = [
709709
"//pkg/testutils/zerofields:zerofields_test",
710710
"//pkg/testutils:testutils_test",
711711
"//pkg/ts/testmodel:testmodel_test",
712+
"//pkg/ts/tsdumpmeta:tsdumpmeta_test",
712713
"//pkg/ts:ts_test",
713714
"//pkg/ui:ui_test",
714715
"//pkg/upgrade/upgradecluster:upgradecluster_test",
@@ -2524,6 +2525,8 @@ GO_TARGETS = [
25242525
"//pkg/ts/catalog:catalog",
25252526
"//pkg/ts/testmodel:testmodel",
25262527
"//pkg/ts/testmodel:testmodel_test",
2528+
"//pkg/ts/tsdumpmeta:tsdumpmeta",
2529+
"//pkg/ts/tsdumpmeta:tsdumpmeta_test",
25272530
"//pkg/ts/tspb:tspb",
25282531
"//pkg/ts/tsutil:tsutil",
25292532
"//pkg/ts:ts",

pkg/cli/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ go_library(
193193
"//pkg/testutils/serverutils",
194194
"//pkg/ts",
195195
"//pkg/ts/catalog",
196+
"//pkg/ts/tsdumpmeta",
196197
"//pkg/ts/tspb",
197198
"//pkg/ts/tsutil",
198199
"//pkg/upgrade/upgrades",
@@ -453,6 +454,7 @@ go_test(
453454
"//pkg/testutils/sqlutils",
454455
"//pkg/testutils/testcluster",
455456
"//pkg/ts",
457+
"//pkg/ts/tsdumpmeta",
456458
"//pkg/ts/tspb",
457459
"//pkg/util",
458460
"//pkg/util/envutil",
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Test embedded metadata functionality for tsdump upload
2+
3+
# Test case 1: embedded metadata only (no YAML file)
4+
# This tests that store metrics get proper node_id tags based on embedded store-to-node mapping
5+
upload-datadog-embedded-only
6+
cr.store.rocksdb.block.cache.usage,2021-01-01T00:00:00Z,1,75.2
7+
cr.store.rocksdb.num.entries,2021-01-01T00:00:10Z,3,42.0
8+
cr.node.sql.query.count,2021-01-01T00:00:20Z,1,100.5
9+
----
10+
----
11+
{"series":[{"interval":10,"metric":"crdb.tsdump.rocksdb.block.cache.usage","points":[{"timestamp":1609459200,"value":75.2}],"tags":["node_id:1","store:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":3},{"interval":10,"metric":"crdb.tsdump.rocksdb.num.entries","points":[{"timestamp":1609459210,"value":42}],"tags":["node_id:2","store:3","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":0},{"interval":10,"metric":"crdb.tsdump.sql.query.count","points":[{"timestamp":1609459220,"value":100.5}],"tags":["node_id:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":1}]}
12+
13+
[{"ddsource":"tsdump_upload","ddtags":"cluster_type:SELF_HOSTED,cluster_label:\"test-cluster\",cluster_id:test-cluster-id,zendesk_ticket:zd-test,org_name:test-org,user_name:test-user,upload_id:\"test-cluster\"-20241114000000,upload_timestamp:2024-11-14 00:00:00,upload_year:2024,upload_month:11,upload_day:14,series_uploaded:3","dry_run":"false","duration":"0","estimated_cost":"0.000186986301369863","hostname":"hostname","message":"tsdump upload completed: uploaded 3 series overall","series_uploaded":"3","service":"tsdump_upload","success":"true"}]
14+
----
15+
----
16+
17+
# Test case 2: YAML file only (no embedded metadata)
18+
# This tests that store metrics get proper node_id tags from external YAML when no embedded metadata is present
19+
upload-datadog-yaml-only
20+
cr.store.rocksdb.block.cache.usage,2021-01-01T00:00:00Z,1,75.2
21+
----
22+
----
23+
{"series":[{"interval":10,"metric":"crdb.tsdump.rocksdb.block.cache.usage","points":[{"timestamp":1609459200,"value":75.2}],"tags":["node_id:1","store:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":3}]}
24+
25+
[{"ddsource":"tsdump_upload","ddtags":"cluster_type:SELF_HOSTED,cluster_label:\"test-cluster\",cluster_id:test-cluster-id,zendesk_ticket:zd-test,org_name:test-org,user_name:test-user,upload_id:\"test-cluster\"-20241114000000,upload_timestamp:2024-11-14 00:00:00,upload_year:2024,upload_month:11,upload_day:14,series_uploaded:1","dry_run":"false","duration":"0","estimated_cost":"6.232876712328767e-05","hostname":"hostname","message":"tsdump upload completed: uploaded 1 series overall","series_uploaded":"1","service":"tsdump_upload","success":"true"}]
26+
----
27+
----
28+
29+
# Test case 3: both embedded metadata and YAML file (embedded takes priority)
30+
# This tests that embedded metadata is prioritized over external YAML when both are present, proving precedence
31+
upload-datadog-embedded-priority
32+
cr.store.rocksdb.block.cache.usage,2021-01-01T00:00:00Z,1,75.2
33+
cr.store.rocksdb.num.entries,2021-01-01T00:00:10Z,3,42.0
34+
----
35+
----
36+
{"series":[{"interval":10,"metric":"crdb.tsdump.rocksdb.block.cache.usage","points":[{"timestamp":1609459200,"value":75.2}],"tags":["node_id:1","store:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":3},{"interval":10,"metric":"crdb.tsdump.rocksdb.num.entries","points":[{"timestamp":1609459210,"value":42}],"tags":["node_id:2","store:3","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":0}]}
37+
38+
[{"ddsource":"tsdump_upload","ddtags":"cluster_type:SELF_HOSTED,cluster_label:\"test-cluster\",cluster_id:test-cluster-id,zendesk_ticket:zd-test,org_name:test-org,user_name:test-user,upload_id:\"test-cluster\"-20241114000000,upload_timestamp:2024-11-14 00:00:00,upload_year:2024,upload_month:11,upload_day:14,series_uploaded:2","dry_run":"false","duration":"0","estimated_cost":"0.00012465753424657533","hostname":"hostname","message":"tsdump upload completed: uploaded 2 series overall","series_uploaded":"2","service":"tsdump_upload","success":"true"}]
39+
----
40+
----
41+
42+
# Test case 4: no metadata and no YAML file (normal upload)
43+
# This tests normal upload behavior when no store-to-node mapping is available, store metrics get only store tags
44+
upload-datadog-no-mapping
45+
cr.store.rocksdb.block.cache.usage,2021-01-01T00:00:00Z,1,75.2
46+
cr.node.sql.query.count,2021-01-01T00:00:20Z,1,100.5
47+
----
48+
----
49+
{"series":[{"interval":10,"metric":"crdb.tsdump.rocksdb.block.cache.usage","points":[{"timestamp":1609459200,"value":75.2}],"tags":["store:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":3},{"interval":10,"metric":"crdb.tsdump.sql.query.count","points":[{"timestamp":1609459220,"value":100.5}],"tags":["node_id:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":1}]}
50+
51+
[{"ddsource":"tsdump_upload","ddtags":"cluster_type:SELF_HOSTED,cluster_label:\"test-cluster\",cluster_id:test-cluster-id,zendesk_ticket:zd-test,org_name:test-org,user_name:test-user,upload_id:\"test-cluster\"-20241114000000,upload_timestamp:2024-11-14 00:00:00,upload_year:2024,upload_month:11,upload_day:14,series_uploaded:2","dry_run":"false","duration":"0","estimated_cost":"0.00012465753424657533","hostname":"hostname","message":"tsdump upload completed: uploaded 2 series overall","series_uploaded":"2","service":"tsdump_upload","success":"true"}]
52+
----
53+
----

pkg/cli/tsdump.go

Lines changed: 77 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@ import (
2222
"sync"
2323
"time"
2424

25+
"github.com/cockroachdb/cockroach/pkg/build"
2526
"github.com/cockroachdb/cockroach/pkg/cli/clierrorplus"
2627
"github.com/cockroachdb/cockroach/pkg/cli/clisqlclient"
2728
"github.com/cockroachdb/cockroach/pkg/roachpb"
2829
"github.com/cockroachdb/cockroach/pkg/server/serverpb"
2930
"github.com/cockroachdb/cockroach/pkg/ts"
31+
"github.com/cockroachdb/cockroach/pkg/ts/tsdumpmeta"
3032
"github.com/cockroachdb/cockroach/pkg/ts/tspb"
3133
"github.com/cockroachdb/cockroach/pkg/ts/tsutil"
3234
"github.com/cockroachdb/cockroach/pkg/util/netutil/addr"
@@ -192,13 +194,6 @@ will then convert it to the --format requested in the current invocation.
192194
return errors.Wrapf(err, "connecting to %s", target)
193195
}
194196

195-
// Buffer the writes to os.Stdout since we're going to
196-
// be writing potentially a lot of data to it.
197-
w := bufio.NewWriterSize(os.Stdout, 1024*1024)
198-
if err := tsutil.DumpRawTo(stream, w); err != nil {
199-
return err
200-
}
201-
202197
// get the node details so that we can get the SQL port
203198
statusClient := conn.NewStatusClient()
204199
resp, err := statusClient.Details(ctx, &serverpb.DetailsRequest{NodeId: "local"})
@@ -213,6 +208,32 @@ will then convert it to the --format requested in the current invocation.
213208
return err
214209
}
215210

211+
// Get store-to-node mapping for metadata
212+
storeToNodeMap, err := getStoreToNodeMapping(ctx)
213+
if err != nil {
214+
return err
215+
}
216+
217+
// Create metadata header
218+
metadata := tsdumpmeta.Metadata{
219+
Version: build.BinaryVersion(),
220+
StoreToNodeMap: storeToNodeMap,
221+
CreatedAt: timeutil.Now(),
222+
}
223+
224+
// Buffer the writes to os.Stdout since we're going to
225+
// be writing potentially a lot of data to it.
226+
w := bufio.NewWriterSize(os.Stdout, 1024*1024)
227+
228+
// Write embedded metadata first
229+
if err := tsdumpmeta.Write(w, metadata); err != nil {
230+
return err
231+
}
232+
233+
if err := tsutil.DumpRawTo(stream, w); err != nil {
234+
return err
235+
}
236+
216237
if err = createYAML(ctx); err != nil {
217238
return err
218239
}
@@ -228,12 +249,26 @@ will then convert it to the --format requested in the current invocation.
228249
if err != nil {
229250
return err
230251
}
252+
defer f.Close()
231253
type tup struct {
232254
data *tspb.TimeSeriesData
233255
err error
234256
}
235257

236258
dec := gob.NewDecoder(f)
259+
260+
// Try to read embedded metadata first
261+
embeddedMetadata, metadataErr := tsdumpmeta.Read(dec)
262+
if metadataErr != nil {
263+
// No embedded metadata, restart from beginning
264+
if _, err := f.Seek(0, io.SeekStart); err != nil {
265+
return err
266+
}
267+
dec = gob.NewDecoder(f) // Reset decoder to read from beginning
268+
} else {
269+
fmt.Printf("Found embedded store-to-node mapping with %d entries\n", len(embeddedMetadata.StoreToNodeMap))
270+
}
271+
237272
decodeOne := func() (*tspb.TimeSeriesData, error) {
238273
var v roachpb.KeyValue
239274
err := dec.Decode(&v)
@@ -260,6 +295,10 @@ will then convert it to the --format requested in the current invocation.
260295
for {
261296
data, err := decodeOne()
262297
ch <- tup{data, err}
298+
// Exit the goroutine if we encounter EOF or any error
299+
if err != nil {
300+
break
301+
}
263302
}
264303
}()
265304

@@ -465,38 +504,57 @@ type openMetricsWriter struct {
465504
// createYAML generates and writes tsdump.yaml to default /tmp or to a specified path.
466505
// This file is used for staging the tsdump data into a local database for debugging
467506
func createYAML(ctx context.Context) (resErr error) {
507+
// Write the YAML file for backward compatibility
468508
file, err := os.OpenFile(debugTimeSeriesDumpOpts.yaml, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0666)
469509
if err != nil {
470510
return err
471511
}
472512
defer file.Close()
473513

474-
sqlConn, err := makeSQLClient(ctx, "cockroach tsdump", useSystemDb)
514+
mapping, err := getStoreToNodeMapping(ctx)
475515
if err != nil {
476516
return err
477517
}
478-
defer func() { resErr = errors.CombineErrors(resErr, sqlConn.Close()) }()
518+
519+
for storeID, nodeID := range mapping {
520+
_, err := fmt.Fprintf(file, "%s: %s\n", storeID, nodeID)
521+
if err != nil {
522+
return err
523+
}
524+
}
525+
return nil
526+
}
527+
528+
// getStoreToNodeMapping retrieves the store-to-node mapping from the database
529+
func getStoreToNodeMapping(ctx context.Context) (map[string]string, error) {
530+
sqlConn, err := makeSQLClient(ctx, "cockroach tsdump", useSystemDb)
531+
if err != nil {
532+
return nil, err
533+
}
534+
defer func() {
535+
if closeErr := sqlConn.Close(); closeErr != nil {
536+
fmt.Fprintf(os.Stderr, "Warning: failed to close SQL connection: %v\n", closeErr)
537+
}
538+
}()
479539

480540
_, rows, err := sqlExecCtx.RunQuery(
481541
ctx,
482542
sqlConn,
483-
clisqlclient.MakeQuery(`SELECT store_id || ': ' || node_id FROM crdb_internal.kv_store_status`), false)
543+
clisqlclient.MakeQuery(`SELECT store_id, node_id FROM crdb_internal.kv_store_status`), false)
484544

485545
if err != nil {
486-
return err
546+
return nil, err
487547
}
488548

489-
var strStoreNodeID string
549+
mapping := make(map[string]string)
490550
for _, row := range rows {
491-
storeNodeID := row
492-
strStoreNodeID = strings.Join(storeNodeID, " ")
493-
strStoreNodeID += "\n"
494-
_, err := file.WriteString(strStoreNodeID)
495-
if err != nil {
496-
return err
551+
if len(row) >= 2 {
552+
storeID := strings.TrimSpace(row[0])
553+
nodeID := strings.TrimSpace(row[1])
554+
mapping[storeID] = nodeID
497555
}
498556
}
499-
return nil
557+
return mapping, nil
500558
}
501559

502560
func makeOpenMetricsWriter(out io.Writer) *openMetricsWriter {

pkg/cli/tsdump_test.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package cli
88
import (
99
"bytes"
1010
"compress/gzip"
11+
"encoding/gob"
1112
"fmt"
1213
"io"
1314
"math/rand"
@@ -22,6 +23,7 @@ import (
2223

2324
"github.com/DataDog/datadog-api-client-go/v2/api/datadogV2"
2425
"github.com/cockroachdb/cockroach/pkg/testutils"
26+
"github.com/cockroachdb/cockroach/pkg/ts/tsdumpmeta"
2527
"github.com/cockroachdb/cockroach/pkg/ts/tspb"
2628
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
2729
"github.com/cockroachdb/cockroach/pkg/util/log"
@@ -134,6 +136,113 @@ func TestMakeOpenMetricsWriter(t *testing.T) {
134136
require.Equal(t, dataPointsNum+1 /* datapoints + EOF final line */, len(res))
135137
}
136138

139+
// TestTSDumpConversionWithEmbeddedMetadata tests the conversion of tsdump files
140+
// containing embedded metadata to CSV format, verifying metadata extraction.
141+
func TestTSDumpConversionWithEmbeddedMetadata(t *testing.T) {
142+
defer leaktest.AfterTest(t)()
143+
defer log.Scope(t).Close(t)
144+
145+
tmpFile, err := os.CreateTemp("", "tsdump_with_metadata_*.gob")
146+
require.NoError(t, err)
147+
defer func(name string) {
148+
err := os.Remove(name)
149+
if err != nil {
150+
t.Fatalf("failed to remove temporary file %s: %v", name, err)
151+
}
152+
}(tmpFile.Name())
153+
154+
metadata := tsdumpmeta.Metadata{
155+
Version: "v23.1.0",
156+
StoreToNodeMap: map[string]string{
157+
"1": "1",
158+
"2": "2",
159+
},
160+
CreatedAt: timeutil.Unix(1609459200, 0),
161+
}
162+
err = tsdumpmeta.Write(tmpFile, metadata)
163+
require.NoError(t, err)
164+
165+
enc := gob.NewEncoder(tmpFile)
166+
kv, err := createMockTimeSeriesKV("cr.node.sql.query.count", "1", 1609459200000000000, 100.5)
167+
require.NoError(t, err)
168+
err = enc.Encode(kv)
169+
require.NoError(t, err)
170+
171+
tmpFile.Close()
172+
173+
c := NewCLITest(TestCLIParams{})
174+
defer c.Cleanup()
175+
176+
// Convert to CSV format
177+
out, err := c.RunWithCapture(fmt.Sprintf(
178+
"debug tsdump --format=csv %s",
179+
tmpFile.Name(),
180+
))
181+
require.NoError(t, err)
182+
183+
lines := strings.Split(strings.TrimSpace(out), "\n")
184+
185+
require.Contains(t, out, "Found embedded store-to-node mapping with 2 entries")
186+
187+
csvFound := false
188+
for _, line := range lines {
189+
if strings.Contains(line, "cr.node.sql.query.count") &&
190+
strings.Contains(line, "2021-01-01T00:00:00Z") {
191+
csvFound = true
192+
break
193+
}
194+
}
195+
require.True(t, csvFound, "should contain converted CSV data")
196+
}
197+
198+
// TestTSDumpRawGenerationWithEmbeddedMetadata tests that raw format tsdump generation
199+
// automatically includes embedded store-to-node mapping metadata in the output.
200+
func TestTSDumpRawGenerationWithEmbeddedMetadata(t *testing.T) {
201+
defer leaktest.AfterTest(t)()
202+
defer log.Scope(t).Close(t)
203+
204+
c := NewCLITest(TestCLIParams{})
205+
defer c.Cleanup()
206+
207+
out, err := c.RunWithCapture("debug tsdump --format=raw --cluster-name=test-cluster-1 --disable-cluster-name-verification")
208+
require.NoError(t, err)
209+
require.NotEmpty(t, out)
210+
211+
// Remove the command prefix from the output to get the actual binary data
212+
// The output starts with something like "debug tsdump --format=raw..."
213+
// Find the first occurrence of gob magic bytes or skip the command line
214+
actualData := out
215+
if idx := strings.Index(out, "\n"); idx != -1 {
216+
actualData = out[idx+1:] // Skip the command line
217+
}
218+
219+
// Write the cleaned binary data to file
220+
tmpFile, err := os.CreateTemp("", "tsdump_*.gob")
221+
require.NoError(t, err)
222+
defer func(name string) {
223+
err := os.Remove(name)
224+
if err != nil {
225+
t.Fatalf("failed to remove temporary file %s: %v", name, err)
226+
}
227+
}(tmpFile.Name())
228+
229+
_, err = tmpFile.Write([]byte(actualData))
230+
require.NoError(t, err)
231+
tmpFile.Close()
232+
233+
file, err := os.Open(tmpFile.Name())
234+
require.NoError(t, err)
235+
defer file.Close()
236+
237+
dec := gob.NewDecoder(file)
238+
readMetadata, err := tsdumpmeta.Read(dec)
239+
require.NoError(t, err)
240+
241+
// Verify store-to-node mapping is embedded
242+
require.NotNil(t, readMetadata.StoreToNodeMap)
243+
require.Equal(t, "1", readMetadata.StoreToNodeMap["1"])
244+
}
245+
137246
func makeTS(name, source string, dataPointsNum int) *tspb.TimeSeriesData {
138247
dps := make([]tspb.TimeSeriesDatapoint, dataPointsNum)
139248
for i := range dps {

0 commit comments

Comments
 (0)