Skip to content

Commit 3d76e54

Browse files
committed
pkg/cli: embed metadata in tsdump files
This commit introduces functionality for writing and reading embedded metadata in tsdump files. Additionally, the upload process now prioritizes embedded metadata over external YAML configurations for store-to-node mappings, enhancing the tsdump upload workflow. Summary of changes - Current flow of tsdump generation now adds metadata information to it by default. Metadata consist of store-to-node mappings, CRDB version and creation timestamp. - The `tsdump upload` and `roachprod` are now capable of decoding this metadata so no more need to provide store-to-node mapping manually. - Changes are backward compatible for both `tsdump upload` and `roachprod upload` so existing flow of providing store-to-node mapping is not hampered. Recompiling of roachprod is required though. - A small bug fix of leaked go routine when raw tsdump is converted to any other format like csv. Part of: CRDB-51457 Epic: CRDB-52093 Release note: None
1 parent 8f5ae52 commit 3d76e54

File tree

12 files changed

+645
-52
lines changed

12 files changed

+645
-52
lines changed

pkg/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,7 @@ ALL_TESTS = [
709709
"//pkg/testutils/zerofields:zerofields_test",
710710
"//pkg/testutils:testutils_test",
711711
"//pkg/ts/testmodel:testmodel_test",
712+
"//pkg/ts/tsdumpmeta:tsdumpmeta_test",
712713
"//pkg/ts:ts_test",
713714
"//pkg/ui:ui_test",
714715
"//pkg/upgrade/upgradecluster:upgradecluster_test",
@@ -2524,6 +2525,8 @@ GO_TARGETS = [
25242525
"//pkg/ts/catalog:catalog",
25252526
"//pkg/ts/testmodel:testmodel",
25262527
"//pkg/ts/testmodel:testmodel_test",
2528+
"//pkg/ts/tsdumpmeta:tsdumpmeta",
2529+
"//pkg/ts/tsdumpmeta:tsdumpmeta_test",
25272530
"//pkg/ts/tspb:tspb",
25282531
"//pkg/ts/tsutil:tsutil",
25292532
"//pkg/ts:ts",

pkg/cli/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ go_library(
193193
"//pkg/testutils/serverutils",
194194
"//pkg/ts",
195195
"//pkg/ts/catalog",
196+
"//pkg/ts/tsdumpmeta",
196197
"//pkg/ts/tspb",
197198
"//pkg/ts/tsutil",
198199
"//pkg/upgrade/upgrades",
@@ -453,6 +454,7 @@ go_test(
453454
"//pkg/testutils/sqlutils",
454455
"//pkg/testutils/testcluster",
455456
"//pkg/ts",
457+
"//pkg/ts/tsdumpmeta",
456458
"//pkg/ts/tspb",
457459
"//pkg/util",
458460
"//pkg/util/envutil",
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Test embedded metadata functionality for tsdump upload
2+
3+
# Test case 1: embedded metadata only (no YAML file)
4+
# This tests that store metrics get proper node_id tags based on embedded store-to-node mapping
5+
upload-datadog-embedded-only
6+
cr.store.rocksdb.block.cache.usage,2021-01-01T00:00:00Z,1,75.2
7+
cr.store.rocksdb.num.entries,2021-01-01T00:00:10Z,3,42.0
8+
cr.node.sql.query.count,2021-01-01T00:00:20Z,1,100.5
9+
----
10+
----
11+
{"series":[{"interval":10,"metric":"crdb.tsdump.rocksdb.block.cache.usage","points":[{"timestamp":1609459200,"value":75.2}],"tags":["node_id:1","store:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":3},{"interval":10,"metric":"crdb.tsdump.rocksdb.num.entries","points":[{"timestamp":1609459210,"value":42}],"tags":["node_id:2","store:3","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":0},{"interval":10,"metric":"crdb.tsdump.sql.query.count","points":[{"timestamp":1609459220,"value":100.5}],"tags":["node_id:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":1}]}
12+
13+
[{"ddsource":"tsdump_upload","ddtags":"cluster_type:SELF_HOSTED,cluster_label:\"test-cluster\",cluster_id:test-cluster-id,zendesk_ticket:zd-test,org_name:test-org,user_name:test-user,upload_id:\"test-cluster\"-20241114000000,upload_timestamp:2024-11-14 00:00:00,upload_year:2024,upload_month:11,upload_day:14,series_uploaded:3","dry_run":"false","duration":"0","estimated_cost":"0.000186986301369863","hostname":"hostname","message":"tsdump upload completed: uploaded 3 series overall","series_uploaded":"3","service":"tsdump_upload","success":"true"}]
14+
----
15+
----
16+
17+
# Test case 2: YAML file only (no embedded metadata)
18+
# This tests that store metrics get proper node_id tags from external YAML when no embedded metadata is present
19+
upload-datadog-yaml-only
20+
cr.store.rocksdb.block.cache.usage,2021-01-01T00:00:00Z,1,75.2
21+
----
22+
----
23+
{"series":[{"interval":10,"metric":"crdb.tsdump.rocksdb.block.cache.usage","points":[{"timestamp":1609459200,"value":75.2}],"tags":["node_id:1","store:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":3}]}
24+
25+
[{"ddsource":"tsdump_upload","ddtags":"cluster_type:SELF_HOSTED,cluster_label:\"test-cluster\",cluster_id:test-cluster-id,zendesk_ticket:zd-test,org_name:test-org,user_name:test-user,upload_id:\"test-cluster\"-20241114000000,upload_timestamp:2024-11-14 00:00:00,upload_year:2024,upload_month:11,upload_day:14,series_uploaded:1","dry_run":"false","duration":"0","estimated_cost":"6.232876712328767e-05","hostname":"hostname","message":"tsdump upload completed: uploaded 1 series overall","series_uploaded":"1","service":"tsdump_upload","success":"true"}]
26+
----
27+
----
28+
29+
# Test case 3: both embedded metadata and YAML file (embedded takes priority)
30+
# This tests that embedded metadata is prioritized over external YAML when both are present, proving precedence
31+
upload-datadog-embedded-priority
32+
cr.store.rocksdb.block.cache.usage,2021-01-01T00:00:00Z,1,75.2
33+
cr.store.rocksdb.num.entries,2021-01-01T00:00:10Z,3,42.0
34+
----
35+
----
36+
{"series":[{"interval":10,"metric":"crdb.tsdump.rocksdb.block.cache.usage","points":[{"timestamp":1609459200,"value":75.2}],"tags":["node_id:1","store:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":3},{"interval":10,"metric":"crdb.tsdump.rocksdb.num.entries","points":[{"timestamp":1609459210,"value":42}],"tags":["node_id:2","store:3","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":0}]}
37+
38+
[{"ddsource":"tsdump_upload","ddtags":"cluster_type:SELF_HOSTED,cluster_label:\"test-cluster\",cluster_id:test-cluster-id,zendesk_ticket:zd-test,org_name:test-org,user_name:test-user,upload_id:\"test-cluster\"-20241114000000,upload_timestamp:2024-11-14 00:00:00,upload_year:2024,upload_month:11,upload_day:14,series_uploaded:2","dry_run":"false","duration":"0","estimated_cost":"0.00012465753424657533","hostname":"hostname","message":"tsdump upload completed: uploaded 2 series overall","series_uploaded":"2","service":"tsdump_upload","success":"true"}]
39+
----
40+
----
41+
42+
# Test case 4: no metadata and no YAML file (normal upload)
43+
# This tests normal upload behavior when no store-to-node mapping is available, store metrics get only store tags
44+
upload-datadog-no-mapping
45+
cr.store.rocksdb.block.cache.usage,2021-01-01T00:00:00Z,1,75.2
46+
cr.node.sql.query.count,2021-01-01T00:00:20Z,1,100.5
47+
----
48+
----
49+
{"series":[{"interval":10,"metric":"crdb.tsdump.rocksdb.block.cache.usage","points":[{"timestamp":1609459200,"value":75.2}],"tags":["store:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":3},{"interval":10,"metric":"crdb.tsdump.sql.query.count","points":[{"timestamp":1609459220,"value":100.5}],"tags":["node_id:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":1}]}
50+
51+
[{"ddsource":"tsdump_upload","ddtags":"cluster_type:SELF_HOSTED,cluster_label:\"test-cluster\",cluster_id:test-cluster-id,zendesk_ticket:zd-test,org_name:test-org,user_name:test-user,upload_id:\"test-cluster\"-20241114000000,upload_timestamp:2024-11-14 00:00:00,upload_year:2024,upload_month:11,upload_day:14,series_uploaded:2","dry_run":"false","duration":"0","estimated_cost":"0.00012465753424657533","hostname":"hostname","message":"tsdump upload completed: uploaded 2 series overall","series_uploaded":"2","service":"tsdump_upload","success":"true"}]
52+
----
53+
----

pkg/cli/tsdump.go

Lines changed: 77 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@ import (
2222
"sync"
2323
"time"
2424

25+
"github.com/cockroachdb/cockroach/pkg/build"
2526
"github.com/cockroachdb/cockroach/pkg/cli/clierrorplus"
2627
"github.com/cockroachdb/cockroach/pkg/cli/clisqlclient"
2728
"github.com/cockroachdb/cockroach/pkg/roachpb"
2829
"github.com/cockroachdb/cockroach/pkg/server/serverpb"
2930
"github.com/cockroachdb/cockroach/pkg/ts"
31+
"github.com/cockroachdb/cockroach/pkg/ts/tsdumpmeta"
3032
"github.com/cockroachdb/cockroach/pkg/ts/tspb"
3133
"github.com/cockroachdb/cockroach/pkg/ts/tsutil"
3234
"github.com/cockroachdb/cockroach/pkg/util/netutil/addr"
@@ -192,13 +194,6 @@ will then convert it to the --format requested in the current invocation.
192194
return errors.Wrapf(err, "connecting to %s", target)
193195
}
194196

195-
// Buffer the writes to os.Stdout since we're going to
196-
// be writing potentially a lot of data to it.
197-
w := bufio.NewWriterSize(os.Stdout, 1024*1024)
198-
if err := tsutil.DumpRawTo(stream, w); err != nil {
199-
return err
200-
}
201-
202197
// get the node details so that we can get the SQL port
203198
statusClient := conn.NewStatusClient()
204199
resp, err := statusClient.Details(ctx, &serverpb.DetailsRequest{NodeId: "local"})
@@ -213,6 +208,32 @@ will then convert it to the --format requested in the current invocation.
213208
return err
214209
}
215210

211+
// Get store-to-node mapping for metadata
212+
storeToNodeMap, err := getStoreToNodeMapping(ctx)
213+
if err != nil {
214+
return err
215+
}
216+
217+
// Create metadata header
218+
metadata := tsdumpmeta.Metadata{
219+
Version: build.BinaryVersion(),
220+
StoreToNodeMap: storeToNodeMap,
221+
CreatedAt: timeutil.Now(),
222+
}
223+
224+
// Buffer the writes to os.Stdout since we're going to
225+
// be writing potentially a lot of data to it.
226+
w := bufio.NewWriterSize(os.Stdout, 1024*1024)
227+
228+
// Write embedded metadata first
229+
if err := tsdumpmeta.Write(w, metadata); err != nil {
230+
return err
231+
}
232+
233+
if err := tsutil.DumpRawTo(stream, w); err != nil {
234+
return err
235+
}
236+
216237
if err = createYAML(ctx); err != nil {
217238
return err
218239
}
@@ -228,12 +249,26 @@ will then convert it to the --format requested in the current invocation.
228249
if err != nil {
229250
return err
230251
}
252+
defer f.Close()
231253
type tup struct {
232254
data *tspb.TimeSeriesData
233255
err error
234256
}
235257

236258
dec := gob.NewDecoder(f)
259+
260+
// Try to read embedded metadata first
261+
embeddedMetadata, metadataErr := tsdumpmeta.Read(dec)
262+
if metadataErr != nil {
263+
// No embedded metadata, restart from beginning
264+
if _, err := f.Seek(0, io.SeekStart); err != nil {
265+
return err
266+
}
267+
dec = gob.NewDecoder(f) // Reset decoder to read from beginning
268+
} else {
269+
fmt.Printf("Found embedded store-to-node mapping with %d entries\n", len(embeddedMetadata.StoreToNodeMap))
270+
}
271+
237272
decodeOne := func() (*tspb.TimeSeriesData, error) {
238273
var v roachpb.KeyValue
239274
err := dec.Decode(&v)
@@ -260,6 +295,10 @@ will then convert it to the --format requested in the current invocation.
260295
for {
261296
data, err := decodeOne()
262297
ch <- tup{data, err}
298+
// Exit the goroutine if we encounter EOF or any error
299+
if err != nil {
300+
break
301+
}
263302
}
264303
}()
265304

@@ -465,38 +504,57 @@ type openMetricsWriter struct {
465504
// createYAML generates and writes tsdump.yaml to default /tmp or to a specified path.
466505
// This file is used for staging the tsdump data into a local database for debugging
467506
func createYAML(ctx context.Context) (resErr error) {
507+
// Write the YAML file for backward compatibility
468508
file, err := os.OpenFile(debugTimeSeriesDumpOpts.yaml, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0666)
469509
if err != nil {
470510
return err
471511
}
472512
defer file.Close()
473513

474-
sqlConn, err := makeSQLClient(ctx, "cockroach tsdump", useSystemDb)
514+
mapping, err := getStoreToNodeMapping(ctx)
475515
if err != nil {
476516
return err
477517
}
478-
defer func() { resErr = errors.CombineErrors(resErr, sqlConn.Close()) }()
518+
519+
for storeID, nodeID := range mapping {
520+
_, err := fmt.Fprintf(file, "%s: %s\n", storeID, nodeID)
521+
if err != nil {
522+
return err
523+
}
524+
}
525+
return nil
526+
}
527+
528+
// getStoreToNodeMapping retrieves the store-to-node mapping from the database
529+
func getStoreToNodeMapping(ctx context.Context) (map[string]string, error) {
530+
sqlConn, err := makeSQLClient(ctx, "cockroach tsdump", useSystemDb)
531+
if err != nil {
532+
return nil, err
533+
}
534+
defer func() {
535+
if closeErr := sqlConn.Close(); closeErr != nil {
536+
fmt.Fprintf(os.Stderr, "Warning: failed to close SQL connection: %v\n", closeErr)
537+
}
538+
}()
479539

480540
_, rows, err := sqlExecCtx.RunQuery(
481541
ctx,
482542
sqlConn,
483-
clisqlclient.MakeQuery(`SELECT store_id || ': ' || node_id FROM crdb_internal.kv_store_status`), false)
543+
clisqlclient.MakeQuery(`SELECT store_id, node_id FROM crdb_internal.kv_store_status`), false)
484544

485545
if err != nil {
486-
return err
546+
return nil, err
487547
}
488548

489-
var strStoreNodeID string
549+
mapping := make(map[string]string)
490550
for _, row := range rows {
491-
storeNodeID := row
492-
strStoreNodeID = strings.Join(storeNodeID, " ")
493-
strStoreNodeID += "\n"
494-
_, err := file.WriteString(strStoreNodeID)
495-
if err != nil {
496-
return err
551+
if len(row) >= 2 {
552+
storeID := strings.TrimSpace(row[0])
553+
nodeID := strings.TrimSpace(row[1])
554+
mapping[storeID] = nodeID
497555
}
498556
}
499-
return nil
557+
return mapping, nil
500558
}
501559

502560
func makeOpenMetricsWriter(out io.Writer) *openMetricsWriter {

pkg/cli/tsdump_test.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package cli
88
import (
99
"bytes"
1010
"compress/gzip"
11+
"encoding/gob"
1112
"fmt"
1213
"io"
1314
"math/rand"
@@ -22,6 +23,7 @@ import (
2223

2324
"github.com/DataDog/datadog-api-client-go/v2/api/datadogV2"
2425
"github.com/cockroachdb/cockroach/pkg/testutils"
26+
"github.com/cockroachdb/cockroach/pkg/ts/tsdumpmeta"
2527
"github.com/cockroachdb/cockroach/pkg/ts/tspb"
2628
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
2729
"github.com/cockroachdb/cockroach/pkg/util/log"
@@ -134,6 +136,113 @@ func TestMakeOpenMetricsWriter(t *testing.T) {
134136
require.Equal(t, dataPointsNum+1 /* datapoints + EOF final line */, len(res))
135137
}
136138

139+
// TestTSDumpConversionWithEmbeddedMetadata tests the conversion of tsdump files
140+
// containing embedded metadata to CSV format, verifying metadata extraction.
141+
func TestTSDumpConversionWithEmbeddedMetadata(t *testing.T) {
142+
defer leaktest.AfterTest(t)()
143+
defer log.Scope(t).Close(t)
144+
145+
tmpFile, err := os.CreateTemp("", "tsdump_with_metadata_*.gob")
146+
require.NoError(t, err)
147+
defer func(name string) {
148+
err := os.Remove(name)
149+
if err != nil {
150+
t.Fatalf("failed to remove temporary file %s: %v", name, err)
151+
}
152+
}(tmpFile.Name())
153+
154+
metadata := tsdumpmeta.Metadata{
155+
Version: "v23.1.0",
156+
StoreToNodeMap: map[string]string{
157+
"1": "1",
158+
"2": "2",
159+
},
160+
CreatedAt: timeutil.Unix(1609459200, 0),
161+
}
162+
err = tsdumpmeta.Write(tmpFile, metadata)
163+
require.NoError(t, err)
164+
165+
enc := gob.NewEncoder(tmpFile)
166+
kv, err := createMockTimeSeriesKV("cr.node.sql.query.count", "1", 1609459200000000000, 100.5)
167+
require.NoError(t, err)
168+
err = enc.Encode(kv)
169+
require.NoError(t, err)
170+
171+
tmpFile.Close()
172+
173+
c := NewCLITest(TestCLIParams{})
174+
defer c.Cleanup()
175+
176+
// Convert to CSV format
177+
out, err := c.RunWithCapture(fmt.Sprintf(
178+
"debug tsdump --format=csv %s",
179+
tmpFile.Name(),
180+
))
181+
require.NoError(t, err)
182+
183+
lines := strings.Split(strings.TrimSpace(out), "\n")
184+
185+
require.Contains(t, out, "Found embedded store-to-node mapping with 2 entries")
186+
187+
csvFound := false
188+
for _, line := range lines {
189+
if strings.Contains(line, "cr.node.sql.query.count") &&
190+
strings.Contains(line, "2021-01-01T00:00:00Z") {
191+
csvFound = true
192+
break
193+
}
194+
}
195+
require.True(t, csvFound, "should contain converted CSV data")
196+
}
197+
198+
// TestTSDumpRawGenerationWithEmbeddedMetadata tests that raw format tsdump generation
199+
// automatically includes embedded store-to-node mapping metadata in the output.
200+
func TestTSDumpRawGenerationWithEmbeddedMetadata(t *testing.T) {
201+
defer leaktest.AfterTest(t)()
202+
defer log.Scope(t).Close(t)
203+
204+
c := NewCLITest(TestCLIParams{})
205+
defer c.Cleanup()
206+
207+
out, err := c.RunWithCapture("debug tsdump --format=raw --cluster-name=test-cluster-1 --disable-cluster-name-verification")
208+
require.NoError(t, err)
209+
require.NotEmpty(t, out)
210+
211+
// Remove the command prefix from the output to get the actual binary data
212+
// The output starts with something like "debug tsdump --format=raw..."
213+
// Find the first occurrence of gob magic bytes or skip the command line
214+
actualData := out
215+
if idx := strings.Index(out, "\n"); idx != -1 {
216+
actualData = out[idx+1:] // Skip the command line
217+
}
218+
219+
// Write the cleaned binary data to file
220+
tmpFile, err := os.CreateTemp("", "tsdump_*.gob")
221+
require.NoError(t, err)
222+
defer func(name string) {
223+
err := os.Remove(name)
224+
if err != nil {
225+
t.Fatalf("failed to remove temporary file %s: %v", name, err)
226+
}
227+
}(tmpFile.Name())
228+
229+
_, err = tmpFile.Write([]byte(actualData))
230+
require.NoError(t, err)
231+
tmpFile.Close()
232+
233+
file, err := os.Open(tmpFile.Name())
234+
require.NoError(t, err)
235+
defer file.Close()
236+
237+
dec := gob.NewDecoder(file)
238+
readMetadata, err := tsdumpmeta.Read(dec)
239+
require.NoError(t, err)
240+
241+
// Verify store-to-node mapping is embedded
242+
require.NotNil(t, readMetadata.StoreToNodeMap)
243+
require.Equal(t, "1", readMetadata.StoreToNodeMap["1"])
244+
}
245+
137246
func makeTS(name, source string, dataPointsNum int) *tspb.TimeSeriesData {
138247
dps := make([]tspb.TimeSeriesDatapoint, dataPointsNum)
139248
for i := range dps {

0 commit comments

Comments
 (0)