Skip to content

Commit 301342a

Browse files
craig[bot] and arjunmahishi committed
Merge #151385
151385: pkg/cli: add gzip support to tsdump upload command r=arjunmahishi a=arjunmahishi This commit enhances the tsdump upload command to support gzipped input files in addition to the existing zip and raw file support. Previously, only zip archives and uncompressed files could be processed. Now users can upload gzipped tsdump files directly. Changes: * Add gzip file signature detection in `getFileReader()` * Add tests cases for gzip and zip in `TestTSDumpUploadE2E` The implementation detects file types using magic number signatures and handles each format appropriately: raw files are read directly, zip archives extract the first file (with warnings for multiple files), and gzip files are decompressed transparently. Fixes: CC-33309 Release note: None Co-authored-by: Arjun Mahishi <[email protected]>
2 parents 7c9d334 + b01aa56 commit 301342a

File tree

3 files changed

+134
-48
lines changed

3 files changed

+134
-48
lines changed

pkg/cli/testdata/tsdump_upload_e2e

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,31 @@ cr.store.rocksdb.block.cache.usage,2021-01-01T00:00:00Z,2,75.2
1111
[{"ddsource":"tsdump_upload","ddtags":"cluster_type:SELF_HOSTED,cluster_label:\"test-cluster\",cluster_id:test-cluster-id,zendesk_ticket:zd-test,org_name:test-org,user_name:test-user,upload_id:\"test-cluster\"-20241114000000,upload_timestamp:2024-11-14 00:00:00,upload_year:2024,upload_month:11,upload_day:14,series_uploaded:4","dry_run":"false","duration":"0","estimated_cost":"0.000186986301369863","hostname":"hostname","message":"tsdump upload completed: uploaded 4 series overall","series_uploaded":"4","service":"tsdump_upload","success":"true"}]
1212
----
1313
----
14+
15+
16+
upload-datadog compression=gzip
17+
cr.node.admission.admitted.elastic-cpu,2025-05-26T08:32:00Z,1,1
18+
cr.node.sql.query.count,2021-01-01T00:00:00Z,1,100.5
19+
cr.node.sql.query.count,2021-01-01T00:00:10Z,1,102.3
20+
cr.store.rocksdb.block.cache.usage,2021-01-01T00:00:00Z,2,75.2
21+
----
22+
----
23+
{"series":[{"interval":10,"metric":"cockroachdb.admission.admitted.elastic-cpu","points":[{"timestamp":1748248320,"value":1}],"tags":["node_id:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":1},{"interval":10,"metric":"cockroachdb.sql.query.count","points":[{"timestamp":1609459200,"value":100.5}],"tags":["node_id:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":1},{"interval":10,"metric":"cockroachdb.sql.query.count","points":[{"timestamp":1609459210,"value":102.3}],"tags":["node_id:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":1},{"interval":10,"metric":"cockroachdb.rocksdb.block.cache.usage","points":[{"timestamp":1609459200,"value":75.2}],"tags":["store:2","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":3}]}
24+
25+
[{"ddsource":"tsdump_upload","ddtags":"cluster_type:SELF_HOSTED,cluster_label:\"test-cluster\",cluster_id:test-cluster-id,zendesk_ticket:zd-test,org_name:test-org,user_name:test-user,upload_id:\"test-cluster\"-20241114000000,upload_timestamp:2024-11-14 00:00:00,upload_year:2024,upload_month:11,upload_day:14,series_uploaded:4","dry_run":"false","duration":"0","estimated_cost":"0.000186986301369863","hostname":"hostname","message":"tsdump upload completed: uploaded 4 series overall","series_uploaded":"4","service":"tsdump_upload","success":"true"}]
26+
----
27+
----
28+
29+
30+
upload-datadog compression=zip
31+
cr.node.admission.admitted.elastic-cpu,2025-05-26T08:32:00Z,1,1
32+
cr.node.sql.query.count,2021-01-01T00:00:00Z,1,100.5
33+
cr.node.sql.query.count,2021-01-01T00:00:10Z,1,102.3
34+
cr.store.rocksdb.block.cache.usage,2021-01-01T00:00:00Z,2,75.2
35+
----
36+
----
37+
{"series":[{"interval":10,"metric":"cockroachdb.admission.admitted.elastic-cpu","points":[{"timestamp":1748248320,"value":1}],"tags":["node_id:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":1},{"interval":10,"metric":"cockroachdb.sql.query.count","points":[{"timestamp":1609459200,"value":100.5}],"tags":["node_id:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":1},{"interval":10,"metric":"cockroachdb.sql.query.count","points":[{"timestamp":1609459210,"value":102.3}],"tags":["node_id:1","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":1},{"interval":10,"metric":"cockroachdb.rocksdb.block.cache.usage","points":[{"timestamp":1609459200,"value":75.2}],"tags":["store:2","cluster_type:SELF_HOSTED","cluster_label:\"test-cluster\"","cluster_id:test-cluster-id","zendesk_ticket:zd-test","org_name:test-org","user_name:test-user","upload_id:\"test-cluster\"-20241114000000","upload_timestamp:2024-11-14 00:00:00","upload_year:2024","upload_month:11","upload_day:14"],"type":3}]}
38+
39+
[{"ddsource":"tsdump_upload","ddtags":"cluster_type:SELF_HOSTED,cluster_label:\"test-cluster\",cluster_id:test-cluster-id,zendesk_ticket:zd-test,org_name:test-org,user_name:test-user,upload_id:\"test-cluster\"-20241114000000,upload_timestamp:2024-11-14 00:00:00,upload_year:2024,upload_month:11,upload_day:14,series_uploaded:4","dry_run":"false","duration":"0","estimated_cost":"0.000186986301369863","hostname":"hostname","message":"tsdump upload completed: uploaded 4 series overall","series_uploaded":"4","service":"tsdump_upload","success":"true"}]
40+
----
41+
----

pkg/cli/tsdump_upload.go

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package cli
88
import (
99
"archive/zip"
1010
"bytes"
11+
"compress/gzip"
1112
"context"
1213
"encoding/gob"
1314
"encoding/json"
@@ -58,6 +59,7 @@ var (
5859
datadogDashboardURLFormat = "https://us5.datadoghq.com/dashboard/zx7-9yt-dz9?" +
5960
"tpl_var_cluster=%s&tpl_var_upload_id=%s&tpl_var_upload_day=%d&tpl_var_upload_month=%d&tpl_var_upload_year=%d&from_ts=%d&to_ts=%d"
6061
zipFileSignature = []byte{0x50, 0x4B, 0x03, 0x04}
62+
gzipFileSignature = []byte{0x1f, 0x8b}
6163
logMessageFormat = "tsdump upload to datadog is partially failed for metric: %s"
6264
partialFailureMessageFormat = "The Tsdump upload to Datadog succeeded but %d metrics partially failed to upload." +
6365
" These failures can be due to transient network errors.\nMetrics:\n%s\n" +
@@ -863,7 +865,7 @@ func getFileReader(fileName string) (io.Reader, error) {
863865
return nil, err
864866
}
865867

866-
// Check if the file is a zip file by reading its magic number
868+
// Read magic number to detect file type
867869
buf := make([]byte, 4)
868870
if _, err := file.Read(buf); err != nil {
869871
return nil, err
@@ -874,27 +876,37 @@ func getFileReader(fileName string) (io.Reader, error) {
874876
return nil, err
875877
}
876878

877-
// Check for zip file signature
878-
if bytes.HasPrefix(buf, zipFileSignature) {
879+
switch {
880+
case bytes.HasPrefix(buf, zipFileSignature):
879881
zipReader, err := zip.NewReader(file, fileSize(file))
880882
if err != nil {
881883
return nil, err
882884
}
883885

884-
if len(zipReader.File) > 0 {
885-
if len(zipReader.File) > 1 {
886-
fmt.Printf("tsdump datadog upload: warning: more than one file in zip archive, using the first file %s\n", zipReader.File[0].Name)
887-
}
888-
firstFile, err := zipReader.File[0].Open()
889-
if err != nil {
890-
return nil, err
891-
}
892-
return firstFile, nil
886+
if len(zipReader.File) == 0 {
887+
return nil, fmt.Errorf("zip archive is empty")
893888
}
894-
return nil, fmt.Errorf("zip archive is empty")
895-
}
896889

897-
return file, nil
890+
if len(zipReader.File) > 1 {
891+
fmt.Printf("tsdump datadog upload: warning: more than one file in zip archive, using the first file %s\n", zipReader.File[0].Name)
892+
}
893+
894+
firstFile, err := zipReader.File[0].Open()
895+
if err != nil {
896+
return nil, err
897+
}
898+
return firstFile, nil
899+
900+
case bytes.HasPrefix(buf, gzipFileSignature):
901+
gzipReader, err := gzip.NewReader(file)
902+
if err != nil {
903+
return nil, err
904+
}
905+
return gzipReader, nil
906+
907+
default:
908+
return file, nil
909+
}
898910
}
899911

900912
// fileSize returns the size of the file.

pkg/cli/tsdump_upload_test.go

Lines changed: 79 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package cli
77

88
import (
9+
"archive/zip"
910
"compress/gzip"
1011
"encoding/csv"
1112
"encoding/gob"
@@ -74,8 +75,11 @@ func TestTSDumpUploadE2E(t *testing.T) {
7475
debugTimeSeriesDumpOpts.zendeskTicket = "zd-test"
7576
debugTimeSeriesDumpOpts.organizationName = "test-org"
7677
debugTimeSeriesDumpOpts.userName = "test-user"
77-
dumpFilePath := generateMockTSDumpFromCSV(t, d.Input)
7878

79+
var compression string
80+
d.MaybeScanArgs(t, "compression", &compression)
81+
82+
dumpFilePath := generateMockTSDumpFromCSV(t, d.Input, withCompression(compression))
7983
var clusterLabel, apiKey string
8084
if d.HasArg("cluster-label") {
8185
d.ScanArgs(t, "cluster-label", &clusterLabel)
@@ -276,7 +280,7 @@ func TestTSDumpUploadWithEmbeddedMetadataDataDriven(t *testing.T) {
276280
case "upload-datadog-embedded-only":
277281
// Embedded metadata only (no YAML file)
278282
// This tests that store metrics get proper node_id tags based on embedded store-to-node mapping
279-
dumpFilePath = generateMockTSDumpWithEmbeddedMetadata(t, d.Input)
283+
dumpFilePath = generateMockTSDumpFromCSV(t, d.Input, withEmbeddedMetadata())
280284

281285
case "upload-datadog-yaml-only":
282286
// YAML file only (no embedded metadata)
@@ -292,7 +296,7 @@ func TestTSDumpUploadWithEmbeddedMetadataDataDriven(t *testing.T) {
292296
case "upload-datadog-embedded-priority":
293297
// Both embedded metadata and YAML file (embedded takes priority)
294298
// This tests that embedded metadata is prioritized over external YAML when both are present, proving precedence
295-
dumpFilePath = generateMockTSDumpWithEmbeddedMetadata(t, d.Input)
299+
dumpFilePath = generateMockTSDumpFromCSV(t, d.Input, withEmbeddedMetadata())
296300

297301
yamlContent := `1: "99"
298302
2: "99"
@@ -326,53 +330,95 @@ func TestTSDumpUploadWithEmbeddedMetadataDataDriven(t *testing.T) {
326330
})
327331
}
328332

329-
// generateMockTSDumpWithEmbeddedMetadata creates a mock tsdump GOB file that includes
330-
// embedded store-to-node mapping metadata along with time series data from CSV input.
331-
func generateMockTSDumpWithEmbeddedMetadata(t *testing.T, csvInput string) string {
332-
t.Helper()
333+
// mockTSDumpOption represents a functional option for configuring tsdump generation.
334+
type mockTSDumpOption func(*mockTSDumpConfig)
333335

334-
tmpFile, err := os.CreateTemp("", "mock_tsdump_with_metadata_*.gob")
335-
require.NoError(t, err)
336-
defer tmpFile.Close()
336+
type mockTSDumpConfig struct {
337+
compression string
338+
metadata *tsdumpmeta.Metadata
339+
}
337340

338-
metadata := tsdumpmeta.Metadata{
339-
Version: "v23.1.0",
340-
StoreToNodeMap: map[string]string{
341-
"1": "1",
342-
"2": "1",
343-
"3": "2",
344-
},
345-
CreatedAt: timeutil.Unix(1609459200, 0),
341+
// withCompression sets the compression format for the tsdump file.
342+
func withCompression(compression string) mockTSDumpOption {
343+
return func(c *mockTSDumpConfig) {
344+
c.compression = compression
346345
}
347-
err = tsdumpmeta.Write(tmpFile, metadata)
348-
require.NoError(t, err)
349-
350-
encoder := gob.NewEncoder(tmpFile)
351-
writeTimeSeriesDataFromCSV(t, csvInput, encoder)
346+
}
352347

353-
t.Cleanup(func() {
354-
require.NoError(t, os.Remove(tmpFile.Name()), "failed to remove temporary file")
355-
})
356-
return tmpFile.Name()
348+
// withEmbeddedMetadata adds embedded metadata to the tsdump file.
349+
func withEmbeddedMetadata() mockTSDumpOption {
350+
return func(c *mockTSDumpConfig) {
351+
c.metadata = &tsdumpmeta.Metadata{
352+
Version: "v23.1.0",
353+
StoreToNodeMap: map[string]string{
354+
"1": "1",
355+
"2": "1",
356+
"3": "2",
357+
},
358+
CreatedAt: timeutil.Unix(1609459200, 0),
359+
}
360+
}
357361
}
358362

359363
// generateMockTSDumpFromCSV creates a mock tsdump file from CSV input string.
360364
// CSV format: metric_name,timestamp,source,value
361365
// Example: cr.node.admission.admitted.elastic-cpu,2025-05-26T08:32:00Z,1,1
362366
// NOTE: this is the same format generated by the `cockroach tsdump` command
363367
// when --format=csv is used.
364-
func generateMockTSDumpFromCSV(t *testing.T, csvInput string) string {
368+
// Options can be used to configure compression and embedded metadata.
369+
func generateMockTSDumpFromCSV(t *testing.T, csvInput string, options ...mockTSDumpOption) string {
365370
t.Helper()
366371

367-
// Create temporary file
368-
tmpFile, err := os.CreateTemp("", "mock_tsdump_*.gob")
372+
// Apply options to config
373+
config := &mockTSDumpConfig{}
374+
for _, option := range options {
375+
option(config)
376+
}
377+
378+
// Parse CSV data from input string
379+
reader := csv.NewReader(strings.NewReader(csvInput))
380+
csvData, err := reader.ReadAll()
381+
require.NoError(t, err)
382+
require.Greater(t, len(csvData), 0, "CSV input must have at least one data row")
383+
384+
// Create file and encoder based on compression in single switch
385+
var filePattern string
386+
var encoder *gob.Encoder
387+
tmpFile, err := os.CreateTemp("", filePattern)
369388
require.NoError(t, err)
370389
defer tmpFile.Close()
371390

372-
// Create gob encoder
373-
encoder := gob.NewEncoder(tmpFile)
374-
writeTimeSeriesDataFromCSV(t, csvInput, encoder)
391+
if config.metadata != nil {
392+
err = tsdumpmeta.Write(tmpFile, *config.metadata)
393+
require.NoError(t, err)
394+
}
395+
396+
switch config.compression {
397+
case "gzip":
398+
filePattern = "mock_tsdump_*.gob.gz"
399+
gzipWriter := gzip.NewWriter(tmpFile)
400+
defer func() {
401+
require.NoError(t, gzipWriter.Close(), "failed to close gzip writer")
402+
}()
403+
encoder = gob.NewEncoder(gzipWriter)
404+
405+
case "zip":
406+
filePattern = "mock_tsdump_*.gob.zip"
407+
zipWriter := zip.NewWriter(tmpFile)
408+
defer func() {
409+
require.NoError(t, zipWriter.Close(), "failed to close zip writer")
410+
}()
411+
412+
writer, err := zipWriter.Create("tsdump.gob")
413+
require.NoError(t, err)
414+
encoder = gob.NewEncoder(writer)
415+
416+
default:
417+
filePattern = "mock_tsdump_*.gob"
418+
encoder = gob.NewEncoder(tmpFile)
419+
}
375420

421+
writeTimeSeriesDataFromCSV(t, csvInput, encoder)
376422
t.Cleanup(func() {
377423
require.NoError(t, os.Remove(tmpFile.Name()), "failed to remove temporary file")
378424
})

0 commit comments

Comments
 (0)