Skip to content

Commit 85525d4

Browse files
authored
Merge pull request #71 from ethpandaops/feat/xatu-cbt-parquet-exporter-pt2
feat: add generate-transformation-test command for xatu-cbt
2 parents 0891d11 + fb7b5c8 commit 85525d4

File tree

11 files changed

+2159 -23 lines changed

pkg/commands/lab_xatu_cbt.go

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,14 +14,18 @@ func NewLabXatuCBTCommand(log logrus.FieldLogger, configPath string) *cobra.Comm
1414
for tests.
1515
1616
Common workflows:
17-
1. Generate seed data for tests:
17+
1. Generate seed data for a single external model:
1818
xcli lab xatu-cbt generate-seed-data
1919
20+
2. Generate test YAML for transformation models (auto-resolves dependencies):
21+
xcli lab xatu-cbt generate-transformation-test
22+
2023
Use 'xcli lab xatu-cbt [command] --help' for more information about a command.`,
2124
}
2225

2326
// Add xatu-cbt subcommands
2427
cmd.AddCommand(NewLabXatuCBTGenerateSeedDataCommand(log, configPath))
28+
cmd.AddCommand(NewLabXatuCBTGenerateTransformationTestCommand(log, configPath))
2529

2630
return cmd
2731
}

pkg/commands/lab_xatu_cbt_generate_seed_data.go

Lines changed: 33 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -21,16 +21,17 @@ const (
2121
// NewLabXatuCBTGenerateSeedDataCommand creates the lab xatu-cbt generate-seed-data command.
2222
func NewLabXatuCBTGenerateSeedDataCommand(log logrus.FieldLogger, configPath string) *cobra.Command {
2323
var (
24-
model string
25-
network string
26-
spec string
27-
rangeColumn string
28-
from string
29-
to string
30-
filters []string
31-
limit int
32-
output string
33-
upload bool
24+
model string
25+
network string
26+
spec string
27+
rangeColumn string
28+
from string
29+
to string
30+
filters []string
31+
limit int
32+
output string
33+
upload bool
34+
noSanitizeIPs bool
3435
)
3536

3637
cmd := &cobra.Command{
@@ -66,7 +67,7 @@ S3 Upload Configuration (defaults to Cloudflare R2):
6667
S3_BUCKET Override bucket (default: ethpandaops-platform-production-public)`,
6768
RunE: func(cmd *cobra.Command, args []string) error {
6869
return runGenerateSeedData(cmd.Context(), log, configPath,
69-
model, network, spec, rangeColumn, from, to, filters, limit, output, upload)
70+
model, network, spec, rangeColumn, from, to, filters, limit, output, upload, !noSanitizeIPs)
7071
},
7172
}
7273

@@ -80,6 +81,7 @@ S3 Upload Configuration (defaults to Cloudflare R2):
8081
cmd.Flags().IntVar(&limit, "limit", defaultRowLimit, "Max rows (0 = unlimited)")
8182
cmd.Flags().StringVarP(&output, "output", "o", "", "Output file path (default: ./{model}.parquet)")
8283
cmd.Flags().BoolVar(&upload, "upload", false, "Upload to S3 after generation")
84+
cmd.Flags().BoolVar(&noSanitizeIPs, "no-sanitize-ips", false, "Disable IP address sanitization (IPs are sanitized by default)")
8385

8486
return cmd
8587
}
@@ -94,6 +96,7 @@ func runGenerateSeedData(
9496
limit int,
9597
output string,
9698
upload bool,
99+
sanitizeIPs bool,
97100
) error {
98101
// Load configuration
99102
labCfg, _, err := config.LoadLabConfig(configPath)
@@ -229,6 +232,18 @@ func runGenerateSeedData(
229232
output = fmt.Sprintf("./%s.parquet", model)
230233
}
231234

235+
// Generate salt for IP sanitization if enabled
236+
var salt string
237+
238+
if sanitizeIPs {
239+
var saltErr error
240+
241+
salt, saltErr = seeddata.GenerateSalt()
242+
if saltErr != nil {
243+
return fmt.Errorf("failed to generate salt for IP sanitization: %w", saltErr)
244+
}
245+
}
246+
232247
// Generate seed data
233248
ui.Header("Generating seed data")
234249

@@ -244,6 +259,8 @@ func runGenerateSeedData(
244259
Filters: filters,
245260
Limit: limit,
246261
OutputPath: output,
262+
SanitizeIPs: sanitizeIPs,
263+
Salt: salt,
247264
})
248265
if err != nil {
249266
spinner.Fail("Failed to generate seed data")
@@ -253,6 +270,11 @@ func runGenerateSeedData(
253270

254271
spinner.Success(fmt.Sprintf("Written to: %s (%s)", result.OutputPath, formatFileSize(result.FileSize)))
255272

273+
// Display sanitized columns if any
274+
if len(result.SanitizedColumns) > 0 {
275+
ui.Info(fmt.Sprintf("Sanitized IP columns: %v", result.SanitizedColumns))
276+
}
277+
256278
// Upload to S3 if requested
257279
var publicURL string
258280

0 commit comments

Comments
 (0)