Skip to content

Commit 02adab7

Browse files
craig[bot]sambhav-jain-16
andcommitted
Merge #143490
143490: workload: improve histogram file handling to avoid corrupted files r=csgourav,srosenberg a=sambhav-jain-16 This commit adds a robust approach to writing histogram files in workload runs to prevent file corruption and partial writes. The previous implementation was vulnerable to producing corrupted histogram files when processes terminated abnormally while the file is geeting closed. Changes include: - Writing histogram data to a temporary file first - Using atomic file rename operation when writing is complete - Adding proper cleanup of temporary files in error cases Fixes: #142513 Release note: None Co-authored-by: Sambhav Jain <[email protected]>
2 parents 37357b1 + e87d2e1 commit 02adab7

File tree

1 file changed

+53
-20
lines changed

1 file changed

+53
-20
lines changed

pkg/workload/cli/run.go

Lines changed: 53 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -432,27 +432,11 @@ func runRun(gen workload.Generator, urls []string, dbName string) error {
432432
publisher = histogram.CreateUdpPublisher(*individualOperationReceiverAddr)
433433
}
434434

435-
metricsExporter, file, err := maybeInitAndCreateExporter(gen)
435+
metricsExporter, file, err := maybeInitAndCreateExporter()
436436
if err != nil {
437437
return errors.Wrap(err, "error creating metrics exporter")
438438
}
439-
defer func() {
440-
if metricsExporter != nil {
441-
if err = metricsExporter.Close(func() error {
442-
if file == nil {
443-
log.Infof(ctx, "no file to close")
444-
return nil
445-
}
446-
447-
if err := file.Close(); err != nil {
448-
return err
449-
}
450-
return nil
451-
}); err != nil {
452-
log.Warningf(ctx, "failed to close metrics exporter: %v", err)
453-
}
454-
}
455-
}()
439+
defer closeExporter(ctx, metricsExporter, file)
456440

457441
reg := histogram.NewRegistryWithPublisherAndExporter(
458442
*histogramsMaxLatency,
@@ -666,13 +650,15 @@ func maybeLogRandomSeed(ctx context.Context, gen workload.Generator) {
666650
}
667651
}
668652

669-
func maybeInitAndCreateExporter(gen workload.Generator) (exporter.Exporter, *os.File, error) {
653+
func maybeInitAndCreateExporter() (exporter.Exporter, *os.File, error) {
670654
if *histograms == "" {
671655
return nil, nil, nil
672656
}
673657

674658
var metricsExporter exporter.Exporter
675659
var file *os.File
660+
var tempFilePath string
661+
var finalPath string
676662

677663
switch *histogramExportFormat {
678664
case "json":
@@ -705,7 +691,13 @@ func maybeInitAndCreateExporter(gen workload.Generator) (exporter.Exporter, *os.
705691
return nil, nil, err
706692
}
707693

708-
file, err = os.Create(*histograms)
694+
// Create a temporary file path
695+
finalPath = *histograms
696+
dir := filepath.Dir(finalPath)
697+
tempFilePath = filepath.Join(dir, fmt.Sprintf(".%s.tmp.%d", filepath.Base(finalPath), timeutil.Now().UnixNano()))
698+
699+
// Create the temporary file instead of the final file
700+
file, err = os.Create(tempFilePath)
709701
if err != nil {
710702
return nil, nil, err
711703
}
@@ -715,3 +707,44 @@ func maybeInitAndCreateExporter(gen workload.Generator) (exporter.Exporter, *os.
715707

716708
return metricsExporter, file, nil
717709
}
710+
711+
func closeExporter(ctx context.Context, metricsExporter exporter.Exporter, file *os.File) {
712+
if metricsExporter != nil {
713+
if err := metricsExporter.Close(func() error {
714+
if file == nil {
715+
log.Infof(ctx, "no file to close")
716+
return nil
717+
}
718+
return renameTempFile(file, *histograms)
719+
}); err != nil {
720+
log.Warningf(ctx, "failed to close metrics exporter: %v", err)
721+
}
722+
}
723+
}
724+
725+
func renameTempFile(file *os.File, finalPath string) error {
726+
tempPath := file.Name()
727+
defer func() {
728+
_ = os.Remove(tempPath) // Clean up the temp folder if still exists
729+
}()
730+
731+
// Sync file to ensure all data is written to disk
732+
if err := file.Sync(); err != nil {
733+
// If we are not able to sync the file, we should not attempt to rename it.
734+
// This is to avoid the case where an incomplete file is renamed.
735+
return err
736+
}
737+
738+
// Close the file
739+
if err := file.Close(); err != nil {
740+
return err
741+
}
742+
743+
// Rename from temp to final path
744+
// This is atomic on all unix-like systems
745+
if err := os.Rename(tempPath, finalPath); err != nil {
746+
return errors.Wrap(err, "failed to rename temporary file")
747+
}
748+
749+
return nil
750+
}

0 commit comments

Comments
 (0)