Skip to content

Commit 4c5809a

Browse files
committed
db: improve metrics and logs to measure all filesystem ops
Enhance the log writer to instrument all filesystem operations done for writing the log file in the latency histogram (create, write, sync, etc.).
1 parent 3d22cee commit 4c5809a

17 files changed

+1855
-71
lines changed

commit_test.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"github.com/cockroachdb/pebble/internal/invariants"
2121
"github.com/cockroachdb/pebble/record"
2222
"github.com/cockroachdb/pebble/vfs"
23-
"github.com/prometheus/client_golang/prometheus"
2423
"github.com/stretchr/testify/require"
2524
)
2625

@@ -253,7 +252,6 @@ func TestCommitPipelineWALClose(t *testing.T) {
253252
}
254253
p := newCommitPipeline(testEnv)
255254
wal = record.NewLogWriter(sf, 0 /* logNum */, record.LogWriterConfig{
256-
WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{}),
257255
QueueSemChan: p.logSyncQSem,
258256
WriteWALSyncOffsets: func() bool { return false },
259257
})
@@ -395,7 +393,6 @@ func BenchmarkCommitPipeline(b *testing.B) {
395393
p := newCommitPipeline(nullCommitEnv)
396394
wal = record.NewLogWriter(io.Discard, 0, /* logNum */
397395
record.LogWriterConfig{
398-
WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{}),
399396
QueueSemChan: p.logSyncQSem,
400397
WriteWALSyncOffsets: func() bool { return false },
401398
})

db.go

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,13 @@ import (
3131
"github.com/cockroachdb/pebble/objstorage"
3232
"github.com/cockroachdb/pebble/objstorage/remote"
3333
"github.com/cockroachdb/pebble/rangekey"
34-
"github.com/cockroachdb/pebble/record"
3534
"github.com/cockroachdb/pebble/sstable"
3635
"github.com/cockroachdb/pebble/sstable/blob"
3736
"github.com/cockroachdb/pebble/sstable/block"
3837
"github.com/cockroachdb/pebble/vfs"
3938
"github.com/cockroachdb/pebble/vfs/atomicfs"
4039
"github.com/cockroachdb/pebble/wal"
4140
"github.com/cockroachdb/tokenbucket"
42-
"github.com/prometheus/client_golang/prometheus"
4341
)
4442

4543
const (
@@ -376,13 +374,7 @@ type DB struct {
376374
// commitPipeline.mu and DB.mu to be held when rotating the WAL/memtable
377375
// (i.e. makeRoomForWrite). Can be nil.
378376
writer wal.Writer
379-
metrics struct {
380-
// fsyncLatency has its own internal synchronization, and is not
381-
// protected by mu.
382-
fsyncLatency prometheus.Histogram
383-
// Updated whenever a wal.Writer is closed.
384-
record.LogWriterMetrics
385-
}
377+
metrics WALMetrics
386378
}
387379

388380
mem struct {
@@ -1962,8 +1954,7 @@ func (d *DB) Metrics() *Metrics {
19621954
metrics.BlobFiles.ReferencedValueSize = blobStats.ReferencedValueSize
19631955
metrics.BlobFiles.ReferencedBackingValueSize = blobStats.ReferencedBackingValueSize
19641956

1965-
metrics.LogWriter.FsyncLatency = d.mu.log.metrics.fsyncLatency
1966-
if err := metrics.LogWriter.Merge(&d.mu.log.metrics.LogWriterMetrics); err != nil {
1957+
if err := metrics.WALMetrics.Merge(&d.mu.log.metrics.LogWriterMetrics); err != nil {
19671958
d.opts.Logger.Errorf("metrics error: %s", err)
19681959
}
19691960
metrics.Flush.WriteThroughput = d.mu.compact.flushWriteThroughput

metrics.go

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -481,10 +481,7 @@ type Metrics struct {
481481
Failover wal.FailoverStats
482482
}
483483

484-
LogWriter struct {
485-
FsyncLatency prometheus.Histogram
486-
record.LogWriterMetrics
487-
}
484+
WALMetrics WALMetrics
488485

489486
CategoryStats []block.CategoryStatsAggregate
490487

@@ -539,6 +536,41 @@ func (cm *CompressionMetrics) MergeWith(o *CompressionMetrics) {
539536
cm.Zstd.Add(o.Zstd)
540537
}
541538

539+
// DirectoryContext identifies which WAL directory an operation targets
540+
type DirectoryContext int
541+
542+
const (
543+
// DirectoryPrimary indicates operation targets the primary WAL directory
544+
DirectoryPrimary DirectoryContext = iota
545+
// DirectorySecondary indicates operation targets the secondary WAL directory
546+
DirectorySecondary
547+
// DirectoryUnknown indicates directory context is unknown or not applicable
548+
DirectoryUnknown
549+
)
550+
551+
// WALMetrics contains directory-specific latency histograms for all WAL operations
552+
type WALMetrics struct {
553+
// Directory-specific latency histograms for all filesystem operations
554+
Primary struct {
555+
CreateLatency prometheus.Histogram // File creation operations
556+
WriteLatency prometheus.Histogram // Write operations
557+
FsyncLatency prometheus.Histogram // Fsync operations
558+
CloseLatency prometheus.Histogram // File close operations
559+
StatLatency prometheus.Histogram // File stat operations
560+
OpenDirLatency prometheus.Histogram // Directory open operations
561+
}
562+
Secondary struct {
563+
CreateLatency prometheus.Histogram // File creation operations
564+
WriteLatency prometheus.Histogram // Write operations
565+
FsyncLatency prometheus.Histogram // Fsync operations
566+
CloseLatency prometheus.Histogram // File close operations
567+
StatLatency prometheus.Histogram // File stat operations
568+
OpenDirLatency prometheus.Histogram // Directory open operations
569+
}
570+
// Embedded runtime metrics from record.LogWriterMetrics
571+
record.LogWriterMetrics
572+
}
573+
542574
var (
543575
// FsyncLatencyBuckets are prometheus histogram buckets suitable for a histogram
544576
// that records latencies for fsyncs.
@@ -547,6 +579,30 @@ var (
547579
prometheus.ExponentialBucketsRange(float64(time.Millisecond*5), float64(10*time.Second), 50)...,
548580
)
549581

582+
// WALCreateLatencyBuckets are histogram buckets for file creation operations.
583+
WALCreateLatencyBuckets = append(
584+
prometheus.LinearBuckets(0.0, float64(time.Microsecond*50), 40),
585+
prometheus.ExponentialBucketsRange(float64(time.Millisecond*2), float64(5*time.Second), 40)...,
586+
)
587+
588+
// WALWriteLatencyBuckets are histogram buckets for write operations.
589+
WALWriteLatencyBuckets = append(
590+
prometheus.LinearBuckets(0.0, float64(time.Microsecond*10), 50),
591+
prometheus.ExponentialBucketsRange(float64(time.Millisecond*1), float64(2*time.Second), 40)...,
592+
)
593+
594+
// WALStatLatencyBuckets are histogram buckets for stat operations.
595+
WALStatLatencyBuckets = append(
596+
prometheus.LinearBuckets(0.0, float64(time.Microsecond*5), 20),
597+
prometheus.ExponentialBucketsRange(float64(time.Microsecond*100), float64(200*time.Millisecond), 30)...,
598+
)
599+
600+
// WALOpenDirLatencyBuckets are histogram buckets for directory open operations.
601+
WALOpenDirLatencyBuckets = append(
602+
prometheus.LinearBuckets(0.0, float64(time.Microsecond*20), 50),
603+
prometheus.ExponentialBucketsRange(float64(time.Millisecond*1), float64(3*time.Second), 40)...,
604+
)
605+
550606
// SecondaryCacheIOBuckets exported to enable exporting from package pebble to
551607
// enable exporting metrics with below buckets in CRDB.
552608
SecondaryCacheIOBuckets = sharedcache.IOBuckets

open.go

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -324,9 +324,56 @@ func Open(dirname string, opts *Options) (db *DB, err error) {
324324
d.mu.mem.queue = append(d.mu.mem.queue, entry)
325325
}
326326

327-
d.mu.log.metrics.fsyncLatency = prometheus.NewHistogram(prometheus.HistogramOpts{
328-
Buckets: FsyncLatencyBuckets,
329-
})
327+
// Create enhanced WAL metrics for comprehensive filesystem operation tracking
328+
walFileMetrics := record.WALFileMetrics{
329+
CreateLatency: prometheus.NewHistogram(prometheus.HistogramOpts{
330+
Buckets: WALCreateLatencyBuckets,
331+
}),
332+
WriteLatency: prometheus.NewHistogram(prometheus.HistogramOpts{
333+
Buckets: WALWriteLatencyBuckets,
334+
}),
335+
FsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{
336+
Buckets: FsyncLatencyBuckets, // Reuse existing sync buckets
337+
}),
338+
CloseLatency: prometheus.NewHistogram(prometheus.HistogramOpts{
339+
Buckets: FsyncLatencyBuckets, // Close is similar to sync in timing
340+
}),
341+
StatLatency: prometheus.NewHistogram(prometheus.HistogramOpts{
342+
Buckets: WALStatLatencyBuckets,
343+
}),
344+
OpenDirLatency: prometheus.NewHistogram(prometheus.HistogramOpts{
345+
Buckets: WALOpenDirLatencyBuckets,
346+
}),
347+
}
348+
349+
// Initialize enhanced LogWriter metrics structure
350+
d.mu.log.metrics = WALMetrics{
351+
Primary: struct {
352+
CreateLatency prometheus.Histogram
353+
WriteLatency prometheus.Histogram
354+
FsyncLatency prometheus.Histogram
355+
CloseLatency prometheus.Histogram
356+
StatLatency prometheus.Histogram
357+
OpenDirLatency prometheus.Histogram
358+
}{
359+
CreateLatency: walFileMetrics.CreateLatency,
360+
WriteLatency: walFileMetrics.WriteLatency,
361+
FsyncLatency: walFileMetrics.FsyncLatency,
362+
CloseLatency: walFileMetrics.CloseLatency,
363+
StatLatency: walFileMetrics.StatLatency,
364+
OpenDirLatency: walFileMetrics.OpenDirLatency,
365+
},
366+
Secondary: struct {
367+
CreateLatency prometheus.Histogram
368+
WriteLatency prometheus.Histogram
369+
FsyncLatency prometheus.Histogram
370+
CloseLatency prometheus.Histogram
371+
StatLatency prometheus.Histogram
372+
OpenDirLatency prometheus.Histogram
373+
}{
374+
// Empty for now, could be populated for failover scenarios
375+
},
376+
}
330377

331378
walOpts := wal.Options{
332379
Primary: wal.Dir{FS: opts.FS, Dirname: walDirname},
@@ -337,7 +384,7 @@ func Open(dirname string, opts *Options) (db *DB, err error) {
337384
BytesPerSync: opts.WALBytesPerSync,
338385
PreallocateSize: d.walPreallocateSize,
339386
MinSyncInterval: opts.WALMinSyncInterval,
340-
FsyncLatency: d.mu.log.metrics.fsyncLatency,
387+
WALFileMetrics: walFileMetrics,
341388
QueueSemChan: d.commit.logSyncQSem,
342389
Logger: opts.Logger,
343390
EventListener: walEventListenerAdaptor{l: opts.EventListener},

record/log_writer.go

Lines changed: 72 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -455,8 +455,12 @@ type LogWriter struct {
455455
err error
456456
// minSyncInterval is the minimum duration between syncs.
457457
minSyncInterval durationFunc
458-
fsyncLatency prometheus.Histogram
459-
pending []*block
458+
459+
// Enhanced WAL metrics for all filesystem operations
460+
walFileMetrics WALFileMetrics
461+
// Directory type for metrics recording
462+
directoryType DirectoryType
463+
pending []*block
460464
// Pushing and popping from pendingSyncs does not require flusher mutex to
461465
// be held.
462466
pendingSyncs pendingSyncs
@@ -486,10 +490,52 @@ type LogWriter struct {
486490
emitFragment func(n int, p []byte) (remainingP []byte)
487491
}
488492

493+
// DirectoryType identifies which WAL directory an operation targets
494+
type DirectoryType int
495+
496+
const (
497+
// DirectoryPrimary indicates operation targets the primary WAL directory
498+
DirectoryPrimary DirectoryType = iota
499+
// DirectorySecondary indicates operation targets the secondary WAL directory
500+
DirectorySecondary
501+
// DirectoryUnknown indicates directory context is unknown or not applicable
502+
DirectoryUnknown
503+
)
504+
505+
// String returns the string representation of DirectoryType.
506+
func (d DirectoryType) String() string {
507+
switch d {
508+
case DirectoryPrimary:
509+
return "Primary"
510+
case DirectorySecondary:
511+
return "Secondary"
512+
case DirectoryUnknown:
513+
return "Unknown"
514+
default:
515+
return "Invalid"
516+
}
517+
}
518+
519+
// WALFileMetrics contains all latency histograms for individual WAL file operations
520+
type WALFileMetrics struct {
521+
CreateLatency prometheus.Histogram // File creation operations
522+
WriteLatency prometheus.Histogram // Write operations
523+
FsyncLatency prometheus.Histogram // Fsync operations
524+
CloseLatency prometheus.Histogram // File close operations
525+
StatLatency prometheus.Histogram // File stat operations
526+
OpenDirLatency prometheus.Histogram // Directory open operations
527+
}
528+
489529
// LogWriterConfig is a struct used for configuring new LogWriters
490530
type LogWriterConfig struct {
491531
WALMinSyncInterval durationFunc
492-
WALFsyncLatency prometheus.Histogram
532+
533+
// Directory type for this LogWriter (Primary, Secondary, or Unknown)
534+
DirectoryType DirectoryType
535+
536+
// Enhanced WAL metrics for this directory
537+
WALFileMetrics WALFileMetrics
538+
493539
// QueueSemChan is an optional channel to pop from when popping from
494540
// LogWriter.flusher.syncQueue. It functions as a semaphore that prevents
495541
// the syncQueue from overflowing (which will cause a panic). All production
@@ -581,7 +627,8 @@ func NewLogWriter(
581627

582628
f := &r.flusher
583629
f.minSyncInterval = logWriterConfig.WALMinSyncInterval
584-
f.fsyncLatency = logWriterConfig.WALFsyncLatency
630+
f.walFileMetrics = logWriterConfig.WALFileMetrics
631+
f.directoryType = logWriterConfig.DirectoryType
585632

586633
go func() {
587634
pprof.Do(context.Background(), walSyncLabels, r.flushLoop)
@@ -725,9 +772,9 @@ func (w *LogWriter) flushLoop(context.Context) {
725772
writtenOffset += uint64(len(data))
726773
synced, syncLatency, bytesWritten, err := w.flushPending(data, pending, snap)
727774
f.Lock()
728-
if synced && f.fsyncLatency != nil {
775+
if synced && f.walFileMetrics.FsyncLatency != nil {
729776
w.syncedOffset.Store(writtenOffset)
730-
f.fsyncLatency.Observe(float64(syncLatency))
777+
f.walFileMetrics.FsyncLatency.Observe(float64(syncLatency))
731778
}
732779
f.err = err
733780
if f.err != nil {
@@ -784,7 +831,12 @@ func (w *LogWriter) flushPending(
784831
}
785832
if n := len(data); err == nil && n > 0 {
786833
bytesWritten += int64(n)
834+
// Measure write latency
835+
writeStart := crtime.NowMono()
787836
_, err = w.w.Write(data)
837+
if writeLatency := writeStart.Elapsed(); w.flusher.walFileMetrics.WriteLatency != nil {
838+
w.flusher.walFileMetrics.WriteLatency.Observe(float64(writeLatency))
839+
}
788840
}
789841

790842
synced = !snap.empty()
@@ -811,7 +863,13 @@ func (w *LogWriter) syncWithLatency() (time.Duration, error) {
811863
}
812864

813865
func (w *LogWriter) flushBlock(b *block) error {
814-
if _, err := w.w.Write(b.buf[b.flushed:]); err != nil {
866+
// Measure write latency for block flush
867+
writeStart := crtime.NowMono()
868+
_, err := w.w.Write(b.buf[b.flushed:])
869+
if writeLatency := writeStart.Elapsed(); w.flusher.walFileMetrics.WriteLatency != nil {
870+
w.flusher.walFileMetrics.WriteLatency.Observe(float64(writeLatency))
871+
}
872+
if err != nil {
815873
return err
816874
}
817875
b.written.Store(0)
@@ -885,8 +943,8 @@ func (w *LogWriter) closeInternal(lastQueuedRecord PendingSyncIndex) error {
885943
syncLatency, err = w.syncWithLatency()
886944
}
887945
f.Lock()
888-
if err == nil && f.fsyncLatency != nil {
889-
f.fsyncLatency.Observe(float64(syncLatency))
946+
if err == nil && f.walFileMetrics.FsyncLatency != nil {
947+
f.walFileMetrics.FsyncLatency.Observe(float64(syncLatency))
890948
}
891949
free := w.free.blocks
892950
f.Unlock()
@@ -897,7 +955,12 @@ func (w *LogWriter) closeInternal(lastQueuedRecord PendingSyncIndex) error {
897955
w.pendingSyncsBackingIndex.externalSyncQueueCallback(lastQueuedRecord, err)
898956
}
899957
if w.c != nil {
958+
// Measure close latency
959+
closeStart := crtime.NowMono()
900960
cerr := w.c.Close()
961+
if closeLatency := closeStart.Elapsed(); w.flusher.walFileMetrics.CloseLatency != nil {
962+
w.flusher.walFileMetrics.CloseLatency.Observe(float64(closeLatency))
963+
}
901964
w.c = nil
902965
err = firstError(err, cerr)
903966
}

0 commit comments

Comments
 (0)