Skip to content

Commit 53cb40b

Browse files
authored
Expose lag as prometheus metric (#379)
1 parent 46f7a0e commit 53cb40b

File tree

3 files changed

+34
-1
lines changed

3 files changed

+34
-1
lines changed

cmd/litefs/mount_linux.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,10 @@ func (c *MountCommand) initStore(ctx context.Context) error {
359359
if err := c.initStoreBackupClient(ctx); err != nil {
360360
return err
361361
}
362+
363+
// Initialize as a singleton so we can automatically collect metrics.
364+
litefs.GlobalStore.Store(c.Store)
365+
362366
return nil
363367
}
364368

db.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3602,7 +3602,7 @@ var (
36023602
}, []string{"db"})
36033603

36043604
dbLatencySecondsMetricVec = promauto.NewGaugeVec(prometheus.GaugeOpts{
3605-
Name: "litefs_db_latency_seconds",
3605+
Name: "litefs_db_lag_seconds",
36063606
Help: "Latency between generating an LTX file and consuming it.",
36073607
}, []string{"db"})
36083608
)

store.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,15 @@ const (
4545
DefaultBackupFullSyncInterval = 10 * time.Second
4646
)
4747

48+
const (
49+
// MetricsMonitorInterval is a one-second interval.
// NOTE(review): presumably the period for metrics monitoring; no use of it is visible in this diff — confirm.
MetricsMonitorInterval = 1 * time.Second
50+
)
51+
4852
var ErrStoreClosed = fmt.Errorf("store closed")
4953

54+
// GlobalStore holds the single *Store that package-level metrics read from.
// Load returns nil until a store has been saved with GlobalStore.Store.
55+
var GlobalStore atomic.Value
56+
5057
// Store represents a collection of databases.
5158
type Store struct {
5259
mu sync.Mutex
@@ -1595,6 +1602,18 @@ func (s *Store) setPrimaryTimestamp(ts int64) {
15951602
}
15961603
}
15971604

1605+
// Lag returns how far the local instance is lagging behind the primary node,
1606+
// as a time.Duration derived from the primary timestamp. Returns 0 if the
1607+
// node is the primary or if the node is not marked as ready yet.
1608+
func (s *Store) Lag() time.Duration {
1609+
switch ts := s.PrimaryTimestamp(); ts {
1610+
case 0, -1:
1611+
return 0 // primary or not ready
1612+
default:
1613+
return time.Duration(time.Now().UnixMilli()-ts) * time.Millisecond // ts is compared against Unix milliseconds
1614+
}
1615+
}
1616+
15981617
// Expvar returns a variable for debugging output.
15991618
func (s *Store) Expvar() expvar.Var { return (*StoreVar)(s) }
16001619

@@ -1821,4 +1840,14 @@ var (
18211840
Name: "litefs_subscriber_count",
18221841
Help: "Number of connected subscribers",
18231842
})
1843+
1844+
_ = promauto.NewGaugeFunc(prometheus.GaugeOpts{
1845+
Name: "litefs_lag_seconds",
1846+
Help: "Lag behind the primary node, in seconds",
1847+
}, func() float64 {
1848+
if s := GlobalStore.Load(); s != nil {
1849+
return s.(*Store).Lag().Seconds()
1850+
}
1851+
return 0
1852+
})
18241853
)

0 commit comments

Comments
 (0)