Skip to content
66 changes: 64 additions & 2 deletions pkg/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,10 @@ func NewBee(
session accesscontrol.Session,
o *Options,
) (b *Bee, err error) {
// start time for node warmup duration measurement
warmupStartTime := time.Now()
// pullSyncStartTime is assigned the stabilization time by the warmup callback
// and is the reference point for the full sync duration measurement.
var pullSyncStartTime time.Time

tracer, tracerCloser, err := tracing.NewTracer(&tracing.Options{
Enabled: o.TracingEnabled,
Endpoint: o.TracingEndpoint,
Expand Down Expand Up @@ -595,9 +599,28 @@ func NewBee(
logger.Info("node warmup check initiated. monitoring activity rate to determine readiness.", "startTime", t)
}

detector.OnStabilized = func(t time.Time, totalCount int) {
logger.Info("node warmup complete. system is considered stable and ready.", "stabilizationTime", t, "totalMonitoredEvents", totalCount)
// nodeWarmupDuration is a histogram of the node warmup time in seconds.
// It is registered with the default prometheus registry right away;
// MustRegister panics if the registration fails.
nodeWarmupDuration := prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: metrics.Namespace,
Subsystem: "init",
Name: "warmup_duration_seconds",
Help: "Duration in seconds for node warmup to complete",
},
)
prometheus.MustRegister(nodeWarmupDuration)

// warmupMeasurement is the stabilization callback: it logs the completion of
// the warmup, records the elapsed warmup time in the nodeWarmupDuration
// histogram and stores the stabilization time in pullSyncStartTime.
warmupMeasurement := func(t time.Time, totalCount int) {
	// Elapsed time between the start of NewBee and the stabilization time.
	elapsedSeconds := t.Sub(warmupStartTime).Seconds()

	logger.Info(
		"node warmup complete. system is considered stable and ready.",
		"stabilizationTime", t,
		"totalMonitoredEvents", totalCount,
		"warmupDurationSeconds", elapsedSeconds,
	)

	// The stabilization time is kept as the start of the sync measurement.
	pullSyncStartTime = t
	nodeWarmupDuration.Observe(elapsedSeconds)
}
detector.OnStabilized = warmupMeasurement

detector.OnPeriodComplete = func(t time.Time, periodCount int, stDev float64) {
logger.Debug("node warmup check: period complete.", "periodEndTime", t, "eventsInPeriod", periodCount, "rateStdDev", stDev)
Expand Down Expand Up @@ -1130,6 +1153,45 @@ func NewBee(
localStore.StartReserveWorker(ctx, pullerService, waitNetworkRFunc)
nodeStatus.SetSync(pullerService)

// measure full sync duration
detector.OnStabilized = func(t time.Time, totalCount int) {
warmupMeasurement(t, totalCount)
fullSyncDuration := prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: metrics.Namespace,
Subsystem: "init",
Name: "full_sync_duration_seconds",
Help: "Duration in seconds for node warmup to complete",
},
)
prometheus.MustRegister(fullSyncDuration)

reserveTreshold := reserveCapacity >> 1
isFullySynced := func() bool {
return pullerService.SyncRate() == 0 && saludService.IsHealthy() && localStore.ReserveSize() >= reserveTreshold
}

syncCheckTicker := time.NewTicker(time.Second)
go func() {
defer syncCheckTicker.Stop()
for {
select {
case <-ctx.Done():
return
case <-syncCheckTicker.C:
synced := isFullySynced()
logger.Debug("sync status check", "synced", synced, "reserveSize", localStore.ReserveSize(), "threshold", reserveTreshold, "syncRate", pullerService.SyncRate())
Copy link
Member

@gacevicljubisa gacevicljubisa Jul 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe change log level to Trace, because it will spam every second until ReserveSize reaches trashold? Or we can even increase the time checking to 2 seconds?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I increased the time check to 2 seconds because debug level is the most verbose.

if synced {
fullSyncTime := pullSyncStartTime.Sub(t)
fullSyncDuration.Observe(fullSyncTime.Seconds())
syncCheckTicker.Stop()
return
}
}
}
}()
}

if o.EnableStorageIncentives {

redistributionContractAddress := chainCfg.RedistributionAddress
Expand Down
Loading