
Commit 6b2b328

rjl493456442 authored and karalabe committed
ethdb: add leveldb write delay statistic (#16499)
1 parent 2a1fc3d commit 6b2b328
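
The change samples LevelDB's "leveldb.writedelay" property, which reports how many times writes were stalled by compaction and for how long in total, and feeds it into two new meters plus a throttled warning log. For reference, the property can be read directly through goleveldb's GetProperty; the sketch below (with a made-up key set and a temporary directory, and assuming a goleveldb version recent enough to expose the property, as this commit requires) shows what the raw statistic looks like:

// Standalone sketch: open a throwaway LevelDB and print the raw
// "leveldb.writedelay" statistic that this commit starts sampling.
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/syndtr/goleveldb/leveldb"
)

func main() {
	dir, err := os.MkdirTemp("", "writedelay-demo")
	if err != nil {
		log.Fatal(err)
	}
	defer os.RemoveAll(dir)

	db, err := leveldb.OpenFile(dir, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Generate a little write traffic so the statistics are non-trivial.
	for i := 0; i < 1000; i++ {
		if err := db.Put([]byte(fmt.Sprintf("key-%d", i)), []byte("value"), nil); err != nil {
			log.Fatal(err)
		}
	}

	// The property comes back as a single line, e.g. "DelayN:5 Delay:406.604657ms".
	stat, err := db.GetProperty("leveldb.writedelay")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(stat)
}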

File tree: 1 file changed (+81, -14 lines)


ethdb/database.go

Lines changed: 81 additions & 14 deletions
@@ -17,6 +17,7 @@
 package ethdb
 
 import (
+	"fmt"
 	"strconv"
 	"strings"
 	"sync"
@@ -32,17 +33,25 @@ import (
 	"github.com/syndtr/goleveldb/leveldb/util"
 )
 
+const (
+	writeDelayNThreshold       = 200
+	writeDelayThreshold        = 350 * time.Millisecond
+	writeDelayWarningThrottler = 1 * time.Minute
+)
+
 var OpenFileLimit = 64
 
 type LDBDatabase struct {
 	fn string      // filename for reporting
 	db *leveldb.DB // LevelDB instance
 
-	compTimeMeter  metrics.Meter // Meter for measuring the total time spent in database compaction
-	compReadMeter  metrics.Meter // Meter for measuring the data read during compaction
-	compWriteMeter metrics.Meter // Meter for measuring the data written during compaction
-	diskReadMeter  metrics.Meter // Meter for measuring the effective amount of data read
-	diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written
+	compTimeMeter    metrics.Meter // Meter for measuring the total time spent in database compaction
+	compReadMeter    metrics.Meter // Meter for measuring the data read during compaction
+	compWriteMeter   metrics.Meter // Meter for measuring the data written during compaction
+	writeDelayNMeter metrics.Meter // Meter for measuring the write delay number due to database compaction
+	writeDelayMeter  metrics.Meter // Meter for measuring the write delay duration due to database compaction
+	diskReadMeter    metrics.Meter // Meter for measuring the effective amount of data read
+	diskWriteMeter   metrics.Meter // Meter for measuring the effective amount of data written
 
 	quitLock sync.Mutex      // Mutex protecting the quit channel access
 	quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
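
The three new constants encode the alerting policy: warn when the one-minute rate exceeds 200 delay events per second (writeDelayNThreshold) or 350ms of accumulated stall per second of wall time (writeDelayThreshold, roughly the "35% of the time" quoted in the warning text), and repeat such a warning at most once per minute (writeDelayWarningThrottler). A minimal sketch of just the time-based throttling, with a hypothetical shouldWarn helper that is not part of this commit:

package main

import (
	"fmt"
	"time"
)

// warnThrottler mirrors writeDelayWarningThrottler from the diff above.
const warnThrottler = 1 * time.Minute

// shouldWarn is a hypothetical helper (not in the commit): it reports whether
// the throttling window since the last warning has expired and, if so,
// records the new warning time.
func shouldWarn(last *time.Time, throttle time.Duration) bool {
	if time.Now().After(last.Add(throttle)) {
		*last = time.Now()
		return true
	}
	return false
}

func main() {
	var lastWriteDelay time.Time // zero value, so the very first check passes

	for i := 0; i < 3; i++ {
		if shouldWarn(&lastWriteDelay, warnThrottler) {
			fmt.Println("write delay exceeds the threshold") // emitted at most once per minute
		}
		time.Sleep(10 * time.Millisecond)
	}
}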
@@ -147,16 +156,17 @@ func (db *LDBDatabase) LDB() *leveldb.DB {
 
 // Meter configures the database metrics collectors and
 func (db *LDBDatabase) Meter(prefix string) {
-	// Short circuit metering if the metrics system is disabled
-	if !metrics.Enabled {
-		return
+	if metrics.Enabled {
+		// Initialize all the metrics collectors at the requested prefix
+		db.compTimeMeter = metrics.NewRegisteredMeter(prefix+"compact/time", nil)
+		db.compReadMeter = metrics.NewRegisteredMeter(prefix+"compact/input", nil)
+		db.compWriteMeter = metrics.NewRegisteredMeter(prefix+"compact/output", nil)
+		db.diskReadMeter = metrics.NewRegisteredMeter(prefix+"disk/read", nil)
+		db.diskWriteMeter = metrics.NewRegisteredMeter(prefix+"disk/write", nil)
 	}
-	// Initialize all the metrics collector at the requested prefix
-	db.compTimeMeter = metrics.NewRegisteredMeter(prefix+"compact/time", nil)
-	db.compReadMeter = metrics.NewRegisteredMeter(prefix+"compact/input", nil)
-	db.compWriteMeter = metrics.NewRegisteredMeter(prefix+"compact/output", nil)
-	db.diskReadMeter = metrics.NewRegisteredMeter(prefix+"disk/read", nil)
-	db.diskWriteMeter = metrics.NewRegisteredMeter(prefix+"disk/write", nil)
+	// Initialize the write delay metrics regardless of whether metrics are enabled.
+	db.writeDelayMeter = metrics.NewRegisteredMeter(prefix+"compact/writedelay/duration", nil)
+	db.writeDelayNMeter = metrics.NewRegisteredMeter(prefix+"compact/writedelay/counter", nil)
 
 	// Create a quit channel for the periodic collector and run it
 	db.quitLock.Lock()
@@ -178,6 +188,9 @@ func (db *LDBDatabase) Meter(prefix string) {
 // 2   |        523 |  1000.37159 |     7.26059 |    66.86342 |    66.77884
 // 3   |        570 |  1113.18458 |     0.00000 |     0.00000 |     0.00000
 //
+// This is how the write delay looks like (currently):
+// DelayN:5 Delay:406.604657ms
+//
 // This is how the iostats look like (currently):
 // Read(MB):3895.04860 Write(MB):3654.64712
 func (db *LDBDatabase) meter(refresh time.Duration) {
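
The "DelayN:5 Delay:406.604657ms" line documented above is the exact text returned by the leveldb.writedelay property, so it can be decoded with fmt.Sscanf followed by time.ParseDuration, just as the collector below does. A standalone sketch using only the standard library and the sample value from the comment:

// Standalone sketch of decoding the write delay line shown above, using only
// the standard library. The input string is the sample value from the comment.
package main

import (
	"fmt"
	"time"
)

func main() {
	writedelay := "DelayN:5 Delay:406.604657ms"

	var (
		delayN        int64
		delayDuration string
	)
	// Sscanf's %s stops at whitespace, so the duration arrives as a raw string.
	if n, err := fmt.Sscanf(writedelay, "DelayN:%d Delay:%s", &delayN, &delayDuration); n != 2 || err != nil {
		fmt.Println("write delay statistic not found:", err)
		return
	}
	// The duration is printed in Go's own format (e.g. "406.604657ms"), so
	// time.ParseDuration decodes it directly.
	duration, err := time.ParseDuration(delayDuration)
	if err != nil {
		fmt.Println("failed to parse delay duration:", err)
		return
	}
	fmt.Printf("delays: %d, total stall: %v\n", delayN, duration)
}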
@@ -188,6 +201,14 @@ func (db *LDBDatabase) meter(refresh time.Duration) {
 	}
 	// Create storage for iostats.
 	var iostats [2]float64
+
+	// Create storage and warning log tracer for write delay.
+	var (
+		delaystats      [2]int64
+		lastWriteDelay  time.Time
+		lastWriteDelayN time.Time
+	)
+
 	// Iterate ad infinitum and collect the stats
 	for i := 1; ; i++ {
 		// Retrieve the database stats
@@ -236,6 +257,52 @@ func (db *LDBDatabase) meter(refresh time.Duration) {
 			db.compWriteMeter.Mark(int64((compactions[i%2][2] - compactions[(i-1)%2][2]) * 1024 * 1024))
 		}
 
+		// Retrieve the write delay statistic
+		writedelay, err := db.db.GetProperty("leveldb.writedelay")
+		if err != nil {
+			db.log.Error("Failed to read database write delay statistic", "err", err)
+			return
+		}
+		var (
+			delayN        int64
+			delayDuration string
+			duration      time.Duration
+		)
+		if n, err := fmt.Sscanf(writedelay, "DelayN:%d Delay:%s", &delayN, &delayDuration); n != 2 || err != nil {
+			db.log.Error("Write delay statistic not found")
+			return
+		}
+		duration, err = time.ParseDuration(delayDuration)
+		if err != nil {
+			db.log.Error("Failed to parse delay duration", "err", err)
+			return
+		}
+		if db.writeDelayNMeter != nil {
+			db.writeDelayNMeter.Mark(delayN - delaystats[0])
+			// If the number of write delays collected in the last minute exceeds the
+			// predefined threshold, print a warning log here.
+			// Once a warning that db performance is laggy has been displayed, any
+			// subsequent warnings are withheld for 1 minute so as not to overwhelm the user.
+			if int(db.writeDelayNMeter.Rate1()) > writeDelayNThreshold &&
+				time.Now().After(lastWriteDelayN.Add(writeDelayWarningThrottler)) {
+				db.log.Warn("Write delay number exceeds the threshold (200 per second) in the last minute")
+				lastWriteDelayN = time.Now()
+			}
+		}
+		if db.writeDelayMeter != nil {
+			db.writeDelayMeter.Mark(duration.Nanoseconds() - delaystats[1])
+			// If the write delay duration collected in the last minute exceeds the
+			// predefined threshold, print a warning log here.
+			// Once a warning that db performance is laggy has been displayed, any
+			// subsequent warnings are withheld for 1 minute so as not to overwhelm the user.
+			if int64(db.writeDelayMeter.Rate1()) > writeDelayThreshold.Nanoseconds() &&
+				time.Now().After(lastWriteDelay.Add(writeDelayWarningThrottler)) {
+				db.log.Warn("Write delay duration exceeds the threshold (35% of the time) in the last minute")
+				lastWriteDelay = time.Now()
+			}
+		}
+		delaystats[0], delaystats[1] = delayN, duration.Nanoseconds()
+
 		// Retrieve the database iostats.
 		ioStats, err := db.db.GetProperty("leveldb.iostats")
 		if err != nil {
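
Since leveldb.writedelay reports cumulative totals since the database was opened, each pass marks only the delta against the previous sample kept in delaystats; Rate1() then yields a one-minute exponentially weighted rate in events (or nanoseconds) per second, which is what the 200/s and 350ms/s thresholds are compared against. A rough standalone sketch of that delta-then-rate bookkeeping, using the upstream go-metrics library that the go-ethereum metrics package is based on (the cumulative readings are made up for illustration):

package main

import (
	"fmt"
	"time"

	metrics "github.com/rcrowley/go-metrics"
)

func main() {
	// One-minute EWMA meter, comparable to writeDelayNMeter in the diff.
	delayMeter := metrics.NewRegisteredMeter("db/compact/writedelay/counter", nil)

	// Pretend these are successive cumulative DelayN readings taken from
	// "leveldb.writedelay" (made-up numbers).
	cumulative := []int64{0, 40, 95, 180, 420}

	var prev int64
	for _, total := range cumulative {
		delayMeter.Mark(total - prev) // feed only the per-interval delta
		prev = total
	}

	// The EWMA folds marks in on its periodic tick, so give it a moment
	// before reading the rate in a short-lived demo like this.
	time.Sleep(6 * time.Second)

	// Rate1 is the one-minute rate in events per second; the commit warns
	// once this exceeds writeDelayNThreshold (200).
	fmt.Printf("1m rate: %.2f delays/s\n", delayMeter.Rate1())
}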
