Skip to content

Commit cd6d703

Browse files
committed
Merge pull request #1447 from karalabe/fix-database-metrics-race-3
eth, ethdb: fix a data race during startup/shutdown
2 parents dd140be + eb2d168 commit cd6d703

File tree

2 files changed

+83
-61
lines changed

2 files changed

+83
-61
lines changed

eth/backend.go

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ import (
4141
"github.com/ethereum/go-ethereum/event"
4242
"github.com/ethereum/go-ethereum/logger"
4343
"github.com/ethereum/go-ethereum/logger/glog"
44-
"github.com/ethereum/go-ethereum/metrics"
4544
"github.com/ethereum/go-ethereum/miner"
4645
"github.com/ethereum/go-ethereum/p2p"
4746
"github.com/ethereum/go-ethereum/p2p/discover"
@@ -267,42 +266,21 @@ func New(config *Config) (*Ethereum, error) {
267266
return nil, fmt.Errorf("blockchain db err: %v", err)
268267
}
269268
if db, ok := blockDb.(*ethdb.LDBDatabase); ok {
270-
db.GetTimer = metrics.NewTimer("eth/db/block/user/gets")
271-
db.PutTimer = metrics.NewTimer("eth/db/block/user/puts")
272-
db.MissMeter = metrics.NewMeter("eth/db/block/user/misses")
273-
db.ReadMeter = metrics.NewMeter("eth/db/block/user/reads")
274-
db.WriteMeter = metrics.NewMeter("eth/db/block/user/writes")
275-
db.CompTimeMeter = metrics.NewMeter("eth/db/block/compact/time")
276-
db.CompReadMeter = metrics.NewMeter("eth/db/block/compact/input")
277-
db.CompWriteMeter = metrics.NewMeter("eth/db/block/compact/output")
269+
db.Meter("eth/db/block/")
278270
}
279271
stateDb, err := newdb(filepath.Join(config.DataDir, "state"))
280272
if err != nil {
281273
return nil, fmt.Errorf("state db err: %v", err)
282274
}
283275
if db, ok := stateDb.(*ethdb.LDBDatabase); ok {
284-
db.GetTimer = metrics.NewTimer("eth/db/state/user/gets")
285-
db.PutTimer = metrics.NewTimer("eth/db/state/user/puts")
286-
db.MissMeter = metrics.NewMeter("eth/db/state/user/misses")
287-
db.ReadMeter = metrics.NewMeter("eth/db/state/user/reads")
288-
db.WriteMeter = metrics.NewMeter("eth/db/state/user/writes")
289-
db.CompTimeMeter = metrics.NewMeter("eth/db/state/compact/time")
290-
db.CompReadMeter = metrics.NewMeter("eth/db/state/compact/input")
291-
db.CompWriteMeter = metrics.NewMeter("eth/db/state/compact/output")
276+
db.Meter("eth/db/state/")
292277
}
293278
extraDb, err := newdb(filepath.Join(config.DataDir, "extra"))
294279
if err != nil {
295280
return nil, fmt.Errorf("extra db err: %v", err)
296281
}
297282
if db, ok := extraDb.(*ethdb.LDBDatabase); ok {
298-
db.GetTimer = metrics.NewTimer("eth/db/extra/user/gets")
299-
db.PutTimer = metrics.NewTimer("eth/db/extra/user/puts")
300-
db.MissMeter = metrics.NewMeter("eth/db/extra/user/misses")
301-
db.ReadMeter = metrics.NewMeter("eth/db/extra/user/reads")
302-
db.WriteMeter = metrics.NewMeter("eth/db/extra/user/writes")
303-
db.CompTimeMeter = metrics.NewMeter("eth/db/extra/compact/time")
304-
db.CompReadMeter = metrics.NewMeter("eth/db/extra/compact/input")
305-
db.CompWriteMeter = metrics.NewMeter("eth/db/extra/compact/output")
283+
db.Meter("eth/db/extra/")
306284
}
307285
nodeDb := filepath.Join(config.DataDir, "nodes")
308286

ethdb/database.go

Lines changed: 80 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,19 @@ package ethdb
1919
import (
2020
"strconv"
2121
"strings"
22+
"sync"
2223
"time"
2324

2425
"github.com/ethereum/go-ethereum/compression/rle"
2526
"github.com/ethereum/go-ethereum/logger"
2627
"github.com/ethereum/go-ethereum/logger/glog"
27-
"github.com/rcrowley/go-metrics"
28+
"github.com/ethereum/go-ethereum/metrics"
2829
"github.com/syndtr/goleveldb/leveldb"
2930
"github.com/syndtr/goleveldb/leveldb/errors"
3031
"github.com/syndtr/goleveldb/leveldb/iterator"
3132
"github.com/syndtr/goleveldb/leveldb/opt"
33+
34+
gometrics "github.com/rcrowley/go-metrics"
3235
)
3336

3437
var OpenFileLimit = 64
@@ -37,15 +40,18 @@ type LDBDatabase struct {
3740
fn string // filename for reporting
3841
db *leveldb.DB // LevelDB instance
3942

40-
GetTimer metrics.Timer // Timer for measuring the database get request counts and latencies
41-
PutTimer metrics.Timer // Timer for measuring the database put request counts and latencies
42-
DelTimer metrics.Timer // Timer for measuring the database delete request counts and latencies
43-
MissMeter metrics.Meter // Meter for measuring the missed database get requests
44-
ReadMeter metrics.Meter // Meter for measuring the database get request data usage
45-
WriteMeter metrics.Meter // Meter for measuring the database put request data usage
46-
CompTimeMeter metrics.Meter // Meter for measuring the total time spent in database compaction
47-
CompReadMeter metrics.Meter // Meter for measuring the data read during compaction
48-
CompWriteMeter metrics.Meter // Meter for measuring the data written during compaction
43+
getTimer gometrics.Timer // Timer for measuring the database get request counts and latencies
44+
putTimer gometrics.Timer // Timer for measuring the database put request counts and latencies
45+
delTimer gometrics.Timer // Timer for measuring the database delete request counts and latencies
46+
missMeter gometrics.Meter // Meter for measuring the missed database get requests
47+
readMeter gometrics.Meter // Meter for measuring the database get request data usage
48+
writeMeter gometrics.Meter // Meter for measuring the database put request data usage
49+
compTimeMeter gometrics.Meter // Meter for measuring the total time spent in database compaction
50+
compReadMeter gometrics.Meter // Meter for measuring the data read during compaction
51+
compWriteMeter gometrics.Meter // Meter for measuring the data written during compaction
52+
53+
quitLock sync.Mutex // Mutex protecting the quit channel access
54+
quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
4955
}
5056

5157
// NewLDBDatabase returns a LevelDB wrapped object. LDBDatabase does not persist data by
@@ -54,64 +60,61 @@ type LDBDatabase struct {
5460
func NewLDBDatabase(file string) (*LDBDatabase, error) {
5561
// Open the db
5662
db, err := leveldb.OpenFile(file, &opt.Options{OpenFilesCacheCapacity: OpenFileLimit})
57-
// check for curruption and attempt to recover
63+
// check for corruption and attempt to recover
5864
if _, iscorrupted := err.(*errors.ErrCorrupted); iscorrupted {
5965
db, err = leveldb.RecoverFile(file, nil)
6066
}
6167
// (re) check for errors and abort if opening of the db failed
6268
if err != nil {
6369
return nil, err
6470
}
65-
database := &LDBDatabase{
71+
return &LDBDatabase{
6672
fn: file,
6773
db: db,
68-
}
69-
go database.meter(3 * time.Second)
70-
71-
return database, nil
74+
}, nil
7275
}
7376

7477
// Put puts the given key / value to the queue
7578
func (self *LDBDatabase) Put(key []byte, value []byte) error {
7679
// Measure the database put latency, if requested
77-
if self.PutTimer != nil {
78-
defer self.PutTimer.UpdateSince(time.Now())
80+
if self.putTimer != nil {
81+
defer self.putTimer.UpdateSince(time.Now())
7982
}
8083
// Generate the data to write to disk, update the meter and write
8184
dat := rle.Compress(value)
8285

83-
if self.WriteMeter != nil {
84-
self.WriteMeter.Mark(int64(len(dat)))
86+
if self.writeMeter != nil {
87+
self.writeMeter.Mark(int64(len(dat)))
8588
}
8689
return self.db.Put(key, dat, nil)
8790
}
8891

8992
// Get returns the given key if it's present.
9093
func (self *LDBDatabase) Get(key []byte) ([]byte, error) {
9194
// Measure the database get latency, if requested
92-
if self.GetTimer != nil {
93-
defer self.GetTimer.UpdateSince(time.Now())
95+
if self.getTimer != nil {
96+
defer self.getTimer.UpdateSince(time.Now())
9497
}
9598
// Retrieve the key and increment the miss counter if not found
9699
dat, err := self.db.Get(key, nil)
97100
if err != nil {
98-
if self.MissMeter != nil {
99-
self.MissMeter.Mark(1)
101+
if self.missMeter != nil {
102+
self.missMeter.Mark(1)
100103
}
101104
return nil, err
102105
}
103106
// Otherwise update the actually retrieved amount of data
104-
if self.ReadMeter != nil {
105-
self.ReadMeter.Mark(int64(len(dat)))
107+
if self.readMeter != nil {
108+
self.readMeter.Mark(int64(len(dat)))
106109
}
107110
return rle.Decompress(dat)
108111
}
109112

110113
// Delete deletes the key from the queue and database
111114
func (self *LDBDatabase) Delete(key []byte) error {
112115
// Measure the database delete latency, if requested
113-
if self.DelTimer != nil {
114-
defer self.DelTimer.UpdateSince(time.Now())
116+
if self.delTimer != nil {
117+
defer self.delTimer.UpdateSince(time.Now())
115118
}
116119
// Execute the actual operation
117120
return self.db.Delete(key, nil)
@@ -127,8 +130,20 @@ func (self *LDBDatabase) Flush() error {
127130
}
128131

129132
func (self *LDBDatabase) Close() {
133+
// Stop the metrics collection to avoid internal database races
134+
self.quitLock.Lock()
135+
defer self.quitLock.Unlock()
136+
137+
if self.quitChan != nil {
138+
errc := make(chan error)
139+
self.quitChan <- errc
140+
if err := <-errc; err != nil {
141+
glog.V(logger.Error).Infof("metrics failure in '%s': %v\n", self.fn, err)
142+
}
143+
}
144+
// Flush and close the database
130145
if err := self.Flush(); err != nil {
131-
glog.V(logger.Error).Infof("error: flush '%s': %v\n", self.fn, err)
146+
glog.V(logger.Error).Infof("flushing '%s' failed: %v\n", self.fn, err)
132147
}
133148
self.db.Close()
134149
glog.V(logger.Error).Infoln("flushed and closed db:", self.fn)
@@ -138,6 +153,27 @@ func (self *LDBDatabase) LDB() *leveldb.DB {
138153
return self.db
139154
}
140155

156+
// Meter configures the database metrics collectors and
157+
func (self *LDBDatabase) Meter(prefix string) {
158+
// Initialize all the metrics collector at the requested prefix
159+
self.getTimer = metrics.NewTimer(prefix + "user/gets")
160+
self.putTimer = metrics.NewTimer(prefix + "user/puts")
161+
self.delTimer = metrics.NewTimer(prefix + "user/dels")
162+
self.missMeter = metrics.NewMeter(prefix + "user/misses")
163+
self.readMeter = metrics.NewMeter(prefix + "user/reads")
164+
self.writeMeter = metrics.NewMeter(prefix + "user/writes")
165+
self.compTimeMeter = metrics.NewMeter(prefix + "compact/time")
166+
self.compReadMeter = metrics.NewMeter(prefix + "compact/input")
167+
self.compWriteMeter = metrics.NewMeter(prefix + "compact/output")
168+
169+
// Create a quit channel for the periodic collector and run it
170+
self.quitLock.Lock()
171+
self.quitChan = make(chan chan error)
172+
self.quitLock.Unlock()
173+
174+
go self.meter(3 * time.Second)
175+
}
176+
141177
// meter periodically retrieves internal leveldb counters and reports them to
142178
// the metrics subsystem.
143179
//
@@ -193,16 +229,24 @@ func (self *LDBDatabase) meter(refresh time.Duration) {
193229
}
194230
}
195231
// Update all the requested meters
196-
if self.CompTimeMeter != nil {
197-
self.CompTimeMeter.Mark(int64((counters[i%2][0] - counters[(i-1)%2][0]) * 1000 * 1000 * 1000))
232+
if self.compTimeMeter != nil {
233+
self.compTimeMeter.Mark(int64((counters[i%2][0] - counters[(i-1)%2][0]) * 1000 * 1000 * 1000))
198234
}
199-
if self.CompReadMeter != nil {
200-
self.CompReadMeter.Mark(int64((counters[i%2][1] - counters[(i-1)%2][1]) * 1024 * 1024))
235+
if self.compReadMeter != nil {
236+
self.compReadMeter.Mark(int64((counters[i%2][1] - counters[(i-1)%2][1]) * 1024 * 1024))
201237
}
202-
if self.CompWriteMeter != nil {
203-
self.CompWriteMeter.Mark(int64((counters[i%2][2] - counters[(i-1)%2][2]) * 1024 * 1024))
238+
if self.compWriteMeter != nil {
239+
self.compWriteMeter.Mark(int64((counters[i%2][2] - counters[(i-1)%2][2]) * 1024 * 1024))
204240
}
205241
// Sleep a bit, then repeat the stats collection
206-
time.Sleep(refresh)
242+
select {
243+
case errc := <-self.quitChan:
244+
// Quit requesting, stop hammering the database
245+
errc <- nil
246+
return
247+
248+
case <-time.After(refresh):
249+
// Timeout, gather a new set of stats
250+
}
207251
}
208252
}

0 commit comments

Comments
 (0)