33package badger
44
55import (
6+ "github.com/onflow/flow-go/module/component"
7+ "github.com/onflow/flow-go/module/irrecoverable"
68 "math/rand"
79 "time"
810
@@ -12,76 +14,91 @@ import (
1214 "github.com/onflow/flow-go/module"
1315)
1416
17+ // Cleaner uses component.ComponentManager to implement module.Startable and module.ReadyDoneAware
18+ // to run an internal goroutine which run badger value log garbage collection on timely basis.
1519type Cleaner struct {
16- log zerolog.Logger
17- db * badger.DB
18- metrics module.CleanerMetrics
19- enabled bool
20- ratio float64
21- freq int
22- calls int
20+ * component.ComponentManager
21+ log zerolog.Logger
22+ db * badger.DB
23+ metrics module.CleanerMetrics
24+ ratio float64
25+ interval time.Duration
2326}
2427
25- // NewCleaner returns a cleaner that runs the badger value log garbage collection once every `frequency` calls
26- // if a frequency of zero is passed in, we will not run the GC at all
27- func NewCleaner (log zerolog.Logger , db * badger.DB , metrics module.CleanerMetrics , frequency int ) * Cleaner {
28+ var _ component.Component = (* Cleaner )(nil )
29+
30+ // NewCleaner returns a cleaner that runs the badger value log garbage collection once every `interval` duration
31+ // if an interval of zero is passed in, we will not run the GC at all.
32+ func NewCleaner (log zerolog.Logger , db * badger.DB , metrics module.CleanerMetrics , interval time.Duration ) * Cleaner {
2833 // NOTE: we run garbage collection frequently at points in our business
2934 // logic where we are likely to have a small breather in activity; it thus
3035 // makes sense to run garbage collection often, with a smaller ratio, rather
3136 // than running it rarely and having big rewrites at once
3237 c := & Cleaner {
33- log : log .With ().Str ("component" , "cleaner" ).Logger (),
34- db : db ,
35- metrics : metrics ,
36- ratio : 0.2 ,
37- freq : frequency ,
38- enabled : frequency > 0 , // Disable if passed in 0 as frequency
38+ log : log .With ().Str ("component" , "cleaner" ).Logger (),
39+ db : db ,
40+ metrics : metrics ,
41+ ratio : 0.2 ,
42+ interval : interval ,
3943 }
40- // we don't want the entire network to run GC at the same time, so
41- // distribute evenly over time
42- if c .enabled {
43- c .calls = rand .Intn (c .freq )
44+
45+ cmBuilder := component .NewComponentManagerBuilder ()
46+
47+ // Disable if passed in 0 as interval
48+ if c .interval > 0 {
49+ cmBuilder .AddWorker (c .gcWorkerRoutine )
4450 }
51+
52+ c .ComponentManager = cmBuilder .Build ()
4553 return c
4654}
4755
48- func (c * Cleaner ) RunGC () {
49- if ! c .enabled {
56+ // gcWorkerRoutine runs badger GC on timely basis.
57+ func (c * Cleaner ) gcWorkerRoutine (ctx irrecoverable.SignalerContext , ready component.ReadyFunc ) {
58+ ready ()
59+ ticker := time .NewTicker (c .nextWaitDuration ())
60+ for {
61+ select {
62+ case <- ctx .Done ():
63+ return
64+ case <- ticker .C :
65+ c .runGC ()
66+
67+ // reset the ticker with a new interval and random jitter
68+ ticker .Reset (c .nextWaitDuration ())
69+ }
70+ }
71+ }
72+
73+ // nextWaitDuration calculates next duration for Cleaner to wait before attempting to run GC.
74+ // We add 20% jitter into the interval, so that we don't risk nodes syncing
75+ // up on their GC calls over time.
76+ func (c * Cleaner ) nextWaitDuration () time.Duration {
77+ return time .Duration (c .interval .Milliseconds () + rand .Int63n (c .interval .Milliseconds ()/ 5 ))
78+ }
79+
80+ // runGC runs garbage collection for badger DB, handles sentinel errors and reports metrics.
81+ func (c * Cleaner ) runGC () {
82+ started := time .Now ()
83+ err := c .db .RunValueLogGC (c .ratio )
84+ if err == badger .ErrRejected {
85+ // NOTE: this happens when a GC call is already running
86+ c .log .Warn ().Msg ("garbage collection on value log already running" )
5087 return
5188 }
52- // only actually run approximately every frequency number of calls
53- c .calls ++
54- if c .calls < c .freq {
89+ if err == badger .ErrNoRewrite {
90+ // NOTE: this happens when no files have any garbage to drop
91+ c .log .Debug ().Msg ("garbage collection on value log unnecessary" )
92+ return
93+ }
94+ if err != nil {
95+ c .log .Error ().Err (err ).Msg ("garbage collection on value log failed" )
5596 return
5697 }
5798
58- // we add 20% jitter into the interval, so that we don't risk nodes syncing
59- // up on their GC calls over time
60- c .calls = rand .Intn (c .freq / 5 )
61-
62- // run the garbage collection in own goroutine and handle sentinel errors
63- go func () {
64- started := time .Now ()
65- err := c .db .RunValueLogGC (c .ratio )
66- if err == badger .ErrRejected {
67- // NOTE: this happens when a GC call is already running
68- c .log .Warn ().Msg ("garbage collection on value log already running" )
69- return
70- }
71- if err == badger .ErrNoRewrite {
72- // NOTE: this happens when no files have any garbage to drop
73- c .log .Debug ().Msg ("garbage collection on value log unnecessary" )
74- return
75- }
76- if err != nil {
77- c .log .Error ().Err (err ).Msg ("garbage collection on value log failed" )
78- return
79- }
80-
81- runtime := time .Since (started )
82- c .log .Debug ().
83- Dur ("gc_duration" , runtime ).
84- Msg ("garbage collection on value log executed" )
85- c .metrics .RanGC (runtime )
86- }()
99+ runtime := time .Since (started )
100+ c .log .Debug ().
101+ Dur ("gc_duration" , runtime ).
102+ Msg ("garbage collection on value log executed" )
103+ c .metrics .RanGC (runtime )
87104}
0 commit comments