Skip to content

Commit f4c50fa

Browse files
authored
Merge pull request #17514 from MinaProtocol/lyh/revamp-reuse-lmdb-keys
LMDB Cache: partially reuse GCed IDs so to save cycles on cleaning LMDB V2
2 parents 9b8fc10 + a5c5bba commit f4c50fa

File tree

1 file changed

+35
-13
lines changed

1 file changed

+35
-13
lines changed

src/lib/disk_cache/lmdb/disk_cache.ml

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,39 +20,61 @@ module Make (Data : Binable.S) = struct
2020
{ env : Rw.t
2121
; db : Rw.holder
2222
; counter : int ref
23-
; garbage : int Hash_set.t
24-
(** A list of ids that are no longer reachable from OCaml's side *)
23+
; reusable_keys : int Queue.t
24+
(** A list of ids that are no longer reachable from OCaml runtime, but
25+
haven't been cleared inside the LMDB disk cache *)
26+
; queue_guard : Error_checking_mutex.t
2527
}
2628

27-
(** How big can the above hashset be before we do a cleanup *)
28-
let garbage_size_limit = 512
29+
(** How big can the queue [reusable_keys] be before we do a cleanup *)
30+
let reuse_size_limit = 512
2931

3032
let initialize path ~logger =
3133
Async.Deferred.Result.map (Disk_cache_utils.initialize_dir path ~logger)
3234
~f:(fun path ->
3335
let env, db = Rw.create path in
34-
{ env; db; counter = ref 0; garbage = Hash_set.create (module Int) } )
36+
{ env
37+
; db
38+
; counter = ref 0
39+
; reusable_keys = Queue.create ()
40+
; queue_guard = Error_checking_mutex.create ()
41+
} )
3542

3643
type id = { idx : int }
3744

3845
let get ({ env; db; _ } : t) ({ idx } : id) : Data.t =
3946
Rw.get ~env db idx |> Option.value_exn
4047

41-
let put ({ env; db; counter; garbage } : t) (x : Data.t) : id =
42-
(* TODO: we may reuse IDs by pulling them from the `garbage` hash set *)
43-
let idx = !counter in
44-
incr counter ;
48+
let put ({ env; db; counter; reusable_keys; queue_guard } : t) (x : Data.t) :
49+
id =
50+
let idx =
51+
match
52+
Error_checking_mutex.critical_section queue_guard ~f:(fun () ->
53+
Queue.dequeue reusable_keys )
54+
with
55+
| None ->
56+
(* We don't have reusable keys, assign a new one nobody ever used *)
57+
incr counter ; !counter - 1
58+
| Some reused_key ->
59+
(* Any key inside [reusable_keys] is marked as garbage by GC, so we're
60+
free to use them *)
61+
reused_key
62+
in
4563
let res = { idx } in
4664
(* When this reference is GC'd, delete the file. *)
4765
Gc.Expert.add_finalizer_last_exn res (fun () ->
4866
(* The actual deletion is delayed, as GC maybe triggered in LMDB's
4967
critical section. LMDB critical section then will be re-entered if
5068
it's invoked directly in a GC hook.
5169
This causes mutex double-acquiring and node freezes. *)
52-
Hash_set.add garbage idx ) ;
53-
if Hash_set.length garbage >= garbage_size_limit then (
54-
Hash_set.iter garbage ~f:(fun to_remove -> Rw.remove ~env db to_remove) ;
55-
Hash_set.clear garbage ) ;
70+
Error_checking_mutex.critical_section queue_guard ~f:(fun () ->
71+
Queue.enqueue reusable_keys idx ) ) ;
72+
73+
Error_checking_mutex.critical_section queue_guard ~f:(fun () ->
74+
if Queue.length reusable_keys >= reuse_size_limit then (
75+
Queue.iter reusable_keys ~f:(fun to_remove ->
76+
Rw.remove ~env db to_remove ) ;
77+
Queue.clear reusable_keys ) ) ;
5678
Rw.set ~env db idx x ;
5779
res
5880

0 commit comments

Comments
 (0)