Skip to content

Commit 5adf5ed

Browse files
committed
Cache MST blocks within applyWrites
1 parent f34d425 commit 5adf5ed

File tree

5 files changed

+177
-240
lines changed

5 files changed

+177
-240
lines changed

mist/lib/mst.ml

Lines changed: 11 additions & 164 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,6 @@ module type Intf = sig
233233

234234
val create_empty : Store.t -> (t, exn) Lwt_result.t
235235

236-
val get_cid : t -> string -> Cid.t option Lwt.t
237-
238236
val of_assoc : Store.t -> (string * Cid.t) list -> t Lwt.t
239237

240238
val add : t -> string -> Cid.t -> t Lwt.t
@@ -252,8 +250,6 @@ module type Intf = sig
252250

253251
val leaves_of_root : t -> (string * Cid.t) list Lwt.t
254252

255-
val null_diff : t -> data_diff Lwt.t
256-
257253
val equal : t -> t -> bool Lwt.t
258254
end
259255

@@ -789,53 +785,6 @@ struct
789785
Lwt_result.bind (Store.put_block blockstore cid encoded) (fun _ ->
790786
Lwt.return_ok {blockstore; root= cid} )
791787

792-
(* returns the cid for a given key, if it exists *)
793-
let get_cid t key : Cid.t option Lwt.t =
794-
let rec get_in_node (n : node) : Cid.t option Lwt.t =
795-
let sorted_entries =
796-
List.sort
797-
(fun (a : entry) (b : entry) -> String.compare a.key b.key)
798-
n.entries
799-
in
800-
let rec scan (prev : entry option) (entries : entry list) :
801-
Cid.t option Lwt.t =
802-
match entries with
803-
| [] -> (
804-
match prev with
805-
| Some p -> (
806-
p.right
807-
>>? function Some r -> get_in_node r | None -> Lwt.return_none )
808-
| None -> (
809-
n.left
810-
>>? function Some l -> get_in_node l | None -> Lwt.return_none ) )
811-
| e :: rest ->
812-
if key = e.key then Lwt.return_some e.value
813-
else if key < e.key then
814-
match prev with
815-
| Some p -> (
816-
p.right
817-
>>? function
818-
| Some r ->
819-
get_in_node r
820-
| None ->
821-
Lwt.return_none )
822-
| None -> (
823-
n.left
824-
>>? function
825-
| Some l ->
826-
get_in_node l
827-
| None ->
828-
Lwt.return_none )
829-
else scan (Some e) rest
830-
in
831-
scan None sorted_entries
832-
in
833-
match%lwt retrieve_node t t.root with
834-
| None ->
835-
Lwt.fail (Invalid_argument "root cid not found in repo store")
836-
| Some root ->
837-
get_in_node root
838-
839788
(* builds and persists a canonical mst from sorted leaves *)
840789
let of_assoc blockstore assoc : t Lwt.t =
841790
let open Lwt.Infix in
@@ -975,25 +924,6 @@ struct
975924
let%lwt _ = Store.put_block blockstore cid encoded in
976925
Lwt.return cid
977926

978-
(* returns the layer a raw node belongs to *)
979-
let rec get_layer_raw (t : t) (raw : node_raw) : int Lwt.t =
980-
match (raw.l, raw.e) with
981-
| None, [] ->
982-
Lwt.return 0
983-
| Some left_cid, [] -> (
984-
match%lwt retrieve_node_raw t left_cid with
985-
| Some left_raw ->
986-
let%lwt left_layer = get_layer_raw t left_raw in
987-
Lwt.return (left_layer + 1)
988-
| None ->
989-
failwith ("couldn't find node " ^ Cid.to_string left_cid) )
990-
| _, e :: _ -> (
991-
match e.p with
992-
| 0 ->
993-
Lwt.return (Util.leading_zeros_on_hash (Bytes.to_string e.k))
994-
| _ ->
995-
failwith "first node entry has nonzero p value" )
996-
997927
(* decompress entry keys from a raw node *)
998928
let decompress_keys (raw : node_raw) : string list =
999929
let last_key = ref "" in
@@ -1049,7 +979,7 @@ struct
1049979
let t' = {blockstore; root= result.root} in
1050980
match%lwt retrieve_node_raw t' result.root with
1051981
| Some raw ->
1052-
let%lwt layer = get_layer_raw t' raw in
982+
let%lwt layer = get_node_height t' raw in
1053983
Lwt.return (Some result.root, layer)
1054984
| None ->
1055985
Lwt.return (Some result.root, 0) )
@@ -1077,7 +1007,7 @@ struct
10771007
| None ->
10781008
failwith ("couldn't find node " ^ Cid.to_string root_cid)
10791009
| Some raw ->
1080-
let%lwt root_layer = get_layer_raw t raw in
1010+
let%lwt root_layer = get_node_height t raw in
10811011
if key_layer > root_layer then
10821012
add_above_root t root_cid root_layer key value key_layer
10831013
else if key_layer = root_layer then
@@ -1295,7 +1225,7 @@ struct
12951225
let%lwt new_left_layer =
12961226
match new_left_raw_opt with
12971227
| Some r ->
1298-
get_layer_raw t r
1228+
get_node_height t r
12991229
| None ->
13001230
Lwt.return 0
13011231
in
@@ -1360,7 +1290,7 @@ struct
13601290
let%lwt new_right_layer =
13611291
match new_right_raw_opt with
13621292
| Some r ->
1363-
get_layer_raw t r
1293+
get_node_height t r
13641294
| None ->
13651295
Lwt.return 0
13661296
in
@@ -1429,7 +1359,7 @@ struct
14291359
| None ->
14301360
Lwt.return_none
14311361
| Some raw ->
1432-
let%lwt root_layer = get_layer_raw t raw in
1362+
let%lwt root_layer = get_node_height t raw in
14331363
if key_layer > root_layer then
14341364
(* key can't exist above root *)
14351365
Lwt.return_some (root_cid, root_layer)
@@ -1482,7 +1412,7 @@ struct
14821412
let%lwt result_layer =
14831413
match%lwt retrieve_node_raw t result.root with
14841414
| Some r ->
1485-
get_layer_raw t r
1415+
get_node_height t r
14861416
| None ->
14871417
Lwt.return 0
14881418
in
@@ -1616,19 +1546,6 @@ struct
16161546
| Some (new_root, _layer) ->
16171547
Lwt.return {t with root= new_root}
16181548

1619-
(* produces a diff from an empty mst to the current one *)
1620-
let null_diff curr : data_diff Lwt.t =
1621-
let%lwt curr_nodes, _, curr_leaf_set = collect_nodes_and_leaves curr in
1622-
let%lwt curr_leaves = leaves_of_root curr in
1623-
let adds = List.map (fun (key, cid) : diff_add -> {key; cid}) curr_leaves in
1624-
Lwt.return
1625-
{ adds
1626-
; updates= []
1627-
; deletes= []
1628-
; new_mst_blocks= curr_nodes
1629-
; new_leaf_cids= curr_leaf_set
1630-
; removed_cids= Cid.Set.empty }
1631-
16321549
(* checks that two msts are identical by recursively comparing their entries *)
16331550
let equal (t1 : t) (t2 : t) : bool Lwt.t =
16341551
let rec nodes_equal (n1 : node) (n2 : node) : bool Lwt.t =
@@ -1700,78 +1617,6 @@ struct
17001617
Lwt.return false
17011618
end
17021619

1703-
module Differ (Prev : Intf) (Curr : Intf) = struct
1704-
let diff ~(t_curr : Curr.t) ~(t_prev : Prev.t) : data_diff Lwt.t =
1705-
let%lwt curr_nodes, curr_node_set, curr_leaf_set =
1706-
Curr.collect_nodes_and_leaves t_curr
1707-
in
1708-
let%lwt _, prev_node_set, prev_leaf_set =
1709-
Prev.collect_nodes_and_leaves t_prev
1710-
in
1711-
(* just convenient to have these functions *)
1712-
let in_prev_nodes cid = Cid.Set.mem cid prev_node_set in
1713-
let in_curr_nodes cid = Cid.Set.mem cid curr_node_set in
1714-
let in_prev_leaves cid = Cid.Set.mem cid prev_leaf_set in
1715-
let in_curr_leaves cid = Cid.Set.mem cid curr_leaf_set in
1716-
(* new mst blocks are curr nodes that are not in prev *)
1717-
let new_mst_blocks =
1718-
List.filter (fun (cid, _) -> not (in_prev_nodes cid)) curr_nodes
1719-
in
1720-
(* removed cids are prev nodes not in curr plus prev leaves not in curr *)
1721-
let removed_node_cids =
1722-
Cid.Set.fold
1723-
(fun cid acc ->
1724-
if not (in_curr_nodes cid) then Cid.Set.add cid acc else acc )
1725-
prev_node_set Cid.Set.empty
1726-
in
1727-
let removed_leaf_cids =
1728-
Cid.Set.fold
1729-
(fun cid acc ->
1730-
if not (in_curr_leaves cid) then Cid.Set.add cid acc else acc )
1731-
prev_leaf_set Cid.Set.empty
1732-
in
1733-
let removed_cids = Cid.Set.union removed_node_cids removed_leaf_cids in
1734-
(* new leaf cids are curr leaves not in prev *)
1735-
let new_leaf_cids =
1736-
Cid.Set.fold
1737-
(fun cid acc ->
1738-
if not (in_prev_leaves cid) then Cid.Set.add cid acc else acc )
1739-
curr_leaf_set Cid.Set.empty
1740-
in
1741-
(* compute adds/updates/deletes by merging sorted leaves *)
1742-
let%lwt curr_leaves = Curr.leaves_of_root t_curr in
1743-
let%lwt prev_leaves = Prev.leaves_of_root t_prev in
1744-
let rec merge (pl : (string * Cid.t) list) (cl : (string * Cid.t) list)
1745-
(adds : diff_add list) (updates : diff_update list)
1746-
(deletes : diff_delete list) =
1747-
match (pl, cl) with
1748-
| [], [] ->
1749-
(* we prepend for speed, then reverse at the end *)
1750-
(List.rev adds, List.rev updates, List.rev deletes)
1751-
| [], (k, c) :: cr ->
1752-
(* more curr than prev, goes in adds *)
1753-
merge [] cr ({key= k; cid= c} :: adds) updates deletes
1754-
| (k, c) :: pr, [] ->
1755-
(* more prev than curr, goes in deletes *)
1756-
merge pr [] adds updates ({key= k; cid= c} :: deletes)
1757-
| (k1, c1) :: pr, (k2, c2) :: cr ->
1758-
if k1 = k2 then (* if key & value are the same, keep going *)
1759-
if Cid.equal c1 c2 then merge pr cr adds updates deletes
1760-
else (* same key, different value; update *)
1761-
merge pr cr adds ({key= k1; prev= c1; cid= c2} :: updates) deletes
1762-
else if k1 < k2 then
1763-
merge pr ((k2, c2) :: cr) adds updates
1764-
({key= k1; cid= c1} :: deletes)
1765-
else
1766-
merge ((k1, c1) :: pr) cr
1767-
({key= k2; cid= c2} :: adds)
1768-
updates deletes
1769-
in
1770-
let adds, updates, deletes = merge prev_leaves curr_leaves [] [] [] in
1771-
Lwt.return
1772-
{adds; updates; deletes; new_mst_blocks; new_leaf_cids; removed_cids}
1773-
end
1774-
17751620
module Inductive (M : Intf) = struct
17761621
module Cache_bs = Cache_blockstore (Memory_blockstore)
17771622
module Mem_mst = Make (Cache_bs)
@@ -1792,7 +1637,7 @@ module Inductive (M : Intf) = struct
17921637
(String_map.bindings map)
17931638
in
17941639
(* save this now so we can read blocks from it later *)
1795-
let block_map = mem_mst.blockstore.bs.blocks in
1640+
let blockstore = mem_mst.blockstore in
17961641
(* apply inverse of operations in reverse order,
17971642
check that mst root matches prev_root *)
17981643
let%lwt inverted_mst, added_cids =
@@ -1815,11 +1660,13 @@ module Inductive (M : Intf) = struct
18151660
(Cid.to_string prev_root)
18161661
(Cid.to_string inverted_mst.root) ) ;
18171662
let proof_cids =
1818-
Cid.Set.union added_cids mem_mst.blockstore.reads
1663+
Cid.Set.union added_cids (Cache_bs.get_reads blockstore)
18191664
|> Cid.Set.remove prev_root |> Cid.Set.add new_root
18201665
in
18211666
let {blocks= proof_bm; _} : Block_map.with_missing =
1822-
Block_map.get_many (Cid.Set.elements proof_cids) block_map
1667+
Block_map.get_many
1668+
(Cid.Set.elements proof_cids)
1669+
(Cache_bs.get_cache blockstore)
18231670
in
18241671
Lwt.return_ok proof_bm
18251672
with e -> Lwt.return_error e

mist/lib/storage/block_map.ml

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,12 @@ let remove = Cid_map.remove
1818

1919
let get_many cids m =
2020
let blocks, missing =
21-
List.fold_left
22-
(fun (b, mis) cid ->
23-
match get cid m with
24-
| Some bytes ->
25-
(Cid_map.add cid bytes b, mis)
26-
| None ->
27-
(b, mis @ [cid]) )
28-
(Cid_map.empty, []) cids
21+
List.partition_map
22+
(fun cid ->
23+
match get cid m with Some data -> Left (cid, data) | None -> Right cid )
24+
cids
2925
in
30-
{blocks; missing= List.rev missing}
26+
{blocks= Cid_map.of_list blocks; missing}
3127

3228
let has = Cid_map.mem
3329

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,76 @@
1-
type 'bs data = {mutable reads: Cid.Set.t; bs: 'bs}
1+
type 'bs data = {mutable reads: Cid.Set.t; mutable cache: Block_map.t; bs: 'bs}
22

33
module Make
44
(Bs : Blockstore.Writable) : sig
55
include Blockstore.Writable
66

77
val create : Bs.t -> t
8+
9+
val get_reads : t -> Cid.Set.t
10+
11+
val get_cache : t -> Block_map.t
812
end
913
with type t = Bs.t data = struct
1014
type t = Bs.t data
1115

12-
let create bs = {reads= Cid.Set.empty; bs}
16+
let create bs = {reads= Cid.Set.empty; cache= Block_map.empty; bs}
17+
18+
let get_reads t = t.reads
19+
20+
let get_cache t = t.cache
1321

1422
let get_bytes t cid =
15-
match%lwt Bs.get_bytes t.bs cid with
16-
| Some _ as res ->
23+
match Block_map.get cid t.cache with
24+
| Some _ as cached ->
1725
t.reads <- Cid.Set.add cid t.reads ;
18-
Lwt.return res
19-
| None ->
20-
Lwt.return_none
26+
Lwt.return cached
27+
| None -> (
28+
match%lwt Bs.get_bytes t.bs cid with
29+
| Some data as res ->
30+
t.cache <- Block_map.set cid data t.cache ;
31+
t.reads <- Cid.Set.add cid t.reads ;
32+
Lwt.return res
33+
| None ->
34+
Lwt.return_none )
2135

22-
let has t cid = Bs.has t.bs cid
36+
let has t cid =
37+
if Block_map.has cid t.cache then Lwt.return_true else Bs.has t.bs cid
2338

2439
let get_blocks t cids =
25-
let%lwt bm = Bs.get_blocks t.bs cids in
26-
t.reads <-
27-
Cid.Set.union t.reads (Cid.Set.of_list (Block_map.keys bm.blocks)) ;
28-
Lwt.return bm
40+
let {Block_map.blocks= cached; missing} = Block_map.get_many cids t.cache in
41+
(* mark cached as read *)
42+
Block_map.iter (fun cid _ -> t.reads <- Cid.Set.add cid t.reads) cached ;
43+
(* fetch missing from underlying store *)
44+
let%lwt fetched = Bs.get_blocks t.bs missing in
45+
(* cache and mark as read *)
46+
Block_map.iter
47+
(fun cid data ->
48+
t.cache <- Block_map.set cid data t.cache ;
49+
t.reads <- Cid.Set.add cid t.reads )
50+
fetched.blocks ;
51+
(* combine results *)
52+
let blocks =
53+
List.fold_left
54+
(fun acc (cid, data) -> Block_map.set cid data acc)
55+
fetched.blocks (Block_map.entries cached)
56+
in
57+
Lwt.return {Block_map.blocks; missing= fetched.missing}
2958

30-
let put_block t cid bytes = Bs.put_block t.bs cid bytes
59+
let put_block t cid bytes =
60+
t.cache <- Block_map.set cid bytes t.cache ;
61+
Bs.put_block t.bs cid bytes
3162

32-
let put_many t blocks = Bs.put_many t.bs blocks
63+
let put_many t blocks =
64+
Block_map.iter
65+
(fun cid data -> t.cache <- Block_map.set cid data t.cache)
66+
blocks ;
67+
Bs.put_many t.bs blocks
3368

34-
let delete_block t cid = Bs.delete_block t.bs cid
69+
let delete_block t cid =
70+
t.cache <- Block_map.remove cid t.cache ;
71+
Bs.delete_block t.bs cid
3572

36-
let delete_many t cids = Bs.delete_many t.bs cids
73+
let delete_many t cids =
74+
List.iter (fun cid -> t.cache <- Block_map.remove cid t.cache) cids ;
75+
Bs.delete_many t.bs cids
3776
end

0 commit comments

Comments
 (0)