Skip to content

Commit a8ebf99

Browse files
committed
Optimize in-memory repo parsing
1 parent a4e4a7b commit a8ebf99

File tree

2 files changed

+90
-21
lines changed

2 files changed

+90
-21
lines changed

mist/lib/mst.ml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,86 @@ let ( >>? ) lazy_opt_lwt f =
118118
let%lwt result = Lazy.force lazy_opt_lwt in
119119
f result
120120

121+
(* extracts leaves from a block map *)
122+
let leaves_from_blocks (blocks : Block_map.t) (root : Cid.t) :
123+
(string * Cid.t) list =
124+
let leaves = ref [] in
125+
let stack = Stack.create () in
126+
Stack.push (root, "") stack ;
127+
while not (Stack.is_empty stack) do
128+
let cid, prefix = Stack.pop stack in
129+
match Block_map.get cid blocks with
130+
| None ->
131+
() (* missing block probably a record *)
132+
| Some bytes -> (
133+
try
134+
let node = decode_block_raw bytes in
135+
(* proess left subtree *)
136+
( match node.l with
137+
| Some left_cid ->
138+
Stack.push (left_cid, prefix) stack
139+
| None ->
140+
() ) ;
141+
(* process entries in reverse order so they come out in correct order *)
142+
let last_key = ref prefix in
143+
List.iter
144+
(fun (entry : entry_raw) ->
145+
let key_prefix =
146+
if entry.p = 0 then ""
147+
else if entry.p <= String.length !last_key then
148+
String.sub !last_key 0 entry.p
149+
else !last_key
150+
in
151+
let full_key = key_prefix ^ Bytes.to_string entry.k in
152+
last_key := full_key ;
153+
leaves := (full_key, entry.v) :: !leaves ;
154+
(* push right subtree to stack *)
155+
match entry.t with
156+
| Some right_cid ->
157+
Stack.push (right_cid, full_key) stack
158+
| None ->
159+
() )
160+
node.e
161+
with Invalid_argument _ -> () )
162+
done ;
163+
List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) !leaves
164+
165+
(* extracts just mst node cids (non-leaf blocks) from a block map *)
166+
let mst_node_cids_from_blocks (blocks : Block_map.t) (root : Cid.t) : Cid.t list
167+
=
168+
let nodes = ref [] in
169+
let visited = ref Cid.Set.empty in
170+
let stack = Stack.create () in
171+
Stack.push root stack ;
172+
while not (Stack.is_empty stack) do
173+
let cid = Stack.pop stack in
174+
if not (Cid.Set.mem cid !visited) then (
175+
visited := Cid.Set.add cid !visited ;
176+
match Block_map.get cid blocks with
177+
| None ->
178+
()
179+
| Some bytes -> (
180+
try
181+
let node = decode_block_raw bytes in
182+
nodes := cid :: !nodes ;
183+
(* add all children to stack *)
184+
( match node.l with
185+
| Some left_cid ->
186+
Stack.push left_cid stack
187+
| None ->
188+
() ) ;
189+
List.iter
190+
(fun (entry : entry_raw) ->
191+
match entry.t with
192+
| Some right_cid ->
193+
Stack.push right_cid stack
194+
| None ->
195+
() )
196+
node.e
197+
with Invalid_argument _ -> () ) )
198+
done ;
199+
!nodes
200+
121201
module type Intf = sig
122202
module Store : Writable_blockstore
123203

pegasus/lib/repository.ml

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -592,20 +592,9 @@ let import_car t (stream : Car.stream) : (t, exn) Lwt_result.t =
592592
failwith ("invalid commit: " ^ e)
593593
in
594594
if commit.did <> t.did then failwith "did does not match commit did" ;
595-
(* create in-memory mst to walk *)
596-
let mem_bs = Mist.Storage.Memory_blockstore.create ~blocks:all_blocks () in
597-
let mem_mst : Mem_mst.t = {blockstore= mem_bs; root= commit.data} in
598-
let%lwt leaves = Mem_mst.leaves_of_root mem_mst in
599-
let leaf_cids =
600-
List.fold_left
601-
(fun acc (_, cid) -> Cid.Set.add cid acc)
602-
Cid.Set.empty leaves
603-
in
604-
(* get mst nodes by filtering out leaves and commit from all blocks *)
595+
let leaves = Mist.Mst.leaves_from_blocks all_blocks commit.data in
605596
let mst_node_cids =
606-
Block_map.keys all_blocks
607-
|> List.filter (fun cid ->
608-
(not (Cid.equal cid root)) && not (Cid.Set.mem cid leaf_cids) )
597+
Mist.Mst.mst_node_cids_from_blocks all_blocks commit.data
609598
in
610599
(* collect mst node blocks for insert *)
611600
let mst_blocks =
@@ -620,23 +609,23 @@ let import_car t (stream : Car.stream) : (t, exn) Lwt_result.t =
620609
in
621610
(* collect record data for insert *)
622611
let since = Tid.now () in
623-
let blob_refs : (string * Cid.t) list ref = ref [] in
624-
let record_data =
625-
List.map
626-
(fun (path, cid) ->
612+
let record_data, blob_refs =
613+
List.fold_left
614+
(fun (acc_data, acc_refs) (path, cid) ->
627615
match Block_map.get cid all_blocks with
628616
| Some data ->
629617
let record = Lex.of_cbor data in
630618
let record_refs =
631619
Util.find_blob_refs record
632620
|> List.map (fun (br : Mist.Blob_ref.t) -> (path, br.ref))
633621
in
634-
blob_refs := record_refs @ !blob_refs ;
635-
(path, cid, data, since)
622+
( (path, cid, data, since) :: acc_data
623+
, List.rev_append record_refs acc_refs )
636624
| None ->
637625
failwith ("missing record block: " ^ Cid.to_string cid) )
638-
leaves
626+
([], []) leaves
639627
in
628+
let record_data = List.rev record_data in
640629
let%lwt _ =
641630
Util.use_pool t.db.db (fun conn ->
642631
Util.transact conn (fun () ->
@@ -645,7 +634,7 @@ let import_car t (stream : Car.stream) : (t, exn) Lwt_result.t =
645634
let$! () = User_store.Bulk.put_blocks mst_blocks conn in
646635
let$! () =
647636
[%rapper execute {sql| DELETE FROM records |sql}] () conn
648-
in
637+
in
649638
let$! () = User_store.Bulk.put_records record_data conn in
650639
let$! () = User_store.Bulk.put_blob_refs !blob_refs conn in
651640
Lwt.return_ok () ) )

0 commit comments

Comments
 (0)