@@ -87,10 +87,8 @@ def compact(car_in: str, car_out: str):
8787 write_block (carfile_out , new_header )
8888 write_block (carfile_out , bytes (bs .car_root ) + encode_dag_cbor (commit ))
8989
90- for node in NodeWalker (NodeStore (bs ), commit ["data" ]).iter_nodes ():
91- write_block (carfile_out , bytes (node .cid ) + node .serialised )
92- for v in node .vals :
93- write_block (carfile_out , bytes (v ) + bs .get_block (bytes (v )))
90+ for cid in NodeWalker (NodeStore (bs ), commit ["data" ]).iter_preorder_cids ():
91+ write_block (carfile_out , bytes (cid ) + bs .get_block (bytes (cid )))
9492
9593def _delta_str (a : str , b : str ):
9694 if a == b :
@@ -112,42 +110,48 @@ def print_record_diff(car_a: str, car_b: str):
112110 for delta in record_diff (ns , mst_created , mst_deleted ):
113111 print (delta )
114112
115- def verify_car ( car_path : str ):
113+ def verify_car_streaming ( carstream : CarStreamReader ):
116114 blocks = {} # for a preorder-traversal-ordered CAR, this never grows beyond 0
115+ optimistic = [True ]
116+ car_iter = iter (carstream )
117+ def lazy_get (key : CID ) -> bytes :
118+ print ("len" , len (blocks ))
119+ if optimistic [0 ]:
120+ try :
121+ k , v = next (car_iter )
122+ except StopIteration :
123+ raise ValueError (f"lookup failed for { key } " )
124+ if k == key :
125+ return v
126+ # if we reached here the CAR is not canonically ordered
127+ optimistic [0 ] = False
128+ blocks [k ] = v
129+ for k , v in car_iter : # slurp the entire rest of CAR into RAM
130+ blocks [k ] = v
131+ # fall thru
132+ return blocks [key ] # TODO: reopen input and re-slurp if this fails
133+ commit = decode_dag_cbor (lazy_get (carstream .car_root ))
134+ assert isinstance (commit , dict )
135+ root_cid = commit ["data" ]
136+ assert isinstance (root_cid , CID )
137+ def verify_mst (node_cid : CID ):
138+ node = MSTNode .deserialise (lazy_get (node_cid ))
139+ if node .subtrees [0 ] is not None :
140+ verify_mst (node .subtrees [0 ])
141+ for k , v , subtree in zip (node .keys , node .vals , node .subtrees [1 :]):
142+ print (k )
143+ rv = lazy_get (v )
144+ print (k , len (rv ))
145+ if subtree is not None :
146+ verify_mst (subtree )
147+
148+ verify_mst (root_cid )
149+ print (carstream .file .tell ()) # should be at EOF now
150+
151+ def verify_car (car_path : str ):
117152 with open (car_path , "rb" ) as carfile :
118153 carstream = CarStreamReader (carfile )
119- car_iter = iter (carstream )
120- def lazy_get (key : CID , fallback : dict = {}) -> bytes :
121- print ("len" , len (blocks ))
122- if key in fallback :
123- return fallback [key ]
124- if key in blocks :
125- return blocks .pop (key )
126- while True :
127- try :
128- k , v = next (car_iter )
129- except StopIteration :
130- raise ValueError (f"lookup failed for { key } " )
131- if k == key :
132- return v
133- blocks [k ] = v
134- commit = decode_dag_cbor (lazy_get (carstream .car_root ))
135- assert isinstance (commit , dict )
136- root_cid = commit ["data" ]
137- assert isinstance (root_cid , CID )
138- def verify_mst (node_cid : CID , ctx : dict ):
139- node = MSTNode .deserialise (lazy_get (node_cid ))
140- fallback = ctx .copy ()
141- for k , v in zip (node .keys , node .vals ):
142- print (k )
143- rv = lazy_get (v , fallback )
144- fallback [v ] = rv
145- print (k , len (rv ))
146- for subtree in node .subtrees :
147- if subtree is not None :
148- verify_mst (subtree , fallback )
149- verify_mst (root_cid , {})
150- print (carstream .file .tell ()) # should be at EOF now
154+ verify_car_streaming (carstream )
151155
152156COMMANDS = {
153157 "info" : (print_info , "print CAR header and repo info" ),
0 commit comments