Skip to content

Commit ccb902c

Browse files
wip deterministic car
1 parent fe39c7d commit ccb902c

File tree

2 files changed

+53
-37
lines changed

2 files changed

+53
-37
lines changed

src/atmst/cartool.py

Lines changed: 41 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,8 @@ def compact(car_in: str, car_out: str):
8787
write_block(carfile_out, new_header)
8888
write_block(carfile_out, bytes(bs.car_root) + encode_dag_cbor(commit))
8989

90-
for node in NodeWalker(NodeStore(bs), commit["data"]).iter_nodes():
91-
write_block(carfile_out, bytes(node.cid) + node.serialised)
92-
for v in node.vals:
93-
write_block(carfile_out, bytes(v) + bs.get_block(bytes(v)))
90+
for cid in NodeWalker(NodeStore(bs), commit["data"]).iter_preorder_cids():
91+
write_block(carfile_out, bytes(cid) + bs.get_block(bytes(cid)))
9492

9593
def _delta_str(a: str, b: str):
9694
if a == b:
@@ -112,42 +110,48 @@ def print_record_diff(car_a: str, car_b: str):
112110
for delta in record_diff(ns, mst_created, mst_deleted):
113111
print(delta)
114112

115-
def verify_car(car_path: str):
113+
def verify_car_streaming(carstream: CarStreamReader):
116114
blocks = {} # for a preorder-traversal-ordered CAR, this never grows beyond 0
115+
optimistic = [True]
116+
car_iter = iter(carstream)
117+
def lazy_get(key: CID) -> bytes:
118+
print("len", len(blocks))
119+
if optimistic[0]:
120+
try:
121+
k, v = next(car_iter)
122+
except StopIteration:
123+
raise ValueError(f"lookup failed for {key}")
124+
if k == key:
125+
return v
126+
# if we reached here the CAR is not canonically ordered
127+
optimistic[0] = False
128+
blocks[k] = v
129+
for k, v in car_iter: # slurp the entire rest of CAR into RAM
130+
blocks[k] = v
131+
# fall thru
132+
return blocks[key] # TODO: reopen input and re-slurp if this fails
133+
commit = decode_dag_cbor(lazy_get(carstream.car_root))
134+
assert isinstance(commit, dict)
135+
root_cid = commit["data"]
136+
assert isinstance(root_cid, CID)
137+
def verify_mst(node_cid: CID):
138+
node = MSTNode.deserialise(lazy_get(node_cid))
139+
if node.subtrees[0] is not None:
140+
verify_mst(node.subtrees[0])
141+
for k, v, subtree in zip(node.keys, node.vals, node.subtrees[1:]):
142+
print(k)
143+
rv = lazy_get(v)
144+
print(k, len(rv))
145+
if subtree is not None:
146+
verify_mst(subtree)
147+
148+
verify_mst(root_cid)
149+
print(carstream.file.tell()) # should be at EOF now
150+
151+
def verify_car(car_path: str):
117152
with open(car_path, "rb") as carfile:
118153
carstream = CarStreamReader(carfile)
119-
car_iter = iter(carstream)
120-
def lazy_get(key: CID, fallback: dict={}) -> bytes:
121-
print("len", len(blocks))
122-
if key in fallback:
123-
return fallback[key]
124-
if key in blocks:
125-
return blocks.pop(key)
126-
while True:
127-
try:
128-
k, v = next(car_iter)
129-
except StopIteration:
130-
raise ValueError(f"lookup failed for {key}")
131-
if k == key:
132-
return v
133-
blocks[k] = v
134-
commit = decode_dag_cbor(lazy_get(carstream.car_root))
135-
assert isinstance(commit, dict)
136-
root_cid = commit["data"]
137-
assert isinstance(root_cid, CID)
138-
def verify_mst(node_cid: CID, ctx: dict):
139-
node = MSTNode.deserialise(lazy_get(node_cid))
140-
fallback = ctx.copy()
141-
for k, v in zip(node.keys, node.vals):
142-
print(k)
143-
rv = lazy_get(v, fallback)
144-
fallback[v] = rv
145-
print(k, len(rv))
146-
for subtree in node.subtrees:
147-
if subtree is not None:
148-
verify_mst(subtree, fallback)
149-
verify_mst(root_cid, {})
150-
print(carstream.file.tell()) # should be at EOF now
154+
verify_car_streaming(carstream)
151155

152156
COMMANDS = {
153157
"info": (print_info, "print CAR header and repo info"),

src/atmst/mst/node_walker.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,18 @@ def iter_nodes(self) -> Iterable[MSTNode]:
165165
yield self.frame.node
166166
self.right_or_up()
167167

168+
# iterate in "preorder": node, left subtree, val, subtree, val, subtree, ...
169+
# this matches the canonical CAR block ordering used by sync1.1
170+
def iter_preorder_cids(self) -> Iterable[CID]:
171+
yield self.frame.node.cid
172+
while not self.is_final:
173+
while self.subtree:
174+
self.down()
175+
yield self.frame.node.cid
176+
self.right_or_up()
177+
assert self.lval is not None
178+
yield self.lval
179+
168180
def iter_node_cids(self) -> Iterable[CID]:
169181
for node in self.iter_nodes():
170182
yield node.cid

0 commit comments

Comments
 (0)