|
6 | 6 |
|
7 | 7 | from cbrrr import encode_dag_cbor, decode_dag_cbor, CID |
8 | 8 |
|
9 | | -from .blockstore.car_file import ReadOnlyCARBlockStore, encode_varint, CarStreamReader |
| 9 | +from .blockstore.car_file import ReadOnlyCARBlockStore, encode_varint, CarStreamReader, OpportunisticStreamingCarBlockStore, OptimisticRetryError |
10 | 10 | from .blockstore import OverlayBlockStore |
11 | 11 | from .mst.node_store import NodeStore |
12 | 12 | from .mst.node_walker import NodeWalker |
13 | 13 | from .mst.diff import mst_diff, record_diff |
14 | | -from .mst.node import MSTNode |
15 | 14 |
|
16 | 15 |
|
17 | 16 | def prettify_record(record) -> str: |
@@ -110,48 +109,28 @@ def print_record_diff(car_a: str, car_b: str): |
110 | 109 | for delta in record_diff(ns, mst_created, mst_deleted): |
111 | 110 | print(delta) |
112 | 111 |
|
113 | | -def verify_car_streaming(carstream: CarStreamReader): |
114 | | - blocks = {} # for a preorder-traversal-ordered CAR, this never grows beyond 0 |
115 | | - optimistic = [True] |
116 | | - car_iter = iter(carstream) |
117 | | - def lazy_get(key: CID) -> bytes: |
118 | | - print("len", len(blocks)) |
119 | | - if optimistic[0]: |
120 | | - try: |
121 | | - k, v = next(car_iter) |
122 | | - except StopIteration: |
123 | | - raise ValueError(f"lookup failed for {key}") |
124 | | - if k == key: |
125 | | - return v |
126 | | - # if we reached here the CAR is not canonically ordered |
127 | | - optimistic[0] = False |
128 | | - blocks[k] = v |
129 | | - for k, v in car_iter: # slurp the entire rest of CAR into RAM |
130 | | - blocks[k] = v |
131 | | - # fall thru |
132 | | - return blocks[key] # TODO: reopen input and re-slurp if this fails |
133 | | - commit = decode_dag_cbor(lazy_get(carstream.car_root)) |
| 112 | +def verify_car_streaming(carstream: CarStreamReader, optimistic: bool=True): |
| 113 | + bs = OpportunisticStreamingCarBlockStore(carstream, optimistic=optimistic) |
| 114 | + commit = decode_dag_cbor(bs.get_block(bytes(bs.car_root))) |
134 | 115 | assert isinstance(commit, dict) |
135 | | - root_cid = commit["data"] |
136 | | - assert isinstance(root_cid, CID) |
137 | | - def verify_mst(node_cid: CID): |
138 | | - node = MSTNode.deserialise(lazy_get(node_cid)) |
139 | | - if node.subtrees[0] is not None: |
140 | | - verify_mst(node.subtrees[0]) |
141 | | - for k, v, subtree in zip(node.keys, node.vals, node.subtrees[1:]): |
142 | | - print(k) |
143 | | - rv = lazy_get(v) |
144 | | - print(k, len(rv)) |
145 | | - if subtree is not None: |
146 | | - verify_mst(subtree) |
147 | | - |
148 | | - verify_mst(root_cid) |
149 | | - print(carstream.file.tell()) # should be at EOF now |
| 116 | + root = commit["data"] |
| 117 | + assert isinstance(root, CID) |
| 118 | + ns = NodeStore(bs) |
| 119 | + count = 0 |
| 120 | + for k, v in NodeWalker(ns, root).iter_kv(): |
| 121 | + bs.get_block(bytes(v)) # force-read every record block |
| 122 | + count += 1 |
| 123 | + print(f"Verified {count} records (assuming commit signature is valid)") |
| 124 | + print("canonical:", bs.is_canonical()) |
150 | 125 |
|
151 | 126 | def verify_car(car_path: str): |
152 | | - with open(car_path, "rb") as carfile: |
153 | | - carstream = CarStreamReader(carfile) |
154 | | - verify_car_streaming(carstream) |
| 127 | + try: |
| 128 | + with open(car_path, "rb") as carfile: |
| 129 | + verify_car_streaming(CarStreamReader(carfile)) |
| 130 | + except OptimisticRetryError: |
| 131 | + print("Optimistic streaming failed, retrying with full buffering...") |
| 132 | + with open(car_path, "rb") as carfile: |
| 133 | + verify_car_streaming(CarStreamReader(carfile), optimistic=False) |
155 | 134 |
|
156 | 135 | COMMANDS = { |
157 | 136 | "info": (print_info, "print CAR header and repo info"), |
|
0 commit comments