|
1 | 1 | //! A library for performing object database integrity and connectivity checks
|
2 |
| -#![deny(rust_2018_idioms)] |
| 2 | +#![deny(rust_2018_idioms, unsafe_code, missing_docs)] |
3 | 3 |
|
4 | 4 | use gix_hash::ObjectId;
|
5 | 5 | use gix_hashtable::HashSet;
|
6 | 6 | use gix_object::{tree::EntryMode, Exists, FindExt, Kind};
|
| 7 | +use std::cell::RefCell; |
| 8 | +use std::ops::{Deref, DerefMut}; |
7 | 9 |
|
8 |
| -pub struct ConnectivityCheck<'a, T, F> |
| 10 | +/// Perform a connectivity check. |
| 11 | +pub struct Connectivity<T, F> |
9 | 12 | where
|
10 | 13 | T: FindExt + Exists,
|
11 | 14 | F: FnMut(&ObjectId, Kind),
|
12 | 15 | {
|
13 | 16 | /// ODB handle to use for the check
|
14 |
| - db: &'a T, |
| 17 | + db: T, |
15 | 18 | /// Closure to invoke when a missing object is encountered
|
16 | 19 | missing_cb: F,
|
17 | 20 | /// Set of Object IDs already (or about to be) scanned during the check
|
18 | 21 | oid_set: HashSet,
|
19 |
| - /// Single buffer for decoding objects from the ODB |
20 |
| - /// This is slightly faster than allocating throughout the connectivity check (and reduces the memory requirements) |
21 |
| - buf: Vec<u8>, |
| 22 | + /// A free-list of buffers for recursive tree decoding. |
| 23 | + free_list: FreeList, |
22 | 24 | }
|
23 | 25 |
|
24 |
| -impl<'a, T, F> ConnectivityCheck<'a, T, F> |
| 26 | +impl<T, F> Connectivity<T, F> |
25 | 27 | where
|
26 | 28 | T: FindExt + Exists,
|
27 | 29 | F: FnMut(&ObjectId, Kind),
|
28 | 30 | {
|
29 |
| - /// Instantiate a connectivity check |
30 |
| - pub fn new(db: &'a T, missing_cb: F) -> ConnectivityCheck<'a, T, F> { |
31 |
| - ConnectivityCheck { |
| 31 | + /// Instantiate a connectivity check. |
| 32 | + pub fn new(db: T, missing_cb: F) -> Connectivity<T, F> { |
| 33 | + Connectivity { |
32 | 34 | db,
|
33 | 35 | missing_cb,
|
34 | 36 | oid_set: HashSet::default(),
|
35 |
| - buf: Vec::new(), |
| 37 | + free_list: Default::default(), |
36 | 38 | }
|
37 | 39 | }
|
38 | 40 |
|
39 |
| - /// Run the connectivity check on the provided commit object ID |
40 |
| - /// - This will walk the trees and blobs referenced by the commit and verify they exist in the ODB |
41 |
| - /// - Any objects previously encountered by this [`ConnectivityCheck`] instance will be skipped |
42 |
| - /// - Any referenced blobs that are not present in the ODB will result in a call to the `missing_cb` |
43 |
| - /// - Missing commits or trees will currently result in panic |
| 41 | + /// Run the connectivity check on the provided commit `oid`. |
| 42 | + /// |
| 43 | + /// ### Algorithm |
| 44 | + /// |
| 45 | + /// Walk the trees and blobs referenced by the commit and verify they exist in the ODB. |
| 46 | + /// Any objects previously encountered by this instance will be skipped silently. |
| 47 | + /// Any referenced trees that cannot be loaded from the ODB, and any referenced blobs that are not present in it, will result in a call to the `missing_cb`. |
| 48 | + /// A commit that cannot be loaded will cause an error to be returned. |
44 | 49 | /// - TODO: a commit that cannot be loaded is currently reported by returning an error; consider whether it should invoke `missing_cb` instead.
|
45 |
| - pub fn check_commit(&mut self, oid: &ObjectId) { |
| 50 | + pub fn check_commit(&mut self, oid: &ObjectId) -> Result<(), gix_object::find::existing_object::Error> { |
46 | 51 | // Attempt to insert the commit ID in the set, and if already present, return immediately
|
47 | 52 | if !self.oid_set.insert(*oid) {
|
48 |
| - return; |
| 53 | + return Ok(()); |
49 | 54 | }
|
50 | 55 | // Obtain the commit's tree ID
|
51 | 56 | let tree_id = {
|
52 |
| - let commit = self.db.find_commit(oid, &mut self.buf).expect("failed to find commit"); |
| 57 | + let mut buf = self.free_list.buf(); |
| 58 | + let commit = self.db.find_commit(oid, &mut buf)?; |
53 | 59 | commit.tree()
|
54 | 60 | };
|
55 | 61 |
|
56 |
| - // Attempt to insert the tree ID in the set, and if already present, return immediately |
57 | 62 | if self.oid_set.insert(tree_id) {
|
58 |
| - self.check_tree(&tree_id); |
| 63 | + check_tree( |
| 64 | + &tree_id, |
| 65 | + &self.db, |
| 66 | + &mut self.free_list, |
| 67 | + &mut self.missing_cb, |
| 68 | + &mut self.oid_set, |
| 69 | + ); |
59 | 70 | }
|
| 71 | + |
| 72 | + Ok(()) |
60 | 73 | }
|
| 74 | +} |
61 | 75 |
|
62 |
| - fn check_tree(&mut self, oid: &ObjectId) { |
63 |
| - let tree = match self.db.find_tree(oid, &mut self.buf) { |
64 |
| - Ok(tree) => tree, |
65 |
| - Err(_) => { |
66 |
| - // Tree is missing, so invoke `missing_cb` |
67 |
| - (self.missing_cb)(oid, Kind::Tree); |
68 |
| - return; |
69 |
| - } |
70 |
| - }; |
| 76 | +#[derive(Default)] |
| 77 | +struct FreeList(RefCell<Vec<Vec<u8>>>); |
71 | 78 |
|
72 |
| - // Keeping separate sets for trees and blobs for now... |
73 |
| - // This is about a wash when compared to using a HashMap<ObjectID, Kind> |
74 |
| - struct TreeEntries { |
75 |
| - trees: HashSet<ObjectId>, |
76 |
| - blobs: HashSet<ObjectId>, |
77 |
| - } |
| 79 | +impl FreeList { |
| 80 | + fn buf(&self) -> ReturnToFreeListOnDrop<'_> { |
| 81 | + let buf = self.0.borrow_mut().pop().unwrap_or_default(); |
| 82 | + ReturnToFreeListOnDrop { buf, list: &self.0 } |
| 83 | + } |
| 84 | +} |
78 | 85 |
|
79 |
| - // Build up a set of trees and a set of blobs |
80 |
| - let entries: TreeEntries = { |
81 |
| - let mut entries = TreeEntries { |
82 |
| - trees: HashSet::default(), |
83 |
| - blobs: HashSet::default(), |
84 |
| - }; |
85 |
| - |
86 |
| - // For each entry in the tree |
87 |
| - for entry_ref in tree.entries.iter() { |
88 |
| - match entry_ref.mode { |
89 |
| - EntryMode::Tree => { |
90 |
| - let tree_id = entry_ref.oid.to_owned(); |
91 |
| - // Check if the tree has already been encountered |
92 |
| - if self.oid_set.insert(tree_id) { |
93 |
| - entries.trees.insert(tree_id); |
94 |
| - } |
95 |
| - } |
96 |
| - EntryMode::Blob | EntryMode::BlobExecutable | EntryMode::Link => { |
97 |
| - let blob_id = entry_ref.oid.to_owned(); |
98 |
| - // Check if the blob has already been encountered |
99 |
| - if self.oid_set.insert(blob_id) { |
100 |
| - entries.blobs.insert(blob_id); |
101 |
| - } |
102 |
| - } |
103 |
| - EntryMode::Commit => { |
104 |
| - // This implies a submodule (OID is the commit hash of the submodule) |
105 |
| - // Skip it as it's not in this repository! |
106 |
| - } |
107 |
| - } |
108 |
| - } |
109 |
| - entries |
110 |
| - }; |
| 86 | +struct ReturnToFreeListOnDrop<'a> { |
| 87 | + list: &'a RefCell<Vec<Vec<u8>>>, |
| 88 | + buf: Vec<u8>, |
| 89 | +} |
111 | 90 |
|
112 |
| - for tree_id in entries.trees.iter() { |
113 |
| - self.check_tree(tree_id); |
114 |
| - } |
115 |
| - for blob_id in entries.blobs.iter() { |
116 |
| - self.check_blob(blob_id); |
| 91 | +impl Drop for ReturnToFreeListOnDrop<'_> { |
| 92 | + fn drop(&mut self) { |
| 93 | + if !self.buf.is_empty() { |
| 94 | + self.list.borrow_mut().push(std::mem::take(&mut self.buf)); |
117 | 95 | }
|
118 | 96 | }
|
| 97 | +} |
| 98 | + |
| 99 | +impl Deref for ReturnToFreeListOnDrop<'_> { |
| 100 | + type Target = Vec<u8>; |
| 101 | + |
| 102 | + fn deref(&self) -> &Self::Target { |
| 103 | + &self.buf |
| 104 | + } |
| 105 | +} |
| 106 | + |
| 107 | +impl DerefMut for ReturnToFreeListOnDrop<'_> { |
| 108 | + fn deref_mut(&mut self) -> &mut Self::Target { |
| 109 | + &mut self.buf |
| 110 | + } |
| 111 | +} |
| 112 | + |
| 113 | +fn check_blob<F>(db: impl Exists, oid: &ObjectId, mut missing_cb: F) |
| 114 | +where |
| 115 | + F: FnMut(&ObjectId, Kind), |
| 116 | +{ |
| 117 | + // Check if the blob is missing from the ODB |
| 118 | + if !db.exists(oid) { |
| 119 | + // Blob is missing, so invoke `missing_cb` |
| 120 | + missing_cb(oid, Kind::Blob); |
| 121 | + } |
| 122 | +} |
119 | 123 |
|
120 |
| - fn check_blob(&mut self, oid: &ObjectId) { |
121 |
| - // Check if the blob is missing from the ODB |
122 |
| - if !self.db.exists(oid) { |
123 |
| - // Blob is missing, so invoke `missing_cb` |
124 |
| - (self.missing_cb)(oid, Kind::Blob); |
| 124 | +fn check_tree<F>( |
| 125 | + oid: &ObjectId, |
| 126 | + db: &(impl FindExt + Exists), |
| 127 | + list: &FreeList, |
| 128 | + missing_cb: &mut F, |
| 129 | + oid_set: &mut HashSet, |
| 130 | +) where |
| 131 | + F: FnMut(&ObjectId, Kind), |
| 132 | +{ |
| 133 | + let mut buf = list.buf(); |
| 134 | + let Ok(tree) = db.find_tree(oid, &mut buf) else { |
| 135 | + missing_cb(oid, Kind::Tree); |
| 136 | + return; |
| 137 | + }; |
| 138 | + |
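| 139 | + // Walk the tree's entries directly, without collecting them into intermediate sets: |
| 140 | + // recurse into each tree and check each blob the first time its ID is inserted into `oid_set` |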
| 141 | + for entry_ref in tree.entries.iter() { |
| 142 | + match entry_ref.mode { |
| 143 | + EntryMode::Tree => { |
| 144 | + let tree_id = entry_ref.oid.to_owned(); |
| 145 | + if oid_set.insert(tree_id) { |
| 146 | + check_tree(&tree_id, &*db, list, &mut *missing_cb, oid_set); |
| 147 | + } |
| 148 | + } |
| 149 | + EntryMode::Blob | EntryMode::BlobExecutable | EntryMode::Link => { |
| 150 | + let blob_id = entry_ref.oid.to_owned(); |
| 151 | + if oid_set.insert(blob_id) { |
| 152 | + check_blob(&*db, &blob_id, &mut *missing_cb); |
| 153 | + } |
| 154 | + } |
| 155 | + EntryMode::Commit => { |
| 156 | + // This implies a submodule (OID is the commit hash of the submodule) |
| 157 | + // Skip it as it's not in this repository! |
| 158 | + } |
125 | 159 | }
|
126 | 160 | }
|
127 | 161 | }
|
0 commit comments