Skip to content

Commit 0024131

Browse files
RajivTSfacebook-github-bot
authored and committed
Implement support for in-pack Ref Deltas during Mononoke Git push
Summary: As highlighted in the previous diff, we currently do not support this. Every time the pack parser encounters a ref-delta, it assumes that the base of the delta is an existing object that is present on the server. In 99% of the cases, this is true. However, in certain scenarios, Git can push a packfile containing ref deltas where the base of the delta is present in the same packfile. The strategy to support such deltas involves making the pack parser iterative instead of doing a single pass over the packfile and reporting the result. The steps are as follows: 1. Identify all ref deltas in the pack and collect the ID of all the base objects 2. Fetch all the base objects from Mononoke, if they are already present in our storage 3. Try to unpack the pack file and use the fetched base objects for resolving deltas in case we encounter any 4. If we encounter a ref delta which points to a base object that is unknown to us, keep track of it and continue instead of erroring out 5. At the end of the iteration, collect all the unpacked objects and add them to the set of base objects already known to us 6. Using the updated set of base objects, continue again from step 3 Any valid packfile will eventually get parsed and unpacked using this strategy. However, we could end up in an infinite loop in case of an invalid packfile with a ref delta referring to an invalid base object with the above steps. To avoid this, we have added an upper limit to the max number of iterations. Differential Revision: D68025406 fbshipit-source-id: 1aa406fe9033490e985c042b3fe4c6c82dde1ae8
1 parent 26e54c4 commit 0024131

File tree

3 files changed

+170
-63
lines changed

3 files changed

+170
-63
lines changed

eden/mononoke/git/git_types/src/object.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,17 @@ impl ObjectKind {
5353
}
5454
}
5555

56-
#[derive(Clone, Debug)]
56+
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
5757
pub struct ObjectContent {
5858
pub parsed: Object,
5959
pub raw: Bytes,
6060
}
6161

6262
impl ObjectContent {
63+
pub fn new(parsed: Object, raw: Bytes) -> Self {
64+
Self { parsed, raw }
65+
}
66+
6367
pub fn is_tree(&self) -> bool {
6468
self.parsed.as_tree().is_some()
6569
}

eden/mononoke/git/protocol/src/pack_processor.rs

Lines changed: 147 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,20 @@ use futures::stream;
2222
use futures::StreamExt;
2323
use futures::TryStreamExt;
2424
use futures_stats::TimedTryFutureExt;
25-
use git_types::fetch_git_object_bytes;
25+
use git_types::fetch_git_object;
2626
use git_types::GitIdentifier;
27-
use git_types::HeaderState;
2827
use git_types::ObjectContent;
2928
use gix_features::progress::Discard;
3029
use gix_hash::Kind;
3130
use gix_hash::ObjectId;
3231
use gix_object::encode::loose_header;
33-
use gix_object::ObjectRef;
3432
use gix_object::WriteTo;
3533
use gix_pack::cache::Never;
3634
use gix_pack::data;
3735
use gix_pack::data::decode::entry::ResolvedBase;
38-
use gix_pack::data::input;
36+
use gix_pack::data::decode::Error as PackError;
37+
use gix_pack::data::input::Entry as InputEntry;
38+
use gix_pack::data::input::Error as InputError;
3939
use gix_pack::data::File;
4040
use mononoke_types::hash::GitSha1;
4141
use packfile::types::BaseObject;
@@ -46,9 +46,72 @@ use tempfile::Builder;
4646

4747
use crate::PACKFILE_SUFFIX;
4848

49-
type ObjectMap = HashMap<ObjectId, (Bytes, gix_object::Kind)>;
49+
const MAX_ALLOWED_DEPTH: u8 = 30;
50+
type ObjectMap = HashMap<ObjectId, ObjectContent>;
5051

51-
fn into_data_entry(pack_entry: input::Entry) -> data::Entry {
52+
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
53+
enum PackEntry {
54+
Pending(InputEntry),
55+
Processed((ObjectId, ObjectContent)),
56+
}
57+
58+
#[derive(Debug, Clone, Default)]
59+
struct PackEntries {
60+
entries: HashSet<PackEntry>,
61+
}
62+
63+
impl PackEntries {
64+
fn from_pending_and_processed(pending: Vec<InputEntry>, processed: ObjectMap) -> Self {
65+
let mut entries = HashSet::new();
66+
for entry in pending {
67+
entries.insert(PackEntry::Pending(entry));
68+
}
69+
for (id, object_content) in processed {
70+
entries.insert(PackEntry::Processed((id, object_content)));
71+
}
72+
Self { entries }
73+
}
74+
75+
fn from_entries(entries: HashSet<PackEntry>) -> Self {
76+
Self { entries }
77+
}
78+
79+
fn into_pending_and_processed(self) -> (Vec<InputEntry>, ObjectMap) {
80+
let mut pending = Vec::new();
81+
let mut processed = HashMap::new();
82+
for entry in self.entries {
83+
match entry {
84+
PackEntry::Pending(entry) => pending.push(entry),
85+
PackEntry::Processed((id, content)) => {
86+
processed.insert(id, content);
87+
}
88+
}
89+
}
90+
(pending, processed)
91+
}
92+
93+
fn into_processed(self) -> Result<FxHashMap<ObjectId, ObjectContent>> {
94+
let mut object_map = FxHashMap::default();
95+
for entry in self.entries {
96+
match entry {
97+
PackEntry::Processed((id, content)) => {
98+
object_map.insert(id, content);
99+
}
100+
_ => anyhow::bail!("Packfile entries are not completely processed"),
101+
}
102+
}
103+
Ok(object_map)
104+
}
105+
106+
fn is_processed(&self) -> bool {
107+
self.entries.iter().all(|entry| match entry {
108+
PackEntry::Processed(_) => true,
109+
_ => false,
110+
})
111+
}
112+
}
113+
114+
fn into_data_entry(pack_entry: InputEntry) -> data::Entry {
52115
data::Entry {
53116
header: pack_entry.header,
54117
decompressed_size: pack_entry.decompressed_size,
@@ -72,10 +135,10 @@ fn resolve_delta(
72135
out: &mut Vec<u8>,
73136
known_objects: &ObjectMap,
74137
) -> Option<ResolvedBase> {
75-
known_objects.get(oid).map(|(bytes, kind)| {
76-
out.extend_from_slice(bytes);
138+
known_objects.get(oid).map(|object_content| {
139+
object_content.parsed.write_to(out.by_ref()).unwrap();
77140
ResolvedBase::OutOfPack {
78-
kind: kind.clone(),
141+
kind: object_content.parsed.kind().clone(),
79142
end: out.len(),
80143
}
81144
})
@@ -104,17 +167,15 @@ async fn fetch_prereq_objects(
104167
async move {
105168
let git_identifier =
106169
GitIdentifier::Basic(GitSha1::from_object_id(object_id.as_ref())?);
107-
let fallible_git_bytes =
108-
fetch_git_object_bytes(&ctx, blobstore, &git_identifier, HeaderState::Included)
109-
.await;
110-
match fallible_git_bytes {
111-
Ok(git_bytes) => {
112-
let object = ObjectRef::from_loose(&git_bytes)
113-
.context("Failure in converting bytes into git object")?;
114-
let kind = object.kind();
115-
let mut git_bytes = Vec::new();
170+
let fallible_git_object = fetch_git_object(&ctx, blobstore, &git_identifier).await;
171+
match fallible_git_object {
172+
Ok(object) => {
173+
let mut git_bytes = object.loose_header().into_vec();
116174
object.write_to(git_bytes.by_ref())?;
117-
anyhow::Ok(Some((object_id, (Bytes::from(git_bytes), kind))))
175+
anyhow::Ok(Some((
176+
object_id,
177+
ObjectContent::new(object, Bytes::from(git_bytes)),
178+
)))
118179
}
119180
// The object might not be present in the data store since its an inpack object
120181
_ => anyhow::Ok(None),
@@ -148,6 +209,50 @@ pub async fn parse_pack(
148209
response
149210
}
150211

212+
fn process_pack_entries(pack_file: &data::File, entries: PackEntries) -> Result<PackEntries> {
213+
let (pending_entries, prereq_objects) = entries.into_pending_and_processed();
214+
let output_entries = pending_entries
215+
.into_iter()
216+
.map(|entry| {
217+
let mut output = vec![];
218+
let err_context = format!("Error in decoding packfile entry: {:?}", &entry.header);
219+
let outcome = pack_file.decode_entry(
220+
into_data_entry(entry.clone()),
221+
&mut output,
222+
&mut gix_features::zlib::Inflate::default(),
223+
&|oid, out| resolve_delta(oid, out, &prereq_objects),
224+
&mut Never,
225+
);
226+
match outcome {
227+
Ok(outcome) => {
228+
let object_bytes = Bytes::from(git_object_bytes(
229+
output,
230+
outcome.kind,
231+
outcome.object_size as usize,
232+
));
233+
let base_object = BaseObject::new(object_bytes.clone())
234+
.context("Error in converting bytes to git object")?;
235+
let id = base_object.hash;
236+
let object = ObjectContent::new(base_object.object, object_bytes);
237+
let processed_entry = PackEntry::Processed((id, object));
238+
anyhow::Ok(processed_entry)
239+
}
240+
Err(e) => match e {
241+
PackError::DeltaBaseUnresolved(_) => anyhow::Ok(PackEntry::Pending(entry)),
242+
_ => Err(e).context(err_context),
243+
},
244+
}
245+
})
246+
.collect::<Result<HashSet<PackEntry>>>()
247+
.context("Failure in decoding packfile entries")?;
248+
let output_entries = prereq_objects
249+
.into_iter()
250+
.map(|(id, object_content)| PackEntry::Processed((id, object_content)))
251+
.chain(output_entries)
252+
.collect();
253+
Ok(PackEntries::from_entries(output_entries))
254+
}
255+
151256
async fn parse_stored_pack(
152257
pack_path: &Path,
153258
ctx: &CoreContext,
@@ -177,45 +282,32 @@ async fn parse_stored_pack(
177282
.try_timed()
178283
.await?
179284
.log_future_stats(ctx.scuba().clone(), "Fetched Prerequisite Objects", None);
285+
// Fetch all the entries that need to be processed
286+
let pending_entries = pack_file
287+
.streaming_iter()
288+
.context("Failure in iterating packfile")?
289+
.collect::<Result<Vec<_>, InputError>>()?;
180290

181-
let parsed_objects = stream::iter(
182-
pack_file
183-
.streaming_iter()
184-
.context("Failure in iterating packfile")?,
185-
)
186-
.map(move |fallible_entry| match fallible_entry {
187-
Ok(entry) => {
188-
let mut output = vec![];
189-
let err_context = format!("Error in decoding packfile entry: {:?}", &entry.header);
190-
let outcome = pack_file
191-
.decode_entry(
192-
into_data_entry(entry),
193-
&mut output,
194-
&mut gix_features::zlib::Inflate::default(),
195-
&|oid, out| resolve_delta(oid, out, &prereq_objects),
196-
&mut Never,
197-
)
198-
.context(err_context)?;
199-
let object_bytes = Bytes::from(git_object_bytes(
200-
output,
201-
outcome.kind,
202-
outcome.object_size as usize,
203-
));
204-
let base_object = BaseObject::new(object_bytes.clone())
205-
.context("Error in converting bytes to git object")?;
206-
let id = base_object.hash;
207-
let object = ObjectContent {
208-
parsed: base_object.object,
209-
raw: object_bytes,
210-
};
211-
anyhow::Ok((id, object))
291+
// Process all the entries
292+
tokio::task::spawn_blocking({
293+
let pack_file = pack_file.clone();
294+
move || {
295+
let mut pack_entries =
296+
PackEntries::from_pending_and_processed(pending_entries, prereq_objects);
297+
let mut counter = 0;
298+
while !pack_entries.is_processed() {
299+
if counter > MAX_ALLOWED_DEPTH {
300+
anyhow::bail!(
301+
"Maximum allowed depth reached while processing packfile entries"
302+
);
303+
}
304+
counter += 1;
305+
pack_entries = process_pack_entries(&pack_file, pack_entries)?;
306+
}
307+
pack_entries.into_processed()
212308
}
213-
Err(e) => anyhow::bail!("Failure in iterating packfile entry: {:?}", e),
214309
})
215-
.try_collect::<FxHashMap<_, _>>()
216310
.try_timed()
217311
.await?
218-
.log_future_stats(ctx.scuba().clone(), "Decoded objects from Packfile", None);
219-
220-
Ok(parsed_objects)
312+
.log_future_stats(ctx.scuba().clone(), "Decoded objects from Packfile", None)
221313
}

eden/mononoke/tests/integration/mononoke_git_server/test-mononoke-git-server-push-empty-repo-with-deltas.t

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,25 +24,36 @@
2424
$ echo "AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1." > file1
2525
$ git add .
2626
$ git commit -qam "Add file1"
27-
$ git tag -a -m "new tag" first_tag
2827
# File1 should be large enough that Git create a delta for it
2928
$ echo "AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile1.AddFile2." > file1
3029
$ echo "This is file2" > file2.txt
3130
$ git add .
3231
$ git commit -qam "Modified file1, Added file2"
33-
$ git tag -a empty_tag -m ""
32+
# Get list of all objects for verification later
33+
$ git rev-list --objects --all | git cat-file --batch-check='%(objectname) %(objecttype) %(rest)' | sort > $TESTTMP/object_list
3434

35-
Push Git repository to Mononoke
35+
# Push Git repository to Mononoke
3636
$ git pack-objects --all --stdout > packfile
3737
$ last_commit=$(git rev-parse HEAD)
38-
Create the capabilities string
38+
# Create the capabilities string
3939
$ capabilities="report-status quiet object-format=sha1"
4040
$ printf "00980000000000000000000000000000000000000000 $last_commit refs/heads/master_bookmark\0 $capabilities" >> push_data
4141
$ echo -n "0000" >> push_data
4242
$ cat packfile >> push_data
43-
Pipe the push data to CURL
43+
# Pipe the push data to CURL
4444
$ curl -X POST $MONONOKE_GIT_SERVICE_BASE_URL/$REPONAME.git/git-receive-pack -H 'Content-Type: application/x-git-receive-pack-request' -H 'Accept: application/x-git-receive-pack-result' -H 'Accept-Encoding: deflate, gzip, br' -k --cert "$TEST_CERTDIR/client0.crt" --key "$TEST_CERTDIR/client0.key" --data-binary "@push_data" -s -w "\nResponse code: %{http_code}\n"
45-
Error in decoding packfile entry: RefDelta { base_id: Sha1(ccce194714ee3fae382fad2f4a67cd6c1a7320cd) }: A delta chain could not be followed as the ref base with id ccce194714ee3fae382fad2f4a67cd6c1a7320cd could not be found
46-
Response code: 500
45+
*unpack ok (glob)
46+
*ok refs/heads/master_bookmark (glob)
47+
* (glob)
48+
Response code: 200
49+
50+
$ wait_for_git_bookmark_create refs/heads/master_bookmark
51+
52+
# Clone the repo from Mononoke and validate that the push worked
53+
$ cd $TESTTMP
54+
$ git_client clone -q $MONONOKE_GIT_SERVICE_BASE_URL/$REPONAME.git check_repo
55+
$ cd check_repo
56+
$ git rev-list --objects --all | git cat-file --batch-check='%(objectname) %(objecttype) %(rest)' | sort > $TESTTMP/new_object_list
57+
$ diff -w $TESTTMP/new_object_list $TESTTMP/object_list
4758

4859

0 commit comments

Comments
 (0)