Skip to content

Commit 7f868d1

Browse files
committed
feat: add blob::Platform::clear_resource_cache_keep_allocation().
It allows keeping a free-list of buffers around after clearing, to help prevent re-allocating and growing buffers over and over.
1 parent e079250 commit 7f868d1

File tree

3 files changed

+61
-5
lines changed

3 files changed

+61
-5
lines changed

gix-diff/src/blob/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ pub struct Platform {
119119
/// That way, expensive rewrite-checks with NxM matrix checks would be as fast as possible,
120120
/// avoiding duplicate work.
121121
diff_cache: HashMap<platform::CacheKey, platform::CacheValue>,
122+
/// A list of previously used buffers, ready for re-use.
123+
free_list: Vec<Vec<u8>>,
122124
}
123125

124126
mod impls {

gix-diff/src/blob/platform.rs

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use std::{io::Write, process::Stdio};
2-
31
use bstr::{BStr, BString, ByteSlice};
2+
use std::cmp::Ordering;
3+
use std::{io::Write, process::Stdio};
44

55
use super::Algorithm;
66
use crate::blob::{pipeline, Pipeline, Platform, ResourceKind};
@@ -325,6 +325,7 @@ impl Platform {
325325
old: None,
326326
new: None,
327327
diff_cache: Default::default(),
328+
free_list: Vec::with_capacity(2),
328329
options,
329330
filter,
330331
filter_mode,
@@ -542,7 +543,7 @@ impl Platform {
542543

543544
/// Every call to [set_resource()](Self::set_resource()) will keep the diffable data in memory, and that will never be cleared.
544545
///
545-
/// Use this method to clear the cache, releasing memory. Note that this will also loose all information about resources
546+
/// Use this method to clear the cache, releasing memory. Note that this will also lose all information about resources
546547
/// which means diffs would fail unless the resources are set again.
547548
///
548549
/// Note that this also has to be called if the same resource is going to be diffed in different states, i.e. using different
@@ -551,6 +552,37 @@ impl Platform {
551552
self.old = None;
552553
self.new = None;
553554
self.diff_cache.clear();
555+
self.free_list.clear();
556+
}
557+
558+
/// Every call to [set_resource()](Self::set_resource()) will keep the diffable data in memory, and that will never be cleared.
559+
///
560+
/// Use this method to clear the cache, but keep the previously used buffers around for later re-use.
561+
///
562+
/// If there are more buffers on the free-list than there are stored sources, we halve that amount each time this method is called,
563+
/// or keep as many resources as were previously stored, or 2 buffers, whichever is larger.
564+
/// If there are fewer buffers in the free-list than are in the resource cache, we will keep as many as needed to match the
565+
/// number of previously stored resources.
566+
///
567+
/// Returns the number of available buffers.
568+
pub fn clear_resource_cache_keep_allocation(&mut self) -> usize {
569+
self.old = None;
570+
self.new = None;
571+
572+
let diff_cache = std::mem::take(&mut self.diff_cache);
573+
match self.free_list.len().cmp(&diff_cache.len()) {
574+
Ordering::Less => {
575+
let to_take = diff_cache.len() - self.free_list.len();
576+
self.free_list
577+
.extend(diff_cache.into_values().map(|v| v.buffer).take(to_take));
578+
}
579+
Ordering::Equal => {}
580+
Ordering::Greater => {
581+
let new_len = (self.free_list.len() / 2).max(diff_cache.len()).max(2);
582+
self.free_list.truncate(new_len);
583+
}
584+
}
585+
self.free_list.len()
554586
}
555587
}
556588

@@ -591,7 +623,7 @@ impl Platform {
591623
kind,
592624
rela_path: rela_path.to_owned(),
593625
})?;
594-
let mut buf = Vec::new();
626+
let mut buf = self.free_list.pop().unwrap_or_default();
595627
let out = self.filter.convert_to_diffable(
596628
&id,
597629
mode,

gix-diff/tests/blob/platform.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,35 @@ fn resources_of_worktree_and_odb_and_check_link() -> crate::Result {
121121
"Also obvious that symlinks are definitely special, but it's what git does as well"
122122
);
123123

124-
platform.clear_resource_cache();
124+
assert_eq!(
125+
platform.clear_resource_cache_keep_allocation(),
126+
3,
127+
"some buffers are retained and reused"
128+
);
125129
assert_eq!(
126130
platform.resources(),
127131
None,
128132
"clearing the cache voids resources and one has to set it up again"
129133
);
130134

135+
assert_eq!(
136+
platform.clear_resource_cache_keep_allocation(),
137+
2,
138+
"doing this again keeps 2 buffers"
139+
);
140+
assert_eq!(
141+
platform.clear_resource_cache_keep_allocation(),
142+
2,
143+
"no matter what - after all we need at least two resources for a diff"
144+
);
145+
146+
platform.clear_resource_cache();
147+
assert_eq!(
148+
platform.clear_resource_cache_keep_allocation(),
149+
0,
150+
"after a proper clearing, the free-list is also emptied, and it won't be recreated"
151+
);
152+
131153
Ok(())
132154
}
133155

0 commit comments

Comments
 (0)