|
| 1 | +use crate::Rewrites; |
| 2 | + |
| 3 | +/// Types related to the rename tracker for renames, rewrites and copies. |
| 4 | +pub mod tracker; |
| 5 | + |
| 6 | +/// A type to retain state related to an ongoing tracking operation to retain sets of interesting changes |
| 7 | +/// of which some are retained to at a later stage compute the ones that seem to be renames or copies. |
| 8 | +pub struct Tracker<T> { |
| 9 | + /// The tracked items thus far, which will be used to determine renames/copies and rewrites later. |
| 10 | + items: Vec<tracker::Item<T>>, |
| 11 | + /// A place to store all paths in to reduce amount of allocations. |
| 12 | + path_backing: Vec<u8>, |
| 13 | + /// A buffer for use when fetching objects for similarity tests. |
| 14 | + buf1: Vec<u8>, |
| 15 | + /// Another buffer for use when fetching objects for similarity tests. |
| 16 | + buf2: Vec<u8>, |
| 17 | + /// How to track copies and/or rewrites. |
| 18 | + rewrites: Rewrites, |
| 19 | + /// The diff algorithm to use when checking for similarity. |
| 20 | + diff_algo: crate::blob::Algorithm, |
| 21 | +} |
| 22 | + |
/// The set of files in which to look for the source of a copy.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum CopySource {
    /// Look for copies among the modified files only. This is the default.
    #[default]
    FromSetOfModifiedFiles,
    /// Look for copies among the modified files and, additionally, among all files known to the source,
    /// i.e. the previous state of the tree.
    ///
    /// This can be an expensive operation, as its cost grows rapidly with the total amount of files in the set.
    FromSetOfModifiedFilesAndAllSources,
}
| 34 | + |
| 35 | +/// Under which circumstances we consider a file to be a copy. |
| 36 | +#[derive(Debug, Copy, Clone, PartialEq)] |
| 37 | +pub struct Copies { |
| 38 | + /// The set of files to search when finding the source of copies. |
| 39 | + pub source: CopySource, |
| 40 | + /// Equivalent to [`Rewrites::percentage`], but used for copy tracking. |
| 41 | + /// |
| 42 | + /// Useful to have similarity-based rename tracking and cheaper copy tracking. |
| 43 | + pub percentage: Option<f32>, |
| 44 | +} |
| 45 | + |
| 46 | +impl Default for Copies { |
| 47 | + fn default() -> Self { |
| 48 | + Copies { |
| 49 | + source: CopySource::default(), |
| 50 | + percentage: Some(0.5), |
| 51 | + } |
| 52 | + } |
| 53 | +} |
| 54 | + |
| 55 | +/// Information collected while handling rewrites of files which may be tracked. |
| 56 | +#[derive(Default, Clone, Copy, Debug, PartialEq)] |
| 57 | +pub struct Outcome { |
| 58 | + /// The options used to guide the rewrite tracking. Either fully provided by the caller or retrieved from git configuration. |
| 59 | + pub options: Rewrites, |
| 60 | + /// The amount of similarity checks that have been conducted to find renamed files and potentially copies. |
| 61 | + pub num_similarity_checks: usize, |
| 62 | + /// Set to the amount of worst-case rename permutations we didn't search as our limit didn't allow it. |
| 63 | + pub num_similarity_checks_skipped_for_rename_tracking_due_to_limit: usize, |
| 64 | + /// Set to the amount of worst-case copy permutations we didn't search as our limit didn't allow it. |
| 65 | + pub num_similarity_checks_skipped_for_copy_tracking_due_to_limit: usize, |
| 66 | +} |
| 67 | + |
| 68 | +/// The default settings for rewrites according to the git configuration defaults. |
| 69 | +impl Default for Rewrites { |
| 70 | + fn default() -> Self { |
| 71 | + Rewrites { |
| 72 | + copies: None, |
| 73 | + percentage: Some(0.5), |
| 74 | + limit: 1000, |
| 75 | + } |
| 76 | + } |
| 77 | +} |
0 commit comments