Skip to content

Commit 31b9cc9

Browse files
Merge pull request #293 from reddevilmidzy/rename-dir
feat: Improve rename detection and directory tracking
2 parents dd864a1 + 0dab065 commit 31b9cc9

File tree

2 files changed

+99
-102
lines changed

2 files changed

+99
-102
lines changed

src/git/file_tracker.rs

Lines changed: 80 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,92 +1,104 @@
1-
use git2::{Commit, Delta, DiffFindOptions, DiffOptions, ErrorCode, Repository};
1+
use git2::{Commit, Delta, DiffFindOptions, ErrorCode, Repository};
22
use std::path;
33

4-
/// Finds the last commit ID that contains the target file
4+
/// Represents the result of searching for the last commit that touched a target path.
5+
///
6+
/// This struct contains both the commit that last modified the target path and,
7+
/// if the path was renamed in that commit, the new path it was renamed to.
8+
///
9+
/// # Fields
10+
/// * `commit` - The commit that last touched the target path
11+
/// * `renamed_path` - If the target path was renamed in this commit, contains the new path.
12+
/// For files, this is the full new file path. For directories, this is the new directory
13+
/// path with a trailing slash. If the path was not renamed, this is `None`.
14+
pub struct CommitSearchResult<'a> {
15+
/// The commit that last touched the target path
16+
pub commit: Commit<'a>,
17+
/// The new path if the target was renamed in this commit, `None` otherwise
18+
pub renamed_path: Option<String>,
19+
}
20+
21+
/// This function searches through the commit history from HEAD backwards to find
22+
/// the most recent commit that modified the target path. It also detects if the
23+
/// path was renamed in that commit and returns the new path.
24+
///
25+
/// The function ignores merge commits (commits with 2+ parents) and initial commits
26+
/// (commits with 0 parents), following the same behavior as `git whatchanged`.
527
///
628
/// # Arguments
7-
/// * `target_file` - The path to the target file
29+
/// * `target_file` - The path to the target file or directory to search for
830
/// * `repo` - The repository to search in
931
///
1032
/// # Returns
11-
/// * `Ok(git2::Commit)` - The last commit ID that contains the target file
12-
/// * `Err(git2::Error)` - If there was an error accessing the repository
33+
/// * `Ok(CommitSearchResult)` - Contains the commit that last touched the target path
34+
/// and optionally the new path if it was renamed in that commit
35+
/// * `Err(git2::Error)` - If there was an error accessing the repository or if the
36+
/// target path was not found in the repository history
1337
pub fn find_last_commit_id<'a>(
1438
target_file: &str,
1539
repo: &'a Repository,
16-
) -> Result<Commit<'a>, git2::Error> {
40+
) -> Result<CommitSearchResult<'a>, git2::Error> {
41+
let target_path = path::Path::new(target_file);
1742
let mut revwalk = repo.revwalk()?;
1843
revwalk.push_head()?;
1944

2045
for commit_id in revwalk {
2146
let commit_id = commit_id?;
2247
let commit = repo.find_commit(commit_id)?;
2348

24-
// Ignore merge commits(2+ Parents) because that's what 'git whatchenged' does.
25-
// Ignore commit with 0 parent (initial commit) because there's nothing to diff against.
26-
2749
if commit.parent_count() == 1 {
2850
let prev_commit = commit.parent(0)?;
2951
let tree = commit.tree()?;
3052
let prev_tree = prev_commit.tree()?;
31-
let diff = repo.diff_tree_to_tree(Some(&prev_tree), Some(&tree), None)?;
53+
let mut diff = repo.diff_tree_to_tree(Some(&prev_tree), Some(&tree), None)?;
54+
55+
let mut find_opts = DiffFindOptions::new();
56+
find_opts.rename_threshold(50); // Git default threshold 50%
57+
diff.find_similar(Some(&mut find_opts))?;
3258
for delta in diff.deltas() {
59+
let mut renamed_path = None;
60+
61+
// file check
3362
if let Some(file_path) = delta.new_file().path()
34-
&& let Some(file_path_str) = file_path.to_str()
35-
&& file_path_str == target_file
63+
&& file_path == target_path
64+
{
65+
return Ok(CommitSearchResult {
66+
commit,
67+
renamed_path: None,
68+
});
69+
}
70+
// directory check
71+
if let Some(old_path) = delta.old_file().path()
72+
&& old_path.starts_with(target_path)
3673
{
37-
return Ok(commit);
74+
if old_path == target_path && delta.status() == Delta::Renamed {
75+
renamed_path = delta
76+
.new_file()
77+
.path()
78+
.and_then(|p| p.to_str())
79+
.map(|s| s.to_string());
80+
} else if delta.status() == Delta::Renamed
81+
&& let Some(path) = delta.new_file().path()
82+
&& let Some(parent) = path.parent()
83+
{
84+
let mut dir = parent.to_string_lossy().to_string();
85+
if !dir.ends_with('/') && !dir.ends_with('\\') {
86+
dir.push('/');
87+
}
88+
renamed_path = Some(dir);
89+
}
90+
91+
return Ok(CommitSearchResult {
92+
commit,
93+
renamed_path,
94+
});
3895
}
3996
}
4097
}
4198
}
42-
4399
Err(git2::Error::from_str("File not found"))
44100
}
45101

46-
/// Finds the new path of a file that has been moved in a commit
47-
///
48-
/// # Arguments
49-
/// * `repo` - The repository to search in
50-
/// * `commit` - The commit to search in
51-
/// * `target_file` - The path to the target file
52-
///
53-
pub fn track_file_rename_in_commit(
54-
repo: &Repository,
55-
commit: &Commit,
56-
target_file: &str,
57-
) -> Result<Option<String>, git2::Error> {
58-
if commit.parent_count() != 1 {
59-
return Ok(None);
60-
}
61-
62-
let mut diff_opts = DiffOptions::new();
63-
64-
let mut find_opts = DiffFindOptions::new();
65-
// TODO 적절한 값 찾기
66-
find_opts.rename_threshold(28);
67-
68-
let parent = commit.parent(0)?;
69-
let tree = commit.tree()?;
70-
let parent_tree = parent.tree()?;
71-
72-
let mut diff = repo.diff_tree_to_tree(Some(&parent_tree), Some(&tree), Some(&mut diff_opts))?;
73-
diff.find_similar(Some(&mut find_opts))?;
74-
75-
for delta in diff.deltas() {
76-
if delta.status() == Delta::Renamed
77-
&& (delta.old_file().path().and_then(|p| p.to_str()) == Some(target_file))
78-
{
79-
return Ok(delta
80-
.new_file()
81-
.path()
82-
.and_then(|p| p.to_str())
83-
.map(|s| s.to_string()));
84-
}
85-
}
86-
87-
Ok(None)
88-
}
89-
90102
/// Checks if a file exists in the repository at the given path
91103
///
92104
/// # Arguments
@@ -115,30 +127,6 @@ mod tests {
115127
use super::*;
116128
use serial_test::serial;
117129

118-
#[test]
119-
#[serial]
120-
fn test_track_file_rename_in_commit() -> Result<(), git2::Error> {
121-
let github_url = GitHubUrl::new(
122-
"reddevilmidzy".to_string(),
123-
"kingsac".to_string(),
124-
Some("main".to_string()),
125-
None,
126-
);
127-
let repo_manager = RepoManager::from(&github_url)?;
128-
let commit = find_last_commit_id("main.rs", repo_manager.get_repo())?;
129-
// see https://github.com/reddevilmidzy/kingsac/commit/2f3e99cbea53c55c8428d5bc11bfe7f1ff5cccd7
130-
assert_eq!(
131-
commit.id().to_string(),
132-
"2f3e99cbea53c55c8428d5bc11bfe7f1ff5cccd7"
133-
);
134-
assert_eq!(
135-
track_file_rename_in_commit(repo_manager.get_repo(), &commit, "main.rs")?,
136-
Some("src/main.rs".to_string())
137-
);
138-
139-
Ok(())
140-
}
141-
142130
#[test]
143131
#[serial]
144132
fn test_file_exists_in_repo() -> Result<(), git2::Error> {
@@ -180,23 +168,19 @@ mod tests {
180168
let repo_manager = RepoManager::from(&github_url)?;
181169

182170
// 1. Find the commit where test_for_multiple_moves.rs was moved to foo/test_for_multiple_moves.rs
183-
let commit = find_last_commit_id("test_for_multiple_moves.rs", repo_manager.get_repo())?;
184-
let new_path = track_file_rename_in_commit(
185-
repo_manager.get_repo(),
186-
&commit,
187-
"test_for_multiple_moves.rs",
188-
)?;
189-
assert_eq!(new_path, Some("foo/test_for_multiple_moves.rs".to_string()));
171+
let result = find_last_commit_id("test_for_multiple_moves.rs", repo_manager.get_repo())?;
172+
assert_eq!(
173+
result.renamed_path,
174+
Some("foo/test_for_multiple_moves.rs".to_string())
175+
);
190176

191177
// 2. Find the commit where foo/test_for_multiple_moves.rs was moved to bar/test_for_multiple_moves.rs
192-
let commit =
178+
let result =
193179
find_last_commit_id("foo/test_for_multiple_moves.rs", repo_manager.get_repo())?;
194-
let new_path = track_file_rename_in_commit(
195-
repo_manager.get_repo(),
196-
&commit,
197-
"foo/test_for_multiple_moves.rs",
198-
)?;
199-
assert_eq!(new_path, Some("bar/test_for_multiple_moves.rs".to_string()));
180+
assert_eq!(
181+
result.renamed_path,
182+
Some("bar/test_for_multiple_moves.rs".to_string())
183+
);
200184

201185
// 3. Verify that the file exists at the final location
202186
assert!(file_exists_in_repo(

src/git/repo.rs

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::{GitHubUrl, file_exists_in_repo, find_last_commit_id, track_file_rename_in_commit};
1+
use crate::{GitHubUrl, file_exists_in_repo, find_last_commit_id};
22
use git2::{
33
BranchType, Cred, Oid, PushOptions, RemoteCallbacks, Repository, Signature,
44
build::CheckoutBuilder,
@@ -110,23 +110,23 @@ impl RepoManager {
110110
return Ok(Some(current_path));
111111
}
112112

113-
let commit = match find_last_commit_id(&current_path, repo) {
114-
Ok(commit) => commit,
113+
let result = match find_last_commit_id(&current_path, repo) {
114+
Ok(result) => result,
115115
Err(e) => {
116116
error!("Error finding last commit for {}: {}", current_path, e);
117117
return Ok(None);
118118
}
119119
};
120120

121-
match track_file_rename_in_commit(repo, &commit, &current_path)? {
121+
match result.renamed_path {
122122
Some(new_path) => {
123123
current_path = new_path;
124124
}
125125
None => {
126126
error!(
127127
"Could not find new path for {} in commit {}",
128128
current_path,
129-
commit.id()
129+
result.commit.id()
130130
);
131131
return Ok(None);
132132
}
@@ -399,7 +399,7 @@ mod tests {
399399
/// 1. Initially located at: tmp.txt (root directory)
400400
/// 2. First moved to: dockerfile_history/tmp.txt
401401
/// 3. Finally moved to: img/tmp.txt
402-
fn test_find_current_location() {
402+
fn test_find_current_location_file() {
403403
let url = GitHubUrl::parse(
404404
"https://github.com/reddevilmidzy/zero2prod/blob/test_for_queensac/tmp.txt",
405405
)
@@ -413,6 +413,19 @@ mod tests {
413413
);
414414
}
415415

416+
#[test]
417+
fn test_find_current_location_directory() {
418+
let url =
419+
GitHubUrl::parse("https://github.com/reddevilmidzy/kingsac/tree/main/foo/intrinsics")
420+
.unwrap();
421+
let repo_manager = RepoManager::from(&url).unwrap();
422+
423+
assert_eq!(
424+
repo_manager.find_current_location(&url).unwrap(),
425+
Some("foo/".to_string())
426+
)
427+
}
428+
416429
#[test]
417430
/// This test verifies the behavior when a file cannot be found in the repository.
418431
/// It tests two scenarios:

0 commit comments

Comments
 (0)