Skip to content

Commit 24f2d62

Browse files
j178 and Copilot authored
Add destroyed-symlinks builtin hook (#1851)
Closes #1848 --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent fc529df commit 24f2d62

File tree

7 files changed

+418
-1
lines changed

7 files changed

+418
-1
lines changed

crates/prek/src/hooks/builtin_hooks/mod.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ pub(crate) enum BuiltinHooks {
4141
CheckVcsPermalinks,
4242
CheckXml,
4343
CheckYaml,
44+
DestroyedSymlinks,
4445
DetectPrivateKey,
4546
EndOfFileFixer,
4647
FileContentsSorter,
@@ -85,6 +86,7 @@ impl BuiltinHooks {
8586
}
8687
Self::CheckXml => pre_commit_hooks::check_xml(hook, filenames).await,
8788
Self::CheckYaml => pre_commit_hooks::check_yaml(hook, filenames).await,
89+
Self::DestroyedSymlinks => pre_commit_hooks::destroyed_symlinks(hook, filenames).await,
8890
Self::DetectPrivateKey => pre_commit_hooks::detect_private_key(hook, filenames).await,
8991
Self::EndOfFileFixer => pre_commit_hooks::fix_end_of_file(hook, filenames).await,
9092
Self::FileContentsSorter => {
@@ -273,6 +275,20 @@ impl BuiltinHook {
273275
..Default::default()
274276
},
275277
},
278+
BuiltinHooks::DestroyedSymlinks => BuiltinHook {
279+
id: "destroyed-symlinks".to_string(),
280+
name: "detect destroyed symlinks".to_string(),
281+
entry: "destroyed-symlinks".to_string(),
282+
priority: None,
283+
options: HookOptions {
284+
description: Some(
285+
"detects symlinks that were replaced with regular files whose contents are the original symlink target path.".to_string(),
286+
),
287+
types: Some(tags::TAG_SET_FILE),
288+
stages: Some([Stage::PreCommit, Stage::PrePush, Stage::Manual].into()),
289+
..Default::default()
290+
},
291+
},
276292
BuiltinHooks::DetectPrivateKey => BuiltinHook {
277293
id: "detect-private-key".to_string(),
278294
name: "detect private key".to_string(),
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
use std::io::Write as _;
2+
use std::path::Path;
3+
use std::str;
4+
5+
use anyhow::{Context, Result};
6+
use rustc_hash::FxHashSet;
7+
8+
use crate::git;
9+
use crate::hook::Hook;
10+
11+
const ORDINARY_CHANGED_ENTRY_MARKER: &str = "1";
12+
const PERMS_LINK: u32 = 0o120_000;
13+
const PERMS_NONEXIST: u32 = 0;
14+
15+
pub(crate) async fn destroyed_symlinks(hook: &Hook, filenames: &[&Path]) -> Result<(i32, Vec<u8>)> {
16+
let status_output = git_status_output(hook.work_dir()).await?;
17+
let entries = status_output
18+
.split(|&byte| byte == b'\0')
19+
.filter_map(|entry| match parse_ordinary_changed_entry(entry) {
20+
Ok(Some(entry)) => Some(Ok(entry)),
21+
Ok(None) => None,
22+
Err(err) => Some(Err(err)),
23+
});
24+
25+
let destroyed_links = find_destroyed_symlinks(hook, filenames, entries).await?;
26+
if destroyed_links.is_empty() {
27+
return Ok((0, Vec::new()));
28+
}
29+
30+
let mut output = Vec::new();
31+
writeln!(output, "Destroyed symlinks:")?;
32+
for destroyed_link in &destroyed_links {
33+
writeln!(output, "- {}", destroyed_link.display())?;
34+
}
35+
let destroyed_links_shell = destroyed_links
36+
.iter()
37+
.map(|path| path.to_string_lossy().into_owned())
38+
.collect::<Vec<_>>();
39+
writeln!(output, "You should unstage affected files:")?;
40+
writeln!(
41+
output,
42+
"\tgit reset HEAD -- {}",
43+
shlex::try_join(destroyed_links_shell.iter().map(String::as_str))?
44+
)?;
45+
writeln!(
46+
output,
47+
"And retry commit. As a long term solution you may try to explicitly tell git that your environment does not support symlinks:"
48+
)?;
49+
writeln!(output, "\tgit config core.symlinks false")?;
50+
51+
Ok((1, output))
52+
}
53+
54+
async fn git_status_output(work_dir: &Path) -> Result<Vec<u8>> {
55+
Ok(git::git_cmd("git status")?
56+
.current_dir(work_dir)
57+
.arg("status")
58+
.arg("--porcelain=v2")
59+
.arg("-z")
60+
// Query the whole project with a single pathspec to avoid one-argv-entry-per-file
61+
// command lines that can exceed the platform limit for very large commits.
62+
.arg("--")
63+
.arg(".")
64+
.check(true)
65+
.output()
66+
.await?
67+
.stdout)
68+
}
69+
70+
async fn find_destroyed_symlinks<'a>(
71+
hook: &Hook,
72+
filenames: &[&Path],
73+
entries: impl IntoIterator<Item = Result<OrdinaryChangedEntry<'a>>>,
74+
) -> Result<Vec<&'a Path>> {
75+
if filenames.is_empty() {
76+
return Ok(Vec::new());
77+
}
78+
79+
let filenames = filenames.iter().copied().collect::<FxHashSet<_>>();
80+
let relative_prefix = hook.project().relative_path();
81+
let mut destroyed_links = Vec::new();
82+
83+
for entry in entries {
84+
let entry = entry?;
85+
// `git status -z` reports paths relative to the repository root, so strip the project
86+
// prefix before comparing against the requested filenames.
87+
let Ok(entry_path) = entry.path.strip_prefix(relative_prefix) else {
88+
continue;
89+
};
90+
if !filenames.contains(entry_path) {
91+
continue;
92+
}
93+
94+
// We only care about entries that used to be symlinks in HEAD but are
95+
// now staged as regular files. Still-a-symlink entries are fine, and a
96+
// deleted symlink is not a "destroyed symlink" case.
97+
if entry.head_mode != PERMS_LINK
98+
|| entry.index_mode == PERMS_LINK
99+
|| entry.index_mode == PERMS_NONEXIST
100+
{
101+
continue;
102+
}
103+
104+
if is_destroyed_symlink(hook.work_dir(), &entry).await? {
105+
destroyed_links.push(entry_path);
106+
}
107+
}
108+
109+
Ok(destroyed_links)
110+
}
111+
/// The fields of one ordinary changed entry from `git status --porcelain=v2`
/// that this hook uses.
///
/// Record layout: `1 <XY> <sub> <mH> <mI> <mW> <hH> <hI> <path>`
///
/// See: <https://git-scm.com/docs/git-status#_changed_tracked_entries>
///
/// All borrowed fields point into the status output buffer the record was
/// parsed from.
#[derive(Debug, PartialEq, Eq)]
struct OrdinaryChangedEntry<'a> {
    /// `<mH>`: the file mode in HEAD, parsed from its octal representation.
    head_mode: u32,
    /// `<mI>`: the file mode in the index, parsed from its octal representation.
    index_mode: u32,
    /// `<hH>`: the object name in HEAD.
    head_hash: &'a str,
    /// `<hI>`: the object name in the index.
    index_hash: &'a str,
    /// `<path>`: the pathname, reported relative to the repository root when
    /// using `git status --porcelain=v2 -z`.
    path: &'a Path,
}
128+
129+
fn parse_ordinary_changed_entry(line: &[u8]) -> Result<Option<OrdinaryChangedEntry<'_>>> {
130+
if line.is_empty() {
131+
return Ok(None);
132+
}
133+
134+
let mut fields = line.splitn(9, |&byte| byte == b' ');
135+
let mut next_field = || {
136+
fields
137+
.next()
138+
.context("malformed `git status --porcelain=v2` output")
139+
};
140+
let parse_mode = |field| -> Result<u32> { Ok(u32::from_str_radix(str::from_utf8(field)?, 8)?) };
141+
let marker = next_field()?;
142+
// `git status --porcelain=v2` emits several record types. We only parse
143+
// ordinary changed entries (`1 ...`) here and let callers skip the rest.
144+
if marker != ORDINARY_CHANGED_ENTRY_MARKER.as_bytes() {
145+
return Ok(None);
146+
}
147+
148+
let _xy = next_field()?;
149+
let _sub = next_field()?;
150+
let head_mode = parse_mode(next_field()?)?;
151+
let index_mode = parse_mode(next_field()?)?;
152+
let _mode_worktree = next_field()?;
153+
let head_hash = str::from_utf8(next_field()?)?;
154+
let index_hash = str::from_utf8(next_field()?)?;
155+
let path = Path::new(str::from_utf8(next_field()?)?);
156+
157+
Ok(Some(OrdinaryChangedEntry {
158+
head_mode,
159+
index_mode,
160+
head_hash,
161+
index_hash,
162+
path,
163+
}))
164+
}
165+
166+
/// Decides whether the staged blob of `entry` is the old symlink target stored
/// as a regular file.
///
/// Returns `true` when the HEAD and index blobs are identical, or when they
/// differ only in trailing ASCII whitespace. Git objects are read via
/// `git cat-file` run in `work_dir`.
async fn is_destroyed_symlink(work_dir: &Path, entry: &OrdinaryChangedEntry<'_>) -> Result<bool> {
    let (head, staged) = (entry.head_hash, entry.index_hash);

    // Same object name means same bytes: the symlink target is now staged as
    // the content of a regular file. No need to look at the blobs.
    if head == staged {
        return Ok(true);
    }

    let staged_size = git_object_size(work_dir, staged).await?;
    let head_size = git_object_size(work_dir, head).await?;

    // A formatting hook may have appended a newline or turned LF into CRLF,
    // which grows the file by at most two bytes. Anything larger cannot be a
    // match, so the blob contents are only fetched within that margin.
    if staged_size <= head_size.saturating_add(2) {
        let head_bytes = git_object_content(work_dir, head).await?;
        let staged_bytes = git_object_content(work_dir, staged).await?;

        // Like upstream, ignore trailing ASCII whitespace so that "path",
        // "path\n" and "path\r\n" all count as the same target.
        Ok(head_bytes.trim_ascii_end() == staged_bytes.trim_ascii_end())
    } else {
        Ok(false)
    }
}
190+
191+
async fn git_object_size(work_dir: &Path, object: &str) -> Result<u64> {
192+
let output = git::git_cmd("git cat-file")?
193+
.current_dir(work_dir)
194+
.arg("cat-file")
195+
.arg("-s")
196+
.arg(object)
197+
.check(true)
198+
.output()
199+
.await?;
200+
201+
Ok(str::from_utf8(&output.stdout)?.trim_ascii().parse()?)
202+
}
203+
204+
async fn git_object_content(work_dir: &Path, object: &str) -> Result<Vec<u8>> {
205+
Ok(git::git_cmd("git cat-file")?
206+
.current_dir(work_dir)
207+
.arg("cat-file")
208+
.arg("-p")
209+
.arg(object)
210+
.check(true)
211+
.output()
212+
.await?
213+
.stdout)
214+
}
215+
216+
#[cfg(test)]
mod tests {
    use super::*;

    /// The path is the last field of a record and must keep its embedded
    /// spaces instead of being split further.
    #[test]
    fn parse_ordinary_changed_entry_supports_spaces_in_paths() -> Result<()> {
        let record: &[u8] =
            b"1 M. N... 120000 100644 100644 headhash indexhash path with spaces.txt";

        let parsed = parse_ordinary_changed_entry(record)?;
        let entry = parsed.expect("entry should parse");

        let expected = OrdinaryChangedEntry {
            head_mode: PERMS_LINK,
            index_mode: 0o100_644,
            head_hash: "headhash",
            index_hash: "indexhash",
            path: Path::new("path with spaces.txt"),
        };
        assert_eq!(entry, expected);

        Ok(())
    }
}

crates/prek/src/hooks/pre_commit_hooks/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ mod check_toml;
1616
mod check_vcs_permalinks;
1717
mod check_xml;
1818
mod check_yaml;
19+
mod destroyed_symlinks;
1920
mod detect_private_key;
2021
mod file_contents_sorter;
2122
mod fix_byte_order_marker;
@@ -36,6 +37,7 @@ pub(crate) use check_toml::check_toml;
3637
pub(crate) use check_vcs_permalinks::check_vcs_permalinks;
3738
pub(crate) use check_xml::check_xml;
3839
pub(crate) use check_yaml::check_yaml;
40+
pub(crate) use destroyed_symlinks::destroyed_symlinks;
3941
pub(crate) use detect_private_key::detect_private_key;
4042
pub(crate) use file_contents_sorter::file_contents_sorter;
4143
pub(crate) use fix_byte_order_marker::fix_byte_order_marker;
@@ -62,6 +64,7 @@ pub(crate) enum PreCommitHooks {
6264
CheckToml,
6365
CheckXml,
6466
CheckYaml,
67+
DestroyedSymlinks,
6568
MixedLineEnding,
6669
DetectPrivateKey,
6770
NoCommitToBranch,
@@ -98,6 +101,7 @@ impl PreCommitHooks {
98101
Self::CheckToml => check_toml(hook, filenames).await,
99102
Self::CheckYaml => check_yaml(hook, filenames).await,
100103
Self::CheckXml => check_xml(hook, filenames).await,
104+
Self::DestroyedSymlinks => destroyed_symlinks(hook, filenames).await,
101105
Self::MixedLineEnding => mixed_line_ending(hook, filenames).await,
102106
Self::DetectPrivateKey => detect_private_key(hook, filenames).await,
103107
Self::NoCommitToBranch => no_commit_to_branch(hook).await,

0 commit comments

Comments
 (0)