|
| 1 | +use std::io::Write as _; |
| 2 | +use std::path::Path; |
| 3 | +use std::str; |
| 4 | + |
| 5 | +use anyhow::{Context, Result}; |
| 6 | +use rustc_hash::FxHashSet; |
| 7 | + |
| 8 | +use crate::git; |
| 9 | +use crate::hook::Hook; |
| 10 | + |
/// Record-type marker that opens an ordinary changed entry (`1 ...`) in
/// `git status --porcelain=v2` output; records with any other marker are skipped.
const ORDINARY_CHANGED_ENTRY_MARKER: &str = "1";
/// File mode (octal) that identifies a symlink in the `<mH>`/`<mI>` fields.
const PERMS_LINK: u32 = 0o120_000;
/// File mode value treated as "path does not exist", e.g. a symlink deleted from the index.
const PERMS_NONEXIST: u32 = 0;
| 14 | + |
| 15 | +pub(crate) async fn destroyed_symlinks(hook: &Hook, filenames: &[&Path]) -> Result<(i32, Vec<u8>)> { |
| 16 | + let status_output = git_status_output(hook.work_dir()).await?; |
| 17 | + let entries = status_output |
| 18 | + .split(|&byte| byte == b'\0') |
| 19 | + .filter_map(|entry| match parse_ordinary_changed_entry(entry) { |
| 20 | + Ok(Some(entry)) => Some(Ok(entry)), |
| 21 | + Ok(None) => None, |
| 22 | + Err(err) => Some(Err(err)), |
| 23 | + }); |
| 24 | + |
| 25 | + let destroyed_links = find_destroyed_symlinks(hook, filenames, entries).await?; |
| 26 | + if destroyed_links.is_empty() { |
| 27 | + return Ok((0, Vec::new())); |
| 28 | + } |
| 29 | + |
| 30 | + let mut output = Vec::new(); |
| 31 | + writeln!(output, "Destroyed symlinks:")?; |
| 32 | + for destroyed_link in &destroyed_links { |
| 33 | + writeln!(output, "- {}", destroyed_link.display())?; |
| 34 | + } |
| 35 | + let destroyed_links_shell = destroyed_links |
| 36 | + .iter() |
| 37 | + .map(|path| path.to_string_lossy().into_owned()) |
| 38 | + .collect::<Vec<_>>(); |
| 39 | + writeln!(output, "You should unstage affected files:")?; |
| 40 | + writeln!( |
| 41 | + output, |
| 42 | + "\tgit reset HEAD -- {}", |
| 43 | + shlex::try_join(destroyed_links_shell.iter().map(String::as_str))? |
| 44 | + )?; |
| 45 | + writeln!( |
| 46 | + output, |
| 47 | + "And retry commit. As a long term solution you may try to explicitly tell git that your environment does not support symlinks:" |
| 48 | + )?; |
| 49 | + writeln!(output, "\tgit config core.symlinks false")?; |
| 50 | + |
| 51 | + Ok((1, output)) |
| 52 | +} |
| 53 | + |
| 54 | +async fn git_status_output(work_dir: &Path) -> Result<Vec<u8>> { |
| 55 | + Ok(git::git_cmd("git status")? |
| 56 | + .current_dir(work_dir) |
| 57 | + .arg("status") |
| 58 | + .arg("--porcelain=v2") |
| 59 | + .arg("-z") |
| 60 | + // Query the whole project with a single pathspec to avoid one-argv-entry-per-file |
| 61 | + // command lines that can exceed the platform limit for very large commits. |
| 62 | + .arg("--") |
| 63 | + .arg(".") |
| 64 | + .check(true) |
| 65 | + .output() |
| 66 | + .await? |
| 67 | + .stdout) |
| 68 | +} |
| 69 | + |
| 70 | +async fn find_destroyed_symlinks<'a>( |
| 71 | + hook: &Hook, |
| 72 | + filenames: &[&Path], |
| 73 | + entries: impl IntoIterator<Item = Result<OrdinaryChangedEntry<'a>>>, |
| 74 | +) -> Result<Vec<&'a Path>> { |
| 75 | + if filenames.is_empty() { |
| 76 | + return Ok(Vec::new()); |
| 77 | + } |
| 78 | + |
| 79 | + let filenames = filenames.iter().copied().collect::<FxHashSet<_>>(); |
| 80 | + let relative_prefix = hook.project().relative_path(); |
| 81 | + let mut destroyed_links = Vec::new(); |
| 82 | + |
| 83 | + for entry in entries { |
| 84 | + let entry = entry?; |
| 85 | + // `git status -z` reports paths relative to the repository root, so strip the project |
| 86 | + // prefix before comparing against the requested filenames. |
| 87 | + let Ok(entry_path) = entry.path.strip_prefix(relative_prefix) else { |
| 88 | + continue; |
| 89 | + }; |
| 90 | + if !filenames.contains(entry_path) { |
| 91 | + continue; |
| 92 | + } |
| 93 | + |
| 94 | + // We only care about entries that used to be symlinks in HEAD but are |
| 95 | + // now staged as regular files. Still-a-symlink entries are fine, and a |
| 96 | + // deleted symlink is not a "destroyed symlink" case. |
| 97 | + if entry.head_mode != PERMS_LINK |
| 98 | + || entry.index_mode == PERMS_LINK |
| 99 | + || entry.index_mode == PERMS_NONEXIST |
| 100 | + { |
| 101 | + continue; |
| 102 | + } |
| 103 | + |
| 104 | + if is_destroyed_symlink(hook.work_dir(), &entry).await? { |
| 105 | + destroyed_links.push(entry_path); |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + Ok(destroyed_links) |
| 110 | +} |
/// One ordinary changed entry parsed from `git status --porcelain=v2`:
/// `1 <XY> <sub> <mH> <mI> <mW> <hH> <hI> <path>`
///
/// Only the fields this hook needs are kept; string and path fields borrow from the
/// status output buffer.
///
/// See: https://git-scm.com/docs/git-status#_changed_tracked_entries
#[derive(Debug, PartialEq, Eq)]
struct OrdinaryChangedEntry<'a> {
    /// `<mH>`: The octal file mode in HEAD.
    head_mode: u32,
    /// `<mI>`: The octal file mode in the index.
    index_mode: u32,
    /// `<hH>`: The object name in HEAD.
    head_hash: &'a str,
    /// `<hI>`: The object name in the index.
    index_hash: &'a str,
    /// `<path>`: The pathname, reported relative to the repository root when
    /// using `git status --porcelain=v2 -z`.
    path: &'a Path,
}
| 128 | + |
| 129 | +fn parse_ordinary_changed_entry(line: &[u8]) -> Result<Option<OrdinaryChangedEntry<'_>>> { |
| 130 | + if line.is_empty() { |
| 131 | + return Ok(None); |
| 132 | + } |
| 133 | + |
| 134 | + let mut fields = line.splitn(9, |&byte| byte == b' '); |
| 135 | + let mut next_field = || { |
| 136 | + fields |
| 137 | + .next() |
| 138 | + .context("malformed `git status --porcelain=v2` output") |
| 139 | + }; |
| 140 | + let parse_mode = |field| -> Result<u32> { Ok(u32::from_str_radix(str::from_utf8(field)?, 8)?) }; |
| 141 | + let marker = next_field()?; |
| 142 | + // `git status --porcelain=v2` emits several record types. We only parse |
| 143 | + // ordinary changed entries (`1 ...`) here and let callers skip the rest. |
| 144 | + if marker != ORDINARY_CHANGED_ENTRY_MARKER.as_bytes() { |
| 145 | + return Ok(None); |
| 146 | + } |
| 147 | + |
| 148 | + let _xy = next_field()?; |
| 149 | + let _sub = next_field()?; |
| 150 | + let head_mode = parse_mode(next_field()?)?; |
| 151 | + let index_mode = parse_mode(next_field()?)?; |
| 152 | + let _mode_worktree = next_field()?; |
| 153 | + let head_hash = str::from_utf8(next_field()?)?; |
| 154 | + let index_hash = str::from_utf8(next_field()?)?; |
| 155 | + let path = Path::new(str::from_utf8(next_field()?)?); |
| 156 | + |
| 157 | + Ok(Some(OrdinaryChangedEntry { |
| 158 | + head_mode, |
| 159 | + index_mode, |
| 160 | + head_hash, |
| 161 | + index_hash, |
| 162 | + path, |
| 163 | + })) |
| 164 | +} |
| 165 | + |
| 166 | +async fn is_destroyed_symlink(work_dir: &Path, entry: &OrdinaryChangedEntry<'_>) -> Result<bool> { |
| 167 | + // If the staged blob is byte-for-byte identical to the old symlink blob, we |
| 168 | + // already know this is a destroyed symlink: the path used to be stored as a |
| 169 | + // symlink target and is now staged as a regular file with the same contents. |
| 170 | + if entry.head_hash == entry.index_hash { |
| 171 | + return Ok(true); |
| 172 | + } |
| 173 | + |
| 174 | + let index_size = git_object_size(work_dir, entry.index_hash).await?; |
| 175 | + let head_size = git_object_size(work_dir, entry.head_hash).await?; |
| 176 | + // Formatting hooks may have appended a trailing newline or converted LF to |
| 177 | + // CRLF, so allow the staged file to grow by at most two bytes before doing |
| 178 | + // the more expensive content comparison. |
| 179 | + if index_size > head_size.saturating_add(2) { |
| 180 | + return Ok(false); |
| 181 | + } |
| 182 | + |
| 183 | + let head_content = git_object_content(work_dir, entry.head_hash).await?; |
| 184 | + let index_content = git_object_content(work_dir, entry.index_hash).await?; |
| 185 | + |
| 186 | + // Match upstream behavior by ignoring trailing ASCII whitespace here. That |
| 187 | + // keeps "path", "path\n", and "path\r\n" in the destroyed-symlink bucket. |
| 188 | + Ok(head_content.trim_ascii_end() == index_content.trim_ascii_end()) |
| 189 | +} |
| 190 | + |
| 191 | +async fn git_object_size(work_dir: &Path, object: &str) -> Result<u64> { |
| 192 | + let output = git::git_cmd("git cat-file")? |
| 193 | + .current_dir(work_dir) |
| 194 | + .arg("cat-file") |
| 195 | + .arg("-s") |
| 196 | + .arg(object) |
| 197 | + .check(true) |
| 198 | + .output() |
| 199 | + .await?; |
| 200 | + |
| 201 | + Ok(str::from_utf8(&output.stdout)?.trim_ascii().parse()?) |
| 202 | +} |
| 203 | + |
| 204 | +async fn git_object_content(work_dir: &Path, object: &str) -> Result<Vec<u8>> { |
| 205 | + Ok(git::git_cmd("git cat-file")? |
| 206 | + .current_dir(work_dir) |
| 207 | + .arg("cat-file") |
| 208 | + .arg("-p") |
| 209 | + .arg(object) |
| 210 | + .check(true) |
| 211 | + .output() |
| 212 | + .await? |
| 213 | + .stdout) |
| 214 | +} |
| 215 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// The path is the last field of the record, so spaces inside it must survive parsing.
    #[test]
    fn parse_ordinary_changed_entry_supports_spaces_in_paths() -> Result<()> {
        let record: &[u8] =
            b"1 M. N... 120000 100644 100644 headhash indexhash path with spaces.txt";

        let parsed = parse_ordinary_changed_entry(record)?.expect("entry should parse");

        let expected = OrdinaryChangedEntry {
            head_mode: PERMS_LINK,
            index_mode: 0o100_644,
            head_hash: "headhash",
            index_hash: "indexhash",
            path: Path::new("path with spaces.txt"),
        };
        assert_eq!(parsed, expected);

        Ok(())
    }
}
0 commit comments