Skip to content

Commit eff4c00

Browse files
committed
feat: add validation for path components
That way it's easier to ensure that forbidden names are never used as part of path components.
1 parent 0d78db2 commit eff4c00

File tree

4 files changed

+516
-0
lines changed

4 files changed

+516
-0
lines changed

gix-validate/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,7 @@ pub mod tag;
1313
///
1414
#[allow(clippy::empty_docs)]
1515
pub mod submodule;
16+
17+
///
18+
#[allow(clippy::empty_docs)]
19+
pub mod path;

gix-validate/src/path.rs

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
use bstr::{BStr, ByteSlice};
2+
3+
///
4+
#[allow(clippy::empty_docs)]
5+
pub mod component {
6+
/// The error returned by [`component()`](super::component()).
7+
#[derive(Debug, thiserror::Error)]
8+
#[allow(missing_docs)]
9+
pub enum Error {
10+
#[error("A path component must not be empty")]
11+
Empty,
12+
#[error("Path separators like / or \\ are not allowed")]
13+
PathSeparator,
14+
#[error("Window path prefixes are not allowed")]
15+
WindowsPathPrefix,
16+
#[error("The .git name may never be used")]
17+
DotGitDir,
18+
#[error("The .gitmodules file must not be a symlink")]
19+
SymlinkedGitModules,
20+
}
21+
22+
/// Further specify what to check for in [`component()`](super::component())
23+
///
24+
/// Note that the `Default` implementation maximizes safety by enabling all protections.
25+
#[derive(Debug, Copy, Clone)]
26+
pub struct Options {
27+
/// This flag should be turned on when on Windows, but can be turned on when on other platforms
28+
/// as well to prevent path components that can cause trouble on Windows.
29+
pub protect_windows: bool,
30+
/// If `true`, protections for the MacOS HFS+ filesystem will be active, checking for
31+
/// special directories that we should never write while ignoring codepoints just like HFS+ would.
32+
///
33+
/// This field is equivalent to `core.protectHFS`.
34+
pub protect_hfs: bool,
35+
/// If `true`, protections for Windows NTFS specific features will be active. This adds special handling
36+
/// for `8.3` filenames and alternate data streams, both of which could be used to mask th etrue name of
37+
/// what would be created on disk.
38+
///
39+
/// This field is equivalent to `core.protectNTFS`.
40+
pub protect_ntfs: bool,
41+
}
42+
43+
impl Default for Options {
44+
fn default() -> Self {
45+
Options {
46+
protect_windows: true,
47+
protect_hfs: true,
48+
protect_ntfs: true,
49+
}
50+
}
51+
}
52+
53+
/// The mode of the component, if it's the leaf of a path.
54+
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
55+
pub enum Mode {
56+
/// The item is a symbolic link.
57+
Symlink,
58+
}
59+
}
60+
61+
/// Assure the given `input` resembles a valid name for a tree or blob, and in that sense, a path component.
62+
/// `mode` indicates the kind of `input` and it should be `Some` if `input` is the last component in the underlying
63+
/// path. Currently, this is only used to determine if `.gitmodules` is a symlink.
64+
///
65+
/// `input` must not make it possible to exit the repository, or to specify absolute paths.
66+
pub fn component(
67+
input: &BStr,
68+
mode: Option<component::Mode>,
69+
component::Options {
70+
protect_windows,
71+
protect_hfs,
72+
protect_ntfs,
73+
}: component::Options,
74+
) -> Result<&BStr, component::Error> {
75+
if input.is_empty() {
76+
return Err(component::Error::Empty);
77+
}
78+
if protect_windows {
79+
if input.find_byteset(b"/\\").is_some() {
80+
return Err(component::Error::PathSeparator);
81+
}
82+
if input.chars().skip(1).next() == Some(':') {
83+
return Err(component::Error::WindowsPathPrefix);
84+
}
85+
} else if input.find_byte(b'/').is_some() {
86+
return Err(component::Error::PathSeparator);
87+
}
88+
if protect_hfs {
89+
if is_dot_hfs(input, "git") {
90+
return Err(component::Error::DotGitDir);
91+
}
92+
if is_symlink(mode) && is_dot_hfs(input, "gitmodules") {
93+
return Err(component::Error::SymlinkedGitModules);
94+
}
95+
}
96+
97+
if protect_ntfs {
98+
if is_dot_git_ntfs(input) {
99+
return Err(component::Error::DotGitDir);
100+
}
101+
if is_symlink(mode) && is_dot_ntfs(input, "gitmodules", "gi7eba") {
102+
return Err(component::Error::SymlinkedGitModules);
103+
}
104+
}
105+
106+
if !(protect_hfs | protect_ntfs) {
107+
if input.eq_ignore_ascii_case(b".git") {
108+
return Err(component::Error::DotGitDir);
109+
}
110+
if is_symlink(mode) && input.eq_ignore_ascii_case(b".gitmodules") {
111+
return Err(component::Error::SymlinkedGitModules);
112+
}
113+
}
114+
Ok(input)
115+
}
116+
117+
/// Return `true` only if a `mode` is present and denotes a symbolic link.
fn is_symlink(mode: Option<component::Mode>) -> bool {
    matches!(mode, Some(component::Mode::Symlink))
}
120+
121+
fn is_dot_hfs(input: &BStr, search_case_insensitive: &str) -> bool {
122+
let mut input = input.chars().filter(|c| match *c as u32 {
123+
0x200c | /* ZERO WIDTH NON-JOINER */
124+
0x200d | /* ZERO WIDTH JOINER */
125+
0x200e | /* LEFT-TO-RIGHT MARK */
126+
0x200f | /* RIGHT-TO-LEFT MARK */
127+
0x202a | /* LEFT-TO-RIGHT EMBEDDING */
128+
0x202b | /* RIGHT-TO-LEFT EMBEDDING */
129+
0x202c | /* POP DIRECTIONAL FORMATTING */
130+
0x202d | /* LEFT-TO-RIGHT OVERRIDE */
131+
0x202e | /* RIGHT-TO-LEFT OVERRIDE */
132+
0x206a | /* INHIBIT SYMMETRIC SWAPPING */
133+
0x206b | /* ACTIVATE SYMMETRIC SWAPPING */
134+
0x206c | /* INHIBIT ARABIC FORM SHAPING */
135+
0x206d | /* ACTIVATE ARABIC FORM SHAPING */
136+
0x206e | /* NATIONAL DIGIT SHAPES */
137+
0x206f | /* NOMINAL DIGIT SHAPES */
138+
0xfeff => false, /* ZERO WIDTH NO-BREAK SPACE */
139+
_ => true
140+
});
141+
if input.next() != Some('.') {
142+
return false;
143+
}
144+
145+
let mut comp = search_case_insensitive.chars();
146+
loop {
147+
match (comp.next(), input.next()) {
148+
(Some(a), Some(b)) => {
149+
if !a.eq_ignore_ascii_case(&b) {
150+
return false;
151+
}
152+
}
153+
(None, None) => return true,
154+
_ => return false,
155+
}
156+
}
157+
}
158+
159+
fn is_dot_git_ntfs(input: &BStr) -> bool {
160+
if input
161+
.get(..4)
162+
.map_or(false, |input| input.eq_ignore_ascii_case(b".git"))
163+
{
164+
return is_done_ntfs(input.get(4..));
165+
}
166+
if input
167+
.get(..5)
168+
.map_or(false, |input| input.eq_ignore_ascii_case(b"git~1"))
169+
{
170+
return is_done_ntfs(input.get(5..));
171+
}
172+
false
173+
}
174+
175+
fn is_dot_ntfs(input: &BStr, search_case_insensitive: &str, ntfs_shortname_prefix: &str) -> bool {
176+
if input.get(0) == Some(&b'.') {
177+
let end_pos = 1 + search_case_insensitive.len();
178+
if input.get(1..end_pos).map_or(false, |input| {
179+
input.eq_ignore_ascii_case(search_case_insensitive.as_bytes())
180+
}) {
181+
is_done_ntfs(input.get(end_pos..))
182+
} else {
183+
false
184+
}
185+
} else {
186+
let search_case_insensitive: &[u8] = search_case_insensitive.as_bytes();
187+
if search_case_insensitive
188+
.get(..6)
189+
.zip(input.get(..6))
190+
.map_or(false, |(ntfs_prefix, first_6_of_input)| {
191+
first_6_of_input.eq_ignore_ascii_case(ntfs_prefix)
192+
&& input.get(6) == Some(&b'~')
193+
&& input.get(7).map_or(false, |num| num >= &b'1' && num <= &b'4')
194+
})
195+
{
196+
return is_done_ntfs(input.get(8..));
197+
}
198+
199+
let ntfs_shortname_prefix: &[u8] = ntfs_shortname_prefix.as_bytes();
200+
let mut saw_tilde = false;
201+
let mut pos = 0;
202+
while pos < 8 {
203+
let Some(b) = input.get(pos).copied() else {
204+
return false;
205+
};
206+
if saw_tilde {
207+
if b < b'0' || b > b'9' {
208+
return false;
209+
}
210+
} else if b == b'~' {
211+
saw_tilde = true;
212+
pos += 1;
213+
let Some(b) = input.get(pos).copied() else {
214+
return false;
215+
};
216+
if b < b'1' || b > b'9' {
217+
return false;
218+
}
219+
} else if pos >= 6 {
220+
return false;
221+
} else if b & 0x80 == 0x80 {
222+
return false;
223+
} else if ntfs_shortname_prefix
224+
.get(pos)
225+
.map_or(true, |ob| !b.eq_ignore_ascii_case(ob))
226+
{
227+
return false;
228+
}
229+
pos += 1;
230+
}
231+
is_done_ntfs(input.get(pos..))
232+
}
233+
}
234+
235+
/// Decide whether `input`, the remainder of a name after a matched prefix, leaves that match intact.
///
/// Returns `true` if there is no remainder (`None`), if it consists solely of spaces and dots,
/// or if the first byte that is neither a space nor a dot is a `:`. Any other byte yields `false`.
fn is_done_ntfs(input: Option<&[u8]>) -> bool {
    let Some(rest) = input else { return true };
    // Only the first byte that isn't a space or dot decides the outcome.
    match rest.iter().copied().find(|b| !matches!(*b, b' ' | b'.')) {
        None | Some(b':') => true,
        Some(_) => false,
    }
}

0 commit comments

Comments
 (0)