Skip to content

Commit a214d8f

Browse files
authored
fix two problems in the Dir module: (#2674)
readdir_r does not work well on systems where {NAME_MAX} can vary. The main reason to use it is for thread-safety. However, POSIX 1003.1-2024 Issue 8 makes readdir_r obsolete and clarifies that readdir must be thread-safe except when concurrent calls are made to the same directory stream. So all applications should prefer it now. The original rationale for Nix using readdir_r[^1], in addition to thread safety, was that the Rust standard library used it. But that is no longer the case. So Nix should switch to plain readdir. The second problem is that on some operating systems, libc::dirent is an open-ended structure where the last field (d_name) has a size of 1 byte, but additional data follow the end of the structure. Nix currently relies on copying the libc::dirent structure, which can't possibly work on those platforms. Fix that bug by copying the d_name field to an owned CString. [^1]: f9ebcb7 , from 2018 Fixes #2673 Fixes #1783
1 parent 09d66fe commit a214d8f

File tree

2 files changed

+76
-66
lines changed

2 files changed

+76
-66
lines changed

changelog/2674.fixed.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fixed the Dir module on NTO, Solaris, Hurd, and possibly other platforms. The
2+
d_name field was not copied correctly on those platforms. For some other
3+
platforms, it could be copied incorrectly for files with very long pathnames.

src/dir.rs

Lines changed: 73 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,10 @@ use crate::fcntl::{self, OFlag};
55
use crate::sys;
66
use crate::{NixPath, Result};
77
use cfg_if::cfg_if;
8-
use std::ffi;
8+
use std::ffi::{CStr, CString};
99
use std::os::unix::io::{AsRawFd, IntoRawFd, RawFd};
1010
use std::ptr;
1111

12-
#[cfg(target_os = "linux")]
13-
use libc::{dirent64 as dirent, readdir64_r as readdir_r};
14-
15-
#[cfg(not(target_os = "linux"))]
16-
use libc::{dirent, readdir_r};
17-
1812
/// An open directory.
1913
///
2014
/// This is a lower-level interface than [`std::fs::ReadDir`]. Notable differences:
@@ -26,7 +20,7 @@ use libc::{dirent, readdir_r};
2620
/// * can be iterated through multiple times without closing and reopening the file
2721
/// descriptor. Each iteration rewinds when finished.
2822
/// * returns entries for `.` (current directory) and `..` (parent directory).
29-
/// * returns entries' names as a [`CStr`][cstr] (no allocation or conversion beyond whatever libc
23+
/// * returns entries' names as a [`CStr`] (no allocation or conversion beyond whatever libc
3024
/// does).
3125
///
3226
/// [AsFd]: std::os::fd::AsFd
@@ -124,10 +118,7 @@ impl Dir {
124118
}
125119
}
126120

127-
// `Dir` is not `Sync`. With the current implementation, it could be, but according to
128-
// https://www.gnu.org/software/libc/manual/html_node/Reading_002fClosing-Directory.html,
129-
// future versions of POSIX are likely to obsolete `readdir_r` and specify that it's unsafe to
130-
// call `readdir` simultaneously from multiple threads.
121+
// `Dir` is not `Sync` because it's unsafe to call `readdir` simultaneously from multiple threads.
131122
//
132123
// `Dir` is safe to pass from one thread to another, as it's not reference-counted.
133124
unsafe impl Send for Dir {}
@@ -160,29 +151,22 @@ impl Drop for Dir {
160151
}
161152

162153
// The pass by mut is technically needless only because the inner NonNull is
163-
// Copy. But philosophically we're mutating the Dir, so we pass by mut.
154+
// Copy. But we are actually mutating the Dir, so we pass by mut.
164155
#[allow(clippy::needless_pass_by_ref_mut)]
165-
fn next(dir: &mut Dir) -> Option<Result<Entry>> {
156+
fn readdir(dir: &mut Dir) -> Option<Result<Entry>> {
157+
Errno::clear();
166158
unsafe {
167-
// Note: POSIX specifies that portable applications should dynamically allocate a
168-
// buffer with room for a `d_name` field of size `pathconf(..., _PC_NAME_MAX)` plus 1
169-
// for the NUL byte. It doesn't look like the std library does this; it just uses
170-
// fixed-sized buffers (and libc's dirent seems to be sized so this is appropriate).
171-
// Probably fine here too then.
172-
let mut ent = std::mem::MaybeUninit::<dirent>::uninit();
173-
let mut result = ptr::null_mut();
174-
if let Err(e) = Errno::result(readdir_r(
175-
dir.0.as_ptr(),
176-
ent.as_mut_ptr(),
177-
&mut result,
178-
)) {
179-
return Some(Err(e));
180-
}
181-
if result.is_null() {
182-
return None;
159+
let de = libc::readdir(dir.0.as_ptr());
160+
if de.is_null() {
161+
if Errno::last_raw() == 0 {
162+
// EOF
163+
None
164+
} else {
165+
Some(Err(Errno::last()))
166+
}
167+
} else {
168+
Some(Ok(Entry::from_raw(&*de)))
183169
}
184-
assert_eq!(result, ent.as_mut_ptr());
185-
Some(Ok(Entry(ent.assume_init())))
186170
}
187171
}
188172

@@ -194,7 +178,7 @@ impl Iterator for Iter<'_> {
194178
type Item = Result<Entry>;
195179

196180
fn next(&mut self) -> Option<Self::Item> {
197-
next(self.0)
181+
readdir(self.0)
198182
}
199183
}
200184

@@ -212,7 +196,7 @@ impl Iterator for OwningIter {
212196
type Item = Result<Entry>;
213197

214198
fn next(&mut self) -> Option<Self::Item> {
215-
next(&mut self.0)
199+
readdir(&mut self.0)
216200
}
217201
}
218202

@@ -252,9 +236,18 @@ impl IntoIterator for Dir {
252236
/// A directory entry, similar to `std::fs::DirEntry`.
253237
///
254238
/// Note that unlike the std version, this may represent the `.` or `..` entries.
255-
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
256-
#[repr(transparent)]
257-
pub struct Entry(dirent);
239+
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
240+
pub struct Entry {
241+
ino: u64,
242+
type_: Option<Type>,
243+
// On some platforms libc::dirent is a "flexible-length structure", where there may be
244+
// data beyond the end of the struct definition. So it isn't possible to copy it and subsequently
245+
// dereference the copy's d_name field. Nor is it possible for Entry to wrap a &libc::dirent.
246+
// At least, not if it is to work with the Iterator trait. Because the Entry would need to
247+
// maintain a mutable reference to the Dir, which would conflict with the iterator's mutable
248+
// reference to the same Dir. So we're forced to copy the name here.
249+
name: CString,
250+
}
258251

259252
/// Type of file referenced by a directory entry
260253
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
@@ -277,10 +270,28 @@ pub enum Type {
277270

278271
impl Entry {
279272
/// Returns the inode number (`d_ino`) of the underlying `dirent`.
273+
pub fn ino(&self) -> u64 {
274+
self.ino
275+
}
276+
277+
/// Returns the bare file name of this directory entry without any other leading path component.
278+
pub fn file_name(&self) -> &CStr {
279+
self.name.as_c_str()
280+
}
281+
282+
/// Returns the type of this directory entry, if known.
283+
///
284+
/// See platform `readdir(3)` or `dirent(5)` manpage for when the file type is known;
285+
/// notably, some Linux filesystems don't implement this. The caller should use `stat` or
286+
/// `fstat` if this returns `None`.
287+
pub fn file_type(&self) -> Option<Type> {
288+
self.type_
289+
}
290+
280291
#[allow(clippy::useless_conversion)] // Not useless on all OSes
281292
// The cast is not unnecessary on all platforms.
282293
#[allow(clippy::unnecessary_cast)]
283-
pub fn ino(&self) -> u64 {
294+
fn from_raw(de: &libc::dirent) -> Self {
284295
cfg_if! {
285296
if #[cfg(any(target_os = "aix",
286297
target_os = "emscripten",
@@ -291,38 +302,34 @@ impl Entry {
291302
solarish,
292303
linux_android,
293304
apple_targets))] {
294-
self.0.d_ino as u64
305+
let ino = de.d_ino as u64;
295306
} else {
296-
u64::from(self.0.d_fileno)
307+
let ino = u64::from(de.d_fileno);
297308
}
298309
}
299-
}
300-
301-
/// Returns the bare file name of this directory entry without any other leading path component.
302-
pub fn file_name(&self) -> &ffi::CStr {
303-
unsafe { ffi::CStr::from_ptr(self.0.d_name.as_ptr()) }
304-
}
305-
306-
/// Returns the type of this directory entry, if known.
307-
///
308-
/// See platform `readdir(3)` or `dirent(5)` manpage for when the file type is known;
309-
/// notably, some Linux filesystems don't implement this. The caller should use `stat` or
310-
/// `fstat` if this returns `None`.
311-
pub fn file_type(&self) -> Option<Type> {
312-
#[cfg(not(any(solarish, target_os = "aix", target_os = "haiku")))]
313-
match self.0.d_type {
314-
libc::DT_FIFO => Some(Type::Fifo),
315-
libc::DT_CHR => Some(Type::CharacterDevice),
316-
libc::DT_DIR => Some(Type::Directory),
317-
libc::DT_BLK => Some(Type::BlockDevice),
318-
libc::DT_REG => Some(Type::File),
319-
libc::DT_LNK => Some(Type::Symlink),
320-
libc::DT_SOCK => Some(Type::Socket),
321-
/* libc::DT_UNKNOWN | */ _ => None,
310+
cfg_if! {
311+
if #[cfg(not(any(solarish, target_os = "aix", target_os = "haiku")))] {
312+
let type_ = match de.d_type {
313+
libc::DT_FIFO => Some(Type::Fifo),
314+
libc::DT_CHR => Some(Type::CharacterDevice),
315+
libc::DT_DIR => Some(Type::Directory),
316+
libc::DT_BLK => Some(Type::BlockDevice),
317+
libc::DT_REG => Some(Type::File),
318+
libc::DT_LNK => Some(Type::Symlink),
319+
libc::DT_SOCK => Some(Type::Socket),
320+
/* libc::DT_UNKNOWN | */ _ => None,
321+
};
322+
} else {
323+
// illumos, Solaris, and Haiku systems do not have the d_type member at all:
324+
#[cfg(any(solarish, target_os = "aix", target_os = "haiku"))]
325+
let type_ = None;
326+
}
322327
}
328+
// Annoyingly, since libc::dirent is open-ended, the easiest way to read the name field in
329+
// Rust is to treat it like a pointer.
330+
// Safety: safe because we knod that de.d_name is in valid memory.
331+
let name = unsafe { CStr::from_ptr(de.d_name.as_ptr()) }.to_owned();
323332

324-
// illumos, Solaris, and Haiku systems do not have the d_type member at all:
325-
#[cfg(any(solarish, target_os = "aix", target_os = "haiku"))]
326-
None
333+
Entry { ino, type_, name }
327334
}
328335
}

0 commit comments

Comments
 (0)