Skip to content

Commit 44ae977

Browse files
committed
du: count reflink disk usage only once
1 parent 2da2c90 commit 44ae977

File tree

5 files changed

+347
-22
lines changed

5 files changed

+347
-22
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uu/du/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ uucore = { workspace = true, features = [
3131
] }
3232
thiserror = { workspace = true }
3333
fluent = { workspace = true }
34+
libc = { workspace = true }
3435

3536
[target.'cfg(target_os = "windows")'.dependencies]
3637
windows-sys = { workspace = true, features = [

src/uu/du/src/du.rs

Lines changed: 136 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55
//
6-
// spell-checker:ignore fstatat openat dirfd
6+
// spell-checker:ignore dedupe dirfd fiemap fstatat openat reflinks
77

88
use clap::{Arg, ArgAction, ArgMatches, Command, builder::PossibleValue};
99
use glob::Pattern;
@@ -21,27 +21,32 @@ use std::str::FromStr;
2121
use std::sync::mpsc;
2222
use std::thread;
2323
use thiserror::Error;
24+
2425
use uucore::display::{Quotable, print_verbatim};
2526
use uucore::error::{FromIo, UError, UResult, USimpleError, set_exit_code};
2627
use uucore::fsext::{MetadataTimeField, metadata_get_time};
2728
use uucore::line_ending::LineEnding;
28-
#[cfg(target_os = "linux")]
29-
use uucore::safe_traversal::DirFd;
30-
use uucore::translate;
31-
3229
use uucore::parser::parse_glob;
3330
use uucore::parser::parse_size::{ParseSizeError, parse_size_non_zero_u64, parse_size_u64};
3431
use uucore::parser::shortcut_value_parser::ShortcutValueParser;
32+
#[cfg(target_os = "linux")]
33+
use uucore::safe_traversal::DirFd;
3534
use uucore::time::{FormatSystemTimeFallback, format, format_system_time};
35+
use uucore::translate;
3636
use uucore::{format_usage, show, show_error, show_warning};
3737
#[cfg(windows)]
38-
use windows_sys::Win32::Foundation::HANDLE;
39-
#[cfg(windows)]
40-
use windows_sys::Win32::Storage::FileSystem::{
41-
FILE_ID_128, FILE_ID_INFO, FILE_STANDARD_INFO, FileIdInfo, FileStandardInfo,
42-
GetFileInformationByHandleEx,
38+
use windows_sys::Win32::{
39+
Foundation::HANDLE,
40+
Storage::FileSystem::{
41+
FILE_ID_128, FILE_ID_INFO, FILE_STANDARD_INFO, FileIdInfo, FileStandardInfo,
42+
GetFileInformationByHandleEx,
43+
},
4344
};
4445

46+
pub mod fiemap;
47+
#[cfg(target_os = "linux")]
48+
use crate::fiemap::{FIEMAP_EXTENT_ENCODED, FIEMAP_EXTENT_SHARED, walk_fiemap_extents};
49+
4550
mod options {
4651
pub const HELP: &str = "help";
4752
pub const NULL: &str = "0";
@@ -73,12 +78,15 @@ mod options {
7378
pub const FILE: &str = "FILE";
7479
}
7580

81+
/// Size in bytes of one `st_blocks` unit; also the default output block size
/// when `POSIXLY_CORRECT` is set.
const POSIX_BLOCK_SIZE: u64 = 512;
82+
7683
struct TraversalOptions {
7784
all: bool,
7885
separate_dirs: bool,
7986
one_file_system: bool,
8087
dereference: Deref,
8188
count_links: bool,
89+
dedupe_reflinks: bool,
8290
verbose: bool,
8391
excludes: Vec<Pattern>,
8492
}
@@ -117,6 +125,13 @@ struct FileInfo {
117125
dev_id: u64,
118126
}
119127

128+
/// Identity of one shared (reflinked) extent, used as a `HashSet` key so the
/// same on-disk extent is only accounted for once per run.
///
/// Two extents are considered the same only when device, physical start and
/// length all match exactly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct SharedExtentKey {
    /// Device the owning file lives on.
    dev_id: u64,
    /// Physical start of the extent, taken from the extent's `fe_physical`.
    physical: u64,
    /// Length of the extent, taken from the extent's `fe_length`.
    length: u64,
}
134+
120135
struct Stat {
121136
path: PathBuf,
122137
size: u64,
@@ -270,6 +285,60 @@ fn get_file_info(path: &Path, _metadata: &Metadata) -> Option<FileInfo> {
270285
result
271286
}
272287

288+
#[cfg(target_os = "linux")]
289+
fn adjust_blocks_for_reflinks(
290+
path: &Path,
291+
dev_id: u64,
292+
blocks: u64,
293+
shared_extents: &mut HashSet<SharedExtentKey>,
294+
) -> u64 {
295+
if blocks == 0 {
296+
return blocks;
297+
}
298+
299+
let Ok(file) = File::open(path) else {
300+
return blocks;
301+
};
302+
303+
let mut dedup_bytes = 0_u64;
304+
305+
if walk_fiemap_extents(&file, 0, |extent| {
306+
if (extent.fe_flags & FIEMAP_EXTENT_SHARED) != 0
307+
&& (extent.fe_flags & FIEMAP_EXTENT_ENCODED) == 0
308+
&& extent.fe_physical != 0
309+
{
310+
let key = SharedExtentKey {
311+
dev_id,
312+
physical: extent.fe_physical,
313+
length: extent.fe_length,
314+
};
315+
316+
if !shared_extents.insert(key) {
317+
dedup_bytes = dedup_bytes.saturating_add(extent.fe_length);
318+
}
319+
}
320+
321+
true
322+
})
323+
.is_err()
324+
{
325+
return blocks;
326+
}
327+
328+
let dedup_blocks = dedup_bytes / POSIX_BLOCK_SIZE;
329+
blocks.saturating_sub(dedup_blocks)
330+
}
331+
332+
/// Fallback for platforms other than Linux: no extent information is
/// consulted, so the block count is handed back untouched.
#[cfg(not(target_os = "linux"))]
fn adjust_blocks_for_reflinks(
    _path: &Path,
    _dev_id: u64,
    blocks: u64,
    _shared_extents: &mut HashSet<SharedExtentKey>,
) -> u64 {
    blocks
}
341+
273342
fn block_size_from_env() -> Option<u64> {
274343
for env_var in ["DU_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] {
275344
if let Ok(env_size) = env::var(env_var) {
@@ -287,7 +356,7 @@ fn read_block_size(s: Option<&str>) -> UResult<u64> {
287356
} else if let Some(bytes) = block_size_from_env() {
288357
Ok(bytes)
289358
} else if env::var("POSIXLY_CORRECT").is_ok() {
290-
Ok(512)
359+
Ok(POSIX_BLOCK_SIZE)
291360
} else {
292361
Ok(1024)
293362
}
@@ -301,6 +370,7 @@ fn safe_du(
301370
options: &TraversalOptions,
302371
depth: usize,
303372
seen_inodes: &mut HashSet<FileInfo>,
373+
shared_extents: &mut HashSet<SharedExtentKey>,
304374
print_tx: &mpsc::Sender<UResult<StatPrintInfo>>,
305375
parent_fd: Option<&DirFd>,
306376
) -> Result<Stat, Box<mpsc::SendError<UResult<StatPrintInfo>>>> {
@@ -391,6 +461,11 @@ fn safe_du(
391461
}
392462
};
393463
if !my_stat.metadata.is_dir() {
464+
if options.dedupe_reflinks {
465+
let dev_id = my_stat.inode.map_or(0, |inode| inode.dev_id);
466+
my_stat.blocks =
467+
adjust_blocks_for_reflinks(&my_stat.path, dev_id, my_stat.blocks, shared_extents);
468+
}
394469
return Ok(my_stat);
395470
}
396471

@@ -439,6 +514,7 @@ fn safe_du(
439514
const S_IFMT: u32 = 0o170_000;
440515
const S_IFDIR: u32 = 0o040_000;
441516
const S_IFLNK: u32 = 0o120_000;
517+
const S_IFREG: u32 = 0o100_000;
442518
let is_symlink = (lstat.st_mode & S_IFMT) == S_IFLNK;
443519

444520
// Handle symlinks with -L option
@@ -451,6 +527,7 @@ fn safe_du(
451527
}
452528

453529
let is_dir = (lstat.st_mode & S_IFMT) == S_IFDIR;
530+
let is_regular = (lstat.st_mode & S_IFMT) == S_IFREG;
454531
let entry_stat = lstat;
455532

456533
let file_info = (entry_stat.st_ino != 0).then_some(FileInfo {
@@ -460,7 +537,7 @@ fn safe_du(
460537

461538
// For safe traversal, we need to handle stats differently
462539
// We can't use std::fs::Metadata since that requires the full path
463-
let this_stat = if is_dir {
540+
let mut this_stat = if is_dir {
464541
// For directories, recurse using safe_du
465542
Stat {
466543
path: entry_path.clone(),
@@ -507,6 +584,14 @@ fn safe_du(
507584
seen_inodes.insert(inode);
508585
}
509586

587+
if options.dedupe_reflinks && is_regular {
588+
let dev_id = this_stat
589+
.inode
590+
.map_or(entry_stat.st_dev, |inode| inode.dev_id);
591+
this_stat.blocks =
592+
adjust_blocks_for_reflinks(&entry_path, dev_id, this_stat.blocks, shared_extents);
593+
}
594+
510595
// Process directories recursively
511596
if is_dir {
512597
if options.one_file_system {
@@ -522,6 +607,7 @@ fn safe_du(
522607
options,
523608
depth + 1,
524609
seen_inodes,
610+
shared_extents,
525611
print_tx,
526612
Some(&dir_fd),
527613
)?;
@@ -555,12 +641,13 @@ fn safe_du(
555641
// Main traversal on non-Linux platforms
556642
// Regular traversal using std::fs
557643
// Used on non-Linux platforms and as fallback for symlinks on Linux
558-
#[allow(clippy::cognitive_complexity)]
644+
#[allow(clippy::cognitive_complexity, clippy::too_many_arguments)]
559645
fn du_regular(
560646
mut my_stat: Stat,
561647
options: &TraversalOptions,
562648
depth: usize,
563649
seen_inodes: &mut HashSet<FileInfo>,
650+
shared_extents: &mut HashSet<SharedExtentKey>,
564651
print_tx: &mpsc::Sender<UResult<StatPrintInfo>>,
565652
ancestors: Option<&mut HashSet<FileInfo>>,
566653
symlink_depth: Option<usize>,
@@ -571,6 +658,15 @@ fn du_regular(
571658
// Maximum symlink depth to prevent infinite loops
572659
const MAX_SYMLINK_DEPTH: usize = 40;
573660

661+
if !my_stat.metadata.is_dir() {
662+
if options.dedupe_reflinks {
663+
let dev_id = my_stat.inode.map_or(0, |inode| inode.dev_id);
664+
my_stat.blocks =
665+
adjust_blocks_for_reflinks(&my_stat.path, dev_id, my_stat.blocks, shared_extents);
666+
}
667+
return Ok(my_stat);
668+
}
669+
574670
// Add current directory to ancestors if it's a directory
575671
let my_inode = if my_stat.metadata.is_dir() {
576672
my_stat.inode
@@ -621,7 +717,7 @@ fn du_regular(
621717
}
622718

623719
match Stat::new(&entry_path, Some(&entry), options) {
624-
Ok(this_stat) => {
720+
Ok(mut this_stat) => {
625721
// Check if symlink with -L points to an ancestor (cycle detection)
626722
if is_symlink
627723
&& options.dereference == Deref::All
@@ -681,6 +777,7 @@ fn du_regular(
681777
options,
682778
depth + 1,
683779
seen_inodes,
780+
shared_extents,
684781
print_tx,
685782
Some(ancestors),
686783
Some(current_symlink_depth),
@@ -696,9 +793,20 @@ fn du_regular(
696793
depth: depth + 1,
697794
}))?;
698795
} else {
796+
if options.dedupe_reflinks {
797+
let dev_id = this_stat.inode.map_or(0, |inode| inode.dev_id);
798+
this_stat.blocks = adjust_blocks_for_reflinks(
799+
&this_stat.path,
800+
dev_id,
801+
this_stat.blocks,
802+
shared_extents,
803+
);
804+
}
805+
699806
my_stat.size += this_stat.size;
700807
my_stat.blocks += this_stat.blocks;
701808
my_stat.inodes += 1;
809+
702810
if options.all {
703811
print_tx.send(Ok(StatPrintInfo {
704812
stat: this_stat,
@@ -804,9 +912,10 @@ impl StatPrinter {
804912
} else if self.apparent_size {
805913
stat.size
806914
} else {
807-
// The st_blocks field indicates the number of blocks allocated to the file, 512-byte units.
915+
// The st_blocks field indicates the number of blocks allocated to the file,
916+
// in POSIX_BLOCK_SIZE-byte units.
808917
// See: http://linux.die.net/man/2/stat
809-
stat.blocks * 512
918+
stat.blocks * POSIX_BLOCK_SIZE
810919
}
811920
}
812921

@@ -1017,6 +1126,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
10171126
SizeFormat::BlockSize(block_size)
10181127
};
10191128

1129+
let inodes = matches.get_flag(options::INODES);
1130+
let apparent_size =
1131+
matches.get_flag(options::APPARENT_SIZE) || matches.get_flag(options::BYTES);
1132+
10201133
let traversal_options = TraversalOptions {
10211134
all: matches.get_flag(options::ALL),
10221135
separate_dirs: matches.get_flag(options::SEPARATE_DIRS),
@@ -1030,6 +1143,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
10301143
Deref::None
10311144
},
10321145
count_links,
1146+
dedupe_reflinks: !count_links && !apparent_size && !inodes,
10331147
verbose: matches.get_flag(options::VERBOSE),
10341148
excludes: build_exclude_patterns(&matches)?,
10351149
};
@@ -1045,7 +1159,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
10451159
size_format,
10461160
summarize,
10471161
total: matches.get_flag(options::TOTAL),
1048-
inodes: matches.get_flag(options::INODES),
1162+
inodes,
10491163
threshold: matches
10501164
.get_one::<String>(options::THRESHOLD)
10511165
.map(|s| {
@@ -1054,16 +1168,14 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
10541168
})
10551169
})
10561170
.transpose()?,
1057-
apparent_size: matches.get_flag(options::APPARENT_SIZE) || matches.get_flag(options::BYTES),
1171+
apparent_size,
10581172
time,
10591173
time_format,
10601174
line_ending: LineEnding::from_zero_flag(matches.get_flag(options::NULL)),
10611175
total_text: translate!("du-total"),
10621176
};
10631177

1064-
if stat_printer.inodes
1065-
&& (matches.get_flag(options::APPARENT_SIZE) || matches.get_flag(options::BYTES))
1066-
{
1178+
if inodes && apparent_size {
10671179
show_warning!(
10681180
"{}",
10691181
translate!("du-warning-apparent-size-ineffective-with-inodes")
@@ -1094,6 +1206,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
10941206

10951207
// Check existence of path provided in argument
10961208
let mut seen_inodes: HashSet<FileInfo> = HashSet::new();
1209+
let mut seen_shared_extents: HashSet<SharedExtentKey> = HashSet::new();
10971210

10981211
// Determine which traversal method to use
10991212
#[cfg(target_os = "linux")]
@@ -1117,6 +1230,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
11171230
&traversal_options,
11181231
0,
11191232
&mut seen_inodes,
1233+
&mut seen_shared_extents,
11201234
&print_tx,
11211235
None,
11221236
) {
@@ -1148,6 +1262,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
11481262
&traversal_options,
11491263
0,
11501264
&mut seen_inodes,
1265+
&mut seen_shared_extents,
11511266
&print_tx,
11521267
None,
11531268
None,

0 commit comments

Comments
 (0)