33// For the full copyright and license information, please view the LICENSE
44// file that was distributed with this source code.
55//
6- // spell-checker:ignore fstatat openat dirfd
6+ // spell-checker:ignore dedupe dirfd fiemap fstatat openat reflinks
77
88use clap:: { Arg , ArgAction , ArgMatches , Command , builder:: PossibleValue } ;
99use glob:: Pattern ;
@@ -21,27 +21,32 @@ use std::str::FromStr;
2121use std:: sync:: mpsc;
2222use std:: thread;
2323use thiserror:: Error ;
24+
2425use uucore:: display:: { Quotable , print_verbatim} ;
2526use uucore:: error:: { FromIo , UError , UResult , USimpleError , set_exit_code} ;
2627use uucore:: fsext:: { MetadataTimeField , metadata_get_time} ;
2728use uucore:: line_ending:: LineEnding ;
28- #[ cfg( target_os = "linux" ) ]
29- use uucore:: safe_traversal:: DirFd ;
30- use uucore:: translate;
31-
3229use uucore:: parser:: parse_glob;
3330use uucore:: parser:: parse_size:: { ParseSizeError , parse_size_non_zero_u64, parse_size_u64} ;
3431use uucore:: parser:: shortcut_value_parser:: ShortcutValueParser ;
32+ #[ cfg( target_os = "linux" ) ]
33+ use uucore:: safe_traversal:: DirFd ;
3534use uucore:: time:: { FormatSystemTimeFallback , format, format_system_time} ;
35+ use uucore:: translate;
3636use uucore:: { format_usage, show, show_error, show_warning} ;
3737#[ cfg( windows) ]
38- use windows_sys:: Win32 :: Foundation :: HANDLE ;
39- #[ cfg( windows) ]
40- use windows_sys:: Win32 :: Storage :: FileSystem :: {
41- FILE_ID_128 , FILE_ID_INFO , FILE_STANDARD_INFO , FileIdInfo , FileStandardInfo ,
42- GetFileInformationByHandleEx ,
38+ use windows_sys:: Win32 :: {
39+ Foundation :: HANDLE ,
40+ Storage :: FileSystem :: {
41+ FILE_ID_128 , FILE_ID_INFO , FILE_STANDARD_INFO , FileIdInfo , FileStandardInfo ,
42+ GetFileInformationByHandleEx ,
43+ } ,
4344} ;
4445
46+ pub mod fiemap;
47+ #[ cfg( target_os = "linux" ) ]
48+ use crate :: fiemap:: { FIEMAP_EXTENT_ENCODED , FIEMAP_EXTENT_SHARED , walk_fiemap_extents} ;
49+
4550mod options {
4651 pub const HELP : & str = "help" ;
4752 pub const NULL : & str = "0" ;
@@ -73,12 +78,15 @@ mod options {
7378 pub const FILE : & str = "FILE" ;
7479}
7580
/// Size in bytes of the unit in which `st_blocks` is reported, and the block
/// size selected when `POSIXLY_CORRECT` is set and no explicit size is given.
const POSIX_BLOCK_SIZE: u64 = 512;
82+
7683struct TraversalOptions {
7784 all : bool ,
7885 separate_dirs : bool ,
7986 one_file_system : bool ,
8087 dereference : Deref ,
8188 count_links : bool ,
89+ dedupe_reflinks : bool ,
8290 verbose : bool ,
8391 excludes : Vec < Pattern > ,
8492}
@@ -117,6 +125,13 @@ struct FileInfo {
117125 dev_id : u64 ,
118126}
119127
/// Identity of a shared extent, used as the element type of the set that
/// remembers which shared extents have already been accounted for.
///
/// Two keys are equal only when device, physical offset and length all match.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct SharedExtentKey {
    /// Device the owning file lives on.
    dev_id: u64,
    /// Value of the extent's `fe_physical` field.
    physical: u64,
    /// Value of the extent's `fe_length` field.
    length: u64,
}
134+
120135struct Stat {
121136 path : PathBuf ,
122137 size : u64 ,
@@ -270,6 +285,60 @@ fn get_file_info(path: &Path, _metadata: &Metadata) -> Option<FileInfo> {
270285 result
271286}
272287
288+ #[ cfg( target_os = "linux" ) ]
289+ fn adjust_blocks_for_reflinks (
290+ path : & Path ,
291+ dev_id : u64 ,
292+ blocks : u64 ,
293+ shared_extents : & mut HashSet < SharedExtentKey > ,
294+ ) -> u64 {
295+ if blocks == 0 {
296+ return blocks;
297+ }
298+
299+ let Ok ( file) = File :: open ( path) else {
300+ return blocks;
301+ } ;
302+
303+ let mut dedup_bytes = 0_u64 ;
304+
305+ if walk_fiemap_extents ( & file, 0 , |extent| {
306+ if ( extent. fe_flags & FIEMAP_EXTENT_SHARED ) != 0
307+ && ( extent. fe_flags & FIEMAP_EXTENT_ENCODED ) == 0
308+ && extent. fe_physical != 0
309+ {
310+ let key = SharedExtentKey {
311+ dev_id,
312+ physical : extent. fe_physical ,
313+ length : extent. fe_length ,
314+ } ;
315+
316+ if !shared_extents. insert ( key) {
317+ dedup_bytes = dedup_bytes. saturating_add ( extent. fe_length ) ;
318+ }
319+ }
320+
321+ true
322+ } )
323+ . is_err ( )
324+ {
325+ return blocks;
326+ }
327+
328+ let dedup_blocks = dedup_bytes / POSIX_BLOCK_SIZE ;
329+ blocks. saturating_sub ( dedup_blocks)
330+ }
331+
/// Fallback for targets other than Linux: no extent information is
/// consulted, so the block count is handed back untouched.
///
/// The remaining parameters exist only to mirror the Linux signature.
#[cfg(not(target_os = "linux"))]
fn adjust_blocks_for_reflinks(
    _path: &Path,
    _dev_id: u64,
    blocks: u64,
    _shared_extents: &mut HashSet<SharedExtentKey>,
) -> u64 {
    blocks
}
341+
273342fn block_size_from_env ( ) -> Option < u64 > {
274343 for env_var in [ "DU_BLOCK_SIZE" , "BLOCK_SIZE" , "BLOCKSIZE" ] {
275344 if let Ok ( env_size) = env:: var ( env_var) {
@@ -287,7 +356,7 @@ fn read_block_size(s: Option<&str>) -> UResult<u64> {
287356 } else if let Some ( bytes) = block_size_from_env ( ) {
288357 Ok ( bytes)
289358 } else if env:: var ( "POSIXLY_CORRECT" ) . is_ok ( ) {
290- Ok ( 512 )
359+ Ok ( POSIX_BLOCK_SIZE )
291360 } else {
292361 Ok ( 1024 )
293362 }
@@ -301,6 +370,7 @@ fn safe_du(
301370 options : & TraversalOptions ,
302371 depth : usize ,
303372 seen_inodes : & mut HashSet < FileInfo > ,
373+ shared_extents : & mut HashSet < SharedExtentKey > ,
304374 print_tx : & mpsc:: Sender < UResult < StatPrintInfo > > ,
305375 parent_fd : Option < & DirFd > ,
306376) -> Result < Stat , Box < mpsc:: SendError < UResult < StatPrintInfo > > > > {
@@ -391,6 +461,11 @@ fn safe_du(
391461 }
392462 } ;
393463 if !my_stat. metadata . is_dir ( ) {
464+ if options. dedupe_reflinks {
465+ let dev_id = my_stat. inode . map_or ( 0 , |inode| inode. dev_id ) ;
466+ my_stat. blocks =
467+ adjust_blocks_for_reflinks ( & my_stat. path , dev_id, my_stat. blocks , shared_extents) ;
468+ }
394469 return Ok ( my_stat) ;
395470 }
396471
@@ -439,6 +514,7 @@ fn safe_du(
439514 const S_IFMT : u32 = 0o170_000 ;
440515 const S_IFDIR : u32 = 0o040_000 ;
441516 const S_IFLNK : u32 = 0o120_000 ;
517+ const S_IFREG : u32 = 0o100_000 ;
442518 let is_symlink = ( lstat. st_mode & S_IFMT ) == S_IFLNK ;
443519
444520 // Handle symlinks with -L option
@@ -451,6 +527,7 @@ fn safe_du(
451527 }
452528
453529 let is_dir = ( lstat. st_mode & S_IFMT ) == S_IFDIR ;
530+ let is_regular = ( lstat. st_mode & S_IFMT ) == S_IFREG ;
454531 let entry_stat = lstat;
455532
456533 let file_info = ( entry_stat. st_ino != 0 ) . then_some ( FileInfo {
@@ -460,7 +537,7 @@ fn safe_du(
460537
461538 // For safe traversal, we need to handle stats differently
462539 // We can't use std::fs::Metadata since that requires the full path
463- let this_stat = if is_dir {
540+ let mut this_stat = if is_dir {
464541 // For directories, recurse using safe_du
465542 Stat {
466543 path : entry_path. clone ( ) ,
@@ -507,6 +584,14 @@ fn safe_du(
507584 seen_inodes. insert ( inode) ;
508585 }
509586
587+ if options. dedupe_reflinks && is_regular {
588+ let dev_id = this_stat
589+ . inode
590+ . map_or ( entry_stat. st_dev , |inode| inode. dev_id ) ;
591+ this_stat. blocks =
592+ adjust_blocks_for_reflinks ( & entry_path, dev_id, this_stat. blocks , shared_extents) ;
593+ }
594+
510595 // Process directories recursively
511596 if is_dir {
512597 if options. one_file_system {
@@ -522,6 +607,7 @@ fn safe_du(
522607 options,
523608 depth + 1 ,
524609 seen_inodes,
610+ shared_extents,
525611 print_tx,
526612 Some ( & dir_fd) ,
527613 ) ?;
@@ -555,12 +641,13 @@ fn safe_du(
// Regular traversal using std::fs.
// Used on non-Linux platforms and as a fallback for symlinks on Linux.
558- #[ allow( clippy:: cognitive_complexity) ]
644+ #[ allow( clippy:: cognitive_complexity, clippy :: too_many_arguments ) ]
559645fn du_regular (
560646 mut my_stat : Stat ,
561647 options : & TraversalOptions ,
562648 depth : usize ,
563649 seen_inodes : & mut HashSet < FileInfo > ,
650+ shared_extents : & mut HashSet < SharedExtentKey > ,
564651 print_tx : & mpsc:: Sender < UResult < StatPrintInfo > > ,
565652 ancestors : Option < & mut HashSet < FileInfo > > ,
566653 symlink_depth : Option < usize > ,
@@ -571,6 +658,15 @@ fn du_regular(
571658 // Maximum symlink depth to prevent infinite loops
572659 const MAX_SYMLINK_DEPTH : usize = 40 ;
573660
661+ if !my_stat. metadata . is_dir ( ) {
662+ if options. dedupe_reflinks {
663+ let dev_id = my_stat. inode . map_or ( 0 , |inode| inode. dev_id ) ;
664+ my_stat. blocks =
665+ adjust_blocks_for_reflinks ( & my_stat. path , dev_id, my_stat. blocks , shared_extents) ;
666+ }
667+ return Ok ( my_stat) ;
668+ }
669+
574670 // Add current directory to ancestors if it's a directory
575671 let my_inode = if my_stat. metadata . is_dir ( ) {
576672 my_stat. inode
@@ -621,7 +717,7 @@ fn du_regular(
621717 }
622718
623719 match Stat :: new ( & entry_path, Some ( & entry) , options) {
624- Ok ( this_stat) => {
720+ Ok ( mut this_stat) => {
625721 // Check if symlink with -L points to an ancestor (cycle detection)
626722 if is_symlink
627723 && options. dereference == Deref :: All
@@ -681,6 +777,7 @@ fn du_regular(
681777 options,
682778 depth + 1 ,
683779 seen_inodes,
780+ shared_extents,
684781 print_tx,
685782 Some ( ancestors) ,
686783 Some ( current_symlink_depth) ,
@@ -696,9 +793,20 @@ fn du_regular(
696793 depth : depth + 1 ,
697794 } ) ) ?;
698795 } else {
796+ if options. dedupe_reflinks {
797+ let dev_id = this_stat. inode . map_or ( 0 , |inode| inode. dev_id ) ;
798+ this_stat. blocks = adjust_blocks_for_reflinks (
799+ & this_stat. path ,
800+ dev_id,
801+ this_stat. blocks ,
802+ shared_extents,
803+ ) ;
804+ }
805+
699806 my_stat. size += this_stat. size ;
700807 my_stat. blocks += this_stat. blocks ;
701808 my_stat. inodes += 1 ;
809+
702810 if options. all {
703811 print_tx. send ( Ok ( StatPrintInfo {
704812 stat : this_stat,
@@ -804,9 +912,10 @@ impl StatPrinter {
804912 } else if self . apparent_size {
805913 stat. size
806914 } else {
807- // The st_blocks field indicates the number of blocks allocated to the file, 512-byte units.
915+ // The st_blocks field indicates the number of blocks allocated to the file,
916+ // in POSIX_BLOCK_SIZE-byte units.
808917 // See: http://linux.die.net/man/2/stat
809- stat. blocks * 512
918+ stat. blocks * POSIX_BLOCK_SIZE
810919 }
811920 }
812921
@@ -1017,6 +1126,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
10171126 SizeFormat :: BlockSize ( block_size)
10181127 } ;
10191128
1129+ let inodes = matches. get_flag ( options:: INODES ) ;
1130+ let apparent_size =
1131+ matches. get_flag ( options:: APPARENT_SIZE ) || matches. get_flag ( options:: BYTES ) ;
1132+
10201133 let traversal_options = TraversalOptions {
10211134 all : matches. get_flag ( options:: ALL ) ,
10221135 separate_dirs : matches. get_flag ( options:: SEPARATE_DIRS ) ,
@@ -1030,6 +1143,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
10301143 Deref :: None
10311144 } ,
10321145 count_links,
1146+ dedupe_reflinks : !count_links && !apparent_size && !inodes,
10331147 verbose : matches. get_flag ( options:: VERBOSE ) ,
10341148 excludes : build_exclude_patterns ( & matches) ?,
10351149 } ;
@@ -1045,7 +1159,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
10451159 size_format,
10461160 summarize,
10471161 total : matches. get_flag ( options:: TOTAL ) ,
1048- inodes : matches . get_flag ( options :: INODES ) ,
1162+ inodes,
10491163 threshold : matches
10501164 . get_one :: < String > ( options:: THRESHOLD )
10511165 . map ( |s| {
@@ -1054,16 +1168,14 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
10541168 } )
10551169 } )
10561170 . transpose ( ) ?,
1057- apparent_size : matches . get_flag ( options :: APPARENT_SIZE ) || matches . get_flag ( options :: BYTES ) ,
1171+ apparent_size,
10581172 time,
10591173 time_format,
10601174 line_ending : LineEnding :: from_zero_flag ( matches. get_flag ( options:: NULL ) ) ,
10611175 total_text : translate ! ( "du-total" ) ,
10621176 } ;
10631177
1064- if stat_printer. inodes
1065- && ( matches. get_flag ( options:: APPARENT_SIZE ) || matches. get_flag ( options:: BYTES ) )
1066- {
1178+ if inodes && apparent_size {
10671179 show_warning ! (
10681180 "{}" ,
10691181 translate!( "du-warning-apparent-size-ineffective-with-inodes" )
@@ -1094,6 +1206,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
10941206
10951207 // Check existence of path provided in argument
10961208 let mut seen_inodes: HashSet < FileInfo > = HashSet :: new ( ) ;
1209+ let mut seen_shared_extents: HashSet < SharedExtentKey > = HashSet :: new ( ) ;
10971210
10981211 // Determine which traversal method to use
10991212 #[ cfg( target_os = "linux" ) ]
@@ -1117,6 +1230,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
11171230 & traversal_options,
11181231 0 ,
11191232 & mut seen_inodes,
1233+ & mut seen_shared_extents,
11201234 & print_tx,
11211235 None ,
11221236 ) {
@@ -1148,6 +1262,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
11481262 & traversal_options,
11491263 0 ,
11501264 & mut seen_inodes,
1265+ & mut seen_shared_extents,
11511266 & print_tx,
11521267 None ,
11531268 None ,
0 commit comments