@@ -12,6 +12,7 @@ use data::data_client::default_config;
 use data::migration_tool::hub_client_token_refresher::HubClientTokenRefresher;
 use data::migration_tool::migrate::migrate_files_impl;
 use hub_client::{BearerCredentialHelper, HubClient, Operation, RepoInfo};
+use humansize::{BINARY, DECIMAL, format_size};
 use merklehash::MerkleHash;
 use utils::auth::TokenRefresher;
 use walkdir::WalkDir;
@@ -76,6 +77,8 @@ enum Command {
     Dedup(DedupArg),
     /// Queries reconstruction information about a file.
     Query(QueryArg),
+    /// Calculates the compressed size of a xet-file by summing url_range sizes.
+    CompressedSize(CompressedSizeArg),
 }
 
 #[derive(Args)]
@@ -116,6 +119,12 @@ struct QueryArg {
     bytes_range: Option<FileRange>,
 }
 
+#[derive(Args)]
+struct CompressedSizeArg {
+    /// Xet-hash of a file.
+    hash: String,
+}
+
 impl Command {
     async fn run(self, hub_client: HubClient) -> Result<()> {
         match self {
@@ -161,6 +170,40 @@ impl Command {
 
                 Ok(())
             },
+            Command::CompressedSize(arg) => {
+                let file_hash = MerkleHash::from_hex(&arg.hash)?;
+                // Query reconstruction for full file (no Range header)
+                let ret = query_reconstruction(file_hash, None, hub_client).await?;
+
+                match ret {
+                    Some(response) => {
+                        let mut total_compressed_size = 0u64;
+
+                        for fetch_infos in response.fetch_info.values() {
+                            for fetch_info in fetch_infos {
+                                // Calculate end - start for each url_range as specified
+                                let range_size = fetch_info.url_range.end - fetch_info.url_range.start;
+                                total_compressed_size += range_size;
+                            }
+                        }
+
+                        let total_uncompressed_size: u64 =
+                            response.terms.iter().map(|term| term.unpacked_length as u64).sum();
+
+                        println!("Compressed Size: {}", format_bytes_with_units(total_compressed_size));
+                        println!("Uncompressed Size: {}", format_bytes_with_units(total_uncompressed_size));
+                        println!(
+                            "Compression Ratio: {:.2}%",
+                            (total_compressed_size as f64 / total_uncompressed_size as f64) * 100.0
+                        );
+                        Ok(())
+                    },
+                    None => {
+                        eprintln!("No reconstruction information found for hash {}", arg.hash);
+                        Ok(())
+                    },
+                }
+            },
         }
     }
 }
@@ -193,6 +236,13 @@ fn is_git_special_files(path: &str) -> bool {
     matches!(path, ".git" | ".gitignore" | ".gitattributes")
 }
 
+/// Format bytes with binary and decimal units on one line
+fn format_bytes_with_units(bytes: u64) -> String {
+    let binary = format_size(bytes, BINARY);
+    let decimal = format_size(bytes, DECIMAL);
+    format!("{} bytes {} {}", bytes, binary, decimal)
+}
+
 async fn query_reconstruction(
     file_hash: MerkleHash,
     bytes_range: Option<FileRange>,
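
To make the new output concrete, below is a minimal, self-contained sketch of the byte-formatting helper and the ratio calculation outside the CLI. It assumes humansize 2.x; the `main` function and the sample byte counts are illustrative only and are not part of this commit.

// Minimal sketch, assuming humansize 2.x. The sample byte counts are made up
// for illustration; only format_bytes_with_units mirrors the diff above.
use humansize::{BINARY, DECIMAL, format_size};

fn format_bytes_with_units(bytes: u64) -> String {
    let binary = format_size(bytes, BINARY); // e.g. 1_048_576 -> "1 MiB"
    let decimal = format_size(bytes, DECIMAL); // e.g. 1_048_576 -> "1.05 MB"
    format!("{} bytes {} {}", bytes, binary, decimal)
}

fn main() {
    let compressed: u64 = 734_003; // hypothetical sum of url_range sizes
    let uncompressed: u64 = 1_048_576; // hypothetical sum of term unpacked lengths
    println!("Compressed Size: {}", format_bytes_with_units(compressed));
    println!("Uncompressed Size: {}", format_bytes_with_units(uncompressed));
    // Ratio is printed as compressed / uncompressed, as in the diff (~70.00% here).
    println!("Compression Ratio: {:.2}%", (compressed as f64 / uncompressed as f64) * 100.0);
}

With clap's default kebab-case naming for derived subcommands, the new variant would be reachable as a compressed-size subcommand taking the file hash, though the binary name itself is not shown in this diff.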