@@ -12,6 +12,7 @@ use data::data_client::default_config;
1212use data:: migration_tool:: hub_client_token_refresher:: HubClientTokenRefresher ;
1313use data:: migration_tool:: migrate:: migrate_files_impl;
1414use hub_client:: { BearerCredentialHelper , HubClient , Operation , RepoInfo } ;
15+ use humansize:: { BINARY , DECIMAL , format_size} ;
1516use merklehash:: MerkleHash ;
1617use utils:: auth:: TokenRefresher ;
1718use walkdir:: WalkDir ;
@@ -76,6 +77,8 @@ enum Command {
7677 Dedup ( DedupArg ) ,
7778 /// Queries reconstruction information about a file.
7879 Query ( QueryArg ) ,
80+ /// Calculates the compressed size of a xet-file by summing url_range sizes.
81+ CompressedSize ( CompressedSizeArg ) ,
7982}
8083
8184#[ derive( Args ) ]
@@ -116,6 +119,12 @@ struct QueryArg {
116119 bytes_range : Option < FileRange > ,
117120}
118121
// Command-line arguments for the `CompressedSize` subcommand.
// The hash is kept as the raw string the user supplied; the `CompressedSize`
// handler parses it with `MerkleHash::from_hex` and reuses the original text
// in its "not found" message.
122+ #[ derive( Args ) ]
123+ struct CompressedSizeArg {
124+ /// Xet-hash of a file.
125+ hash : String ,
126+ }
127+
119128impl Command {
120129 async fn run ( self , hub_client : HubClient ) -> Result < ( ) > {
121130 match self {
@@ -161,6 +170,39 @@ impl Command {
161170
162171 Ok ( ( ) )
163172 } ,
173+ Command :: CompressedSize ( arg) => {
174+ let file_hash = MerkleHash :: from_hex ( & arg. hash ) ?;
175+ // Query reconstruction for full file (no Range header)
176+ let ret = query_reconstruction ( file_hash, None , hub_client) . await ?;
177+
178+ match ret {
179+ Some ( response) => {
180+ let mut total_compressed_size = 0u64 ;
181+
182+ for fetch_infos in response. fetch_info . values ( ) {
183+ for fetch_info in fetch_infos {
184+ let range_size = fetch_info. url_range . end - fetch_info. url_range . start ;
185+ total_compressed_size += range_size;
186+ }
187+ }
188+
189+ let total_uncompressed_size: u64 =
190+ response. terms . iter ( ) . map ( |term| term. unpacked_length as u64 ) . sum ( ) ;
191+
192+ println ! ( "Compressed Size: {}" , format_bytes_with_units( total_compressed_size) ) ;
193+ println ! ( "Uncompressed Size: {}" , format_bytes_with_units( total_uncompressed_size) ) ;
194+ println ! (
195+ "Compression Ratio: {:.2}%" ,
196+ ( total_compressed_size as f64 / total_uncompressed_size as f64 ) * 100.0
197+ ) ;
198+ Ok ( ( ) )
199+ } ,
200+ None => {
201+ eprintln ! ( "No reconstruction information found for hash {}" , arg. hash) ;
202+ Ok ( ( ) )
203+ } ,
204+ }
205+ } ,
164206 }
165207 }
166208}
@@ -193,6 +235,13 @@ fn is_git_special_files(path: &str) -> bool {
193235 matches ! ( path, ".git" | ".gitignore" | ".gitattributes" )
194236}
195237
238+ /// Format bytes with binary and decimal units on one line
239+ fn format_bytes_with_units ( bytes : u64 ) -> String {
240+ let binary = format_size ( bytes, BINARY ) ;
241+ let decimal = format_size ( bytes, DECIMAL ) ;
242+ format ! ( "{} bytes {} {}" , bytes, binary, decimal)
243+ }
244+
196245async fn query_reconstruction (
197246 file_hash : MerkleHash ,
198247 bytes_range : Option < FileRange > ,
0 commit comments