@@ -25,6 +25,7 @@ use std::mem::take;
 use std::ops::DerefMut;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
+use std::time::Instant;
 use tokio::sync::mpsc::error::TryRecvError;
 use tokio::sync::mpsc::{channel, Receiver, Sender};
 use tokio::sync::Mutex;
@@ -80,6 +81,9 @@ pub struct Cleaner {
 
     // Auxiliary info
     file_name: Option<PathBuf>,
+
+    // Telemetry
+    start: Instant,
 }
 
 impl Cleaner {
@@ -117,6 +121,7 @@ impl Cleaner {
             tracking_info: Mutex::new(Default::default()),
             small_file_buffer: Mutex::new(Some(Vec::with_capacity(small_file_threshold))),
             file_name: file_name.map(|f| f.to_owned()),
+            start: Instant::now(),
         });
 
         Self::run(cleaner.clone(), chunk_c).await;
@@ -242,8 +247,9 @@ impl Cleaner {
         Ok(false)
     }
 
-    async fn dedup(&self, chunks: &[ChunkYieldType]) -> Result<()> {
+    async fn dedup(&self, chunks: &[ChunkYieldType]) -> Result<u64> {
         info!("Dedup {} chunks", chunks.len());
+        let mut total_compressed_bytes = 0;
         let mut tracking_info = self.tracking_info.lock().await;
 
         let enable_global_dedup = self.enable_global_dedup_queries;
@@ -472,13 +478,14 @@ impl Cleaner {
             tracking_info.cas_data.data.extend(bytes);
 
             if tracking_info.cas_data.data.len() > TARGET_CAS_BLOCK_SIZE {
-                let cas_hash = register_new_cas_block(
+                let (cas_hash, compressed_bytes) = register_new_cas_block(
                     &mut tracking_info.cas_data,
                     &self.shard_manager,
                     &self.cas,
                     &self.cas_prefix,
                 )
                 .await?;
+                total_compressed_bytes += compressed_bytes;
 
                 for i in take(&mut tracking_info.current_cas_file_info_indices) {
                     tracking_info.file_info[i].cas_hash = cas_hash;
@@ -492,7 +499,7 @@ impl Cleaner {
             }
         }
 
-        Ok(())
+        Ok(total_compressed_bytes)
     }
 
     async fn finish(&self) -> Result<()> {
@@ -525,7 +532,8 @@ impl Cleaner {
         Ok(())
     }
 
-    async fn summarize_dedup_info(&self) -> Result<(MerkleHash, u64)> {
+    async fn summarize_dedup_info(&self) -> Result<(MerkleHash, u64, u64)> {
+        let mut total_compressed_bytes = 0;
         let mut tracking_info = self.tracking_info.lock().await;
 
         let file_hash = file_node_hash(
@@ -585,13 +593,14 @@ impl Cleaner {
         if cas_data_accumulator.data.len() >= TARGET_CAS_BLOCK_SIZE {
             let mut new_cas_data = take(cas_data_accumulator.deref_mut());
             drop(cas_data_accumulator); // Release the lock.
-            register_new_cas_block(
+            let (_cas_hash, compressed_bytes) = register_new_cas_block(
                 &mut new_cas_data,
                 &self.shard_manager,
                 &self.cas,
                 &self.cas_prefix,
             )
             .await?;
+            total_compressed_bytes += compressed_bytes;
         } else {
             drop(cas_data_accumulator);
         }
@@ -601,11 +610,11 @@ impl Cleaner {
 
         *tracking_info = Default::default();
 
-        Ok((file_hash, file_size))
+        Ok((file_hash, file_size, total_compressed_bytes))
     }
 
     async fn to_pointer_file(&self) -> Result<String> {
-        let (hash, filesize) = self.summarize_dedup_info().await?;
+        let (hash, filesize, compressed_size) = self.summarize_dedup_info().await?;
         let pointer_file = PointerFile::init_from_info(
             &self
                 .file_name
@@ -614,6 +623,8 @@ impl Cleaner {
                 .unwrap_or_default(),
             &hash.hex(),
             filesize,
+            compressed_size,
+            self.start.elapsed(),
         );
         Ok(pointer_file.to_string())
     }
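Taken together, the diff threads two pieces of telemetry through the cleaner: a running total of the compressed bytes reported by each register_new_cas_block call (accumulated in dedup and summarize_dedup_info, then returned alongside the file hash and size), and the wall-clock duration measured from a start: Instant captured at construction, both of which now flow into PointerFile::init_from_info. Below is a minimal, self-contained sketch of that pattern; Summary, register_block, and clean are illustrative stand-ins, not the crate's real API.

// Illustrative stand-ins only; not the crate's real types or functions.
use std::time::{Duration, Instant};

struct Summary {
    logical_bytes: u64,
    compressed_bytes: u64,
    elapsed: Duration,
}

// Stand-in for register_new_cas_block: pretend compression halves each
// block and report the compressed size back to the caller.
fn register_block(block: &[u8]) -> u64 {
    (block.len() as u64) / 2
}

fn clean(data: &[u8], block_size: usize) -> Summary {
    let start = Instant::now(); // like Cleaner::start, set in the constructor
    let mut compressed_bytes = 0;

    // Like dedup()/summarize_dedup_info(): every flushed block adds its
    // compressed size to a running total that is returned to the caller.
    for block in data.chunks(block_size) {
        compressed_bytes += register_block(block);
    }

    Summary {
        logical_bytes: data.len() as u64,
        compressed_bytes,
        elapsed: start.elapsed(), // like self.start.elapsed() in to_pointer_file
    }
}

fn main() {
    let s = clean(&[0u8; 10_000], 4096);
    println!(
        "{} logical bytes -> {} compressed bytes in {:?}",
        s.logical_bytes, s.compressed_bytes, s.elapsed
    );
}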