@@ -21,6 +21,7 @@ use std::mem::take;
2121use std:: ops:: DerefMut ;
2222use std:: path:: { Path , PathBuf } ;
2323use std:: sync:: Arc ;
24+ use std:: time:: Instant ;
2425use tokio:: sync:: mpsc:: error:: TryRecvError ;
2526use tokio:: sync:: mpsc:: { channel, Receiver , Sender } ;
2627use tokio:: sync:: Mutex ;
@@ -76,6 +77,9 @@ pub struct Cleaner {
7677
7778 // Auxiliary info
7879 file_name : Option < PathBuf > ,
80+
81+ // Telemetry
82+ start : Instant ,
7983}
8084
8185impl Cleaner {
@@ -113,6 +117,7 @@ impl Cleaner {
113117 tracking_info : Mutex :: new ( Default :: default ( ) ) ,
114118 small_file_buffer : Mutex :: new ( Some ( Vec :: with_capacity ( small_file_threshold) ) ) ,
115119 file_name : file_name. map ( |f| f. to_owned ( ) ) ,
120+ start : Instant :: now ( ) ,
116121 } ) ;
117122
118123 Self :: run ( cleaner. clone ( ) , chunk_c) . await ;
@@ -239,8 +244,9 @@ impl Cleaner {
239244 Ok ( false )
240245 }
241246
242- async fn dedup ( & self , chunks : & [ ChunkYieldType ] ) -> Result < ( ) > {
247+ async fn dedup ( & self , chunks : & [ ChunkYieldType ] ) -> Result < u64 > {
243248 info ! ( "Dedup {} chunks" , chunks. len( ) ) ;
249+ let mut total_compressed_bytes = 0 ;
244250 let mut tracking_info = self . tracking_info . lock ( ) . await ;
245251
246252 let enable_global_dedup = self . enable_global_dedup_queries ;
@@ -463,13 +469,14 @@ impl Cleaner {
463469 tracking_info. cas_data . data . extend ( bytes) ;
464470
465471 if tracking_info. cas_data . data . len ( ) > TARGET_CAS_BLOCK_SIZE {
466- let cas_hash = register_new_cas_block (
472+ let ( cas_hash, compressed_bytes ) = register_new_cas_block (
467473 & mut tracking_info. cas_data ,
468474 & self . shard_manager ,
469475 & self . cas ,
470476 & self . cas_prefix ,
471477 )
472478 . await ?;
479+ total_compressed_bytes += compressed_bytes;
473480
474481 for i in take ( & mut tracking_info. current_cas_file_info_indices ) {
475482 tracking_info. file_info [ i] . cas_hash = cas_hash;
@@ -483,7 +490,7 @@ impl Cleaner {
483490 }
484491 }
485492
486- Ok ( ( ) )
493+ Ok ( total_compressed_bytes )
487494 }
488495
489496 async fn finish ( & self ) -> Result < ( ) > {
@@ -516,7 +523,8 @@ impl Cleaner {
516523 Ok ( ( ) )
517524 }
518525
519- async fn summarize_dedup_info ( & self ) -> Result < ( MerkleHash , u64 ) > {
526+ async fn summarize_dedup_info ( & self ) -> Result < ( MerkleHash , u64 , u64 ) > {
527+ let mut total_compressed_bytes = 0 ;
520528 let mut tracking_info = self . tracking_info . lock ( ) . await ;
521529
522530 let file_hash = file_node_hash (
@@ -577,13 +585,14 @@ impl Cleaner {
577585 if cas_data_accumulator. data . len ( ) >= TARGET_CAS_BLOCK_SIZE {
578586 let mut new_cas_data = take ( cas_data_accumulator. deref_mut ( ) ) ;
579587 drop ( cas_data_accumulator) ; // Release the lock.
580- register_new_cas_block (
588+ let ( _cas_hash , compressed_bytes ) = register_new_cas_block (
581589 & mut new_cas_data,
582590 & self . shard_manager ,
583591 & self . cas ,
584592 & self . cas_prefix ,
585593 )
586594 . await ?;
595+ total_compressed_bytes += compressed_bytes;
587596 } else {
588597 drop ( cas_data_accumulator) ;
589598 }
@@ -593,11 +602,11 @@ impl Cleaner {
593602
594603 * tracking_info = Default :: default ( ) ;
595604
596- Ok ( ( file_hash, file_size) )
605+ Ok ( ( file_hash, file_size, total_compressed_bytes ) )
597606 }
598607
599608 async fn to_pointer_file ( & self ) -> Result < String > {
600- let ( hash, filesize) = self . summarize_dedup_info ( ) . await ?;
609+ let ( hash, filesize, compressed_size ) = self . summarize_dedup_info ( ) . await ?;
601610 let pointer_file = PointerFile :: init_from_info (
602611 & self
603612 . file_name
@@ -606,6 +615,8 @@ impl Cleaner {
606615 . unwrap_or_default ( ) ,
607616 & hash. hex ( ) ,
608617 filesize,
618+ compressed_size,
619+ self . start . elapsed ( ) ,
609620 ) ;
610621 Ok ( pointer_file. to_string ( ) )
611622 }
0 commit comments