@@ -11,11 +11,8 @@ use crate::utils;
1111use rustwide:: logging:: { self , LogStorage } ;
1212use rustwide:: { BuildDirectory , Workspace } ;
1313use std:: collections:: HashMap ;
14- use std:: sync:: Condvar ;
15- use std:: sync:: {
16- atomic:: { AtomicBool , Ordering } ,
17- Mutex ,
18- } ;
14+ use std:: sync:: Mutex ;
15+ use std:: sync:: { Arc , Condvar , OnceLock } ;
1916use std:: time:: Duration ;
2017
2118pub trait RecordProgress : Send + Sync {
@@ -51,8 +48,10 @@ pub(super) struct Worker<'a> {
5148 ex : & ' a Experiment ,
5249 config : & ' a crate :: config:: Config ,
5350 api : & ' a dyn RecordProgress ,
54- target_dir_cleanup : AtomicBool ,
5551 next_crate : & ' a ( dyn Fn ( ) -> Fallible < Option < Crate > > + Send + Sync ) ,
52+
53+ // Called by the worker thread between crates, when no global state (namely caches) is in use.
54+ pub ( super ) between_crates : OnceLock < Box < dyn Fn ( bool ) + Send + Sync + ' a > > ,
5655}
5756
5857impl < ' a > Worker < ' a > {
@@ -81,7 +80,8 @@ impl<'a> Worker<'a> {
8180 config,
8281 next_crate,
8382 api,
84- target_dir_cleanup : AtomicBool :: new ( false ) ,
83+
84+ between_crates : OnceLock :: new ( ) ,
8585 }
8686 }
8787
@@ -163,14 +163,21 @@ impl<'a> Worker<'a> {
163163 // Backoff from calling the server again, to reduce load when we're spinning until
164164 // the next experiment is ready.
165165 std:: thread:: sleep ( Duration :: from_secs ( rand:: random_range ( 60 ..120 ) ) ) ;
166+
167+ if let Some ( cb) = self . between_crates . get ( ) {
168+ cb ( true ) ;
169+ }
170+
166171 // We're done if no more crates left.
167172 return Ok ( ( ) ) ;
168173 } ;
169174
170- self . maybe_cleanup_target_dir ( ) ?;
171-
172175 info ! ( "{} processing crate {}" , self . name, krate) ;
173176
177+ if let Some ( cb) = self . between_crates . get ( ) {
178+ cb ( false ) ;
179+ }
180+
174181 if !self . ex . ignore_blacklist && self . config . should_skip ( & krate) {
175182 for tc in & self . ex . toolchains {
176183 // If a skipped crate is somehow sent to the agent (for example, when a crate was
@@ -338,41 +345,36 @@ impl<'a> Worker<'a> {
338345 }
339346 }
340347 }
341-
342- fn maybe_cleanup_target_dir ( & self ) -> Fallible < ( ) > {
343- if !self . target_dir_cleanup . swap ( false , Ordering :: SeqCst ) {
344- return Ok ( ( ) ) ;
345- }
346- info ! ( "purging target dir for {}" , self . name) ;
347- for dir in self . build_dir . values ( ) {
348- dir. lock ( ) . unwrap ( ) . purge ( ) ?;
349- }
350-
351- Ok ( ( ) )
352- }
353-
354- fn schedule_target_dir_cleanup ( & self ) {
355- self . target_dir_cleanup . store ( true , Ordering :: SeqCst ) ;
356- }
357348}
358349
359- pub ( super ) struct DiskSpaceWatcher < ' a > {
350+ pub ( super ) struct DiskSpaceWatcher {
360351 interval : Duration ,
361352 threshold : f32 ,
362- workers : & ' a [ Worker < ' a > ] ,
363353 should_stop : Mutex < bool > ,
364354 waiter : Condvar ,
355+
356+ worker_count : usize ,
357+
358+ // If the bool is true, that means we're waiting for the cache to reach zero, in which case
359+ // workers will wait for it to be false before starting. This gives us a global 'is the cache
360+ // in use' synchronization point.
361+ cache_in_use : Mutex < ( usize , bool ) > ,
362+ cache_waiter : Condvar ,
365363}
366364
367- impl < ' a > DiskSpaceWatcher < ' a > {
368- pub ( super ) fn new ( interval : Duration , threshold : f32 , workers : & ' a [ Worker < ' a > ] ) -> Self {
369- DiskSpaceWatcher {
365+ impl DiskSpaceWatcher {
366+ pub ( super ) fn new ( interval : Duration , threshold : f32 , worker_count : usize ) -> Arc < Self > {
367+ Arc :: new ( DiskSpaceWatcher {
370368 interval,
371369 threshold,
372- workers,
373370 should_stop : Mutex :: new ( false ) ,
374371 waiter : Condvar :: new ( ) ,
375- }
372+
373+ worker_count,
374+
375+ cache_in_use : Mutex :: new ( ( 0 , false ) ) ,
376+ cache_waiter : Condvar :: new ( ) ,
377+ } )
376378 }
377379
378380 pub ( super ) fn stop ( & self ) {
@@ -406,14 +408,67 @@ impl<'a> DiskSpaceWatcher<'a> {
406408 } ;
407409
408410 if usage. is_threshold_reached ( self . threshold ) {
409- warn ! ( "running the scheduled thread cleanup" ) ;
410- for worker in self . workers {
411- worker. schedule_target_dir_cleanup ( ) ;
412- }
411+ self . clean ( workspace) ;
412+ }
413+ }
413414
414- if let Err ( e) = workspace. purge_all_caches ( ) {
415- warn ! ( "failed to purge caches: {:?}" , e) ;
416- }
415+ fn clean ( & self , workspace : & dyn ToClean ) {
416+ warn ! ( "declaring interest in worker idle" ) ;
417+
418+ // Set interest in cleaning caches and then wait for cache use to drain to zero.
419+ let mut guard = self . cache_in_use . lock ( ) . unwrap ( ) ;
420+ guard. 1 = true ;
421+
422+ self . cache_waiter . notify_all ( ) ;
423+
424+ warn ! ( "declared interest in workers, waiting for everyone to idle" ) ;
425+
426+ let mut guard = self
427+ . cache_waiter
428+ . wait_while ( guard, |c| c. 0 != self . worker_count )
429+ . unwrap ( ) ;
430+
431+ // OK, purging caches, clear interest.
432+ guard. 1 = false ;
433+
434+ self . cache_waiter . notify_all ( ) ;
435+
436+ warn ! ( "purging all build dirs and caches" ) ;
437+
438+ workspace. purge ( ) ;
439+ }
440+
441+ pub ( super ) fn worker_idle ( & self , permanent : bool ) {
442+ log:: trace!( "worker at idle point" ) ;
443+ let mut guard = self . cache_in_use . lock ( ) . unwrap ( ) ;
444+ log:: trace!( "worker declared idle" ) ;
445+ // note that we're not running right now.
446+ guard. 0 += 1 ;
447+ self . cache_waiter . notify_all ( ) ;
448+ if !permanent {
449+ let mut guard = self . cache_waiter . wait_while ( guard, |c| c. 1 ) . unwrap ( ) ;
450+ // Then set ourselves as running.
451+ guard. 0 -= 1 ;
452+ self . cache_waiter . notify_all ( ) ;
453+ }
454+ }
455+ }
456+
457+ trait ToClean {
458+ fn purge ( & self ) ;
459+ }
460+
461+ impl ToClean for Workspace {
462+ fn purge ( & self ) {
463+ if let Err ( e) = self . purge_all_caches ( ) {
464+ warn ! ( "failed to purge caches: {:?}" , e) ;
465+ }
466+
467+ if let Err ( e) = self . purge_all_build_dirs ( ) {
468+ warn ! ( "failed to purge build directories: {:?}" , e) ;
417469 }
418470 }
419471}
472+
473+ #[ cfg( test) ]
474+ mod test;
0 commit comments