@@ -309,7 +309,8 @@ fn init_logging() {
309309 }
310310}
311311
312- const MAX_PER_CORE_LOAD : f64 = 10f64 ;
312+ // Maximum number of jobs per core; this load is only reached for a one-core server, usually it is less — see load_weight()
313+ const MAX_PER_CORE_LOAD : f64 = 2f64 ;
313314const SERVER_REMEMBER_ERROR_TIMEOUT : Duration = Duration :: from_secs ( 300 ) ;
314315const UNCLAIMED_PENDING_TIMEOUT : Duration = Duration :: from_secs ( 300 ) ;
315316const UNCLAIMED_READY_TIMEOUT : Duration = Duration :: from_secs ( 60 ) ;
@@ -399,6 +400,20 @@ impl Default for Scheduler {
399400 }
400401}
401402
403+ fn load_weight ( job_count : usize , core_count : usize ) -> f64 {
404+ // Oversubscribe cores just a little to make up for network and I/O latency. This formula is
405+ // not based on hard data but an extrapolation to high core counts of the conventional wisdom
406+ // that slightly more jobs than cores achieve the shortest compile time. Which is originally
407+ // about local compiles and this is over the network, so be slightly less conservative.
408+ let cores_plus_slack = core_count + 1 + core_count / 8 ;
409+ // Note >=, not >, because the question is "can we add another job"?
410+ if job_count >= cores_plus_slack {
411+ MAX_PER_CORE_LOAD + 1f64 // no new jobs for now
412+ } else {
413+ job_count as f64 / core_count as f64
414+ }
415+ }
416+
402417impl SchedulerIncoming for Scheduler {
403418 fn handle_alloc_job (
404419 & self ,
@@ -415,7 +430,7 @@ impl SchedulerIncoming for Scheduler {
415430 let mut best_load: f64 = MAX_PER_CORE_LOAD ;
416431 let now = Instant :: now ( ) ;
417432 for ( & server_id, details) in servers. iter_mut ( ) {
418- let load = details. jobs_assigned . len ( ) as f64 / details. num_cpus as f64 ;
433+ let load = load_weight ( details. jobs_assigned . len ( ) , details. num_cpus ) ;
419434
420435 if let Some ( last_error) = details. last_error {
421436 if load < MAX_PER_CORE_LOAD {
0 commit comments