@@ -15,6 +15,12 @@ use log::{debug, error, info, warn};
1515static RUNNER_ID : AtomicU64 = AtomicU64 :: new ( 0 ) ;
1616static EXITING : AtomicU32 = AtomicU32 :: new ( 0 ) ;
1717const MAX_SPAWN_RETRIES : u32 = 10 ;
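18+ /// The final builder name is `{BUILDER_NAME}.{suffix}.{RUNNER_ID}`, where `suffix` is the value of the environment variable named by `RUNNER_SUFFIX_ENV` (empty if it cannot be read). Runner names follow the same scheme with `RUNNER_NAME`.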
19+ const BUILDER_NAME : & str = "dresden-hos-builder" ;
20+ const RUNNER_NAME : & str = "dresden-hos-runner" ;
21+ const RUNNER_SUFFIX_ENV : & str = "RUNNER_SUFFIX" ;
22+ /// How long the loop will sleep, in seconds (passed to `Duration::from_secs`).
23+ const LOOP_SLEEP : u64 = 30 ;
1824
1925#[ derive( Parser , Debug ) ]
2026#[ clap( version) ]
@@ -45,8 +51,9 @@ impl RunnerConfig {
4551 RunnerConfig {
4652 servo_ci_scope : servo_ci_scope. to_string ( ) ,
4753 name : format ! (
48- "dresden-hos-builder.{}.{}" ,
49- std:: env:: var( "RUNNER_SUFFIX" ) . unwrap_or_default( ) ,
54+ "{}.{}.{}" ,
55+ BUILDER_NAME ,
56+ std:: env:: var( RUNNER_SUFFIX_ENV ) . unwrap_or_default( ) ,
5057 RUNNER_ID . fetch_add( 1 , Ordering :: Relaxed ) ,
5158 ) ,
5259 runner_group_id : 1 ,
@@ -88,8 +95,9 @@ impl RunnerConfig {
8895 Ok ( RunnerConfig {
8996 servo_ci_scope : servo_ci_scope. to_string ( ) ,
9097 name : format ! (
91- "dresden-hos-runner.{}.{}" ,
92- std:: env:: var( "RUNNER_SUFFIX" ) . unwrap_or_default( ) ,
98+ "{}.{}.{}" ,
99+ RUNNER_NAME ,
100+ std:: env:: var( RUNNER_SUFFIX_ENV ) . unwrap_or_default( ) ,
93101 RUNNER_ID . fetch_add( 1 , Ordering :: Relaxed )
94102 ) ,
95103 runner_group_id : 1 ,
@@ -152,7 +160,7 @@ fn call_github_runner_api(
152160 cmd. arg ( "--raw-field" ) . arg ( format ! ( "labels[]={label}" ) ) ;
153161 }
154162 cmd. arg ( "--raw-field" )
155- // Todo: perhaps have a count here? Or add information if it has a device or not
163+ // Todo: perhaps add information if it has a device or not
156164 . arg ( format ! ( "name={}" , config. name) )
157165 . arg ( "--raw-field" )
158166 . arg ( format ! ( "work_folder={}" , config. work_folder) )
@@ -242,8 +250,18 @@ fn kill_offline_runners(servo_ci_scope: &str) -> Result<(), SpawnRunnerError> {
242250 let filtered_response = runner_response
243251 . runners
244252 . iter ( )
245- . filter ( |runner| runner. name . contains ( "dresden-hos" ) )
246- . filter ( |runner| runner. status . contains ( "offline" ) ) ;
253+ . filter ( |runner| runner. status . contains ( "offline" ) )
254+ . filter ( |runner| {
255+ runner. name . contains ( & format ! (
256+ "{}.{}" ,
257+ RUNNER_NAME ,
258+ std:: env:: var( RUNNER_SUFFIX_ENV ) . unwrap_or_default( )
259+ ) ) || runner. name . contains ( & format ! (
260+ "{}.{}" ,
261+ BUILDER_NAME ,
262+ std:: env:: var( RUNNER_SUFFIX_ENV ) . unwrap_or_default( )
263+ ) )
264+ } ) ;
247265
248266 for i in filtered_response {
249267 info ! (
@@ -297,45 +315,46 @@ fn main() -> anyhow::Result<()> {
297315 let mut running_hos_runners = vec ! [ ] ;
298316 // Todo: implement something to reserve devices for the duration of the docker run child process.
299317 const MAX_HOS_RUNNERS : usize = 1 ;
300- let mut retries = 0 ;
318+ let mut retries_builder = 0 ;
319+ let mut retries_runner = 0 ;
301320
302321 loop {
303322 let exiting = EXITING . load ( Ordering :: Relaxed ) ;
304323 if running_hos_builders. len ( ) < args. concurrent_builders . into ( ) && exiting == 0 {
305324 match spawn_runner ( & RunnerConfig :: new_hos_builder ( & servo_ci_scope) ) {
306325 Ok ( child) => {
307- retries = 0 ;
326+ retries_builder = 0 ;
308327 running_hos_builders. push ( child)
309328 }
310329 Err ( SpawnRunnerError :: GhApiError ( _, message) )
311330 if message. contains ( "gh: Already exists" ) =>
312331 {
313332 // Might happen if containers were not killed properly after a forced exit.
314333 info ! ( "Runner name already taken - Will retry with new name later." ) ;
315- check_and_inc_retries ( & mut retries ) ;
334+ check_and_inc_retries ( & mut retries_builder ) ;
316335 }
317336 Err ( e) => {
318337 error ! ( "Failed to spawn JIT runner: {e:?}" ) ;
319- check_and_inc_retries ( & mut retries ) ;
338+ check_and_inc_retries ( & mut retries_builder ) ;
320339 }
321340 } ;
322341 }
323342 if running_hos_runners. len ( ) < MAX_HOS_RUNNERS && exiting == 0 {
324343 match RunnerConfig :: new_hos_runner ( & servo_ci_scope) . and_then ( |cfg| spawn_runner ( & cfg) ) {
325344 Ok ( child) => {
326- retries = 0 ;
345+ retries_runner = 0 ;
327346 running_hos_runners. push ( child)
328347 }
329348 Err ( SpawnRunnerError :: GhApiError ( _, message) )
330349 if message. contains ( "gh: Already exists" ) =>
331350 {
332351 // Might happen if containers were not killed properly after a forced exit.
333352 info ! ( "Runner name already taken - Will retry with new name later." ) ;
334- check_and_inc_retries ( & mut retries ) ;
353+ check_and_inc_retries ( & mut retries_runner ) ;
335354 }
336355 Err ( e) => {
337356 error ! ( "Failed to spawn JIT runner with HOS device: {e:?}" ) ;
338- check_and_inc_retries ( & mut retries ) ;
357+ check_and_inc_retries ( & mut retries_runner ) ;
339358 }
340359 } ;
341360 }
@@ -392,7 +411,7 @@ fn main() -> anyhow::Result<()> {
392411 thread:: sleep ( Duration :: from_millis ( 500 ) ) ;
393412 }
394413
395- thread:: sleep ( Duration :: from_secs ( 5 ) ) ;
414+ thread:: sleep ( Duration :: from_secs ( LOOP_SLEEP ) ) ;
396415 // Check if some still running images are listed as offline from github api point of view
397416 if let Err ( e) = kill_offline_runners ( & servo_ci_scope) {
398417 error ! ( "Killing offline runners failed with {e:?}" ) ;
0 commit comments