Skip to content

Commit 90e34bc

Browse files
committed
Requested changes
Signed-off-by: Narfinger <[email protected]>
1 parent 9396dbb commit 90e34bc

File tree

1 file changed

+37
-16
lines changed
  • docker/docker_jit_monitor/src

1 file changed

+37
-16
lines changed

docker/docker_jit_monitor/src/main.rs

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@ use log::{debug, error, info, warn};
1515
static RUNNER_ID: AtomicU64 = AtomicU64::new(0);
1616
static EXITING: AtomicU32 = AtomicU32::new(0);
1717
const MAX_SPAWN_RETRIES: u32 = 10;
18+
/// The final builder name is `{BUILDER_NAME}.{value of the RUNNER_SUFFIX_ENV environment variable}.{RUNNER_ID}`; runner names are built the same way from `RUNNER_NAME`.
19+
const BUILDER_NAME: &str = "dresden-hos-builder";
20+
const RUNNER_NAME: &str = "dresden-hos-runner";
21+
const RUNNER_SUFFIX_ENV: &str = "RUNNER_SUFFIX";
22+
/// Delay in seconds before the checking loop starts again (not a timeout for spawning runners via the API).
23+
const LOOP_TIMEOUT: u64 = 30;
1824

1925
#[derive(Parser, Debug)]
2026
#[clap(version)]
@@ -45,8 +51,9 @@ impl RunnerConfig {
4551
RunnerConfig {
4652
servo_ci_scope: servo_ci_scope.to_string(),
4753
name: format!(
48-
"dresden-hos-builder.{}.{}",
49-
std::env::var("RUNNER_SUFFIX").unwrap_or_default(),
54+
"{}.{}.{}",
55+
BUILDER_NAME,
56+
std::env::var(RUNNER_SUFFIX_ENV).unwrap_or_default(),
5057
RUNNER_ID.fetch_add(1, Ordering::Relaxed),
5158
),
5259
runner_group_id: 1,
@@ -88,8 +95,9 @@ impl RunnerConfig {
8895
Ok(RunnerConfig {
8996
servo_ci_scope: servo_ci_scope.to_string(),
9097
name: format!(
91-
"dresden-hos-runner.{}.{}",
92-
std::env::var("RUNNER_SUFFIX").unwrap_or_default(),
98+
"{}.{}.{}",
99+
RUNNER_NAME,
100+
std::env::var(RUNNER_SUFFIX_ENV).unwrap_or_default(),
93101
RUNNER_ID.fetch_add(1, Ordering::Relaxed)
94102
),
95103
runner_group_id: 1,
@@ -152,7 +160,7 @@ fn call_github_runner_api(
152160
cmd.arg("--raw-field").arg(format!("labels[]={label}"));
153161
}
154162
cmd.arg("--raw-field")
155-
// Todo: perhaps have a count here? Or add information if it has a device or not
163+
// Todo: perhaps add information if it has a device or not
156164
.arg(format!("name={}", config.name))
157165
.arg("--raw-field")
158166
.arg(format!("work_folder={}", config.work_folder))
@@ -242,8 +250,20 @@ fn kill_offline_runners(servo_ci_scope: &str) -> Result<(), SpawnRunnerError> {
242250
let filtered_response = runner_response
243251
.runners
244252
.iter()
245-
.filter(|runner| runner.name.contains("dresden-hos"))
246-
.filter(|runner| runner.status.contains("offline"));
253+
.filter(|runner| runner.status.contains("offline"))
254+
.filter(|runner| {
255+
runner.name.contains(
256+
&format!(
257+
"{}.{}",
258+
RUNNER_NAME,
259+
std::env::var(RUNNER_SUFFIX_ENV).unwrap_or_default()
260+
)) || runner.name.contains(&format!(
261+
"{}.{}",
262+
BUILDER_NAME,
263+
std::env::var(RUNNER_SUFFIX_ENV).unwrap_or_default()
264+
),
265+
)
266+
});
247267

248268
for i in filtered_response {
249269
info!(
@@ -297,45 +317,46 @@ fn main() -> anyhow::Result<()> {
297317
let mut running_hos_runners = vec![];
298318
// Todo: implement something to reserve devices for the duration of the docker run child process.
299319
const MAX_HOS_RUNNERS: usize = 1;
300-
let mut retries = 0;
320+
let mut retries_builder = 0;
321+
let mut retries_runner = 0;
301322

302323
loop {
303324
let exiting = EXITING.load(Ordering::Relaxed);
304325
if running_hos_builders.len() < args.concurrent_builders.into() && exiting == 0 {
305326
match spawn_runner(&RunnerConfig::new_hos_builder(&servo_ci_scope)) {
306327
Ok(child) => {
307-
retries = 0;
328+
retries_builder = 0;
308329
running_hos_builders.push(child)
309330
}
310331
Err(SpawnRunnerError::GhApiError(_, message))
311332
if message.contains("gh: Already exists") =>
312333
{
313334
// Might happen if containers were not killed properly after a forced exit.
314-
info!("Runner name already taken - Will retry with new name later.");
315-
check_and_inc_retries(&mut retries);
335+
info!("Runner name already taken - Will retry with new name later.");
336+
check_and_inc_retries(&mut retries_builder);
316337
}
317338
Err(e) => {
318339
error!("Failed to spawn JIT runner: {e:?}");
319-
check_and_inc_retries(&mut retries);
340+
check_and_inc_retries(&mut retries_builder);
320341
}
321342
};
322343
}
323344
if running_hos_runners.len() < MAX_HOS_RUNNERS && exiting == 0 {
324345
match RunnerConfig::new_hos_runner(&servo_ci_scope).and_then(|cfg| spawn_runner(&cfg)) {
325346
Ok(child) => {
326-
retries = 0;
347+
retries_runner = 0;
327348
running_hos_runners.push(child)
328349
}
329350
Err(SpawnRunnerError::GhApiError(_, message))
330351
if message.contains("gh: Already exists") =>
331352
{
332353
// Might happen if containers were not killed properly after a forced exit.
333354
info!("Runner name already taken - Will retry with new name later.");
334-
check_and_inc_retries(&mut retries);
355+
check_and_inc_retries(&mut retries_runner);
335356
}
336357
Err(e) => {
337358
error!("Failed to spawn JIT runner with HOS device: {e:?}");
338-
check_and_inc_retries(&mut retries);
359+
check_and_inc_retries(&mut retries_runner);
339360
}
340361
};
341362
}
@@ -392,7 +413,7 @@ fn main() -> anyhow::Result<()> {
392413
thread::sleep(Duration::from_millis(500));
393414
}
394415

395-
thread::sleep(Duration::from_secs(5));
416+
thread::sleep(Duration::from_secs(LOOP_TIMEOUT));
396417
// Check if some still running images are listed as offline from github api point of view
397418
if let Err(e) = kill_offline_runners(&servo_ci_scope) {
398419
error!("Killing offline runners failed with {e:?}");

0 commit comments

Comments
 (0)