Skip to content

Commit 9faf393

Browse files
committed
Requested changes
Signed-off-by: Narfinger <[email protected]>
1 parent 9396dbb commit 9faf393

File tree

1 file changed

+34
-15
lines changed
  • docker/docker_jit_monitor/src

1 file changed

+34
-15
lines changed

docker/docker_jit_monitor/src/main.rs

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@ use log::{debug, error, info, warn};
1515
static RUNNER_ID: AtomicU64 = AtomicU64::new(0);
1616
static EXITING: AtomicU32 = AtomicU32::new(0);
1717
const MAX_SPAWN_RETRIES: u32 = 10;
18+
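/// The final builder name will be `{BUILDER_NAME}.{suffix}.{RUNNER_ID}`, where `suffix` is the value of the environment variable named by `RUNNER_SUFFIX_ENV` (empty if unset); runner names follow the same pattern with `RUNNER_NAME`.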
19+
const BUILDER_NAME: &str = "dresden-hos-builder";
20+
const RUNNER_NAME: &str = "dresden-hos-runner";
21+
const RUNNER_SUFFIX_ENV: &str = "RUNNER_SUFFIX";
22+
/// How long the loop will sleep, in seconds.
23+
const LOOP_SLEEP: u64 = 30;
1824

1925
#[derive(Parser, Debug)]
2026
#[clap(version)]
@@ -45,8 +51,9 @@ impl RunnerConfig {
4551
RunnerConfig {
4652
servo_ci_scope: servo_ci_scope.to_string(),
4753
name: format!(
48-
"dresden-hos-builder.{}.{}",
49-
std::env::var("RUNNER_SUFFIX").unwrap_or_default(),
54+
"{}.{}.{}",
55+
BUILDER_NAME,
56+
std::env::var(RUNNER_SUFFIX_ENV).unwrap_or_default(),
5057
RUNNER_ID.fetch_add(1, Ordering::Relaxed),
5158
),
5259
runner_group_id: 1,
@@ -88,8 +95,9 @@ impl RunnerConfig {
8895
Ok(RunnerConfig {
8996
servo_ci_scope: servo_ci_scope.to_string(),
9097
name: format!(
91-
"dresden-hos-runner.{}.{}",
92-
std::env::var("RUNNER_SUFFIX").unwrap_or_default(),
98+
"{}.{}.{}",
99+
RUNNER_NAME,
100+
std::env::var(RUNNER_SUFFIX_ENV).unwrap_or_default(),
93101
RUNNER_ID.fetch_add(1, Ordering::Relaxed)
94102
),
95103
runner_group_id: 1,
@@ -152,7 +160,7 @@ fn call_github_runner_api(
152160
cmd.arg("--raw-field").arg(format!("labels[]={label}"));
153161
}
154162
cmd.arg("--raw-field")
155-
// Todo: perhaps have a count here? Or add information if it has a device or not
163+
// Todo: perhaps add information if it has a device or not
156164
.arg(format!("name={}", config.name))
157165
.arg("--raw-field")
158166
.arg(format!("work_folder={}", config.work_folder))
@@ -242,8 +250,18 @@ fn kill_offline_runners(servo_ci_scope: &str) -> Result<(), SpawnRunnerError> {
242250
let filtered_response = runner_response
243251
.runners
244252
.iter()
245-
.filter(|runner| runner.name.contains("dresden-hos"))
246-
.filter(|runner| runner.status.contains("offline"));
253+
.filter(|runner| runner.status.contains("offline"))
254+
.filter(|runner| {
255+
runner.name.contains(&format!(
256+
"{}.{}",
257+
RUNNER_NAME,
258+
std::env::var(RUNNER_SUFFIX_ENV).unwrap_or_default()
259+
)) || runner.name.contains(&format!(
260+
"{}.{}",
261+
BUILDER_NAME,
262+
std::env::var(RUNNER_SUFFIX_ENV).unwrap_or_default()
263+
))
264+
});
247265

248266
for i in filtered_response {
249267
info!(
@@ -297,45 +315,46 @@ fn main() -> anyhow::Result<()> {
297315
let mut running_hos_runners = vec![];
298316
// Todo: implement something to reserve devices for the duration of the docker run child process.
299317
const MAX_HOS_RUNNERS: usize = 1;
300-
let mut retries = 0;
318+
let mut retries_builder = 0;
319+
let mut retries_runner = 0;
301320

302321
loop {
303322
let exiting = EXITING.load(Ordering::Relaxed);
304323
if running_hos_builders.len() < args.concurrent_builders.into() && exiting == 0 {
305324
match spawn_runner(&RunnerConfig::new_hos_builder(&servo_ci_scope)) {
306325
Ok(child) => {
307-
retries = 0;
326+
retries_builder = 0;
308327
running_hos_builders.push(child)
309328
}
310329
Err(SpawnRunnerError::GhApiError(_, message))
311330
if message.contains("gh: Already exists") =>
312331
{
313332
// Might happen if containers were not killed properly after a forced exit.
314333
info!("Runner name already taken - Will retry with new name later.");
315-
check_and_inc_retries(&mut retries);
334+
check_and_inc_retries(&mut retries_builder);
316335
}
317336
Err(e) => {
318337
error!("Failed to spawn JIT runner: {e:?}");
319-
check_and_inc_retries(&mut retries);
338+
check_and_inc_retries(&mut retries_builder);
320339
}
321340
};
322341
}
323342
if running_hos_runners.len() < MAX_HOS_RUNNERS && exiting == 0 {
324343
match RunnerConfig::new_hos_runner(&servo_ci_scope).and_then(|cfg| spawn_runner(&cfg)) {
325344
Ok(child) => {
326-
retries = 0;
345+
retries_runner = 0;
327346
running_hos_runners.push(child)
328347
}
329348
Err(SpawnRunnerError::GhApiError(_, message))
330349
if message.contains("gh: Already exists") =>
331350
{
332351
// Might happen if containers were not killed properly after a forced exit.
333352
info!("Runner name already taken - Will retry with new name later.");
334-
check_and_inc_retries(&mut retries);
353+
check_and_inc_retries(&mut retries_runner);
335354
}
336355
Err(e) => {
337356
error!("Failed to spawn JIT runner with HOS device: {e:?}");
338-
check_and_inc_retries(&mut retries);
357+
check_and_inc_retries(&mut retries_runner);
339358
}
340359
};
341360
}
@@ -392,7 +411,7 @@ fn main() -> anyhow::Result<()> {
392411
thread::sleep(Duration::from_millis(500));
393412
}
394413

395-
thread::sleep(Duration::from_secs(5));
414+
thread::sleep(Duration::from_secs(LOOP_SLEEP));
396415
// Check whether any still-running images are listed as offline from the GitHub API's point of view
397416
if let Err(e) = kill_offline_runners(&servo_ci_scope) {
398417
error!("Killing offline runners failed with {e:?}");

0 commit comments

Comments
 (0)