Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .beads/issues.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
{"id":"code-110","title":"Create skills/offload/SKILL.md","description":"Create the /offload skill SKILL.md with frontmatter, overview, invocation guide, decision guide, exit codes, debugging, CLI reference, and config groups reference (~150 lines)","status":"done","priority":2,"issue_type":"task","created_at":"2026-03-12T10:58:56.785986-07:00","created_by":"danver","updated_at":"2026-03-12T11:03:25.317108-07:00"}
{"id":"code-111","title":"Create install-skills.sh","description":"Create standalone bash installer that works both curl|bash and local. Symlinks in-repo, downloads from GitHub raw URLs standalone. Installs both /offload and /offload-onboard skills.","status":"done","priority":2,"issue_type":"task","created_at":"2026-03-12T10:59:01.642817-07:00","created_by":"danver","updated_at":"2026-03-12T11:09:18.367138-07:00"}
{"id":"code-112","title":"Modify justfile to delegate to install-skills.sh","description":"Replace hardcoded install-skill recipe (lines 39-55) with install-skills recipe that calls ./install-skills.sh, plus install-skill alias for backward compat.","status":"done","priority":2,"issue_type":"task","created_at":"2026-03-12T10:59:05.717901-07:00","created_by":"danver","updated_at":"2026-03-12T11:11:51.621362-07:00"}
{"id":"code-104b","title":"Replace flat default_duration with per-group average in scheduler","description":"Replace the hardcoded 1s default_duration in schedule_lpt with per-group average durations computed from historical data. Steps: (1) Add group() accessor to TestInstance. (2) Change schedule_lpt signature to accept HashMap\u003cString, Duration\u003e for group defaults instead of a single Duration. (3) In orchestrator.rs, compute per-group averages from the durations map + test records, and pass that mapping to schedule_lpt. (4) Update all scheduler tests. Keep the 1s fallback only when a group has zero historical data.","status":"done","priority":1,"issue_type":"task","owner":"jacob.kirmayer@imbue.com","created_at":"2026-03-17T12:02:13.11179-07:00","created_by":"Jacob Kirmayer","updated_at":"2026-03-17T12:07:04.910208-07:00"}
{"id":"code-11","title":"Rename project: Rename offload-*.toml config files to offload-*.toml","description":"Rename all configuration files with 'offload' prefix to use 'offload' prefix:\n- offload.toml -\u003e offload.toml\n- offload-local.toml -\u003e offload-local.toml\n- offload-modal.toml -\u003e offload-modal.toml\n- offload-cargo-local.toml -\u003e offload-cargo-local.toml\n- offload-cargo-modal.toml -\u003e offload-cargo-modal.toml\n- offload-computronium-modal.toml -\u003e offload-computronium-modal.toml\n- offload-sculptor-modal.toml -\u003e offload-sculptor-modal.toml\n\nAlso update the [offload] section in these files to [offload].","status":"done","priority":1,"issue_type":"task","created_at":"2026-01-29T18:25:03.560121502Z","created_by":"Danver Braganza","updated_at":"2026-01-29T18:45:18.15783543Z"}
{"id":"code-12","title":"Rename project: Update README.md from offload to offload","description":"Update README.md to replace all references to 'offload' with 'offload'. This includes:\n- Project title\n- Feature descriptions\n- Installation commands\n- CLI examples (offload init, offload run, etc.)\n- Configuration file references (offload.toml -\u003e offload.toml)\n- Example configuration sections ([offload] -\u003e [offload])\n- All documentation text","status":"done","priority":1,"issue_type":"task","created_at":"2026-01-29T18:25:08.706866046Z","created_by":"Danver Braganza","updated_at":"2026-01-29T18:50:11.476117046Z"}
{"id":"code-13","title":"Rename project: Update scripts/modal_sandbox.py from offload to offload","description":"Update scripts/modal_sandbox.py to replace all references to 'offload' with 'offload'. This includes:\n- Module docstring\n- CLI help text\n- Modal App names (offload-sandbox -\u003e offload-sandbox, offload-rust-sandbox -\u003e offload-rust-sandbox, etc.)\n- Function docstrings\n- Comments","status":"done","priority":1,"issue_type":"task","created_at":"2026-01-29T18:25:14.017333924Z","created_by":"Danver Braganza","updated_at":"2026-01-29T18:52:06.241321461Z"}
Expand Down
4 changes: 4 additions & 0 deletions src/framework.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ impl<'a> TestInstance<'a> {
/// Returns this test's identifier as a string slice borrowed from the underlying record.
///
/// The scheduler uses this value as the key when looking up a test's historical duration.
pub fn id(&self) -> &str {
&self.record.id
}

/// Returns the group stored on this test's record (`record.group`) as a borrowed string slice.
///
/// Callers use this value as the key for per-group default durations, which apply to tests
/// that have no historical duration of their own.
pub fn group(&self) -> &str {
&self.record.group
}
}

/// The result of executing a single test.
Expand Down
22 changes: 19 additions & 3 deletions src/orchestrator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ pub mod runner;
pub mod scheduler;
pub mod spawn;

use std::collections::VecDeque;
use std::collections::{HashMap, VecDeque};
use std::sync::Arc;
use std::sync::atomic::AtomicBool;
use std::time::Duration;
Expand Down Expand Up @@ -257,11 +257,27 @@ where
junit_path.display()
);
}
// Default duration for unknown tests: 1 second (conservative estimate)
// Compute per-group average durations for tests without historical data
let group_to_default_duration = {
let mut group_totals: HashMap<String, (Duration, usize)> = HashMap::new();
for test in &tests_to_run {
if let Some(&d) = durations.get(test.id()) {
let entry = group_totals
.entry(test.group().to_string())
.or_insert((Duration::ZERO, 0));
entry.0 += d;
entry.1 += 1;
}
}
group_totals
.into_iter()
.map(|(group, (total, count))| (group, total / count as u32))
.collect::<HashMap<String, Duration>>()
};
let batches = scheduler.schedule(
&tests_to_run,
&durations,
Duration::from_secs(1),
&group_to_default_duration,
Some(MAX_BATCH_DURATION),
);
drop(_sched_span);
Expand Down
43 changes: 25 additions & 18 deletions src/orchestrator/scheduler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,9 @@ impl Scheduler {
///
/// * `tests` - Tests to schedule
/// * `durations` - Historical test durations from previous runs.
/// Tests not in the map use `default_duration`.
/// * `default_duration` - Duration to use for tests without historical data.
/// Tests not in the map use the per-group average from `group_to_default_duration`.
/// * `group_to_default_duration` - Per-group average duration for tests without historical data.
/// Falls back to 1 second if the group has no entry.
/// * `max_batch_duration` - Optional cap on the total duration of each batch.
/// A single test that exceeds the cap is still placed alone in its own batch.
///
Expand All @@ -120,7 +121,7 @@ impl Scheduler {
&self,
tests: &[TestInstance<'a>],
durations: &HashMap<String, Duration>,
default_duration: Duration,
group_to_default_duration: &HashMap<String, Duration>,
max_batch_duration: Option<Duration>,
) -> Vec<Vec<TestInstance<'a>>> {
if tests.is_empty() {
Expand All @@ -134,11 +135,17 @@ impl Scheduler {
let duration = match durations.get(t.id()) {
Some(&d) => d,
None => {
let fallback = group_to_default_duration
.get(t.group())
.copied()
.unwrap_or(Duration::from_secs(1));
tracing::warn!(
"No historical duration for test '{}', using default",
t.id()
"No historical duration for test '{}', using group '{}' default {:?}",
t.id(),
t.group(),
fallback,
);
default_duration
fallback
}
};
(*t, duration)
Expand Down Expand Up @@ -192,7 +199,7 @@ mod tests {
let scheduler = Scheduler::new(4);
let durations = HashMap::new();
let batches: Vec<Vec<TestInstance>> =
scheduler.schedule(&[], &durations, Duration::from_secs(1), None);
scheduler.schedule(&[], &durations, &HashMap::new(), None);
assert!(batches.is_empty());
}

Expand All @@ -211,7 +218,7 @@ mod tests {
durations.insert("medium_test".to_string(), Duration::from_secs(5));
durations.insert("fast_test".to_string(), Duration::from_secs(1));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[test_coverage] (severity 3/5) (confidence 0.92)

The diff adds per-group average duration logic, but no test verifies this behavior. All existing scheduler tests pass an empty `HashMap::new()` for `group_to_default_duration`, so the per-group fallback path (where a group has a computed average that differs from the 1s default) is never tested. A test should be added where some tests have historical durations and others in the same group don't, verifying that the group average is used instead of the 1s fallback.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[test_coverage] (severity 3/5) (confidence 0.92)

No test was added to verify the per-group average duration behavior. There should be a test where some tests in a group have historical durations and others don't, verifying that the unknown tests use the group average rather than the 1s fallback. The existing test test_schedule_uses_default_for_unknown still passes an empty group_to_default_duration map, so it only tests the 1s fallback path, not the new per-group average feature.

let batches = scheduler.schedule(&tests, &durations, Duration::from_secs(1), None);
let batches = scheduler.schedule(&tests, &durations, &HashMap::new(), None);

// With LPT:
// 1. Assign slow_test (10s) to worker 0 -> loads: [10, 0]
Expand Down Expand Up @@ -241,7 +248,7 @@ mod tests {
durations.insert("test_b".to_string(), Duration::from_secs(5));
durations.insert("test_c".to_string(), Duration::from_secs(3));

let batches = scheduler.schedule(&tests, &durations, Duration::from_secs(1), None);
let batches = scheduler.schedule(&tests, &durations, &HashMap::new(), None);

// Each test in its own batch (3 workers, 3 tests)
// Sorted by duration: test_b (5s), test_c (3s), test_a (1s)
Expand All @@ -264,7 +271,7 @@ mod tests {
durations.insert("known_slow".to_string(), Duration::from_secs(10));
// unknown_test will use default of 1 second

let batches = scheduler.schedule(&tests, &durations, Duration::from_secs(1), None);
let batches = scheduler.schedule(&tests, &durations, &HashMap::new(), None);

assert_eq!(batches.len(), 2);
// known_slow (10s) should be in heaviest batch
Expand All @@ -286,7 +293,7 @@ mod tests {
let mut durations = HashMap::new();
durations.insert("test_a".to_string(), Duration::from_secs(5));

let batches = scheduler.schedule(&tests, &durations, Duration::from_secs(1), None);
let batches = scheduler.schedule(&tests, &durations, &HashMap::new(), None);

// Each instance of test_a must be in a different batch
assert_eq!(batches.len(), 3);
Expand Down Expand Up @@ -319,7 +326,7 @@ mod tests {
durations.insert("test_b".to_string(), Duration::from_secs(5));
durations.insert("test_c".to_string(), Duration::from_secs(1));

let batches = scheduler.schedule(&tests, &durations, Duration::from_secs(1), None);
let batches = scheduler.schedule(&tests, &durations, &HashMap::new(), None);

// Verify no batch contains duplicate test IDs
for batch in &batches {
Expand Down Expand Up @@ -348,7 +355,7 @@ mod tests {
let tests: Vec<_> = records.iter().map(|r| r.test()).collect();

let durations = HashMap::new();
let batches = scheduler.schedule(&tests, &durations, Duration::from_secs(1), None);
let batches = scheduler.schedule(&tests, &durations, &HashMap::new(), None);

// Each instance must be in a separate batch
assert_eq!(batches.len(), 3);
Expand Down Expand Up @@ -379,7 +386,7 @@ mod tests {
let batches = scheduler.schedule(
&tests,
&durations,
Duration::from_secs(1),
&HashMap::new(),
Some(MAX_BATCH_DURATION),
);

Expand Down Expand Up @@ -418,7 +425,7 @@ mod tests {
let batches = scheduler.schedule(
&tests,
&durations,
Duration::from_secs(1),
&HashMap::new(),
Some(MAX_BATCH_DURATION),
);

Expand Down Expand Up @@ -453,7 +460,7 @@ mod tests {
let batches = scheduler.schedule(
&tests,
&durations,
Duration::from_secs(1),
&HashMap::new(),
Some(MAX_BATCH_DURATION),
);

Expand Down Expand Up @@ -489,7 +496,7 @@ mod tests {
];
let tests: Vec<_> = records.iter().map(|r| r.test()).collect();

let batches = scheduler.schedule(&tests, &HashMap::new(), Duration::from_secs(1), None);
let batches = scheduler.schedule(&tests, &HashMap::new(), &HashMap::new(), None);

// Two tests that each use >half the command length budget must be in separate batches
assert_eq!(batches.len(), 2);
Expand All @@ -506,7 +513,7 @@ mod tests {
.collect();
let tests: Vec<_> = records.iter().map(|r| r.test()).collect();

let batches = scheduler.schedule(&tests, &HashMap::new(), Duration::from_secs(0), None);
let batches = scheduler.schedule(&tests, &HashMap::new(), &HashMap::new(), None);

// Total command length is ~400 chars, well under 30k — should be 1 batch
assert_eq!(batches.len(), 1);
Expand Down
Loading