Skip to content

Commit 53b8a2e

Browse files
pzhan9 authored and meta-codesync[bot] committed
Add events for Alloc creation (#1915)
Summary: Pull Request resolved: #1915

As title.

Reviewed By: mariusae
Differential Revision: D87274098
fbshipit-source-id: bead529c8b5c04c40a3e05f610b93f8098b949f0
1 parent 9130b82 commit 53b8a2e

File tree

2 files changed

+45
-4
lines changed

2 files changed

+45
-4
lines changed

hyperactor_mesh/src/alloc/local.rs

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,13 @@ impl Allocator for LocalAllocator {
5353
type Alloc = LocalAlloc;
5454

5555
async fn allocate(&mut self, spec: AllocSpec) -> Result<Self::Alloc, AllocatorError> {
56-
Ok(LocalAlloc::new(spec))
56+
let alloc = LocalAlloc::new(spec);
57+
tracing::info!(
58+
name = "LocalAllocStatus",
59+
alloc_name = %alloc.world_id(),
60+
status = "Allocated",
61+
);
62+
Ok(alloc)
5763
}
5864
}
5965

@@ -259,12 +265,23 @@ impl Alloc for LocalAlloc {
259265
}
260266

261267
async fn stop(&mut self) -> Result<(), AllocatorError> {
268+
tracing::info!(
269+
name = "LocalAllocStatus",
270+
alloc_name = %self.world_id(),
271+
status = "Stopping",
272+
);
262273
for rank in 0..self.size() {
263274
self.todo_tx
264275
.send(Action::Stop(rank, ProcStopReason::Stopped))
265276
.unwrap();
266277
}
267278
self.todo_tx.send(Action::Stopped).unwrap();
279+
tracing::info!(
280+
name = "LocalAllocStatus",
281+
alloc_name = %self.world_id(),
282+
status = "Stop::Sent",
283+
"Stop was sent to local procs; check their log to determine if it exited."
284+
);
268285
Ok(())
269286
}
270287

@@ -275,7 +292,10 @@ impl Alloc for LocalAlloc {
275292

276293
impl Drop for LocalAlloc {
277294
fn drop(&mut self) {
278-
tracing::debug!(
295+
tracing::info!(
296+
name = "LocalAllocStatus",
297+
alloc_name = %self.world_id(),
298+
status = "Dropped",
279299
"dropping LocalAlloc of name: {}, world id: {}",
280300
self.name,
281301
self.world_id

hyperactor_mesh/src/alloc/process.rs

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -99,9 +99,16 @@ impl Allocator for ProcessAllocator {
9999
}
100100

101101
let name = ShortUuid::generate();
102+
let world_id = WorldId(name.to_string());
103+
tracing::info!(
104+
name = "ProcessAllocStatus",
105+
alloc_name = %world_id,
106+
addr = %bootstrap_addr,
107+
status = "Allocated",
108+
);
102109
Ok(ProcessAlloc {
103110
name: name.clone(),
104-
world_id: WorldId(name.to_string()),
111+
world_id,
105112
spec: spec.clone(),
106113
bootstrap_addr,
107114
rx,
@@ -666,6 +673,11 @@ impl Alloc for ProcessAlloc {
666673
}
667674

668675
async fn stop(&mut self) -> Result<(), AllocatorError> {
676+
tracing::info!(
677+
name = "ProcessAllocStatus",
678+
alloc_name = %self.world_id(),
679+
status = "Stopping",
680+
);
669681
// We rely on the teardown here, and that the process should
670682
// exit on its own. We should have a hard timeout here as well,
671683
// so that we never rely on the system functioning correctly
@@ -676,13 +688,22 @@ impl Alloc for ProcessAlloc {
676688
}
677689

678690
self.running = false;
691+
tracing::info!(
692+
name = "ProcessAllocStatus",
693+
alloc_name = %self.world_id(),
694+
status = "Stop::Sent",
695+
"StopAndExit was sent to allocators; check their logs for the stop progress."
696+
);
679697
Ok(())
680698
}
681699
}
682700

683701
impl Drop for ProcessAlloc {
684702
fn drop(&mut self) {
685-
tracing::debug!(
703+
tracing::info!(
704+
name = "ProcessAllocStatus",
705+
alloc_name = %self.world_id(),
706+
status = "Dropped",
686707
"dropping ProcessAlloc of name: {}, world id: {}",
687708
self.name,
688709
self.world_id

0 commit comments

Comments
 (0)