From 4679c1cae1c8b4431c75b3b08f8472ea6ad1c143 Mon Sep 17 00:00:00 2001 From: Marius Eriksen Date: Fri, 3 Oct 2025 16:09:55 -0700 Subject: [PATCH] [hyperactor] mesh: make it work with cargo on a mac 1) PR_DEATHSIG is a Linux feature; hide it behind a feature flag 2) heuristically find cargo-built resources in place of buck Note that (2) is not a true solution: Cargo does not take these binaries as a dependence on the tests, and thus will not automatically rebuild them. Cargo apparently will do this for integration tests, however; we should consider converting the relevant tests to integration tests. Differential Revision: [D83882828](https://our.internmc.facebook.com/intern/diff/D83882828/) **NOTE FOR REVIEWERS**: This PR has internal Meta-specific changes or comments, please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D83882828/)! [ghstack-poisoned] --- hyperactor_mesh/src/actor_mesh.rs | 6 +-- hyperactor_mesh/src/alloc.rs | 5 ++- hyperactor_mesh/src/alloc/process.rs | 4 +- hyperactor_mesh/src/alloc/remoteprocess.rs | 38 +++++++++-------- hyperactor_mesh/src/bootstrap.rs | 23 +++++----- hyperactor_mesh/src/lib.rs | 1 + hyperactor_mesh/src/testresource.rs | 49 ++++++++++++++++++++++ hyperactor_mesh/src/v1/host_mesh.rs | 2 +- hyperactor_mesh/src/v1/testing.rs | 18 ++++---- 9 files changed, 102 insertions(+), 44 deletions(-) create mode 100644 hyperactor_mesh/src/testresource.rs diff --git a/hyperactor_mesh/src/actor_mesh.rs b/hyperactor_mesh/src/actor_mesh.rs index a8710f70b..a7ea353cb 100644 --- a/hyperactor_mesh/src/actor_mesh.rs +++ b/hyperactor_mesh/src/actor_mesh.rs @@ -1321,9 +1321,9 @@ mod tests { use crate::alloc::process::ProcessAllocator; fn process_allocator() -> ProcessAllocator { - ProcessAllocator::new(Command::new( - buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap(), - )) + ProcessAllocator::new(Command::new(crate::testresource::get( + "monarch/hyperactor_mesh/bootstrap", + ))) } #[cfg(fbcode_build)] // we use an external binary, produced by buck diff --git a/hyperactor_mesh/src/alloc.rs b/hyperactor_mesh/src/alloc.rs index e7dd35092..4d8955fcd 100644 --- a/hyperactor_mesh/src/alloc.rs +++ b/hyperactor_mesh/src/alloc.rs @@ -710,8 +710,9 @@ pub(crate) mod testing { Duration::from_secs(1), ); - let command = - Command::new(buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap()); + let command = Command::new(crate::testresource::get( + "monarch/hyperactor_mesh/bootstrap", + )); let mut allocator = ProcessAllocator::new(command); let mut alloc = allocator .allocate(AllocSpec { diff --git a/hyperactor_mesh/src/alloc/process.rs b/hyperactor_mesh/src/alloc/process.rs index 2f0d1373f..7f62a8d18 100644 --- a/hyperactor_mesh/src/alloc/process.rs +++ b/hyperactor_mesh/src/alloc/process.rs @@ -601,12 +601,12 @@ mod tests { #[cfg(fbcode_build)] // we use an external binary, produced by buck crate::alloc_test_suite!(ProcessAllocator::new(Command::new( - buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap() + crate::testresource::get("monarch/hyperactor_mesh/bootstrap") ))); #[tokio::test] async fn test_sigterm_on_group_fail() { - let bootstrap_binary = buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap(); + let bootstrap_binary = crate::testresource::get("monarch/hyperactor_mesh/bootstrap"); let mut allocator = ProcessAllocator::new(Command::new(bootstrap_binary)); let mut alloc = allocator diff --git a/hyperactor_mesh/src/alloc/remoteprocess.rs b/hyperactor_mesh/src/alloc/remoteprocess.rs index 5b60f1050..b5c8e0ab0 100644 --- a/hyperactor_mesh/src/alloc/remoteprocess.rs +++ b/hyperactor_mesh/src/alloc/remoteprocess.rs @@ -2048,13 +2048,15 @@ mod test_alloc { let task1_allocator = RemoteProcessAllocator::new(); let task1_addr = ChannelAddr::any(ChannelTransport::Unix); let task1_addr_string = task1_addr.to_string(); - let task1_cmd = - Command::new(buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap()); + let task1_cmd = Command::new(crate::testresource::get( + "monarch/hyperactor_mesh/bootstrap", + )); let task2_allocator = RemoteProcessAllocator::new(); let task2_addr = ChannelAddr::any(ChannelTransport::Unix); let task2_addr_string = task2_addr.to_string(); - let task2_cmd = - Command::new(buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap()); + let task2_cmd = Command::new(crate::testresource::get( + "monarch/hyperactor_mesh/bootstrap", + )); let task1_allocator_copy = task1_allocator.clone(); let task1_allocator_handle = tokio::spawn(async move { tracing::info!("spawning task1"); @@ -2175,13 +2177,15 @@ mod test_alloc { let task1_allocator = RemoteProcessAllocator::new(); let task1_addr = ChannelAddr::any(ChannelTransport::Unix); let task1_addr_string = task1_addr.to_string(); - let task1_cmd = - Command::new(buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap()); + let task1_cmd = Command::new(crate::testresource::get( + "monarch/hyperactor_mesh/bootstrap", + )); let task2_allocator = RemoteProcessAllocator::new(); let task2_addr = ChannelAddr::any(ChannelTransport::Unix); let task2_addr_string = task2_addr.to_string(); - let task2_cmd = - Command::new(buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap()); + let task2_cmd = Command::new(crate::testresource::get( + "monarch/hyperactor_mesh/bootstrap", + )); let task1_allocator_copy = task1_allocator.clone(); let task1_allocator_handle = tokio::spawn(async move { tracing::info!("spawning task1"); @@ -2303,8 +2307,9 @@ mod test_alloc { let task1_allocator = RemoteProcessAllocator::new(); let task1_addr = ChannelAddr::any(ChannelTransport::Unix); let task1_addr_string = task1_addr.to_string(); - let task1_cmd = - Command::new(buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap()); + let task1_cmd = Command::new(crate::testresource::get( + "monarch/hyperactor_mesh/bootstrap", + )); let task2_allocator = RemoteProcessAllocator::new(); let task2_addr = ChannelAddr::any(ChannelTransport::Unix); let task2_addr_string = task2_addr.to_string(); @@ -2427,10 +2432,9 @@ mod test_alloc { let remote_process_allocators = addresses .iter() .map(|addr| { - Command::new( - buck_resources::get("monarch/hyperactor_mesh/remote_process_allocator") - .unwrap(), - ) + Command::new(crate::testresource::get( + "monarch/hyperactor_mesh/remote_process_allocator", + )) .env("RUST_LOG", "info") .arg(format!("--addr={addr}")) .stdout(std::process::Stdio::piped()) @@ -2442,9 +2446,9 @@ mod test_alloc { let done_allocating_addr = ChannelAddr::any(ChannelTransport::Unix); let (done_allocating_addr, mut done_allocating_rx) = channel::serve::<()>(done_allocating_addr).unwrap(); - let mut remote_process_alloc = Command::new( - buck_resources::get("monarch/hyperactor_mesh/remote_process_alloc").unwrap(), - ) + let mut remote_process_alloc = Command::new(crate::testresource::get( + "monarch/hyperactor_mesh/remote_process_alloc", + )) .arg(format!("--done-allocating-addr={}", done_allocating_addr)) .arg(format!("--addresses={}", addresses.join(","))) .arg(format!("--num-proc-meshes={}", num_proc_meshes)) diff --git a/hyperactor_mesh/src/bootstrap.rs b/hyperactor_mesh/src/bootstrap.rs index e067e3690..2eddfbd0c 100644 --- a/hyperactor_mesh/src/bootstrap.rs +++ b/hyperactor_mesh/src/bootstrap.rs @@ -396,11 +396,14 @@ impl Bootstrap { /// Install "kill me if parent dies" and close the race window. pub fn install_pdeathsig_kill() -> io::Result<()> { - // SAFETY: Calling into libc; does not dereference memory, just - // asks the kernel to deliver SIGKILL on parent death. - let rc = unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL as c_int) }; - if rc != 0 { - return Err(io::Error::last_os_error()); + #[cfg(target_os = "linux")] + { + // SAFETY: Calling into libc; does not dereference memory, just + // asks the kernel to deliver SIGKILL on parent death. + let rc = unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL as c_int) }; + if rc != 0 { + return Err(io::Error::last_os_error()); + } } // Race-close: if the parent died between our exec and prctl(), // we won't get a signal, so detect that and exit now. @@ -1343,7 +1346,7 @@ impl BootstrapCommand { #[cfg(test)] pub(crate) fn test() -> Self { Self { - program: buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap(), + program: crate::testresource::get("monarch/hyperactor_mesh/bootstrap"), arg0: None, args: vec![], env: HashMap::new(), @@ -2548,7 +2551,7 @@ mod tests { let manager = BootstrapProcManager::new(command); // Spawn a fast-exiting child. - let mut cmd = Command::new("/bin/true"); + let mut cmd = Command::new("true"); cmd.stdout(Stdio::null()).stderr(Stdio::null()); let child = cmd.spawn().expect("spawn true"); @@ -3225,9 +3228,9 @@ mod tests { let (instance, _handle) = proc.instance("client").unwrap(); // Configure a ProcessAllocator with the bootstrap binary. - let mut allocator = ProcessAllocator::new(Command::new( - buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap(), - )); + let mut allocator = ProcessAllocator::new(Command::new(crate::testresource::get( + "monarch/hyperactor_mesh/bootstrap", + ))); // Request a new allocation of procs from the ProcessAllocator. let alloc = allocator .allocate(AllocSpec { diff --git a/hyperactor_mesh/src/lib.rs b/hyperactor_mesh/src/lib.rs index 20cb1f5f5..cc21cbb4c 100644 --- a/hyperactor_mesh/src/lib.rs +++ b/hyperactor_mesh/src/lib.rs @@ -32,6 +32,7 @@ mod router; pub mod shared_cell; pub mod shortuuid; pub mod test_utils; +mod testresource; pub mod v1; pub use actor_mesh::RootActorMesh; diff --git a/hyperactor_mesh/src/testresource.rs b/hyperactor_mesh/src/testresource.rs new file mode 100644 index 000000000..edffc23f2 --- /dev/null +++ b/hyperactor_mesh/src/testresource.rs @@ -0,0 +1,49 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#![cfg(test)] + +use std::path::PathBuf; + +/// Fetch the named (BUCK) named resource, heuristically falling back on +/// the cargo-built path when possible. Beware! This is not actually a +/// true cargo dependency, so the binaries have to be built independently. +/// +/// We should convert these tests to integration tests, so that cargo can +/// also manage the binaries. +pub fn get(name: S) -> PathBuf +where + S: AsRef, +{ + let name = name.as_ref().to_owned(); + // TODO: actually check if we're running in Buck context or not. + if let Ok(path) = buck_resources::get(name.clone()) { + return path; + } + + assert!( + name.starts_with("monarch/hyperactor_mesh/"), + "invalid resource {}: must start with \"monarch/hyperactor_mesh/\"", + name + ); + + let path: PathBuf = name + .replace( + "monarch/hyperactor_mesh/", + "../target/debug/hyperactor_mesh_test_", + ) + .into(); + + assert!( + path.exists(), + "no cargo-built resource at {}", + path.display() + ); + + path +} diff --git a/hyperactor_mesh/src/v1/host_mesh.rs b/hyperactor_mesh/src/v1/host_mesh.rs index 30dad3a74..554fada9c 100644 --- a/hyperactor_mesh/src/v1/host_mesh.rs +++ b/hyperactor_mesh/src/v1/host_mesh.rs @@ -831,7 +831,7 @@ mod tests { let config = hyperactor::config::global::lock(); let _guard = config.override_key(crate::bootstrap::MESH_BOOTSTRAP_ENABLE_PDEATHSIG, false); - let program = buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap(); + let program = crate::testresource::get("monarch/hyperactor_mesh/bootstrap"); let hosts = vec![free_localhost_addr(), free_localhost_addr()]; diff --git a/hyperactor_mesh/src/v1/testing.rs b/hyperactor_mesh/src/v1/testing.rs index 440470404..5994a5a1c 100644 --- a/hyperactor_mesh/src/v1/testing.rs +++ b/hyperactor_mesh/src/v1/testing.rs @@ -63,9 +63,9 @@ pub async fn proc_meshes(cx: &impl context::Actor, extent: Extent) -> Vec Vec> { vec![ // Box::new(LocalAllocator.allocate(spec.clone()).await.unwrap()), Box::new( - ProcessAllocator::new(Command::new( - buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap(), - )) + ProcessAllocator::new(Command::new(crate::testresource::get( + "monarch/hyperactor_mesh/bootstrap", + ))) .allocate(spec.clone()) .await .unwrap(), @@ -130,9 +130,9 @@ pub async fn local_proc_mesh(extent: Extent) -> (ProcMesh, Instance<()>, DialMai /// Create a host mesh using multiple processes running on the test machine. pub async fn host_mesh(extent: Extent) -> HostMesh { - let mut allocator = ProcessAllocator::new(Command::new( - buck_resources::get("monarch/hyperactor_mesh/bootstrap").unwrap(), - )); + let mut allocator = ProcessAllocator::new(Command::new(crate::testresource::get( + "monarch/hyperactor_mesh/bootstrap", + ))); let alloc = allocator .allocate(AllocSpec { extent,