@@ -15,6 +15,7 @@ use std::future;
15
15
use std:: io;
16
16
use std:: io:: Write ;
17
17
use std:: os:: unix:: process:: ExitStatusExt ;
18
+ use std:: path:: Path ;
18
19
use std:: path:: PathBuf ;
19
20
use std:: process:: Stdio ;
20
21
use std:: sync:: Arc ;
@@ -34,6 +35,7 @@ use hyperactor::ProcId;
34
35
use hyperactor:: attrs:: Attrs ;
35
36
use hyperactor:: channel;
36
37
use hyperactor:: channel:: ChannelAddr ;
38
+ use hyperactor:: channel:: ChannelError ;
37
39
use hyperactor:: channel:: ChannelTransport ;
38
40
use hyperactor:: channel:: Rx ;
39
41
use hyperactor:: channel:: Tx ;
@@ -48,11 +50,14 @@ use hyperactor::host::HostError;
48
50
use hyperactor:: host:: ProcHandle ;
49
51
use hyperactor:: host:: ProcManager ;
50
52
use hyperactor:: host:: TerminateSummary ;
53
+ use hyperactor:: mailbox:: IntoBoxedMailboxSender ;
54
+ use hyperactor:: mailbox:: MailboxClient ;
51
55
use hyperactor:: mailbox:: MailboxServer ;
52
56
use hyperactor:: proc:: Proc ;
53
57
use libc:: c_int;
54
58
use serde:: Deserialize ;
55
59
use serde:: Serialize ;
60
+ use tempfile:: TempDir ;
56
61
use tokio:: process:: Child ;
57
62
use tokio:: process:: Command ;
58
63
use tokio:: sync:: oneshot;
@@ -65,6 +70,8 @@ use crate::v1;
65
70
use crate :: v1:: host_mesh:: mesh_agent:: HostAgentMode ;
66
71
use crate :: v1:: host_mesh:: mesh_agent:: HostMeshAgent ;
67
72
73
+ mod mailbox;
74
+
68
75
declare_attrs ! {
69
76
/// If enabled (default), bootstrap child processes install
70
77
/// `PR_SET_PDEATHSIG(SIGKILL)` so the kernel reaps them if the
@@ -213,6 +220,10 @@ pub enum Bootstrap {
213
220
backend_addr : ChannelAddr ,
214
221
/// The callback address used to indicate successful spawning.
215
222
callback_addr : ChannelAddr ,
223
+ /// Directory for storing proc socket files. Procs place their sockets
224
+ /// in this directory, so that they can be looked up by other procs
225
+ /// for direct transfer.
226
+ socket_dir_path : PathBuf ,
216
227
/// Optional config snapshot (`hyperactor::config::Attrs`)
217
228
/// captured by the parent. If present, the child installs it
218
229
/// as the `Runtime` layer so the parent's effective config
@@ -325,6 +336,7 @@ impl Bootstrap {
325
336
proc_id,
326
337
backend_addr,
327
338
callback_addr,
339
+ socket_dir_path,
328
340
config,
329
341
} => {
330
342
if let Some ( attrs) = config {
@@ -344,15 +356,39 @@ impl Bootstrap {
344
356
eprintln ! ( "(bootstrap) PDEATHSIG disabled via config" ) ;
345
357
}
346
358
347
- let result =
348
- host:: spawn_proc ( proc_id, backend_addr, callback_addr, |proc| async move {
349
- ProcMeshAgent :: boot_v1 ( proc) . await
350
- } )
351
- . await ;
352
- match result {
353
- Ok ( _proc) => halt ( ) . await ,
354
- Err ( e) => e. into ( ) ,
355
- }
359
+ let ( local_addr, name) = ok ! ( proc_id
360
+ . as_direct( )
361
+ . ok_or_else( || anyhow:: anyhow!( "invalid proc id type" ) ) ) ;
362
+ // TODO provide a direct way to construct these; a parse failure is returned as a bootstrap error, not a panic.
363
+ let serve_addr = format ! ( "unix:{}" , socket_dir_path. join( name) . display( ) ) ;
364
+ let serve_addr = ok!(serve_addr.parse::<ChannelAddr>());
365
+
366
+ // The following is a modified host::spawn_proc to support direct
367
+ // dialing between local procs: 1) we bind each proc to a deterministic
368
+ // address in socket_dir_path; 2) we use LocalProcDialer to dial these
369
+ // addresses for local procs.
370
+ let proc_sender = mailbox:: LocalProcDialer :: new (
371
+ local_addr. clone ( ) ,
372
+ socket_dir_path,
373
+ ok ! ( MailboxClient :: dial( backend_addr) ) ,
374
+ ) ;
375
+
376
+ let proc = Proc :: new ( proc_id. clone ( ) , proc_sender. into_boxed ( ) ) ;
377
+
378
+ let agent_handle = ok ! ( ProcMeshAgent :: boot_v1( proc. clone( ) )
379
+ . await
380
+ . map_err( |e| HostError :: AgentSpawnFailure ( proc_id, e) ) ) ;
381
+
382
+ // Finally serve the proc on its deterministic unix socket address
383
+ // (serve_addr, under the socket directory), and call back.
384
+ let ( proc_addr, proc_rx) = ok ! ( channel:: serve( serve_addr) ) ;
385
+ proc. clone ( ) . serve ( proc_rx) ;
386
+ ok ! ( ok!( channel:: dial( callback_addr) )
387
+ . send( ( proc_addr, agent_handle. bind:: <ProcMeshAgent >( ) ) )
388
+ . await
389
+ . map_err( ChannelError :: from) ) ;
390
+
391
+ halt ( ) . await
356
392
}
357
393
Bootstrap :: Host {
358
394
addr,
@@ -370,7 +406,7 @@ impl Bootstrap {
370
406
Some ( command) => command,
371
407
None => ok ! ( BootstrapCommand :: current( ) ) ,
372
408
} ;
373
- let manager = BootstrapProcManager :: new ( command) ;
409
+ let manager = ok!(BootstrapProcManager::new(command));
374
410
let ( host, _handle) = ok ! ( Host :: serve( manager, addr) . await ) ;
375
411
let addr = host. addr ( ) . clone ( ) ;
376
412
let host_mesh_agent = ok ! ( host
@@ -1400,6 +1436,11 @@ pub struct BootstrapProcManager {
1400
1436
/// exclusively in the [`Drop`] impl to send `SIGKILL` without
1401
1437
/// needing async context.
1402
1438
pid_table : Arc < std:: sync:: Mutex < HashMap < ProcId , u32 > > > ,
1439
+
1440
+ /// Directory for storing proc socket files. Procs place their sockets
1441
+ /// in this directory, so that they can be looked up by other procs
1442
+ /// for direct transfer.
1443
+ socket_dir : TempDir ,
1403
1444
}
1404
1445
1405
1446
impl Drop for BootstrapProcManager {
@@ -1449,12 +1490,13 @@ impl BootstrapProcManager {
1449
1490
/// This is the general entry point when you want to manage procs
1450
1491
/// backed by a specific binary path (e.g. a bootstrap
1451
1492
/// trampoline).
// Review notes on `BootstrapProcManager::new`:
// Builds a manager for `command` with empty `children` and `pid_table` maps
// and a freshly created temporary directory stored in `socket_dir`.
//
// Errors: the constructor now returns `Result`; the only fallible step is
// `tempfile::tempdir()`, whose `io::Error` is propagated with `?`.
//
// NOTE(review): procs bind unix sockets at `<socket_dir>/<name>`; presumably
// the directory is created under the system temp dir — confirm the resulting
// paths stay within the platform's unix socket path length limit.
1452
- pub ( crate ) fn new ( command : BootstrapCommand ) -> Self {
1453
- Self {
1493
+ pub ( crate ) fn new ( command : BootstrapCommand ) -> Result < Self , io :: Error > {
1494
+ Ok ( Self {
1454
1495
command,
1455
1496
children : Arc :: new ( tokio:: sync:: Mutex :: new ( HashMap :: new ( ) ) ) ,
1456
1497
pid_table : Arc :: new ( std:: sync:: Mutex :: new ( HashMap :: new ( ) ) ) ,
1457
- }
1498
+ socket_dir : tempfile:: tempdir ( ) ?,
1499
+ } )
1458
1500
}
1459
1501
1460
1502
/// The bootstrap command used to launch processes.
@@ -1626,6 +1668,7 @@ impl ProcManager for BootstrapProcManager {
1626
1668
proc_id : proc_id. clone ( ) ,
1627
1669
backend_addr,
1628
1670
callback_addr,
1671
+ socket_dir_path : self . socket_dir . path ( ) . to_owned ( ) ,
1629
1672
config : Some ( cfg) ,
1630
1673
} ;
1631
1674
let mut cmd = Command :: new ( & self . command . program ) ;
@@ -2060,6 +2103,7 @@ mod tests {
2060
2103
proc_id : id ! ( foo[ 0 ] ) ,
2061
2104
backend_addr : ChannelAddr :: any ( ChannelTransport :: Tcp ) ,
2062
2105
callback_addr : ChannelAddr :: any ( ChannelTransport :: Unix ) ,
2106
+ socket_dir_path : PathBuf :: from ( "notexist" ) ,
2063
2107
config : None ,
2064
2108
} ,
2065
2109
] ;
@@ -2117,13 +2161,16 @@ mod tests {
2117
2161
attrs[ MESH_TAIL_LOG_LINES ] = 123 ;
2118
2162
attrs[ MESH_BOOTSTRAP_ENABLE_PDEATHSIG ] = false ;
2119
2163
2164
+ let socket_dir = tempfile:: tempdir ( ) . unwrap ( ) ;
2165
+
2120
2166
// Proc case
2121
2167
{
2122
2168
let original = Bootstrap :: Proc {
2123
2169
proc_id : id ! ( foo[ 42 ] ) ,
2124
2170
backend_addr : ChannelAddr :: any ( ChannelTransport :: Unix ) ,
2125
2171
callback_addr : ChannelAddr :: any ( ChannelTransport :: Unix ) ,
2126
2172
config : Some ( attrs. clone ( ) ) ,
2173
+ socket_dir_path : socket_dir. path ( ) . to_owned ( ) ,
2127
2174
} ;
2128
2175
let env_str = original. to_env_safe_string ( ) . expect ( "encode bootstrap" ) ;
2129
2176
let decoded = Bootstrap :: from_env_safe_string ( & env_str) . expect ( "decode bootstrap" ) ;
@@ -2163,14 +2210,13 @@ mod tests {
2163
2210
use std:: process:: Stdio ;
2164
2211
2165
2212
use tokio:: process:: Command ;
2166
- use tokio:: time:: Duration ;
2167
2213
2168
2214
// Manager; program path is irrelevant for this test.
2169
2215
let command = BootstrapCommand {
2170
2216
program : PathBuf :: from ( "/bin/true" ) ,
2171
2217
..Default :: default ( )
2172
2218
} ;
2173
- let manager = BootstrapProcManager :: new ( command) ;
2219
+ let manager = BootstrapProcManager :: new ( command) . unwrap ( ) ;
2174
2220
2175
2221
// Spawn a long-running child process (sleep 30) with
2176
2222
// kill_on_drop(true).
@@ -2550,7 +2596,7 @@ mod tests {
2550
2596
program : PathBuf :: from ( "/bin/true" ) ,
2551
2597
..Default :: default ( )
2552
2598
} ;
2553
- let manager = BootstrapProcManager :: new ( command) ;
2599
+ let manager = BootstrapProcManager :: new ( command) . unwrap ( ) ;
2554
2600
2555
2601
// Spawn a fast-exiting child.
2556
2602
let mut cmd = Command :: new ( "/bin/true" ) ;
@@ -2584,7 +2630,7 @@ mod tests {
2584
2630
program : PathBuf :: from ( "/bin/sleep" ) ,
2585
2631
..Default :: default ( )
2586
2632
} ;
2587
- let manager = BootstrapProcManager :: new ( command) ;
2633
+ let manager = BootstrapProcManager :: new ( command) . unwrap ( ) ;
2588
2634
2589
2635
// Spawn a process that will live long enough to kill.
2590
2636
let mut cmd = Command :: new ( "/bin/sleep" ) ;
@@ -2701,7 +2747,8 @@ mod tests {
2701
2747
let manager = BootstrapProcManager :: new ( BootstrapCommand {
2702
2748
program : PathBuf :: from ( "/bin/true" ) ,
2703
2749
..Default :: default ( )
2704
- } ) ;
2750
+ } )
2751
+ . unwrap ( ) ;
2705
2752
let unknown = ProcId :: Direct ( ChannelAddr :: any ( ChannelTransport :: Unix ) , "nope" . into ( ) ) ;
2706
2753
assert ! ( manager. status( & unknown) . await . is_none( ) ) ;
2707
2754
}
@@ -2711,7 +2758,8 @@ mod tests {
2711
2758
let manager = BootstrapProcManager :: new ( BootstrapCommand {
2712
2759
program : PathBuf :: from ( "/bin/sleep" ) ,
2713
2760
..Default :: default ( )
2714
- } ) ;
2761
+ } )
2762
+ . unwrap ( ) ;
2715
2763
2716
2764
// Long-ish child so it's alive while we "steal" it.
2717
2765
let mut cmd = Command :: new ( "/bin/sleep" ) ;
@@ -2750,7 +2798,8 @@ mod tests {
2750
2798
let manager = BootstrapProcManager :: new ( BootstrapCommand {
2751
2799
program : PathBuf :: from ( "/bin/sleep" ) ,
2752
2800
..Default :: default ( )
2753
- } ) ;
2801
+ } )
2802
+ . unwrap ( ) ;
2754
2803
2755
2804
let mut cmd = Command :: new ( "/bin/sleep" ) ;
2756
2805
cmd. arg ( "5" ) . stdout ( Stdio :: null ( ) ) . stderr ( Stdio :: null ( ) ) ;
@@ -3125,7 +3174,7 @@ mod tests {
3125
3174
. unwrap ( ) ;
3126
3175
let ( instance, _handle) = root. instance ( "client" ) . unwrap ( ) ;
3127
3176
3128
- let mgr = BootstrapProcManager :: new ( BootstrapCommand :: test ( ) ) ;
3177
+ let mgr = BootstrapProcManager :: new ( BootstrapCommand :: test ( ) ) . unwrap ( ) ;
3129
3178
let ( proc_id, backend_addr) = make_proc_id_and_backend_addr ( & instance, "t_term" ) . await ;
3130
3179
let handle = mgr
3131
3180
. spawn ( proc_id. clone ( ) , backend_addr. clone ( ) )
@@ -3181,7 +3230,7 @@ mod tests {
3181
3230
. unwrap ( ) ;
3182
3231
let ( instance, _handle) = root. instance ( "client" ) . unwrap ( ) ;
3183
3232
3184
- let mgr = BootstrapProcManager :: new ( BootstrapCommand :: test ( ) ) ;
3233
+ let mgr = BootstrapProcManager :: new ( BootstrapCommand :: test ( ) ) . unwrap ( ) ;
3185
3234
3186
3235
// Proc identity + host backend channel the child will dial.
3187
3236
let ( proc_id, backend_addr) = make_proc_id_and_backend_addr ( & instance, "t_kill" ) . await ;
@@ -3380,7 +3429,8 @@ mod tests {
3380
3429
let manager = BootstrapProcManager :: new ( BootstrapCommand {
3381
3430
program : std:: path:: PathBuf :: from ( "/bin/true" ) , // unused in this test
3382
3431
..Default :: default ( )
3383
- } ) ;
3432
+ } )
3433
+ . unwrap ( ) ;
3384
3434
manager. spawn_exit_monitor ( proc_id. clone ( ) , handle. clone ( ) ) ;
3385
3435
3386
3436
// Await terminal status and assert on exit code and stderr
0 commit comments