Skip to content

Commit 5d8a48c

Browse files
committed
Fork actions must not allocate
The `execve` action allocated the arrays in the forked child process. However, in a multi-threaded program we might have forked while another thread had the malloc lock. In that case, the child would wait forever because it inherited the locked mutex but not the thread that would unlock it. e.g. #0 futex_wait (private=0, expected=2, futex_word=0xffff9509cb10 <main_arena>) at ../sysdeps/nptl/futex-internal.h:146 #1 __GI___lll_lock_wait_private (futex=futex@entry=0xffff9509cb10 <main_arena>) at ./nptl/lowlevellock.c:34 #2 0x0000ffff94f8e780 in __libc_calloc (n=<optimized out>, elem_size=<optimized out>) at ./malloc/malloc.c:3650 #3 0x0000aaaac67cfa68 in make_string_array (errors=errors@entry=37, v_array=281472912006504) at fork_action.c:47 #4 0x0000aaaac67cfaf4 in action_execve (errors=37, v_config=281472912003024) at fork_action.c:61 #5 0x0000aaaac67cf93c in eio_unix_run_fork_actions (errors=errors@entry=37, v_actions=281472912002960) at fork_action.c:19
1 parent 95a58dc commit 5d8a48c

File tree

4 files changed

+88
-27
lines changed

4 files changed

+88
-27
lines changed

lib_eio/unix/fork_action.c

Lines changed: 57 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
/* Note: fork actions MUST NOT allocate (either on the OCaml heap or with C malloc).
2+
* This is because e.g. we might have forked while another thread in the parent had a lock.
3+
* In the child, we inherit a copy of the locked mutex, but no corresponding thread to
4+
* release it.
5+
*/
6+
17
#include <stdlib.h>
28
#include <unistd.h>
39
#include <fcntl.h>
@@ -6,6 +12,9 @@
612

713
#include <caml/mlvalues.h>
814
#include <caml/unixsupport.h>
15+
#include <caml/memory.h>
16+
#include <caml/custom.h>
17+
#include <caml/fail.h>
918

1019
#include "fork_action.h"
1120

@@ -42,24 +51,61 @@ void eio_unix_fork_error(int fd, char *fn, char *buf) {
4251
try_write_all(fd, buf);
4352
}
4453

45-
static char **make_string_array(int errors, value v_array) {
46-
int n = Wosize_val(v_array);
47-
char **c = calloc(sizeof(char *), (n + 1));
48-
if (!c) {
49-
eio_unix_fork_error(errors, "make_string_array", "out of memory");
50-
_exit(1);
51-
}
54+
#define String_array_val(v) *((char ***)Data_custom_val(v))
55+
56+
static void finalize_string_array(value v) {
57+
free(String_array_val(v));
58+
String_array_val(v) = NULL;
59+
}
60+
61+
static struct custom_operations string_array_ops = {
62+
"string.array",
63+
finalize_string_array,
64+
custom_compare_default,
65+
custom_hash_default,
66+
custom_serialize_default,
67+
custom_deserialize_default,
68+
custom_compare_ext_default,
69+
custom_fixed_length_default
70+
};
71+
72+
CAMLprim value eio_unix_make_string_array(value v_len) {
73+
CAMLparam0();
74+
CAMLlocal1(v_str_array);
75+
int n = Int_val(v_len);
76+
uintnat total;
77+
78+
if (caml_umul_overflow(sizeof(char *), n + 1, &total))
79+
caml_raise_out_of_memory();
80+
81+
v_str_array = caml_alloc_custom_mem(&string_array_ops, sizeof(char ***), total);
82+
83+
char **c = calloc(sizeof(char *), n + 1);
84+
String_array_val(v_str_array) = c;
85+
if (!c)
86+
caml_raise_out_of_memory();
87+
88+
CAMLreturn(v_str_array);
89+
}
90+
91+
static void fill_string_array(char **c, value v_ocaml_array) {
92+
int n = Wosize_val(v_ocaml_array);
93+
5294
for (int i = 0; i < n; i++) {
53-
c[i] = (char *) String_val(Field(v_array, i));
95+
c[i] = (char *) String_val(Field(v_ocaml_array, i));
5496
}
97+
5598
c[n] = NULL;
56-
return c;
5799
}
58100

59101
static void action_execve(int errors, value v_config) {
60102
value v_exe = Field(v_config, 1);
61-
char **argv = make_string_array(errors, Field(v_config, 2));
62-
char **envp = make_string_array(errors, Field(v_config, 3));
103+
char **argv = String_array_val(Field(v_config, 2));
104+
char **envp = String_array_val(Field(v_config, 4));
105+
106+
fill_string_array(argv, Field(v_config, 3));
107+
fill_string_array(envp, Field(v_config, 5));
108+
63109
execve(String_val(v_exe), argv, envp);
64110
eio_unix_fork_error(errors, "execve", strerror(errno));
65111
_exit(1);

lib_eio/unix/fork_action.ml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,14 @@ let rec with_actions actions fn =
1717
with_actions xs @@ fun c_actions ->
1818
fn (c_action :: c_actions)
1919

20+
type c_array
21+
external make_string_array : int -> c_array = "eio_unix_make_string_array"
2022
external action_execve : unit -> fork_fn = "eio_unix_fork_execve"
2123
let action_execve = action_execve ()
22-
let execve path ~argv ~env = { run = fun k -> k (Obj.repr (action_execve, path, argv, env)) }
24+
let execve path ~argv ~env =
25+
let argv_c_array = make_string_array (Array.length argv) in
26+
let env_c_array = make_string_array (Array.length env) in
27+
{ run = fun k -> k (Obj.repr (action_execve, path, argv_c_array, argv, env_c_array, env)) }
2328

2429
external action_chdir : unit -> fork_fn = "eio_unix_fork_chdir"
2530
let action_chdir = action_chdir ()

lib_eio/unix/include/fork_action.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
#include <caml/mlvalues.h>
22
#include <caml/alloc.h>
33

4-
/* A function that runs in the forked child process. It must not run any OCaml code or invoke the GC.
4+
/* A function that runs in the forked child process.
5+
* It must not run any OCaml code, invoke the GC, or even call [malloc].
56
* If the action fails then it writes an error message to the FD [errors] and calls [_exit].
67
* v_args is the c_action tuple (where field 0 is the function itself).
78
*/

stress/stress_proc.ml

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,35 @@
11
open Eio.Std
22

3+
let n_domains = 4
34
let n_rounds = 100
4-
let n_procs_per_round = 100
5+
let n_procs_per_round_per_domain = 100 / n_domains
56

6-
let main mgr =
7+
let run_in_domain mgr =
78
let echo n = Eio.Process.parse_out mgr Eio.Buf_read.line ["sh"; "-c"; "echo " ^ string_of_int n] in
9+
Switch.run @@ fun sw ->
10+
for j = 1 to n_procs_per_round_per_domain do
11+
Fiber.fork ~sw (fun () ->
12+
let result = echo j in
13+
assert (int_of_string result = j);
14+
(* traceln "OK: %d" j *)
15+
)
16+
done
17+
18+
let main ~dm mgr =
819
let t0 = Unix.gettimeofday () in
920
for i = 1 to n_rounds do
10-
Switch.run @@ fun sw ->
11-
for j = 1 to n_procs_per_round do
12-
Fiber.fork ~sw (fun () ->
13-
let result = echo j in
14-
assert (int_of_string result = j);
15-
(* traceln "OK: %d" j *)
16-
)
17-
done;
18-
if false then traceln "Finished round %d/%d" i n_rounds
21+
Switch.run (fun sw ->
22+
for _ = 1 to n_domains - 1 do
23+
Fiber.fork ~sw (fun () -> Eio.Domain_manager.run dm (fun () -> run_in_domain mgr))
24+
done;
25+
Fiber.fork ~sw (fun () -> run_in_domain mgr);
26+
);
27+
if true then traceln "Finished round %d/%d" i n_rounds
1928
done;
2029
let t1 = Unix.gettimeofday () in
21-
let n_procs = n_rounds * n_procs_per_round in
22-
traceln "Finished process stress test: ran %d processes in %.2fs" n_procs (t1 -. t0)
30+
let n_procs = n_rounds * n_procs_per_round_per_domain * n_domains in
31+
traceln "Finished process stress test: ran %d processes in %.2fs (using %d domains)" n_procs (t1 -. t0) n_domains
2332

2433
let () =
2534
Eio_main.run @@ fun env ->
26-
main env#process_mgr
35+
main ~dm:env#domain_mgr env#process_mgr

0 commit comments

Comments
 (0)