Skip to content

Commit fea485c

Browse files
committed
Fix a concurrency bug in OP_WAIT_TIMEOUT
Fix a bug that could explain some CI flappiness. If a message arrived before first execution of op_wait_timeout, the timer would be configured and the wait would time out unless another message arrived. Fix another bug where a process doing receive after Timeout -> Expr end would loop forever if a message was in the mailbox. Signed-off-by: Paul Guyot <pguyot@kallisys.net>
1 parent 6f66fa4 commit fea485c

File tree

4 files changed

+34
-13
lines changed

4 files changed

+34
-13
lines changed

src/libAtomVM/jit.c

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -888,6 +888,7 @@ static Context *jit_process_signal_messages(Context *ctx, JITState *jit_state)
888888
static term jit_mailbox_peek(Context *ctx)
889889
{
890890
TRACE("jit_mailbox_peek: ctx->process_id=%" PRId32 "\n", ctx->process_id);
891+
ctx->mailbox.receive_has_match_clauses = true;
891892
term out = term_invalid_term();
892893
mailbox_peek(ctx, &out);
893894
return out;
@@ -897,12 +898,14 @@ static void jit_mailbox_remove_message(Context *ctx)
897898
{
898899
TRACE("jit_mailbox_remove_message: ctx->process_id=%" PRId32 "\n", ctx->process_id);
899900
mailbox_remove_message(&ctx->mailbox, &ctx->heap);
901+
ctx->mailbox.receive_has_match_clauses = false;
900902
}
901903

902904
static void jit_timeout(Context *ctx)
903905
{
904906
TRACE("jit_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id);
905907
context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags);
908+
ctx->mailbox.receive_has_match_clauses = false;
906909
mailbox_reset(&ctx->mailbox);
907910
}
908911

@@ -952,18 +955,23 @@ static Context *jit_wait_timeout(Context *ctx, JITState *jit_state, term timeout
952955
needs_to_wait = 1;
953956
} else if (context_get_flags(ctx, WaitingTimeout) != 0) {
954957
needs_to_wait = 1;
955-
} else if (!mailbox_has_next(&ctx->mailbox)) {
956-
needs_to_wait = 1;
957958
}
959+
// else: WaitingTimeoutExpired -- fall through to timeout.
960+
// Any messages in the mailbox are left for the next receive.
958961

959962
if (needs_to_wait) {
963+
// Signal processing may have moved messages to the inner list.
964+
// If there are match clauses (loop_rec was executed), jump to
965+
// loop_rec to scan them.
966+
if (ctx->mailbox.receive_has_match_clauses && mailbox_has_next(&ctx->mailbox)) {
967+
jit_state->continuation = module_get_native_entry_point(jit_state->module, label);
968+
return ctx;
969+
}
960970
return jit_schedule_wait_cp(ctx, jit_state);
961-
} else {
962-
// clang cannot tail-optimize this, so return to loop to avoid any stack overflow
963-
// __attribute__((musttail)) return jit_state->continuation(ctx, jit_state, &module_native_interface);
964-
jit_state->continuation = module_get_native_entry_point(jit_state->module, label);
965-
return ctx;
966971
}
972+
// else: timer expired, fall through to timeout
973+
// jit_state->continuation already points to the trap handler code
974+
return ctx;
967975
}
968976

969977
static Context *jit_wait_timeout_trap_handler(Context *ctx, JITState *jit_state, int label)
@@ -976,6 +984,7 @@ static Context *jit_wait_timeout_trap_handler(Context *ctx, JITState *jit_state,
976984
return scheduler_wait(ctx);
977985
}
978986

987+
// Messages available, jump to loop_rec to scan them.
979988
jit_state->continuation = module_get_native_entry_point(jit_state->module, label);
980989
return ctx;
981990
}

src/libAtomVM/mailbox.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ void mailbox_init(Mailbox *mbx)
4545
mbx->inner_last = NULL;
4646
mbx->receive_pointer = NULL;
4747
mbx->receive_pointer_prev = NULL;
48+
mbx->receive_has_match_clauses = false;
4849
}
4950

5051
// Convert a mailbox message (struct Message or struct TermSignal) to a heap

src/libAtomVM/mailbox.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,10 @@ typedef struct
178178
// Receive pointers are on inner list items.
179179
MailboxMessage *receive_pointer;
180180
MailboxMessage *receive_pointer_prev;
181+
// Set by loop_rec, cleared by remove_message and timeout.
182+
// Used by wait_timeout to know if there are match clauses to try
183+
// when signal processing moved messages to the inner list.
184+
bool receive_has_match_clauses;
181185
} Mailbox;
182186

183187
// TODO: a lot of this code depends on Context * and should be decoupled

src/libAtomVM/opcodesswitch.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2798,6 +2798,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
27982798
}
27992799
PROCESS_SIGNAL_MESSAGES();
28002800
mailbox_remove_message(&ctx->mailbox, &ctx->heap);
2801+
ctx->mailbox.receive_has_match_clauses = false;
28012802
// Cannot GC now as remove_message is GC neutral
28022803
#endif
28032804
break;
@@ -2808,6 +2809,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
28082809

28092810
#ifdef IMPL_EXECUTE_LOOP
28102811
context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags);
2812+
ctx->mailbox.receive_has_match_clauses = false;
28112813

28122814
mailbox_reset(&ctx->mailbox);
28132815
#endif
@@ -2823,6 +2825,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
28232825
TRACE("loop_rec/2, dreg=%c%i\n", T_DEST_REG(dreg));
28242826

28252827
#ifdef IMPL_EXECUTE_LOOP
2828+
ctx->mailbox.receive_has_match_clauses = true;
28262829
term ret;
28272830
PROCESS_SIGNAL_MESSAGES();
28282831
if (mailbox_peek(ctx, &ret)) {
@@ -2898,15 +2901,19 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
28982901
needs_to_wait = 1;
28992902
} else if (context_get_flags(ctx, WaitingTimeout) != 0) {
29002903
needs_to_wait = 1;
2901-
} else if (!mailbox_has_next(&ctx->mailbox)) {
2902-
needs_to_wait = 1;
29032904
}
2905+
// else: WaitingTimeoutExpired -- fall through to timeout.
2906+
// Any messages in the mailbox are left for the next receive.
29042907

29052908
if (needs_to_wait) {
2909+
// Signal processing may have moved messages to the inner
2910+
// list. If there are match clauses (loop_rec was
2911+
// executed), jump to loop_rec to scan them.
2912+
if (ctx->mailbox.receive_has_match_clauses && mailbox_has_next(&ctx->mailbox)) {
2913+
JUMP_TO_ADDRESS(mod->labels[label]);
2914+
}
29062915
ctx->waiting_with_timeout = true;
29072916
SCHEDULE_WAIT(mod, saved_pc);
2908-
} else {
2909-
JUMP_TO_ADDRESS(mod->labels[label]);
29102917
}
29112918
#endif
29122919

@@ -2922,8 +2929,6 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
29222929
#ifdef IMPL_EXECUTE_LOOP
29232930
wait_timeout_trap_handler:
29242931
{
2925-
// Determine if a message arrived to either jump to timeout label
2926-
// or to continuation.
29272932
// Redo the offset computation and refetch the label
29282933
int label;
29292934
DECODE_LABEL(label, pc)
@@ -2932,6 +2937,8 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
29322937
TRACE("wait_timeout_trap_handler, label: %i\n", label);
29332938
PROCESS_SIGNAL_MESSAGES();
29342939
if (context_get_flags(ctx, WaitingTimeoutExpired)) {
2940+
// Timer expired -- fall through to timeout.
2941+
// Any messages in the mailbox are left for the next receive.
29352942
ctx->waiting_with_timeout = false;
29362943
} else {
29372944
if (UNLIKELY(!mailbox_has_next(&ctx->mailbox))) {

0 commit comments

Comments
 (0)