Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 6 additions & 22 deletions core/iwasm/common/wasm_runtime_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -2377,32 +2377,22 @@ wasm_runtime_get_exec_env_singleton(WASMModuleInstanceCommon *module_inst_comm)
/*
 * Diff hunk for wasm_set_exception(): removed and added lines are listed
 * together without +/- markers, so this text is not compilable as shown.
 *
 * Writes "Exception: <exception>" into module_inst->cur_exception, or
 * empties that buffer when `exception` is NULL. When
 * WASM_ENABLE_THREAD_MGR is set and wasm_clusters_search_exec_env() finds
 * an exec env for the instance, the call is forwarded to
 * wasm_cluster_spread_exception().
 */
void
wasm_set_exception(WASMModuleInstance *module_inst, const char *exception)
{
WASMExecEnv *exec_env = NULL;

/* Presumably the removed side: locking via the first shared memory. */
#if WASM_ENABLE_SHARED_MEMORY != 0
if (module_inst->memory_count > 0)
shared_memory_lock(module_inst->memories[0]);
#endif
/* Presumably the added side: the buffer update is bracketed by
   exception_lock()/exception_unlock() instead. */
exception_lock(module_inst);
if (exception) {
snprintf(module_inst->cur_exception, sizeof(module_inst->cur_exception),
"Exception: %s", exception);
}
else {
/* NULL means "clear": make the buffer an empty string. */
module_inst->cur_exception[0] = '\0';
}
#if WASM_ENABLE_SHARED_MEMORY != 0
if (module_inst->memory_count > 0)
shared_memory_unlock(module_inst->memories[0]);
#endif
exception_unlock(module_inst);

#if WASM_ENABLE_THREAD_MGR != 0
/* Two variants of the same lookup follow: assignment to the earlier
   declaration, and a declaration local to this #if branch. */
exec_env =
WASMExecEnv *exec_env =
wasm_clusters_search_exec_env((WASMModuleInstanceCommon *)module_inst);
if (exec_env) {
/* Two variants of the call: a bool "clear" flag derived from
   `exception`, and the exception string passed through as-is. */
wasm_cluster_spread_exception(exec_env, exception ? false : true);
wasm_cluster_spread_exception(exec_env, exception);
}
#else
(void)exec_env;
#endif
}

Expand Down Expand Up @@ -2453,10 +2443,7 @@ wasm_copy_exception(WASMModuleInstance *module_inst, char *exception_buf)
{
bool has_exception = false;

#if WASM_ENABLE_SHARED_MEMORY != 0
if (module_inst->memory_count > 0)
shared_memory_lock(module_inst->memories[0]);
#endif
exception_lock(module_inst);
if (module_inst->cur_exception[0] != '\0') {
/* NULL is passed if the caller is not interested in getting the
* exception content, but only in knowing if an exception has been
Expand All @@ -2468,10 +2455,7 @@ wasm_copy_exception(WASMModuleInstance *module_inst, char *exception_buf)
sizeof(module_inst->cur_exception));
has_exception = true;
}
#if WASM_ENABLE_SHARED_MEMORY != 0
if (module_inst->memory_count > 0)
shared_memory_unlock(module_inst->memories[0]);
#endif
exception_unlock(module_inst);

return has_exception;
}
Expand Down
10 changes: 10 additions & 0 deletions core/iwasm/interpreter/wasm_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,16 @@ void
wasm_propagate_wasi_args(WASMModule *module);
#endif

#if WASM_ENABLE_THREAD_MGR != 0
/* Acquire the exception lock; pair every call with exception_unlock(). */
void
exception_lock(WASMModuleInstance *module_inst);
/* Release the lock acquired by exception_lock(). */
void
exception_unlock(WASMModuleInstance *module_inst);
#else
/* Without the thread manager both names expand to an expression that only
   references the argument and performs no locking. */
#define exception_lock(module_inst) (void)(module_inst)
#define exception_unlock(module_inst) (void)(module_inst)
#endif

#ifdef __cplusplus
}
#endif
Expand Down
107 changes: 54 additions & 53 deletions core/iwasm/libraries/thread-mgr/thread_manager.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
#include "debug_engine.h"
#endif

#if WASM_ENABLE_SHARED_MEMORY != 0
#include "wasm_shared_memory.h"
#endif

typedef struct {
bh_list_link l;
void (*destroy_cb)(WASMCluster *);
Expand All @@ -32,6 +28,8 @@ static bh_list cluster_list_head;
static bh_list *const cluster_list = &cluster_list_head;
static korp_mutex cluster_list_lock;

/* Single file-scope mutex locked by exception_lock() and released by
   exception_unlock(); created in thread_manager_init() and destroyed in
   thread_manager_destroy(). */
static korp_mutex _exception_lock;

typedef void (*list_visitor)(void *, void *);

static uint32 cluster_max_thread_num = CLUSTER_MAX_THREAD_NUM;
Expand All @@ -52,6 +50,10 @@ thread_manager_init()
return false;
if (os_mutex_init(&cluster_list_lock) != 0)
return false;
if (os_mutex_init(&_exception_lock) != 0) {
os_mutex_destroy(&cluster_list_lock);
return false;
}
return true;
}

Expand All @@ -66,6 +68,7 @@ thread_manager_destroy()
cluster = next;
}
wasm_cluster_cancel_all_callbacks();
os_mutex_destroy(&_exception_lock);
os_mutex_destroy(&cluster_list_lock);
}

Expand Down Expand Up @@ -1240,72 +1243,52 @@ wasm_cluster_resume_all(WASMCluster *cluster)
os_mutex_unlock(&cluster->lock);
}

/* Arguments that wasm_cluster_spread_exception() hands to
   set_exception_visitor() via traverse_list(). */
struct spread_exception_data {
/* Exec env the visitor must leave untouched (compared against each node). */
WASMExecEnv *skip;
/* Exception text to record on the other exec envs, or NULL to clear. */
const char *exception;
};

static void
set_exception_visitor(void *node, void *user_data)
{
WASMExecEnv *curr_exec_env = (WASMExecEnv *)node;
WASMExecEnv *exec_env = (WASMExecEnv *)user_data;
WASMModuleInstanceCommon *module_inst = get_module_inst(exec_env);
WASMModuleInstance *wasm_inst = (WASMModuleInstance *)module_inst;
const struct spread_exception_data *data = user_data;
WASMExecEnv *exec_env = (WASMExecEnv *)node;

if (curr_exec_env != exec_env) {
WASMModuleInstance *curr_wasm_inst =
(WASMModuleInstance *)get_module_inst(curr_exec_env);
if (exec_env != data->skip) {
WASMModuleInstance *wasm_inst =
(WASMModuleInstance *)get_module_inst(exec_env);

/* Only spread non "wasi proc exit" exception */
#if WASM_ENABLE_SHARED_MEMORY != 0
if (curr_wasm_inst->memory_count > 0)
shared_memory_lock(curr_wasm_inst->memories[0]);
#endif
if (!strstr(wasm_inst->cur_exception, "wasi proc exit")) {
bh_memcpy_s(curr_wasm_inst->cur_exception,
sizeof(curr_wasm_inst->cur_exception),
wasm_inst->cur_exception,
sizeof(wasm_inst->cur_exception));
exception_lock(wasm_inst);
if (data->exception != NULL) {
snprintf(wasm_inst->cur_exception, sizeof(wasm_inst->cur_exception),
"Exception: %s", data->exception);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check for Only spread non "wasi proc exit" exception is ignored, see L1255, L1260 of the original file.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's intentional. is it a problem? do you have a test case?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check was introduced in PR #1988.

Raising the "wasi proc exit" exception is an intentional behavior of the runtime. It is not an actual exception; it works more like a flag that makes the current thread stop running opcodes. After the thread stops, at the end of wasm_runtime_call_wasm, the thread clears this exception, so it ends normally without an exception being thrown. In the multi-threading case, the thread doesn't spread the "wasi proc exit" exception; it just sets the terminate flags of the other threads so that they exit as well.

It may cause unexpected behavior if thread A spreads this exception to other threads: another thread (let's say thread B) may stop running opcodes first, then handle the "wasi proc exit" exception and clear the exceptions of the other threads, including thread A. Once thread A's exception is cleared, it may continue to run and execute the next opcode (note that after calling wasi_proc_exit, the next opcode is unreachable in most cases, because the bytecode is generated by emsdk or wasi-sdk), so an "unreachable" exception is eventually thrown.

I believe we found the issue when testing the wasi-thread related test cases and then fixed it; the issue occurred only occasionally. If we want to reproduce it, we may try running the wasi-thread cases many times.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check was introduced in PR #1988.

Raising "wasi proc exit" exception is an intentional behavior of runtime, it is not an actual exception but somewhat like setting a flag to let current thread stop running opcodes, and after the thread stops and in the end of wasm_runtime_call_wasm, the thread will clear this exception, so it ends normally without exception thrown. For multi-threading, the thread doesn't spread "wasi proc exit" exception, but just set terminate flags of other threads to let them exit also.

while proc exit is not a real trap, what the runtime should do is almost same as real traps.
ie. terminate all threads and return the exit/trap to the api user as the result of the whole "thread group".

It may cause unexpected behavior if thread A spreads this exception to other threads: other thread (let's say thread B) may stop running opcodes first, then handle the "wasi proc exit" exception and clear exceptions of other threads, including thread A. When thread A's exception is cleared, it may continue to run and throw "unreachable" exception (Note that after calling wasi_proc_exit, in most cases the next opcode is unreachable, the bytecodes are generated by emsdk or wasi-sdk). And eventually "unreachable" exception is thrown.

if it's a problem, real traps have the same problems, don't they?

my impression is that many (all?) of the code clearing other threads' exception are just broken: #2481

I believe we found the issue when testing the wasi-thread related test cases and then fixed it; the issue occurred only occasionally. If we want to reproduce it, we may try running the wasi-thread cases many times.

i guess i will restore this (IMO wrong) behavior for now because it isn't the main point of this PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check was introduced in PR #1988.
Raising "wasi proc exit" exception is an intentional behavior of runtime, it is not an actual exception but somewhat like setting a flag to let current thread stop running opcodes, and after the thread stops and in the end of wasm_runtime_call_wasm, the thread will clear this exception, so it ends normally without exception thrown. For multi-threading, the thread doesn't spread "wasi proc exit" exception, but just set terminate flags of other threads to let them exit also.

while proc exit is not a real trap, what the runtime should do is almost same as real traps. ie. terminate all threads and return the exit/trap to the api user as the result of the whole "thread group".

Yes, almost the same, except it doesn't spread the exception to other threads and it clears the exception before it ends.

It may cause unexpected behavior if thread A spreads this exception to other threads: other thread (let's say thread B) may stop running opcodes first, then handle the "wasi proc exit" exception and clear exceptions of other threads, including thread A. When thread A's exception is cleared, it may continue to run and throw "unreachable" exception (Note that after calling wasi_proc_exit, in most cases the next opcode is unreachable, the bytecodes are generated by emsdk or wasi-sdk). And eventually "unreachable" exception is thrown.

if it's a problem, real traps have the same problems, don't they?

No, real traps are spread to other threads and terminate flags are also set for other threads, but the trap isn't cleared before the thread ends, so thread A's exception won't be cleared by thread B.

my impression is that many (all?) of the code clearing other threads' exception are just broken: #2481

Do you mean to unify wasm_runtime_set_exception(inst, NULL) and wasm_runtime_clear_exception(inst), and to remove some unneeded exception clear?

I believe we found the issue when testing the wasi-thread related test cases and then fixed it; the issue occurred only occasionally. If we want to reproduce it, we may try running the wasi-thread cases many times.

i guess i will restore this (IMO wrong) behavior for now because it isn't the main point of this PR.

Yes, we had better restore this and fix it in another PR if needed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i guess i will restore this (IMO wrong) behavior for now because it isn't the main point of this PR.

done

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if it's a problem, real traps have the same problems, don't they?

No, real traps are spread to other threads and terminate flags are also set for other threads, but the trap isn't cleared before the thread ends, so thread A's exception won't be cleared by thread B.

a real trap can misbehave in a similar way if the exception is suddenly cleared by the other thread.

my impression is that many (all?) of the code clearing other threads' exception are just broken: #2481

Do you mean to unify wasm_runtime_set_exception(inst, NULL) and wasm_runtime_clear_exception(inst), and to remove some unneeded exception clear?

yes.
unifying two api is just cosmetic.
the other one is a bit cumbersome. i guess we need to investigate one-by-one to see if it should clear other threads' exceptions. (i guess most of them need to clear only the local exception.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, thanks, it really takes effort to investigate them one by one.

}
#if WASM_ENABLE_SHARED_MEMORY != 0
if (curr_wasm_inst->memory_count > 0)
shared_memory_unlock(curr_wasm_inst->memories[0]);
#endif
else {
wasm_inst->cur_exception[0] = '\0';
}
exception_unlock(wasm_inst);

/* Terminate the thread so it can exit from dead loops */
set_thread_cancel_flags(curr_exec_env);
}
}

static void
clear_exception_visitor(void *node, void *user_data)
{
WASMExecEnv *exec_env = (WASMExecEnv *)user_data;
WASMExecEnv *curr_exec_env = (WASMExecEnv *)node;

if (curr_exec_env != exec_env) {
WASMModuleInstance *curr_wasm_inst =
(WASMModuleInstance *)get_module_inst(curr_exec_env);

#if WASM_ENABLE_SHARED_MEMORY != 0
if (curr_wasm_inst->memory_count > 0)
shared_memory_lock(curr_wasm_inst->memories[0]);
#endif
curr_wasm_inst->cur_exception[0] = '\0';
#if WASM_ENABLE_SHARED_MEMORY != 0
if (curr_wasm_inst->memory_count > 0)
shared_memory_unlock(curr_wasm_inst->memories[0]);
#endif
if (data->exception != NULL) {
set_thread_cancel_flags(exec_env);
}
}
}

/*
 * Diff hunk for wasm_cluster_spread_exception(): the `bool clear` form and
 * the `const char *exception` form are listed together without +/-
 * markers, so this text is not compilable as shown.
 *
 * In the `const char *exception` form: fills a spread_exception_data with
 * `exec_env` as the entry to skip and `exception` as the text, then, while
 * holding cluster->lock, sets cluster->has_exception to whether
 * `exception` is non-NULL and passes set_exception_visitor with that data
 * to traverse_list() over cluster->exec_env_list.
 */
void
wasm_cluster_spread_exception(WASMExecEnv *exec_env, bool clear)
wasm_cluster_spread_exception(WASMExecEnv *exec_env, const char *exception)
{
const bool has_exception = exception != NULL;
WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
bh_assert(cluster);

struct spread_exception_data data;
data.skip = exec_env;
data.exception = exception;

os_mutex_lock(&cluster->lock);
/* `bool clear` form: choose between two visitors based on the flag. */
cluster->has_exception = !clear;
traverse_list(&cluster->exec_env_list,
clear ? clear_exception_visitor : set_exception_visitor,
exec_env);
/* `const char *exception` form: one visitor handles both set and clear. */
cluster->has_exception = has_exception;
traverse_list(&cluster->exec_env_list, set_exception_visitor, &data);
os_mutex_unlock(&cluster->lock);
}

Expand Down Expand Up @@ -1353,3 +1336,21 @@ wasm_cluster_is_thread_terminated(WASMExecEnv *exec_env)

return is_thread_terminated;
}

/**
 * Acquire the lock taken around reads and writes of a module instance's
 * current exception.
 *
 * @param module_inst the module instance about to be accessed; not used
 *        at present because a single global mutex serves every instance
 */
void
exception_lock(WASMModuleInstance *module_inst)
{
    /*
     * Note: this lock could be per module instance if desirable.
     * We can revisit on AOT version bump.
     * It probably doesn't matter though because the exception handling
     * logic should not be executed too frequently anyway.
     */
    (void)module_inst; /* unused while the lock is global */
    os_mutex_lock(&_exception_lock);
}

/**
 * Release the lock previously acquired with exception_lock().
 *
 * @param module_inst the module instance that was accessed; not used at
 *        present because a single global mutex serves every instance
 */
void
exception_unlock(WASMModuleInstance *module_inst)
{
    (void)module_inst; /* unused while the lock is global */
    os_mutex_unlock(&_exception_lock);
}
2 changes: 1 addition & 1 deletion core/iwasm/libraries/thread-mgr/thread_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ WASMExecEnv *
wasm_clusters_search_exec_env(WASMModuleInstanceCommon *module_inst);

void
wasm_cluster_spread_exception(WASMExecEnv *exec_env, bool clear);
wasm_cluster_spread_exception(WASMExecEnv *exec_env, const char *exception);

WASMExecEnv *
wasm_cluster_spawn_exec_env(WASMExecEnv *exec_env);
Expand Down