Skip to content

Commit 1aa22a4

Browse files
rennoxdahlerlend
authored andcommitted
BUG#37661257 assertion in jit-executor at execute
This patch improves the exception handling, specially in the boundaries between polyglot and C++, where some exception types were not being handled correctly, leading to crashes on specific events. Also introduces a better handling not only of memory errors but any critical resource that may lead to malfunction of the context. Also removes the on demand garbage collection logic which now is unnecessary since establishing the RAM limit gives automatic GC as needed. Additionally updates the default RAM size to be used to 1GB. Change-Id: Id5c685cf923357d9b9dad9f17cd3374896e030a5
1 parent c3576fa commit 1aa22a4

25 files changed

+528
-369
lines changed

router/src/harness/include/mysql/harness/waiting_queue_adaptor.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#ifndef MYSQL_HARNESS_WAITING_QUEUE_ADAPTOR_INCLUDED
2727
#define MYSQL_HARNESS_WAITING_QUEUE_ADAPTOR_INCLUDED
2828

29+
#include <chrono>
2930
#include <condition_variable>
3031
#include <mutex>
3132

@@ -78,6 +79,31 @@ class WaitingQueueAdaptor {
7879
return true;
7980
}
8081

82+
/**
83+
* Attempt to dequeue an item from a queue for a given period of time.
84+
*
85+
* @param item dequeued item if queue was not empty within the specified time.
86+
* @param timeout max time to wait for the item to be available.
87+
*
88+
* @returns item
89+
* @retval true item dequeued
90+
* @retval false queue was empty
91+
*/
92+
template <class Rep, class Period>
93+
bool try_pop(value_type &item, std::chrono::duration<Rep, Period> timeout) {
94+
{
95+
std::unique_lock<std::mutex> lk(dequeueable_cond_mutex_);
96+
if (!dequeueable_cond_.wait_for(
97+
lk, timeout, [this, &item] { return q_.dequeue(item); })) {
98+
return false;
99+
}
100+
}
101+
102+
notify_enqueueable();
103+
104+
return true;
105+
}
106+
81107
/**
82108
* enqueue item into queue.
83109
*

router/src/jit_executor/include/mysqlrouter/jit_executor_component.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class IServiceHandlers {
7777

7878
virtual void release_debug_context() = 0;
7979

80-
virtual void init() = 0;
80+
virtual bool init() = 0;
8181
virtual void teardown() = 0;
8282

8383
virtual std::chrono::seconds idle_time() const = 0;

router/src/jit_executor/include/mysqlrouter/jit_executor_context.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class IContext {
4646
const std::vector<shcore::Value> &parameters,
4747
int timeout, ResultType result_type,
4848
const GlobalCallbacks &global_callbacks) = 0;
49-
virtual bool got_memory_error() const = 0;
49+
virtual bool got_resources_error() const = 0;
5050
};
5151

5252
} // namespace jit_executor

router/src/jit_executor/src/jit_executor_common_context.cc

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
#include <iostream>
2929

30+
#include "include/my_thread.h"
3031
#include "jit_executor_javascript.h"
3132
#include "mysql/harness/logging/logging.h"
3233
#include "mysqlrouter/polyglot_file_system.h"
@@ -37,8 +38,7 @@ IMPORT_LOG_FUNCTIONS()
3738

3839
namespace jit_executor {
3940

40-
bool CommonContext::m_fatal_error = false;
41-
std::string CommonContext::m_fatal_error_description;
41+
bool CommonContext::m_global_fatal_error = false;
4242

4343
CommonContext::CommonContext(
4444
const std::shared_ptr<shcore::polyglot::IFile_system> &fs,
@@ -52,9 +52,16 @@ CommonContext::CommonContext(
5252

5353
CommonContext::~CommonContext() {
5454
// We are done
55+
{
56+
std::scoped_lock lock(m_mutex);
57+
m_terminated = true;
58+
}
59+
5560
m_finish_condition.notify_one();
56-
m_life_cycle_thread->join();
57-
m_life_cycle_thread.reset();
61+
if (m_life_cycle_thread) {
62+
m_life_cycle_thread->join();
63+
m_life_cycle_thread.reset();
64+
}
5865
}
5966

6067
void CommonContext::fatal_error() {
@@ -65,7 +72,7 @@ void CommonContext::fatal_error() {
6572

6673
// In our case, we simply set a flag indicating to log further attempts to use
6774
// JavaScript contexts
68-
m_fatal_error = true;
75+
m_global_fatal_error = true;
6976
}
7077

7178
void CommonContext::flush() {}
@@ -139,8 +146,9 @@ bool CommonContext::start() {
139146

140147
// Now waits for the destruction indication
141148
{
142-
std::unique_lock<std::mutex> lock(m_init_mutex);
143-
m_init_condition.wait(lock, [this]() { return m_initialized; });
149+
std::unique_lock<std::mutex> lock(m_mutex);
150+
m_init_condition.wait(lock,
151+
[this]() { return m_initialized || m_fatal_error; });
144152
}
145153

146154
return !m_fatal_error;
@@ -155,23 +163,33 @@ bool CommonContext::start() {
155163
* above condition.
156164
*/
157165
void CommonContext::life_cycle_thread() {
166+
my_thread_self_setname("Jit-Common");
167+
std::optional<std::string> init_error;
168+
158169
try {
159170
initialize(m_isolate_args);
160171
} catch (const shcore::polyglot::Polyglot_generic_error &error) {
161172
// This error is not reported by graal but it is a fatal error!
162-
m_fatal_error = true;
163-
m_fatal_error_description = error.message();
173+
init_error = error.message();
164174
}
165175

166-
m_initialized = true;
176+
{
177+
std::scoped_lock lock(m_mutex);
178+
if (init_error.has_value()) {
179+
m_fatal_error = true;
180+
m_fatal_error_description = *init_error;
181+
} else {
182+
m_initialized = true;
183+
}
184+
}
167185

168186
// Tell the constructor we are done
169187
m_init_condition.notify_one();
170188

171189
// Now waits for the finalization indication
172190
{
173-
std::unique_lock<std::mutex> lock(m_finish_mutex);
174-
m_finish_condition.wait(lock);
191+
std::unique_lock<std::mutex> lock(m_mutex);
192+
m_finish_condition.wait(lock, [this]() { return m_terminated; });
175193
}
176194

177195
finalize();

router/src/jit_executor/src/jit_executor_common_context.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,17 +92,19 @@ class CommonContext : public shcore::polyglot::Polyglot_common_context {
9292
shcore::Dictionary_t m_globals;
9393

9494
std::unique_ptr<std::thread> m_life_cycle_thread;
95-
std::mutex m_init_mutex;
95+
std::mutex m_mutex;
9696
std::condition_variable m_init_condition;
9797

9898
bool m_initialized = false;
99+
bool m_terminated = false;
99100

100101
std::mutex m_finish_mutex;
101102
std::condition_variable m_finish_condition;
102103

103104
// Global fatal error flag to indicate when the VM was ended
104-
static bool m_fatal_error;
105-
static std::string m_fatal_error_description;
105+
static bool m_global_fatal_error;
106+
bool m_fatal_error;
107+
std::string m_fatal_error_description;
106108
std::vector<std::string> m_isolate_args;
107109
};
108110

router/src/jit_executor/src/jit_executor_component.cc

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ void JitExecutorComponent::update_active_contexts(
110110
std::unordered_map<std::string, std::shared_ptr<IServiceHandlers>>
111111
all_context_handlers = std::move(m_service_context_handlers);
112112

113+
std::unordered_map<std::string, std::shared_ptr<IServiceHandlers>>
114+
candidate_context_handlers;
115+
113116
uint64_t total_pool = 0;
114117
for (const auto &it : all_context_handlers) {
115118
// Adds the existing context handler to be discarded
@@ -127,7 +130,7 @@ void JitExecutorComponent::update_active_contexts(
127130
// Creates a new handler from the existing one
128131
auto source_handler =
129132
std::dynamic_pointer_cast<ServiceHandlers>(it.second);
130-
m_service_context_handlers.emplace(
133+
candidate_context_handlers.emplace(
131134
it.first, std::make_shared<ServiceHandlers>(*source_handler.get()));
132135
}
133136
}
@@ -139,20 +142,22 @@ void JitExecutorComponent::update_active_contexts(
139142

140143
total_pool += replacement.second->pool_size();
141144

142-
m_service_context_handlers.emplace(std::move(replacement));
145+
candidate_context_handlers.emplace(std::move(replacement));
143146
}
144147

145148
// Now updates the memory limit for each active handler and starts it
146149
if (m_global_config.maximum_ram_size.has_value()) {
147150
uint64_t mem_per_pool_item = *m_global_config.maximum_ram_size / total_pool;
148151

149-
for (const auto &it : m_service_context_handlers) {
152+
for (const auto &it : candidate_context_handlers) {
150153
it.second->set_max_heap_size(mem_per_pool_item * it.second->pool_size());
151154
}
152155
}
153156

154-
for (const auto &it : m_service_context_handlers) {
155-
it.second->init();
157+
for (const auto &it : candidate_context_handlers) {
158+
if (it.second->init()) {
159+
m_service_context_handlers.emplace(it.first, it.second);
160+
}
156161
}
157162
}
158163

@@ -168,11 +173,14 @@ std::shared_ptr<IContextHandle> JitExecutorComponent::get_context(
168173
update_active_contexts(
169174
{service_id, std::make_shared<ServiceHandlers>(config)});
170175

171-
return m_service_context_handlers.at(service_id)
172-
->get_context(debug_port);
176+
it = m_service_context_handlers.find(service_id);
173177
}
174178

175-
return it->second->get_context(debug_port);
179+
if (it != m_service_context_handlers.end()) {
180+
return it->second->get_context(debug_port);
181+
} else {
182+
throw std::runtime_error("error to go below..., needed?");
183+
}
176184
} catch (const std::runtime_error &) {
177185
// If failed to create a context, then let's try re-creating the whole
178186
// pool, if this failed on a brand new pool, then there's nothing else to

router/src/jit_executor/src/jit_executor_context_pool.cc

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,12 @@ ContextPool::ContextPool(size_t size, CommonContext *common_context)
4141
m_pool = std::make_unique<Pool<IContext *>>(
4242
size,
4343
[this]() -> JavaScriptContext * {
44-
return std::make_unique<JavaScriptContext>(m_common_context).release();
44+
auto context = std::make_unique<JavaScriptContext>(m_common_context);
45+
if (context->got_initialization_error()) {
46+
return nullptr;
47+
}
48+
49+
return context.release();
4550
},
4651
[](IContext *ctx) { delete ctx; });
4752
}
@@ -59,8 +64,8 @@ std::shared_ptr<PooledContextHandle> ContextPool::get_context() {
5964
}
6065

6166
void ContextPool::release(IContext *ctx) {
62-
if (ctx->got_memory_error()) {
63-
m_pool->on_memory_error(ctx);
67+
if (ctx->got_resources_error()) {
68+
m_pool->on_resources_error(ctx);
6469
} else {
6570
m_pool->release(ctx);
6671
}

router/src/jit_executor/src/jit_executor_context_pool.h

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,14 @@ class Pool {
8383
}
8484
}
8585

86-
m_active_items++;
87-
return m_item_factory();
86+
try {
87+
T item = m_item_factory();
88+
increase_active_items();
89+
return item;
90+
} catch (const std::runtime_error &err) {
91+
// An initialization failure would raise this exception
92+
return {};
93+
}
8894
}
8995

9096
void release(T ctx) {
@@ -93,17 +99,15 @@ class Pool {
9399

94100
if (!m_teardown && m_items.size() < m_pool_size) {
95101
m_items.push_back(ctx);
96-
m_item_availability.notify_one();
102+
m_item_availability.notify_all();
97103
return;
98104
}
99105
}
100106

101-
m_active_items--;
102-
m_teared_down.notify_one();
103-
m_item_availability.notify_one();
104107
if (m_item_destructor) {
105108
m_item_destructor(ctx);
106109
}
110+
decrease_active_items();
107111
}
108112

109113
void teardown() {
@@ -116,15 +120,15 @@ class Pool {
116120
auto item = m_items.front();
117121
m_items.pop_front();
118122

119-
m_active_items--;
120123
if (m_item_destructor) {
121124
m_item_destructor(item);
122125
}
126+
decrease_active_items();
123127
}
124128

125129
// Waits until all the contexts created by the pool get released
126130
std::unique_lock<std::mutex> lock(m_mutex);
127-
m_teared_down.wait(lock, [this]() { return m_active_items == 0; });
131+
m_item_availability.wait(lock, [this]() { return m_active_items == 0; });
128132
}
129133

130134
size_t active_items() const {
@@ -135,21 +139,34 @@ class Pool {
135139
/**
136140
* Discards the affected context and turns ON contention mode for the pool
137141
*/
138-
void on_memory_error(T ctx) {
139-
std::scoped_lock lock(m_mutex);
140-
m_contention_mode = true;
141-
142-
m_active_items--;
143-
m_teared_down.notify_one();
144-
m_item_availability.notify_one();
142+
void on_resources_error(T ctx) {
145143
if (m_item_destructor) {
146144
m_item_destructor(ctx);
147145
}
146+
decrease_active_items(true);
148147
}
149148

150149
private:
150+
void increase_active_items() {
151+
{
152+
std::scoped_lock lock(m_mutex);
153+
m_active_items++;
154+
}
155+
}
156+
157+
void decrease_active_items(bool set_contention_mode = false) {
158+
{
159+
std::scoped_lock lock(m_mutex);
160+
m_active_items--;
161+
162+
if (set_contention_mode) {
163+
m_contention_mode = true;
164+
}
165+
}
166+
m_item_availability.notify_all();
167+
}
168+
151169
std::mutex m_mutex;
152-
std::condition_variable m_teared_down;
153170
std::condition_variable m_item_availability;
154171
bool m_teardown = false;
155172
size_t m_pool_size;

0 commit comments

Comments
 (0)