Skip to content

Commit 539b706

Browse files
authored
fix(base): thread-local state of v8::Isolate can be corrupted while initializing DenoRuntime in Worker::start (#416)
* fix(base): thread-local state of `v8::Isolate` can be corrupted while initializing DenoRuntime in `Worker::start`
* Revert "fix(base): partial revert some changes that were introduced from deno upgrade PR (#415)" This reverts commit db2cc41.
* stamp: more robustly
1 parent e8759ec commit 539b706

File tree

2 files changed

+89
-51
lines changed

2 files changed

+89
-51
lines changed

crates/base/src/deno_runtime.rs

Lines changed: 66 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,10 @@ use std::sync::{Arc, RwLock};
4040
use std::task::Poll;
4141
use std::thread::ThreadId;
4242
use std::time::Duration;
43-
use tokio::sync::mpsc;
43+
use tokio::sync::{mpsc, OwnedSemaphorePermit, Semaphore};
4444
use tokio::time::interval;
45-
use tokio_util::sync::CancellationToken;
45+
use tokio_util::sync::{CancellationToken, PollSemaphore};
46+
use tracing::debug;
4647

4748
use crate::snapshot;
4849
use event_worker::events::{EventMetadata, WorkerEventWithMetadata};
@@ -93,6 +94,15 @@ pub static SHOULD_USE_VERBOSE_DEPRECATED_API_WARNING: OnceCell<bool> = OnceCell:
9394
pub static SHOULD_INCLUDE_MALLOCED_MEMORY_ON_MEMCHECK: OnceCell<bool> = OnceCell::new();
9495
pub static MAYBE_DENO_VERSION: OnceCell<String> = OnceCell::new();
9596

97+
thread_local! {
98+
// NOTE: If we hit an `.await` point while initializing a
99+
// DenoRuntime, the current v8 isolate's thread-local state can be
100+
// corrupted by a task initializing another DenoRuntime, so we must serialize
101+
// runtime creation with a Semaphore.
102+
103+
static RUNTIME_CREATION_SEM: Arc<Semaphore> = Arc::new(Semaphore::new(1));
104+
}
105+
96106
#[ctor]
97107
fn init_v8_platform() {
98108
set_v8_flags();
@@ -219,6 +229,16 @@ impl<RuntimeContext> Drop for DenoRuntime<RuntimeContext> {
219229
}
220230
}
221231

232+
impl DenoRuntime<()> {
233+
pub async fn acquire() -> OwnedSemaphorePermit {
234+
RUNTIME_CREATION_SEM
235+
.with(|v| v.clone())
236+
.acquire_owned()
237+
.await
238+
.unwrap()
239+
}
240+
}
241+
222242
impl<RuntimeContext> DenoRuntime<RuntimeContext>
223243
where
224244
RuntimeContext: GetRuntimeContext,
@@ -702,7 +722,7 @@ where
702722
let mut accumulated_cpu_time_ns = 0i64;
703723

704724
let has_inspector = self.inspector().is_some();
705-
let mod_result_rx = unsafe {
725+
let mut mod_result_rx = unsafe {
706726
self.js_runtime.v8_isolate().enter();
707727

708728
if has_inspector {
@@ -750,38 +770,38 @@ where
750770
};
751771
}
752772

753-
// {
754-
// let event_loop_fut = self.run_event_loop(
755-
// name.as_deref(),
756-
// current_thread_id,
757-
// &maybe_cpu_usage_metrics_tx,
758-
// &mut accumulated_cpu_time_ns,
759-
// );
760-
761-
// let mod_result = tokio::select! {
762-
// // Not using biased mode leads to non-determinism for relatively simple
763-
// // programs.
764-
// biased;
765-
766-
// maybe_mod_result = &mut mod_result_rx => {
767-
// debug!("received module evaluate {:#?}", maybe_mod_result);
768-
// maybe_mod_result
769-
770-
// }
771-
772-
// event_loop_result = event_loop_fut => {
773-
// if let Err(err) = event_loop_result {
774-
// Err(anyhow!("event loop error while evaluating the module: {}", err))
775-
// } else {
776-
// mod_result_rx.await
777-
// }
778-
// }
779-
// };
780-
781-
// if let Err(err) = mod_result {
782-
// return (Err(err), get_accumulated_cpu_time_ms!());
783-
// }
784-
// }
773+
{
774+
let event_loop_fut = self.run_event_loop(
775+
name.as_deref(),
776+
current_thread_id,
777+
&maybe_cpu_usage_metrics_tx,
778+
&mut accumulated_cpu_time_ns,
779+
);
780+
781+
let mod_result = tokio::select! {
782+
// Not using biased mode leads to non-determinism for relatively simple
783+
// programs.
784+
biased;
785+
786+
maybe_mod_result = &mut mod_result_rx => {
787+
debug!("received module evaluate {:#?}", maybe_mod_result);
788+
maybe_mod_result
789+
790+
}
791+
792+
event_loop_result = event_loop_fut => {
793+
if let Err(err) = event_loop_result {
794+
Err(anyhow!("event loop error while evaluating the module: {}", err))
795+
} else {
796+
mod_result_rx.await
797+
}
798+
}
799+
};
800+
801+
if let Err(err) = mod_result {
802+
return (Err(err), get_accumulated_cpu_time_ms!());
803+
}
804+
}
785805

786806
if let Err(err) = self
787807
.run_event_loop(
@@ -798,10 +818,6 @@ where
798818
);
799819
}
800820

801-
if let Err(err) = mod_result_rx.await {
802-
return (Err(err), get_accumulated_cpu_time_ms!());
803-
}
804-
805821
(Ok(()), get_accumulated_cpu_time_ms!())
806822
}
807823

@@ -818,8 +834,19 @@ where
818834
let termination_request_token = self.termination_request_token.clone();
819835

820836
let mem_check_state = is_user_worker.then(|| self.mem_check.clone());
837+
let mut poll_sem = None::<PollSemaphore>;
821838

822839
poll_fn(move |cx| {
840+
if poll_sem.is_none() {
841+
poll_sem = Some(RUNTIME_CREATION_SEM.with(|v| PollSemaphore::new(v.clone())));
842+
}
843+
844+
let Poll::Ready(Some(_permit)) = poll_sem.as_mut().unwrap().poll_acquire(cx) else {
845+
return Poll::Pending;
846+
};
847+
848+
poll_sem = None;
849+
823850
// INVARIANT: Other threads can only steal the current task when the LIFO
824851
// task scheduler heuristic is disabled. Turning off the heuristic is
825852
// unstable now, so it's not considered.

crates/base/src/rt_worker/worker.rs

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,23 @@ impl Worker {
126126
.then(unbounded_channel::<CPUUsageMetrics>)
127127
.unzip();
128128

129+
let permit = DenoRuntime::acquire().await;
129130
let result = match DenoRuntime::new(opts, inspector).await {
130-
Ok(mut new_runtime) => {
131+
Ok(new_runtime) => {
132+
let mut runtime = scopeguard::guard(new_runtime, |mut runtime| {
133+
unsafe {
134+
runtime.js_runtime.v8_isolate().enter();
135+
}
136+
});
137+
138+
unsafe {
139+
runtime.js_runtime.v8_isolate().exit();
140+
}
141+
142+
drop(permit);
143+
131144
let metric_src = {
132-
let js_runtime = &mut new_runtime.js_runtime;
145+
let js_runtime = &mut runtime.js_runtime;
133146
let metric_src = WorkerMetricSource::from_js_runtime(js_runtime);
134147

135148
if worker_kind.is_main_worker() {
@@ -164,7 +177,7 @@ impl Worker {
164177
// cputimer is returned from supervisor and assigned here to keep it in scope.
165178
let Ok((maybe_timer, cancel_token)) = create_supervisor(
166179
worker_key.unwrap_or(Uuid::nil()),
167-
&mut new_runtime,
180+
&mut runtime,
168181
supervisor_policy,
169182
termination_event_tx,
170183
pool_msg_tx.clone(),
@@ -181,12 +194,12 @@ impl Worker {
181194

182195
pending().boxed()
183196
} else if let Some(token) = termination_token.clone() {
184-
let is_terminated = new_runtime.is_terminated.clone();
197+
let is_terminated = runtime.is_terminated.clone();
185198
let termination_request_token =
186-
new_runtime.termination_request_token.clone();
199+
runtime.termination_request_token.clone();
187200

188201
let (waker, thread_safe_handle) = {
189-
let js_runtime = &mut new_runtime.js_runtime;
202+
let js_runtime = &mut runtime.js_runtime;
190203
(
191204
js_runtime.op_state().borrow().waker.clone(),
192205
js_runtime.v8_isolate().thread_safe_handle(),
@@ -247,19 +260,14 @@ impl Worker {
247260
});
248261
});
249262

250-
let result = unsafe {
251-
let mut runtime = scopeguard::guard(new_runtime, |mut runtime| {
252-
runtime.js_runtime.v8_isolate().enter();
253-
});
254-
263+
let result = {
255264
let supervise_cancel_token =
256265
scopeguard::guard_on_unwind(supervise_cancel_token, |token| {
257266
if let Some(token) = token {
258267
token.cancel();
259268
}
260269
});
261270

262-
runtime.js_runtime.v8_isolate().exit();
263271

264272
let result = method_cloner
265273
.handle_creation(
@@ -303,8 +311,11 @@ impl Worker {
303311
}
304312

305313
Err(err) => {
314+
drop(permit);
315+
306316
let _ = booter_signal
307317
.send(Err(anyhow!("worker boot error: {err}")));
318+
308319
method_cloner.handle_error(err)
309320
}
310321
};

0 commit comments

Comments
 (0)