Skip to content

Commit 4df2a95

Browse files
liamappelbeCommit Queue
authored andcommitted
[vm] Simplify FfiCallbackMetadata locking
Remove the locks in DLRT_GetFfiCallbackMetadata, and switch from safepoint locks to ordinary locks in FfiCallbackMetadata. This fixes the deadlock bugs at the cost of reducing thread safety in error cases. Some cases that would have failed gracefully will now have undefined behavior. Also, FATAL instead of no-op if a dead callback is invoked. Fixes: #61372 Change-Id: Ie09fca3c629ad61b2ffbdd029269338f2706df4b TEST=CI, particularly many_listener_callbacks_test on reload bot Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/449160 Commit-Queue: Liam Appelbe <[email protected]> Reviewed-by: Martin Kustermann <[email protected]> Reviewed-by: Slava Egorov <[email protected]>
1 parent d866054 commit 4df2a95

File tree

5 files changed

+129
-115
lines changed

5 files changed

+129
-115
lines changed

runtime/vm/ffi_callback_metadata.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ FfiCallbackMetadata::Trampoline FfiCallbackMetadata::CreateMetadataEntry(
251251
uword target_entry_point,
252252
uint64_t context,
253253
MetadataEntry** list_head) {
254-
SafepointMutexLocker locker(&lock_);
254+
MutexLocker locker(&lock_);
255255
EnsureFreeListNotEmptyLocked();
256256
ASSERT(free_list_head_ != nullptr);
257257
MetadataEntry* entry = free_list_head_;
@@ -316,7 +316,7 @@ void FfiCallbackMetadata::DeleteAllCallbacks(MetadataEntry** list_head) {
316316

317317
void FfiCallbackMetadata::DeleteCallback(Trampoline trampoline,
318318
MetadataEntry** list_head) {
319-
SafepointMutexLocker locker(&lock_);
319+
MutexLocker locker(&lock_);
320320
auto* entry = MetadataEntryOfTrampoline(trampoline);
321321
ASSERT(entry->metadata()->IsLive());
322322
auto* prev = entry->list_prev_;
@@ -561,7 +561,8 @@ FfiCallbackMetadata::MetadataEntryOfTrampoline(Trampoline trampoline) const {
561561
#endif
562562
}
563563

564-
FfiCallbackMetadata::Metadata FfiCallbackMetadata::LookupMetadataForTrampoline(
564+
FfiCallbackMetadata::Metadata
565+
FfiCallbackMetadata::LookupMetadataForTrampolineUnlocked(
565566
Trampoline trampoline) const {
566567
return *MetadataEntryOfTrampoline(trampoline)->metadata();
567568
}

runtime/vm/ffi_callback_metadata.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -256,10 +256,7 @@ class FfiCallbackMetadata {
256256
};
257257

258258
// Returns the Metadata object for the given trampoline.
259-
Metadata LookupMetadataForTrampoline(Trampoline trampoline) const;
260-
261-
// The mutex that guards creation and destruction of callbacks.
262-
Mutex* lock() { return &lock_; }
259+
Metadata LookupMetadataForTrampolineUnlocked(Trampoline trampoline) const;
263260

264261
// The number of trampolines that can be stored on a single page.
265262
static constexpr intptr_t NumCallbackTrampolinesPerPage() {
@@ -351,6 +348,13 @@ class FfiCallbackMetadata {
351348
ClosurePtr closure_ptr);
352349

353350
// Visible for testing.
351+
#if defined(TESTING)
352+
353+
public:
354+
#else // TESTING
355+
356+
private:
357+
#endif // TESTING
354358
MetadataEntry* MetadataEntryOfTrampoline(Trampoline trampoline) const;
355359
Trampoline TrampolineOfMetadataEntry(MetadataEntry* metadata) const;
356360

runtime/vm/ffi_callback_metadata_test.cc

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_CreateSyncFfiCallback) {
102102

103103
{
104104
FfiCallbackMetadata::Metadata m1 =
105-
fcm->LookupMetadataForTrampoline(tramp1);
105+
fcm->LookupMetadataForTrampolineUnlocked(tramp1);
106106
EXPECT(m1.IsLive());
107107
EXPECT_EQ(m1.target_isolate(), isolate);
108108
EXPECT_EQ(m1.target_entry_point(), code.EntryPoint());
@@ -124,7 +124,7 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_CreateSyncFfiCallback) {
124124

125125
{
126126
FfiCallbackMetadata::Metadata m2 =
127-
fcm->LookupMetadataForTrampoline(tramp2);
127+
fcm->LookupMetadataForTrampolineUnlocked(tramp2);
128128
EXPECT(m2.IsLive());
129129
EXPECT_EQ(m2.target_isolate(), isolate);
130130
EXPECT_EQ(m2.target_entry_point(), code.EntryPoint());
@@ -147,7 +147,7 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_CreateSyncFfiCallback) {
147147
{
148148
isolate->DeleteFfiCallback(tramp1);
149149
FfiCallbackMetadata::Metadata m1 =
150-
fcm->LookupMetadataForTrampoline(tramp1);
150+
fcm->LookupMetadataForTrampolineUnlocked(tramp1);
151151
EXPECT(!m1.IsLive());
152152

153153
// head -> tramp2
@@ -160,10 +160,12 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_CreateSyncFfiCallback) {
160160

161161
{
162162
// Isolate has shut down, so all callbacks should be deleted.
163-
FfiCallbackMetadata::Metadata m1 = fcm->LookupMetadataForTrampoline(tramp1);
163+
FfiCallbackMetadata::Metadata m1 =
164+
fcm->LookupMetadataForTrampolineUnlocked(tramp1);
164165
EXPECT(!m1.IsLive());
165166

166-
FfiCallbackMetadata::Metadata m2 = fcm->LookupMetadataForTrampoline(tramp2);
167+
FfiCallbackMetadata::Metadata m2 =
168+
fcm->LookupMetadataForTrampolineUnlocked(tramp2);
167169
EXPECT(!m2.IsLive());
168170
}
169171
}
@@ -197,7 +199,7 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_CreateAsyncFfiCallback) {
197199

198200
{
199201
FfiCallbackMetadata::Metadata m1 =
200-
fcm->LookupMetadataForTrampoline(tramp1);
202+
fcm->LookupMetadataForTrampolineUnlocked(tramp1);
201203
EXPECT(m1.IsLive());
202204
EXPECT_EQ(m1.target_isolate(), isolate);
203205
EXPECT_EQ(m1.target_entry_point(), code.EntryPoint());
@@ -219,7 +221,7 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_CreateAsyncFfiCallback) {
219221

220222
{
221223
FfiCallbackMetadata::Metadata m2 =
222-
fcm->LookupMetadataForTrampoline(tramp2);
224+
fcm->LookupMetadataForTrampolineUnlocked(tramp2);
223225
EXPECT(m2.IsLive());
224226
EXPECT_EQ(m2.target_isolate(), isolate);
225227
EXPECT_EQ(m2.target_entry_point(), code.EntryPoint());
@@ -242,7 +244,7 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_CreateAsyncFfiCallback) {
242244
{
243245
isolate->DeleteFfiCallback(tramp2);
244246
FfiCallbackMetadata::Metadata m2 =
245-
fcm->LookupMetadataForTrampoline(tramp2);
247+
fcm->LookupMetadataForTrampolineUnlocked(tramp2);
246248
EXPECT(!m2.IsLive());
247249

248250
// head -> tramp1
@@ -255,10 +257,12 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_CreateAsyncFfiCallback) {
255257

256258
{
257259
// Isolate has shut down, so all callbacks should be deleted.
258-
FfiCallbackMetadata::Metadata m1 = fcm->LookupMetadataForTrampoline(tramp1);
260+
FfiCallbackMetadata::Metadata m1 =
261+
fcm->LookupMetadataForTrampolineUnlocked(tramp1);
259262
EXPECT(!m1.IsLive());
260263

261-
FfiCallbackMetadata::Metadata m2 = fcm->LookupMetadataForTrampoline(tramp2);
264+
FfiCallbackMetadata::Metadata m2 =
265+
fcm->LookupMetadataForTrampolineUnlocked(tramp2);
262266
EXPECT(!m2.IsLive());
263267
}
264268
}
@@ -299,7 +303,7 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_CreateIsolateLocalFfiCallback) {
299303

300304
{
301305
FfiCallbackMetadata::Metadata m1 =
302-
fcm->LookupMetadataForTrampoline(tramp1);
306+
fcm->LookupMetadataForTrampolineUnlocked(tramp1);
303307
EXPECT(m1.IsLive());
304308
EXPECT_EQ(m1.target_isolate(), isolate);
305309
EXPECT_EQ(m1.target_entry_point(), code.EntryPoint());
@@ -323,7 +327,7 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_CreateIsolateLocalFfiCallback) {
323327

324328
{
325329
FfiCallbackMetadata::Metadata m2 =
326-
fcm->LookupMetadataForTrampoline(tramp2);
330+
fcm->LookupMetadataForTrampolineUnlocked(tramp2);
327331
EXPECT(m2.IsLive());
328332
EXPECT_EQ(m2.target_isolate(), isolate);
329333
EXPECT_EQ(m2.target_entry_point(), code.EntryPoint());
@@ -346,7 +350,7 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_CreateIsolateLocalFfiCallback) {
346350
{
347351
isolate->DeleteFfiCallback(tramp2);
348352
FfiCallbackMetadata::Metadata m2 =
349-
fcm->LookupMetadataForTrampoline(tramp2);
353+
fcm->LookupMetadataForTrampolineUnlocked(tramp2);
350354
EXPECT(!m2.IsLive());
351355

352356
// head -> tramp1
@@ -359,10 +363,12 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_CreateIsolateLocalFfiCallback) {
359363

360364
{
361365
// Isolate has shut down, so all callbacks should be deleted.
362-
FfiCallbackMetadata::Metadata m1 = fcm->LookupMetadataForTrampoline(tramp1);
366+
FfiCallbackMetadata::Metadata m1 =
367+
fcm->LookupMetadataForTrampolineUnlocked(tramp1);
363368
EXPECT(!m1.IsLive());
364369

365-
FfiCallbackMetadata::Metadata m2 = fcm->LookupMetadataForTrampoline(tramp2);
370+
FfiCallbackMetadata::Metadata m2 =
371+
fcm->LookupMetadataForTrampolineUnlocked(tramp2);
366372
EXPECT(!m2.IsLive());
367373
}
368374
}
@@ -460,7 +466,7 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_DeleteTrampolines) {
460466

461467
// Verify all the callbacks.
462468
for (FfiCallbackMetadata::Trampoline tramp : tramps) {
463-
auto metadata = fcm->LookupMetadataForTrampoline(tramp);
469+
auto metadata = fcm->LookupMetadataForTrampolineUnlocked(tramp);
464470
EXPECT(metadata.IsLive());
465471
EXPECT_EQ(metadata.target_isolate(), isolate);
466472
EXPECT_EQ(static_cast<int>(metadata.trampoline_type()),
@@ -494,7 +500,7 @@ VM_UNIT_TEST_CASE(FfiCallbackMetadata_DeleteTrampolines) {
494500
fcm->DeleteAllCallbacks(&list_head);
495501
EXPECT_EQ(list_head, nullptr);
496502
for (FfiCallbackMetadata::Trampoline tramp : tramps) {
497-
EXPECT(!fcm->LookupMetadataForTrampoline(tramp).IsLive());
503+
EXPECT(!fcm->LookupMetadataForTrampolineUnlocked(tramp).IsLive());
498504
}
499505
}
500506

@@ -574,7 +580,7 @@ static void RunBigRandomMultithreadedTest(uint64_t seed) {
574580

575581
// Verify all the callbacks.
576582
for (const auto& tramp : tramps) {
577-
auto metadata = fcm->LookupMetadataForTrampoline(tramp.tramp);
583+
auto metadata = fcm->LookupMetadataForTrampolineUnlocked(tramp.tramp);
578584
EXPECT(metadata.IsLive());
579585
EXPECT_EQ(metadata.target_isolate(), isolate);
580586
if (metadata.trampoline_type() ==

runtime/vm/runtime_entry.cc

Lines changed: 93 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -4962,85 +4962,58 @@ DEFINE_LEAF_RUNTIME_ENTRY(ExitSafepoint,
49624962
/*argument_count=*/0,
49634963
DLRT_ExitSafepoint);
49644964

4965-
// This is called by a native callback trampoline
4966-
// (see StubCodeCompiler::GenerateFfiCallbackTrampolineStub). Not registered as
4967-
// a runtime entry because we can't use Thread to look it up.
4968-
extern "C" Thread* DLRT_GetFfiCallbackMetadata(
4969-
FfiCallbackMetadata::Trampoline trampoline,
4970-
uword* out_entry_point,
4971-
uword* out_trampoline_type) {
4972-
CHECK_STACK_ALIGNMENT;
4973-
TRACE_RUNTIME_CALL("GetFfiCallbackMetadata %p",
4974-
reinterpret_cast<void*>(trampoline));
4975-
ASSERT(out_entry_point != nullptr);
4976-
ASSERT(out_trampoline_type != nullptr);
4977-
4978-
if (!Isolate::IsolateCreationEnabled()) {
4979-
TRACE_RUNTIME_CALL("GetFfiCallbackMetadata called after shutdown %p",
4980-
reinterpret_cast<void*>(trampoline));
4981-
return nullptr;
4982-
}
4983-
4965+
namespace {
4966+
Thread* HandleAsyncFfiCallback(FfiCallbackMetadata::Metadata metadata,
4967+
uword* out_entry_point,
4968+
uword* out_trampoline_type) {
4969+
// NOTE: This is only thread safe if the user is using the API correctly.
4970+
// Otherwise, the callback could have been deleted and replaced, in which case
4971+
// IsLive would still be true. Or it could have been deleted after we looked
4972+
// it up, and the target isolate could be shut down. We delay recycling
4973+
// callbacks as long as possible, so this check is better than nothing, but
4974+
// it's not infallible. Ultimately it's the user's responsibility to avoid use
4975+
// after free errors. Trying to lock FfiCallbackMetadata::lock_, or any
4976+
// similar lock, leads to deadlocks.
4977+
4978+
*out_trampoline_type = static_cast<uword>(metadata.trampoline_type());
4979+
*out_entry_point = metadata.target_entry_point();
4980+
Isolate* target_isolate = metadata.target_isolate();
4981+
4982+
Isolate* current_isolate = nullptr;
49844983
Thread* current_thread = Thread::Current();
4985-
auto* fcm = FfiCallbackMetadata::Instance();
4986-
auto metadata = fcm->LookupMetadataForTrampoline(trampoline);
4987-
4988-
// Is this an async callback?
4989-
if (metadata.trampoline_type() ==
4990-
FfiCallbackMetadata::TrampolineType::kAsync) {
4991-
// It's possible that the callback was deleted, or the target isolate was
4992-
// shut down, in between looking up the metadata above, and this point. So
4993-
// grab the lock and then check that the callback is still alive.
4994-
MutexLocker locker(fcm->lock());
4995-
auto metadata2 = fcm->LookupMetadataForTrampoline(trampoline);
4996-
*out_trampoline_type = static_cast<uword>(metadata2.trampoline_type());
4997-
4998-
// Check IsLive, but also check that the metdata hasn't changed. This is
4999-
// for the edge case that the callback was destroyed and recycled in between
5000-
// the two lookups.
5001-
if (!metadata.IsLive() || !metadata.IsSameCallback(metadata2)) {
5002-
TRACE_RUNTIME_CALL("GetFfiCallbackMetadata callback deleted %p",
5003-
reinterpret_cast<void*>(trampoline));
5004-
return nullptr;
5005-
}
5006-
5007-
*out_entry_point = metadata.target_entry_point();
5008-
Isolate* target_isolate = metadata.target_isolate();
5009-
5010-
Isolate* current_isolate = nullptr;
5011-
if (current_thread != nullptr) {
5012-
current_isolate = current_thread->isolate();
5013-
if (current_thread->execution_state() != Thread::kThreadInNative) {
5014-
FATAL("Cannot invoke native callback from a leaf call.");
5015-
}
5016-
current_thread->ExitSafepointFromNative();
5017-
current_thread->set_execution_state(Thread::kThreadInVM);
4984+
if (current_thread != nullptr) {
4985+
current_isolate = current_thread->isolate();
4986+
if (current_thread->execution_state() != Thread::kThreadInNative) {
4987+
FATAL("Cannot invoke native callback from a leaf call.");
50184988
}
4989+
current_thread->ExitSafepointFromNative();
4990+
current_thread->set_execution_state(Thread::kThreadInVM);
4991+
}
50194992

5020-
// Enter the temporary isolate. If the current isolate is in the same group
5021-
// as the target isolate, we can skip entering the temp isolate, and marshal
5022-
// the args on the current isolate.
5023-
if (current_isolate == nullptr ||
5024-
current_isolate->group() != target_isolate->group()) {
5025-
if (current_isolate != nullptr) {
5026-
Thread::ExitIsolate(/*isolate_shutdown=*/false);
5027-
}
5028-
target_isolate->group()->EnterTemporaryIsolate();
4993+
// Enter the temporary isolate. If the current isolate is in the same group
4994+
// as the target isolate, we can skip entering the temp isolate, and marshal
4995+
// the args on the current isolate.
4996+
if (current_isolate == nullptr ||
4997+
current_isolate->group() != target_isolate->group()) {
4998+
if (current_isolate != nullptr) {
4999+
Thread::ExitIsolate(/*isolate_shutdown=*/false);
50295000
}
5030-
Thread* const temp_thread = Thread::Current();
5031-
ASSERT(temp_thread != nullptr);
5032-
temp_thread->set_unboxed_int64_runtime_arg(metadata.send_port());
5033-
temp_thread->set_unboxed_int64_runtime_second_arg(
5034-
reinterpret_cast<intptr_t>(current_isolate));
5035-
ASSERT(!temp_thread->IsAtSafepoint());
5036-
return temp_thread;
5001+
target_isolate->group()->EnterTemporaryIsolate();
50375002
}
5003+
Thread* const temp_thread = Thread::Current();
5004+
ASSERT(temp_thread != nullptr);
5005+
temp_thread->set_unboxed_int64_runtime_arg(metadata.send_port());
5006+
temp_thread->set_unboxed_int64_runtime_second_arg(
5007+
reinterpret_cast<intptr_t>(current_isolate));
5008+
ASSERT(!temp_thread->IsAtSafepoint());
5009+
return temp_thread;
5010+
}
5011+
5012+
Thread* HandleSyncFfiCallback(FfiCallbackMetadata::Metadata metadata,
5013+
uword* out_entry_point,
5014+
uword* out_trampoline_type) {
5015+
Thread* current_thread = Thread::Current();
50385016

5039-
// Otherwise, this is a sync callback, so verify that we're already entered
5040-
// into the target isolate.
5041-
if (!metadata.IsLive()) {
5042-
FATAL("Callback invoked after it has been deleted.");
5043-
}
50445017
if (metadata.is_isolate_group_bound()) {
50455018
*out_entry_point = metadata.target_entry_point();
50465019
*out_trampoline_type = static_cast<uword>(metadata.trampoline_type());
@@ -5102,6 +5075,53 @@ extern "C" Thread* DLRT_GetFfiCallbackMetadata(
51025075
(void*)*out_trampoline_type);
51035076
return current_thread;
51045077
}
5078+
} // namespace
5079+
5080+
// This is called by a native callback trampoline
5081+
// (see StubCodeCompiler::GenerateFfiCallbackTrampolineStub). Not registered as
5082+
// a runtime entry because we can't use Thread to look it up.
5083+
extern "C" Thread* DLRT_GetFfiCallbackMetadata(
5084+
FfiCallbackMetadata::Trampoline trampoline,
5085+
uword* out_entry_point,
5086+
uword* out_trampoline_type) {
5087+
CHECK_STACK_ALIGNMENT;
5088+
TRACE_RUNTIME_CALL("GetFfiCallbackMetadata %p",
5089+
reinterpret_cast<void*>(trampoline));
5090+
ASSERT(out_entry_point != nullptr);
5091+
ASSERT(out_trampoline_type != nullptr);
5092+
5093+
if (!Isolate::IsolateCreationEnabled()) {
5094+
FATAL("GetFfiCallbackMetadata called after shutdown %p",
5095+
reinterpret_cast<void*>(trampoline));
5096+
}
5097+
5098+
// NOTE: We access the metadata for `trampoline` without a lock. This is safe
5099+
// because nobody will touch the metadata of the `trampoline` until it's
5100+
// deleted and the `NativeCallable` API requires the isolate to keep the
5101+
// trampoline (and therefore the metadata) alive until C code no longer
5102+
// attempts to call it.
5103+
//
5104+
// If a user of the `NativeCallable` API violates this agreement, we may
5105+
// have a use-after-free scenario here and therefore undefined behavior.
5106+
// We make some best effort to `FATAL()` in obvious cases of undefined
5107+
// behavior, but not all cases will be caught.
5108+
auto metadata =
5109+
FfiCallbackMetadata::Instance()->LookupMetadataForTrampolineUnlocked(
5110+
trampoline);
5111+
5112+
if (!metadata.IsLive()) {
5113+
FATAL("Callback invoked after it has been deleted.");
5114+
}
5115+
5116+
if (metadata.trampoline_type() ==
5117+
FfiCallbackMetadata::TrampolineType::kAsync) {
5118+
return HandleAsyncFfiCallback(metadata, out_entry_point,
5119+
out_trampoline_type);
5120+
} else {
5121+
return HandleSyncFfiCallback(metadata, out_entry_point,
5122+
out_trampoline_type);
5123+
}
5124+
}
51055125

51065126
extern "C" void DLRT_ExitIsolateGroupBoundIsolate() {
51075127
TRACE_RUNTIME_CALL("ExitIsolateGroupBoundIsolate%s", "");

0 commit comments

Comments
 (0)