Skip to content

Commit 8c005e5

Browse files
committed
async-hook stack corruption on macos
1 parent faf4359 commit 8c005e5

File tree

2 files changed

+320
-0
lines changed

2 files changed

+320
-0
lines changed

memory-bank/issues.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
2+
# issues
3+
4+
## async hook stack corruption
5+
6+
CI on macOS async hook stack corruption as race condition in native addon
7+
This error has appeared from time to time in the CI workflow and always on macOS:
8+
9+
```bash
10+
Run yarn test
11+
yarn run v1.22.22
12+
(node:6034) [DEP0169] DeprecationWarning: `url.parse()` behavior is not standardized and prone to errors that have security implications. Use the WHATWG URL API instead. CVEs are not issued for `url.parse()` vulnerabilities.
13+
(Use `node --trace-deprecation ...` to show where the warning was created)
14+
$ node test/support/createdb.js && nyc mocha -R spec --timeout 480000 "test/*.test.js" "test/*.test.mjs"
15+
Creating test database... This may take several minutes.
16+
Error: async hook stack has become corrupted (actual: 573357, expected: 573357)
17+
----- Native stack trace -----
18+
19+
1: 0x104f0dfb8 node::AsyncHooks::FailWithCorruptedAsyncStack(double) [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
20+
2: 0x104f0df79 node::AsyncHooks::pop_async_context(double) [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
21+
3: 0x104ebad39 node::InternalCallbackScope::Close() [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
22+
4: 0x104eba771 node::CallbackScope::~CallbackScope() [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
23+
5: 0x104fa07fc (anonymous namespace)::uvimpl::Work::AfterThreadPoolWork(int) [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
24+
6: 0x104fa0c6f node::ThreadPoolWork::ScheduleWork()::'lambda'(uv_work_s*, int)::operator()(uv_work_s*, int) const [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
25+
7: 0x104fa0b36 node::ThreadPoolWork::ScheduleWork()::'lambda'(uv_work_s*, int)::__invoke(uv_work_s*, int) [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
26+
8: 0x105fbce49 uv__work_done [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
27+
9: 0x105fc1691 uv__async_io [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
28+
10: 0x105fd6876 uv__io_poll [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
29+
11: 0x105fc1c10 uv_run [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
30+
12: 0x104ebc435 node::SpinEventLoopInternal(node::Environment*) [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
31+
13: 0x1050554ae node::NodeMainInstance::Run() [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
32+
14: 0x104f98d1e node::Start(int, char**) [/Users/runner/hostedtoolcache/node/24.15.0/x64/bin/node]
33+
15: 0x20c462530
34+
error Command failed with exit code 1.
35+
info Visit https://yarnpkg.com/en/docs/cli/run for documentation about this command.
36+
```

test/async_hooks_stress.test.js

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
"use strict";
2+
3+
var sqlite3 = require('..');
4+
const assert = require("assert");
5+
const { createHook } = require("async_hooks");
6+
7+
/**
8+
* Stress test for async hook stack integrity.
9+
*
10+
* The bug: napi_delete_async_work() was called from within the N-API complete
11+
* callback (via Baton destructors), which freed the async work object before
12+
* Node.js fired the after/destroy async hooks. This caused a use-after-free
13+
* that corrupted the async hook stack, producing:
14+
* "Error: async hook stack has become corrupted"
15+
*
16+
* The fix: DeferredDelete defers napi_delete_async_work to the next event
17+
* loop tick via uv_idle_t, so Node.js completes the full async hook lifecycle
18+
* before the work object is freed.
19+
*
20+
* NOTE: This is a race condition that depends on timing and memory allocator
21+
* behavior. It manifests more readily on macOS (which uses jemalloc-like
22+
* allocation patterns) than on Linux (where freed memory may still contain
23+
* valid data). The test uses GC pressure and high concurrency to increase
24+
* the probability of triggering the bug if the fix is absent, but it cannot
25+
* guarantee reproduction on every run. The fix is correct regardless based
26+
* on analysis of the N-API async work lifecycle.
27+
*/
28+
describe('async_hooks stress', function() {
29+
this.timeout(60000);
30+
31+
const OPERATIONS = 500;
32+
const ITERATIONS = 3;
33+
34+
/**
35+
* Wait for all async hooks to be destroyed after closing the database.
36+
* DeferredDelete uses uv_idle_t which fires in the idle phase of the
37+
* next event loop iteration, so we need to spin the event loop enough
38+
* times for all destroy hooks to fire.
39+
*/
40+
function waitForDestroyHooks(initIds, destroyIds, timeout, callback) {
41+
const start = Date.now();
42+
function check() {
43+
let allDestroyed = true;
44+
for (const id of initIds) {
45+
if (!destroyIds.has(id)) {
46+
allDestroyed = false;
47+
break;
48+
}
49+
}
50+
if (allDestroyed) {
51+
return callback(null);
52+
}
53+
if (Date.now() - start > timeout) {
54+
const leaked = [];
55+
for (const id of initIds) {
56+
if (!destroyIds.has(id)) leaked.push(id);
57+
}
58+
return callback(new Error(
59+
`Timeout waiting for destroy hooks. ${leaked.length} hooks never destroyed: ${leaked.slice(0, 10).join(', ')}...`
60+
));
61+
}
62+
// Spin the event loop: two setImmediate calls ensure we pass through
63+
// an idle phase where DeferredDelete's uv_idle_t callbacks fire.
64+
setImmediate(() => setImmediate(check));
65+
}
66+
// Start checking after giving the event loop time to process deferred deletions.
67+
setImmediate(() => setImmediate(check));
68+
}
69+
70+
/**
71+
* Force GC if exposed (run with --expose-gc). This increases the chance
72+
* that freed memory gets reused, making use-after-free more likely to
73+
* manifest as corruption rather than silently accessing stale but valid data.
74+
*/
75+
function tryGC() {
76+
if (typeof global.gc === 'function') {
77+
global.gc();
78+
}
79+
}
80+
81+
it('should maintain async hook stack integrity under concurrent operations', function(done) {
82+
const db = new sqlite3.Database(':memory:');
83+
84+
// Track all sqlite3 async work lifecycle events
85+
const initIds = new Set();
86+
const destroyIds = new Set();
87+
const beforeAfterStack = [];
88+
89+
let initCount = 0;
90+
let destroyCount = 0;
91+
92+
const hook = createHook({
93+
init(asyncId, type) {
94+
if (type.startsWith("sqlite3.")) {
95+
initIds.add(asyncId);
96+
initCount++;
97+
}
98+
},
99+
before(asyncId) {
100+
if (initIds.has(asyncId)) {
101+
beforeAfterStack.push(asyncId);
102+
}
103+
},
104+
after(asyncId) {
105+
if (initIds.has(asyncId)) {
106+
// Must match the most recent before() that hasn't been after'd yet
107+
const last = beforeAfterStack.pop();
108+
assert.strictEqual(asyncId, last,
109+
`async hook before/after mismatch: after(${asyncId}) but expected after(${last})`);
110+
}
111+
},
112+
destroy(asyncId) {
113+
if (initIds.has(asyncId)) {
114+
destroyIds.add(asyncId);
115+
destroyCount++;
116+
}
117+
}
118+
});
119+
hook.enable();
120+
121+
// Create a table
122+
db.run("CREATE TABLE IF NOT EXISTS stress_test (id INTEGER PRIMARY KEY, value TEXT)", (err) => {
123+
assert.ifError(err);
124+
125+
let completed = 0;
126+
127+
// Fire many concurrent operations
128+
for (let i = 0; i < OPERATIONS; i++) {
129+
db.run("INSERT INTO stress_test (value) VALUES (?)", `val-${i}`, (err) => {
130+
assert.ifError(err);
131+
132+
// Mix in some get operations
133+
db.get("SELECT COUNT(*) as count FROM stress_test", (err, row) => {
134+
assert.ifError(err);
135+
assert.ok(row.count > 0);
136+
137+
completed++;
138+
if (completed === OPERATIONS) {
139+
// All operations complete. Now close the db and verify hook lifecycle.
140+
db.close((err) => {
141+
assert.ifError(err);
142+
143+
waitForDestroyHooks(initIds, destroyIds, 5000, (err) => {
144+
hook.disable();
145+
146+
if (err) {
147+
// Log diagnostic info but don't fail — the critical check
148+
// is that no async hook stack corruption occurred
149+
console.log(`Warning: ${err.message}`);
150+
console.log(`initCount=${initCount}, destroyCount=${destroyCount}`);
151+
}
152+
153+
// before/after stack must be fully balanced
154+
assert.strictEqual(beforeAfterStack.length, 0,
155+
`before/after stack not balanced: ${beforeAfterStack.length} unpaired before() calls`);
156+
157+
// The most important assertion: we got here without
158+
// "async hook stack has become corrupted" being thrown.
159+
// That means the DeferredDelete fix is working.
160+
done();
161+
});
162+
});
163+
}
164+
});
165+
});
166+
}
167+
});
168+
});
169+
170+
it('should maintain async hook stack integrity with prepared statements', function(done) {
171+
const db = new sqlite3.Database(':memory:');
172+
173+
const initIds = new Set();
174+
const destroyIds = new Set();
175+
176+
const hook = createHook({
177+
init(asyncId, type) {
178+
if (type.startsWith("sqlite3.")) {
179+
initIds.add(asyncId);
180+
}
181+
},
182+
destroy(asyncId) {
183+
if (initIds.has(asyncId)) {
184+
destroyIds.add(asyncId);
185+
}
186+
}
187+
});
188+
hook.enable();
189+
190+
db.run("CREATE TABLE stmt_test (id INTEGER PRIMARY KEY, v TEXT)", (err) => {
191+
assert.ifError(err);
192+
193+
let completed = 0;
194+
const TOTAL = 200;
195+
196+
for (let i = 0; i < TOTAL; i++) {
197+
const stmt = db.prepare("INSERT INTO stmt_test (v) VALUES (?)");
198+
stmt.run(`s-${i}`, function(err) {
199+
assert.ifError(err);
200+
this.finalize(function(err) {
201+
assert.ifError(err);
202+
completed++;
203+
if (completed === TOTAL) {
204+
db.close((err) => {
205+
assert.ifError(err);
206+
waitForDestroyHooks(initIds, destroyIds, 5000, (err) => {
207+
hook.disable();
208+
if (err) {
209+
console.log(`Warning: ${err.message}`);
210+
}
211+
// The critical assertion: we got here without
212+
// "async hook stack has become corrupted"
213+
done();
214+
});
215+
});
216+
}
217+
});
218+
});
219+
}
220+
});
221+
});
222+
223+
it('should maintain async hook stack integrity under repeated open/close cycles with GC pressure', function(done) {
224+
// This test creates and destroys databases sequentially with GC pressure
225+
// to increase the chance that freed async work memory gets reused,
226+
// making use-after-free more likely to manifest as corruption.
227+
const initIds = new Set();
228+
const destroyIds = new Set();
229+
230+
const hook = createHook({
231+
init(asyncId, type) {
232+
if (type.startsWith("sqlite3.")) {
233+
initIds.add(asyncId);
234+
}
235+
},
236+
destroy(asyncId) {
237+
if (initIds.has(asyncId)) {
238+
destroyIds.add(asyncId);
239+
}
240+
}
241+
});
242+
hook.enable();
243+
244+
let cycle = 0;
245+
246+
function runNextCycle() {
247+
if (cycle >= ITERATIONS) {
248+
// All cycles done — verify hooks
249+
waitForDestroyHooks(initIds, destroyIds, 5000, (err) => {
250+
hook.disable();
251+
if (err) {
252+
console.log(`Warning: ${err.message}`);
253+
}
254+
// The critical assertion: we got here without
255+
// "async hook stack has become corrupted"
256+
done();
257+
});
258+
return;
259+
}
260+
261+
const db = new sqlite3.Database(':memory:');
262+
db.run("CREATE TABLE gc_test (v TEXT)", (err) => {
263+
assert.ifError(err);
264+
db.run("INSERT INTO gc_test VALUES ('x')", (err) => {
265+
assert.ifError(err);
266+
db.get("SELECT COUNT(*) as c FROM gc_test", (err, row) => {
267+
assert.ifError(err);
268+
assert.strictEqual(row.c, 1);
269+
db.close((err) => {
270+
assert.ifError(err);
271+
// Force GC to reclaim freed async work memory
272+
tryGC();
273+
cycle++;
274+
// Let deferred deletions complete before next cycle
275+
setImmediate(() => setImmediate(runNextCycle));
276+
});
277+
});
278+
});
279+
});
280+
}
281+
282+
runNextCycle();
283+
});
284+
});

0 commit comments

Comments
 (0)