Skip to content

Commit 03ccece

Browse files
committed
lib,src,test: fix race during tracing toggles
Keep JS tracing decisions tied to the env-local tracing state materialized by the native toggle path instead of rereading live flags in multiple places. Tracing flags can be updated from any thread, so reading them directly during span creation can observe a mid-flight toggle. That could make Tracer.startSpan() return a NonRecordingSpan after the caller had already passed an internal tracing gate, leading to crashes like TypeError: span._pushSpanDataString is not a function. Pass the full tracing bitmask from EnvList::update_tracing_flags() into JS, cache it in lib/internal/nsolid_trace.js, and make the internal OTel code consume that cached per-thread state. This removes the split-brain behavior where callback binding/unbinding followed flagsUpdated but Tracer.startSpan() could independently observe a later disable and return a NonRecordingSpan. For internal spans, stop Tracer.startSpan() from rechecking the current trace flags after the caller has already crossed an internal tracing gate. That keeps internal span creation locally consistent while tracing is being enabled and disabled and avoids crashes in code paths that expect a real N|Solid span object. Also add targeted repro coverage for the toggle race: - extend the tracing test addon with configurable setupTracing(flags), stopTracing(), and skipExpectedTracesCheck() - add a deterministic fetch-based repro that toggles HTTP client tracing while concurrent fetch() traffic is in flight - add a grpc-based repro harness for tracing reconfiguration and cover both fetch and http traffic - teach the grpc agent test client how to generate fetch transactions Together these changes make trace flags materialize per env/thread in JS, preserve the caller's local tracing decision for internal spans, and add regression coverage for the tracing enable/disable race.
1 parent d41bff4 commit 03ccece

File tree

8 files changed

+240
-17
lines changed

8 files changed

+240
-17
lines changed

lib/internal/nsolid_trace.js

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,15 @@ const TRACE_ID_PART = '(?![0]{32})[\\da-f]{32}';
1919
const PARENT_ID_PART = '(?![0]{16})[\\da-f]{16}';
2020
const FLAGS_PART = '[\\da-f]{2}';
2121
const TRACE_PARENT_REGEX = new RegExp(`^\\s?(${VERSION_PART})-(${TRACE_ID_PART})-(${PARENT_ID_PART})-(${FLAGS_PART})(-.*)?\\s?$`);
22+
let currentTraceFlags = nsolidApi.traceFlags[0];
2223

2324

2425
function generateSpan(type) {
25-
return nsolidApi.traceFlags[0] & type;
26+
return currentTraceFlags & type;
27+
}
28+
29+
function getTraceFlags() {
30+
return currentTraceFlags;
2631
}
2732

2833
function parseTraceParent(traceParent) {
@@ -69,13 +74,15 @@ function extractSpanContextFromHttpHeaders(context, headers) {
6974
}
7075

7176
const nsolidTracer = new EventEmitter();
72-
binding.setToggleTracingFn(() => {
73-
nsolidTracer.emit('flagsUpdated');
77+
binding.setToggleTracingFn((flags) => {
78+
currentTraceFlags = flags;
79+
nsolidTracer.emit('flagsUpdated', flags);
7480
});
7581

7682

7783
module.exports = {
7884
extractSpanContextFromHttpHeaders,
7985
generateSpan,
86+
getTraceFlags,
8087
nsolidTracer,
8188
};

lib/internal/otel/core.js

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ const {
44
ArrayIsArray,
55
} = primordials;
66

7-
const binding = internalBinding('nsolid_api');
87
const {
98
codes: {
109
ERR_INVALID_ARG_TYPE,
@@ -28,16 +27,16 @@ function register(api) {
2827
// TODO(santigimeno): perform some kind of validation that the api is actually
2928
// the OTEL api.
3029
api_ = api;
31-
const { nsolidTracer } = require('internal/nsolid_trace');
32-
nsolidTracer.on('flagsUpdated', () => {
33-
if (binding.trace_flags[0]) {
30+
const { getTraceFlags, nsolidTracer } = require('internal/nsolid_trace');
31+
nsolidTracer.on('flagsUpdated', (flags) => {
32+
if (flags) {
3433
enableApi();
3534
} else {
3635
disableApi();
3736
}
3837
});
3938

40-
if (binding.trace_flags[0]) {
39+
if (getTraceFlags()) {
4140
return enableApi();
4241
}
4342

@@ -55,7 +54,8 @@ function registerInstrumentations(instrumentations) {
5554
instrumentations);
5655
}
5756

58-
if (binding.trace_flags[0]) {
57+
const { getTraceFlags } = require('internal/nsolid_trace');
58+
if (getTraceFlags()) {
5959
enableInsts(instrumentations);
6060
} else {
6161
disableInsts(instrumentations);

lib/internal/otel/trace.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ const { nsolid_consts } = binding;
1313
const {
1414
getApi,
1515
} = require('internal/otel/core');
16+
const {
17+
getTraceFlags,
18+
} = require('internal/nsolid_trace');
1619

1720
const {
1821
newInternalSpanId,
@@ -251,7 +254,7 @@ Span.prototype._pushSpanDataUint64 = function(type, name) {
251254
class Tracer {
252255
startSpan(name, options = {}, context) {
253256
const api = getApi();
254-
if (binding.trace_flags[0] === 0) {
257+
if (!options.internal && getTraceFlags() === 0) {
255258
return api.trace.wrapSpanContext(api.INVALID_SPAN_CONTEXT);
256259
}
257260

src/nsolid/nsolid_api.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1812,7 +1812,7 @@ void EnvList::update_tracing_flags(SharedEnvInst envinst_sp, uint32_t flags) {
18121812
HandleScope handle_scope(isolate);
18131813
Context::Scope context_scope(env->context());
18141814
Local<Value> argv[] = {
1815-
v8::Boolean::New(isolate, flags > 0)
1815+
Uint32::New(isolate, flags)
18161816
};
18171817

18181818
// We don't care if Call throws or exits. So ignore the return value.

test/addons/nsolid-tracing/binding.cc

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,11 @@ class Trace {
117117
Tracer* tracer_ = nullptr;
118118
std::queue<std::string> spans_;
119119
json expected_traces_ = {};
120+
bool at_exit_registered_ = false;
121+
bool check_expected_traces_ = true;
122+
123+
static constexpr uint32_t kDefaultTraceFlags =
124+
kSpanDns | kSpanHttpClient | kSpanHttpServer | kSpanCustom;
120125

121126

122127
// NOLINTNEXTLINE(runtime/references)
@@ -166,6 +171,10 @@ static void at_exit_cb() {
166171
fprintf(stderr, "traces_array: %s\n", traces_array.dump(4).c_str());
167172
// fprintf(stderr, "expected_traces: %s\n", expected_traces_.dump(4).c_str());
168173

174+
if (!check_expected_traces_) {
175+
return;
176+
}
177+
169178
assert(traces_array.size() == expected_traces_.size());
170179
for (auto i = traces_array.begin(); i != traces_array.end(); ++i) {
171180
for (auto j = expected_traces_.begin();
@@ -242,22 +251,46 @@ static void ExpectedTrace(const v8::FunctionCallbackInfo<v8::Value>& args) {
242251
}
243252

244253
static void SetupTracing(const v8::FunctionCallbackInfo<v8::Value>& args) {
245-
assert(0 == args.Length());
254+
assert(args.Length() <= 1);
246255
v8::Isolate* isolate = args.GetIsolate();
247256
v8::Local<v8::Context> context = isolate->GetCurrentContext();
248257
node::nsolid::SharedEnvInst envinst = node::nsolid::GetLocalEnvInst(context);
249258
if (node::nsolid::IsMainThread(envinst)) {
250-
tracer_ = Tracer::CreateInstance(kSpanDns |
251-
kSpanHttpClient |
252-
kSpanHttpServer |
253-
kSpanCustom, got_trace);
254-
atexit(at_exit_cb);
259+
if (tracer_ != nullptr) {
260+
return;
261+
}
262+
263+
uint32_t flags = kDefaultTraceFlags;
264+
if (args.Length() == 1) {
265+
assert(args[0]->IsUint32());
266+
flags = args[0].As<v8::Uint32>()->Value();
267+
}
268+
269+
tracer_ = Tracer::CreateInstance(flags, got_trace);
270+
if (!at_exit_registered_) {
271+
atexit(at_exit_cb);
272+
at_exit_registered_ = true;
273+
}
255274
}
256275
}
257276

277+
static void StopTracing(const v8::FunctionCallbackInfo<v8::Value>& args) {
278+
assert(0 == args.Length());
279+
delete tracer_;
280+
tracer_ = nullptr;
281+
}
282+
283+
static void SkipExpectedTracesCheck(
284+
const v8::FunctionCallbackInfo<v8::Value>& args) {
285+
assert(0 == args.Length());
286+
check_expected_traces_ = false;
287+
}
288+
258289
NODE_MODULE_INIT(/* exports, module, context */) {
259290
NODE_SET_METHOD(exports, "expectedTrace", ExpectedTrace);
260291
NODE_SET_METHOD(exports, "setupTracing", SetupTracing);
292+
NODE_SET_METHOD(exports, "stopTracing", StopTracing);
293+
NODE_SET_METHOD(exports, "skipExpectedTracesCheck", SkipExpectedTracesCheck);
261294
#define V(Name, Val, Str) \
262295
NODE_DEFINE_CONSTANT(exports, Name);
263296
NSOLID_SPAN_TYPES(V)
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// Flags: --dns-result-order=ipv4first
2+
'use strict';
3+
4+
const common = require('../../common');
5+
const assert = require('assert');
6+
const http = require('http');
7+
const { once } = require('events');
8+
9+
const bindingPath = require.resolve(`./build/${common.buildType}/binding`);
10+
const binding = require(bindingPath);
11+
12+
const concurrency = 16;
13+
const fetchRounds = 150;
14+
const toggleRounds = 400;
15+
16+
async function main() {
17+
binding.skipExpectedTracesCheck();
18+
binding.setupTracing(binding.kSpanHttpClient);
19+
20+
const server = http.createServer((req, res) => {
21+
req.resume();
22+
res.writeHead(200, { 'content-type': 'text/plain' });
23+
setImmediate(() => res.end('ok'));
24+
});
25+
26+
server.listen(0, '127.0.0.1');
27+
await once(server, 'listening');
28+
const { port } = server.address();
29+
const url = `http://127.0.0.1:${port}/`;
30+
31+
const failures = [];
32+
33+
async function runFetches() {
34+
for (let i = 0; i < fetchRounds; i++) {
35+
const batch = Array.from({ length: concurrency }, async () => {
36+
try {
37+
const res = await fetch(url, {
38+
method: 'POST',
39+
body: 'payload',
40+
});
41+
await res.text();
42+
} catch (err) {
43+
failures.push(err);
44+
}
45+
});
46+
await Promise.all(batch);
47+
}
48+
}
49+
50+
async function toggleTracing() {
51+
for (let i = 0; i < toggleRounds; i++) {
52+
binding.stopTracing();
53+
await new Promise(setImmediate);
54+
binding.setupTracing(binding.kSpanHttpClient);
55+
await new Promise(setImmediate);
56+
}
57+
}
58+
59+
try {
60+
await Promise.all([
61+
runFetches(),
62+
toggleTracing(),
63+
]);
64+
} finally {
65+
binding.stopTracing();
66+
server.close();
67+
await once(server, 'close');
68+
}
69+
70+
assert.deepStrictEqual(failures, []);
71+
}
72+
73+
main().then(common.mustCall()).catch((err) => {
74+
throw err;
75+
});
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// Flags: --expose-internals
2+
import { mustCall, mustSucceed } from '../common/index.mjs';
3+
import {
4+
checkExitData,
5+
GRPCServer,
6+
TestClient,
7+
} from '../common/nsolid-grpc-agent/index.js';
8+
9+
const traceBursts = 100;
10+
const toggleRounds = 20;
11+
12+
async function runRepro(getEnv, kind) {
13+
return new Promise((resolve, reject) => {
14+
const grpcServer = new GRPCServer();
15+
grpcServer.start(mustSucceed(async (port) => {
16+
const env = getEnv(port);
17+
const opts = {
18+
stdio: ['inherit', 'inherit', 'inherit', 'ipc'],
19+
env,
20+
};
21+
22+
const client = new TestClient([], opts);
23+
const agentId = await client.id();
24+
25+
grpcServer.on('exit', mustCall((data) => {
26+
checkExitData(data.msg, data.metadata, agentId, { code: 0, error: null, profile: '' });
27+
grpcServer.close();
28+
resolve();
29+
}));
30+
31+
// send lots of trace requests to trigger tracing
32+
for (let i = 0; i < traceBursts; i++) {
33+
client.tracing(kind, 0);
34+
}
35+
36+
// toggle tracing on and off
37+
for (let i = 0; i < toggleRounds; i++) {
38+
const enabled = (i % 2) !== 0;
39+
await grpcServer.reconfigure(agentId, { tracingEnabled: enabled });
40+
}
41+
42+
await client.shutdown();
43+
}));
44+
});
45+
}
46+
47+
const tests = [];
48+
tests.push({
49+
name: 'should reproduce fetch tracing crash via grpc reconfigure',
50+
test: async (getEnv) => runRepro(getEnv, 'fetch'),
51+
});
52+
53+
tests.push({
54+
name: 'should reproduce http tracing crash via grpc reconfigure',
55+
test: async (getEnv) => runRepro(getEnv, 'http'),
56+
});
57+
58+
const testConfigs = [
59+
{
60+
getEnv: (port) => ({
61+
NODE_DEBUG_NATIVE: 'nsolid_grpc_agent',
62+
NSOLID_GRPC_INSECURE: 1,
63+
NSOLID_GRPC: `localhost:${port}`,
64+
NSOLID_TRACING_ENABLED: 1,
65+
NSOLID_INTERVAL: 100000,
66+
}),
67+
},
68+
];
69+
70+
for (const testConfig of testConfigs) {
71+
for (const { name, test } of tests) {
72+
console.log(`[tracing] ${name}`);
73+
await test(testConfig.getEnv);
74+
}
75+
}

test/common/nsolid-grpc-agent/client.js

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,33 @@ function execHttpTransaction() {
4646
});
4747
}
4848

49+
function execFetchTransaction() {
50+
const server = http.createServer((req, res) => {
51+
req.resume();
52+
setTimeout(() => {
53+
res.writeHead(200, { 'Content-Type': 'text/plain' });
54+
res.end('Hello World\n');
55+
}, 10);
56+
});
57+
58+
server.listen(0, '127.0.0.1', async () => {
59+
const port = server.address().port;
60+
const url = `http://127.0.0.1:${port}/`;
61+
for (let i = 0; i < 25; i++) {
62+
const responses = await Promise.all(Array.from({ length: 32 }, () => {
63+
return fetch(url, {
64+
method: 'POST',
65+
body: 'payload',
66+
});
67+
}));
68+
69+
await Promise.all(responses.map((response) => response.text()));
70+
}
71+
72+
server.close();
73+
});
74+
}
75+
4976
function execDnsTransaction() {
5077
const dns = require('node:dns');
5178
dns.lookup('example.org', () => {
@@ -97,6 +124,9 @@ function handleTrace(msg) {
97124
case 'http':
98125
execHttpTransaction();
99126
break;
127+
case 'fetch':
128+
execFetchTransaction();
129+
break;
100130
case 'dns':
101131
execDnsTransaction();
102132
break;

0 commit comments

Comments
 (0)