Skip to content

Commit b535617

Browse files
authored
feat: instrument Node↔C++ IPC metrics in EDOPro and add reproducible performance benchmarks
1 parent c307e23 commit b535617

File tree

8 files changed

+516
-2
lines changed

8 files changed

+516
-2
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Node.js ↔ C++ IPC performance testing
2+
3+
This document adds reproducible local benchmarks for the child-process IPC channel.
4+
5+
## Included scripts
6+
7+
- `npm run perf:protocol`
8+
- Microbenchmark for JSON serialization/parsing and length-prefixed frame encode/decode.
9+
- `npm run perf:ipc`
10+
- End-to-end IPC benchmark using a child-process echo worker that mimics current transport:
11+
- parent writes line-delimited JSON commands,
12+
- child replies with `uint32_le + json` frames.
13+
14+
## Environment variables
15+
16+
### `perf:protocol`
17+
18+
- `BENCH_ITERATIONS` (default `200000`)
19+
- `BENCH_PAYLOAD_BYTES` (default `256`)
20+
21+
Example:
22+
23+
```bash
24+
BENCH_ITERATIONS=500000 BENCH_PAYLOAD_BYTES=512 npm run perf:protocol
25+
```
26+
27+
### `perf:ipc`
28+
29+
- `BENCH_MESSAGES` (default `20000`)
30+
- `BENCH_CONCURRENCY` (default `512`)
31+
- `BENCH_PAYLOAD_BYTES` (default `128`)
32+
33+
Example:
34+
35+
```bash
36+
BENCH_MESSAGES=50000 BENCH_CONCURRENCY=1024 BENCH_PAYLOAD_BYTES=256 npm run perf:ipc
37+
```
38+
39+
## Metrics reported
40+
41+
- Throughput (`throughputMsgPerSec`)
42+
- End-to-end latency (`p50`, `p95`, `p99`)
43+
- Backpressure stats (`drainCount`, `totalDrainWaitMs`)
44+
45+
## Suggested baseline workflow
46+
47+
1. Run both scripts on current branch.
48+
2. Save JSON output artifacts.
49+
3. Apply protocol changes (e.g. bidirectional length-prefix framing).
50+
4. Repeat benchmarks with same env vars.
51+
5. Compare p95/p99 latency and throughput deltas.
52+
53+
## Production metrics capture
54+
55+
You can enable periodic IPC metrics logs per room by setting:
56+
57+
```bash
58+
IPC_METRICS_ENABLED=true
59+
```
60+
61+
When enabled, each active EDO room logs a structured `IPC_METRICS` event every 60 seconds with:
62+
63+
- queue depth and max queue depth
64+
- commands enqueued/written and stdin write errors
65+
- drain count and total drain wait time
66+
- stdout chunk/bytes counts
67+
- processed frames, parse errors, and deferred ticks
68+
69+
Use this in production/staging to collect real IPC behavior and share snapshots for analysis.

package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
"migrate-users": "ts-node-dev -r tsconfig-paths/register --respawn --poll --require dotenv/config src/utils/migrate-user-from-redis-to-postgres.ts",
2222
"migration:generate": "ts-node -r tsconfig-paths/register --require dotenv/config ./node_modules/typeorm/cli.js migration:generate -d ./src/evolution-types/src/data-source.ts ./src/evolution-types/src/migrations/$npm_config_name",
2323
"migration:run": "ts-node -r tsconfig-paths/register --require dotenv/config ./node_modules/typeorm/cli.js migration:run -d ./src/evolution-types/src/data-source.ts",
24-
"migration:revert": "ts-node -r tsconfig-paths/register --require dotenv/config ./node_modules/typeorm/cli.js migration:revert -d ./src/evolution-types/src/data-source.ts"
24+
"migration:revert": "ts-node -r tsconfig-paths/register --require dotenv/config ./node_modules/typeorm/cli.js migration:revert -d ./src/evolution-types/src/data-source.ts",
25+
"perf:protocol": "ts-node -r tsconfig-paths/register src/utils/perf/benchmark-protocol.ts",
26+
"perf:ipc": "ts-node -r tsconfig-paths/register src/utils/perf/benchmark-node-cpp-ipc.ts"
2527
},
2628
"keywords": [],
2729
"author": "",

src/edopro/room/domain/Room.ts

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,22 @@ export enum Rule {
4646
ALL,
4747
}
4848

49+
type IpcMetricsSnapshot = {
50+
uptimeMs: number;
51+
queueDepth: number;
52+
maxQueueDepth: number;
53+
commandsEnqueued: number;
54+
commandsWritten: number;
55+
stdinWriteErrors: number;
56+
drainCount: number;
57+
totalDrainWaitMs: number;
58+
stdoutChunks: number;
59+
stdoutBytes: number;
60+
framesProcessed: number;
61+
parseErrors: number;
62+
deferredProcessTicks: number;
63+
};
64+
4965
export class DeckRules {
5066
public readonly mainMin: number;
5167
public readonly mainMax: number;
@@ -156,6 +172,22 @@ export class Room extends YgoRoom {
156172
private readonly notifier: RoomClientNotifier;
157173
private readonly pendingCppMessages: string[] = [];
158174
private isWaitingCppDrain = false;
175+
private ipcMetricsStartedAt = Date.now();
176+
private ipcMetricsLogInterval: NodeJS.Timeout | null = null;
177+
private lastDrainStartAt: bigint | null = null;
178+
private ipcMetrics = {
179+
maxQueueDepth: 0,
180+
commandsEnqueued: 0,
181+
commandsWritten: 0,
182+
stdinWriteErrors: 0,
183+
drainCount: 0,
184+
totalDrainWaitNs: 0n,
185+
stdoutChunks: 0,
186+
stdoutBytes: 0,
187+
framesProcessed: 0,
188+
parseErrors: 0,
189+
deferredProcessTicks: 0,
190+
};
159191

160192
private constructor(attr: RoomAttr) {
161193
super({
@@ -560,9 +592,12 @@ export class Room extends YgoRoom {
560592
this.isWaitingCppDrain = false;
561593
this.pendingCppMessages.length = 0;
562594
this._duel.stdin.on("error", (error) => {
595+
this.ipcMetrics.stdinWriteErrors += 1;
563596
this.logger.error("Error writing to the child process");
564597
this.logger.error(error);
565598
});
599+
600+
this.startIpcMetricsReporting();
566601
}
567602

568603
private get duel(): ChildProcessWithoutNullStreams | null {
@@ -797,9 +832,30 @@ export class Room extends YgoRoom {
797832

798833
public sendMessageToCpp(message: string): void {
799834
this.pendingCppMessages.push(`${message}\n`);
835+
this.ipcMetrics.commandsEnqueued += 1;
836+
if (this.pendingCppMessages.length > this.ipcMetrics.maxQueueDepth) {
837+
this.ipcMetrics.maxQueueDepth = this.pendingCppMessages.length;
838+
}
800839
this.flushCppMessageQueue();
801840
}
802841

842+
public recordCppStdoutChunk(byteLength: number): void {
843+
this.ipcMetrics.stdoutChunks += 1;
844+
this.ipcMetrics.stdoutBytes += byteLength;
845+
}
846+
847+
public recordCppFrameProcessed(): void {
848+
this.ipcMetrics.framesProcessed += 1;
849+
}
850+
851+
public recordCppParseError(): void {
852+
this.ipcMetrics.parseErrors += 1;
853+
}
854+
855+
public recordCppDeferredProcessTick(): void {
856+
this.ipcMetrics.deferredProcessTicks += 1;
857+
}
858+
803859
isFinished(): boolean {
804860
return this.currentDuel?.isFinished ?? false;
805861
}
@@ -880,6 +936,7 @@ export class Room extends YgoRoom {
880936
this._replay.destroy();
881937
}
882938
this.roomTimer.stop();
939+
this.stopIpcMetricsReporting();
883940
});
884941
}
885942

@@ -983,18 +1040,69 @@ export class Room extends YgoRoom {
9831040

9841041
if (!writeSuccess) {
9851042
this.isWaitingCppDrain = true;
1043+
this.ipcMetrics.drainCount += 1;
1044+
this.lastDrainStartAt = process.hrtime.bigint();
9861045
duel.stdin.once("drain", () => {
1046+
if (this.lastDrainStartAt !== null) {
1047+
this.ipcMetrics.totalDrainWaitNs += process.hrtime.bigint() - this.lastDrainStartAt;
1048+
this.lastDrainStartAt = null;
1049+
}
9871050
this.isWaitingCppDrain = false;
9881051
this.flushCppMessageQueue();
9891052
});
9901053

9911054
return;
9921055
}
9931056

1057+
this.ipcMetrics.commandsWritten += 1;
9941058
this.pendingCppMessages.shift();
9951059
}
9961060
}
9971061

1062+
1063+
private startIpcMetricsReporting(): void {
1064+
if (process.env.IPC_METRICS_ENABLED !== "true") {
1065+
return;
1066+
}
1067+
1068+
if (this.ipcMetricsLogInterval) {
1069+
return;
1070+
}
1071+
1072+
this.ipcMetricsStartedAt = Date.now();
1073+
this.ipcMetricsLogInterval = setInterval(() => {
1074+
const snapshot = this.getIpcMetricsSnapshot();
1075+
this.logger.info("IPC_METRICS", snapshot);
1076+
}, 60_000);
1077+
}
1078+
1079+
private stopIpcMetricsReporting(): void {
1080+
if (!this.ipcMetricsLogInterval) {
1081+
return;
1082+
}
1083+
1084+
clearInterval(this.ipcMetricsLogInterval);
1085+
this.ipcMetricsLogInterval = null;
1086+
}
1087+
1088+
private getIpcMetricsSnapshot(): IpcMetricsSnapshot {
1089+
return {
1090+
uptimeMs: Date.now() - this.ipcMetricsStartedAt,
1091+
queueDepth: this.pendingCppMessages.length,
1092+
maxQueueDepth: this.ipcMetrics.maxQueueDepth,
1093+
commandsEnqueued: this.ipcMetrics.commandsEnqueued,
1094+
commandsWritten: this.ipcMetrics.commandsWritten,
1095+
stdinWriteErrors: this.ipcMetrics.stdinWriteErrors,
1096+
drainCount: this.ipcMetrics.drainCount,
1097+
totalDrainWaitMs: Number(this.ipcMetrics.totalDrainWaitNs) / 1e6,
1098+
stdoutChunks: this.ipcMetrics.stdoutChunks,
1099+
stdoutBytes: this.ipcMetrics.stdoutBytes,
1100+
framesProcessed: this.ipcMetrics.framesProcessed,
1101+
parseErrors: this.ipcMetrics.parseErrors,
1102+
deferredProcessTicks: this.ipcMetrics.deferredProcessTicks,
1103+
};
1104+
}
1105+
9981106
private extractDeckPoints(notes?: string): number | undefined {
9991107
if (!notes) {
10001108
return undefined;

src/edopro/room/domain/states/dueling/DuelingState.ts

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ type SwapMessage = {
9494
team: number;
9595
};
9696
export class DuelingState extends RoomState {
97+
private static readonly MAX_MESSAGES_PER_TICK = 1000;
98+
9799
constructor(
98100
eventEmitter: EventEmitter,
99101
private readonly logger: Logger,
@@ -258,10 +260,12 @@ export class DuelingState extends RoomState {
258260
});
259261

260262
core.stdout.on("data", (data: Buffer) => {
263+
this.room.recordCppStdoutChunk(data.length);
261264
try {
262265
this.jsonMessageProcessor.read(data);
263266
this.processMessage();
264267
} catch (error) {
268+
this.room.recordCppParseError();
265269
const payload = this.jsonMessageProcessor.payload;
266270
this.logger.error(error as Error);
267271
this.logger.info(`data: ${data.toString("hex")}`);
@@ -282,8 +286,15 @@ export class DuelingState extends RoomState {
282286
}
283287

284288
private processMessage(): void {
285-
while (this.jsonMessageProcessor.isMessageReady()) {
289+
let processedMessages = 0;
290+
291+
while (
292+
processedMessages < DuelingState.MAX_MESSAGES_PER_TICK &&
293+
this.jsonMessageProcessor.isMessageReady()
294+
) {
295+
processedMessages++;
286296
this.jsonMessageProcessor.process();
297+
this.room.recordCppFrameProcessed();
287298
const payload = this.jsonMessageProcessor.payload;
288299
const message = JSON.parse(payload.data) as Message;
289300

@@ -341,6 +352,15 @@ export class DuelingState extends RoomState {
341352
this.handleCoreSwap(message as unknown as SwapMessage);
342353
}
343354
}
355+
356+
if (this.jsonMessageProcessor.isMessageReady()) {
357+
this.room.recordCppDeferredProcessTick();
358+
this.logger.info("Processing remaining core messages in next tick", {
359+
roomId: this.room.id,
360+
maxMessagesPerTick: DuelingState.MAX_MESSAGES_PER_TICK,
361+
});
362+
setImmediate(() => this.processMessage());
363+
}
344364
}
345365

346366
private handleCoreReplay(message: ReplayMessage) {

0 commit comments

Comments
 (0)