Skip to content

Commit 8930b89

Browse files
Improved debug logging, error handling, etc. for workers.
1 parent 0f0af38 commit 8930b89

File tree

6 files changed

+212
-70
lines changed

6 files changed

+212
-70
lines changed

.eslintrc.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
"no-irregular-whitespace": 2,
9292
"no-iterator": 2,
9393
"no-label-var": 2,
94-
"no-labels": 2,
94+
"no-labels": 0,
9595
"no-lone-blocks": 2,
9696
"no-mixed-spaces-and-tabs": 2,
9797
"no-multi-spaces": 2,
@@ -133,7 +133,7 @@
133133
"quotes": [2, "double", {"avoidEscape": true, "allowTemplateLiterals": true}],
134134
"radix": 0,
135135
"semi": [2, "always"],
136-
"semi-spacing": [2, { "before": false, "after": true }],
136+
"semi-spacing": [0, { "before": false, "after": true }],
137137
"space-before-blocks": [2, "always"],
138138
"space-before-function-paren": [2, "never"],
139139
"space-in-parens": [2, "never"],

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "jsonrpc-bidirectional",
33
"description": "Bidirectional JSONRPC over web sockets or HTTP with extensive plugin support.",
4-
"version": "9.8.2",
4+
"version": "10.0.1",
55
"scripts": {
66
"build": "node --experimental-worker build.js",
77
"prepublish": "node --experimental-worker build.js && node --expose-gc --max-old-space-size=1024 --experimental-worker tests/main.js",

src/NodeClusterBase/MasterEndpoint.js

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,20 +97,25 @@ class MasterEndpoint extends NodeMultiCoreCPUBase.MasterEndpoint
9797
console.log(`Worker with PID ${worker.process.pid} and persistentId ${nPersistentWorkerID} died. Exit code: ${nExitCode}. Signal: ${nKillSignal}.`);
9898

9999
this.arrFailureTimestamps.push(new Date().getTime());
100-
this.arrFailureTimestamps = this.arrFailureTimestamps.filter((nMillisecondsUnixTime) => {
101-
return nMillisecondsUnixTime >= new Date().getTime() - (60 * 2 * 1000);
100+
this.arrFailureTimestamps = this.arrFailureTimestamps.filter((nMillisecondsUnixTimeOfFailure) => {
101+
return nMillisecondsUnixTimeOfFailure >= new Date().getTime() - (60 * 2 * 1000);
102102
});
103103

104-
if(this.arrFailureTimestamps.length / Math.max(os.cpus().length, 1) > 4)
104+
const nMaxFailuresPerMaxWorkers = process.uptime() < 15 /*seconds*/ ? Math.min(this.maxWorkersCount * 2, 20 /*times*/) : 20 /*times*/;
105+
if(this.arrFailureTimestamps.length / Math.max(this.maxWorkersCount, 1) > nMaxFailuresPerMaxWorkers)
105106
{
107+
console.error(`[Master] *Not* adding a worker because another worker has died. Doing a .gracefulExit() instead because the number of worker failures divided by .maxWorkersCount is greater than ${nMaxFailuresPerMaxWorkers} over the last 2 minutes. ${this.arrFailureTimestamps.length / Math.max(this.maxWorkersCount, 1)} > ${nMaxFailuresPerMaxWorkers}. Process uptime is ${process.uptime()} seconds.`);
106108
await this.gracefulExit(null);
107109
}
108110
else
109111
{
110112
if(!this.bShuttingDown)
111113
{
112-
await sleep(500);
114+
const nSleepMilliSeconds = Math.max(800 + 1000 * this.readyWorkersCount, 3000);
115+
await sleep(`Sleeping ${nSleepMilliSeconds} milliseconds before replacing exited worker.`);
113116
// cluster.fork();
117+
118+
console.error("[Master] Adding a worker because another worker has exited.");
114119
this._addWorker(nPersistentWorkerID);
115120
}
116121
}

src/NodeMultiCoreCPUBase/MasterEndpoint.js

Lines changed: 94 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -100,14 +100,35 @@ class MasterEndpoint extends JSONRPC.EndpointBase
100100
/**
101101
* The object has worker IDs as keys and object values like this: {client: JSONRPC.Client, ready: boolean}.
102102
*
103-
* @returns {Object<workerID:number, {client:JSONRPC.Client, ready:boolean}>}
103+
* @returns {Object<workerID:number, {client:JSONRPC.Client, ready:boolean, exited:boolean}>}
104104
*/
105105
get workerClients()
106106
{
107107
return this.objWorkerIDToState;
108108
}
109109

110110

111+
/**
112+
* DO NOT use this count to determine if more workers need to be created,
113+
* because it *excludes* workers which are in the process of becoming ready.
114+
*
115+
* @returns {integer}
116+
*/
117+
get readyWorkersCount()
118+
{
119+
let nCount = 0;
120+
for(const objWorkerClient of Object.values(this.objWorkerIDToState))
121+
{
122+
if(objWorkerClient.ready)
123+
{
124+
++nCount;
125+
}
126+
}
127+
128+
return nCount;
129+
}
130+
131+
111132
/**
112133
* @param {number} nWorkersCount
113134
*/
@@ -415,7 +436,10 @@ class MasterEndpoint extends JSONRPC.EndpointBase
415436

416437
for(const nWorkerID in this.objWorkerIDToState)
417438
{
418-
if(this.objWorkerIDToState[nWorkerID].ready)
439+
if(
440+
this.objWorkerIDToState[nWorkerID].ready
441+
&& !this.objWorkerIDToState[nWorkerID].exited
442+
)
419443
{
420444
// Do not await, need these in parallel.
421445
/*await*/ this.objWorkerIDToState[nWorkerID].client.gracefulExit()
@@ -440,20 +464,49 @@ class MasterEndpoint extends JSONRPC.EndpointBase
440464
{
441465
nWorkersGracefulExitTimeoutID = setTimeout(
442466
() => {
443-
console.error("Timed out waiting for workers' gracefulExit() to complete.");
467+
console.error("[Master] Timed out waiting for workers' gracefulExit() to complete.");
444468
process.exit(1);
445469
},
446470
this._nGracefulExitTimeoutMilliseconds
447471
);
448472
}
449473

450474

451-
console.log("Waiting for workers to exit gracefully.");
452-
while(!!Object.keys(this.objWorkerIDToState).length)
475+
console.log("[Master] Waiting for workers to exit gracefully.");
476+
await sleep(3000);
477+
478+
waitForAllWorkers:
479+
while(Object.values(this.workerClients).length)
453480
{
454-
await sleep(1000);
481+
let bLogDelimited = false;
482+
let bWorkersStillAlive = false;
483+
for(const strWorkerID of Object.keys(this.workerClients))
484+
{
485+
if(!this.workerClients[strWorkerID].exited)
486+
{
487+
if(!bLogDelimited)
488+
{
489+
console.error("------------------------------------------------------------------");
490+
bLogDelimited = true;
491+
}
492+
493+
console.error(`Worker with ID ${strWorkerID} has not yet exited. Waiting...`);
494+
bWorkersStillAlive = true;
495+
}
496+
}
497+
498+
if(bWorkersStillAlive)
499+
{
500+
await sleep(2000);
501+
continue waitForAllWorkers;
502+
}
503+
504+
if(!bWorkersStillAlive)
505+
{
506+
break waitForAllWorkers;
507+
}
455508
}
456-
console.log("All workers have exited.");
509+
console.log("[Master] All workers have exited.");
457510

458511

459512
if(nWorkersGracefulExitTimeoutID !== null)
@@ -466,7 +519,7 @@ class MasterEndpoint extends JSONRPC.EndpointBase
466519
await this._stopServices();
467520

468521

469-
console.log("[" + process.pid + "] Master process exiting gracefully.");
522+
console.log("Master process exiting gracefully.");
470523
process.exit(0);
471524
}
472525

@@ -479,15 +532,16 @@ class MasterEndpoint extends JSONRPC.EndpointBase
479532
*/
480533
async ping(incomingRequest, strReturn)
481534
{
482-
console.log("Worker said: " + JSON.stringify(strReturn));
535+
console.log("[Master] [ping] Worker said: " + JSON.stringify(strReturn));
483536
return strReturn;
484537
}
485538

486539
async sendTransferListTest(incomingRequest, arrayBufferForTest)
487540
{
488-
console.log("Received buffer", arrayBufferForTest);
541+
console.log("[sendTransferListTest] Received buffer: ", arrayBufferForTest);
489542
}
490543

544+
491545
/**
492546
* @param {JSONRPC.IncomingRequest} incomingRequest
493547
* @param {number} nWorkerID
@@ -499,19 +553,37 @@ class MasterEndpoint extends JSONRPC.EndpointBase
499553
*/
500554
async rpcWorker(incomingRequest, nWorkerID, strFunctionName, arrParams, bNotification = false)
501555
{
556+
let nWaitForReadyTriesLeft = 10;
557+
while(
558+
this.workerClients[nWorkerID]
559+
&& !this.workerClients[nWorkerID].ready
560+
&& !this.workerClients[nWorkerID].exited
561+
&& --nWaitForReadyTriesLeft >= 0
562+
)
563+
{
564+
console.error(`[Master] Can't RPC into Cluster worker.id ${nWorkerID}, the RPC client has not signaled it is ready for cluster IPC RPC, yet. Sleeping 1 second before re-rechecking ready status. ${nWaitForReadyTriesLeft} future retries left. The RPC call to worker.${strFunctionName}() will be continue normally if the ready status becomes true.`);
565+
await sleep(1000);
566+
}
567+
502568
if(!this.workerClients[nWorkerID])
503569
{
504-
throw new JSONRPC.Exception(`Cluster worker.id ${nWorkerID} is not alive.`);
570+
throw new JSONRPC.Exception(`[Master] Can't RPC worker.${strFunctionName}() into Cluster worker.id ${nWorkerID}, it never existed (or is no longer alive and the master process is exiting).`);
571+
}
572+
573+
if(this.workerClients[nWorkerID].exited)
574+
{
575+
throw new JSONRPC.Exception(`[Master] Can't RPC worker.${strFunctionName}() into cluster worker.id ${nWorkerID}, it has already exited.`);
505576
}
506577

507578
if(!this.workerClients[nWorkerID].ready)
508579
{
509-
throw new JSONRPC.Exception(`Cluster worker.id ${nWorkerID} RPC client has not signaled it is ready for cluster IPC RPC, yet.`);
580+
throw new JSONRPC.Exception(`[Master] Can't RPC worker.${strFunctionName}() into Cluster worker.id ${nWorkerID}, the RPC client has not signaled it is ready for cluster IPC RPC, yet.`);
510581
}
511582

512583
return await this.workerClients[nWorkerID].client.rpc(strFunctionName, arrParams, bNotification);
513584
}
514585

586+
515587
/**
516588
* @typedef {{ message: string, stack: string=, code: number=, type: string=, errorClass: string }} ErrorObject
517589
*
@@ -530,6 +602,16 @@ class MasterEndpoint extends JSONRPC.EndpointBase
530602

531603
for(const strWorkerID of Object.keys(this.workerClients))
532604
{
605+
if(!this.workerClients[strWorkerID].ready)
606+
{
607+
continue;
608+
}
609+
610+
if(this.workerClients[strWorkerID].exited)
611+
{
612+
continue;
613+
}
614+
533615
const nWorkerID = parseInt(strWorkerID, 10);
534616
arrPromises.push(new Promise(async(fnResolve, fnReject) => {
535617
try

src/NodeMultiCoreCPUBase/WorkerEndpoint.js

Lines changed: 61 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ const JSONRPC = {
1010
}
1111
};
1212

13+
const sleep = require("sleep-promise");
14+
1315

1416
/**
1517
* Extend this class to export extra worker RPC APIs.
@@ -36,6 +38,7 @@ class WorkerEndpoint extends JSONRPC.EndpointBase
3638
this.bShuttingDown = false;
3739

3840
this._bWorkerStarted = false;
41+
this._promiseStart = null;
3942

4043
this._nPersistentWorkerID = undefined;
4144
}
@@ -75,12 +78,31 @@ class WorkerEndpoint extends JSONRPC.EndpointBase
7578
{
7679
if(!this._masterClient)
7780
{
78-
throw new Error("The master client is ready only after calling await .startWorker().");
81+
if(!this._bWorkerStarted)
82+
{
83+
console.error(`
84+
Premature access to the .masterClient property as it was not initialized by WorkerEndpoint.start().
85+
86+
[WorkerEndpoint] process.uptime(): ${process.uptime()} seconds.
87+
88+
Desperatly calling .start() (possibly prematurely) inside the .masterClient getter to allow a future access to .masterClient to succeed and not throw.
89+
Services might not have been started yet, see .start().
90+
As a result, this worker might yet be ready for receving workers IPC RPC.
91+
92+
Lazy or premature init.
93+
Normally, the application implementing this needs to to call .start() explicitly to signal it is ready to receive RPC calls over workers IPC.
94+
`);
95+
96+
this.start().catch(console.error);
97+
}
98+
99+
throw new Error("The .masterClient property was not initialized by .start().");
79100
}
80101

81102
return this._masterClient;
82103
}
83104

105+
84106
async getIfNotPresentPersistentWorkerID()
85107
{
86108
if(this._nPersistentWorkerID === undefined)
@@ -93,7 +115,7 @@ class WorkerEndpoint extends JSONRPC.EndpointBase
93115

94116

95117
/**
96-
* This overridable function is called and awaited inside startWorker().
118+
* This overridable function is called and awaited inside start().
97119
*
98120
* This mustn't be called through JSONRPC.
99121
*
@@ -133,30 +155,50 @@ class WorkerEndpoint extends JSONRPC.EndpointBase
133155
*/
134156
async start()
135157
{
136-
if(this._bWorkerStarted)
158+
if(this._promiseStart)
137159
{
138-
throw new Error("Worker is already started.");
160+
return this._promiseStart;
139161
}
140-
this._bWorkerStarted = true;
141-
142162

143-
this._jsonrpcServer = new JSONRPC.Server();
144-
this._bidirectionalWorkerRouter = await this._makeBidirectionalRouter();
145-
146-
// By default, JSONRPC.Server rejects all requests as not authenticated and not authorized.
147-
this._jsonrpcServer.addPlugin(new JSONRPC.Plugins.Server.AuthenticationSkip());
148-
this._jsonrpcServer.addPlugin(new JSONRPC.Plugins.Server.AuthorizeAll());
163+
this._promiseStart = new Promise(async(fnResolve, fnReject) => {
164+
try
165+
{
166+
if(this._bWorkerStarted)
167+
{
168+
throw new Error("WorkerEndpoint.start() was already called.");
169+
}
170+
this._bWorkerStarted = true;
171+
172+
173+
this._jsonrpcServer = new JSONRPC.Server();
174+
this._bidirectionalWorkerRouter = await this._makeBidirectionalRouter();
175+
176+
// By default, JSONRPC.Server rejects all requests as not authenticated and not authorized.
177+
this._jsonrpcServer.addPlugin(new JSONRPC.Plugins.Server.AuthenticationSkip());
178+
this._jsonrpcServer.addPlugin(new JSONRPC.Plugins.Server.AuthorizeAll());
179+
180+
const nConnectionID = await this._bidirectionalWorkerRouter.addWorker(await this._currentWorker(), "/api-workers/IPC");
181+
this._masterClient = this._bidirectionalWorkerRouter.connectionIDToSingletonClient(nConnectionID, this.ReverseCallsClientClass);
182+
183+
this._jsonrpcServer.registerEndpoint(this);
184+
185+
// BidirectionalWorkerRouter requires to know when JSONRPC has finished its setup to avoid very likely race conditions.
186+
await this._masterClient.rpc("rpc.connectToEndpoint", ["/api-workers/IPC"]);
149187

150-
const nConnectionID = await this._bidirectionalWorkerRouter.addWorker(await this._currentWorker(), "/api-workers/IPC");
151-
this._masterClient = this._bidirectionalWorkerRouter.connectionIDToSingletonClient(nConnectionID, this.ReverseCallsClientClass);
188+
189+
await this._startServices();
190+
await this._masterClient.workerServicesReady(await this._currentWorkerID());
152191

153-
this._jsonrpcServer.registerEndpoint(this);
154192

155-
// BidirectionalWorkerRouter requires to know when JSONRPC has finished its setup to avoid very likely race conditions.
156-
await this._masterClient.rpc("rpc.connectToEndpoint", ["/api-workers/IPC"]);
193+
fnResolve();
194+
}
195+
catch(error)
196+
{
197+
fnReject(error);
198+
}
199+
});
157200

158-
await this._startServices();
159-
await this._masterClient.workerServicesReady(await this._currentWorkerID());
201+
return this._promiseStart;
160202
}
161203

162204

0 commit comments

Comments
 (0)