Skip to content

Commit ad598ec

Browse files
committed
Serverside keepalive error detection and cleanups
- Bugfix: Ensure that if session.ping returns false we correctly identify fail the keepalive and connection - Bugfix: Ensure that if the interval between keepalives being sent occurs faster than the prior keepalive's timeout that we do not overwrite the reference to the prior timeout. Prior implementation could have in theory prevented a valid keepalive timeout from clearing itself. This rewrite keeps every timeout as a local (vs a shared state per session). Even if the timeout outlives the lifetime of a session, we still guard against errors by checking that the parent interval is not false-y. I reckon this could result in a short-term memory leak per session which is bounded for a maximum of keepaliveTimeoutMs. On the other hand even with that potential for a short reference hold, this implementation proposed here is more correct I think. One alternative we could do is keep a list of pending timeouts.. which is complex for a rare situation that will self resolve anyhow when keepaliveTimeoutMs is reached. - Bug Fix: keepalive intervals were being cleared with an incorrect clearTimeout before. Not sure if this was causing intervals leaks in some nodejs impls or not. (v20.13.1 seems to accept this mismatch without issue) - Rename variables for clarity, to prevent future bugs like swapping clearInterval vs clearTimeout. - Implementation is repeated in two places, per warning from #2756 (comment) - This commit supercedes the prior PR on a master branch which was out of date. #2756
1 parent 729a3f5 commit ad598ec

File tree

1 file changed

+134
-71
lines changed

1 file changed

+134
-71
lines changed

packages/grpc-js/src/server.ts

Lines changed: 134 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1376,8 +1376,7 @@ export class Server {
13761376

13771377
let connectionAgeTimer: NodeJS.Timeout | null = null;
13781378
let connectionAgeGraceTimer: NodeJS.Timeout | null = null;
1379-
let keeapliveTimeTimer: NodeJS.Timeout | null = null;
1380-
let keepaliveTimeoutTimer: NodeJS.Timeout | null = null;
1379+
let keepaliveInterval: NodeJS.Timeout | null = null;
13811380
let sessionClosedByServer = false;
13821381

13831382
const idleTimeoutObj = this.enableIdleTimeout(session);
@@ -1421,41 +1420,74 @@ export class Server {
14211420
}
14221421

14231422
if (this.keepaliveTimeMs < KEEPALIVE_MAX_TIME_MS) {
1424-
keeapliveTimeTimer = setInterval(() => {
1425-
keepaliveTimeoutTimer = setTimeout(() => {
1426-
sessionClosedByServer = true;
1427-
session.close();
1423+
keepaliveInterval = setInterval(() => {
1424+
// NOTE to self: document in PR that prior implementation would overwrite the prior pending timeout
1425+
// if the timeout had not occurred before the prior interval had elapsed (bad bug)
1426+
const keepaliveTimeout = setTimeout(() => {
1427+
if (keepaliveInterval) {
1428+
clearInterval(keepaliveInterval);
1429+
keepaliveInterval = null;
1430+
sessionClosedByServer = true;
1431+
this.trace('Connection dropped by keepalive timeout');
1432+
session.close();
1433+
}
14281434
}, this.keepaliveTimeoutMs);
1429-
keepaliveTimeoutTimer.unref?.();
1435+
keepaliveTimeout.unref?.();
14301436

14311437
try {
1432-
session.ping(
1433-
(err: Error | null, duration: number, payload: Buffer) => {
1434-
if (keepaliveTimeoutTimer) {
1435-
clearTimeout(keepaliveTimeoutTimer);
1438+
if (
1439+
!session.ping(
1440+
(err: Error | null, duration: number, payload: Buffer) => {
1441+
clearTimeout(keepaliveTimeout);
1442+
if (err) {
1443+
if (keepaliveInterval) {
1444+
clearInterval(keepaliveInterval);
1445+
keepaliveInterval = null;
1446+
}
1447+
sessionClosedByServer = true;
1448+
this.trace(
1449+
'Connection dropped due to error with ping frame ' +
1450+
err.message +
1451+
' return in ' +
1452+
duration
1453+
);
1454+
session.close();
1455+
}
14361456
}
1437-
1438-
if (err) {
1439-
sessionClosedByServer = true;
1440-
this.trace(
1441-
'Connection dropped due to error of a ping frame ' +
1442-
err.message +
1443-
' return in ' +
1444-
duration
1445-
);
1446-
session.close();
1447-
}
1448-
}
1449-
);
1457+
)
1458+
) {
1459+
throw new Error('Server keepalive ping send failed');
1460+
}
14501461
} catch (e) {
1451-
clearTimeout(keepaliveTimeoutTimer);
1452-
// The ping can't be sent because the session is already closed
1462+
// The ping can't be sent because the session is already closed, max outstanding pings reached, etc
1463+
clearTimeout(keepaliveTimeout);
1464+
if (keepaliveInterval) {
1465+
clearInterval(keepaliveInterval);
1466+
keepaliveInterval = null;
1467+
}
1468+
this.trace(
1469+
'Connection dropped due to error sending ping frame ' +
1470+
(e instanceof Error ? e.message : 'unknown error')
1471+
);
14531472
session.destroy();
14541473
}
14551474
}, this.keepaliveTimeMs);
1456-
keeapliveTimeTimer.unref?.();
1475+
keepaliveInterval.unref?.();
14571476
}
14581477

1478+
session.once('goaway', (errorCode, lastStreamID, opaqueData) => {
1479+
if (errorCode === http2.constants.NGHTTP2_ENHANCE_YOUR_CALM) {
1480+
this.trace('Connection dropped by client due to ENHANCE_YOUR_CALM');
1481+
} else {
1482+
this.trace(
1483+
'Connection dropped by client via GOAWAY with error code ' +
1484+
errorCode
1485+
);
1486+
}
1487+
sessionClosedByServer = true;
1488+
session.destroy();
1489+
});
1490+
14591491
session.on('close', () => {
14601492
if (!sessionClosedByServer) {
14611493
this.trace(
@@ -1471,11 +1503,9 @@ export class Server {
14711503
clearTimeout(connectionAgeGraceTimer);
14721504
}
14731505

1474-
if (keeapliveTimeTimer) {
1475-
clearInterval(keeapliveTimeTimer);
1476-
if (keepaliveTimeoutTimer) {
1477-
clearTimeout(keepaliveTimeoutTimer);
1478-
}
1506+
if (keepaliveInterval) {
1507+
clearInterval(keepaliveInterval);
1508+
keepaliveInterval = null;
14791509
}
14801510

14811511
if (idleTimeoutObj !== null) {
@@ -1521,8 +1551,7 @@ export class Server {
15211551

15221552
let connectionAgeTimer: NodeJS.Timeout | null = null;
15231553
let connectionAgeGraceTimer: NodeJS.Timeout | null = null;
1524-
let keeapliveTimeTimer: NodeJS.Timeout | null = null;
1525-
let keepaliveTimeoutTimer: NodeJS.Timeout | null = null;
1554+
let keepaliveInterval: NodeJS.Timeout | null = null;
15261555
let sessionClosedByServer = false;
15271556

15281557
const idleTimeoutObj = this.enableIdleTimeout(session);
@@ -1565,49 +1594,85 @@ export class Server {
15651594
}
15661595

15671596
if (this.keepaliveTimeMs < KEEPALIVE_MAX_TIME_MS) {
1568-
keeapliveTimeTimer = setInterval(() => {
1569-
keepaliveTimeoutTimer = setTimeout(() => {
1570-
sessionClosedByServer = true;
1571-
this.channelzTrace.addTrace(
1572-
'CT_INFO',
1573-
'Connection dropped by keepalive timeout from ' + clientAddress
1574-
);
1575-
1576-
session.close();
1597+
keepaliveInterval = setInterval(() => {
1598+
const keepaliveTimeout = setTimeout(() => {
1599+
if (keepaliveInterval) {
1600+
clearInterval(keepaliveInterval);
1601+
keepaliveInterval = null;
1602+
sessionClosedByServer = true;
1603+
this.channelzTrace.addTrace(
1604+
'CT_INFO',
1605+
'Connection dropped by keepalive timeout from ' + clientAddress
1606+
);
1607+
session.close();
1608+
}
15771609
}, this.keepaliveTimeoutMs);
1578-
keepaliveTimeoutTimer.unref?.();
1610+
keepaliveTimeout.unref?.();
15791611

15801612
try {
1581-
session.ping(
1582-
(err: Error | null, duration: number, payload: Buffer) => {
1583-
if (keepaliveTimeoutTimer) {
1584-
clearTimeout(keepaliveTimeoutTimer);
1585-
}
1586-
1587-
if (err) {
1588-
sessionClosedByServer = true;
1589-
this.channelzTrace.addTrace(
1590-
'CT_INFO',
1591-
'Connection dropped due to error of a ping frame ' +
1592-
err.message +
1593-
' return in ' +
1594-
duration
1595-
);
1596-
1597-
session.close();
1613+
if (
1614+
!session.ping(
1615+
(err: Error | null, duration: number, payload: Buffer) => {
1616+
clearTimeout(keepaliveTimeout);
1617+
if (err) {
1618+
if (keepaliveInterval) {
1619+
clearInterval(keepaliveInterval);
1620+
keepaliveInterval = null;
1621+
}
1622+
sessionClosedByServer = true;
1623+
this.channelzTrace.addTrace(
1624+
'CT_INFO',
1625+
'Connection dropped due to error with ping frame ' +
1626+
err.message +
1627+
' return in ' +
1628+
duration
1629+
);
1630+
session.close();
1631+
}
15981632
}
1599-
}
1600-
);
1633+
)
1634+
) {
1635+
throw new Error('Server keepalive ping send failed');
1636+
}
16011637
channelzSessionInfo.keepAlivesSent += 1;
16021638
} catch (e) {
1603-
clearTimeout(keepaliveTimeoutTimer);
1604-
// The ping can't be sent because the session is already closed
1639+
// The ping can't be sent because the session is already closed, max outstanding pings reached, etc
1640+
clearTimeout(keepaliveTimeout);
1641+
if (keepaliveInterval) {
1642+
clearInterval(keepaliveInterval);
1643+
keepaliveInterval = null;
1644+
}
1645+
this.channelzTrace.addTrace(
1646+
'CT_INFO',
1647+
'Connection dropped due to error sending ping frame ' +
1648+
(e instanceof Error ? e.message : 'unknown error')
1649+
);
16051650
session.destroy();
16061651
}
16071652
}, this.keepaliveTimeMs);
1608-
keeapliveTimeTimer.unref?.();
1653+
keepaliveInterval.unref?.();
16091654
}
16101655

1656+
session.once('goaway', (errorCode, lastStreamID, opaqueData) => {
1657+
if (errorCode === http2.constants.NGHTTP2_ENHANCE_YOUR_CALM) {
1658+
this.channelzTrace.addTrace(
1659+
'CT_INFO',
1660+
'Connection dropped by client due GOAWAY of ENHANCE_YOUR_CALM from ' +
1661+
clientAddress
1662+
);
1663+
} else {
1664+
this.channelzTrace.addTrace(
1665+
'CT_INFO',
1666+
'Connection dropped by client via GOAWAY with error code ' +
1667+
errorCode +
1668+
' from ' +
1669+
clientAddress
1670+
);
1671+
}
1672+
sessionClosedByServer = true;
1673+
session.destroy();
1674+
});
1675+
16111676
session.on('close', () => {
16121677
if (!sessionClosedByServer) {
16131678
this.channelzTrace.addTrace(
@@ -1627,11 +1692,9 @@ export class Server {
16271692
clearTimeout(connectionAgeGraceTimer);
16281693
}
16291694

1630-
if (keeapliveTimeTimer) {
1631-
clearInterval(keeapliveTimeTimer);
1632-
if (keepaliveTimeoutTimer) {
1633-
clearTimeout(keepaliveTimeoutTimer);
1634-
}
1695+
if (keepaliveInterval) {
1696+
clearInterval(keepaliveInterval);
1697+
keepaliveInterval = null;
16351698
}
16361699

16371700
if (idleTimeoutObj !== null) {

0 commit comments

Comments
 (0)