Skip to content

Commit a89b9fe

Browse files
Simone Sanfratello (simone-sanfratello)
authored and committed
feat: new readiness logic
1 parent 8f9debf commit a89b9fe

File tree

10 files changed

+203
-68
lines changed

10 files changed

+203
-68
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,11 @@ _Variables in bold are required._
5252
| P2P_CONNECTION_HANDLER_MAX_INBOUND_STREAMS | `1024` | p2p handler max incoming streams limit at the same time on each connection |
5353
| P2P_CONNECTION_HANDLER_MAX_OUTBOUND_STREAMS | `1024` | p2p handler max outgoing streams limit at the same time on each connection |
5454
| P2P_CONNECTION_TAGGED_PEERS_VALUE | `100` | p2p tagged peers default value, see [tagged peers](#tagged-peers). |
55-
| TELEMETRY_PORT | `3001` | The telemetry port number for the OpenTelemetry server to listen on. |
55+
| READINESS_MAX_CONNECTIONS | `200` | Maximum number of active connections; above this value the service reports itself as not ready. |
56+
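| READINESS_MAX_PENDING_REQUEST_BLOCKS | `1000` | Maximum number of pending request blocks; above this value the service reports itself as not ready. |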
57+
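| READINESS_MAX_EVENT_LOOP_UTILIZATION | `0.5` | Maximum event loop utilization (a value from 0 to 1); above this value the service reports itself as not ready. |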
58+
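| READINESS_MAX_RESPONSE_DURATION | `5000` | Maximum response duration in milliseconds; above this value the service reports itself as not ready. |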
59+
| HTTP_PORT | `3001` | The port number for the HTTP server (telemetry metrics, `/readiness` and `/load` endpoints) to listen on. |
5660
| NODE_DEBUG | | If it contains `aws-ipfs`, debug mode is enabled. |
5761
| LOG_LEVEL | `info` | Logging level. |
5862
| LOG_PRETTY | `false` | Enable pretty logging. |

package-lock.json

Lines changed: 29 additions & 29 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,10 @@
1616
"@aws-sdk/client-sqs": "3.231.0",
1717
"@aws-sdk/node-http-handler": "3.226.0",
1818
"@chainsafe/libp2p-noise": "10.2.0",
19-
"@libp2p/mplex": "7.1.0",
20-
"@libp2p/peer-id": "1.1.17",
21-
"@libp2p/peer-id-factory": "1.0.19",
22-
"@libp2p/websockets": "5.0.1",
19+
"@libp2p/mplex": "7.1.1",
20+
"@libp2p/peer-id": "1.1.18",
21+
"@libp2p/peer-id-factory": "1.0.20",
22+
"@libp2p/websockets": "5.0.2",
2323
"dotenv": "16.0.3",
2424
"e-ipfs-core-lib": "0.5.0",
2525
"it-length-prefixed": "8.0.3",

src/config.js

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,13 @@ export function makeConfig () {
4646

4747
peerAnnounceAddr: process.env.PEER_ANNOUNCE_ADDR,
4848
port: process.env.PORT ? parseInt(process.env.PORT) : 3000,
49-
httpPort: process.env.HTTP_PORT ? parseInt(process.env.PORT) : 3001,
49+
httpPort: process.env.HTTP_PORT ? parseInt(process.env.HTTP_PORT) : 3001,
50+
51+
// readiness
52+
readinessMaxConnections: process.env.READINESS_MAX_CONNECTIONS ? parseInt(process.env.READINESS_MAX_CONNECTIONS) : 200,
53+
readinessMaxPendingRequestBlocks: process.env.READINESS_MAX_PENDING_REQUEST_BLOCKS ? parseInt(process.env.READINESS_MAX_PENDING_REQUEST_BLOCKS) : 1e3,
54+
readinessMaxEventLoopUtilization: process.env.READINESS_MAX_EVENT_LOOP_UTILIZATION ? parseInt(process.env.READINESS_MAX_EVENT_LOOP_UTILIZATION) : 0.5, // 0 to 1
55+
readinessMaxResponseDuration: process.env.READINESS_MAX_RESPONSE_DURATION ? parseInt(process.env.READINESS_MAX_RESPONSE_DURATION) : 5e3, // 5 sec
5056

5157
// p2p
5258
p2pConnectionMaxConnections: process.env.P2P_CONNECTION_MAX_CONNECTIONS ? parseInt(process.env.P2P_CONNECTION_MAX_CONNECTIONS) : 10e3,

src/health-check.js

Lines changed: 43 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,47 @@
1+
import config from './config.js'
2+
import { telemetry } from './telemetry.js'
13

2-
const SUCCESS_CODE = 200
3-
const ERROR_CODE = 503
4+
/**
5+
* `bitswap-request-duration` is reset on every telemetry.export call (in /metrics)
6+
*/
7+
export function getHealthCheckValues () {
  // Read each metric from the telemetry registry; the three gauges are
  // reported as-is.
  const connections = telemetry.getGaugeValue('bitswap-active-connections')
  const pendingRequestBlocks = telemetry.getGaugeValue('bitswap-pending-entries')
  const eventLoopUtilization = telemetry.getGaugeValue('bitswap-elu')

  // The histogram lookup may yield null/undefined; report -1 in that case.
  const measuredDuration = telemetry.getHistogramValue('bitswap-request-duration')
  const responseDuration = measuredDuration ?? -1

  return { connections, pendingRequestBlocks, eventLoopUtilization, responseDuration }
}
15+
16+
/**
17+
* called every 1 second
18+
*/
19+
export function checkReadiness (logger) {
  // Compares the current health-check values against the configured
  // readiness thresholds, in order: connections, pending request blocks,
  // event loop utilization, response duration.
  //
  // `logger` must expose `warn(data, message)`; it is called once, for the
  // first threshold that is exceeded.
  // Returns `false` as soon as one value is strictly greater than its
  // threshold, `true` when none is.
  const resources = getHealthCheckValues()

  if (resources.connections > config.readinessMaxConnections) {
    logger.warn({ connections: resources.connections, maxConnections: config.readinessMaxConnections },
      'Service is not ready due to max connections')
    return false
  }

  if (resources.pendingRequestBlocks > config.readinessMaxPendingRequestBlocks) {
    logger.warn({ pendingRequestBlocks: resources.pendingRequestBlocks, maxPendingRequestBlocks: config.readinessMaxPendingRequestBlocks },
      'Service is not ready due to max pending request blocks')
    return false
  }

  if (resources.eventLoopUtilization > config.readinessMaxEventLoopUtilization) {
    logger.warn({ eventLoopUtilization: resources.eventLoopUtilization, maxEventLoopUtilization: config.readinessMaxEventLoopUtilization },
      'Service is not ready due to max event loop utilization')
    return false
  }

  if (resources.responseDuration > config.readinessMaxResponseDuration) {
    // Log the response duration that tripped the check, not the pending
    // request blocks count, so the warning matches its message.
    logger.warn({ responseDuration: resources.responseDuration, maxResponseDuration: config.readinessMaxResponseDuration },
      'Service is not ready due to max response duration')
    return false
  }

  return true
}

src/http-server.js

Lines changed: 7 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,13 @@
22
import { createServer } from 'node:http'
33
import { URL } from 'node:url'
44
import { logger } from './logging.js'
5-
import { checkReadiness } from './health-check.js'
5+
import { getHealthCheckValues, checkReadiness } from './health-check.js'
66
import { telemetry } from './telemetry.js'
77
import { version } from './util.js'
88

9+
const SUCCESS_CODE = 200
10+
const ERROR_CODE = 503
11+
912
class HttpServer {
1013
startServer ({ port }) {
1114
if (this.server) {
@@ -20,25 +23,16 @@ class HttpServer {
2023
res.end()
2124
break
2225
case '/readiness': {
23-
checkReadiness({ logger })
24-
.then(httpStatus => {
25-
res.writeHead(httpStatus).end()
26-
})
26+
res.writeHead(checkReadiness(logger) ? SUCCESS_CODE : ERROR_CODE)
27+
.end()
2728
break
2829
}
2930
case '/load': {
3031
res.writeHead(200, {
3132
connection: 'close',
3233
'content-type': 'application/json'
3334
})
34-
35-
const resources = {
36-
connections: telemetry.getGaugeValue('bitswap-active-connections'),
37-
pendingRequestBlocks: telemetry.getGaugeValue('bitswap-pending-entries'),
38-
eventLoopUtilization: telemetry.getGaugeValue('bitswap-elu'),
39-
// note: duration it's been reset every /metrics call
40-
responseDuration: telemetry.getHistogramValue('bitswap-request-duration') ?? -1
41-
}
35+
const resources = getHealthCheckValues()
4236

4337
res.end(JSON.stringify(resources))
4438
break

src/index.js

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,6 @@ import { getPeerId } from './peer-id.js'
88
import { createConnectionConfig } from './util.js'
99

1010
async function boot () {
11-
const readinessConfig = {
12-
// TODO
13-
}
14-
1511
try {
1612
const awsClient = await createAwsClient(config, logger)
1713

test/config.test.js

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,10 @@ t.test('config - defaults', async t => {
3737
peerAnnounceAddr: undefined,
3838
port: 3000,
3939
httpPort: 3001,
40+
readinessMaxConnections: 200,
41+
readinessMaxPendingRequestBlocks: 1000,
42+
readinessMaxEventLoopUtilization: 0.5,
43+
readinessMaxResponseDuration: 5000,
4044
p2pConnectionMaxConnections: 10000,
4145
p2pConnectionMinConnections: 0,
4246
p2pConnectionPollInterval: 2000,
@@ -90,6 +94,10 @@ t.test('config - all by env vars', async t => {
9094
process.env.PEER_ANNOUNCE_ADDR = '/dns4/elastic-dev.dag.house/tcp/443/wss'
9195
process.env.PORT = '3123'
9296
process.env.HTTP_PORT = '3258'
97+
process.env.READINESS_MAX_CONNECTIONS = '1'
98+
process.env.READINESS_MAX_PENDING_REQUEST_BLOCKS = '1'
99+
process.env.READINESS_MAX_EVENT_LOOP_UTILIZATION = '0.1'
100+
process.env.READINESS_MAX_RESPONSE_DURATION = '1'
93101
process.env.P2P_CONNECTION_MAX_CONNECTIONS = '99999'
94102
process.env.P2P_CONNECTION_MIN_CONNECTIONS = '1'
95103
process.env.P2P_CONNECTION_POLL_INTERVAL = '1000'
@@ -143,7 +151,11 @@ t.test('config - all by env vars', async t => {
143151
peerIdS3Region: 'aws-s3',
144152
peerAnnounceAddr: '/dns4/elastic-dev.dag.house/tcp/443/wss',
145153
port: 3123,
146-
httpPort: 3123,
154+
httpPort: 3258,
155+
readinessMaxConnections: 1,
156+
readinessMaxPendingRequestBlocks: 1,
157+
readinessMaxEventLoopUtilization: 0,
158+
readinessMaxResponseDuration: 1,
147159
p2pConnectionMaxConnections: 99999,
148160
p2pConnectionMinConnections: 1,
149161
p2pConnectionPollInterval: 1000,

0 commit comments

Comments
 (0)