Skip to content

Commit dbd9995

Browse files
scriptorianSimon Rogers
andauthored
Add OpenAPI health endpoint and a prometheus health gauge (#1468)
Co-authored-by: Simon Rogers <[email protected]>
1 parent 3151572 commit dbd9995

File tree

10 files changed

+372
-61
lines changed

10 files changed

+372
-61
lines changed

meteor/server/api/rest/v1/index.ts

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import { registerRoutes as registerSystemRoutes } from './system'
2020
import { registerRoutes as registerBucketsRoutes } from './buckets'
2121
import { registerRoutes as registerSnapshotRoutes } from './snapshots'
2222
import { APIFactory, ServerAPIContext } from './types'
23+
import { getSystemStatus } from '../../../systemStatus/systemStatus'
24+
import { Component, ExternalStatus } from '@sofie-automation/meteor-lib/dist/api/systemStatus'
2325

2426
function restAPIUserEvent(
2527
ctx: Koa.ParameterizedContext<
@@ -193,6 +195,82 @@ koaRouter.get('/', async (ctx, next) => {
193195
await next()
194196
})
195197

198+
/**
 * GET /health
 *
 * Responds with a JSON summary of the system status returned by `getSystemStatus`:
 * the overall name/updated/status, the core and blueprints versions, a tree of
 * component statuses (children are nested under the component whose `instanceId`
 * matches their `parentId`) and a single aggregated status message.
 */
koaRouter.get('/health', async (ctx, next) => {
	ctx.type = 'application/json'
	const systemStatus = await getSystemStatus(null)

	const versions = systemStatus._internal.versions
	const coreVersion = versions['core'] ?? 'unknown'
	// The first version entry whose key starts with 'blueprint' is reported as the blueprints version
	const blueprintKey = Object.keys(versions).find((component) => component.startsWith('blueprint'))
	const blueprintsVersion = (blueprintKey !== undefined ? versions[blueprintKey] : undefined) ?? 'unknown'

	interface ComponentStatus {
		name: string
		updated: string
		status: ExternalStatus
		version?: string
		components?: ComponentStatus[]
		statusMessage?: string
	}

	// Patch the component statusMessage to be from the _internal field if required.
	// An empty statusMessage is treated the same as a missing one, and an empty
	// fallback is reported as "no message" rather than as an empty string.
	const allComponentsPatched = systemStatus.components?.map((c) => {
		let statusMessage = c.statusMessage?.length ? c.statusMessage : undefined
		if (statusMessage === undefined && c.status !== 'OK') {
			const internalMessages = c._internal.messages.join(', ')
			statusMessage = internalMessages.length ? internalMessages : undefined
		}
		return {
			...c,
			statusMessage,
		}
	})

	// Array of all devices that have a parentId.
	// Taken from the patched list so that nested components carry the same
	// statusMessage fallback as top-level ones.
	const subComponents =
		allComponentsPatched?.filter((c) => c.instanceId !== undefined && c.parentId !== undefined) ?? []

	/** Convert components to the response shape, recursively attaching their children. */
	function mapComponents(components?: Component[]): ComponentStatus[] | undefined {
		return (
			components?.map((c) => {
				const version = c._internal.versions['_process']
				const children = subComponents.filter((sub) => sub.parentId === c.instanceId)
				return {
					name: c.name,
					updated: c.updated,
					status: c.status,
					version: version ?? undefined,
					components: children.length ? mapComponents(children) : undefined,
					statusMessage: c.statusMessage?.length ? c.statusMessage : undefined,
				}
			}) ?? undefined
		)
	}

	// Report status for all devices that are not children and any non-devices that are not OK
	const componentStatus =
		mapComponents(
			allComponentsPatched?.filter(
				(c) => (c.instanceId !== undefined || c.status !== 'OK') && c.parentId === undefined
			)
		) ?? []

	const allStatusMessages =
		allComponentsPatched // include children by not using componentStatus here
			?.filter((c) => c.statusMessage !== undefined)
			.map((c) => `${c.name}: ${c.statusMessage}`)
			.join('; ') ?? ''

	const response = ClientAPI.responseSuccess({
		name: systemStatus.name,
		updated: systemStatus.updated,
		status: systemStatus.status,
		version: coreVersion,
		blueprintsVersion: blueprintsVersion,
		components: componentStatus,
		statusMessage: allStatusMessages,
	})

	// The success code of the response is used both in the body and as the HTTP status
	ctx.body = JSON.stringify({ status: response.success, result: response.result })
	ctx.status = response.success
	await next()
})
273+
196274
registerBlueprintsRoutes(sofieAPIRequest)
197275
registerDevicesRoutes(sofieAPIRequest)
198276
registerPlaylistsRoutes(sofieAPIRequest)

meteor/server/main.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import './migration/migrations'
3737
import './api/playout/debug'
3838
import './performanceMonitor'
3939
import './systemStatus/api'
40+
import './systemStatus/prometheusHealthGauge'
4041
import './api/user'
4142
import './api/organizations'
4243
import './api/serviceMessages/api'
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import { MetricsGauge } from '@sofie-automation/corelib/dist/prometheus'
2+
import { getSystemStatus } from './systemStatus'
3+
4+
/**
 * Gauge exposing the health of the system and of each of its components.
 *
 * One series is written per `name`/`version` label pair; the value encodes the
 * status as OK = 0, FAIL = 1, WARNING = 2, UNDEFINED = 3.
 */
export const healthGauge = new MetricsGauge({
	name: 'sofie_health_status',
	help: 'Health status of Sofie application and its components',
	labelNames: ['name', 'version'] as const,
	async collect() {
		const systemStatus = await getSystemStatus(null)

		// Drop the series written by earlier collections, so that components which
		// have disappeared (or changed version) do not keep reporting a stale value.
		this.reset()

		const statusValues = { OK: 0, FAIL: 1, WARNING: 2, UNDEFINED: 3 }
		this.labels({
			name: systemStatus.name,
			// Fall back to an empty label, as is done for the components below
			version: systemStatus._internal.versions['core'] ?? '',
		}).set(statusValues[systemStatus.status])

		systemStatus.components?.forEach((c) => {
			this.labels({
				name: c.name,
				version: c._internal.versions['_process'] ?? '',
			}).set(statusValues[c.status])
		})
	},
})

meteor/server/systemStatus/systemStatus.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ function getSystemStatusForDevice(device: PeripheralDevice): StatusResponse {
137137
const so: StatusResponse = {
138138
name: device.name,
139139
instanceId: device._id,
140+
parentId: device.parentDeviceId ?? undefined,
140141
status: 'UNDEFINED',
141142
updated: new Date(device.lastSeen).toISOString(),
142143
_status: deviceStatus,

packages/meteor-lib/src/api/systemStatus.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ export interface StatusResponseBase {
2727

2828
statusMessage?: string // Tekstlig beskrivelse av status. (Eks: OK, Running, Standby, Completed successfully, 2/3 nodes running, Slow response time).
2929
instanceId?: ProtectedString<any>
30+
parentId?: ProtectedString<any>
3031
utilises?: Array<string>
3132
consumers?: Array<string>
3233
version?: '3' // version of healthcheck

packages/openapi/api/actions.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ tags:
2020
paths:
2121
/:
2222
$ref: 'definitions/sofie.yaml#/resources/index'
23+
/health:
24+
$ref: 'definitions/sofie.yaml#/resources/health'
2325
/system/blueprint:
2426
$ref: 'definitions/sofie.yaml#/resources/systemBlueprint'
2527
/system/migrations:

packages/openapi/api/definitions/sofie.yaml

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,21 @@ resources:
131131
type: string
132132
500:
133133
$ref: '#/components/responses/internalServerError'
134+
health:
135+
get:
136+
operationId: getHealth
137+
tags:
138+
- sofie
139+
summary: Gets the current health status of Sofie and all its components
140+
responses:
141+
200:
142+
description: Command successfully handled - returns an object with detailed health status
143+
content:
144+
application/json:
145+
schema:
146+
$ref: '#/components/schemas/healthStatus'
147+
500:
148+
$ref: '#/components/responses/internalServerError'
134149
components:
135150
schemas:
136151
migrationInputs:
@@ -204,6 +219,88 @@ components:
204219
- attributeId
205220
- migrationValue
206221
additionalProperties: false
222+
healthStatus:
223+
type: object
224+
properties:
225+
name:
226+
type: string
227+
description: Name of the running system
228+
example: Sofie Automation system
229+
updated:
230+
type: string
231+
format: date-time
232+
description: Time when the status of Sofie was updated
233+
example: '2023-11-29T16:50:06.057Z'
234+
status:
235+
type: string
236+
enum: [OK, FAIL, WARNING, UNDEFINED]
237+
description: Sofie status string
238+
example: OK
239+
version:
240+
type: string
241+
description: Sofie core software version
242+
example: '1.50.10'
243+
blueprintsVersion:
244+
type: string
245+
description: Sofie blueprints version
246+
example: '1.0.60'
247+
components:
248+
type: array
249+
description: Array of components that are part of the Sofie software
250+
items:
251+
$ref: '#/components/schemas/healthComponentStatus'
252+
statusMessage:
253+
type: string
254+
description: Concatenation of Sofie status and all component statuses, separated by semicolons
255+
example: 'Playout gateway: Disconnected'
256+
required:
257+
- name
258+
- updated
259+
- status
260+
- version
261+
- blueprintsVersion
262+
- components
263+
- statusMessage
264+
additionalProperties: false
265+
healthComponentStatus:
266+
type: object
267+
properties:
268+
name:
269+
type: string
270+
description: Name of the component
271+
example: Playout Gateway
272+
updated:
273+
type: string
274+
format: date-time
275+
description: Time when the component status was updated
276+
status:
277+
type: string
278+
enum: [OK, FAIL, WARNING, UNDEFINED]
279+
description: Component status string
280+
example: OK
281+
version:
282+
type: string
283+
description: Component software version
284+
example: '0.1.13'
285+
components:
286+
type: array
287+
description: Array of components that are children of this component. Can recurse - components with no child will have no components member
288+
items:
289+
type: object
290+
description: Components conforming to the same definition as the parent object
291+
example:
292+
- name: atem
293+
updated: '2023-11-28T15:17:21.712Z'
294+
status: OK
295+
statusMessage:
296+
type: string
297+
description: Status messages for this component
298+
example: Disconnected
299+
required:
300+
- name
301+
- updated
302+
- status
303+
additionalProperties: false
207304
responses:
208305
postSuccess:
209306
description: POST success.

packages/openapi/install_swagger.js renamed to packages/openapi/install_swagger.mjs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
const fs = require('fs/promises')
2-
const fs_constants = require('fs').constants
3-
// eslint-disable-next-line n/no-unpublished-require
4-
const wget = require('wget-improved')
1+
import fs from 'fs/promises'
2+
import { exit } from 'process'
3+
// eslint-disable-next-line n/no-unpublished-import
4+
import wget from 'wget-improved'
55

66
async function get(url, path) {
77
let totalBytes = 0
@@ -23,7 +23,7 @@ async function get(url, path) {
2323
}
2424
})
2525
download.on('end', function (output) {
26-
process.stdout.write(`Downloaded 100% of '${path}'. Total length ${totalBytes} bytes.\n`)
26+
process.stdout.write(`${output}, total length ${totalBytes} bytes.\n`)
2727
resolve(output)
2828
})
2929
})
@@ -37,11 +37,13 @@ async function checkInstall() {
3737
})
3838

3939
const srcPath =
40-
'https://repo1.maven.org/maven2/io/swagger/codegen/v3/swagger-codegen-cli/3.0.34/swagger-codegen-cli-3.0.34.jar'
40+
'https://repo1.maven.org/maven2/io/swagger/codegen/v3/swagger-codegen-cli/3.0.51/swagger-codegen-cli-3.0.51.jar'
4141
const swaggerFilename = 'swagger-codegen-cli.jar'
4242
await fs
43-
.access(`jars/${swaggerFilename}`, fs_constants.R_OK)
43+
.access(`jars/${swaggerFilename}`, fs.constants.R_OK)
4444
.catch(async () => get(srcPath, `jars/${swaggerFilename}`))
4545
}
4646

47-
checkInstall()
47+
await checkInstall()
48+
49+
exit(0)

packages/openapi/package.json

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,26 +19,27 @@
1919
"lint-fix": "run lint --fix",
2020
"genclient:ts": "run -T rimraf client/ts && openapi-generator-cli generate -i ./api/actions.yaml -o client/ts -g typescript-fetch -p supportsES6=true",
2121
"genclient:rs": "run -T rimraf client/rs && openapi-generator-cli generate -i ./api/actions.yaml -o client/rs -g rust",
22-
"gendocs": "run -T rimraf docs && node install_swagger.js && java -jar ./jars/swagger-codegen-cli.jar generate -i ./api/actions.yaml -l html2 -o ./docs",
23-
"genserver": "run -T rimraf server && node install_swagger.js && java -jar ./jars/swagger-codegen-cli.jar generate -i ./api/actions.yaml -l nodejs-server -o server && cd server && npm install && cd ../",
22+
"genclient:cs": "run -T rimraf client/cs && openapi-generator-cli generate -i ./api/actions.yaml -o client/cs -g csharp",
23+
"gendocs": "run -T rimraf docs && node install_swagger.mjs && java -jar ./jars/swagger-codegen-cli.jar generate -i ./api/actions.yaml -l html2 -o ./docs",
24+
"genserver": "run -T rimraf server && node install_swagger.mjs && java -jar ./jars/swagger-codegen-cli.jar generate -i ./api/actions.yaml -l nodejs-server -o server && cd server && npm install && cd ../",
2425
"runserver": "run genserver && cd server && node index.js",
2526
"test": "run lint && run genclient:ts && run unit",
2627
"unit:no-server": "node --experimental-fetch ../node_modules/jest/bin/jest.js --detectOpenHandles --forceExit"
2728
},
2829
"prettier": "@sofie-automation/code-standard-preset/.prettierrc.json",
2930
"engines": {
30-
"node": ">=21"
31+
"node": ">=22.13.1"
3132
},
3233
"files": [
3334
"/api",
3435
"/dist",
35-
"install_swagger.js"
36+
"install_swagger.mjs"
3637
],
3738
"dependencies": {
3839
"tslib": "^2.8.1"
3940
},
4041
"devDependencies": {
41-
"@openapitools/openapi-generator-cli": "^2.16.3",
42+
"@openapitools/openapi-generator-cli": "^2.20.2",
4243
"eslint": "^9.18.0",
4344
"eslint-plugin-yml": "^1.16.0",
4445
"js-yaml": "^4.1.0",
@@ -51,5 +52,6 @@
5152
"*.{ts,tsx,js,jsx,yaml}": [
5253
"yarn lint:raw"
5354
]
54-
}
55+
},
56+
"packageManager": "[email protected]"
5557
}

0 commit comments

Comments
 (0)