@@ -116,6 +116,9 @@ type Args struct {
116
116
CrdController bool
117
117
EnableNodeFeatureApi bool
118
118
Port int
119
+ // GrpcHealthPort is only needed to avoid races between tests (by skipping the health server).
120
+ // Could be removed when gRPC labeler service is dropped (when nfd-worker tests stop running nfd-master).
121
+ GrpcHealthPort int
119
122
Prune bool
120
123
VerifyNodeName bool
121
124
Options string
@@ -144,6 +147,7 @@ type nfdMaster struct {
144
147
nodeName string
145
148
configFilePath string
146
149
server * grpc.Server
150
+ healthServer * grpc.Server
147
151
stop chan struct {}
148
152
ready chan bool
149
153
apihelper apihelper.APIHelpers
@@ -270,7 +274,11 @@ func (m *nfdMaster) Run() error {
270
274
271
275
// Run gRPC server
272
276
grpcErr := make (chan error , 1 )
273
- go m .runGrpcServer (grpcErr )
277
+ // If the NodeFeature API is enabled, don't register the labeler API
278
+ // server. Otherwise, register the labeler server.
279
+ if ! m .args .EnableNodeFeatureApi {
280
+ go m .runGrpcServer (grpcErr )
281
+ }
274
282
275
283
// Run updater that handles events from the nfd CRD API.
276
284
if m .nfdController != nil {
@@ -281,6 +289,13 @@ func (m *nfdMaster) Run() error {
281
289
}
282
290
}
283
291
292
+ // Start gRPC server for liveness probe (at this point we're "live")
293
+ if m .args .GrpcHealthPort != 0 {
294
+ if err := m .startGrpcHealthServer (grpcErr ); err != nil {
295
+ return fmt .Errorf ("failed to start gRPC health server: %w" , err )
296
+ }
297
+ }
298
+
284
299
// Notify that we're ready to accept connections
285
300
m .ready <- true
286
301
close (m .ready )
@@ -323,6 +338,32 @@ func (m *nfdMaster) Run() error {
323
338
}
324
339
}
325
340
341
+ // startGrpcHealthServer starts a gRPC health server for Kubernetes readiness/liveness probes.
342
+ // TODO: improve status checking e.g. with watchdog in the main event loop and
343
+ // cheking that node updater pool is alive.
344
+ func (m * nfdMaster ) startGrpcHealthServer (errChan chan <- error ) error {
345
+ lis , err := net .Listen ("tcp" , fmt .Sprintf (":%d" , m .args .GrpcHealthPort ))
346
+ if err != nil {
347
+ return fmt .Errorf ("failed to listen: %w" , err )
348
+ }
349
+
350
+ s := grpc .NewServer ()
351
+ grpc_health_v1 .RegisterHealthServer (s , health .NewServer ())
352
+ klog .InfoS ("gRPC health server serving" , "port" , m .args .GrpcHealthPort )
353
+
354
+ go func () {
355
+ defer func () {
356
+ lis .Close ()
357
+ }()
358
+ if err := s .Serve (lis ); err != nil {
359
+ errChan <- fmt .Errorf ("gRPC health server exited with an error: %w" , err )
360
+ }
361
+ klog .InfoS ("gRPC health server stopped" )
362
+ }()
363
+ m .healthServer = s
364
+ return nil
365
+ }
366
+
326
367
func (m * nfdMaster ) runGrpcServer (errChan chan <- error ) {
327
368
// Create server listening for TCP connections
328
369
lis , err := net .Listen ("tcp" , fmt .Sprintf (":%d" , m .args .Port ))
@@ -352,13 +393,8 @@ func (m *nfdMaster) runGrpcServer(errChan chan<- error) {
352
393
}
353
394
m .server = grpc .NewServer (serverOpts ... )
354
395
355
- // If the NodeFeature API is enabled, don'tregister the labeler API
356
- // server. Otherwise, register the labeler server.
357
- if ! m .args .EnableNodeFeatureApi {
358
- pb .RegisterLabelerServer (m .server , m )
359
- }
396
+ pb .RegisterLabelerServer (m .server , m )
360
397
361
- grpc_health_v1 .RegisterHealthServer (m .server , health .NewServer ())
362
398
klog .InfoS ("gRPC server serving" , "port" , m .args .Port )
363
399
364
400
// Run gRPC server
@@ -421,7 +457,12 @@ func (m *nfdMaster) nfdAPIUpdateHandler() {
421
457
422
458
// Stop NfdMaster
423
459
func (m * nfdMaster ) Stop () {
424
- m .server .GracefulStop ()
460
+ if m .server != nil {
461
+ m .server .GracefulStop ()
462
+ }
463
+ if m .healthServer != nil {
464
+ m .healthServer .GracefulStop ()
465
+ }
425
466
426
467
if m .nfdController != nil {
427
468
m .nfdController .stop ()
0 commit comments