Skip to content

Commit e31e041

Browse files
fix: take into account units state on liveness (#10060) (#10100)
* fix: take into account units state on liveness * extract check components state to helper * merge conditional assignment into variable declaration (cherry picked from commit 4b818a1) Co-authored-by: Ruben Ruiz de Gauna <[email protected]>
1 parent bc861ff commit e31e041

File tree

5 files changed

+298
-7
lines changed

5 files changed

+298
-7
lines changed
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# REQUIRED
2+
# Kind can be one of:
3+
# - breaking-change: a change to previously-documented behavior
4+
# - deprecation: functionality that is being removed in a later release
5+
# - bug-fix: fixes a problem in a previous version
6+
# - enhancement: extends functionality but does not break or fix existing behavior
7+
# - feature: new functionality
8+
# - known-issue: problems that we are aware of in a given version
9+
# - security: impacts on the security of a product or a user’s deployment.
10+
# - upgrade: important information for someone upgrading from a prior version
11+
# - other: does not fit into any of the other categories
12+
kind: bug-fix
13+
14+
# REQUIRED for all kinds
15+
# Change summary; an 80ish-character description of the change.
16+
summary: Include components units status in HTTP liveness checks
17+
18+
# REQUIRED for breaking-change, deprecation, known-issue
19+
# Long description; in case the summary is not enough to describe the change
20+
# this field accommodates a description without length limits.
21+
# description:
22+
23+
# REQUIRED for breaking-change, deprecation, known-issue
24+
# impact:
25+
26+
# REQUIRED for breaking-change, deprecation, known-issue
27+
# action:
28+
29+
# REQUIRED for all kinds
30+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
31+
component: elastic-agent
32+
33+
# AUTOMATED
34+
# OPTIONAL to manually add other PR URLs
35+
# PR URL: A link to the PR that added the changeset.
36+
# If not present, it is automatically filled in by the tooling, which finds the PR where this changelog fragment was added.
37+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
38+
# Please provide it if you are adding a fragment for a different PR.
39+
# pr: https://github.com/owner/repo/1234
40+
41+
# AUTOMATED
42+
# OPTIONAL to manually add other issue URLs
43+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
44+
# If not present, it is automatically filled in by the tooling with the issue linked to the PR number.
45+
issue: https://github.com/elastic/elastic-agent/issues/8047

internal/pkg/agent/application/monitoring/liveness.go

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313

1414
"github.com/elastic/elastic-agent-client/v7/pkg/client"
1515

16+
"github.com/elastic/elastic-agent/internal/pkg/agent/application/monitoring/monitoringhelpers"
1617
"github.com/elastic/elastic-agent/internal/pkg/otel/otelhelpers"
1718
agentclient "github.com/elastic/elastic-agent/pkg/control/v2/client"
1819
)
@@ -83,13 +84,8 @@ func livenessHandler(coord CoordinatorState) func(http.ResponseWriter, *http.Req
8384
return nil
8485
}
8586

86-
unhealthyComponent := false
87-
for _, comp := range state.Components {
88-
if (failConfig.Failed && comp.State.State == client.UnitStateFailed) || (failConfig.Degraded && comp.State.State == client.UnitStateDegraded) {
89-
unhealthyComponent = true
90-
}
91-
}
92-
if state.Collector != nil {
87+
unhealthyComponent := (failConfig.Failed && monitoringhelpers.HaveState(state.Components, client.UnitStateFailed)) || (failConfig.Degraded && monitoringhelpers.HaveState(state.Components, client.UnitStateDegraded))
88+
if !unhealthyComponent && state.Collector != nil {
9389
if (failConfig.Failed && (otelhelpers.HasStatus(state.Collector, componentstatus.StatusFatalError) || otelhelpers.HasStatus(state.Collector, componentstatus.StatusPermanentError))) || (failConfig.Degraded && otelhelpers.HasStatus(state.Collector, componentstatus.StatusRecoverableError)) {
9490
unhealthyComponent = true
9591
}

internal/pkg/agent/application/monitoring/liveness_test.go

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,102 @@ func TestLivenessProcessHTTPHandler(t *testing.T) {
383383
expectedCode: 500,
384384
failon: "degraded",
385385
},
386+
{
387+
name: "component healthy and unit degraded",
388+
coord: mockCoordinator{
389+
isUp: true,
390+
state: coordinator.State{
391+
Components: []runtime.ComponentComponentState{
392+
{
393+
LegacyPID: "2",
394+
State: runtime.ComponentState{
395+
State: client.UnitStateHealthy,
396+
Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{
397+
{
398+
UnitType: client.UnitTypeInput,
399+
UnitID: "some-input-unit",
400+
}: {
401+
State: client.UnitStateDegraded,
402+
},
403+
},
404+
},
405+
Component: component.Component{
406+
ID: "test-component",
407+
InputSpec: &component.InputRuntimeSpec{
408+
BinaryName: "testbeat",
409+
},
410+
},
411+
},
412+
},
413+
},
414+
},
415+
expectedCode: 500,
416+
failon: "degraded",
417+
},
418+
{
419+
name: "component healthy and unit failed",
420+
coord: mockCoordinator{
421+
isUp: true,
422+
state: coordinator.State{
423+
Components: []runtime.ComponentComponentState{
424+
{
425+
LegacyPID: "2",
426+
State: runtime.ComponentState{
427+
State: client.UnitStateHealthy,
428+
Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{
429+
{
430+
UnitType: client.UnitTypeInput,
431+
UnitID: "some-input-unit",
432+
}: {
433+
State: client.UnitStateFailed,
434+
},
435+
},
436+
},
437+
Component: component.Component{
438+
ID: "test-component",
439+
InputSpec: &component.InputRuntimeSpec{
440+
BinaryName: "testbeat",
441+
},
442+
},
443+
},
444+
},
445+
},
446+
},
447+
expectedCode: 500,
448+
failon: "failed",
449+
},
450+
{
451+
name: "component healthy and unit healty",
452+
coord: mockCoordinator{
453+
isUp: true,
454+
state: coordinator.State{
455+
Components: []runtime.ComponentComponentState{
456+
{
457+
LegacyPID: "2",
458+
State: runtime.ComponentState{
459+
State: client.UnitStateHealthy,
460+
Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{
461+
{
462+
UnitType: client.UnitTypeInput,
463+
UnitID: "some-input-unit",
464+
}: {
465+
State: client.UnitStateHealthy,
466+
},
467+
},
468+
},
469+
Component: component.Component{
470+
ID: "test-component",
471+
InputSpec: &component.InputRuntimeSpec{
472+
BinaryName: "testbeat",
473+
},
474+
},
475+
},
476+
},
477+
},
478+
},
479+
expectedCode: 200,
480+
failon: "failed",
481+
},
386482
}
387483

388484
// test with processesHandler
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License 2.0;
3+
// you may not use this file except in compliance with the Elastic License 2.0.
4+
5+
package monitoringhelpers
6+
7+
import (
8+
"github.com/elastic/elastic-agent-client/v7/pkg/client"
9+
"github.com/elastic/elastic-agent/pkg/component/runtime"
10+
)
11+
12+
// HaveState returns true if any of the components or any of its units has the given state
13+
func HaveState(components []runtime.ComponentComponentState, state client.UnitState) bool {
14+
for _, component := range components {
15+
if component.State.State == state {
16+
return true
17+
}
18+
for _, unit := range component.State.Units {
19+
if unit.State == state {
20+
return true
21+
}
22+
}
23+
}
24+
return false
25+
}
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License 2.0;
3+
// you may not use this file except in compliance with the Elastic License 2.0.
4+
5+
package monitoringhelpers
6+
7+
import (
8+
"testing"
9+
10+
"github.com/elastic/elastic-agent-client/v7/pkg/client"
11+
"github.com/elastic/elastic-agent/pkg/component/runtime"
12+
)
13+
14+
func TestComponentsHasState(t *testing.T) {
15+
tests := []struct {
16+
name string
17+
components []runtime.ComponentComponentState
18+
state client.UnitState
19+
expected bool
20+
}{
21+
{
22+
name: "component with no units matches state",
23+
components: []runtime.ComponentComponentState{
24+
{
25+
State: runtime.ComponentState{
26+
State: client.UnitStateHealthy,
27+
Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{},
28+
},
29+
},
30+
},
31+
state: client.UnitStateHealthy,
32+
expected: true,
33+
},
34+
{
35+
name: "component with units in different state matches state",
36+
components: []runtime.ComponentComponentState{
37+
{
38+
State: runtime.ComponentState{
39+
State: client.UnitStateHealthy,
40+
Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{
41+
{
42+
UnitType: client.UnitTypeInput,
43+
UnitID: "some-input-unit",
44+
}: {
45+
State: client.UnitStateFailed,
46+
},
47+
},
48+
},
49+
},
50+
},
51+
state: client.UnitStateHealthy,
52+
expected: true,
53+
},
54+
{
55+
name: "unit matches state",
56+
components: []runtime.ComponentComponentState{
57+
{
58+
State: runtime.ComponentState{
59+
State: client.UnitStateDegraded,
60+
Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{
61+
{
62+
UnitType: client.UnitTypeInput,
63+
UnitID: "some-input-unit",
64+
}: {
65+
State: client.UnitStateHealthy,
66+
},
67+
},
68+
},
69+
},
70+
},
71+
state: client.UnitStateHealthy,
72+
expected: true,
73+
},
74+
{
75+
name: "no match in single component",
76+
components: []runtime.ComponentComponentState{
77+
{
78+
State: runtime.ComponentState{
79+
State: client.UnitStateDegraded,
80+
Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{
81+
{
82+
UnitType: client.UnitTypeInput,
83+
UnitID: "some-input-unit",
84+
}: {
85+
State: client.UnitStateStopped,
86+
},
87+
},
88+
},
89+
},
90+
},
91+
state: client.UnitStateHealthy,
92+
expected: false,
93+
},
94+
{
95+
name: "match in second component",
96+
components: []runtime.ComponentComponentState{
97+
{
98+
State: runtime.ComponentState{
99+
State: client.UnitStateDegraded,
100+
Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{},
101+
},
102+
},
103+
{
104+
State: runtime.ComponentState{
105+
State: client.UnitStateHealthy,
106+
Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{},
107+
},
108+
},
109+
},
110+
state: client.UnitStateHealthy,
111+
expected: true,
112+
},
113+
{
114+
name: "empty components slice",
115+
components: []runtime.ComponentComponentState{},
116+
state: client.UnitStateHealthy,
117+
expected: false,
118+
},
119+
}
120+
121+
for _, tt := range tests {
122+
t.Run(tt.name, func(t *testing.T) {
123+
result := HaveState(tt.components, tt.state)
124+
if result != tt.expected {
125+
t.Errorf("expected %v, got %v", tt.expected, result)
126+
}
127+
})
128+
}
129+
}

0 commit comments

Comments
 (0)