Skip to content

Commit 5cb10c9

Browse files
committed
Reset gateway connection status when health checker reports connected
When the health checker reports a connection error, the gateway syncer sets the gateway connection status to error. When the health checker subsequently reports connected, the gateway syncer needs to set the status back to connected. The remaining changes add unit tests for the health checker's interaction with the gateway syncer. This entailed creating a PingerInterface and a fake implementation that can be injected into the health checker.

Signed-off-by: Tom Pantelis <tompantelis@gmail.com>
1 parent a12d325 commit 5cb10c9

File tree

6 files changed

+332
-92
lines changed

6 files changed

+332
-92
lines changed

main.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,13 @@ func main() {
125125

126126
var cableHealthchecker healthchecker.Interface
127127
if len(submSpec.GlobalCidr) == 0 && submSpec.HealthCheckEnabled {
128-
cableHealthchecker, err = healthchecker.New(&watcher.Config{RestConfig: cfg}, submSpec.Namespace,
129-
submSpec.ClusterID, submSpec.HealthCheckInterval, submSpec.HealthCheckMaxPacketLossCount)
128+
cableHealthchecker, err = healthchecker.New(&healthchecker.Config{
129+
WatcherConfig: &watcher.Config{RestConfig: cfg},
130+
EndpointNamespace: submSpec.Namespace,
131+
ClusterID: submSpec.ClusterID,
132+
PingInterval: submSpec.HealthCheckInterval,
133+
MaxPacketLossCount: submSpec.HealthCheckMaxPacketLossCount,
134+
})
130135
if err != nil {
131136
klog.Errorf("Error creating healthChecker: %v", err)
132137
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
package fake
2+
3+
import (
4+
"sync/atomic"
5+
6+
. "github.com/onsi/ginkgo"
7+
. "github.com/onsi/gomega"
8+
"github.com/submariner-io/submariner/pkg/cableengine/healthchecker"
9+
)
10+
11+
type Pinger struct {
12+
ip string
13+
latencyInfo atomic.Value
14+
start chan struct{}
15+
stop chan struct{}
16+
}
17+
18+
func NewPinger(ip string) *Pinger {
19+
return &Pinger{
20+
ip: ip,
21+
start: make(chan struct{}),
22+
stop: make(chan struct{}),
23+
}
24+
}
25+
26+
func (p *Pinger) Start() {
27+
defer GinkgoRecover()
28+
Expect(p.start).ToNot(BeClosed())
29+
close(p.start)
30+
}
31+
32+
func (p *Pinger) Stop() {
33+
defer GinkgoRecover()
34+
Expect(p.stop).ToNot(BeClosed())
35+
close(p.stop)
36+
}
37+
38+
func (p *Pinger) GetLatencyInfo() *healthchecker.LatencyInfo {
39+
o := p.latencyInfo.Load()
40+
if o != nil {
41+
info := o.(healthchecker.LatencyInfo)
42+
return &info
43+
}
44+
45+
return nil
46+
}
47+
48+
func (p *Pinger) SetLatencyInfo(info *healthchecker.LatencyInfo) {
49+
p.latencyInfo.Store(*info)
50+
}
51+
52+
func (p *Pinger) GetIP() string {
53+
return p.ip
54+
}
55+
56+
func (p *Pinger) AwaitStart() {
57+
Eventually(p.start).Should(BeClosed(), "Start was not called")
58+
}
59+
60+
func (p *Pinger) AwaitStop() {
61+
Eventually(p.stop).Should(BeClosed(), "Stop was not called")
62+
}

pkg/cableengine/healthchecker/healthchecker.go

Lines changed: 45 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package healthchecker
22

33
import (
4-
"strconv"
54
"sync"
65
"time"
76

@@ -23,21 +22,27 @@ type Interface interface {
2322
GetLatencyInfo(endpoint *submarinerv1.EndpointSpec) *LatencyInfo
2423
}
2524

25+
type Config struct {
26+
WatcherConfig *watcher.Config
27+
EndpointNamespace string
28+
ClusterID string
29+
PingInterval uint
30+
MaxPacketLossCount uint
31+
NewPinger func(string, time.Duration, uint) PingerInterface
32+
}
33+
2634
type controller struct {
27-
endpointWatcher watcher.Interface
28-
pingers sync.Map
29-
clusterID string
30-
pingInterval uint
31-
maxPacketLossCount uint
35+
endpointWatcher watcher.Interface
36+
pingers sync.Map
37+
config *Config
3238
}
3339

34-
func New(config *watcher.Config, endpointNameSpace, clusterID string, pingInterval, maxPacketLossCount uint) (Interface, error) {
40+
func New(config *Config) (Interface, error) {
3541
controller := &controller{
36-
clusterID: clusterID,
37-
pingInterval: pingInterval,
38-
maxPacketLossCount: maxPacketLossCount,
42+
config: config,
3943
}
40-
config.ResourceConfigs = []watcher.ResourceConfig{
44+
45+
config.WatcherConfig.ResourceConfigs = []watcher.ResourceConfig{
4146
{
4247
Name: "HealthChecker Endpoint Controller",
4348
ResourceType: &submarinerv1.Endpoint{},
@@ -46,41 +51,23 @@ func New(config *watcher.Config, endpointNameSpace, clusterID string, pingInterv
4651
OnUpdateFunc: controller.endpointCreatedorUpdated,
4752
OnDeleteFunc: controller.endpointDeleted,
4853
},
49-
SourceNamespace: endpointNameSpace,
54+
SourceNamespace: config.EndpointNamespace,
5055
},
5156
}
5257

53-
endpointWatcher, err := watcher.New(config)
58+
var err error
5459

60+
controller.endpointWatcher, err = watcher.New(config.WatcherConfig)
5561
if err != nil {
5662
return nil, err
5763
}
5864

59-
controller.endpointWatcher = endpointWatcher
60-
6165
return controller, nil
6266
}
6367

6468
func (h *controller) GetLatencyInfo(endpoint *submarinerv1.EndpointSpec) *LatencyInfo {
6569
if obj, found := h.pingers.Load(endpoint.CableName); found {
66-
pinger := obj.(*pingerInfo)
67-
68-
lastTime, _ := time.ParseDuration(strconv.FormatUint(pinger.statistics.lastRtt, 10) + "ns")
69-
minTime, _ := time.ParseDuration(strconv.FormatUint(pinger.statistics.minRtt, 10) + "ns")
70-
averageTime, _ := time.ParseDuration(strconv.FormatUint(pinger.statistics.mean, 10) + "ns")
71-
maxTime, _ := time.ParseDuration(strconv.FormatUint(pinger.statistics.maxRtt, 10) + "ns")
72-
stdDevTime, _ := time.ParseDuration(strconv.FormatUint(pinger.statistics.stdDev, 10) + "ns")
73-
74-
return &LatencyInfo{
75-
ConnectionError: pinger.failureMsg,
76-
Spec: &submarinerv1.LatencyRTTSpec{
77-
Last: lastTime.String(),
78-
Min: minTime.String(),
79-
Average: averageTime.String(),
80-
Max: maxTime.String(),
81-
StdDev: stdDevTime.String(),
82-
},
83-
}
70+
return obj.(PingerInterface).GetLatencyInfo()
8471
}
8572

8673
return nil
@@ -91,13 +78,16 @@ func (h *controller) Start(stopCh <-chan struct{}) error {
9178
return err
9279
}
9380

81+
klog.Infof("CableEngine HealthChecker started with PingInterval: %v, MaxPacketLossCount: %v", h.config.PingInterval,
82+
h.config.MaxPacketLossCount)
83+
9484
return nil
9585
}
9686

9787
func (h *controller) endpointCreatedorUpdated(obj runtime.Object) bool {
9888
klog.V(log.TRACE).Infof("Endpoint created: %#v", obj)
9989
endpointCreated := obj.(*submarinerv1.Endpoint)
100-
if endpointCreated.Spec.ClusterID == h.clusterID {
90+
if endpointCreated.Spec.ClusterID == h.config.ClusterID {
10191
return false
10292
}
10393

@@ -108,33 +98,38 @@ func (h *controller) endpointCreatedorUpdated(obj runtime.Object) bool {
10898
}
10999

110100
if obj, found := h.pingers.Load(endpointCreated.Spec.CableName); found {
111-
pinger := obj.(*pingerInfo)
112-
if pinger.healthCheckIP == endpointCreated.Spec.HealthCheckIP {
101+
pinger := obj.(PingerInterface)
102+
if pinger.GetIP() == endpointCreated.Spec.HealthCheckIP {
113103
return false
114104
}
115105

116106
klog.V(log.DEBUG).Infof("HealthChecker is already running for %q - stopping", endpointCreated.Name)
117-
pinger.stop()
107+
pinger.Stop()
118108
h.pingers.Delete(endpointCreated.Spec.CableName)
119109
}
120110

121-
klog.V(log.TRACE).Infof("Starting Pinger for CableName: %q, with HealthCheckIP: %q",
122-
endpointCreated.Spec.CableName, endpointCreated.Spec.HealthCheckIP)
123-
124-
pingInterval := DefaultPingInterval
125-
if h.pingInterval != 0 {
126-
pingInterval = time.Second * time.Duration(h.pingInterval)
111+
pingInterval := defaultPingInterval
112+
if h.config.PingInterval != 0 {
113+
pingInterval = time.Second * time.Duration(h.config.PingInterval)
127114
}
128115

129-
maxPacketLossCount := DefaultMaxPacketLossCount
116+
maxPacketLossCount := defaultMaxPacketLossCount
117+
118+
if h.config.MaxPacketLossCount != 0 {
119+
maxPacketLossCount = h.config.MaxPacketLossCount
120+
}
130121

131-
if h.maxPacketLossCount != 0 {
132-
maxPacketLossCount = h.maxPacketLossCount
122+
newPingerFunc := h.config.NewPinger
123+
if newPingerFunc == nil {
124+
newPingerFunc = newPinger
133125
}
134126

135-
pinger := newPinger(endpointCreated.Spec.HealthCheckIP, pingInterval, maxPacketLossCount)
127+
pinger := newPingerFunc(endpointCreated.Spec.HealthCheckIP, pingInterval, maxPacketLossCount)
136128
h.pingers.Store(endpointCreated.Spec.CableName, pinger)
137-
pinger.start()
129+
pinger.Start()
130+
131+
klog.Infof("CableEngine HealthChecker started pinger for CableName: %q with HealthCheckIP %q",
132+
endpointCreated.Spec.CableName, endpointCreated.Spec.HealthCheckIP)
138133

139134
return false
140135
}
@@ -146,8 +141,8 @@ func (h *controller) endpointDeleted(obj runtime.Object) bool {
146141
}
147142

148143
if obj, found := h.pingers.Load(endpointDeleted.Spec.CableName); found {
149-
pinger := obj.(*pingerInfo)
150-
pinger.stop()
144+
pinger := obj.(PingerInterface)
145+
pinger.Stop()
151146
h.pingers.Delete(endpointDeleted.Spec.CableName)
152147
}
153148

pkg/cableengine/healthchecker/pinger.go

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,35 +2,44 @@ package healthchecker
22

33
import (
44
"fmt"
5+
"strconv"
56
"time"
67

78
"github.com/go-ping/ping"
9+
submarinerv1 "github.com/submariner-io/submariner/pkg/apis/submariner.io/v1"
810
"k8s.io/klog"
911
)
1012

1113
var waitTime = 15 * time.Second
1214

13-
var DefaultMaxPacketLossCount uint = 5
15+
var defaultMaxPacketLossCount uint = 5
1416

1517
// The RTT will be stored and will be used to calculate the statistics until
1618
// the size is reached. Once the size is reached the array will be reset and
1719
// the last elements will be added to the array for statistics.
1820
var size uint64 = 1000
1921

20-
var DefaultPingInterval = 1 * time.Second
22+
var defaultPingInterval = 1 * time.Second
23+
24+
type PingerInterface interface {
25+
Start()
26+
Stop()
27+
GetLatencyInfo() *LatencyInfo
28+
GetIP() string
29+
}
2130

2231
type pingerInfo struct {
23-
healthCheckIP string
32+
ip string
2433
pingInterval time.Duration
2534
maxPacketLossCount uint
2635
statistics statistics
2736
failureMsg string
2837
stopCh chan struct{}
2938
}
3039

31-
func newPinger(healthCheckIP string, pingInterval time.Duration, maxPacketLossCount uint) *pingerInfo {
40+
func newPinger(ip string, pingInterval time.Duration, maxPacketLossCount uint) PingerInterface {
3241
return &pingerInfo{
33-
healthCheckIP: healthCheckIP,
42+
ip: ip,
3443
pingInterval: pingInterval,
3544
maxPacketLossCount: maxPacketLossCount,
3645
statistics: statistics{
@@ -41,7 +50,7 @@ func newPinger(healthCheckIP string, pingInterval time.Duration, maxPacketLossCo
4150
}
4251
}
4352

44-
func (p *pingerInfo) start() {
53+
func (p *pingerInfo) Start() {
4554
go func() {
4655
for {
4756
select {
@@ -52,17 +61,16 @@ func (p *pingerInfo) start() {
5261
}
5362
}
5463
}()
55-
klog.Infof("CableEngine HealthChecker started pinger for IP %q", p.healthCheckIP)
5664
}
5765

58-
func (p *pingerInfo) stop() {
66+
func (p *pingerInfo) Stop() {
5967
close(p.stopCh)
6068
}
6169

6270
func (p *pingerInfo) sendPing() {
63-
pinger, err := ping.NewPinger(p.healthCheckIP)
71+
pinger, err := ping.NewPinger(p.ip)
6472
if err != nil {
65-
klog.Errorf("Error creating pinger for IP %q: %v", p.healthCheckIP, err)
73+
klog.Errorf("Error creating pinger for IP %q: %v", p.ip, err)
6674
return
6775
}
6876

@@ -73,7 +81,7 @@ func (p *pingerInfo) sendPing() {
7381
pinger.OnSend = func(packet *ping.Packet) {
7482
// Pinger will mark a connection as an error if the packet loss reaches the threshold
7583
if pinger.PacketsSent-pinger.PacketsRecv > int(p.maxPacketLossCount) {
76-
p.failureMsg = fmt.Sprintf("Failed to successfully ping the remote endpoint IP %q", p.healthCheckIP)
84+
p.failureMsg = fmt.Sprintf("Failed to successfully ping the remote endpoint IP %q", p.ip)
7785
pinger.PacketsSent = 0
7886
pinger.PacketsRecv = 0
7987
}
@@ -86,6 +94,29 @@ func (p *pingerInfo) sendPing() {
8694

8795
err = pinger.Run()
8896
if err != nil {
89-
klog.Errorf("Error running ping for the remote endpoint IP %q: %v", p.healthCheckIP, err)
97+
klog.Errorf("Error running ping for the remote endpoint IP %q: %v", p.ip, err)
98+
}
99+
}
100+
101+
func (p *pingerInfo) GetIP() string {
102+
return p.ip
103+
}
104+
105+
func (p *pingerInfo) GetLatencyInfo() *LatencyInfo {
106+
lastTime, _ := time.ParseDuration(strconv.FormatUint(p.statistics.lastRtt, 10) + "ns")
107+
minTime, _ := time.ParseDuration(strconv.FormatUint(p.statistics.minRtt, 10) + "ns")
108+
averageTime, _ := time.ParseDuration(strconv.FormatUint(p.statistics.mean, 10) + "ns")
109+
maxTime, _ := time.ParseDuration(strconv.FormatUint(p.statistics.maxRtt, 10) + "ns")
110+
stdDevTime, _ := time.ParseDuration(strconv.FormatUint(p.statistics.stdDev, 10) + "ns")
111+
112+
return &LatencyInfo{
113+
ConnectionError: p.failureMsg,
114+
Spec: &submarinerv1.LatencyRTTSpec{
115+
Last: lastTime.String(),
116+
Min: minTime.String(),
117+
Average: averageTime.String(),
118+
Max: maxTime.String(),
119+
StdDev: stdDevTime.String(),
120+
},
90121
}
91122
}

pkg/cableengine/syncer/syncer.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,9 @@ func (i *GatewaySyncer) generateGatewayObject() *v1.Gateway {
218218
connection.Status = v1.ConnectionError
219219
connection.StatusMessage = latencyInfo.ConnectionError
220220
}
221+
} else if connection.Status == v1.ConnectionError && latencyInfo.ConnectionError == "" {
222+
connection.Status = v1.Connected
223+
connection.StatusMessage = ""
221224
}
222225
}
223226
}

0 commit comments

Comments
 (0)