Skip to content

Commit 42797ad

Browse files
authored
Merge pull request kubernetes#91700 from skmatti/ilb-metrics
Add usage metrics for GCE Internal Loadbalancers
2 parents 875f31e + 4ca62b8 commit 42797ad

File tree

7 files changed

+384
-2
lines changed

7 files changed

+384
-2
lines changed

staging/src/k8s.io/legacy-cloud-providers/gce/BUILD

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ go_library(
2424
"gce_loadbalancer.go",
2525
"gce_loadbalancer_external.go",
2626
"gce_loadbalancer_internal.go",
27+
"gce_loadbalancer_metrics.go",
2728
"gce_loadbalancer_naming.go",
2829
"gce_networkendpointgroup.go",
2930
"gce_routes.go",
@@ -98,6 +99,7 @@ go_test(
9899
"gce_healthchecks_test.go",
99100
"gce_loadbalancer_external_test.go",
100101
"gce_loadbalancer_internal_test.go",
102+
"gce_loadbalancer_metrics_test.go",
101103
"gce_loadbalancer_test.go",
102104
"gce_loadbalancer_utils_test.go",
103105
"gce_test.go",
@@ -116,6 +118,7 @@ go_test(
116118
"//vendor/github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud:go_default_library",
117119
"//vendor/github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta:go_default_library",
118120
"//vendor/github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/mock:go_default_library",
121+
"//vendor/github.com/google/go-cmp/cmp:go_default_library",
119122
"//vendor/github.com/stretchr/testify/assert:go_default_library",
120123
"//vendor/github.com/stretchr/testify/require:go_default_library",
121124
"//vendor/golang.org/x/oauth2/google:go_default_library",

staging/src/k8s.io/legacy-cloud-providers/gce/gce.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ type Cloud struct {
163163

164164
// Keep a reference of this around so we can inject a new cloud.RateLimiter implementation.
165165
s *cloud.Service
166+
167+
metricsCollector loadbalancerMetricsCollector
166168
}
167169

168170
// ConfigGlobal is the in memory representation of the gce.conf config data
@@ -518,6 +520,7 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) {
518520
operationPollRateLimiter: operationPollRateLimiter,
519521
AlphaFeatureGate: config.AlphaFeatureGate,
520522
nodeZones: map[string]sets.String{},
523+
metricsCollector: newLoadBalancerMetrics(),
521524
}
522525

523526
gce.manager = &gceServiceManager{gce}
@@ -643,6 +646,7 @@ func (g *Cloud) Initialize(clientBuilder cloudprovider.ControllerClientBuilder,
643646
g.eventRecorder = g.eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "g-cloudprovider"})
644647

645648
go g.watchClusterID(stop)
649+
go g.metricsCollector.Run(stop)
646650
}
647651

648652
// LoadBalancer returns an implementation of LoadBalancer for Google Compute Engine.

staging/src/k8s.io/legacy-cloud-providers/gce/gce_fake.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ func NewFakeGCECloud(vals TestClusterValues) *Cloud {
7676
networkProjectID: vals.ProjectID,
7777
ClusterID: fakeClusterID(vals.ClusterID),
7878
onXPN: vals.OnXPN,
79+
metricsCollector: newLoadBalancerMetrics(),
7980
}
8081
c := cloud.NewMockGCE(&gceProjectRouter{gce})
8182
gce.c = c

staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,23 @@ func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v
6363
return nil, cloudprovider.ImplementedElsewhere
6464
}
6565

66+
nm := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace}
67+
68+
var serviceState L4ILBServiceState
69+
// Mark the service InSuccess state as false to begin with.
70+
// This will be updated to true if the VIP is configured successfully.
71+
serviceState.InSuccess = false
72+
defer func() {
73+
g.metricsCollector.SetL4ILBService(nm.String(), serviceState)
74+
}()
75+
6676
loadBalancerName := g.GetLoadBalancerName(context.TODO(), clusterName, svc)
6777
klog.V(2).Infof("ensureInternalLoadBalancer(%v): Attaching %q finalizer", loadBalancerName, ILBFinalizerV1)
6878
if err := addFinalizer(svc, g.client.CoreV1(), ILBFinalizerV1); err != nil {
6979
klog.Errorf("Failed to attach finalizer '%s' on service %s/%s - %v", ILBFinalizerV1, svc.Namespace, svc.Name, err)
7080
return nil, err
7181
}
7282

73-
nm := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace}
7483
ports, _, protocol := getPortsAndProtocol(svc.Spec.Ports)
7584
if protocol != v1.ProtocolTCP && protocol != v1.ProtocolUDP {
7685
return nil, fmt.Errorf("Invalid protocol %s, only TCP and UDP are supported", string(protocol))
@@ -213,6 +222,18 @@ func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v
213222
return nil, err
214223
}
215224

225+
serviceState.InSuccess = true
226+
if options.AllowGlobalAccess {
227+
serviceState.EnabledGlobalAccess = true
228+
}
229+
// SubnetName is overridden to nil value if Alpha feature gate for custom subnet
230+
// is not enabled. So, a non empty subnet name at this point implies that the
231+
// feature is in use.
232+
if options.SubnetName != "" {
233+
serviceState.EnabledCustomSubnet = true
234+
}
235+
klog.V(6).Infof("Internal Loadbalancer for Service %s ensured, updating its state %v in metrics cache", nm, serviceState)
236+
216237
status := &v1.LoadBalancerStatus{}
217238
status.Ingress = []v1.LoadBalancerIngress{{IP: updatedFwdRule.IPAddress}}
218239
return status, nil
@@ -267,6 +288,7 @@ func (g *Cloud) updateInternalLoadBalancer(clusterName, clusterID string, svc *v
267288

268289
func (g *Cloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID string, svc *v1.Service) error {
269290
loadBalancerName := g.GetLoadBalancerName(context.TODO(), clusterName, svc)
291+
svcNamespacedName := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace}
270292
_, _, protocol := getPortsAndProtocol(svc.Spec.Ports)
271293
scheme := cloud.SchemeInternal
272294
sharedBackend := shareBackendService(svc)
@@ -326,10 +348,12 @@ func (g *Cloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID string,
326348

327349
klog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): Removing %q finalizer", loadBalancerName, ILBFinalizerV1)
328350
if err := removeFinalizer(svc, g.client.CoreV1(), ILBFinalizerV1); err != nil {
329-
klog.Errorf("Failed to remove finalizer '%s' on service %s/%s - %v", ILBFinalizerV1, svc.Namespace, svc.Name, err)
351+
klog.Errorf("Failed to remove finalizer '%s' on service %s - %v", ILBFinalizerV1, svcNamespacedName, err)
330352
return err
331353
}
332354

355+
klog.V(6).Infof("Internal Loadbalancer for Service %s deleted, removing its state from metrics cache", svcNamespacedName)
356+
g.metricsCollector.DeleteL4ILBService(svcNamespacedName.String())
333357
return nil
334358
}
335359

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
// +build !providerless
2+
3+
/*
4+
Copyright 2020 The Kubernetes Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package gce
20+
21+
import (
22+
"sync"
23+
"time"
24+
25+
"k8s.io/apimachinery/pkg/util/wait"
26+
"k8s.io/component-base/metrics"
27+
"k8s.io/component-base/metrics/legacyregistry"
28+
"k8s.io/klog/v2"
29+
)
30+
31+
const (
32+
// label is the name of the single label attached to the l4ILBCount gauge;
// its value identifies the feature being counted.
label = "feature"
33+
)
34+
35+
var (
36+
// metricsInterval is the period between metric exports. Run also waits
// this long before the first export.
metricsInterval = 10 * time.Minute
37+
// l4ILBCount is a gauge vector reporting the number of L4 ILB services,
// partitioned by the feature label.
l4ILBCount = metrics.NewGaugeVec(
38+
&metrics.GaugeOpts{
39+
Name: "number_of_l4_ilbs",
40+
Help: "Number of L4 ILBs",
41+
StabilityLevel: metrics.ALPHA,
42+
},
43+
[]string{label},
44+
)
45+
)
46+
47+
// init registers L4 internal loadbalancer usage metrics.
48+
func init() {
49+
klog.V(3).Infof("Registering Service Controller loadbalancer usage metrics %v", l4ILBCount)
50+
legacyregistry.MustRegister(l4ILBCount)
51+
}
52+
53+
// LoadBalancerMetrics is a cache that contains loadbalancer service resource
54+
// states for computing usage metrics.
55+
type LoadBalancerMetrics struct {
56+
// l4ILBServiceMap is a map of service key and L4 ILB service state.
57+
l4ILBServiceMap map[string]L4ILBServiceState
58+
59+
// Mutex guards l4ILBServiceMap; the Set/Delete/compute methods hold it
// while touching the map.
sync.Mutex
60+
}
61+
62+
// feature names a category of L4 ILB usage that is counted and exported as a
// value of the metric's feature label.
type feature string
63+
64+
// String returns the feature name as a plain string.
func (f feature) String() string {
65+
return string(f)
66+
}
67+
68+
const (
69+
// l4ILBService feature counts every L4 ILB service present in the cache.
l4ILBService = feature("L4ILBService")
70+
// l4ILBGlobalAccess feature counts successfully configured services with
// Global Access enabled.
l4ILBGlobalAccess = feature("L4ILBGlobalAccess")
71+
// l4ILBCustomSubnet feature counts successfully configured services with
// a custom subnet enabled.
l4ILBCustomSubnet = feature("L4ILBCustomSubnet")
72+
// l4ILBInSuccess feature specifies that ILB VIP is configured.
73+
l4ILBInSuccess = feature("L4ILBInSuccess")
74+
// l4ILBInError feature specifies that an error had occurred for this service
75+
// in the ensureInternalLoadBalancer method.
76+
l4ILBInError = feature("L4ILBInError")
77+
)
78+
79+
// L4ILBServiceState contains Internal Loadbalancer feature states as specified
80+
// in k8s Service.
81+
type L4ILBServiceState struct {
82+
// EnabledGlobalAccess specifies if Global Access is enabled.
83+
EnabledGlobalAccess bool
84+
// EnabledCustomSubnet specifies if Custom Subnet is enabled.
85+
EnabledCustomSubnet bool
86+
// InSuccess specifies if the ILB service VIP is configured.
87+
InSuccess bool
88+
}
89+
90+
// loadbalancerMetricsCollector is an interface to update/delete L4 loadbalancer
91+
// states in the cache that is used for computing L4 Loadbalancer usage metrics.
92+
type loadbalancerMetricsCollector interface {
93+
// Run computes and exports metrics at a periodic interval. It blocks until
// stopCh is closed, so callers are expected to run it in its own goroutine.
94+
Run(stopCh <-chan struct{})
95+
// SetL4ILBService adds/updates L4 ILB service state for given service key.
96+
SetL4ILBService(svcKey string, state L4ILBServiceState)
97+
// DeleteL4ILBService removes the given L4 ILB service key.
98+
DeleteL4ILBService(svcKey string)
99+
}
100+
101+
// newLoadBalancerMetrics initializes LoadBalancerMetrics and starts a goroutine
102+
// to compute and export metrics periodically.
103+
func newLoadBalancerMetrics() loadbalancerMetricsCollector {
104+
return &LoadBalancerMetrics{
105+
l4ILBServiceMap: make(map[string]L4ILBServiceState),
106+
}
107+
}
108+
109+
// Run implements loadbalancerMetricsCollector.
110+
func (lm *LoadBalancerMetrics) Run(stopCh <-chan struct{}) {
111+
klog.V(3).Infof("Loadbalancer Metrics initialized. Metrics will be exported at an interval of %v", metricsInterval)
112+
// Compute and export metrics periodically.
113+
go func() {
114+
// Wait for service states to be populated in the cache before computing metrics.
115+
time.Sleep(metricsInterval)
116+
wait.Until(lm.export, metricsInterval, stopCh)
117+
}()
118+
<-stopCh
119+
}
120+
121+
// SetL4ILBService implements loadbalancerMetricsCollector.
122+
func (lm *LoadBalancerMetrics) SetL4ILBService(svcKey string, state L4ILBServiceState) {
123+
lm.Lock()
124+
defer lm.Unlock()
125+
126+
if lm.l4ILBServiceMap == nil {
127+
klog.Fatalf("Loadbalancer Metrics failed to initialize correctly.")
128+
}
129+
lm.l4ILBServiceMap[svcKey] = state
130+
}
131+
132+
// DeleteL4ILBService implements loadbalancerMetricsCollector.
133+
func (lm *LoadBalancerMetrics) DeleteL4ILBService(svcKey string) {
134+
lm.Lock()
135+
defer lm.Unlock()
136+
137+
delete(lm.l4ILBServiceMap, svcKey)
138+
}
139+
140+
// export computes and exports loadbalancer usage metrics.
141+
func (lm *LoadBalancerMetrics) export() {
142+
ilbCount := lm.computeL4ILBMetrics()
143+
klog.V(5).Infof("Exporting L4 ILB usage metrics: %#v", ilbCount)
144+
for feature, count := range ilbCount {
145+
l4ILBCount.With(map[string]string{label: feature.String()}).Set(float64(count))
146+
}
147+
klog.V(5).Infof("L4 ILB usage metrics exported.")
148+
}
149+
150+
// computeL4ILBMetrics aggregates L4 ILB metrics in the cache.
151+
func (lm *LoadBalancerMetrics) computeL4ILBMetrics() map[feature]int {
152+
lm.Lock()
153+
defer lm.Unlock()
154+
klog.V(4).Infof("Computing L4 ILB usage metrics from service state map: %#v", lm.l4ILBServiceMap)
155+
counts := map[feature]int{
156+
l4ILBService: 0,
157+
l4ILBGlobalAccess: 0,
158+
l4ILBCustomSubnet: 0,
159+
l4ILBInSuccess: 0,
160+
l4ILBInError: 0,
161+
}
162+
163+
for key, state := range lm.l4ILBServiceMap {
164+
klog.V(6).Infof("ILB Service %s has EnabledGlobalAccess: %t, EnabledCustomSubnet: %t, InSuccess: %t", key, state.EnabledGlobalAccess, state.EnabledCustomSubnet, state.InSuccess)
165+
counts[l4ILBService]++
166+
if !state.InSuccess {
167+
counts[l4ILBInError]++
168+
// Skip counting other features if the service is in error state.
169+
continue
170+
}
171+
counts[l4ILBInSuccess]++
172+
if state.EnabledGlobalAccess {
173+
counts[l4ILBGlobalAccess]++
174+
}
175+
if state.EnabledCustomSubnet {
176+
counts[l4ILBCustomSubnet]++
177+
}
178+
}
179+
klog.V(4).Info("L4 ILB usage metrics computed.")
180+
return counts
181+
}

0 commit comments

Comments
 (0)