@@ -3,10 +3,13 @@ package cloudscale_ccm
33import (
44 "context"
55 "fmt"
6+ "strings"
67
78 "github.com/cloudscale-ch/cloudscale-cloud-controller-manager/pkg/internal/kubeutil"
89 "github.com/cloudscale-ch/cloudscale-go-sdk/v4"
10+ "golang.org/x/exp/slices"
911 v1 "k8s.io/api/core/v1"
12+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1013 "k8s.io/client-go/kubernetes"
1114 "k8s.io/klog/v2"
1215)
@@ -73,6 +76,33 @@ const (
7376 // resources instead.
7477 LoadBalancerVIPAddresses = "k8s.cloudscale.ch/loadbalancer-vip-addresses"
7578
79+ // LoadBalancerFloatingIPs assigns the given Floating IPs to the
80+ // load balancer. The expected value is a list of addresses of the
81+ // Floating IPs in CIDR notation. For example:
82+ //
83+ // ["5.102.150.123/32", "2a06:c01::123/128"]
84+ //
85+ // If any Floating IP address is assigned to multiple services via this
86+ // annotation, the CCM will refuse to update the associated services, as
87+ // this is considered a serious configuration issue that has to first be
88+ // resolved by the operator.
89+ //
90+ // While the service being handled needs to have a parseable Floating IP
91+ // config, the services it is compared to for conflict detection do not.
92+ //
93+ // Such services are skipped during conflict detection with the goal
94+ // of limiting the impact of config parse errors to the service being
95+ // processed.
96+ //
97+ // Floating IPs already assigned to the loadbalancer, but no longer
98+ // present in the annotations, stay on the loadbalancer until another
99+ // service requests them. This is due to the fact that it is not possible
100+ // to unassign Floating IPs to point to nowhere.
101+ //
102+ // The Floating IPs are only assigned to the LoadBalancer once it has
103+ // been fully created.
104+ LoadBalancerFloatingIPs = "k8s.cloudscale.ch/loadbalancer-floating-ips"
105+
76106 // LoadBalancerPoolAlgorithm defines the load balancing algorithm used
77107 // by the loadbalancer. See the API documentation for more information:
78108 //
@@ -279,9 +309,9 @@ func (l *loadbalancer) EnsureLoadBalancer(
279309 nodes []* v1.Node ,
280310) (* v1.LoadBalancerStatus , error ) {
281311
282- // Skip if the service is not supported by this CCM
312+ // Detect configuration issues and abort if they are found
283313 serviceInfo := newServiceInfo (service , clusterName )
284- if supported , err := serviceInfo . isSupported ( ); ! supported {
314+ if err := l . ensureValidConfig ( ctx , serviceInfo ); err != nil {
285315 return nil , err
286316 }
287317
@@ -347,9 +377,9 @@ func (l *loadbalancer) UpdateLoadBalancer(
347377 nodes []* v1.Node ,
348378) error {
349379
350- // Skip if the service is not supported by this CCM
380+ // Detect configuration issues and abort if they are found
351381 serviceInfo := newServiceInfo (service , clusterName )
352- if supported , err := serviceInfo . isSupported ( ); ! supported {
382+ if err := l . ensureValidConfig ( ctx , serviceInfo ); err != nil {
353383 return err
354384 }
355385
@@ -388,9 +418,9 @@ func (l *loadbalancer) EnsureLoadBalancerDeleted(
388418 service * v1.Service ,
389419) error {
390420
391- // Skip if the service is not supported by this CCM
421+ // Detect configuration issues and abort if they are found
392422 serviceInfo := newServiceInfo (service , clusterName )
393- if supported , err := serviceInfo . isSupported ( ); ! supported {
423+ if err := l . ensureValidConfig ( ctx , serviceInfo ); err != nil {
394424 return err
395425 }
396426
@@ -402,6 +432,120 @@ func (l *loadbalancer) EnsureLoadBalancerDeleted(
402432 })
403433}
404434
435+ // ensureValidConfig ensures that the configuration can be applied at all,
436+ // acting as a gate that ensures certain invariants before any changes are
437+ // made.
438+ //
439+ // The general idea is that it's better to not make any chanages if the config
440+ // is bad, rather than throwing errors later when some changes have already
441+ // been made.
442+ func (l * loadbalancer ) ensureValidConfig (
443+ ctx context.Context , serviceInfo * serviceInfo ) error {
444+
445+ // Skip if the service is not supported by this CCM
446+ if supported , err := serviceInfo .isSupported (); ! supported {
447+ return err
448+ }
449+
450+ // If Floating IPs are used, make sure there are no conflicting
451+ // assignment across services.
452+ ips , err := l .findIPsAssignedElsewhere (ctx , serviceInfo )
453+ if err != nil {
454+ return fmt .Errorf ("could not parse %s" , LoadBalancerFloatingIPs )
455+ }
456+
457+ if len (ips ) > 0 {
458+
459+ info := make ([]string , 0 , len (ips ))
460+ for ip , service := range ips {
461+ info = append (info , fmt .Sprintf ("%s->%s" , ip , service ))
462+ }
463+
464+ return fmt .Errorf (
465+ "at least one Floating IP assigned to service %s is also " +
466+ "assigned to another service. Refusing to continue to avoid " +
467+ "flapping: %s" ,
468+ serviceInfo .Service .Name ,
469+ strings .Join (info , ", " ),
470+ )
471+ }
472+
473+ return nil
474+ }
475+
476+ // findIPsAssignedElsewhere lists other services and compares their Floating
477+ // IPs with the ones found on the given service. If an IP is found to be
478+ // assigned to two services, the IP and the name of the service are returned.
479+ func (l * loadbalancer ) findIPsAssignedElsewhere (
480+ ctx context.Context , serviceInfo * serviceInfo ) (map [string ]string , error ) {
481+
482+ ips , err := serviceInfo .annotationList (LoadBalancerFloatingIPs )
483+ if err != nil {
484+ return nil , err
485+ }
486+
487+ if len (ips ) == 0 {
488+ return nil , nil
489+ }
490+
491+ conflicts := make (map [string ]string , 0 )
492+
493+ // Unfortuantely, there's no way to filter for the services that matter
494+ // here. The only available field selectors for services are
495+ // `metadata.name` and `metadata.namespace`.
496+ //
497+ // To support larger clusters, ensure to not load all services in a
498+ // single call.
499+ opts := metav1.ListOptions {
500+ Continue : "" ,
501+ Limit : 250 ,
502+ }
503+
504+ svcs := l .k8s .CoreV1 ().Services ("" )
505+ for {
506+ services , err := svcs .List (ctx , opts )
507+ if err != nil {
508+ return nil , fmt .Errorf ("failed to retrieve services: %w" , err )
509+ }
510+
511+ for _ , service := range services .Items {
512+ if service .Spec .Type != "LoadBalancer" {
513+ continue
514+ }
515+ if service .UID == serviceInfo .Service .UID {
516+ continue
517+ }
518+
519+ otherInfo := newServiceInfo (& service , serviceInfo .clusterName )
520+ other , err := otherInfo .annotationList (LoadBalancerFloatingIPs )
521+
522+ // Ignore errors loading the IPs of other services, as they would
523+ // not be configured either, if the current service is otherwise
524+ // okay, it should be able to continue.
525+ //
526+ // If this is not done, a single configuration error on a service
527+ // causes this function to err on all other services.
528+ if err != nil {
529+ continue
530+ }
531+
532+ for _ , ip := range other {
533+ if slices .Contains (ips , ip ) {
534+ conflicts [ip ] = service .Name
535+ }
536+ }
537+ }
538+
539+ if services .Continue == "" {
540+ break
541+ }
542+
543+ opts .Continue = services .Continue
544+ }
545+
546+ return conflicts , nil
547+ }
548+
405549// loadBalancerStatus generates the v1.LoadBalancerStatus for the given
406550// loadbalancer, as required by Kubernetes.
407551func loadBalancerStatus (lb * cloudscale.LoadBalancer ) * v1.LoadBalancerStatus {
0 commit comments