Skip to content

Commit 29e2f44

Browse files
committed
Predictable IPs for health managers and rsyslog containers
This patch implements predictable IPs for health managers by generating a list of IPs on the same subnet as the octavia management network's CIDR but outside of the existing IP ranges. These IPs are mapped according to the OpenShift node's name and the purpose (e.g. contro1_hm and contro1_rsyslog). The IPs are set as aliases on the pods using the downward API and a utility mounted into the pod.
1 parent 5414e8c commit 29e2f44

File tree

9 files changed

+235
-77
lines changed

9 files changed

+235
-77
lines changed

controllers/amphoracontroller_controller.go

Lines changed: 10 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package controllers
1919
import (
2020
"context"
2121
"fmt"
22-
"sort"
2322
"strings"
2423
"time"
2524

@@ -44,7 +43,6 @@ import (
4443
appsv1 "k8s.io/api/apps/v1"
4544
corev1 "k8s.io/api/core/v1"
4645
k8s_errors "k8s.io/apimachinery/pkg/api/errors"
47-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
4846
"k8s.io/apimachinery/pkg/fields"
4947
"k8s.io/apimachinery/pkg/runtime"
5048
"k8s.io/apimachinery/pkg/types"
@@ -577,46 +575,23 @@ func (r *OctaviaAmphoraControllerReconciler) generateServiceConfigMaps(
577575
err.Error()))
578576
return err
579577
}
578+
580579
//
581-
// TODO(beagles): Improve this with predictable IPs for the health managers because what is
582-
// going to happen on start up is that health managers will restart each time a new one is deployed.
583-
// The easiest strategy is to create a "hole" in the IP address range and control the
584-
// allocation and configuration of an additional IP on each network attached interface. We will
585-
// need a container in the Pod that has the ip command installed to do this however.
580+
// Get the predictable IPs from the HmConfigMap
586581
//
587-
healthManagerIPs, err := getPodIPs(
588-
fmt.Sprintf("%s-%s", "octavia", octaviav1.HealthManager),
589-
instance.Namespace,
590-
r.Kclient,
591-
&r.Log,
592-
)
582+
hmMap := &corev1.ConfigMap{}
583+
err = helper.GetClient().Get(ctx, types.NamespacedName{Name: octavia.HmConfigMap, Namespace: instance.GetNamespace()}, hmMap)
593584
if err != nil {
594-
instance.Status.Conditions.Set(condition.FalseCondition(
595-
condition.InputReadyCondition,
596-
condition.ErrorReason,
597-
condition.SeverityWarning,
598-
condition.InputReadyErrorMessage,
599-
err.Error()))
600585
return err
601586
}
602-
603-
// TODO(beagles): come up with a way to preallocate or ensure
604-
// a stable list of IPs.
605-
606-
if instance.Spec.Role == octaviav1.HealthManager {
607-
// TODO(gthiemonge) This is fine to leave this list empty in the HM when
608-
// we use redis, because the HM doesn't create any LBs, but if we drop
609-
// redis, failovers will be triggered in the HM
610-
templateParameters["ControllerIPList"] = ""
611-
} else if len(healthManagerIPs) == 0 {
612-
return fmt.Errorf("Health manager ports are not ready yet")
613-
} else {
614-
withPorts := make([]string, len(healthManagerIPs))
615-
for idx, val := range healthManagerIPs {
616-
withPorts[idx] = fmt.Sprintf("%s:5555", val)
587+
var ipAddresses []string
588+
for key, val := range hmMap.Data {
589+
if strings.HasPrefix(key, "hm_") {
590+
ipAddresses = append(ipAddresses, fmt.Sprintf("%s:5555", val))
617591
}
618-
templateParameters["ControllerIPList"] = strings.Join(withPorts, ",")
619592
}
593+
ipAddressString := strings.Join(ipAddresses, ",")
594+
templateParameters["ControllerIPList"] = ipAddressString
620595

621596
spec := instance.Spec
622597
templateParameters["ServiceUser"] = spec.ServiceUser
@@ -727,46 +702,6 @@ func (r *OctaviaAmphoraControllerReconciler) SetupWithManager(mgr ctrl.Manager)
727702
Complete(r)
728703
}
729704

730-
func listHealthManagerPods(name string, ns string, client kubernetes.Interface, log *logr.Logger) (*corev1.PodList, error) {
731-
listOptions := metav1.ListOptions{
732-
LabelSelector: fmt.Sprintf("%s=%s", common.AppSelector, name),
733-
FieldSelector: "status.phase==Running",
734-
}
735-
log.Info(fmt.Sprintf("Listing pods using label selector %s and field selector %s", listOptions.LabelSelector, listOptions.FieldSelector))
736-
pods, err := client.CoreV1().Pods(ns).List(context.Background(), listOptions)
737-
if err != nil {
738-
return nil, err
739-
}
740-
return pods, nil
741-
}
742-
743-
func getPodIPs(name string, ns string, client kubernetes.Interface, log *logr.Logger) ([]string, error) {
744-
//
745-
// Get the IPs for the network attachments for these PODs.
746-
//
747-
var result []string
748-
pods, err := listHealthManagerPods(name, ns, client, log)
749-
if err != nil {
750-
return nil, err
751-
}
752-
for _, pod := range pods.Items {
753-
annotations := pod.GetAnnotations()
754-
networkStatusList, err := nad.GetNetworkStatusFromAnnotation(annotations)
755-
if err != nil {
756-
log.Error(err, fmt.Sprintf("Unable to get network annotations from %s", annotations))
757-
return nil, err
758-
}
759-
for _, networkStatus := range networkStatusList {
760-
netAttachName := fmt.Sprintf("%s/%s", ns, octavia.LbNetworkAttachmentName)
761-
if networkStatus.Name == netAttachName {
762-
result = append(result, networkStatus.IPs[0])
763-
}
764-
}
765-
}
766-
sort.Strings(result)
767-
return result, nil
768-
}
769-
770705
func (r *OctaviaAmphoraControllerReconciler) findObjectsForSrc(ctx context.Context, src client.Object) []reconcile.Request {
771706
requests := []reconcile.Request{}
772707

controllers/octavia_controller.go

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
octaviav1 "github.com/openstack-k8s-operators/octavia-operator/api/v1beta1"
4444
"github.com/openstack-k8s-operators/octavia-operator/pkg/octavia"
4545
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
46+
"k8s.io/apimachinery/pkg/types"
4647
"k8s.io/apimachinery/pkg/util/intstr"
4748

4849
appsv1 "k8s.io/api/apps/v1"
@@ -675,6 +676,109 @@ func (r *OctaviaReconciler) reconcileNormal(ctx context.Context, instance *octav
675676
return ctrl.Result{}, err
676677
}
677678

679+
nodeConfigMap := &corev1.ConfigMap{
680+
ObjectMeta: metav1.ObjectMeta{
681+
Name: octavia.HmConfigMap,
682+
Namespace: instance.GetNamespace(),
683+
Labels: labels.GetLabels(instance, labels.GetGroupLabel(instance.ObjectMeta.Name), map[string]string{}),
684+
},
685+
Data: make(map[string]string),
686+
}
687+
688+
// Look for existing config map and if exists, read existing data and match
689+
// against nodes.
690+
foundMap := &corev1.ConfigMap{}
691+
err = helper.GetClient().Get(ctx, types.NamespacedName{Name: octavia.HmConfigMap, Namespace: instance.GetNamespace()},
692+
foundMap)
693+
if err != nil {
694+
if k8s_errors.IsNotFound(err) {
695+
Log.Info(fmt.Sprintf("Port map %s doesn't exist, creating.", octavia.HmConfigMap))
696+
} else {
697+
return ctrl.Result{}, err
698+
}
699+
} else {
700+
Log.Info("Retrieved existing map, updating..")
701+
nodeConfigMap.Data = foundMap.Data
702+
}
703+
704+
//
705+
// Predictable IPs.
706+
//
707+
// NOTE(beagles): refactoring this might be nice. This could also be
708+
// optimized but the data sets are small (nodes and IP ranges are less than
709+
// 100) so optimization might be a waste.
710+
//
711+
predictableIPParams, err := octavia.GetPredictableIPAM(networkParameters)
712+
if err != nil {
713+
return ctrl.Result{}, err
714+
}
715+
// Get a list of the nodes in the cluster
716+
717+
// TODO(beagles):
718+
// * confirm whether or not this lists only the nodes we want (i.e. ones
719+
// that will host the daemonset)
720+
// * do we want to provide a mechanism to temporarily disable this list
721+
// for maintenance windows where nodes might be "coming and going"
722+
723+
nodes, _ := helper.GetKClient().CoreV1().Nodes().List(ctx, metav1.ListOptions{})
724+
updatedMap := make(map[string]string)
725+
allocatedIPs := make(map[string]bool)
726+
var predictableIPsRequired []string
727+
728+
// First scan existing allocations so we can keep existing allocations.
729+
// Keeping track of what's required and what already exists. If a node is
730+
// removed from the cluster, its IPs will not be added to the allocated
731+
// list and are effectively recycled.
732+
for _, node := range nodes.Items {
733+
Log.Info(fmt.Sprintf("cluster node name %s", node.Name))
734+
portName := fmt.Sprintf("hm_%s", node.Name)
735+
if ipValue, ok := nodeConfigMap.Data[portName]; ok {
736+
updatedMap[portName] = ipValue
737+
allocatedIPs[ipValue] = true
738+
Log.Info(fmt.Sprintf("%s has IP mapping %s: %s", node.Name, portName, ipValue))
739+
} else {
740+
predictableIPsRequired = append(predictableIPsRequired, portName)
741+
}
742+
portName = fmt.Sprintf("rsyslog_%s", node.Name)
743+
if ipValue, ok := nodeConfigMap.Data[portName]; ok {
744+
updatedMap[portName] = ipValue
745+
allocatedIPs[ipValue] = true
746+
Log.Info(fmt.Sprintf("%s has IP mapping %s: %s", node.Name, portName, ipValue))
747+
} else {
748+
predictableIPsRequired = append(predictableIPsRequired, portName)
749+
}
750+
}
751+
// Get new IPs using the range from predictableIPParams minus the
752+
// allocatedIPs captured above.
753+
Log.Info(fmt.Sprintf("Allocating %d predictable IPs", len(predictableIPsRequired)))
754+
for _, portName := range predictableIPsRequired {
755+
hmPort, err := octavia.GetNextIP(predictableIPParams, allocatedIPs)
756+
if err != nil {
757+
// An error here is really unexpected- it means either we have
758+
// messed up the allocatedIPs list or the range we are assuming is
759+
// too small for the number of health managers and rsyslog
760+
// containers.
761+
return ctrl.Result{}, err
762+
}
763+
updatedMap[portName] = hmPort
764+
}
765+
766+
mapLabels := labels.GetLabels(instance, labels.GetGroupLabel(instance.ObjectMeta.Name), map[string]string{})
767+
_, err = controllerutil.CreateOrPatch(ctx, helper.GetClient(), nodeConfigMap, func() error {
768+
nodeConfigMap.Labels = util.MergeStringMaps(nodeConfigMap.Labels, mapLabels)
769+
nodeConfigMap.Data = updatedMap
770+
err := controllerutil.SetControllerReference(instance, nodeConfigMap, helper.GetScheme())
771+
if err != nil {
772+
return err
773+
}
774+
return nil
775+
})
776+
777+
if err != nil {
778+
Log.Info("Unable to create config map for health manager ports...")
779+
return ctrl.Result{}, err
780+
}
781+
678782
octaviaHealthManager, op, err := r.amphoraControllerDaemonSetCreateOrUpdate(instance, networkInfo,
679783
ampImageOwnerID, instance.Spec.OctaviaHealthManager, octaviav1.HealthManager)
680784
if err != nil {

pkg/amphoracontrollers/daemonset.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ func DaemonSet(
4242

4343
// The API pod has an extra volume so the API and the provider agent can
4444
// communicate with each other.
45-
volumes := octavia.GetVolumes(instance.Name)
45+
volumes := GetVolumes(instance.Name)
4646
parentOctaviaName := octavia.GetOwningOctaviaControllerName(instance)
4747
certsSecretName := fmt.Sprintf("%s-certs-secret", parentOctaviaName)
4848
volumes = append(volumes, GetCertVolume(certsSecretName)...)
4949

50-
volumeMounts := octavia.GetVolumeMounts(serviceName)
50+
volumeMounts := GetVolumeMounts(serviceName)
5151
volumeMounts = append(volumeMounts, GetCertVolumeMount()...)
5252

5353
livenessProbe := &corev1.Probe{
@@ -83,6 +83,7 @@ func DaemonSet(
8383

8484
envVars["KOLLA_CONFIG_STRATEGY"] = env.SetValue("COPY_ALWAYS")
8585
envVars["CONFIG_HASH"] = env.SetValue(configHash)
86+
envVars["NODE_NAME"] = env.DownwardAPI("spec.nodeName")
8687

8788
envVars["MGMT_CIDR"] = env.SetValue(instance.Spec.OctaviaProviderSubnetCIDR)
8889
envVars["MGMT_GATEWAY"] = env.SetValue(instance.Spec.OctaviaProviderSubnetGateway)

pkg/amphoracontrollers/volumes.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ package amphoracontrollers
1717

1818
import (
1919
corev1 "k8s.io/api/core/v1"
20+
21+
"github.com/openstack-k8s-operators/octavia-operator/pkg/octavia"
2022
)
2123

2224
const (
@@ -28,6 +30,35 @@ var (
2830
configMode int32 = 0644
2931
)
3032

33+
func GetVolumes(name string) []corev1.Volume {
34+
var config0640AccessMode int32 = 0640
35+
return append(
36+
octavia.GetVolumes(name),
37+
corev1.Volume{
38+
Name: "hm-ports",
39+
VolumeSource: corev1.VolumeSource{
40+
ConfigMap: &corev1.ConfigMapVolumeSource{
41+
LocalObjectReference: corev1.LocalObjectReference{
42+
Name: octavia.HmConfigMap,
43+
},
44+
DefaultMode: &config0640AccessMode,
45+
},
46+
},
47+
},
48+
)
49+
}
50+
51+
func GetVolumeMounts(serviceName string) []corev1.VolumeMount {
52+
return append(
53+
octavia.GetVolumeMounts(serviceName),
54+
corev1.VolumeMount{
55+
Name: "hm-ports",
56+
MountPath: "/var/lib/hmports",
57+
ReadOnly: true,
58+
},
59+
)
60+
}
61+
3162
// GetCertVolume - service volumes
3263
func GetCertVolume(certSecretName string) []corev1.Volume {
3364
return []corev1.Volume{

pkg/octavia/const.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,7 @@ const (
4949

5050
// AdminTenantName
5151
AdminTenant = "admin"
52+
53+
// HmConfigMap - name of the ConfigMap that maps per-node port names to their predictable IPs
54+
HmConfigMap = "octavia-hmport-map"
5255
)

pkg/octavia/lb_mgmt_network.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ import (
3232
octaviav1 "github.com/openstack-k8s-operators/octavia-operator/api/v1beta1"
3333
)
3434

35+
// NetworkProvisioningSummary -
36+
// Type for conveying the results of the EnsureAmphoraManagementNetwork call.
3537
type NetworkProvisioningSummary struct {
3638
TenantNetworkID string
3739
TenantSubnetID string
@@ -897,3 +899,40 @@ func EnsureAmphoraManagementNetwork(
897899
ManagementSubnetGateway: networkParameters.ProviderGateway.String(),
898900
}, nil
899901
}
902+
903+
// GetPredictableIPAM returns a struct describing the available IP range. If the
904+
// IP pool size does not fit in given networkParameters CIDR it will return an
905+
// error instead.
906+
func GetPredictableIPAM(networkParameters *NetworkParameters) (*NADIpam, error) {
907+
predParams := &NADIpam{}
908+
predParams.CIDR = networkParameters.ProviderCIDR
909+
predParams.RangeStart = networkParameters.ProviderAllocationEnd.Next()
910+
endRange := predParams.RangeStart
911+
for i := 0; i < LbProvPredictablePoolSize; i++ {
912+
if !predParams.CIDR.Contains(endRange) {
913+
return nil, fmt.Errorf("predictable IPs: cannot allocate %d IP addresses in %s", LbProvPredictablePoolSize, predParams.CIDR)
914+
}
915+
endRange = endRange.Next()
916+
}
917+
predParams.RangeEnd = endRange
918+
return predParams, nil
919+
}
920+
921+
// GetNextIP picks the next available IP from the range defined by a NADIpam,
922+
// skipping ones that are already used appear as keys in the currentValues map.
923+
func GetNextIP(predParams *NADIpam, currentValues map[string]bool) (string, error) {
924+
candidateAddress := predParams.RangeStart
925+
for alloced := true; alloced; {
926+
927+
if _, ok := currentValues[candidateAddress.String()]; ok {
928+
if candidateAddress == predParams.RangeEnd {
929+
return "", fmt.Errorf("predictable IPs: out of available addresses")
930+
}
931+
candidateAddress = candidateAddress.Next()
932+
} else {
933+
alloced = false
934+
}
935+
}
936+
currentValues[candidateAddress.String()] = true
937+
return candidateAddress.String(), nil
938+
}

pkg/octavia/network_consts.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ const (
6464
// LbProvSubnetPoolSize -
6565
LbProvSubnetPoolSize = 25
6666

67+
// LbProvPredictablePoolSize - number of addresses reserved for predictable IPs
68+
LbProvPredictablePoolSize = 25
69+
6770
// IPv4 consts
6871

6972
// TODO(beagles): support IPv6 for the provider network.

templates/octaviaamphoracontroller/bin/octavia_controller_start.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ set -ex
1818
/usr/local/bin/container-scripts/octavia_mgmt_subnet_route.py octavia "$MGMT_CIDR" "$MGMT_GATEWAY"
1919

2020
if [ "$1" = "octavia-health-manager" ]; then
21+
/usr/local/bin/container-scripts/setipalias.py octavia
2122
/usr/local/bin/container-scripts/octavia_hm_advertisement.py octavia
2223
fi
2324

0 commit comments

Comments
 (0)