Skip to content

Commit 72b0edd

Browse files
Merge pull request #228 from numtide/fix/multiadmin-web-env-vars-and-multigateway-service
feat(cluster-handler): add multigateway global service and fix env vars
2 parents d12c2f8 + 78d40ee commit 72b0edd

File tree

13 files changed

+656
-114
lines changed

13 files changed

+656
-114
lines changed

pkg/cluster-handler/controller/multigrescluster/builders_global.go

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,13 @@ func BuildMultiAdminWebDeployment(
255255
Name: "multiadmin-web",
256256
Image: string(cluster.Spec.Images.MultiAdminWeb),
257257
Env: []corev1.EnvVar{
258+
{
259+
Name: "MULTIADMIN_API_URL",
260+
Value: fmt.Sprintf("http://%s-multiadmin:18000", cluster.Name),
261+
},
258262
{
259263
Name: "POSTGRES_HOST",
260-
Value: "multigateway",
264+
Value: fmt.Sprintf("%s-multigateway", cluster.Name),
261265
},
262266
{
263267
Name: "POSTGRES_PORT",
@@ -349,3 +353,48 @@ func BuildMultiAdminWebService(
349353

350354
return svc, nil
351355
}
356+
357+
// BuildMultiGatewayGlobalService constructs a cluster-level Service that selects
358+
// all multigateway pods across all cells. This provides a stable DNS name for
359+
// multiadmin-web to connect to PostgreSQL via any available multigateway,
360+
// regardless of which cells exist.
361+
func BuildMultiGatewayGlobalService(
362+
cluster *multigresv1alpha1.MultigresCluster,
363+
scheme *runtime.Scheme,
364+
) (*corev1.Service, error) {
365+
labels := metadata.BuildStandardLabels(cluster.Name, metadata.ComponentMultiGateway)
366+
metadata.AddClusterLabel(labels, cluster.Name)
367+
368+
// The selector intentionally omits the cell label so it matches
369+
// multigateway pods from all cells in this cluster.
370+
selector := map[string]string{
371+
metadata.LabelAppComponent: metadata.ComponentMultiGateway,
372+
metadata.LabelAppInstance: cluster.Name,
373+
}
374+
375+
svc := &corev1.Service{
376+
ObjectMeta: metav1.ObjectMeta{
377+
Name: fmt.Sprintf("%s-multigateway", cluster.Name),
378+
Namespace: cluster.Namespace,
379+
Labels: labels,
380+
},
381+
Spec: corev1.ServiceSpec{
382+
Selector: selector,
383+
Type: corev1.ServiceTypeClusterIP,
384+
Ports: []corev1.ServicePort{
385+
{
386+
Name: "postgres",
387+
Port: 15432,
388+
TargetPort: intstr.FromString("postgres"),
389+
Protocol: corev1.ProtocolTCP,
390+
},
391+
},
392+
},
393+
}
394+
395+
if err := controllerutil.SetControllerReference(cluster, svc, scheme); err != nil {
396+
return nil, err
397+
}
398+
399+
return svc, nil
400+
}

pkg/cluster-handler/controller/multigrescluster/builders_global_test.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package multigrescluster
22

33
import (
4+
"fmt"
45
"testing"
56

67
multigresv1alpha1 "github.com/numtide/multigres-operator/api/v1alpha1"
@@ -209,6 +210,31 @@ func TestBuildMultiAdminWebDeployment(t *testing.T) {
209210
}
210211
}
211212

213+
// Verify env vars
214+
envVars := got.Spec.Template.Spec.Containers[0].Env
215+
wantEnv := map[string]string{
216+
"MULTIADMIN_API_URL": fmt.Sprintf("http://%s-multiadmin:18000", cluster.Name),
217+
"POSTGRES_HOST": fmt.Sprintf("%s-multigateway", cluster.Name),
218+
"POSTGRES_PORT": "15432",
219+
"POSTGRES_DATABASE": "postgres",
220+
"POSTGRES_USER": "postgres",
221+
}
222+
for wantName, wantValue := range wantEnv {
223+
found := false
224+
for _, ev := range envVars {
225+
if ev.Name == wantName {
226+
found = true
227+
if ev.Value != wantValue {
228+
t.Errorf("Env %s = %q, want %q", wantName, ev.Value, wantValue)
229+
}
230+
break
231+
}
232+
}
233+
if !found {
234+
t.Errorf("Missing env var %s", wantName)
235+
}
236+
}
237+
212238
// Verify Selector does NOT contain mutable labels
213239
selector := got.Spec.Selector.MatchLabels
214240
if _, ok := selector["app.kubernetes.io/name"]; ok {
@@ -310,3 +336,64 @@ func TestBuildMultiAdminService(t *testing.T) {
310336
}
311337
})
312338
}
339+
340+
func TestBuildMultiGatewayGlobalService(t *testing.T) {
341+
scheme := runtime.NewScheme()
342+
_ = multigresv1alpha1.AddToScheme(scheme)
343+
_ = corev1.AddToScheme(scheme)
344+
345+
cluster := &multigresv1alpha1.MultigresCluster{
346+
ObjectMeta: metav1.ObjectMeta{
347+
Name: "my-cluster",
348+
Namespace: "default",
349+
UID: "cluster-uid",
350+
},
351+
}
352+
353+
t.Run("Success", func(t *testing.T) {
354+
got, err := BuildMultiGatewayGlobalService(cluster, scheme)
355+
if err != nil {
356+
t.Fatalf("BuildMultiGatewayGlobalService() error = %v", err)
357+
}
358+
359+
if got.Name != "my-cluster-multigateway" {
360+
t.Errorf("Name = %v, want %v", got.Name, "my-cluster-multigateway")
361+
}
362+
363+
// Verify selector targets all multigateways for this cluster (no cell label)
364+
selector := got.Spec.Selector
365+
if selector["app.kubernetes.io/component"] != "multigateway" {
366+
t.Errorf("Selector component = %v, want multigateway", selector["app.kubernetes.io/component"])
367+
}
368+
if selector["app.kubernetes.io/instance"] != "my-cluster" {
369+
t.Errorf("Selector instance = %v, want my-cluster", selector["app.kubernetes.io/instance"])
370+
}
371+
if _, hasCell := selector["multigres.com/cell"]; hasCell {
372+
t.Error("Selector must NOT contain cell label to match all cells")
373+
}
374+
375+
// Verify port
376+
if len(got.Spec.Ports) != 1 {
377+
t.Fatalf("Ports count = %d, want 1", len(got.Spec.Ports))
378+
}
379+
if got.Spec.Ports[0].Port != 15432 {
380+
t.Errorf("Port = %d, want 15432", got.Spec.Ports[0].Port)
381+
}
382+
if got.Spec.Ports[0].Name != "postgres" {
383+
t.Errorf("Port name = %v, want postgres", got.Spec.Ports[0].Name)
384+
}
385+
386+
// Verify OwnerReference
387+
if len(got.OwnerReferences) != 1 {
388+
t.Errorf("OwnerReferences count = %v, want 1", len(got.OwnerReferences))
389+
}
390+
})
391+
392+
t.Run("ControllerRefError", func(t *testing.T) {
393+
emptyScheme := runtime.NewScheme()
394+
_, err := BuildMultiGatewayGlobalService(cluster, emptyScheme)
395+
if err == nil {
396+
t.Error("Expected error due to missing scheme types, got nil")
397+
}
398+
})
399+
}

pkg/cluster-handler/controller/multigrescluster/integration_test.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,8 @@ func TestMultigresCluster_HappyPath(t *testing.T) {
360360
},
361361
Resources: resolver.DefaultResourcesAdminWeb(),
362362
Env: []corev1.EnvVar{
363-
{Name: "POSTGRES_HOST", Value: "multigateway"},
363+
{Name: "MULTIADMIN_API_URL", Value: "http://" + clusterName + "-multiadmin:18000"},
364+
{Name: "POSTGRES_HOST", Value: clusterName + "-multigateway"},
364365
{Name: "POSTGRES_PORT", Value: "15432"},
365366
{Name: "POSTGRES_DATABASE", Value: "postgres"},
366367
{Name: "POSTGRES_USER", Value: "postgres"},
@@ -643,7 +644,8 @@ func TestMultigresCluster_HappyPath(t *testing.T) {
643644
},
644645
Resources: resolver.DefaultResourcesAdminWeb(),
645646
Env: []corev1.EnvVar{
646-
{Name: "POSTGRES_HOST", Value: "multigateway"},
647+
{Name: "MULTIADMIN_API_URL", Value: "http://minimal-cluster-multiadmin:18000"},
648+
{Name: "POSTGRES_HOST", Value: "minimal-cluster-multigateway"},
647649
{Name: "POSTGRES_PORT", Value: "15432"},
648650
{Name: "POSTGRES_DATABASE", Value: "postgres"},
649651
{Name: "POSTGRES_USER", Value: "postgres"},
@@ -923,7 +925,8 @@ func TestMultigresCluster_HappyPath(t *testing.T) {
923925
},
924926
Resources: resolver.DefaultResourcesAdminWeb(),
925927
Env: []corev1.EnvVar{
926-
{Name: "POSTGRES_HOST", Value: "multigateway"},
928+
{Name: "MULTIADMIN_API_URL", Value: "http://lazy-cluster-multiadmin:18000"},
929+
{Name: "POSTGRES_HOST", Value: "lazy-cluster-multigateway"},
927930
{Name: "POSTGRES_PORT", Value: "15432"},
928931
{Name: "POSTGRES_DATABASE", Value: "postgres"},
929932
{Name: "POSTGRES_USER", Value: "postgres"},

pkg/cluster-handler/controller/multigrescluster/multigrescluster_controller.go

Lines changed: 86 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package multigrescluster
33
import (
44
"context"
55
"fmt"
6+
"slices"
67
"time"
78

89
appsv1 "k8s.io/api/apps/v1"
@@ -23,8 +24,11 @@ import (
2324
multigresv1alpha1 "github.com/numtide/multigres-operator/api/v1alpha1"
2425
"github.com/numtide/multigres-operator/pkg/monitoring"
2526
"github.com/numtide/multigres-operator/pkg/resolver"
27+
"github.com/numtide/multigres-operator/pkg/util/metadata"
2628
)
2729

30+
const finalizerName = "multigres.com/cluster-cleanup"
31+
2832
// MultigresClusterReconciler reconciles a MultigresCluster object.
2933
type MultigresClusterReconciler struct {
3034
client.Client
@@ -40,6 +44,7 @@ type MultigresClusterReconciler struct {
4044
// +kubebuilder:rbac:groups=multigres.com,resources=multigresclusters/finalizers,verbs=update
4145
// +kubebuilder:rbac:groups=multigres.com,resources=coretemplates;celltemplates;shardtemplates,verbs=get;list;watch
4246
// +kubebuilder:rbac:groups=multigres.com,resources=cells;tablegroups;toposervers,verbs=get;list;watch;create;update;patch;delete
47+
// +kubebuilder:rbac:groups=multigres.com,resources=shards,verbs=get;list;watch
4348
// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete
4449
func (r *MultigresClusterReconciler) Reconcile(
4550
ctx context.Context,
@@ -107,9 +112,19 @@ func (r *MultigresClusterReconciler) Reconcile(
107112
}
108113
}
109114

110-
// If being deleted, let Kubernetes GC handle cleanup
115+
// Add finalizer on first reconcile to guarantee ordered deletion.
116+
// We do not return early here because GenerationChangedPredicate would
117+
// filter out the metadata-only update, preventing child resource creation.
118+
if !slices.Contains(cluster.Finalizers, finalizerName) {
119+
cluster.Finalizers = append(cluster.Finalizers, finalizerName)
120+
if err := r.Update(ctx, cluster); err != nil {
121+
l.Error(err, "Failed to add finalizer")
122+
return ctrl.Result{}, err
123+
}
124+
}
125+
111126
if !cluster.DeletionTimestamp.IsZero() {
112-
return ctrl.Result{}, nil
127+
return r.handleDeletion(ctx, cluster)
113128
}
114129

115130
{
@@ -199,6 +214,75 @@ func (r *MultigresClusterReconciler) Reconcile(
199214
return ctrl.Result{}, nil
200215
}
201216

217+
// handleDeletion orchestrates phased deletion of the MultigresCluster.
218+
// It deletes Cells and TableGroups first so their data-handler finalizers
219+
// can run against the still-live topo servers. Once all Cells and Shards
220+
// are fully removed, it removes our finalizer, allowing Kubernetes GC
221+
// to clean up the remaining resources (topo servers, deployments).
222+
func (r *MultigresClusterReconciler) handleDeletion(
223+
ctx context.Context,
224+
cluster *multigresv1alpha1.MultigresCluster,
225+
) (ctrl.Result, error) {
226+
l := log.FromContext(ctx)
227+
clusterLabels := client.MatchingLabels{metadata.LabelMultigresCluster: cluster.Name}
228+
ns := client.InNamespace(cluster.Namespace)
229+
230+
// Delete all Cells owned by this cluster.
231+
cells := &multigresv1alpha1.CellList{}
232+
if err := r.List(ctx, cells, ns, clusterLabels); err != nil {
233+
return ctrl.Result{}, fmt.Errorf("failed to list cells: %w", err)
234+
}
235+
for i := range cells.Items {
236+
if cells.Items[i].DeletionTimestamp.IsZero() {
237+
if err := r.Delete(ctx, &cells.Items[i]); err != nil && !errors.IsNotFound(err) {
238+
return ctrl.Result{}, fmt.Errorf("failed to delete cell %q: %w", cells.Items[i].Name, err)
239+
}
240+
l.Info("Initiated cell deletion", "cell", cells.Items[i].Name)
241+
}
242+
}
243+
244+
// Delete all TableGroups owned by this cluster (cascades to Shards).
245+
tableGroups := &multigresv1alpha1.TableGroupList{}
246+
if err := r.List(ctx, tableGroups, ns, clusterLabels); err != nil {
247+
return ctrl.Result{}, fmt.Errorf("failed to list tablegroups: %w", err)
248+
}
249+
for i := range tableGroups.Items {
250+
if tableGroups.Items[i].DeletionTimestamp.IsZero() {
251+
if err := r.Delete(ctx, &tableGroups.Items[i]); err != nil && !errors.IsNotFound(err) {
252+
return ctrl.Result{}, fmt.Errorf("failed to delete tablegroup %q: %w", tableGroups.Items[i].Name, err)
253+
}
254+
l.Info("Initiated tablegroup deletion", "tablegroup", tableGroups.Items[i].Name)
255+
}
256+
}
257+
258+
// Check if any Cells or Shards still exist (waiting for data-handler finalizer processing).
259+
shards := &multigresv1alpha1.ShardList{}
260+
if err := r.List(ctx, shards, ns, clusterLabels); err != nil {
261+
return ctrl.Result{}, fmt.Errorf("failed to list shards: %w", err)
262+
}
263+
264+
remaining := len(cells.Items) + len(shards.Items)
265+
if remaining > 0 {
266+
l.Info("Waiting for data-handler finalizers",
267+
"remainingCells", len(cells.Items),
268+
"remainingShards", len(shards.Items),
269+
)
270+
return ctrl.Result{RequeueAfter: 2 * time.Second}, nil
271+
}
272+
273+
// All Cells and Shards are gone — safe to remove our finalizer.
274+
cluster.Finalizers = slices.DeleteFunc(cluster.Finalizers, func(s string) bool {
275+
return s == finalizerName
276+
})
277+
if err := r.Update(ctx, cluster); err != nil {
278+
return ctrl.Result{}, fmt.Errorf("failed to remove finalizer: %w", err)
279+
}
280+
281+
l.Info("Cluster cleanup complete, finalizer removed")
282+
r.Recorder.Event(cluster, "Normal", "CleanupComplete", "All data-handler resources cleaned up")
283+
return ctrl.Result{}, nil
284+
}
285+
202286
// SetupWithManager sets up the controller with the Manager.
203287
func (r *MultigresClusterReconciler) SetupWithManager(
204288
mgr ctrl.Manager,

pkg/cluster-handler/controller/multigrescluster/reconcile_global.go

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@ import (
1414

1515
// Builder function variables to allow mocking in tests
1616
var (
17-
buildMultiAdminService = BuildMultiAdminService
18-
buildMultiAdminWebDeployment = BuildMultiAdminWebDeployment
19-
buildMultiAdminWebService = BuildMultiAdminWebService
17+
buildMultiAdminService = BuildMultiAdminService
18+
buildMultiAdminWebDeployment = BuildMultiAdminWebDeployment
19+
buildMultiAdminWebService = BuildMultiAdminWebService
20+
buildMultiGatewayGlobalService = BuildMultiGatewayGlobalService
2021
)
2122

2223
func (r *MultigresClusterReconciler) reconcileGlobalComponents(
@@ -231,5 +232,22 @@ func (r *MultigresClusterReconciler) reconcileMultiAdminWeb(
231232
return fmt.Errorf("failed to apply multiadmin-web service: %w", err)
232233
}
233234

235+
// 3. Reconcile global multigateway Service
236+
desiredGwSvc, err := buildMultiGatewayGlobalService(cluster, r.Scheme)
237+
if err != nil {
238+
return fmt.Errorf("failed to build global multigateway service: %w", err)
239+
}
240+
241+
desiredGwSvc.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Service"))
242+
if err := r.Patch(
243+
ctx,
244+
desiredGwSvc,
245+
client.Apply,
246+
client.ForceOwnership,
247+
client.FieldOwner("multigres-operator"),
248+
); err != nil {
249+
return fmt.Errorf("failed to apply global multigateway service: %w", err)
250+
}
251+
234252
return nil
235253
}

0 commit comments

Comments
 (0)