Skip to content

Commit ab7f485

Browse files
committed
feat(operator): custom probe scripts from configmaps with override option
1 parent 47c15d1 commit ab7f485

19 files changed

+670
-35
lines changed

README.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,39 @@ kubectl patch dragonfly dragonfly-sample --type merge -p '{"spec":{"resources":{
9191
9292
To add authentication to the dragonfly pods, you either set the `DFLY_requirepass` environment variable, or add the `--requirepass` argument.
9393
94+
### Customising health-check probe scripts
95+
96+
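The operator generates default liveness, readiness, and startup probe scripts and mounts them via ConfigMaps. You can replace any probe with your own script using the `custom*ProbeConfigMap` fields. Each custom ConfigMap must provide its script under the key for that probe: `liveness-check.sh`, `readiness-check.sh`, or `startup-check.sh`.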
97+
98+
Scripts run inside the Dragonfly container and can read the `HEALTHCHECK_PORT` environment variable, which holds the admin port (9999). The admin port uses neither TLS nor authentication.
99+
100+
```yaml
101+
apiVersion: v1
102+
kind: ConfigMap
103+
metadata:
104+
name: dragonfly-sample-probes
105+
namespace: default
106+
data:
107+
liveness-check.sh: |
108+
#!/bin/sh
109+
RESPONSE=$(timeout 4 redis-cli -h localhost -p ${HEALTHCHECK_PORT:-9999} PING 2>/dev/null)
110+
case "$RESPONSE" in
111+
PONG|*LOADING*) exit 0 ;;
112+
*) exit 1 ;;
113+
esac
114+
---
115+
apiVersion: dragonflydb.io/v1alpha1
116+
kind: Dragonfly
117+
metadata:
118+
name: dragonfly-sample
119+
spec:
120+
replicas: 1
121+
customLivenessProbeConfigMap:
122+
name: dragonfly-sample-probes
123+
```
124+
125+
> **Override precedence:** `spec.additionalVolumes` with a matching volume name (`liveness-probe`, `readiness-probe`, `startup-probe`) takes precedence over `custom*ProbeConfigMap`. Do not use both for the same probe.
126+
94127
### Deleting a Dragonfly instance
95128
96129
To delete a Dragonfly instance, you can run

api/v1alpha1/dragonfly_types.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,21 @@ type DragonflySpec struct {
196196
// +optional
197197
// +kubebuilder:validation:Optional
198198
Pdb *PdbSpec `json:"pdb,omitempty"`
199+
200+
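// (Optional) Custom ConfigMap that must contain the key "liveness-check.sh"; its script overrides the default liveness probe. A volume named "liveness-probe" in spec.additionalVolumes takes precedence over this field.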
201+
// +optional
202+
// +kubebuilder:validation:Optional
203+
CustomLivenessProbeConfigMap *corev1.LocalObjectReference `json:"customLivenessProbeConfigMap,omitempty"`
204+
205+
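// (Optional) Custom ConfigMap that must contain the key "readiness-check.sh"; its script overrides the default readiness probe. A volume named "readiness-probe" in spec.additionalVolumes takes precedence over this field.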
206+
// +optional
207+
// +kubebuilder:validation:Optional
208+
CustomReadinessProbeConfigMap *corev1.LocalObjectReference `json:"customReadinessProbeConfigMap,omitempty"`
209+
210+
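// (Optional) Custom ConfigMap that must contain the key "startup-check.sh"; its script overrides the default startup probe. A volume named "startup-probe" in spec.additionalVolumes takes precedence over this field.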
211+
// +optional
212+
// +kubebuilder:validation:Optional
213+
CustomStartupProbeConfigMap *corev1.LocalObjectReference `json:"customStartupProbeConfigMap,omitempty"`
199214
}
200215

201216
// PdbSpec defines the desired state of the PodDisruptionBudget

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/dragonflydb.io_dragonflies.yaml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4461,6 +4461,60 @@ spec:
44614461
type: string
44624462
type: object
44634463
type: object
4464+
customLivenessProbeConfigMap:
4465+
description: |-
4466+
(Optional) Custom ConfigMap for the liveness probe script.
4467+
Must contain key "liveness-check.sh". Overrides the operator-embedded default.
4468+
Note: if spec.additionalVolumes also contains a volume named "liveness-probe",
4469+
it takes precedence over this field. Do not use both for the same probe.
4470+
properties:
4471+
name:
4472+
default: ""
4473+
description: |-
4474+
Name of the referent.
4475+
This field is effectively required, but due to backwards compatibility is
4476+
allowed to be empty. Instances of this type with an empty value here are
4477+
almost certainly wrong.
4478+
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
4479+
type: string
4480+
type: object
4481+
x-kubernetes-map-type: atomic
4482+
customReadinessProbeConfigMap:
4483+
description: |-
4484+
(Optional) Custom ConfigMap for the readiness probe script.
4485+
Must contain key "readiness-check.sh". Overrides the operator-embedded default.
4486+
Note: if spec.additionalVolumes also contains a volume named "readiness-probe",
4487+
it takes precedence over this field. Do not use both for the same probe.
4488+
properties:
4489+
name:
4490+
default: ""
4491+
description: |-
4492+
Name of the referent.
4493+
This field is effectively required, but due to backwards compatibility is
4494+
allowed to be empty. Instances of this type with an empty value here are
4495+
almost certainly wrong.
4496+
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
4497+
type: string
4498+
type: object
4499+
x-kubernetes-map-type: atomic
4500+
customStartupProbeConfigMap:
4501+
description: |-
4502+
(Optional) Custom ConfigMap for the startup probe script.
4503+
Must contain key "startup-check.sh". Overrides the operator-embedded default.
4504+
Note: if spec.additionalVolumes also contains a volume named "startup-probe",
4505+
it takes precedence over this field. Do not use both for the same probe.
4506+
properties:
4507+
name:
4508+
default: ""
4509+
description: |-
4510+
Name of the referent.
4511+
This field is effectively required, but due to backwards compatibility is
4512+
allowed to be empty. Instances of this type with an empty value here are
4513+
almost certainly wrong.
4514+
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
4515+
type: string
4516+
type: object
4517+
x-kubernetes-map-type: atomic
44644518
enableReplicationReadinessGate:
44654519
description: |-
44664520
(Optional) When enabled, adds a custom readiness gate to pods that prevents

config/rbac/role.yaml

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,7 @@ rules:
77
- apiGroups:
88
- ""
99
resources:
10-
- events
11-
verbs:
12-
- create
13-
- patch
14-
- apiGroups:
15-
- ""
16-
resources:
10+
- configmaps
1711
- pods
1812
- services
1913
verbs:
@@ -24,6 +18,13 @@ rules:
2418
- patch
2519
- update
2620
- watch
21+
- apiGroups:
22+
- ""
23+
resources:
24+
- events
25+
verbs:
26+
- create
27+
- patch
2728
- apiGroups:
2829
- ""
2930
resources:

e2e/dragonfly_controller_test.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ import (
2828
"strings"
2929
"time"
3030

31-
"github.com/redis/go-redis/v9"
3231
resourcesv1 "github.com/dragonflydb/dragonfly-operator/api/v1alpha1"
3332
"github.com/dragonflydb/dragonfly-operator/internal/controller"
3433
"github.com/dragonflydb/dragonfly-operator/internal/resources"
3534
. "github.com/onsi/ginkgo/v2"
3635
. "github.com/onsi/gomega"
36+
"github.com/redis/go-redis/v9"
3737
appsv1 "k8s.io/api/apps/v1"
3838
corev1 "k8s.io/api/core/v1"
3939
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -136,10 +136,9 @@ var _ = Describe("Dragonfly Lifecycle tests", Ordered, FlakeAttempts(3), func()
136136
err := k8sClient.Create(ctx, &df)
137137
Expect(err).To(BeNil())
138138

139-
// Wait until Dragonfly object is marked initialized
140-
waitForDragonflyPhase(ctx, k8sClient, name, namespace, controller.PhaseResourcesCreated, 2*time.Minute)
141-
waitForStatefulSetReady(ctx, k8sClient, name, namespace, 2*time.Minute)
142-
139+
// Wait for master election, then for all replicas to be Ready
140+
waitForDragonflyPhase(ctx, k8sClient, name, namespace, controller.PhaseReady, 3*time.Minute)
141+
waitForStatefulSetReady(ctx, k8sClient, name, namespace, 3*time.Minute)
143142
})
144143

145144
var ss appsv1.StatefulSet
@@ -714,8 +713,8 @@ var _ = Describe("Dragonfly tiering test with single replica", Ordered, FlakeAtt
714713
})
715714

716715
It("Resources should exist", func() {
717-
// Wait until Dragonfly object is marked initialized
718-
waitForDragonflyPhase(ctx, k8sClient, name, namespace, controller.PhaseResourcesCreated, 2*time.Minute)
716+
// Wait until Dragonfly object is marked initialized and master is elected
717+
waitForDragonflyPhase(ctx, k8sClient, name, namespace, controller.PhaseReady, 2*time.Minute)
719718
waitForStatefulSetReady(ctx, k8sClient, name, namespace, 2*time.Minute)
720719

721720
// Check for service and statefulset
@@ -833,8 +832,8 @@ var _ = Describe("Dragonfly PVC Test with single replica", Ordered, FlakeAttempt
833832
})
834833

835834
It("Resources should exist", func() {
836-
// Wait until Dragonfly object is marked initialized
837-
waitForDragonflyPhase(ctx, k8sClient, name, namespace, controller.PhaseResourcesCreated, 2*time.Minute)
835+
// Wait until Dragonfly object is marked initialized and master is elected
836+
waitForDragonflyPhase(ctx, k8sClient, name, namespace, controller.PhaseReady, 2*time.Minute)
838837
waitForStatefulSetReady(ctx, k8sClient, name, namespace, 2*time.Minute)
839838

840839
// Check for service and statefulset
@@ -888,6 +887,10 @@ var _ = Describe("Dragonfly PVC Test with single replica", Ordered, FlakeAttempt
888887
// Wait until Dragonfly object is marked initialized
889888
waitForDragonflyPhase(ctx, k8sClient, name, namespace, controller.PhaseReady, 2*time.Minute)
890889
waitForStatefulSetReady(ctx, k8sClient, name, namespace, 2*time.Minute)
890+
// Phase may already be Ready from before deletion; wait explicitly for the
891+
// lifecycle controller to finish master election on the recreated pod.
892+
err = waitForMasterPod(ctx, k8sClient, name, namespace, 2*time.Minute)
893+
Expect(err).To(BeNil())
891894
// check if the pod is created
892895
err = k8sClient.Get(ctx, types.NamespacedName{
893896
Name: fmt.Sprintf("%s-0", name),

e2e/util.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,28 @@ func parseTieredEntriesFromInfo(info string) (int64, error) {
6666
return 0, fmt.Errorf("tiered_entries not found")
6767
}
6868

69+
// waitForMasterPod polls until at least one pod with role=master exists. Use this
70+
// after waitForStatefulSetReady to guarantee the lifecycle controller has finished
71+
// master election before the test tries to connect.
72+
func waitForMasterPod(ctx context.Context, c client.Client, name, namespace string, maxDuration time.Duration) error {
73+
ctx, cancel := context.WithTimeout(ctx, maxDuration)
74+
defer cancel()
75+
for {
76+
select {
77+
case <-ctx.Done():
78+
return fmt.Errorf("timed out waiting for master pod for %s", name)
79+
default:
80+
var pods corev1.PodList
81+
if err := c.List(ctx, &pods, client.InNamespace(namespace), client.MatchingLabels{
82+
resources.DragonflyNameLabelKey: name,
83+
resources.RoleLabelKey: resources.Master,
84+
}); err == nil && len(pods.Items) > 0 {
85+
return nil
86+
}
87+
}
88+
}
89+
}
90+
6991
func waitForStatefulSetReady(ctx context.Context, c client.Client, name, namespace string, maxDuration time.Duration) error {
7092
ctx, cancel := context.WithTimeout(ctx, maxDuration)
7193
defer cancel()

internal/controller/dragonfly_controller.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ type DragonflyReconciler struct {
4545
//+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete
4646
//+kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;create;update;patch;delete
4747
//+kubebuilder:rbac:groups="",resources=events,verbs=create;patch
48+
//+kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete
4849
//+kubebuilder:rbac:groups=networking.k8s.io,resources=networkpolicies,verbs=get;list;watch;create;update;patch;delete
4950

5051
// Reconcile is part of the main kubernetes reconciliation loop which aims to

internal/controller/dragonfly_instance.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,12 @@ func resourceSpecsEqual(desired, existing client.Object) bool {
744744
if !reflect.DeepEqual(desired.GetLabels(), existing.GetLabels()) || !reflect.DeepEqual(desired.GetAnnotations(), existing.GetAnnotations()) {
745745
return false
746746
}
747+
// ConfigMaps store content in .Data, not .Spec — compare Data directly.
748+
if cmDesired, ok := desired.(*corev1.ConfigMap); ok {
749+
if cmExisting, ok := existing.(*corev1.ConfigMap); ok {
750+
return reflect.DeepEqual(cmDesired.Data, cmExisting.Data)
751+
}
752+
}
747753
// Compare only the .Spec field using reflection
748754
desiredV := reflect.ValueOf(desired).Elem()
749755
existingV := reflect.ValueOf(existing).Elem()

internal/controller/util_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ func makePod(name string) corev1.Pod {
3232

3333
func TestSelectMasterCandidate(t *testing.T) {
3434
tests := []struct {
35-
name string
36-
pods []corev1.Pod
37-
readyPods map[string]bool // pod names that are considered ready
38-
wantName string // expected winner; "" means nil result
35+
name string
36+
pods []corev1.Pod
37+
readyPods map[string]bool // pod names that are considered ready
38+
wantName string // expected winner; "" means nil result
3939
}{
4040
{
4141
name: "no pods",

0 commit comments

Comments
 (0)