Skip to content

Commit 745819c

Browse files
authored
fix: prevent non-deterministic reconcile loops (#121)
1 parent 07c019c commit 745819c

File tree

6 files changed

+161
-91
lines changed

6 files changed

+161
-91
lines changed

api/core/v1beta1/connect_types_test.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -121,22 +121,22 @@ func TestCreateVolumeFactory_OffHost(t *testing.T) {
121121
fmt.Printf("VolMounts: %+v\n", volMounts)
122122
assert.Len(t, volMounts, 6)
123123

124-
// config volumes
124+
// config volumes (sorted by MountPath within the same volume name)
125125
checkVolumeMount(t, volMounts[0], "config-volume",
126-
"/etc/rstudio-connect/rstudio-connect.gcfg", "rstudio-connect.gcfg", true)
127-
checkVolumeMount(t, volMounts[1], "config-volume",
128-
"/etc/rstudio-connect/runtime.yaml", "runtime.yaml", true)
129-
checkVolumeMount(t, volMounts[2], "config-volume",
130126
"/etc/rstudio-connect/launcher/launcher.kubernetes.profiles.conf",
131127
"launcher.kubernetes.profiles.conf", true)
128+
checkVolumeMount(t, volMounts[1], "config-volume",
129+
"/etc/rstudio-connect/rstudio-connect.gcfg", "rstudio-connect.gcfg", true)
130+
checkVolumeMount(t, volMounts[2], "config-volume",
131+
"/etc/rstudio-connect/runtime.yaml", "runtime.yaml", true)
132132

133-
// template volumes
133+
// template volumes (sorted by MountPath within the same volume name)
134134
checkVolumeMount(t, volMounts[3], "template-config",
135-
"/var/lib/rstudio-connect-launcher/Kubernetes/rstudio-library-templates-data.tpl",
136-
"rstudio-library-templates-data.tpl", true)
137-
checkVolumeMount(t, volMounts[4], "template-config",
138135
"/var/lib/rstudio-connect-launcher/Kubernetes/job.tpl",
139136
"job.tpl", true)
137+
checkVolumeMount(t, volMounts[4], "template-config",
138+
"/var/lib/rstudio-connect-launcher/Kubernetes/rstudio-library-templates-data.tpl",
139+
"rstudio-library-templates-data.tpl", true)
140140
checkVolumeMount(t, volMounts[5], "template-config",
141141
"/var/lib/rstudio-connect-launcher/Kubernetes/service.tpl",
142142
"service.tpl", true)

api/product/secret.go

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"encoding/json"
66
"fmt"
7+
"sort"
78

89
"github.com/aws/aws-sdk-go-v2/aws"
910
"github.com/aws/aws-sdk-go-v2/config"
@@ -58,10 +59,15 @@ func (t *TestSecretProvider) GetSecretWithFallback(key string) string {
5859
}
5960

6061
func mapToJmesPath(input map[string]string) (jmes []secretObjectJmesPath) {
61-
for k, v := range input {
62+
keys := make([]string, 0, len(input))
63+
for k := range input {
64+
keys = append(keys, k)
65+
}
66+
sort.Strings(keys)
67+
for _, k := range keys {
6268
jmes = append(jmes, secretObjectJmesPath{
6369
// ensure this path is quoted appropriately...
64-
Path: fmt.Sprintf("\"%s\"", v),
70+
Path: fmt.Sprintf("\"%s\"", input[k]),
6571
ObjectAlias: k,
6672
})
6773
}
@@ -83,11 +89,22 @@ func generateSecretObjectYaml(name string, keys map[string]string) (string, erro
8389
}
8490

8591
func generateSecretObjects(p KubernetesOwnerProvider, secrets map[string]map[string]string) (output []*v1.SecretObject) {
86-
for k, v := range secrets {
92+
outerKeys := make([]string, 0, len(secrets))
93+
for k := range secrets {
94+
outerKeys = append(outerKeys, k)
95+
}
96+
sort.Strings(outerKeys)
97+
for _, k := range outerKeys {
98+
v := secrets[k]
99+
innerKeys := make([]string, 0, len(v))
100+
for dk := range v {
101+
innerKeys = append(innerKeys, dk)
102+
}
103+
sort.Strings(innerKeys)
87104
var objData []*v1.SecretObjectData
88-
for dk, dv := range v {
105+
for _, dk := range innerKeys {
89106
objData = append(objData, &v1.SecretObjectData{
90-
ObjectName: dv,
107+
ObjectName: v[dk],
91108
Key: dk,
92109
})
93110
}

api/product/volumes.go

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,14 @@ type MultiContainerVolumeFactory struct {
2929
}
3030

3131
func (m *MultiContainerVolumeFactory) InitContainers() []corev1.Container {
32-
containers := []corev1.Container{}
33-
for k, s := range m.InitContainerDefs {
32+
keys := make([]string, 0, len(m.InitContainerDefs))
33+
for k := range m.InitContainerDefs {
34+
keys = append(keys, k)
35+
}
36+
sort.Strings(keys)
37+
containers := make([]corev1.Container, 0, len(keys))
38+
for _, k := range keys {
39+
s := m.InitContainerDefs[k]
3440
containers = append(containers, corev1.Container{
3541
Name: k,
3642
Image: s.Image,
@@ -45,8 +51,14 @@ func (m *MultiContainerVolumeFactory) InitContainers() []corev1.Container {
4551
}
4652

4753
func (m *MultiContainerVolumeFactory) Sidecars() []corev1.Container {
48-
containers := []corev1.Container{}
49-
for k, s := range m.SidecarDefs {
54+
keys := make([]string, 0, len(m.SidecarDefs))
55+
for k := range m.SidecarDefs {
56+
keys = append(keys, k)
57+
}
58+
sort.Strings(keys)
59+
containers := make([]corev1.Container, 0, len(keys))
60+
for _, k := range keys {
61+
s := m.SidecarDefs[k]
5062
containers = append(containers, corev1.Container{
5163
Name: k,
5264
Image: s.Image,
@@ -114,7 +126,10 @@ func (v *VolumeFactory) VolumeMounts() []corev1.VolumeMount {
114126
}
115127
}
116128
sort.Slice(vms, func(i, j int) bool {
117-
return vms[i].Name < vms[j].Name
129+
if vms[i].Name != vms[j].Name {
130+
return vms[i].Name < vms[j].Name
131+
}
132+
return vms[i].MountPath < vms[j].MountPath
118133
})
119134
return vms
120135
}

internal/controller/core/site_controller_workbench.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package core
33
import (
44
"context"
55
"fmt"
6+
"sort"
67
"strings"
78

89
"github.com/go-logr/logr"
@@ -579,6 +580,7 @@ func getResourceProfileKeys(resourceProfiles map[string]*v1beta1.WorkbenchLaunch
579580
for key := range resourceProfiles {
580581
keys = append(keys, key)
581582
}
583+
sort.Strings(keys)
582584
return keys
583585
}
584586

internal/controller/core/subdir.go

Lines changed: 99 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"math/rand"
7+
"strings"
78

89
"github.com/posit-dev/team-operator/api/core/v1beta1"
910
"github.com/posit-dev/team-operator/api/product"
@@ -93,96 +94,123 @@ func (r *SiteReconciler) provisionSubDirectoryCreator(ctx context.Context, req c
9394
return err
9495
}
9596

96-
// TODO: some way to ensure that we do not spawn _too many_ jobs...?
97-
9897
if !site.Spec.VolumeSubdirJobOff {
99-
args := []string{
100-
fmt.Sprintf("/mnt/%s/connect", site.Name),
101-
fmt.Sprintf("/mnt/%s/workbench", site.Name),
102-
fmt.Sprintf("/mnt/%s/workbench-shared-storage", site.Name),
98+
// Skip Job creation if a successfully completed subdir Job already exists.
99+
// The Job is idempotent (mkdir -p), so re-running it is harmless but wasteful
100+
var existingJobs batchv1.JobList
101+
if err := r.List(ctx, &existingJobs,
102+
client.InNamespace(req.Namespace),
103+
client.MatchingLabels(site.KubernetesLabels()),
104+
); err != nil {
105+
l.Error(err, "Error listing existing subdir jobs")
106+
return err
103107
}
104-
if site.Spec.SharedDirectory != "" {
105-
args = append(args, fmt.Sprintf("/mnt/%s/shared", site.Name))
108+
hasCompletedJob := false
109+
for i := range existingJobs.Items {
110+
job := &existingJobs.Items[i]
111+
if strings.HasPrefix(job.Name, provisionerName+"-") {
112+
for _, cond := range job.Status.Conditions {
113+
if cond.Type == batchv1.JobComplete && cond.Status == v1.ConditionTrue {
114+
hasCompletedJob = true
115+
break
116+
}
117+
}
118+
}
119+
if hasCompletedJob {
120+
break
121+
}
106122
}
107-
provisionerNameTemp := provisionerName + "-" + RandStringBytes(6)
123+
if hasCompletedJob {
124+
l.V(1).Info("Subdir provisioning job already completed; skipping")
125+
} else {
126+
args := []string{
127+
fmt.Sprintf("/mnt/%s/connect", site.Name),
128+
fmt.Sprintf("/mnt/%s/workbench", site.Name),
129+
fmt.Sprintf("/mnt/%s/workbench-shared-storage", site.Name),
130+
}
131+
if site.Spec.SharedDirectory != "" {
132+
args = append(args, fmt.Sprintf("/mnt/%s/shared", site.Name))
133+
}
134+
provisionerNameTemp := provisionerName + "-" + RandStringBytes(6)
108135

109-
provisionerJob := &batchv1.Job{
110-
ObjectMeta: metav1.ObjectMeta{
111-
Name: provisionerNameTemp,
112-
Namespace: req.Namespace,
113-
},
114-
}
136+
provisionerJob := &batchv1.Job{
137+
ObjectMeta: metav1.ObjectMeta{
138+
Name: provisionerNameTemp,
139+
Namespace: req.Namespace,
140+
},
141+
}
115142

116-
if _, err := internal.CreateOrUpdateResource(ctx, r.Client, r.Scheme, l, provisionerJob, site, func() error {
117-
provisionerJob.Labels = site.KubernetesLabels()
118-
provisionerJob.Spec = batchv1.JobSpec{
119-
// 2 hours to live
120-
TTLSecondsAfterFinished: ptr.To(int32(2 * 60 * 60)),
121-
Template: v1.PodTemplateSpec{
122-
Spec: v1.PodSpec{
123-
EnableServiceLinks: ptr.To(false),
124-
RestartPolicy: v1.RestartPolicyOnFailure,
125-
Containers: []v1.Container{
126-
{
127-
Name: "subdir-maker",
128-
Image: "ghcr.io/rstudio/rstudio-workbench-preview:jammy-daily",
129-
Command: []string{
130-
"/subdir-provisioner.sh",
131-
},
132-
Args: args,
133-
VolumeMounts: []v1.VolumeMount{
134-
{
135-
Name: "exec-script",
136-
ReadOnly: false,
137-
MountPath: "/subdir-provisioner.sh",
138-
SubPath: "subdir-provisioner.sh",
143+
if _, err := internal.CreateOrUpdateResource(ctx, r.Client, r.Scheme, l, provisionerJob, site, func() error {
144+
provisionerJob.Labels = site.KubernetesLabels()
145+
provisionerJob.Spec = batchv1.JobSpec{
146+
// 2 hours to live
147+
TTLSecondsAfterFinished: ptr.To(int32(2 * 60 * 60)),
148+
Template: v1.PodTemplateSpec{
149+
Spec: v1.PodSpec{
150+
EnableServiceLinks: ptr.To(false),
151+
RestartPolicy: v1.RestartPolicyOnFailure,
152+
Containers: []v1.Container{
153+
{
154+
Name: "subdir-maker",
155+
Image: "ghcr.io/rstudio/rstudio-workbench-preview:jammy-daily",
156+
Command: []string{
157+
"/subdir-provisioner.sh",
139158
},
140-
{
141-
Name: "data-volume",
142-
ReadOnly: false,
143-
MountPath: "/mnt/",
159+
Args: args,
160+
VolumeMounts: []v1.VolumeMount{
161+
{
162+
Name: "exec-script",
163+
ReadOnly: false,
164+
MountPath: "/subdir-provisioner.sh",
165+
SubPath: "subdir-provisioner.sh",
166+
},
167+
{
168+
Name: "data-volume",
169+
ReadOnly: false,
170+
MountPath: "/mnt/",
171+
},
144172
},
145173
},
146174
},
147-
},
148-
SecurityContext: &v1.PodSecurityContext{
149-
RunAsUser: ptr.To(int64(0)),
150-
},
151-
Volumes: []v1.Volume{
152-
{
153-
Name: "exec-script",
154-
VolumeSource: v1.VolumeSource{
155-
ConfigMap: &v1.ConfigMapVolumeSource{
156-
LocalObjectReference: v1.LocalObjectReference{
157-
Name: provisionerName,
158-
},
159-
Items: []v1.KeyToPath{
160-
{
161-
Key: "subdir-provisioner.sh",
162-
Path: "subdir-provisioner.sh",
175+
SecurityContext: &v1.PodSecurityContext{
176+
RunAsUser: ptr.To(int64(0)),
177+
},
178+
Volumes: []v1.Volume{
179+
{
180+
Name: "exec-script",
181+
VolumeSource: v1.VolumeSource{
182+
ConfigMap: &v1.ConfigMapVolumeSource{
183+
LocalObjectReference: v1.LocalObjectReference{
184+
Name: provisionerName,
163185
},
186+
Items: []v1.KeyToPath{
187+
{
188+
Key: "subdir-provisioner.sh",
189+
Path: "subdir-provisioner.sh",
190+
},
191+
},
192+
DefaultMode: ptr.To(product.MustParseOctal("755")),
164193
},
165-
DefaultMode: ptr.To(product.MustParseOctal("755")),
166194
},
167195
},
168-
},
169-
{
170-
Name: "data-volume",
171-
VolumeSource: v1.VolumeSource{
172-
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
173-
ClaimName: provisionerName,
174-
ReadOnly: false,
196+
{
197+
Name: "data-volume",
198+
VolumeSource: v1.VolumeSource{
199+
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
200+
ClaimName: provisionerName,
201+
ReadOnly: false,
202+
},
175203
},
176204
},
177205
},
178206
},
179207
},
180-
},
208+
}
209+
return nil
210+
}); err != nil {
211+
l.Error(err, "Error creating provisioner job")
212+
return err
181213
}
182-
return nil
183-
}); err != nil {
184-
l.Error(err, "Error creating provisioner job")
185-
return err
186214
}
187215
}
188216
return nil

internal/controller/core/workbench.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,15 @@ func (r *WorkbenchReconciler) ensureDeployedService(ctx context.Context, req ctr
653653
if _, err := internal.CreateOrUpdateResource(ctx, r.Client, r.Scheme, l, secret, w, func() error {
654654
secret.Labels = w.KubernetesLabels()
655655
secret.Immutable = nil
656-
secret.StringData = secretData
656+
// Use Data ([]byte) instead of StringData (string) to avoid a spurious
657+
// diff on every reconcile. StringData is write-only: the API server
658+
// converts it to Data on create, and returns StringData as nil on read.
659+
// CreateOrUpdate compares the pre-mutation snapshot (StringData=nil) with
660+
// the post-mutation object (StringData set), always seeing a diff.
661+
secret.Data = make(map[string][]byte, len(secretData))
662+
for k, v := range secretData {
663+
secret.Data[k] = []byte(v)
664+
}
657665
secret.Type = "Opaque"
658666
return nil
659667
}); err != nil {

0 commit comments

Comments
 (0)