Skip to content

Commit b33e542

Browse files
committed
fix: stop endless subdir job spawning
1 parent 2388bff commit b33e542

File tree

1 file changed

+99
-71
lines changed

1 file changed

+99
-71
lines changed

internal/controller/core/subdir.go

Lines changed: 99 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -93,96 +93,124 @@ func (r *SiteReconciler) provisionSubDirectoryCreator(ctx context.Context, req c
9393
return err
9494
}
9595

96-
// TODO: some way to ensure that we do not spawn _too many_ jobs...?
97-
9896
if !site.Spec.VolumeSubdirJobOff {
99-
args := []string{
100-
fmt.Sprintf("/mnt/%s/connect", site.Name),
101-
fmt.Sprintf("/mnt/%s/workbench", site.Name),
102-
fmt.Sprintf("/mnt/%s/workbench-shared-storage", site.Name),
97+
// Skip Job creation if a subdir Job whose JobComplete condition is True already exists.
98+
// The Job is idempotent (mkdir -p), so re-running it is harmless but wasteful —
99+
// previous code created a new Job with a random suffix on every reconcile. Only a Job with JobComplete=True suppresses creation; running or failed Jobs do not.
100+
var existingJobs batchv1.JobList
101+
if err := r.List(ctx, &existingJobs,
102+
client.InNamespace(req.Namespace),
103+
client.MatchingLabels(site.KubernetesLabels()),
104+
); err != nil {
105+
l.Error(err, "Error listing existing subdir jobs")
106+
return err
103107
}
104-
if site.Spec.SharedDirectory != "" {
105-
args = append(args, fmt.Sprintf("/mnt/%s/shared", site.Name))
108+
hasCompletedJob := false
109+
for i := range existingJobs.Items {
110+
job := &existingJobs.Items[i]
111+
if len(job.Name) > len(provisionerName) && job.Name[:len(provisionerName)+1] == provisionerName+"-" {
112+
for _, cond := range job.Status.Conditions {
113+
if cond.Type == batchv1.JobComplete && cond.Status == v1.ConditionTrue {
114+
hasCompletedJob = true
115+
break
116+
}
117+
}
118+
}
119+
if hasCompletedJob {
120+
break
121+
}
106122
}
107-
provisionerNameTemp := provisionerName + "-" + RandStringBytes(6)
123+
if hasCompletedJob {
124+
l.V(1).Info("Subdir provisioning job already completed; skipping")
125+
} else {
126+
args := []string{
127+
fmt.Sprintf("/mnt/%s/connect", site.Name),
128+
fmt.Sprintf("/mnt/%s/workbench", site.Name),
129+
fmt.Sprintf("/mnt/%s/workbench-shared-storage", site.Name),
130+
}
131+
if site.Spec.SharedDirectory != "" {
132+
args = append(args, fmt.Sprintf("/mnt/%s/shared", site.Name))
133+
}
134+
provisionerNameTemp := provisionerName + "-" + RandStringBytes(6)
108135

109-
provisionerJob := &batchv1.Job{
110-
ObjectMeta: metav1.ObjectMeta{
111-
Name: provisionerNameTemp,
112-
Namespace: req.Namespace,
113-
},
114-
}
136+
provisionerJob := &batchv1.Job{
137+
ObjectMeta: metav1.ObjectMeta{
138+
Name: provisionerNameTemp,
139+
Namespace: req.Namespace,
140+
},
141+
}
115142

116-
if _, err := internal.CreateOrUpdateResource(ctx, r.Client, r.Scheme, l, provisionerJob, site, func() error {
117-
provisionerJob.Labels = site.KubernetesLabels()
118-
provisionerJob.Spec = batchv1.JobSpec{
119-
// 2 hours to live
120-
TTLSecondsAfterFinished: ptr.To(int32(2 * 60 * 60)),
121-
Template: v1.PodTemplateSpec{
122-
Spec: v1.PodSpec{
123-
EnableServiceLinks: ptr.To(false),
124-
RestartPolicy: v1.RestartPolicyOnFailure,
125-
Containers: []v1.Container{
126-
{
127-
Name: "subdir-maker",
128-
Image: "ghcr.io/rstudio/rstudio-workbench-preview:jammy-daily",
129-
Command: []string{
130-
"/subdir-provisioner.sh",
131-
},
132-
Args: args,
133-
VolumeMounts: []v1.VolumeMount{
134-
{
135-
Name: "exec-script",
136-
ReadOnly: false,
137-
MountPath: "/subdir-provisioner.sh",
138-
SubPath: "subdir-provisioner.sh",
143+
if _, err := internal.CreateOrUpdateResource(ctx, r.Client, r.Scheme, l, provisionerJob, site, func() error {
144+
provisionerJob.Labels = site.KubernetesLabels()
145+
provisionerJob.Spec = batchv1.JobSpec{
146+
// Finished Jobs are kept for 2 hours before TTL cleanup removes them.
147+
TTLSecondsAfterFinished: ptr.To(int32(2 * 60 * 60)),
148+
Template: v1.PodTemplateSpec{
149+
Spec: v1.PodSpec{
150+
EnableServiceLinks: ptr.To(false),
151+
RestartPolicy: v1.RestartPolicyOnFailure,
152+
Containers: []v1.Container{
153+
{
154+
Name: "subdir-maker",
155+
Image: "ghcr.io/rstudio/rstudio-workbench-preview:jammy-daily",
156+
Command: []string{
157+
"/subdir-provisioner.sh",
139158
},
140-
{
141-
Name: "data-volume",
142-
ReadOnly: false,
143-
MountPath: "/mnt/",
159+
Args: args,
160+
VolumeMounts: []v1.VolumeMount{
161+
{
162+
Name: "exec-script",
163+
ReadOnly: false,
164+
MountPath: "/subdir-provisioner.sh",
165+
SubPath: "subdir-provisioner.sh",
166+
},
167+
{
168+
Name: "data-volume",
169+
ReadOnly: false,
170+
MountPath: "/mnt/",
171+
},
144172
},
145173
},
146174
},
147-
},
148-
SecurityContext: &v1.PodSecurityContext{
149-
RunAsUser: ptr.To(int64(0)),
150-
},
151-
Volumes: []v1.Volume{
152-
{
153-
Name: "exec-script",
154-
VolumeSource: v1.VolumeSource{
155-
ConfigMap: &v1.ConfigMapVolumeSource{
156-
LocalObjectReference: v1.LocalObjectReference{
157-
Name: provisionerName,
158-
},
159-
Items: []v1.KeyToPath{
160-
{
161-
Key: "subdir-provisioner.sh",
162-
Path: "subdir-provisioner.sh",
175+
SecurityContext: &v1.PodSecurityContext{
176+
RunAsUser: ptr.To(int64(0)),
177+
},
178+
Volumes: []v1.Volume{
179+
{
180+
Name: "exec-script",
181+
VolumeSource: v1.VolumeSource{
182+
ConfigMap: &v1.ConfigMapVolumeSource{
183+
LocalObjectReference: v1.LocalObjectReference{
184+
Name: provisionerName,
163185
},
186+
Items: []v1.KeyToPath{
187+
{
188+
Key: "subdir-provisioner.sh",
189+
Path: "subdir-provisioner.sh",
190+
},
191+
},
192+
DefaultMode: ptr.To(product.MustParseOctal("755")),
164193
},
165-
DefaultMode: ptr.To(product.MustParseOctal("755")),
166194
},
167195
},
168-
},
169-
{
170-
Name: "data-volume",
171-
VolumeSource: v1.VolumeSource{
172-
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
173-
ClaimName: provisionerName,
174-
ReadOnly: false,
196+
{
197+
Name: "data-volume",
198+
VolumeSource: v1.VolumeSource{
199+
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
200+
ClaimName: provisionerName,
201+
ReadOnly: false,
202+
},
175203
},
176204
},
177205
},
178206
},
179207
},
180-
},
208+
}
209+
return nil
210+
}); err != nil {
211+
l.Error(err, "Error creating provisioner job")
212+
return err
181213
}
182-
return nil
183-
}); err != nil {
184-
l.Error(err, "Error creating provisioner job")
185-
return err
186214
}
187215
}
188216
return nil

0 commit comments

Comments
 (0)