Commit fdbb960

Merge pull request kubernetes#90934 from iobuf/cronjob
CronJob: cleanup legacy ScheduledJob vars,docs
2 parents 850fddb + b33e202 commit fdbb960

7 files changed (+208 / -209 lines)

pkg/apis/batch/validation/validation.go

Lines changed: 5 additions & 5 deletions
@@ -173,16 +173,16 @@ func ValidateJobStatusUpdate(status, oldStatus batch.JobStatus) field.ErrorList
 }

 // ValidateCronJob validates a CronJob and returns an ErrorList with any errors.
-func ValidateCronJob(scheduledJob *batch.CronJob) field.ErrorList {
+func ValidateCronJob(cronJob *batch.CronJob) field.ErrorList {
     // CronJobs and rcs have the same name validation
-    allErrs := apivalidation.ValidateObjectMeta(&scheduledJob.ObjectMeta, true, apivalidation.ValidateReplicationControllerName, field.NewPath("metadata"))
-    allErrs = append(allErrs, ValidateCronJobSpec(&scheduledJob.Spec, field.NewPath("spec"))...)
-    if len(scheduledJob.ObjectMeta.Name) > apimachineryvalidation.DNS1035LabelMaxLength-11 {
+    allErrs := apivalidation.ValidateObjectMeta(&cronJob.ObjectMeta, true, apivalidation.ValidateReplicationControllerName, field.NewPath("metadata"))
+    allErrs = append(allErrs, ValidateCronJobSpec(&cronJob.Spec, field.NewPath("spec"))...)
+    if len(cronJob.ObjectMeta.Name) > apimachineryvalidation.DNS1035LabelMaxLength-11 {
         // The cronjob controller appends a 11-character suffix to the cronjob (`-$TIMESTAMP`) when
         // creating a job. The job name length limit is 63 characters.
         // Therefore cronjob names must have length <= 63-11=52. If we don't validate this here,
         // then job creation will fail later.
-        allErrs = append(allErrs, field.Invalid(field.NewPath("metadata").Child("name"), scheduledJob.ObjectMeta.Name, "must be no more than 52 characters"))
+        allErrs = append(allErrs, field.Invalid(field.NewPath("metadata").Child("name"), cronJob.ObjectMeta.Name, "must be no more than 52 characters"))
     }
     return allErrs
 }
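
As an aside, a minimal sketch of the name-budget arithmetic behind the 52-character limit enforced above: a generated Job name is the cronjob name plus an 11-character `-$TIMESTAMP` suffix, and the whole thing must fit the 63-character DNS-1035 label limit. The concrete name and timestamp below are illustrative, not taken from this commit.

package main

import "fmt"

func main() {
    const dns1035LabelMaxLength = 63 // limit on Job names (same value as apimachineryvalidation.DNS1035LabelMaxLength)
    const suffixLength = 11          // "-" plus a 10-digit Unix timestamp, e.g. "-1589978400"

    // Hypothetical cronjob name, 23 characters, well under the 52-character budget.
    cronJobName := "nightly-database-backup"
    jobName := fmt.Sprintf("%s-%d", cronJobName, 1589978400)

    fmt.Println(dns1035LabelMaxLength - suffixLength)          // 52: the longest cronjob name that still fits
    fmt.Println(len(jobName), len(jobName) <= dns1035LabelMaxLength)
}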

pkg/controller/cronjob/cronjob_controller.go

Lines changed: 67 additions & 68 deletions
@@ -18,14 +18,13 @@ package cronjob

 /*
 I did not use watch or expectations. Those add a lot of corner cases, and we aren't
-expecting a large volume of jobs or scheduledJobs. (We are favoring correctness
+expecting a large volume of jobs or cronJobs. (We are favoring correctness
 over scalability. If we find a single controller thread is too slow because
 there are a lot of Jobs or CronJobs, we can parallelize by Namespace.
 If we find the load on the API server is too high, we can use a watch and
 UndeltaStore.)

-Just periodically list jobs and SJs, and then reconcile them.
-
+Just periodically list jobs and cronJobs, and then reconcile them.
 */

 import (
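
The comment above states the controller's design: no watches or expectations, just a periodic relist followed by reconciliation of each CronJob against the Jobs it owns. Below is a minimal stdlib sketch of that pattern; the helper names, types, and period are hypothetical stand-ins, not the controller's actual API.

package main

import (
    "fmt"
    "time"
)

// Hypothetical stand-ins for the controller's list and reconcile steps.
func listCronJobsAndJobs() (cronJobs []string, jobs []string) {
    return []string{"backup"}, []string{"backup-1589978400"}
}

func reconcile(cronJob string, jobs []string) {
    fmt.Printf("reconciling %s against %d job(s)\n", cronJob, len(jobs))
}

func main() {
    ticker := time.NewTicker(time.Second) // the real controller resyncs on its own fixed period
    defer ticker.Stop()

    for i := 0; i < 3; i++ {
        <-ticker.C
        // Relist everything, then reconcile each CronJob against the jobs it owns,
        // favoring correctness over scalability, as the comment explains.
        cronJobs, jobs := listCronJobsAndJobs()
        for _, cj := range cronJobs {
            reconcile(cj, jobs)
        }
    }
}
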
@@ -63,7 +62,7 @@ var controllerKind = batchv1beta1.SchemeGroupVersion.WithKind("CronJob")
 type Controller struct {
     kubeClient clientset.Interface
     jobControl jobControlInterface
-    sjControl sjControlInterface
+    cjControl cjControlInterface
     podControl podControlInterface
     recorder record.EventRecorder
 }
@@ -83,7 +82,7 @@ func NewController(kubeClient clientset.Interface) (*Controller, error) {
     jm := &Controller{
         kubeClient: kubeClient,
         jobControl: realJobControl{KubeClient: kubeClient},
-        sjControl: &realSJControl{KubeClient: kubeClient},
+        cjControl: &realCJControl{KubeClient: kubeClient},
         podControl: &realPodControl{KubeClient: kubeClient},
         recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cronjob-controller"}),
     }
@@ -131,15 +130,15 @@ func (jm *Controller) syncAll() {
         return jm.kubeClient.BatchV1beta1().CronJobs(metav1.NamespaceAll).List(context.TODO(), opts)
     }

-    jobsBySj := groupJobsByParent(js)
-    klog.V(4).Infof("Found %d groups", len(jobsBySj))
+    jobsByCj := groupJobsByParent(js)
+    klog.V(4).Infof("Found %d groups", len(jobsByCj))
     err = pager.New(pager.SimplePageFunc(cronJobListFunc)).EachListItem(context.Background(), metav1.ListOptions{}, func(object runtime.Object) error {
-        sj, ok := object.(*batchv1beta1.CronJob)
+        cj, ok := object.(*batchv1beta1.CronJob)
         if !ok {
-            return fmt.Errorf("expected type *batchv1beta1.CronJob, got type %T", sj)
+            return fmt.Errorf("expected type *batchv1beta1.CronJob, got type %T", cj)
         }
-        syncOne(sj, jobsBySj[sj.UID], time.Now(), jm.jobControl, jm.sjControl, jm.recorder)
-        cleanupFinishedJobs(sj, jobsBySj[sj.UID], jm.jobControl, jm.sjControl, jm.recorder)
+        syncOne(cj, jobsByCj[cj.UID], time.Now(), jm.jobControl, jm.cjControl, jm.recorder)
+        cleanupFinishedJobs(cj, jobsByCj[cj.UID], jm.jobControl, jm.cjControl, jm.recorder)
         return nil
     })

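`groupJobsByParent` is not part of this diff. For orientation, here is a hedged, self-contained sketch of the grouping step that `jobsByCj` relies on: jobs are bucketed by the UID of the CronJob that owns them, so each CronJob is reconciled against only its own children. The simplified types below are hypothetical, not the real batchv1 types.

package main

import "fmt"

type UID string

// Simplified stand-in for a Job together with its controller owner reference.
type Job struct {
    Name     string
    OwnerUID UID // UID of the owning CronJob, empty if none
}

// groupJobsByOwner buckets jobs by the UID of their owning CronJob,
// mirroring the role of the jobsByCj map in syncAll.
func groupJobsByOwner(jobs []Job) map[UID][]Job {
    byOwner := make(map[UID][]Job)
    for _, j := range jobs {
        if j.OwnerUID == "" {
            continue // not created by any CronJob
        }
        byOwner[j.OwnerUID] = append(byOwner[j.OwnerUID], j)
    }
    return byOwner
}

func main() {
    jobs := []Job{
        {Name: "backup-1589978400", OwnerUID: "cj-1"},
        {Name: "backup-1589982000", OwnerUID: "cj-1"},
        {Name: "orphan", OwnerUID: ""},
    }
    fmt.Println(len(groupJobsByOwner(jobs)["cj-1"])) // 2
}
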
@@ -150,10 +149,10 @@ func (jm *Controller) syncAll() {
 }

 // cleanupFinishedJobs cleanups finished jobs created by a CronJob
-func cleanupFinishedJobs(sj *batchv1beta1.CronJob, js []batchv1.Job, jc jobControlInterface,
-    sjc sjControlInterface, recorder record.EventRecorder) {
+func cleanupFinishedJobs(cj *batchv1beta1.CronJob, js []batchv1.Job, jc jobControlInterface,
+    cjc cjControlInterface, recorder record.EventRecorder) {
     // If neither limits are active, there is no need to do anything.
-    if sj.Spec.FailedJobsHistoryLimit == nil && sj.Spec.SuccessfulJobsHistoryLimit == nil {
+    if cj.Spec.FailedJobsHistoryLimit == nil && cj.Spec.SuccessfulJobsHistoryLimit == nil {
         return
     }

@@ -169,107 +168,107 @@ func cleanupFinishedJobs(sj *batchv1beta1.CronJob, js []batchv1.Job, jc jobContr
         }
     }

-    if sj.Spec.SuccessfulJobsHistoryLimit != nil {
-        removeOldestJobs(sj,
+    if cj.Spec.SuccessfulJobsHistoryLimit != nil {
+        removeOldestJobs(cj,
             successfulJobs,
             jc,
-            *sj.Spec.SuccessfulJobsHistoryLimit,
+            *cj.Spec.SuccessfulJobsHistoryLimit,
             recorder)
     }

-    if sj.Spec.FailedJobsHistoryLimit != nil {
-        removeOldestJobs(sj,
+    if cj.Spec.FailedJobsHistoryLimit != nil {
+        removeOldestJobs(cj,
             failedJobs,
             jc,
-            *sj.Spec.FailedJobsHistoryLimit,
+            *cj.Spec.FailedJobsHistoryLimit,
             recorder)
     }

     // Update the CronJob, in case jobs were removed from the list.
-    if _, err := sjc.UpdateStatus(sj); err != nil {
-        nameForLog := fmt.Sprintf("%s/%s", sj.Namespace, sj.Name)
-        klog.Infof("Unable to update status for %s (rv = %s): %v", nameForLog, sj.ResourceVersion, err)
+    if _, err := cjc.UpdateStatus(cj); err != nil {
+        nameForLog := fmt.Sprintf("%s/%s", cj.Namespace, cj.Name)
+        klog.Infof("Unable to update status for %s (rv = %s): %v", nameForLog, cj.ResourceVersion, err)
     }
 }

 // removeOldestJobs removes the oldest jobs from a list of jobs
-func removeOldestJobs(sj *batchv1beta1.CronJob, js []batchv1.Job, jc jobControlInterface, maxJobs int32, recorder record.EventRecorder) {
+func removeOldestJobs(cj *batchv1beta1.CronJob, js []batchv1.Job, jc jobControlInterface, maxJobs int32, recorder record.EventRecorder) {
     numToDelete := len(js) - int(maxJobs)
     if numToDelete <= 0 {
         return
     }

-    nameForLog := fmt.Sprintf("%s/%s", sj.Namespace, sj.Name)
+    nameForLog := fmt.Sprintf("%s/%s", cj.Namespace, cj.Name)
     klog.V(4).Infof("Cleaning up %d/%d jobs from %s", numToDelete, len(js), nameForLog)

     sort.Sort(byJobStartTime(js))
     for i := 0; i < numToDelete; i++ {
         klog.V(4).Infof("Removing job %s from %s", js[i].Name, nameForLog)
-        deleteJob(sj, &js[i], jc, recorder)
+        deleteJob(cj, &js[i], jc, recorder)
     }
 }

 // syncOne reconciles a CronJob with a list of any Jobs that it created.
-// All known jobs created by "sj" should be included in "js".
+// All known jobs created by "cj" should be included in "js".
 // The current time is passed in to facilitate testing.
 // It has no receiver, to facilitate testing.
-func syncOne(sj *batchv1beta1.CronJob, js []batchv1.Job, now time.Time, jc jobControlInterface, sjc sjControlInterface, recorder record.EventRecorder) {
-    nameForLog := fmt.Sprintf("%s/%s", sj.Namespace, sj.Name)
+func syncOne(cj *batchv1beta1.CronJob, js []batchv1.Job, now time.Time, jc jobControlInterface, cjc cjControlInterface, recorder record.EventRecorder) {
+    nameForLog := fmt.Sprintf("%s/%s", cj.Namespace, cj.Name)

     childrenJobs := make(map[types.UID]bool)
     for _, j := range js {
         childrenJobs[j.ObjectMeta.UID] = true
-        found := inActiveList(*sj, j.ObjectMeta.UID)
+        found := inActiveList(*cj, j.ObjectMeta.UID)
         if !found && !IsJobFinished(&j) {
-            recorder.Eventf(sj, v1.EventTypeWarning, "UnexpectedJob", "Saw a job that the controller did not create or forgot: %s", j.Name)
+            recorder.Eventf(cj, v1.EventTypeWarning, "UnexpectedJob", "Saw a job that the controller did not create or forgot: %s", j.Name)
             // We found an unfinished job that has us as the parent, but it is not in our Active list.
             // This could happen if we crashed right after creating the Job and before updating the status,
-            // or if our jobs list is newer than our sj status after a relist, or if someone intentionally created
+            // or if our jobs list is newer than our cj status after a relist, or if someone intentionally created
             // a job that they wanted us to adopt.

             // TODO: maybe handle the adoption case? Concurrency/suspend rules will not apply in that case, obviously, since we can't
             // stop users from creating jobs if they have permission. It is assumed that if a
-            // user has permission to create a job within a namespace, then they have permission to make any scheduledJob
+            // user has permission to create a job within a namespace, then they have permission to make any cronJob
             // in the same namespace "adopt" that job. ReplicaSets and their Pods work the same way.
-            // TBS: how to update sj.Status.LastScheduleTime if the adopted job is newer than any we knew about?
+            // TBS: how to update cj.Status.LastScheduleTime if the adopted job is newer than any we knew about?
         } else if found && IsJobFinished(&j) {
             _, status := getFinishedStatus(&j)
-            deleteFromActiveList(sj, j.ObjectMeta.UID)
-            recorder.Eventf(sj, v1.EventTypeNormal, "SawCompletedJob", "Saw completed job: %s, status: %v", j.Name, status)
+            deleteFromActiveList(cj, j.ObjectMeta.UID)
+            recorder.Eventf(cj, v1.EventTypeNormal, "SawCompletedJob", "Saw completed job: %s, status: %v", j.Name, status)
         }
     }

     // Remove any job reference from the active list if the corresponding job does not exist any more.
     // Otherwise, the cronjob may be stuck in active mode forever even though there is no matching
     // job running.
-    for _, j := range sj.Status.Active {
+    for _, j := range cj.Status.Active {
         if found := childrenJobs[j.UID]; !found {
-            recorder.Eventf(sj, v1.EventTypeNormal, "MissingJob", "Active job went missing: %v", j.Name)
-            deleteFromActiveList(sj, j.UID)
+            recorder.Eventf(cj, v1.EventTypeNormal, "MissingJob", "Active job went missing: %v", j.Name)
+            deleteFromActiveList(cj, j.UID)
         }
     }

-    updatedSJ, err := sjc.UpdateStatus(sj)
+    updatedCJ, err := cjc.UpdateStatus(cj)
     if err != nil {
-        klog.Errorf("Unable to update status for %s (rv = %s): %v", nameForLog, sj.ResourceVersion, err)
+        klog.Errorf("Unable to update status for %s (rv = %s): %v", nameForLog, cj.ResourceVersion, err)
         return
     }
-    *sj = *updatedSJ
+    *cj = *updatedCJ

-    if sj.DeletionTimestamp != nil {
+    if cj.DeletionTimestamp != nil {
         // The CronJob is being deleted.
         // Don't do anything other than updating status.
         return
     }

-    if sj.Spec.Suspend != nil && *sj.Spec.Suspend {
+    if cj.Spec.Suspend != nil && *cj.Spec.Suspend {
         klog.V(4).Infof("Not starting job for %s because it is suspended", nameForLog)
         return
     }

-    times, err := getRecentUnmetScheduleTimes(*sj, now)
+    times, err := getRecentUnmetScheduleTimes(*cj, now)
     if err != nil {
-        recorder.Eventf(sj, v1.EventTypeWarning, "FailedNeedsStart", "Cannot determine if job needs to be started: %v", err)
+        recorder.Eventf(cj, v1.EventTypeWarning, "FailedNeedsStart", "Cannot determine if job needs to be started: %v", err)
         klog.Errorf("Cannot determine if %s needs to be started: %v", nameForLog, err)
         return
     }
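
For the history-trimming arithmetic in removeOldestJobs above: with a limit of 3 and 5 finished jobs, numToDelete is 2, and after sorting by start time the 2 oldest are removed. A tiny standalone illustration with made-up timestamps (not part of this commit):

package main

import (
    "fmt"
    "sort"
)

func main() {
    // Hypothetical finished-job start times (Unix seconds).
    startTimes := []int64{1589978400, 1589892000, 1590064800, 1589805600, 1590151200}
    const historyLimit = 3

    numToDelete := len(startTimes) - historyLimit // 2
    sort.Slice(startTimes, func(i, j int) bool { return startTimes[i] < startTimes[j] })

    fmt.Println("delete:", startTimes[:numToDelete]) // the two oldest jobs
    fmt.Println("keep:  ", startTimes[numToDelete:])
}
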
@@ -284,22 +283,22 @@ func syncOne(sj *batchv1beta1.CronJob, js []batchv1.Job, now time.Time, jc jobCo

     scheduledTime := times[len(times)-1]
     tooLate := false
-    if sj.Spec.StartingDeadlineSeconds != nil {
-        tooLate = scheduledTime.Add(time.Second * time.Duration(*sj.Spec.StartingDeadlineSeconds)).Before(now)
+    if cj.Spec.StartingDeadlineSeconds != nil {
+        tooLate = scheduledTime.Add(time.Second * time.Duration(*cj.Spec.StartingDeadlineSeconds)).Before(now)
     }
     if tooLate {
         klog.V(4).Infof("Missed starting window for %s", nameForLog)
-        recorder.Eventf(sj, v1.EventTypeWarning, "MissSchedule", "Missed scheduled time to start a job: %s", scheduledTime.Format(time.RFC1123Z))
+        recorder.Eventf(cj, v1.EventTypeWarning, "MissSchedule", "Missed scheduled time to start a job: %s", scheduledTime.Format(time.RFC1123Z))
         // TODO: Since we don't set LastScheduleTime when not scheduling, we are going to keep noticing
         // the miss every cycle. In order to avoid sending multiple events, and to avoid processing
-        // the sj again and again, we could set a Status.LastMissedTime when we notice a miss.
+        // the cj again and again, we could set a Status.LastMissedTime when we notice a miss.
         // Then, when we call getRecentUnmetScheduleTimes, we can take max(creationTimestamp,
         // Status.LastScheduleTime, Status.LastMissedTime), and then so we won't generate
         // and event the next time we process it, and also so the user looking at the status
         // can see easily that there was a missed execution.
         return
     }
-    if sj.Spec.ConcurrencyPolicy == batchv1beta1.ForbidConcurrent && len(sj.Status.Active) > 0 {
+    if cj.Spec.ConcurrencyPolicy == batchv1beta1.ForbidConcurrent && len(cj.Status.Active) > 0 {
         // Regardless which source of information we use for the set of active jobs,
         // there is some risk that we won't see an active job when there is one.
         // (because we haven't seen the status update to the SJ or the created pod).
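
The tooLate check in the hunk above is plain deadline arithmetic: a run is skipped when its scheduled time plus StartingDeadlineSeconds is already in the past. A small standalone illustration with made-up times (not from the diff):

package main

import (
    "fmt"
    "time"
)

func main() {
    // Hypothetical values: the run was due at 10:00:00, the deadline is 100 seconds,
    // and the controller notices at 10:02:00, so the start window was missed.
    scheduledTime := time.Date(2020, 5, 20, 10, 0, 0, 0, time.UTC)
    startingDeadlineSeconds := int64(100)
    now := time.Date(2020, 5, 20, 10, 2, 0, 0, time.UTC)

    tooLate := scheduledTime.Add(time.Second * time.Duration(startingDeadlineSeconds)).Before(now)
    fmt.Println(tooLate) // true: skip this run and report a MissSchedule event
}
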
@@ -312,37 +311,37 @@ func syncOne(sj *batchv1beta1.CronJob, js []batchv1.Job, now time.Time, jc jobCo
         klog.V(4).Infof("Not starting job for %s because of prior execution still running and concurrency policy is Forbid", nameForLog)
         return
     }
-    if sj.Spec.ConcurrencyPolicy == batchv1beta1.ReplaceConcurrent {
-        for _, j := range sj.Status.Active {
+    if cj.Spec.ConcurrencyPolicy == batchv1beta1.ReplaceConcurrent {
+        for _, j := range cj.Status.Active {
             klog.V(4).Infof("Deleting job %s of %s that was still running at next scheduled start time", j.Name, nameForLog)

             job, err := jc.GetJob(j.Namespace, j.Name)
             if err != nil {
-                recorder.Eventf(sj, v1.EventTypeWarning, "FailedGet", "Get job: %v", err)
+                recorder.Eventf(cj, v1.EventTypeWarning, "FailedGet", "Get job: %v", err)
                 return
             }
-            if !deleteJob(sj, job, jc, recorder) {
+            if !deleteJob(cj, job, jc, recorder) {
                 return
             }
         }
     }

-    jobReq, err := getJobFromTemplate(sj, scheduledTime)
+    jobReq, err := getJobFromTemplate(cj, scheduledTime)
     if err != nil {
         klog.Errorf("Unable to make Job from template in %s: %v", nameForLog, err)
         return
     }
-    jobResp, err := jc.CreateJob(sj.Namespace, jobReq)
+    jobResp, err := jc.CreateJob(cj.Namespace, jobReq)
     if err != nil {
         // If the namespace is being torn down, we can safely ignore
         // this error since all subsequent creations will fail.
         if !errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
-            recorder.Eventf(sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err)
+            recorder.Eventf(cj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err)
         }
         return
     }
     klog.V(4).Infof("Created Job %s for %s", jobResp.Name, nameForLog)
-    recorder.Eventf(sj, v1.EventTypeNormal, "SuccessfulCreate", "Created job %v", jobResp.Name)
+    recorder.Eventf(cj, v1.EventTypeNormal, "SuccessfulCreate", "Created job %v", jobResp.Name)

     // ------------------------------------------------------------------ //

@@ -359,29 +358,29 @@ func syncOne(sj *batchv1beta1.CronJob, js []batchv1.Job, now time.Time, jc jobCo
     if err != nil {
         klog.V(2).Infof("Unable to make object reference for job for %s", nameForLog)
     } else {
-        sj.Status.Active = append(sj.Status.Active, *ref)
+        cj.Status.Active = append(cj.Status.Active, *ref)
     }
-    sj.Status.LastScheduleTime = &metav1.Time{Time: scheduledTime}
-    if _, err := sjc.UpdateStatus(sj); err != nil {
-        klog.Infof("Unable to update status for %s (rv = %s): %v", nameForLog, sj.ResourceVersion, err)
+    cj.Status.LastScheduleTime = &metav1.Time{Time: scheduledTime}
+    if _, err := cjc.UpdateStatus(cj); err != nil {
+        klog.Infof("Unable to update status for %s (rv = %s): %v", nameForLog, cj.ResourceVersion, err)
     }

     return
 }

 // deleteJob reaps a job, deleting the job, the pods and the reference in the active list
-func deleteJob(sj *batchv1beta1.CronJob, job *batchv1.Job, jc jobControlInterface, recorder record.EventRecorder) bool {
-    nameForLog := fmt.Sprintf("%s/%s", sj.Namespace, sj.Name)
+func deleteJob(cj *batchv1beta1.CronJob, job *batchv1.Job, jc jobControlInterface, recorder record.EventRecorder) bool {
+    nameForLog := fmt.Sprintf("%s/%s", cj.Namespace, cj.Name)

     // delete the job itself...
     if err := jc.DeleteJob(job.Namespace, job.Name); err != nil {
-        recorder.Eventf(sj, v1.EventTypeWarning, "FailedDelete", "Deleted job: %v", err)
+        recorder.Eventf(cj, v1.EventTypeWarning, "FailedDelete", "Deleted job: %v", err)
         klog.Errorf("Error deleting job %s from %s: %v", job.Name, nameForLog, err)
         return false
     }
     // ... and its reference from active list
-    deleteFromActiveList(sj, job.ObjectMeta.UID)
-    recorder.Eventf(sj, v1.EventTypeNormal, "SuccessfulDelete", "Deleted job %v", job.Name)
+    deleteFromActiveList(cj, job.ObjectMeta.UID)
+    recorder.Eventf(cj, v1.EventTypeNormal, "SuccessfulDelete", "Deleted job %v", job.Name)

     return true
 }
