Skip to content

Commit 8add17d

Browse files
committed
[YUNIKORN-2368] Shim: Send updated resource requests to core (#912)
Closes: #912
1 parent cd60dd9 commit 8add17d

File tree

8 files changed

+88
-54
lines changed

8 files changed

+88
-54
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ go 1.22.0
2323
toolchain go1.22.5
2424

2525
require (
26-
github.com/apache/yunikorn-core v0.0.0-20240827175300-6939b13d1d0e
26+
github.com/apache/yunikorn-core v0.0.0-20240908061623-6f06490bcfa3
2727
github.com/apache/yunikorn-scheduler-interface v0.0.0-20240827015655-68e8c6cca28a
2828
github.com/google/go-cmp v0.6.0
2929
github.com/google/uuid v1.6.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cq
88
github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c=
99
github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
1010
github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
11-
github.com/apache/yunikorn-core v0.0.0-20240827175300-6939b13d1d0e h1:VaihjHjtmsDK7HEOjlX8KCz7QDxmZSf71CSCuOgjqcc=
12-
github.com/apache/yunikorn-core v0.0.0-20240827175300-6939b13d1d0e/go.mod h1:HYeyzHhZt43oG54pasKHrwHM+Jeji8nFoAE2bcLWLYg=
11+
github.com/apache/yunikorn-core v0.0.0-20240908061623-6f06490bcfa3 h1:ySu0cpFSYFGNtf+PZw4ulzO+cWOyJMYJs+AjmwGWM80=
12+
github.com/apache/yunikorn-core v0.0.0-20240908061623-6f06490bcfa3/go.mod h1:HYeyzHhZt43oG54pasKHrwHM+Jeji8nFoAE2bcLWLYg=
1313
github.com/apache/yunikorn-scheduler-interface v0.0.0-20240827015655-68e8c6cca28a h1:3WRXGTvhunGBZj8AVZDxx7Bs/AXiH9mvf2jYcuDyklA=
1414
github.com/apache/yunikorn-scheduler-interface v0.0.0-20240827015655-68e8c6cca28a/go.mod h1:co3uU98sj1CUTPNTM13lTyi+CY0DOgDndDW2KiUjktU=
1515
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=

pkg/cache/application.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -546,13 +546,14 @@ func (app *Application) onReservationStateChange() {
546546

547547
for _, t := range app.getTasks(TaskStates().Bound) {
548548
if t.placeholder {
549-
if _, ok := desireCounts[t.taskGroupName]; ok {
550-
desireCounts[t.taskGroupName]--
549+
taskGroupName := t.GetTaskGroupName()
550+
if _, ok := desireCounts[taskGroupName]; ok {
551+
desireCounts[taskGroupName]--
551552
} else {
552553
log.Log(log.ShimCacheApplication).Debug("placeholder taskGroupName set on pod is unknown for application",
553554
zap.String("application", app.applicationID),
554555
zap.String("podName", t.GetTaskPod().Name),
555-
zap.String("taskGroupName", t.taskGroupName))
556+
zap.String("taskGroupName", taskGroupName))
556557
}
557558
}
558559
}
@@ -659,12 +660,13 @@ func (app *Application) handleAppTaskCompletedEvent() {
659660
}
660661

661662
func (app *Application) publishPlaceholderTimeoutEvents(task *Task) {
662-
if app.originatingTask != nil && task.IsPlaceholder() && task.terminationType == si.TerminationType_name[int32(si.TerminationType_TIMEOUT)] {
663+
taskTerminationType := task.GetTaskTerminationType()
664+
if app.originatingTask != nil && task.IsPlaceholder() && taskTerminationType == si.TerminationType_name[int32(si.TerminationType_TIMEOUT)] {
663665
log.Log(log.ShimCacheApplication).Debug("trying to send placeholder timeout events to the original pod from application",
664666
zap.String("appID", app.applicationID),
665667
zap.Stringer("app request originating pod", app.originatingTask.GetTaskPod()),
666668
zap.String("taskID", task.taskID),
667-
zap.String("terminationType", task.terminationType))
669+
zap.String("terminationType", taskTerminationType))
668670
events.GetRecorder().Eventf(app.originatingTask.GetTaskPod().DeepCopy(), nil, v1.EventTypeWarning, "GangScheduling",
669671
"PlaceholderTimeOut", "Application %s placeholder has been timed out", app.applicationID)
670672
}

pkg/cache/context.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ func (ctx *Context) updateYuniKornPod(appID string, pod *v1.Pod) {
299299
app := ctx.getApplication(appID)
300300
if app != nil {
301301
if task := app.GetTask(taskID); task != nil {
302-
task.setTaskPod(pod)
302+
task.SetTaskPod(pod)
303303
}
304304
}
305305

@@ -1194,7 +1194,7 @@ func (ctx *Context) HandleContainerStateUpdate(request *si.UpdateContainerSchedu
11941194
Reason: "SchedulingSkipped",
11951195
Message: request.Reason,
11961196
}) {
1197-
events.GetRecorder().Eventf(task.pod.DeepCopy(), nil,
1197+
events.GetRecorder().Eventf(task.GetTaskPod().DeepCopy(), nil,
11981198
v1.EventTypeNormal, "PodUnschedulable", "PodUnschedulable",
11991199
"Task %s is skipped from scheduling because the queue quota has been exceed", task.alias)
12001200
}
@@ -1209,7 +1209,7 @@ func (ctx *Context) HandleContainerStateUpdate(request *si.UpdateContainerSchedu
12091209
Reason: v1.PodReasonUnschedulable,
12101210
Message: request.Reason,
12111211
}) {
1212-
events.GetRecorder().Eventf(task.pod.DeepCopy(), nil,
1212+
events.GetRecorder().Eventf(task.GetTaskPod().DeepCopy(), nil,
12131213
v1.EventTypeNormal, "PodUnschedulable", "PodUnschedulable",
12141214
"Task %s is pending for the requested resources become available", task.alias)
12151215
}

pkg/cache/placeholder_manager.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func (mgr *PlaceholderManager) createAppPlaceholders(app *Application) error {
7979
// map task group to count of already created placeholders
8080
tgCounts := make(map[string]int32)
8181
for _, ph := range app.getPlaceHolderTasks() {
82-
tgCounts[ph.getTaskGroupName()]++
82+
tgCounts[ph.GetTaskGroupName()]++
8383
}
8484

8585
// iterate all task groups, create placeholders for all the min members

pkg/cache/scheduler_callback.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func (callback *AsyncRMCallback) UpdateAllocation(response *si.AllocationRespons
6464
task.setAllocationKey(alloc.AllocationKey)
6565

6666
if err := callback.context.AssumePod(alloc.AllocationKey, alloc.NodeID); err != nil {
67-
task.failWithEvent(err.Error(), "AssumePodError")
67+
task.FailWithEvent(err.Error(), "AssumePodError")
6868
return err
6969
}
7070

pkg/cache/task.go

Lines changed: 71 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -40,24 +40,27 @@ import (
4040
)
4141

4242
type Task struct {
43-
taskID string
44-
alias string
45-
applicationID string
46-
application *Application
43+
taskID string
44+
alias string
45+
applicationID string
46+
application *Application
47+
podStatus v1.PodStatus // pod status, maintained separately for efficiency reasons
48+
context *Context
49+
createTime time.Time
50+
placeholder bool
51+
originator bool
52+
sm *fsm.FSM
53+
54+
// mutable resources, require locking
4755
allocationKey string
48-
resource *si.Resource
49-
pod *v1.Pod
50-
podStatus v1.PodStatus // pod status, maintained separately for efficiency reasons
51-
context *Context
5256
nodeName string
53-
createTime time.Time
5457
taskGroupName string
55-
placeholder bool
5658
terminationType string
57-
originator bool
5859
schedulingState TaskSchedulingState
59-
sm *fsm.FSM
60-
lock *locking.RWMutex
60+
resource *si.Resource
61+
pod *v1.Pod
62+
63+
lock *locking.RWMutex
6164
}
6265

6366
func NewTask(tid string, app *Application, ctx *Context, pod *v1.Pod) *Task {
@@ -135,14 +138,10 @@ func (task *Task) GetTaskPod() *v1.Pod {
135138
}
136139

137140
func (task *Task) GetTaskID() string {
138-
task.lock.RLock()
139-
defer task.lock.RUnlock()
140141
return task.taskID
141142
}
142143

143144
func (task *Task) IsPlaceholder() bool {
144-
task.lock.RLock()
145-
defer task.lock.RUnlock()
146145
return task.placeholder
147146
}
148147

@@ -157,19 +156,25 @@ func (task *Task) setTaskGroupName(groupName string) {
157156
task.taskGroupName = groupName
158157
}
159158

160-
func (task *Task) setTaskTerminationType(terminationTyp string) {
159+
func (task *Task) setTaskTerminationType(terminationType string) {
161160
task.lock.Lock()
162161
defer task.lock.Unlock()
163-
task.terminationType = terminationTyp
162+
task.terminationType = terminationType
164163
}
165164

166-
func (task *Task) getTaskGroupName() string {
165+
func (task *Task) GetTaskTerminationType() string {
166+
task.lock.RLock()
167+
defer task.lock.RUnlock()
168+
return task.terminationType
169+
}
170+
171+
func (task *Task) GetTaskGroupName() string {
167172
task.lock.RLock()
168173
defer task.lock.RUnlock()
169174
return task.taskGroupName
170175
}
171176

172-
func (task *Task) getNodeName() string {
177+
func (task *Task) GetNodeName() string {
173178
task.lock.RLock()
174179
defer task.lock.RUnlock()
175180
return task.nodeName
@@ -222,8 +227,6 @@ func (task *Task) initialize() {
222227
}
223228

224229
func (task *Task) IsOriginator() bool {
225-
task.lock.RLock()
226-
defer task.lock.RUnlock()
227230
return task.originator
228231
}
229232

@@ -286,13 +289,33 @@ func (task *Task) handleSubmitTaskEvent() {
286289
log.Log(log.ShimCacheTask).Debug("scheduling pod",
287290
zap.String("podName", task.pod.Name))
288291

292+
// send update allocation event to core
293+
task.updateAllocation()
294+
295+
if !utils.PodAlreadyBound(task.pod) {
296+
// if this is a new request, add events to pod
297+
events.GetRecorder().Eventf(task.pod.DeepCopy(), nil, v1.EventTypeNormal, "Scheduling", "Scheduling",
298+
"%s is queued and waiting for allocation", task.alias)
299+
// if this task belongs to a task group, that means the app has gang scheduling enabled
300+
// in this case, post an event to indicate the task is being gang scheduled
301+
if !task.placeholder && task.taskGroupName != "" {
302+
events.GetRecorder().Eventf(task.pod.DeepCopy(), nil,
303+
v1.EventTypeNormal, "GangScheduling", "TaskGroupMatch",
304+
"Pod belongs to the taskGroup %s, it will be scheduled as a gang member", task.taskGroupName)
305+
}
306+
}
307+
}
308+
309+
// updateAllocation updates the core scheduler when task information changes.
310+
// This function must be called with the task lock held.
311+
func (task *Task) updateAllocation() {
289312
// build preemption policy
290313
preemptionPolicy := &si.PreemptionPolicy{
291314
AllowPreemptSelf: task.isPreemptSelfAllowed(),
292315
AllowPreemptOther: task.isPreemptOtherAllowed(),
293316
}
294317

295-
// submit allocation ask
318+
// submit allocation
296319
rr := common.CreateAllocationForTask(
297320
task.applicationID,
298321
task.taskID,
@@ -305,22 +328,9 @@ func (task *Task) handleSubmitTaskEvent() {
305328
preemptionPolicy)
306329
log.Log(log.ShimCacheTask).Debug("send update request", zap.Stringer("request", rr))
307330
if err := task.context.apiProvider.GetAPIs().SchedulerAPI.UpdateAllocation(rr); err != nil {
308-
log.Log(log.ShimCacheTask).Debug("failed to send scheduling request to scheduler", zap.Error(err))
331+
log.Log(log.ShimCacheTask).Debug("failed to send allocation to scheduler", zap.Error(err))
309332
return
310333
}
311-
312-
if !utils.PodAlreadyBound(task.pod) {
313-
// if this is a new request, add events to pod
314-
events.GetRecorder().Eventf(task.pod.DeepCopy(), nil, v1.EventTypeNormal, "Scheduling", "Scheduling",
315-
"%s is queued and waiting for allocation", task.alias)
316-
// if this task belongs to a task group, that means the app has gang scheduling enabled
317-
// in this case, post an event to indicate the task is being gang scheduled
318-
if !task.placeholder && task.taskGroupName != "" {
319-
events.GetRecorder().Eventf(task.pod.DeepCopy(), nil,
320-
v1.EventTypeNormal, "GangScheduling", "TaskGroupMatch",
321-
"Pod belongs to the taskGroup %s, it will be scheduled as a gang member", task.taskGroupName)
322-
}
323-
}
324334
}
325335

326336
// this is called after task reaches PENDING state,
@@ -604,20 +614,42 @@ func (task *Task) UpdatePodCondition(podCondition *v1.PodCondition) (bool, *v1.P
604614
return false, pod
605615
}
606616

617+
func (task *Task) GetAllocationKey() string {
618+
task.lock.RLock()
619+
defer task.lock.RUnlock()
620+
return task.allocationKey
621+
}
622+
607623
func (task *Task) setAllocationKey(allocationKey string) {
608624
task.lock.Lock()
609625
defer task.lock.Unlock()
610626
task.allocationKey = allocationKey
611627
}
612628

629+
func (task *Task) FailWithEvent(errorMessage, actionReason string) {
630+
task.lock.RLock()
631+
defer task.lock.RUnlock()
632+
task.failWithEvent(errorMessage, actionReason)
633+
}
634+
613635
func (task *Task) failWithEvent(errorMessage, actionReason string) {
614636
dispatcher.Dispatch(NewFailTaskEvent(task.applicationID, task.taskID, errorMessage))
615637
events.GetRecorder().Eventf(task.pod.DeepCopy(),
616638
nil, v1.EventTypeWarning, actionReason, actionReason, errorMessage)
617639
}
618640

619-
func (task *Task) setTaskPod(pod *v1.Pod) {
641+
func (task *Task) SetTaskPod(pod *v1.Pod) {
620642
task.lock.Lock()
621643
defer task.lock.Unlock()
644+
622645
task.pod = pod
646+
oldResource := task.resource
647+
newResource := common.GetPodResource(pod)
648+
if !common.Equals(oldResource, newResource) {
649+
// pod resources have changed
650+
task.resource = newResource
651+
652+
// update allocation in core
653+
task.updateAllocation()
654+
}
623655
}

pkg/cache/task_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ func TestReleaseTaskAllocation(t *testing.T) {
203203
// bind a task is a async process, wait for it to happen
204204
err = utils.WaitForCondition(
205205
func() bool {
206-
return task.getNodeName() == "node-1"
206+
return task.GetNodeName() == "node-1"
207207
},
208208
100*time.Millisecond,
209209
3*time.Second,
@@ -481,7 +481,7 @@ func TestSetTaskGroup(t *testing.T) {
481481
}
482482
task := NewTask("task01", app, mockedContext, pod)
483483
task.setTaskGroupName("test-group")
484-
assert.Equal(t, task.getTaskGroupName(), "test-group")
484+
assert.Equal(t, task.GetTaskGroupName(), "test-group")
485485
}
486486

487487
//nolint:funlen

0 commit comments

Comments
 (0)