Skip to content

Commit acddc0c

Browse files
authored
clear prefill target header if set in incoming request (#244)
Signed-off-by: Etai Lev Ran <[email protected]>
1 parent 086c2e0 commit acddc0c

File tree

2 files changed

+6
-2
lines changed

2 files changed

+6
-2
lines changed

pkg/plugins/pre-request/pd_prerequest.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,10 @@ func (p *PrefillHeaderHandler) WithName(name string) *PrefillHeaderHandler {
6969

7070
// PreRequest wires prefill SchedulerProfile result into a header to indicate prefill worker
7171
func (p *PrefillHeaderHandler) PreRequest(_ context.Context, request *types.LLMRequest, schedulingResult *types.SchedulingResult, targetPort int) {
72+
if _, found := request.Headers[prefillPodHeader]; found {
73+
request.Headers[prefillPodHeader] = "" // clear header, if already set
74+
}
75+
7276
prefillProfileRunResult, exists := schedulingResult.ProfileResults[p.prefillProfile]
7377
if !exists {
7478
return // prefill profile failed to run or we chose not to run it, no-op in this case

pkg/plugins/profile/pd_profile_handler.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -100,8 +100,8 @@ func (h *PdProfileHandler) Pick(ctx context.Context, cycleState *types.CycleStat
100100
}
101101

102102
// if we're here that means decode profile ran successfully, and we have additional profile configured that didn't run yet,
103-
// which means PD is enabled (otherwise, prefil profile is not configured at all and this profile handler is not used).
104-
// inspect decode execution result to decide if prefil should run or not.
103+
// which means PD is enabled (otherwise, prefill profile is not configured at all and this profile handler is not used).
104+
// inspect decode execution result to decide if prefill should run or not.
105105
// if the request is short enough, use decode results only and don't run the prefill profile.
106106
hitPercentagePrefix := 0.0 // default to 0, meaning no prefix cache hit
107107
prefixState, err := types.ReadCycleStateKey[*prefix.SchedulingContextState](cycleState, prefix.PrefixCachePluginType)

0 commit comments

Comments
 (0)