Skip to content

Commit 74e9744

Browse files
committed
Rename prefix scorer HashBlockSize to BlockSize
1 parent 46a100f commit 74e9744

File tree

6 files changed

+24
-24
lines changed

6 files changed

+24
-24
lines changed

pkg/epp/config/loader/configloader_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ func TestLoadRawConfiguration(t *testing.T) {
7373
},
7474
{
7575
Type: test2Type,
76-
Parameters: json.RawMessage("{\"hashBlockSize\":32}"),
76+
Parameters: json.RawMessage("{\"blockSize\":32}"),
7777
},
7878
{
7979
Name: "testPicker",
@@ -175,7 +175,7 @@ func TestLoadRawConfigurationWithDefaults(t *testing.T) {
175175
{
176176
Name: test2Type,
177177
Type: test2Type,
178-
Parameters: json.RawMessage("{\"hashBlockSize\":32}"),
178+
Parameters: json.RawMessage("{\"blockSize\":32}"),
179179
},
180180
{
181181
Name: "testPicker",
@@ -464,7 +464,7 @@ plugins:
464464
type: test-profile-handler
465465
- type: test-two
466466
parameters:
467-
hashBlockSize: 32
467+
blockSize: 32
468468
- name: testPicker
469469
type: test-picker
470470
schedulingProfiles:
@@ -767,7 +767,7 @@ plugins:
767767
- name: prefixCacheScorer
768768
type: prefix-cache-scorer
769769
parameters:
770-
hashBlockSize: 32
770+
blockSize: 32
771771
- name: maxScorePicker
772772
type: max-score-picker
773773
- name: profileHandler
@@ -792,7 +792,7 @@ plugins:
792792
- name: prefixCacheScorer
793793
type: prefix-cache-scorer
794794
parameters:
795-
hashBlockSize: 32
795+
blockSize: 32
796796
schedulingProfiles:
797797
- name: default
798798
plugins:
@@ -826,7 +826,7 @@ plugins:
826826
- name: prefixCacheScorer
827827
type: prefix-cache-scorer
828828
parameters:
829-
hashBlockSize: asdf
829+
blockSize: asdf
830830
schedulingProfiles:
831831
- name: default
832832
plugins:

pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import (
3737

3838
const (
3939
// vLLM default token block size is 16, and a good guess of average characters per token is 4.
40-
DefaultHashBlockSize = 64
40+
DefaultBlockSize = 64
4141
// The maximum number of blocks to match. Two long requests with the same prefix up to this
4242
// limit will be indistinguishable.
4343
// This parameter provides a trade-off between cache size, prefix matching speed and matching
@@ -58,15 +58,15 @@ const (
5858
)
5959

6060
var DefaultConfig = Config{
61-
HashBlockSize: DefaultHashBlockSize,
61+
BlockSize: DefaultBlockSize,
6262
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,
6363
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
6464
}
6565

6666
type Config struct {
67-
// The input prompt is broken into sizes of HashBlockSize to calculate block hashes . Requests
67+
// The input prompt is broken into blocks of size BlockSize to calculate block hashes. Requests
6868
// with length shorter than the block size will be ignored.
69-
HashBlockSize int `json:"hashBlockSize"`
69+
BlockSize int `json:"blockSize"`
7070
// MaxPrefixBlocksToMatch is the maximum number of prefix blocks to match. Input beyond this limit will
7171
// be ignored.
7272
MaxPrefixBlocksToMatch int `json:"maxPrefixBlocksToMatch"`
@@ -133,7 +133,7 @@ var (
133133
// PrefixCachePluginFactory defines the factory function for Prefix plugin.
134134
func PrefixCachePluginFactory(name string, rawParameters json.RawMessage, handle plugins.Handle) (plugins.Plugin, error) {
135135
parameters := Config{
136-
HashBlockSize: DefaultHashBlockSize,
136+
BlockSize: DefaultBlockSize,
137137
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,
138138
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
139139
}
@@ -180,7 +180,7 @@ func (p *Plugin) WithName(name string) *Plugin {
180180
// Score returns the scoring result for the given list of pods based on context.
181181
func (p *Plugin) Score(ctx context.Context, cycleState *types.CycleState, request *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
182182
// pre score step, hashing prompt and find longest prefix match.
183-
hashes := hashPrompt(ctx, request, p.config.HashBlockSize, p.config.MaxPrefixBlocksToMatch)
183+
hashes := hashPrompt(ctx, request, p.config.BlockSize, p.config.MaxPrefixBlocksToMatch)
184184
state := &SchedulingContextState{
185185
PrefixHashes: hashes,
186186
PrefixCacheServers: p.matchLongestPrefix(ctx, hashes),
@@ -231,7 +231,7 @@ func (p *Plugin) PreRequest(ctx context.Context, request *types.LLMRequest, sche
231231

232232
total := len(state.PrefixHashes)
233233
matchLen := state.PrefixCacheServers[ServerID(targetPod.NamespacedName)]
234-
metrics.RecordPrefixCacheMatch(matchLen*p.config.HashBlockSize, total*p.config.HashBlockSize)
234+
metrics.RecordPrefixCacheMatch(matchLen*p.config.BlockSize, total*p.config.BlockSize)
235235
}
236236

237237
// matchLongestPrefix returns a map of servers and length of prefix that each server caches.

pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ import (
3535

3636
func TestPrefixPluginCompletion(t *testing.T) {
3737
config := Config{
38-
HashBlockSize: 4,
38+
BlockSize: 4,
3939
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,
4040
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
4141
}
@@ -201,7 +201,7 @@ func TestPrefixPluginCompletion(t *testing.T) {
201201

202202
func TestPrefixPluginChatCompletions(t *testing.T) {
203203
config := Config{
204-
HashBlockSize: 4,
204+
BlockSize: 4,
205205
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,
206206
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
207207
}
@@ -235,7 +235,7 @@ func TestPrefixPluginChatCompletions(t *testing.T) {
235235

236236
func TestPrefixPluginChatCompletionsGrowth(t *testing.T) {
237237
config := Config{
238-
HashBlockSize: 8, // Use larger block size for more predictable JSON marshaling
238+
BlockSize: 8, // Use larger block size for more predictable JSON marshaling
239239
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,
240240
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
241241
}
@@ -347,7 +347,7 @@ func BenchmarkPrefixPluginStress(b *testing.B) {
347347
blockSize := 4
348348
maxPrefixBlocks := 50000
349349
config := Config{
350-
HashBlockSize: blockSize,
350+
BlockSize: blockSize,
351351
MaxPrefixBlocksToMatch: maxPrefixBlocks,
352352
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
353353
}
@@ -416,7 +416,7 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) {
416416
blockSize := 8
417417
maxPrefixBlocks := 50000
418418
config := Config{
419-
HashBlockSize: blockSize,
419+
BlockSize: blockSize,
420420
MaxPrefixBlocksToMatch: maxPrefixBlocks,
421421
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
422422
}

site-src/guides/epp-configuration/config-text.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ kind: EndpointPickerConfig
9191
plugins:
9292
- type: prefix-cache-scorer
9393
parameters:
94-
hashBlockSize: 5
94+
blockSize: 5
9595
maxPrefixBlocksToMatch: 256
9696
lruCapacityPerServer: 31250
9797
schedulingProfiles:
@@ -158,7 +158,7 @@ spec:
158158
plugins:
159159
- type: prefix-cache-scorer
160160
parameters:
161-
hashBlockSize: 5
161+
blockSize: 5
162162
maxPrefixBlocksToMatch: 256
163163
lruCapacityPerServer: 31250
164164
schedulingProfiles:
@@ -177,7 +177,7 @@ kind: EndpointPickerConfig
177177
plugins:
178178
- type: prefix-cache-scorer
179179
parameters:
180-
hashBlockSize: 5
180+
blockSize: 5
181181
maxPrefixBlocksToMatch: 256
182182
lruCapacityPerServer: 31250
183183
- type: single-profile-handler
@@ -207,7 +207,7 @@ Scores pods based on the amount of the prompt is believed to be in the pod's KvC
207207

208208
- *Type*: prefix-cache-scorer
209209
- *Parameters*:
210-
- `hashBlockSize` specified the size of the blocks to break up the input prompt when
210+
- `blockSize` specifies the size of the blocks to break up the input prompt when
211211
calculating the block hashes. If not specified defaults to `64`
212212
- `maxPrefixBlocksToMatch` specifies the maximum number of prefix blocks to match. If
213213
not specified defaults to `256`

site-src/guides/epp-configuration/prefix-aware.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Like any other plugins, the prefix cache aware plugin can be enabled/disabled vi
1414

1515
The prefix cache plugin exposes the following advanced configuration parameters:
1616

17-
* `hashBlockSize`: The plugin matches prefixes in the unit of blocks. This is the size
17+
* `blockSize`: The plugin matches prefixes in units of blocks. This is the size
1818
of each block in number of bytes. vLLM default block size is 16 tokens. Assuming 4 characters per token, the default
1919
is set to 64 in EPP. The default is recommended unless performance is critical for use cases with
2020
extremely long inputs.

test/testdata/configloader_1_test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ plugins:
99
type: test-profile-handler
1010
- type: test-two
1111
parameters:
12-
hashBlockSize: 32
12+
blockSize: 32
1313
- name: testPicker
1414
type: test-picker
1515

0 commit comments

Comments
 (0)