Skip to content

Commit f4ab8c6

Browse files
authored
Rename prefix scorer HashBlockSize to BlockSize (#1613)
1 parent aaba124 commit f4ab8c6

File tree

6 files changed

+24
-24
lines changed

6 files changed

+24
-24
lines changed

pkg/epp/config/loader/configloader_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ func TestLoadRawConfiguration(t *testing.T) {
7373
},
7474
{
7575
Type: test2Type,
76-
Parameters: json.RawMessage("{\"hashBlockSize\":32}"),
76+
Parameters: json.RawMessage("{\"blockSize\":32}"),
7777
},
7878
{
7979
Name: "testPicker",
@@ -175,7 +175,7 @@ func TestLoadRawConfigurationWithDefaults(t *testing.T) {
175175
{
176176
Name: test2Type,
177177
Type: test2Type,
178-
Parameters: json.RawMessage("{\"hashBlockSize\":32}"),
178+
Parameters: json.RawMessage("{\"blockSize\":32}"),
179179
},
180180
{
181181
Name: "testPicker",
@@ -464,7 +464,7 @@ plugins:
464464
type: test-profile-handler
465465
- type: test-two
466466
parameters:
467-
hashBlockSize: 32
467+
blockSize: 32
468468
- name: testPicker
469469
type: test-picker
470470
schedulingProfiles:
@@ -767,7 +767,7 @@ plugins:
767767
- name: prefixCacheScorer
768768
type: prefix-cache-scorer
769769
parameters:
770-
hashBlockSize: 32
770+
blockSize: 32
771771
- name: maxScorePicker
772772
type: max-score-picker
773773
- name: profileHandler
@@ -792,7 +792,7 @@ plugins:
792792
- name: prefixCacheScorer
793793
type: prefix-cache-scorer
794794
parameters:
795-
hashBlockSize: 32
795+
blockSize: 32
796796
schedulingProfiles:
797797
- name: default
798798
plugins:
@@ -826,7 +826,7 @@ plugins:
826826
- name: prefixCacheScorer
827827
type: prefix-cache-scorer
828828
parameters:
829-
hashBlockSize: asdf
829+
blockSize: asdf
830830
schedulingProfiles:
831831
- name: default
832832
plugins:

pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import (
3737

3838
const (
3939
// vLLM default token block size is 16, and a good guess of average characters per token is 4.
40-
DefaultHashBlockSize = 64
40+
DefaultBlockSize = 64
4141
// The maximum number of blocks to match. Two long requests with the same prefix up to this
4242
// limit will be indistinguishable.
4343
// This parameter provides a trade-off between cache size, prefix matching speed and matching
@@ -58,15 +58,15 @@ const (
5858
)
5959

6060
var DefaultConfig = Config{
61-
HashBlockSize: DefaultHashBlockSize,
61+
BlockSize: DefaultBlockSize,
6262
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,
6363
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
6464
}
6565

6666
type Config struct {
67-
// The input prompt is broken into sizes of HashBlockSize to calculate block hashes . Requests
67+
// The input prompt is broken into blocks of size BlockSize to calculate block hashes. Requests
6868
// with length shorter than the block size will be ignored.
69-
HashBlockSize int `json:"hashBlockSize"`
69+
BlockSize int `json:"blockSize"`
7070
// MaxPrefixBlocksToMatch is the maximum number of prefix blocks to match. Input beyond this limit will
7171
// be ignored.
7272
MaxPrefixBlocksToMatch int `json:"maxPrefixBlocksToMatch"`
@@ -133,7 +133,7 @@ var (
133133
// PrefixCachePluginFactory defines the factory function for Prefix plugin.
134134
func PrefixCachePluginFactory(name string, rawParameters json.RawMessage, handle plugins.Handle) (plugins.Plugin, error) {
135135
parameters := Config{
136-
HashBlockSize: DefaultHashBlockSize,
136+
BlockSize: DefaultBlockSize,
137137
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,
138138
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
139139
}
@@ -180,7 +180,7 @@ func (p *Plugin) WithName(name string) *Plugin {
180180
// Score returns the scoring result for the given list of pods based on context.
181181
func (p *Plugin) Score(ctx context.Context, cycleState *types.CycleState, request *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
182182
// pre score step, hashing prompt and find longest prefix match.
183-
hashes := hashPrompt(ctx, request, p.config.HashBlockSize, p.config.MaxPrefixBlocksToMatch)
183+
hashes := hashPrompt(ctx, request, p.config.BlockSize, p.config.MaxPrefixBlocksToMatch)
184184
state := &SchedulingContextState{
185185
PrefixHashes: hashes,
186186
PrefixCacheServers: p.matchLongestPrefix(ctx, hashes),
@@ -231,7 +231,7 @@ func (p *Plugin) PreRequest(ctx context.Context, request *types.LLMRequest, sche
231231

232232
total := len(state.PrefixHashes)
233233
matchLen := state.PrefixCacheServers[ServerID(targetPod.NamespacedName)]
234-
metrics.RecordPrefixCacheMatch(matchLen*p.config.HashBlockSize, total*p.config.HashBlockSize)
234+
metrics.RecordPrefixCacheMatch(matchLen*p.config.BlockSize, total*p.config.BlockSize)
235235
}
236236

237237
// matchLongestPrefix returns a map of servers and length of prefix that each server caches.

pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ import (
3535

3636
func TestPrefixPluginCompletion(t *testing.T) {
3737
config := Config{
38-
HashBlockSize: 4,
38+
BlockSize: 4,
3939
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,
4040
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
4141
}
@@ -201,7 +201,7 @@ func TestPrefixPluginCompletion(t *testing.T) {
201201

202202
func TestPrefixPluginChatCompletions(t *testing.T) {
203203
config := Config{
204-
HashBlockSize: 4,
204+
BlockSize: 4,
205205
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,
206206
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
207207
}
@@ -235,7 +235,7 @@ func TestPrefixPluginChatCompletions(t *testing.T) {
235235

236236
func TestPrefixPluginChatCompletionsGrowth(t *testing.T) {
237237
config := Config{
238-
HashBlockSize: 8, // Use larger block size for more predictable JSON marshaling
238+
BlockSize: 8, // Use larger block size for more predictable JSON marshaling
239239
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,
240240
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
241241
}
@@ -349,7 +349,7 @@ func BenchmarkPrefixPluginStress(b *testing.B) {
349349
blockSize := 4
350350
maxPrefixBlocks := 50000
351351
config := Config{
352-
HashBlockSize: blockSize,
352+
BlockSize: blockSize,
353353
MaxPrefixBlocksToMatch: maxPrefixBlocks,
354354
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
355355
}
@@ -418,7 +418,7 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) {
418418
blockSize := 8
419419
maxPrefixBlocks := 50000
420420
config := Config{
421-
HashBlockSize: blockSize,
421+
BlockSize: blockSize,
422422
MaxPrefixBlocksToMatch: maxPrefixBlocks,
423423
LRUCapacityPerServer: DefaultLRUCapacityPerServer,
424424
}

site-src/guides/epp-configuration/config-text.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ kind: EndpointPickerConfig
8585
plugins:
8686
- type: prefix-cache-scorer
8787
parameters:
88-
hashBlockSize: 5
88+
blockSize: 5
8989
maxPrefixBlocksToMatch: 256
9090
lruCapacityPerServer: 31250
9191
schedulingProfiles:
@@ -152,7 +152,7 @@ spec:
152152
plugins:
153153
- type: prefix-cache-scorer
154154
parameters:
155-
hashBlockSize: 5
155+
blockSize: 5
156156
maxPrefixBlocksToMatch: 256
157157
lruCapacityPerServer: 31250
158158
schedulingProfiles:
@@ -171,7 +171,7 @@ kind: EndpointPickerConfig
171171
plugins:
172172
- type: prefix-cache-scorer
173173
parameters:
174-
hashBlockSize: 5
174+
blockSize: 5
175175
maxPrefixBlocksToMatch: 256
176176
lruCapacityPerServer: 31250
177177
- type: single-profile-handler
@@ -201,7 +201,7 @@ Scores pods based on the amount of the prompt is believed to be in the pod's KvC
201201

202202
- *Type*: prefix-cache-scorer
203203
- *Parameters*:
204-
- `hashBlockSize` specified the size of the blocks to break up the input prompt when
204+
- `blockSize` specifies the size of the blocks into which the input prompt is broken when
205205
calculating the block hashes. If not specified defaults to `64`
206206
- `maxPrefixBlocksToMatch` specifies the maximum number of prefix blocks to match. If
207207
not specified defaults to `256`

site-src/guides/epp-configuration/prefix-aware.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Like any other plugins, the prefix cache aware plugin can be enabled/disabled vi
1414

1515
The prefix cache plugin exposes the following advanced configuration parameters:
1616

17-
* `hashBlockSize`: The plugin matches prefixes in the unit of blocks. This is the size
17+
* `blockSize`: The plugin matches prefixes in the unit of blocks. This is the size
1818
of each block in number of bytes. vLLM default block size is 16 tokens. Assuming 4 characters per token, the default
1919
is set to 64 in EPP. The default is recommended unless performance is critical for use cases with
2020
extremely long inputs.

test/testdata/configloader_1_test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ plugins:
99
type: test-profile-handler
1010
- type: test-two
1111
parameters:
12-
hashBlockSize: 32
12+
blockSize: 32
1313
- name: testPicker
1414
type: test-picker
1515

0 commit comments

Comments
 (0)