Skip to content

Commit 3849120

Browse files
committed
Merge branch 'llm-d:main' into unit-tests
Signed-off-by: sagiahrac <[email protected]> Signed-off-by: Sage Ahrac <[email protected]>
2 parents 16b61a9 + d5a8ca8 commit 3849120

File tree

14 files changed

+1775
-339
lines changed

14 files changed

+1775
-339
lines changed

Makefile

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,19 +75,70 @@ verify-boilerplate: $(TOOLS_DIR)/verify_boilerplate.py
7575
$(TOOLS_DIR)/verify_boilerplate.py --boilerplate-dir=hack/boilerplate --skip docs
7676

7777
.PHONY: unit-test
78-
unit-test: download-tokenizer download-zmq
78+
unit-test: download-tokenizer detect-python install-python-deps download-zmq
7979
@printf "\033[33;1m==== Running unit tests ====\033[0m\n"
8080
go test -ldflags="$(LDFLAGS)" ./pkg/...
81+
@printf "\033[33;1m==== Running chat template tests ====\033[0m\n"
82+
go test -tags=exclude -v -ldflags="$(LDFLAGS)" ./pkg/preprocessing/chat_completions_template/
83+
@printf "\033[33;1m==== Running chat template benchmarks ====\033[0m\n"
84+
go test -tags=exclude -bench=. -benchmem -ldflags="$(LDFLAGS)" ./pkg/preprocessing/chat_completions_template/
8185

8286
.PHONY: e2e-test
83-
e2e-test: download-tokenizer download-zmq
84-
@printf "\033[33;1m==== Running unit tests ====\033[0m\n"
87+
e2e-test: download-tokenizer detect-python install-python-deps download-zmq
88+
@printf "\033[33;1m==== Running e2e tests ====\033[0m\n"
8589
go test -v -ldflags="$(LDFLAGS)" ./tests/...
8690

8791
##@ Build
8892

93+
# Python detection and build configuration
94+
PYTHON_VERSION := 3.11.7
95+
PYTHON_DIR = build/python-$(PYTHON_VERSION)
96+
97+
.PHONY: detect-python
98+
detect-python:
99+
@printf "\033[33;1m==== Detecting Python installation ====\033[0m\n"
100+
@if python3 -c "import sys; print(sys.version)" >/dev/null 2>&1; then \
101+
echo "Using system Python"; \
102+
PYTHON_PATH=$$(python3 -c "import sys; print(sys.prefix)"); \
103+
PYTHON_VERSION=$$(python3 -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')"); \
104+
sed -i.bak "s|{{PYTHON_PATH}}|$$PYTHON_PATH|g; s|{{PYTHON_VERSION}}|$$PYTHON_VERSION|g" \
105+
pkg/preprocessing/chat_completions_template/cgo_functions.go; \
106+
rm -f pkg/preprocessing/chat_completions_template/cgo_functions.go.bak; \
107+
else \
108+
echo "System Python not found, downloading..."; \
109+
$(MAKE) download-python; \
110+
fi
111+
112+
.PHONY: download-python
113+
download-python:
114+
@printf "\033[33;1m==== Downloading Python $(PYTHON_VERSION) ====\033[0m\n"
115+
@mkdir -p build
116+
@if [ ! -d "$(PYTHON_DIR)" ]; then \
117+
if [ "$(TARGETOS)" = "darwin" ]; then \
118+
curl -L https://www.python.org/ftp/python/$(PYTHON_VERSION)/python-$(PYTHON_VERSION)-macos11.pkg -o build/python.pkg; \
119+
sudo installer -pkg build/python.pkg -target /; \
120+
elif [ "$(TARGETOS)" = "linux" ]; then \
121+
curl -L https://www.python.org/ftp/python/$(PYTHON_VERSION)/Python-$(PYTHON_VERSION).tgz -o build/python.tgz; \
122+
tar -xzf build/python.tgz -C build/; \
123+
cd build/Python-$(PYTHON_VERSION) && ./configure --prefix=$(PWD)/$(PYTHON_DIR) && make && make install; \
124+
fi; \
125+
fi
126+
@# Update CGo flags with downloaded Python path
127+
@sed -i.bak "s|{{PYTHON_PATH}}|$(PWD)/$(PYTHON_DIR)|g; s|{{PYTHON_VERSION}}|$(PYTHON_VERSION)|g" \
128+
pkg/preprocessing/chat_completions_template/cgo_functions.go
129+
@rm -f pkg/preprocessing/chat_completions_template/cgo_functions.go.bak
130+
131+
.PHONY: install-python-deps
132+
install-python-deps: detect-python
133+
@printf "\033[33;1m==== Installing Python dependencies ====\033[0m\n"
134+
@if [ -d "$(PYTHON_DIR)" ]; then \
135+
$(PYTHON_DIR)/bin/pip install -r pkg/preprocessing/chat_completions_template/requirements.txt; \
136+
else \
137+
python3 -m pip install -r pkg/preprocessing/chat_completions_template/requirements.txt; \
138+
fi
139+
89140
.PHONY: build
90-
build: check-go download-tokenizer download-zmq
141+
build: check-go download-tokenizer detect-python install-python-deps download-zmq
91142
@printf "\033[33;1m==== Building ====\033[0m\n"
92143
go build -ldflags="$(LDFLAGS)" -o bin/$(PROJECT_NAME) examples/kv_cache_index/main.go
93144

@@ -351,6 +402,14 @@ print-namespace: ## Print the current namespace
351402
print-project-name: ## Print the current project name
352403
@echo "$(PROJECT_NAME)"
353404

405+
.PHONY: clean
406+
clean: ## Clean build artifacts and restore placeholders
407+
@printf "\033[33;1m==== Cleaning build artifacts ====\033[0m\n"
408+
@rm -rf build/
409+
@# Restore original placeholders
410+
@git checkout pkg/preprocessing/chat_completions_template/cgo_functions.go
411+
@echo "✅ Build artifacts cleaned and placeholders restored"
412+
354413
.PHONY: install-hooks
355414
install-hooks: ## Install git hooks
356415
git config core.hooksPath hooks

examples/kv_events/offline/main.go

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ limitations under the License.
1717
package main
1818

1919
import (
20-
"bytes"
2120
"context"
2221
_ "embed"
2322
"fmt"
@@ -166,22 +165,21 @@ func runEventsDemo(ctx context.Context, kvCacheIndexer *kvcache.Indexer, publish
166165
// Simulate vLLM engine publishing BlockStored events
167166
logger.Info("@@@ Simulating vLLM engine publishing BlockStored events...")
168167

169-
var blockStoredPayload bytes.Buffer
170-
enc := msgpack.NewEncoder(&blockStoredPayload)
171-
enc.UseArrayEncodedStructs(true)
168+
blockStoredEvent := kvevents.BlockStored{
169+
BlockHashes: testdata.PromptHashes,
170+
ParentBlockHash: nil,
171+
TokenIds: []uint32{1, 2, 3},
172+
BlockSize: 256,
173+
LoraID: nil,
174+
}
172175

173176
//nolint // won't fail
174-
enc.Encode(&kvevents.BlockStoredEvent{
175-
TypeField: "BlockStored",
176-
BlockStored: &kvevents.BlockStored{BlockHashes: testdata.PromptHashes},
177-
})
178-
179-
dpRank := 0
177+
blockStoredPayload, _ := msgpack.Marshal(blockStoredEvent.ToTaggedUnion())
180178

181179
eventBatch := kvevents.EventBatch{
182180
TS: float64(time.Now().UnixNano()) / 1e9,
183-
Events: []msgpack.RawMessage{blockStoredPayload.Bytes()},
184-
DataParallelRank: &dpRank,
181+
Events: []msgpack.RawMessage{blockStoredPayload},
182+
DataParallelRank: nil,
185183
}
186184

187185
topic := fmt.Sprintf("kv@vllm-pod1@%s", testdata.ModelName)
@@ -204,20 +202,17 @@ func runEventsDemo(ctx context.Context, kvCacheIndexer *kvcache.Indexer, publish
204202
// Simulate removing some blocks
205203
logger.Info("@@@ Simulating vLLM engine removing some blocks...")
206204

207-
var blockRemovedPayload bytes.Buffer
208-
enc = msgpack.NewEncoder(&blockRemovedPayload)
209-
enc.UseArrayEncodedStructs(true)
205+
blockRemovedEvent := kvevents.BlockRemoved{
206+
BlockHashes: testdata.PromptHashes[2:], // Remove last blocks
207+
}
210208

211209
//nolint // won't fail
212-
enc.Encode(&kvevents.BlockRemovedEvent{
213-
TypeField: "BlockRemoved",
214-
BlockRemoved: &kvevents.BlockRemoved{BlockHashes: testdata.PromptHashes[2:]},
215-
})
210+
blockRemovedPayload, _ := msgpack.Marshal(blockRemovedEvent.ToTaggedUnion())
216211

217212
removeEventBatch := kvevents.EventBatch{
218213
TS: float64(time.Now().UnixNano()) / 1e9,
219-
Events: []msgpack.RawMessage{blockRemovedPayload.Bytes()},
220-
DataParallelRank: &dpRank,
214+
Events: []msgpack.RawMessage{blockRemovedPayload},
215+
DataParallelRank: nil,
221216
}
222217

223218
if err := publisher.PublishEvent(ctx, topic, removeEventBatch); err != nil {

pkg/kvcache/kvevents/events.go

Lines changed: 34 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,22 @@
11
package kvevents
22

33
import (
4-
"fmt"
5-
64
"github.com/vmihailenco/msgpack/v5"
75
)
86

7+
const (
8+
// BlockStoredEventTag is the tag for BlockStored events.
9+
BlockStoredEventTag = "BlockStored"
10+
// BlockRemovedEventTag is the tag for BlockRemoved events.
11+
BlockRemovedEventTag = "BlockRemoved"
12+
// AllBlocksClearedEventTag is the tag for AllBlocksCleared events.
13+
AllBlocksClearedEventTag = "AllBlocksCleared"
14+
)
15+
916
// event is a marker interface for KV-cache events.
1017
type event interface {
1118
isEvent()
19+
ToTaggedUnion() []any
1220
}
1321

1422
// EventBatch represents a batch of events.
@@ -20,37 +28,6 @@ type EventBatch struct {
2028
DataParallelRank *int `msgpack:",omitempty"`
2129
}
2230

23-
// DecodeMsgpack allows 2- or 3-element array-encoded batches.
24-
func (e *EventBatch) DecodeMsgpack(decoder *msgpack.Decoder) error {
25-
length, err := decoder.DecodeArrayLen()
26-
if err != nil {
27-
return err
28-
}
29-
if length < 2 {
30-
return fmt.Errorf("EventBatch: expected at least 2 fields, got %d", length)
31-
}
32-
if e.TS, err = decoder.DecodeFloat64(); err != nil {
33-
return err
34-
}
35-
if err := decoder.Decode(&e.Events); err != nil {
36-
return err
37-
}
38-
if length > 2 {
39-
var rank int
40-
if err := decoder.Decode(&rank); err != nil {
41-
return err
42-
}
43-
e.DataParallelRank = &rank
44-
}
45-
// skip any extra
46-
for i := 3; i < length; i++ {
47-
if err := decoder.Skip(); err != nil {
48-
return err
49-
}
50-
}
51-
return nil
52-
}
53-
5431
// BlockStored event.
5532
type BlockStored struct {
5633
_ struct{} `msgpack:",array"`
@@ -61,31 +38,43 @@ type BlockStored struct {
6138
LoraID *int
6239
}
6340

64-
func (BlockStored) isEvent() {}
65-
66-
type BlockStoredEvent struct {
67-
_ struct{} `msgpack:",array"`
68-
TypeField string
69-
*BlockStored
41+
func (bs BlockStored) ToTaggedUnion() []any {
42+
return []any{
43+
BlockStoredEventTag,
44+
bs.BlockHashes,
45+
bs.ParentBlockHash,
46+
bs.TokenIds,
47+
bs.BlockSize,
48+
bs.LoraID,
49+
}
7050
}
7151

52+
func (BlockStored) isEvent() {}
53+
7254
// BlockRemoved event.
7355
type BlockRemoved struct {
7456
_ struct{} `msgpack:",array"`
7557
BlockHashes []uint64
7658
}
7759

78-
func (BlockRemoved) isEvent() {}
79-
80-
type BlockRemovedEvent struct {
81-
_ struct{} `msgpack:",array"`
82-
TypeField string
83-
*BlockRemoved
60+
func (br BlockRemoved) ToTaggedUnion() []any {
61+
return []any{
62+
BlockRemovedEventTag,
63+
br.BlockHashes,
64+
}
8465
}
8566

67+
func (BlockRemoved) isEvent() {}
68+
8669
// AllBlocksCleared event.
8770
type AllBlocksCleared struct {
8871
_ struct{} `msgpack:",array"`
8972
}
9073

74+
func (ac AllBlocksCleared) ToTaggedUnion() []any {
75+
return []any{
76+
AllBlocksClearedEventTag,
77+
}
78+
}
79+
9180
func (AllBlocksCleared) isEvent() {}

pkg/kvcache/kvevents/pool.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -193,13 +193,13 @@ func (p *Pool) processEvent(ctx context.Context, msg *Message) {
193193
var unmarshalErr error
194194
switch tag {
195195
case "BlockStored":
196-
var bs BlockStoredEvent
197-
unmarshalErr = msgpack.Unmarshal(rawEvent, &bs)
198-
event = bs.BlockStored
196+
var bs BlockStored
197+
unmarshalErr = msgpack.Unmarshal(payloadBytes, &bs)
198+
event = bs
199199
case "BlockRemoved":
200-
var br BlockRemovedEvent
201-
unmarshalErr = msgpack.Unmarshal(rawEvent, &br)
202-
event = br.BlockRemoved
200+
var br BlockRemoved
201+
unmarshalErr = msgpack.Unmarshal(payloadBytes, &br)
202+
event = br
203203
case "AllBlocksCleared":
204204
var ac AllBlocksCleared
205205
unmarshalErr = msgpack.Unmarshal(payloadBytes, &ac)

pkg/preprocessing/chat_completions_template/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,16 +62,16 @@ The templating process (steps 1.1-1.4) handles the conversion from structured re
6262
└── cgo_functions.go:NewChatTemplateCGoWrapper()
6363
└── Creates ChatTemplateCGoWrapper struct with initialized=false
6464
65-
1.2. **Template Fetching**: wrapper.GetModelChatTemplate(getReq)
66-
├── cgo_functions.go:GetModelChatTemplate(req)
65+
1.2. **Template Fetching**: wrapper.GetModelChatTemplate(ctx, getReq)
66+
├── cgo_functions.go:GetModelChatTemplate(ctx, req)
6767
│ ├── Initialize() Python interpreter via CGO
6868
│ ├── executePythonCode() - **CGO Binding** to Python
6969
│ └── **Python Wrapper**: chat_template_wrapper.py:get_model_chat_template()
7070
│ └── Uses Hugging Face AutoTokenizer to fetch model template
7171
└── Returns: (template, template_vars)
7272
73-
1.3. **Template Rendering**: wrapper.RenderChatTemplate(req)
74-
├── cgo_functions.go:RenderChatTemplate(req)
73+
1.3. **Template Rendering**: wrapper.RenderChatTemplate(ctx, req)
74+
├── cgo_functions.go:RenderChatTemplate(ctx, req)
7575
│ ├── Initialize() Python interpreter via CGO (if not already done)
7676
│ ├── executePythonCode() - **CGO Binding** to Python
7777
│ └── **Python Wrapper**: chat_template_wrapper.py:render_jinja_template()

0 commit comments

Comments
 (0)