
Commit 3408e47

Authored by ApostaC, robertgshaw2-redhat, Flechman, and tlrmchlsmth

[P/D][V1] KV Connector API V1 (vllm-project#15960)

Signed-off-by: ApostaC <[email protected]>
Signed-off-by: [email protected] <[email protected]>
Signed-off-by: remi <[email protected]>
Co-authored-by: [email protected] <[email protected]>
Co-authored-by: Robert Shaw <[email protected]>
Co-authored-by: Rémi Delacourt <[email protected]>
Co-authored-by: Tyler Michael Smith <[email protected]>

1 parent 0377b83 commit 3408e47
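This commit adds the V1 KV connector interface for disaggregated prefill/decode (P/D): a connector plugs into both the scheduler (deciding which prompt tokens are already cached externally) and the worker (actually moving KV tensors in and out of the paged cache). As a rough orientation only, a custom connector might look like the sketch below. The import path, the KVConnectorBase_V1 class, and every hook name are assumptions inferred from the PR title and the example files in this diff, not confirmed by the excerpt, so verify them against the actual base class before relying on them.

# Sketch of a custom V1 KV connector. ALL names below (import path,
# KVConnectorBase_V1, the hook signatures) are assumptions about the API
# this PR introduces; they are not shown in the diff excerpt.
from vllm.distributed.kv_transfer.kv_connector.v1.base import (
    KVConnectorBase_V1)


class MyStorageConnector(KVConnectorBase_V1):
    # Worker side: move KV tensors between the paged cache and storage.
    def start_load_kv(self, forward_context, **kwargs) -> None:
        ...  # kick off loads for tokens that hit the external cache

    def wait_for_layer_load(self, layer_name: str) -> None:
        ...  # block until this layer's KV has landed in the paged cache

    def save_kv_layer(self, layer_name, kv_layer, attn_metadata,
                      **kwargs) -> None:
        ...  # persist one layer's KV after its forward pass

    def wait_for_save(self) -> None:
        ...  # block until all pending saves are durable

    # Scheduler side: report externally cached tokens and build metadata.
    def get_num_new_matched_tokens(self, request,
                                   num_computed_tokens) -> int:
        return 0  # prompt tokens that can be loaded instead of computed

    def update_state_after_alloc(self, request, num_external_tokens) -> None:
        ...  # record blocks allocated for externally cached tokens

    def build_connector_meta(self, scheduler_output):
        ...  # per-step metadata shipped from the scheduler to the workers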

File tree

24 files changed: +1377 additions, -83 deletions. The three new example files are shown below.
decode_example.py (new file)
Lines changed: 36 additions & 0 deletions

@@ -0,0 +1,36 @@
# SPDX-License-Identifier: Apache-2.0

from vllm import LLM, SamplingParams
from vllm.config import KVTransferConfig

# Read prompts from output.txt (written by prefill_example.py)
prompts = []
try:
    with open("output.txt") as f:
        for line in f:
            prompts.append(line.strip())
    print(f"Loaded {len(prompts)} prompts from output.txt")
except FileNotFoundError:
    print("Error: output.txt file not found")
    exit(-1)

sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=10)

llm = LLM(
    model="meta-llama/Llama-3.2-1B-Instruct",
    enforce_eager=True,
    gpu_memory_utilization=0.8,
    max_num_batched_tokens=64,
    max_num_seqs=16,
    kv_transfer_config=KVTransferConfig.from_cli(
        '{"kv_connector":"SharedStorageConnector","kv_role":"kv_both",'
        '"kv_connector_extra_config": {"shared_storage_path": "local_storage"}}'
    ))

# 2nd generation (decode instance): the prefilled KV cache is loaded
# from local_storage via the connector instead of being recomputed.
outputs = llm.generate(prompts, sampling_params)

for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
prefill_example.py (new file)
Lines changed: 43 additions & 0 deletions

@@ -0,0 +1,43 @@
# SPDX-License-Identifier: Apache-2.0

from vllm import LLM, SamplingParams
from vllm.config import KVTransferConfig

context = "Hi " * 1000
context2 = "Hey " * 500
prompts = [
    context + "Hello, my name is",
    context + "The capital of France is",
    context2 + "Your name is",
    context2 + "The capital of China is",
]

# max_tokens=1: the prefill instance only needs to run the prefill pass
# and populate the KV cache; a single generated token is enough.
sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=1)

llm = LLM(model="meta-llama/Llama-3.2-1B-Instruct",
          enforce_eager=True,
          gpu_memory_utilization=0.8,
          kv_transfer_config=KVTransferConfig.from_cli(
              '{"kv_connector":"SharedStorageConnector","kv_role":"kv_both", '
              '"kv_connector_extra_config": '
              '{"shared_storage_path": "local_storage"}}'))

# 1st generation (prefill instance): KV blocks are saved to local_storage
# through the connector as the prompts are prefilled.
outputs = llm.generate(
    prompts,
    sampling_params,
)

new_prompts = []
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    new_prompts.append(prompt + generated_text)
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")

# Write new_prompts to output.txt for the decode instance to pick up
with open("output.txt", "w") as f:
    for prompt in new_prompts:
        f.write(prompt + "\n")
print(f"Saved {len(new_prompts)} prompts to output.txt")
Run script (new file)
Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
# Clean up artifacts from any previous run
rm -rf local_storage/
rm -f output.txt

# VLLM_ENABLE_V1_MULTIPROCESSING=0 runs the V1 engine in-process;
# prefill saves the KV cache into local_storage/, decode then reloads it.
VLLM_ENABLE_V1_MULTIPROCESSING=0 CUDA_VISIBLE_DEVICES=0 python3 prefill_example.py
VLLM_ENABLE_V1_MULTIPROCESSING=0 CUDA_VISIBLE_DEVICES=0 python3 decode_example.py
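The script above drives the offline LLM API. vLLM's OpenAI-compatible server also accepts a --kv-transfer-config flag taking the same JSON, so an online equivalent would plausibly look like the following; the flag's interaction with the V1 connector at this commit is an assumption, not something this diff demonstrates.

# Assumed online equivalent: same connector JSON via --kv-transfer-config.
VLLM_ENABLE_V1_MULTIPROCESSING=0 CUDA_VISIBLE_DEVICES=0 \
  vllm serve meta-llama/Llama-3.2-1B-Instruct \
    --kv-transfer-config \
    '{"kv_connector":"SharedStorageConnector","kv_role":"kv_both","kv_connector_extra_config":{"shared_storage_path":"local_storage"}}'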
