Commit 17a88b5

Fixed_test_plugin.py
Signed-off-by: slokesha <[email protected]>
1 parent 596d335 commit 17a88b5

1 file changed: +27 -23 lines changed

examples/test_plugin.py

Lines changed: 27 additions & 23 deletions
@@ -2,28 +2,32 @@
 
 from vllm import LLM, SamplingParams
 
-os.environ["VLLM_SKIP_WARMUP"] = "true"
-prompts = [
-    "Hello, my name is",
-    "0.999 compares to 0.9 is ",
-    "The capital of France is",
-    "The future of AI is",
-]
-sampling_params = SamplingParams(temperature=0, max_tokens=50)
-model = "/mnt/weka/llm/Qwen3/Qwen3-30B-A3B/"
-# model = "/mnt/weka/llm/Qwen3/Qwen3-32B/"
-# model = "meta-llama/Llama-3.2-1B-Instruct"
-# model = "/mnt/weka/llm/DeepSeek-V2-Lite-Chat/"
-# model = "/mnt/weka/data/mlperf_models/Mixtral-8x7B-Instruct-v0.1"
-# model = "/mnt/weka/data/pytorch/llama3.1/Meta-Llama-3.1-8B/"
-kwargs = {"tensor_parallel_size": 1}
-if os.path.basename(model) in ["Qwen3-30B-A3B", "DeepSeek-V2-Lite-Chat"]:
-    kwargs["enable_expert_parallel"] = True
-llm = LLM(model=model, max_model_len=4096, trust_remote_code=True, **kwargs)
+def main():
+    os.environ["VLLM_SKIP_WARMUP"] = "true"
+    prompts = [
+        "Hello, my name is",
+        "0.999 compares to 0.9 is ",
+        "The capital of France is",
+        "The future of AI is",
+    ]
+    sampling_params = SamplingParams(temperature=0, max_tokens=50)
+    model = "/mnt/weka/llm/Qwen3/Qwen3-30B-A3B/"
+    # model = "/mnt/weka/llm/Qwen3/Qwen3-32B/"
+    # model = "meta-llama/Llama-3.2-1B-Instruct"
+    # model = "/mnt/weka/llm/DeepSeek-V2-Lite-Chat/"
+    # model = "/mnt/weka/data/mlperf_models/Mixtral-8x7B-Instruct-v0.1"
+    # model = "/mnt/weka/data/pytorch/llama3.1/Meta-Llama-3.1-8B/"
+    kwargs = {"tensor_parallel_size": 2}
+    if os.path.basename(model) in ["Qwen3-30B-A3B", "DeepSeek-V2-Lite-Chat"]:
+        kwargs["enable_expert_parallel"] = True
+    llm = LLM(model=model, max_model_len=4096, trust_remote_code=True, **kwargs)
 
-outputs = llm.generate(prompts, sampling_params)
+    outputs = llm.generate(prompts, sampling_params)
 
-for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+if __name__ == "__main__":
+    main()
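
For reference, a minimal hedged sketch of what the new if __name__ == "__main__" guard enables: with the example's work moved into main(), another script can import it without constructing the LLM, and run it explicitly only when intended. The import path below is hypothetical, not part of this commit, and depends on how the examples directory is laid out.

    # Hypothetical caller (not part of the commit); adjust the import to your layout.
    from test_plugin import main  # importing alone no longer loads the model

    if __name__ == "__main__":
        main()  # explicitly runs the vLLM generation example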
