File tree: 1 file changed (+20 −7 lines changed)
paddlex/inference/genai/configs: 1 file changed (+20 −7 lines changed)
Columns: original file line number | diff line number | diff line change
@@ -45,13 +45,26 @@ def get_config(backend):
4545 cfg ["max-concurrency" ] = 2048
4646 return cfg
4747 elif backend == "vllm" :
48- return {
49- "trust-remote-code" : True ,
50- "gpu-memory-utilization" : 0.5 ,
51- "max-model-len" : 16384 ,
52- "max-num-batched-tokens" : 131072 ,
53- "api-server-count" : 4 ,
54- }
48+ require_deps ("torch" )
49+
50+ import torch
51+
52+ if torch .xpu .is_available ():
53+ return {
54+ "trust-remote-code" : True ,
55+ "max-num-batched-tokens" : 16384 ,
56+ "no-enable-prefix-caching" : True ,
57+ "mm-processor-cache-gb" : 0 ,
58+ "enforce-eager" : True ,
59+ }
60+ else :
61+ return {
62+ "trust-remote-code" : True ,
63+ "gpu-memory-utilization" : 0.5 ,
64+ "max-model-len" : 16384 ,
65+ "max-num-batched-tokens" : 131072 ,
66+ "api-server-count" : 4 ,
67+ }
5568 elif backend == "sglang" :
5669 return {
5770 "trust-remote-code" : True ,
You can’t perform that action at this time.
0 commit comments