@@ -46,6 +46,7 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
46
46
"model" :
47
47
"NousResearch/Hermes-3-Llama-3.1-8B" ,
48
48
"arguments" : [
49
+ "--enforce-eager" , "--no-enable-prefix-caching" ,
49
50
"--tool-call-parser" , "hermes" , "--chat-template" ,
50
51
str (VLLM_PATH / "examples/tool_chat_template_hermes.jinja" )
51
52
],
@@ -60,6 +61,7 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
60
61
"model" :
61
62
"meta-llama/Meta-Llama-3.1-8B-Instruct" ,
62
63
"arguments" : [
64
+ "--enforce-eager" , "--no-enable-prefix-caching" ,
63
65
"--tool-call-parser" , "llama3_json" , "--chat-template" ,
64
66
str (VLLM_PATH / "examples/tool_chat_template_llama3.1_json.jinja" )
65
67
],
@@ -70,6 +72,7 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
70
72
"model" :
71
73
"meta-llama/Llama-3.2-3B-Instruct" ,
72
74
"arguments" : [
75
+ "--enforce-eager" , "--no-enable-prefix-caching" ,
73
76
"--tool-call-parser" , "llama3_json" , "--chat-template" ,
74
77
str (VLLM_PATH / "examples/tool_chat_template_llama3.2_json.jinja" )
75
78
],
@@ -80,6 +83,7 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
80
83
"model" :
81
84
"mistralai/Mistral-7B-Instruct-v0.3" ,
82
85
"arguments" : [
86
+ "--enforce-eager" , "--no-enable-prefix-caching" ,
83
87
"--tool-call-parser" , "mistral" , "--chat-template" ,
84
88
str (VLLM_PATH / "examples/tool_chat_template_mistral.jinja" ),
85
89
"--ignore-patterns=\" consolidated.safetensors\" "
@@ -111,22 +115,28 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
111
115
"model" :
112
116
"ibm-granite/granite-3.0-8b-instruct" ,
113
117
"arguments" : [
118
+ "--enforce-eager" , "--no-enable-prefix-caching" ,
114
119
"--tool-call-parser" , "granite" , "--chat-template" ,
115
120
str (VLLM_PATH / "examples/tool_chat_template_granite.jinja" )
116
121
],
117
122
},
118
123
"granite-3.1-8b" : {
119
- "model" : "ibm-granite/granite-3.1-8b-instruct" ,
124
+ "model" :
125
+ "ibm-granite/granite-3.1-8b-instruct" ,
120
126
"arguments" : [
127
+ "--enforce-eager" ,
128
+ "--no-enable-prefix-caching" ,
121
129
"--tool-call-parser" ,
122
130
"granite" ,
123
131
],
124
- "supports_parallel" : True ,
132
+ "supports_parallel" :
133
+ True ,
125
134
},
126
135
"internlm" : {
127
136
"model" :
128
137
"internlm/internlm2_5-7b-chat" ,
129
138
"arguments" : [
139
+ "--enforce-eager" , "--no-enable-prefix-caching" ,
130
140
"--tool-call-parser" , "internlm" , "--chat-template" ,
131
141
str (VLLM_PATH /
132
142
"examples/tool_chat_template_internlm2_tool.jinja" ),
@@ -139,6 +149,7 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
139
149
"model" :
140
150
"Team-ACE/ToolACE-8B" ,
141
151
"arguments" : [
152
+ "--enforce-eager" , "--no-enable-prefix-caching" ,
142
153
"--tool-call-parser" , "pythonic" , "--chat-template" ,
143
154
str (VLLM_PATH / "examples/tool_chat_template_toolace.jinja" )
144
155
],
0 commit comments