[Bugfix] Allow prefill of assistant response when using mistral_common (#9446)

sasha0552 · web-flow · commit 5e443b594fab · 2024-10-17T15:06:37.000Z
diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py
@@ -166,6 +166,10 @@ def apply_chat_template(self,
                             tools: Optional[Dict[str, Any]] = None,
                             **kwargs) -> List[int]:
 
+        last_message = messages[-1]
+        if last_message["role"] == "assistant":
+            last_message["prefix"] = True
+
         request = ChatCompletionRequest(messages=messages,
                                         tools=tools)  # type: ignore[type-var]
         encoded = self.mistral.encode_chat_completion(request)