@@ -379,7 +379,8 @@ def hf_autotokenizer_to_chat_completion_handler(
 
 
 def hf_tokenizer_config_to_chat_formatter(
-    tokenizer_config: Dict[str, Any]
+    tokenizer_config: Dict[str, Any],
+    add_generation_prompt: bool = True,
 ) -> ChatFormatter:
     assert isinstance(tokenizer_config, dict)
 
@@ -401,31 +402,34 @@ def hf_tokenizer_config_to_chat_formatter(
         lstrip_blocks=True,
     ).from_string(chat_template)
 
-    def format_autotokenizer(
+    def format_tokenizer_config(
        messages: List[llama_types.ChatCompletionRequestMessage],
        **kwargs: Any,
    ) -> ChatFormatterResponse:
        # TODO: verify this is correct
        # Add a blank assistant message to the end of the messages to prompt the model to generate a response
-        prompt = env.render(
-            messages=[
+        if add_generation_prompt:
+            messages = [
                *messages,
                llama_types.ChatCompletionRequestAssistantMessage(
                    role="assistant", content=""
                ),
-            ],
+            ]
+        prompt = env.render(
+            messages=messages,
            bos_token=bos_token,
            eos_token=eos_token,
        )
-        return ChatFormatterResponse(prompt=prompt, stop=eos_token)
+        return ChatFormatterResponse(prompt=prompt, stop=[eos_token, bos_token])
 
-    return format_autotokenizer
+    return format_tokenizer_config
 
 
 def hf_tokenizer_config_to_chat_completion_handler(
    tokenizer_config: Dict[str, Any],
+    add_generation_prompt: bool = True,
 ) -> LlamaChatCompletionHandler:
-    chat_formatter = hf_tokenizer_config_to_chat_formatter(tokenizer_config)
+    chat_formatter = hf_tokenizer_config_to_chat_formatter(tokenizer_config, add_generation_prompt=add_generation_prompt)
    return chat_formatter_to_chat_completion_handler(chat_formatter)
 
 
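For context, a minimal sketch of what the new add_generation_prompt flag changes in the rendered prompt. The toy chat_template and messages below are made-up stand-ins for a real tokenizer_config.json entry, not values from this repo:

# Sketch (assumed example): render an HF-style Jinja2 chat template with and
# without the trailing blank assistant message that add_generation_prompt adds.
from jinja2 import Environment

# Hypothetical stand-in for a tokenizer_config.json "chat_template" value.
chat_template = (
    "{{ bos_token }}"
    "{% for m in messages %}"
    "<|{{ m['role'] }}|>{{ m['content'] }}{{ eos_token }}"
    "{% endfor %}"
)

env = Environment(trim_blocks=True, lstrip_blocks=True).from_string(chat_template)
messages = [{"role": "user", "content": "Hi"}]

# Without the generation prompt, the rendering ends after the user turn.
print(env.render(messages=messages, bos_token="<s>", eos_token="</s>"))
# -> <s><|user|>Hi</s>

# With add_generation_prompt=True the formatter appends a blank assistant
# message, so the prompt ends in an open assistant turn for the model to fill.
messages = [*messages, {"role": "assistant", "content": ""}]
print(env.render(messages=messages, bos_token="<s>", eos_token="</s>"))
# -> <s><|user|>Hi</s><|assistant|></s>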
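And a hedged usage sketch for the updated handler factory. The file and model paths are placeholders, and the Llama(..., chat_handler=...) wiring assumes the surrounding llama-cpp-python API rather than anything introduced in this diff:

# Assumed wiring, not part of this diff: build a chat completion handler from
# a model's tokenizer_config.json and hand it to a Llama instance.
import json
import llama_cpp
from llama_cpp.llama_chat_format import hf_tokenizer_config_to_chat_completion_handler

with open("tokenizer_config.json") as f:  # placeholder path
    tokenizer_config = json.load(f)

chat_handler = hf_tokenizer_config_to_chat_completion_handler(
    tokenizer_config, add_generation_prompt=True
)
llama = llama_cpp.Llama(model_path="model.gguf", chat_handler=chat_handler)  # placeholder path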