diff --git a/torchchat/generate.py b/torchchat/generate.py
index a8501328e..4e0980d33 100644
--- a/torchchat/generate.py
+++ b/torchchat/generate.py
@@ -103,7 +103,11 @@ def encode_dialog_prompt(self, dialog) -> List[int]:
         tokens = self.tokenizer.encode(f"{B_INST} ")
         first_message = True  # Bool to handle placing the B_INST token. Behavior is weird - the system prompt should have the B_INST, but not the first user message. All following user messages *should* have it. Also, if there is no system prompt, then the user message should have it.
         for message in dialog:
-            content = message["content"].strip()
+            if isinstance(message["content"], list):
+                content = message["content"][0]["text"]
+            else:
+                content = message["content"]
+            content = content.strip()
             if message["role"] == "system":
                 encoded = self.tokenizer.encode(f"{B_SYS}\n{content}\n{E_SYS}")
                 first_message = False
diff --git a/torchchat/usages/openai_api.py b/torchchat/usages/openai_api.py
index f2d68881a..72a6dfc9b 100644
--- a/torchchat/usages/openai_api.py
+++ b/torchchat/usages/openai_api.py
@@ -376,7 +376,7 @@ def chunked_completion(self, completion_request: CompletionRequest):
            encoded_prompt=encoded,
            temperature=float(completion_request.temperature),
            chat_mode=False,
-           sequential_prefill=False,
+           sequential_prefill=True,
        )

        def callback(x, *, done_generating=False):
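
For context on the `generate.py` hunk: in the OpenAI chat-completions request format, a message's `content` may be either a bare string or a list of typed parts, and the new `isinstance` branch accepts both. Below is a minimal sketch of the two shapes, assuming OpenAI-style payloads; the helper name `extract_text` is hypothetical and only mirrors the patched logic, it is not part of the diff.

```python
# Plain shape: content is a bare string.
message_plain = {"role": "user", "content": "Hello there"}

# List shape: content is a list of typed parts; the patch reads the
# text of the first part only.
message_list = {
    "role": "user",
    "content": [{"type": "text", "text": "Hello there"}],
}


def extract_text(message: dict) -> str:
    # Hypothetical helper mirroring the patched branch in encode_dialog_prompt.
    if isinstance(message["content"], list):
        content = message["content"][0]["text"]
    else:
        content = message["content"]
    return content.strip()


assert extract_text(message_plain) == extract_text(message_list) == "Hello there"
```

Note that this path only inspects the first list element, so any additional parts in a multi-part message are ignored here. The second hunk is an independent change: it switches the OpenAI API code path from parallel to sequential prefill by passing `sequential_prefill=True` to `self.generate`.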