Skip to content

Commit a16e294

Browse files
committed
fix encoding of user
1 parent eb2fb24 commit a16e294

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

models/deepseek.cpp

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,10 @@ namespace chatllm::deepseek::v1
3939
{
4040
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
4141
append_ai_opening(round_idx, ids);
42-
tok->encode(ai, ids, false, true);
42+
43+
std::ostringstream oss_ai;
44+
oss_ai << " " << ai;
45+
tok->encode(oss_ai.str(), ids, false, true);
4346
}
4447

4548
void ChatHistoryEncoder::append_sys_prompt(std::vector<int> &ids) const
@@ -58,25 +61,25 @@ namespace chatllm::deepseek::v1
5861
void ChatHistoryEncoder::append_user(int round_idx, const std::string &user, std::vector<int> &ids) const
5962
{
6063
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
61-
std::ostringstream oss_prompt;
6264

63-
append_ai_opening(round_idx, ids);
65+
append_user_opening(round_idx, ids);
6466

67+
std::ostringstream oss_prompt;
68+
oss_prompt << " ";
6569
oss_prompt << user << "\n\n";
66-
auto text = oss_prompt.str();
67-
tok->encode(text, ids, false, false);
70+
tok->encode(oss_prompt.str(), ids, false, false);
6871
}
6972

7073
void ChatHistoryEncoder::append_ai_opening(int round_idx, std::vector<int> &ids) const
7174
{
7275
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
73-
tok->encode("Assistant: ", ids, false, false);
76+
tok->encode("Assistant:", ids, false, false);
7477
}
7578

7679
void ChatHistoryEncoder::append_user_opening(int round_idx, std::vector<int> &ids) const
7780
{
7881
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
79-
tok->encode("User: ", ids, false, false);
82+
tok->encode("User:", ids, false, false);
8083
}
8184

8285
bool Tokenizer::is_special_id(int id) const

0 commit comments

Comments
 (0)