33#include " log.h"
44#include " regex-partial.h"
55
6+ #include < algorithm>
7+ #include < cctype>
68#include < optional>
79#include < stdexcept>
810#include < string>
11+ #include < string_view>
912#include < vector>
1013
1114using json = nlohmann::ordered_json;
@@ -166,6 +169,27 @@ void common_chat_msg_parser::consume_literal(const std::string & literal) {
166169}
167170
// Tries to parse one or more reasoning ("thinking") sections delimited by
// start_think / end_think, beginning at the current parse position.
//
// Returns false when the reasoning format is COMMON_REASONING_FORMAT_NONE, when
// the remaining input is only whitespace and thinking is not forced open, or
// when no start tag is found and thinking is not forced open. Every other path
// shown here returns true after moving the parse position.
//
// NOTE(review): this listing is a diff view. Lines marked '-' are the removed
// implementation, lines marked '+' are the added one, and the '@@' hunk header
// inside handle_reasoning hides some unchanged lines. The comments below
// describe only what the visible lines show.
168171bool common_chat_msg_parser::try_parse_reasoning (const std::string & start_think, const std::string & end_think) {
172+ std::string pending_reasoning_prefix;
173+
174+ if (syntax_.reasoning_format == COMMON_REASONING_FORMAT_NONE) {
175+ return false ;
176+ }
177+
    // Remembers the start-tag text located at prefix_pos in
    // pending_reasoning_prefix. It does nothing unless thinking_forced_open is
    // set and reasoning_in_content is not, and it clears the pending prefix if
    // a full start tag would not fit in the input from prefix_pos.
178+ auto set_reasoning_prefix = [&](size_t prefix_pos) {
179+ if (!syntax_.thinking_forced_open || syntax_.reasoning_in_content ) {
180+ return ;
181+ }
182+ if (prefix_pos + start_think.size () > input_.size ()) {
183+ pending_reasoning_prefix.clear ();
184+ return ;
185+ }
186+ // Capture the exact literal that opened the reasoning section so we can
187+ // surface it back to callers. This ensures formats that force the
188+ // reasoning tag open (e.g. DeepSeek R1) retain their original prefix
189+ // instead of dropping it during parsing.
190+ pending_reasoning_prefix = input_.substr (prefix_pos, start_think.size ());
191+ };
192+
    // Emits one reasoning segment after stripping it with string_strip.
    // NOTE(review): the lines between the empty check and the add_content call
    // are hidden by the hunk header below, so the exact branch conditions
    // (including how `closed` is used) cannot be confirmed from this view.
169193 auto handle_reasoning = [&](const std::string & reasoning, bool closed) {
170194 auto stripped_reasoning = string_strip (reasoning);
171195 if (stripped_reasoning.empty ()) {
@@ -178,28 +202,116 @@ bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think
178202 add_content (syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? " </think>" : end_think);
179203 }
180204 } else {
        // Emit the captured start tag (if any) ahead of the reasoning text,
        // then clear it so it is emitted at most once per capture.
205+ if (!pending_reasoning_prefix.empty ()) {
206+ add_reasoning_content (pending_reasoning_prefix);
207+ pending_reasoning_prefix.clear ();
208+ }
181209 add_reasoning_content (stripped_reasoning);
182210 }
183211 };
184- if (syntax_.reasoning_format != COMMON_REASONING_FORMAT_NONE) {
185- if (syntax_.thinking_forced_open || try_consume_literal (start_think)) {
186- if (auto res = try_find_literal (end_think)) {
187- handle_reasoning (res->prelude , /* closed */ true );
188- consume_spaces ();
189- return true ;
190- }
191- auto rest = consume_rest ();
212+
    // Snapshot the parse position and the current output sizes so that
    // restore_state can roll back anything appended by this call.
213+ const size_t saved_pos = pos_;
214+ const size_t saved_content_size = result_.content .size ();
215+ const size_t saved_reasoning_size = result_.reasoning_content .size ();
216+
217+ auto restore_state = [&]() {
218+ move_to (saved_pos);
219+ result_.content .resize (saved_content_size);
220+ result_.reasoning_content .resize (saved_reasoning_size);
221+ };
222+
223+ // Allow leading whitespace to be preserved as content when reasoning is present at the start
224+ size_t cursor = pos_;
225+ size_t whitespace_end = cursor;
    // The cast to unsigned char keeps the argument to std::isspace within the
    // range that function accepts, even for chars with the high bit set.
226+ while (whitespace_end < input_.size () && std::isspace (static_cast <unsigned char >(input_[whitespace_end]))) {
227+ ++whitespace_end;
228+ }
229+
    // Everything from the current position to the end of input is whitespace
    // (or there is no input left).
    // NOTE(review): nothing visible has modified pos_ or result_ since the
    // snapshot above, so restore_state() looks like a no-op here - confirm.
230+ if (whitespace_end >= input_.size ()) {
231+ restore_state ();
232+ if (syntax_.thinking_forced_open ) {
233+ auto rest = input_.substr (saved_pos);
192234 if (!rest.empty ()) {
193235 handle_reasoning (rest, /* closed */ !is_partial ());
194236 }
195- // Allow unclosed thinking tags, for now (https://github.com/ggml-org/llama.cpp/issues/13812, https://github.com/ggml-org/llama.cpp/issues/13877)
196- // if (!syntax_.thinking_forced_open) {
197- // throw common_chat_msg_partial_exception(end_think);
198- // }
237+ move_to (input_.size ());
199238 return true ;
200239 }
240+ return false ;
241+ }
242+
    // Compare only as many characters of start_think as remain in the input,
    // so a start tag truncated by the end of input still counts as a match.
243+ cursor = whitespace_end;
244+ const size_t remaining = input_.size () - cursor;
245+ const size_t start_prefix = std::min (start_think.size (), remaining);
246+ const bool has_start_tag = input_.compare (cursor, start_prefix, start_think, 0 , start_prefix) == 0 ;
247+
    // Only a proper prefix of the start tag is present: consume all remaining
    // input and report success without emitting anything.
    // NOTE(review): this path does not consult is_partial() - confirm that is intended.
248+ if (has_start_tag && start_prefix < start_think.size ()) {
249+ move_to (input_.size ());
250+ return true ;
251+ }
252+
253+ if (has_start_tag) {
        // Whitespace that preceded the start tag is emitted as regular content.
254+ if (whitespace_end > pos_) {
255+ add_content (input_.substr (pos_, whitespace_end - pos_));
256+ }
257+ set_reasoning_prefix (cursor);
258+ cursor += start_think.size ();
259+ } else if (syntax_.thinking_forced_open ) {
        // No explicit start tag, but thinking is forced open: reasoning text
        // starts right after the leading whitespace.
260+ cursor = whitespace_end;
261+ } else {
262+ restore_state ();
263+ return false ;
264+ }
    // Each iteration handles one reasoning section starting at `cursor`. The
    // loop repeats only when another full start tag follows the end tag
    // (after optional whitespace).
265+ while (true ) {
266+ if (cursor >= input_.size ()) {
267+ move_to (input_.size ());
268+ return true ;
269+ }
270+
271+ size_t end_pos = input_.find (end_think, cursor);
272+ if (end_pos == std::string::npos) {
            // No complete end tag. Presumably string_find_partial_stop returns
            // the offset of a trailing partial match of end_think, or npos when
            // there is none - TODO confirm against its definition. A partial
            // match is left out of the reasoning text passed on below.
273+ std::string_view remaining_view (input_.data () + cursor, input_.size () - cursor);
274+ size_t partial_off = string_find_partial_stop (remaining_view, end_think);
275+ size_t reasoning_end = partial_off == std::string::npos ? input_.size () : cursor + partial_off;
276+ if (reasoning_end > cursor) {
277+ handle_reasoning (input_.substr (cursor, reasoning_end - cursor), /* closed */ partial_off == std::string::npos && !is_partial ());
278+ }
279+ move_to (input_.size ());
280+ return true ;
281+ }
282+
        // A complete end tag was found: hand over the text before it as a
        // closed reasoning section.
283+ if (end_pos > cursor) {
284+ handle_reasoning (input_.substr (cursor, end_pos - cursor), /* closed */ true );
285+ } else {
286+ handle_reasoning (" " , /* closed */ true );
287+ }
288+
289+ cursor = end_pos + end_think.size ();
290+
        // Skip whitespace that follows the end tag.
291+ while (cursor < input_.size () && std::isspace (static_cast <unsigned char >(input_[cursor]))) {
292+ ++cursor;
293+ }
294+
295+ const size_t next_remaining = input_.size () - cursor;
296+ if (next_remaining == 0 ) {
297+ move_to (cursor);
298+ return true ;
299+ }
300+
        // Check whether another start tag (possibly truncated by the end of
        // input) follows immediately.
301+ const size_t next_prefix = std::min (start_think.size (), next_remaining);
302+ if (input_.compare (cursor, next_prefix, start_think, 0 , next_prefix) == 0 ) {
303+ if (next_prefix < start_think.size ()) {
                // Truncated start tag at the end of input: consume it and stop.
304+ move_to (input_.size ());
305+ return true ;
306+ }
307+ set_reasoning_prefix (cursor);
308+ cursor += start_think.size ();
309+ continue ;
310+ }
311+
        // No further start tag: leave the parse position at the first
        // non-whitespace character after the end tag.
312+ move_to (cursor);
313+ return true ;
201314 }
202- return false ;
203315}
204316
205317std::string common_chat_msg_parser::consume_rest () {
0 commit comments