|
3 | 3 | #include "log.h"
|
4 | 4 | #include "regex-partial.h"
|
5 | 5 |
|
| 6 | +#include <algorithm> |
| 7 | +#include <cctype> |
6 | 8 | #include <optional>
|
7 | 9 | #include <stdexcept>
|
8 | 10 | #include <string>
|
| 11 | +#include <string_view> |
9 | 12 | #include <vector>
|
10 | 13 |
|
11 | 14 | using json = nlohmann::ordered_json;
|
@@ -152,25 +155,111 @@ bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think
|
152 | 155 | add_reasoning_content(stripped_reasoning);
|
153 | 156 | }
|
154 | 157 | };
|
155 |
| - if (syntax_.reasoning_format != COMMON_REASONING_FORMAT_NONE) { |
156 |
| - if (syntax_.thinking_forced_open || try_consume_literal(start_think)) { |
157 |
| - if (auto res = try_find_literal(end_think)) { |
158 |
| - handle_reasoning(res->prelude, /* closed */ true); |
159 |
| - consume_spaces(); |
160 |
| - return true; |
161 |
| - } |
162 |
| - auto rest = consume_rest(); |
| 158 | + |
| 159 | + if (syntax_.reasoning_format == COMMON_REASONING_FORMAT_NONE) { |
| 160 | + return false; |
| 161 | + } |
| 162 | + |
| 163 | + const size_t saved_pos = pos_; |
| 164 | + const size_t saved_content_size = result_.content.size(); |
| 165 | + const size_t saved_reasoning_size = result_.reasoning_content.size(); |
| 166 | + |
| 167 | + auto restore_state = [&]() { |
| 168 | + move_to(saved_pos); |
| 169 | + result_.content.resize(saved_content_size); |
| 170 | + result_.reasoning_content.resize(saved_reasoning_size); |
| 171 | + }; |
| 172 | + |
| 173 | + // Allow leading whitespace to be preserved as content when reasoning is present at the start |
| 174 | + size_t cursor = pos_; |
| 175 | + size_t whitespace_end = cursor; |
| 176 | + while (whitespace_end < input_.size() && std::isspace(static_cast<unsigned char>(input_[whitespace_end]))) { |
| 177 | + ++whitespace_end; |
| 178 | + } |
| 179 | + |
| 180 | + if (whitespace_end >= input_.size()) { |
| 181 | + restore_state(); |
| 182 | + if (syntax_.thinking_forced_open) { |
| 183 | + auto rest = input_.substr(saved_pos); |
163 | 184 | if (!rest.empty()) {
|
164 | 185 | handle_reasoning(rest, /* closed */ !is_partial());
|
165 | 186 | }
|
166 |
| - // Allow unclosed thinking tags, for now (https://github.com/ggml-org/llama.cpp/issues/13812, https://github.com/ggml-org/llama.cpp/issues/13877) |
167 |
| - // if (!syntax_.thinking_forced_open) { |
168 |
| - // throw common_chat_msg_partial_exception(end_think); |
169 |
| - // } |
| 187 | + move_to(input_.size()); |
170 | 188 | return true;
|
171 | 189 | }
|
| 190 | + return false; |
| 191 | + } |
| 192 | + |
| 193 | + cursor = whitespace_end; |
| 194 | + const size_t remaining = input_.size() - cursor; |
| 195 | + const size_t start_prefix = std::min(start_think.size(), remaining); |
| 196 | + const bool has_start_tag = input_.compare(cursor, start_prefix, start_think, 0, start_prefix) == 0; |
| 197 | + |
| 198 | + if (has_start_tag && start_prefix < start_think.size()) { |
| 199 | + move_to(input_.size()); |
| 200 | + return true; |
| 201 | + } |
| 202 | + |
| 203 | + if (has_start_tag) { |
| 204 | + if (whitespace_end > pos_) { |
| 205 | + add_content(input_.substr(pos_, whitespace_end - pos_)); |
| 206 | + } |
| 207 | + cursor += start_think.size(); |
| 208 | + } else if (syntax_.thinking_forced_open) { |
| 209 | + cursor = whitespace_end; |
| 210 | + } else { |
| 211 | + restore_state(); |
| 212 | + return false; |
| 213 | + } |
| 214 | + while (true) { |
| 215 | + if (cursor >= input_.size()) { |
| 216 | + move_to(input_.size()); |
| 217 | + return true; |
| 218 | + } |
| 219 | + |
| 220 | + size_t end_pos = input_.find(end_think, cursor); |
| 221 | + if (end_pos == std::string::npos) { |
| 222 | + std::string_view remaining_view(input_.data() + cursor, input_.size() - cursor); |
| 223 | + size_t partial_off = string_find_partial_stop(remaining_view, end_think); |
| 224 | + size_t reasoning_end = partial_off == std::string::npos ? input_.size() : cursor + partial_off; |
| 225 | + if (reasoning_end > cursor) { |
| 226 | + handle_reasoning(input_.substr(cursor, reasoning_end - cursor), /* closed */ partial_off == std::string::npos && !is_partial()); |
| 227 | + } |
| 228 | + move_to(input_.size()); |
| 229 | + return true; |
| 230 | + } |
| 231 | + |
| 232 | + if (end_pos > cursor) { |
| 233 | + handle_reasoning(input_.substr(cursor, end_pos - cursor), /* closed */ true); |
| 234 | + } else { |
| 235 | + handle_reasoning("", /* closed */ true); |
| 236 | + } |
| 237 | + |
| 238 | + cursor = end_pos + end_think.size(); |
| 239 | + |
| 240 | + while (cursor < input_.size() && std::isspace(static_cast<unsigned char>(input_[cursor]))) { |
| 241 | + ++cursor; |
| 242 | + } |
| 243 | + |
| 244 | + const size_t next_remaining = input_.size() - cursor; |
| 245 | + if (next_remaining == 0) { |
| 246 | + move_to(cursor); |
| 247 | + return true; |
| 248 | + } |
| 249 | + |
| 250 | + const size_t next_prefix = std::min(start_think.size(), next_remaining); |
| 251 | + if (input_.compare(cursor, next_prefix, start_think, 0, next_prefix) == 0) { |
| 252 | + if (next_prefix < start_think.size()) { |
| 253 | + move_to(input_.size()); |
| 254 | + return true; |
| 255 | + } |
| 256 | + cursor += start_think.size(); |
| 257 | + continue; |
| 258 | + } |
| 259 | + |
| 260 | + move_to(cursor); |
| 261 | + return true; |
172 | 262 | }
|
173 |
| - return false; |
174 | 263 | }
|
175 | 264 |
|
176 | 265 | std::string common_chat_msg_parser::consume_rest() {
|
|
0 commit comments