Skip to content

Commit 6474a01

Browse files
committed
Merge branch 'concedo_experimental' into esocrok
2 parents 166ba6b + 5b6ed8b commit 6474a01

File tree

94 files changed

+5025
-1460
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+5025
-1460
lines changed

common/arg.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1783,7 +1783,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
17831783
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
17841784
add_opt(common_arg(
17851785
{"-t", "--threads"}, "N",
1786-
string_format("number of threads to use during generation (default: %d)", params.cpuparams.n_threads),
1786+
string_format("number of CPU threads to use during generation (default: %d)", params.cpuparams.n_threads),
17871787
[](common_params & params, int value) {
17881788
params.cpuparams.n_threads = value;
17891789
if (params.cpuparams.n_threads <= 0) {

common/chat-parser.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
432432
if (is_arguments_path({})) {
433433
// Entire JSON is the arguments and was parsed fully.
434434
return consume_json_result {
435-
partial->json.dump(),
435+
partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true),
436436
/* .is_partial = */ false,
437437
};
438438
}
@@ -444,7 +444,7 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
444444
std::vector<std::string> path;
445445
std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
446446
if (is_arguments_path(path)) {
447-
auto arguments = j.dump();
447+
auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true);
448448
if (is_partial() && !partial->healing_marker.marker.empty()) {
449449
auto idx = arguments.find(partial->healing_marker.json_dump_marker);
450450
if (idx != std::string::npos) {

common/json-partial.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <nlohmann/json.hpp>
66

77
#include <string>
8+
#include <regex>
89

910
using json = nlohmann::ordered_json;
1011

@@ -168,6 +169,47 @@ bool common_json_parse(
168169
}
169170
}
170171

172+
// Matches a potentially partial unicode escape sequence, e.g. \u, \uX, \uXX, \uXXX, \uXXXX
173+
static const std::regex partial_unicode_regex(R"(\\u(?:[0-9a-fA-F](?:[0-9a-fA-F](?:[0-9a-fA-F](?:[0-9a-fA-F])?)?)?)?$)");
174+
175+
auto is_high_surrogate = [&](const std::string & s) {
176+
// Check whether s starts with a (possibly partial) high-surrogate escape (U+D800-U+DBFF)
177+
return s.length() >= 4 &&
178+
s[0] == '\\' && s[1] == 'u' &&
179+
std::tolower(s[2]) == 'd' &&
180+
(s[3] == '8' || s[3] == '9' || std::tolower(s[3]) == 'a' || std::tolower(s[3]) == 'b');
181+
};
182+
183+
// Initialize the unicode marker to a low surrogate to handle the edge case
184+
// where a high surrogate (U+D800-U+DBFF) is immediately followed by a
185+
// backslash (\)
186+
std::string unicode_marker_padding = "udc00";
187+
std::smatch last_unicode_seq;
188+
189+
if (std::regex_search(str, last_unicode_seq, partial_unicode_regex)) {
190+
std::smatch second_last_seq;
191+
std::string prelude = str.substr(0, last_unicode_seq.position());
192+
193+
// Pad the escape sequence with 0s until it forms a complete sequence of 6 characters
194+
unicode_marker_padding = std::string(6 - last_unicode_seq.length(), '0');
195+
196+
if (is_high_surrogate(last_unicode_seq.str())) {
197+
// If the sequence is a partial match for a high surrogate, add a low surrogate (U+DC00-U+DFFF)
198+
unicode_marker_padding += "\\udc00";
199+
} else if (std::regex_search(prelude, second_last_seq, partial_unicode_regex)) {
200+
if (is_high_surrogate(second_last_seq.str())) {
201+
// If this follows a high surrogate, pad it to be a low surrogate
202+
if (last_unicode_seq.length() == 2) {
203+
unicode_marker_padding = "dc00";
204+
} else if (last_unicode_seq.length() == 3) {
205+
unicode_marker_padding = "c00";
206+
} else {
207+
// The original unicode_marker_padding is already padded with 0s
208+
}
209+
}
210+
}
211+
}
212+
171213
const auto & magic_seed = out.healing_marker.marker = healing_marker;//"$llama.cpp.json$";
172214

173215
if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY) {
@@ -186,6 +228,9 @@ bool common_json_parse(
186228
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
187229
// Was inside an object value string after an escape
188230
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
231+
} else if (can_parse(str + unicode_marker_padding + "\"" + closing)) {
232+
// Was inside an object value string after a partial unicode escape
233+
str += (out.healing_marker.json_dump_marker = unicode_marker_padding + magic_seed) + "\"" + closing;
189234
} else {
190235
// find last :
191236
auto last_pos = str.find_last_of(':');
@@ -205,6 +250,9 @@ bool common_json_parse(
205250
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
206251
// Was inside an array value string after an escape
207252
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
253+
} else if (can_parse(str + unicode_marker_padding + "\"" + closing)) {
254+
// Was inside an array value string after a partial unicode escape
255+
str += (out.healing_marker.json_dump_marker = unicode_marker_padding + magic_seed) + "\"" + closing;
208256
} else if (!was_maybe_number() && can_parse(str + ", 1" + closing)) {
209257
// Had just finished a value
210258
str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\"" + closing;
@@ -230,6 +278,9 @@ bool common_json_parse(
230278
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\": 1" + closing)) {
231279
// Was inside an object key string after an escape
232280
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\": 1" + closing;
281+
} else if (can_parse(str + unicode_marker_padding + "\": 1" + closing)) {
282+
// Was inside an object key string after a partial unicode escape
283+
str += (out.healing_marker.json_dump_marker = unicode_marker_padding + magic_seed) + "\": 1" + closing;
233284
} else {
234285
auto last_pos = str.find_last_of(':');
235286
if (last_pos == std::string::npos) {

common/json-schema-to-grammar.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@ static std::string build_repetition(const std::string & item_rule, int min_items
4141
return result;
4242
}
4343

44-
static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
45-
auto has_min = min_value != std::numeric_limits<int>::min();
46-
auto has_max = max_value != std::numeric_limits<int>::max();
44+
static void _build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
45+
auto has_min = min_value != std::numeric_limits<int64_t>::min();
46+
auto has_max = max_value != std::numeric_limits<int64_t>::max();
4747

4848
auto digit_range = [&](char from, char to) {
4949
out << "[";
@@ -159,7 +159,7 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
159159
if (has_min) {
160160
if (min_value < 0) {
161161
out << "\"-\" (";
162-
_build_min_max_int(std::numeric_limits<int>::min(), -min_value, out, decimals_left, /* top_level= */ false);
162+
_build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
163163
out << ") | [0] | [1-9] ";
164164
more_digits(0, decimals_left - 1);
165165
} else if (min_value == 0) {
@@ -194,7 +194,7 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
194194
}
195195
digit_range(c, c);
196196
out << " (";
197-
_build_min_max_int(std::stoi(min_s.substr(1)), std::numeric_limits<int>::max(), out, less_decimals, /* top_level= */ false);
197+
_build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
198198
out << ")";
199199
if (c < '9') {
200200
out << " | ";
@@ -216,7 +216,7 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
216216
_build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
217217
} else {
218218
out << "\"-\" (";
219-
_build_min_max_int(-max_value, std::numeric_limits<int>::max(), out, decimals_left, /* top_level= */ false);
219+
_build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
220220
out << ")";
221221
}
222222
return;
@@ -925,17 +925,17 @@ class SchemaConverter {
925925
int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
926926
return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
927927
} else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
928-
int min_value = std::numeric_limits<int>::min();
929-
int max_value = std::numeric_limits<int>::max();
928+
int64_t min_value = std::numeric_limits<int64_t>::min();
929+
int64_t max_value = std::numeric_limits<int64_t>::max();
930930
if (schema.contains("minimum")) {
931-
min_value = schema["minimum"].get<int>();
931+
min_value = schema["minimum"].get<int64_t>();
932932
} else if (schema.contains("exclusiveMinimum")) {
933-
min_value = schema["exclusiveMinimum"].get<int>() + 1;
933+
min_value = schema["exclusiveMinimum"].get<int64_t>() + 1;
934934
}
935935
if (schema.contains("maximum")) {
936-
max_value = schema["maximum"].get<int>();
936+
max_value = schema["maximum"].get<int64_t>();
937937
} else if (schema.contains("exclusiveMaximum")) {
938-
max_value = schema["exclusiveMaximum"].get<int>() - 1;
938+
max_value = schema["exclusiveMaximum"].get<int64_t>() - 1;
939939
}
940940
std::stringstream out;
941941
out << "(";

embd_res/kcpp_sdui.embd

Lines changed: 60 additions & 60 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)