Skip to content

Commit 8357335

Browse files
committed
Revert "common : introduce composable PEG parser combinators for chat parsing and new jinja template engine (ikawrakow#1369)"
This reverts commit ab1d740.
1 parent e128165 commit 8357335

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

71 files changed

+4848
-19195
lines changed

README.md

Lines changed: 1 addition & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -222,47 +222,4 @@ Contributions in form of pull requests, issue submissions (bug reports, feature
222222

223223
## License
224224

225-
- [subprocess.h](https://github.com/sheredom/subprocess.h) - Single-header process launching solution for C and C++ - Public domain
226-
- [server](example/server/README.md)
227-
- [GBNF grammars](grammars/README.md)
228-
229-
#### Development documentation
230-
231-
- [How to build](docs/build.md)
232-
- [Running on Docker](docs/docker.md)
233-
- [Performance troubleshooting](docs/development/token_generation_performance_tips.md)
234-
- [GGML tips & tricks](https://github.com/ggml-org/llama.cpp/wiki/GGML-Tips-&-Tricks)
235-
236-
#### Seminal papers and background on the models
237-
238-
If your issue is with model generation quality, then please at least scan the following links and papers to understand the limitations of LLaMA models. This is especially important when choosing an appropriate model size and appreciating both the significant and subtle differences between LLaMA models and ChatGPT:
239-
- LLaMA:
240-
- [Introducing LLaMA: A foundational, 65-billion-parameter large language model](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/)
241-
- [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971)
242-
- GPT-3
243-
- [Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165)
244-
- GPT-3.5 / InstructGPT / ChatGPT:
245-
- [Aligning language models to follow instructions](https://openai.com/research/instruction-following)
246-
- [Training language models to follow instructions with human feedback](https://arxiv.org/abs/2203.02155)
247-
248-
## Completions
249-
Command-line completion is available for some environments.
250-
251-
#### Bash Completion
252-
```bash
253-
$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash
254-
$ source ~/.llama-completion.bash
255-
```
256-
Optionally this can be added to your `.bashrc` or `.bash_profile` to load it
257-
automatically. For example:
258-
```console
259-
$ echo "source ~/.llama-completion.bash" >> ~/.bashrc
260-
```
261-
262-
## Dependencies
263-
264-
- [yhirose/cpp-httplib](https://github.com/yhirose/cpp-httplib) - Single-header HTTP server, used by `llama-server` - MIT license
265-
- [stb-image](https://github.com/nothings/stb) - Single-header image format decoder, used by multimodal subsystem - Public domain
266-
- [nlohmann/json](https://github.com/nlohmann/json) - Single-header JSON library, used by various tools/examples - MIT License
267-
- [miniaudio.h](https://github.com/mackron/miniaudio) - Single-header audio format decoder, used by multimodal subsystem - Public domain
268-
- [subprocess.h](https://github.com/sheredom/subprocess.h) - Single-header process launching solution for C and C++ - Public domain
225+
MIT

ci/run.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ function gg_run_ctest_release {
141141
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
142142

143143
if [ -z ${GG_BUILD_LOW_PERF} ]; then
144-
(time ctest --output-on-failure -L 'main|python' ) 2>&1 | tee -a $OUT/${ci}-ctest.log
144+
(time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
145145
else
146146
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
147147
fi

common/CMakeLists.txt

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -58,8 +58,6 @@ add_library(${TARGET} STATIC
5858
chat-parser.h
5959
chat-parser-xml-toolcall.h
6060
chat-parser-xml-toolcall.cpp
61-
chat-peg-parser.cpp
62-
chat-peg-parser.h
6361
common.cpp
6462
sampling.h
6563
sampling.cpp
@@ -77,27 +75,11 @@ add_library(${TARGET} STATIC
7775
ngram-cache.h
7876
ngram-map.cpp
7977
ngram-map.h
80-
peg-parser.cpp
81-
peg-parser.h
8278
speculative.cpp
83-
unicode.cpp
84-
unicode.h
8579
ngram-mod.cpp
8680
ngram-mod.h
8781
regex-partial.cpp
8882
regex-partial.h
89-
jinja/lexer.cpp
90-
jinja/lexer.h
91-
jinja/parser.cpp
92-
jinja/parser.h
93-
jinja/runtime.cpp
94-
jinja/runtime.h
95-
jinja/value.cpp
96-
jinja/value.h
97-
jinja/string.cpp
98-
jinja/string.h
99-
jinja/caps.cpp
100-
jinja/caps.h
10183
)
10284

10385
if (BUILD_SHARED_LIBS)

common/chat-parser-xml-toolcall.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -842,7 +842,7 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
842842
}
843843

844844
// remove potential partial suffix
845-
if (builder.pos() == builder.input().size() && builder.is_partial()) {
845+
if (builder.pos() == builder.input().size()) {
846846
if (unclosed_reasoning_content.empty()) {
847847
rstrip(content);
848848
trim_potential_partial_word(content);

common/chat-parser.cpp

Lines changed: 12 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
#include "chat-parser.h"
2-
#include "chat-peg-parser.h"
32
#include "common.h"
43
#include "log.h"
5-
#include "peg-parser.h"
64
#include "regex-partial.h"
75

86
#include <algorithm>
@@ -551,7 +549,7 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
551549
if (is_arguments_path({})) {
552550
// Entire JSON is the arguments and was parsed fully.
553551
return consume_json_result {
554-
partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true),
552+
partial->json.dump(),
555553
/* .is_partial = */ false,
556554
};
557555
}
@@ -563,7 +561,7 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
563561
std::vector<std::string> path;
564562
std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
565563
if (is_arguments_path(path)) {
566-
auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true);
564+
auto arguments = j.dump();
567565
if (is_partial() && !partial->healing_marker.marker.empty()) {
568566
auto idx = arguments.find(partial->healing_marker.json_dump_marker);
569567
if (idx != std::string::npos) {
@@ -898,19 +896,19 @@ static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
898896

899897
static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
900898
static const xml_tool_call_format form = ([]() {
901-
xml_tool_call_format form{};
899+
xml_tool_call_format form {};
902900
form.scope_start = "<tool_call>";
903-
form.tool_start = "<function=";
904-
form.tool_sep = ">";
905-
form.key_start = "<parameter=";
901+
form.tool_start = "<function=";
902+
form.tool_sep = ">";
903+
form.key_start = "<parameter=";
906904
form.key_val_sep = ">";
907-
form.val_end = "</parameter>";
908-
form.tool_end = "</function>";
909-
form.scope_end = "</tool_call>";
905+
form.val_end = "</parameter>";
906+
form.tool_end = "</function>";
907+
form.scope_end = "</tool_call>";
910908
form.trim_raw_argval = true;
911909
return form;
912-
})();
913-
builder.consume_reasoning_with_xml_tool_calls(form);
910+
})();
911+
builder.consume_reasoning_with_xml_tool_calls(form);
914912
}
915913

916914
static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
@@ -1510,11 +1508,6 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
15101508
}
15111509

15121510
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
1513-
if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
1514-
syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
1515-
syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
1516-
return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
1517-
}
15181511
common_chat_msg_parser builder(input, is_partial, syntax);
15191512
try {
15201513
common_chat_parse(builder);
@@ -1528,40 +1521,7 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
15281521
}
15291522
auto msg = builder.result();
15301523
if (!is_partial) {
1531-
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
1532-
}
1533-
return msg;
1534-
}
1535-
1536-
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
1537-
if (parser.empty()) {
1538-
throw std::runtime_error("Failed to parse due to missing parser definition.");
1539-
}
1540-
1541-
LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
1542-
1543-
common_peg_parse_context ctx(input, is_partial);
1544-
auto result = parser.parse(ctx);
1545-
if (result.fail()) {
1546-
throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end));
1547-
}
1548-
1549-
common_chat_msg msg;
1550-
msg.role = "assistant";
1551-
1552-
if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
1553-
auto mapper = common_chat_peg_native_mapper(msg);
1554-
mapper.from_ast(ctx.ast, result);
1555-
} else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
1556-
auto mapper = common_chat_peg_constructed_mapper(msg);
1557-
mapper.from_ast(ctx.ast, result);
1558-
} else {
1559-
// Generic mapper
1560-
auto mapper = common_chat_peg_mapper(msg);
1561-
mapper.from_ast(ctx.ast, result);
1562-
}
1563-
if (!is_partial) {
1564-
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
1524+
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
15651525
}
15661526
return msg;
15671527
}

common/chat-peg-parser.cpp

Lines changed: 0 additions & 124 deletions
This file was deleted.

0 commit comments

Comments
 (0)