Skip to content

Commit 918de1d

Browse files
committed
add parsing
1 parent 9acfada commit 918de1d

File tree

4 files changed

+356
-18
lines changed

4 files changed

+356
-18
lines changed

common/parsers/harmony.cpp

Lines changed: 66 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,73 @@
11
#include "harmony.h"
2+
#include "regex-partial.h"
23

34
harmony_msg_parser::harmony_msg_parser(common_chat_msg_parser & builder)
4-
: builder(builder)
5-
{
6-
// TODO
7-
}
5+
: builder(builder) {}
86

97
void harmony_msg_parser::parse() {
108
// TODO @ngxson : this won't work with --special enabled, we should fix that
11-
builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|start|>assistant<|channel|>final<|message|>");
12-
builder.add_content(builder.consume_rest());
9+
//builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|start|>assistant<|channel|>final<|message|>");
10+
//builder.add_content(builder.consume_rest());
11+
channel();
12+
}
13+
14+
void harmony_msg_parser::channel() {
15+
if (builder.try_consume_literal("<|channel|>")) {
16+
static const common_regex channel_type_regexp("(final|analysis|commentary)");
17+
if (auto res = builder.try_consume_regex(channel_type_regexp)) {
18+
auto type = builder.str(res->groups[0]);
19+
if (type == "analysis") {
20+
analysis();
21+
} else if (type == "final") {
22+
final();
23+
} else if (type == "commentary") {
24+
commentary();
25+
}
26+
}
27+
}
28+
}
29+
30+
void harmony_msg_parser::analysis() {
31+
if (builder.try_consume_literal("<|message|>")) {
32+
static const common_regex end("<\\|end\\|>");
33+
if (auto res = builder.try_find_regex(end, std::string::npos, false)) {
34+
builder.add_reasoning_content(res->prelude);
35+
if (builder.try_consume_literal("<|start|>")) {
36+
start();
37+
}
38+
} else {
39+
builder.add_reasoning_content(builder.consume_rest());
40+
}
41+
}
42+
}
43+
44+
void harmony_msg_parser::start() {
45+
if (builder.try_consume_literal("assistant")) {
46+
channel();
47+
}
48+
}
49+
50+
void harmony_msg_parser::final() {
51+
if (builder.try_consume_literal("<|message|>")) {
52+
builder.add_content(builder.consume_rest());
53+
}
54+
}
55+
56+
void harmony_msg_parser::commentary() {
57+
if (builder.try_consume_literal(" to=")) {
58+
user_function();
59+
}
60+
}
61+
62+
void harmony_msg_parser::user_function() {
63+
static const common_regex tool_call_regex(
64+
"functions\\.([a-zA-Z_][a-zA-Z0-9_]*)\\s*(<\\|constrain\\|>([a-z]+))?<\\|message\\|>"
65+
);
66+
if (auto res = builder.try_consume_regex(tool_call_regex)) {
67+
auto name = builder.str(res->groups[1]);
68+
//auto constrain_type = builder.str(res->groups[3]);
69+
auto args = builder.consume_rest();
70+
71+
builder.add_tool_call(name, "", args);
72+
}
1373
}

common/parsers/harmony.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,12 @@ class harmony_msg_parser {
1313
harmony_msg_parser(common_chat_msg_parser & builder);
1414

1515
void parse();
16+
17+
private:
18+
void channel();
19+
void analysis();
20+
void final();
21+
void commentary();
22+
void start();
23+
void user_function();
1624
};

src/llama-vocab.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "llama-impl.h"
66
#include "llama-model-loader.h"
77

8+
#include "llama.h"
89
#include "unicode.h"
910

1011
#include <algorithm>
@@ -2339,13 +2340,12 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
23392340
}
23402341
}
23412342

2342-
// @aldehir : check if this is still needed
2343-
// // @ngxson : quick hack for gpt-oss, always render these tokens
2344-
// for (const auto & t : token_to_id) {
2345-
// if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>") {
2346-
// id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
2347-
// }
2348-
// }
2343+
// @ngxson : quick hack for gpt-oss, always render these tokens
2344+
for (const auto & t : token_to_id) {
2345+
if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>") {
2346+
id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
2347+
}
2348+
}
23492349

23502350
// sanity checks
23512351
if (special_eos_id != LLAMA_TOKEN_NULL && special_eog_ids.count(special_eos_id) == 0) {

0 commit comments

Comments
 (0)