
Commit 2089ff7

Minor nuisances

1 parent a6b68d0 commit 2089ff7

File tree

4 files changed: +156 -160 lines changed

src/models/rwkv6-base.cpp

Lines changed: 2 additions & 2 deletions

@@ -2,7 +2,7 @@

 llm_build_rwkv6_base::llm_build_rwkv6_base(const llama_model & model, const llm_graph_params & params) :
     llm_graph_context(params),
-    model(model) {};
+    model(model) {}

 ggml_tensor * llm_build_rwkv6_base::build_rwkv6_channel_mix(const llama_layer * layer,
                                                             ggml_tensor * cur,
@@ -24,7 +24,7 @@ ggml_tensor * llm_build_rwkv6_base::build_rwkv6_channel_mix(const llama_layer *
             GGML_ABORT("fatal error");
     };
     return cur;
-};
+}

 ggml_tensor * llm_build_rwkv6_base::build_rwkv6_time_mix(llm_graph_input_rs * inp,
                                                          ggml_tensor * cur,
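
The dropped semicolons are not purely cosmetic: `model(model) {};` is a function definition followed by an empty declaration, which compilers can flag (e.g. `clang++ -Wextra-semi`). A minimal reproduction outside llama.cpp; the `widget` type here is hypothetical, not a llama.cpp type:

    // extra-semi.cpp -- build with: clang++ -Wextra-semi extra-semi.cpp
    struct widget {
        int v;
        explicit widget(int v);
    };

    // the trailing ';' after the body is an empty declaration
    widget::widget(int v) : v(v) {};  // warning: extra ';' after function definition

    int main() {
        return widget(7).v != 7;  // returns 0 on success
    }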

src/models/rwkv6.cpp

Lines changed: 87 additions & 89 deletions

@@ -1,96 +1,94 @@
 #include "models.h"

-    llm_build_rwkv6::llm_build_rwkv6(const llama_model & model, const llm_graph_params & params) : llm_build_rwkv6_base(model, params) {
-        GGML_ASSERT(hparams.token_shift_count == 2);
-
-        ggml_tensor * cur;
-        ggml_tensor * inpL;
-
-        inpL = build_inp_embd(model.tok_embd);
-        inpL = build_norm(inpL, model.tok_norm, model.tok_norm_b, LLM_NORM, -1);
-
-        auto * rs_inp = build_rs_inp();
-
-        const auto n_embd = hparams.n_embd;
-        const auto n_seq_tokens = ubatch.n_seq_tokens;
-        const auto n_seqs = ubatch.n_seqs;
-
-        ggml_tensor * inp_out_ids = build_inp_out_ids();
-
-        for (int il = 0; il < n_layer; ++il) {
-            const llama_layer * layer = &model.layers[il];
-            inpL = ggml_reshape_3d(ctx0, inpL, n_embd, n_seq_tokens, n_seqs);
-
-            ggml_tensor * token_shift = build_rwkv_token_shift_load(rs_inp, ubatch, il);
-
-            ggml_tensor * att_shift = ggml_view_3d(ctx0, token_shift, n_embd, 1, n_seqs, token_shift->nb[1], token_shift->nb[2], 0);
-            ggml_tensor * ffn_shift = ggml_view_3d(ctx0, token_shift, n_embd, 1, n_seqs, token_shift->nb[1], token_shift->nb[2], n_embd * ggml_element_size(token_shift));
-
-            ggml_tensor * att_norm = build_norm(inpL, layer->attn_norm, layer->attn_norm_b, LLM_NORM, il);
-            cb(att_norm, "attn_norm", il);
-
-            ggml_tensor * x_prev = ggml_concat(
-                ctx0,
-                att_shift,
-                ggml_view_3d(ctx0, att_norm, n_embd, n_seq_tokens - 1, n_seqs, att_norm->nb[1], att_norm->nb[2], 0),
-                1
-            );
-
-            cur = build_rwkv6_time_mix(rs_inp, att_norm, x_prev, ubatch, il);
-
-            ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpL);
-            cb(ffn_inp, "ffn_inp", il);
-
-            ggml_tensor * ffn_norm = build_norm(ffn_inp, layer->attn_norm_2, layer->attn_norm_2_b, LLM_NORM, il);
-            cb(ffn_norm, "ffn_norm", il);
-
-            x_prev = ggml_concat(
-                ctx0,
-                ffn_shift,
-                ggml_view_3d(ctx0, ffn_norm, n_embd, n_seq_tokens - 1, n_seqs, ffn_norm->nb[1], ffn_norm->nb[2], 0),
-                1
-            );
-
-            token_shift = ggml_concat(ctx0,
-                ggml_view_3d(ctx0, att_norm, n_embd, 1, n_seqs, att_norm->nb[1], att_norm->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(att_norm)),
-                ggml_view_3d(ctx0, ffn_norm, n_embd, 1, n_seqs, ffn_norm->nb[1], ffn_norm->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(ffn_norm)),
-                1
-            );
-            ggml_build_forward_expand(gf, build_rwkv_token_shift_store(token_shift, ubatch, il));
-
-            ffn_inp = ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens);
-            ffn_norm = ggml_reshape_2d(ctx0, ffn_norm, n_embd, n_tokens);
-            x_prev = ggml_reshape_2d(ctx0, x_prev, n_embd, n_tokens);
-            cur = ggml_reshape_2d(ctx0, cur, n_embd, n_tokens);
-
-            if (il == n_layer - 1 && inp_out_ids) {
-                ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
-                ffn_norm = ggml_get_rows(ctx0, ffn_norm, inp_out_ids);
-                x_prev = ggml_get_rows(ctx0, x_prev, inp_out_ids);
-                cur = ggml_get_rows(ctx0, cur, inp_out_ids);
-            }
-            cur = build_rwkv6_channel_mix(layer, ffn_norm, x_prev, LLM_ARCH_RWKV6);
-            cur = ggml_add(ctx0, cur, ffn_inp);
-
-            if (hparams.rescale_every_n_layers != 0 && (il + 1) % hparams.rescale_every_n_layers == 0) {
-                cur = ggml_scale(ctx0, cur, 0.5F);
-            }
-            cur = build_cvec(cur, il);
-            cb(cur, "l_out", il);
-
-            // input for next layer
-            inpL = cur;
-        }
-        cur = inpL;
-        cur = build_norm(cur, model.output_norm, model.output_norm_b, LLM_NORM, -1);
+llm_build_rwkv6::llm_build_rwkv6(const llama_model & model, const llm_graph_params & params) :
+    llm_build_rwkv6_base(model, params) {
+    GGML_ASSERT(hparams.token_shift_count == 2);
+
+    ggml_tensor * cur;
+    ggml_tensor * inpL;
+
+    inpL = build_inp_embd(model.tok_embd);
+    inpL = build_norm(inpL, model.tok_norm, model.tok_norm_b, LLM_NORM, -1);
+
+    auto * rs_inp = build_rs_inp();
+
+    const auto n_embd = hparams.n_embd;
+    const auto n_seq_tokens = ubatch.n_seq_tokens;
+    const auto n_seqs = ubatch.n_seqs;
+
+    ggml_tensor * inp_out_ids = build_inp_out_ids();
+
+    for (int il = 0; il < n_layer; ++il) {
+        const llama_layer * layer = &model.layers[il];
+        inpL = ggml_reshape_3d(ctx0, inpL, n_embd, n_seq_tokens, n_seqs);
+
+        ggml_tensor * token_shift = build_rwkv_token_shift_load(rs_inp, ubatch, il);
+
+        ggml_tensor * att_shift =
+            ggml_view_3d(ctx0, token_shift, n_embd, 1, n_seqs, token_shift->nb[1], token_shift->nb[2], 0);
+        ggml_tensor * ffn_shift = ggml_view_3d(ctx0, token_shift, n_embd, 1, n_seqs, token_shift->nb[1],
+                                               token_shift->nb[2], n_embd * ggml_element_size(token_shift));
+
+        ggml_tensor * att_norm = build_norm(inpL, layer->attn_norm, layer->attn_norm_b, LLM_NORM, il);
+        cb(att_norm, "attn_norm", il);
+
+        ggml_tensor * x_prev = ggml_concat(
+            ctx0, att_shift,
+            ggml_view_3d(ctx0, att_norm, n_embd, n_seq_tokens - 1, n_seqs, att_norm->nb[1], att_norm->nb[2], 0), 1);

-        cb(cur, "result_norm", -1);
-        res->t_embd = cur;
+        cur = build_rwkv6_time_mix(rs_inp, att_norm, x_prev, ubatch, il);

-        cur = build_lora_mm(model.output, cur);
+        ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpL);
+        cb(ffn_inp, "ffn_inp", il);

-        cb(cur, "result_output", -1);
-        res->t_logits = cur;
+        ggml_tensor * ffn_norm = build_norm(ffn_inp, layer->attn_norm_2, layer->attn_norm_2_b, LLM_NORM, il);
+        cb(ffn_norm, "ffn_norm", il);

-        ggml_build_forward_expand(gf, cur);
+        x_prev = ggml_concat(
+            ctx0, ffn_shift,
+            ggml_view_3d(ctx0, ffn_norm, n_embd, n_seq_tokens - 1, n_seqs, ffn_norm->nb[1], ffn_norm->nb[2], 0), 1);
+
+        token_shift = ggml_concat(ctx0,
+                                  ggml_view_3d(ctx0, att_norm, n_embd, 1, n_seqs, att_norm->nb[1], att_norm->nb[2],
+                                               (n_seq_tokens - 1) * n_embd * ggml_element_size(att_norm)),
+                                  ggml_view_3d(ctx0, ffn_norm, n_embd, 1, n_seqs, ffn_norm->nb[1], ffn_norm->nb[2],
+                                               (n_seq_tokens - 1) * n_embd * ggml_element_size(ffn_norm)),
+                                  1);
+        ggml_build_forward_expand(gf, build_rwkv_token_shift_store(token_shift, ubatch, il));
+
+        ffn_inp = ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens);
+        ffn_norm = ggml_reshape_2d(ctx0, ffn_norm, n_embd, n_tokens);
+        x_prev = ggml_reshape_2d(ctx0, x_prev, n_embd, n_tokens);
+        cur = ggml_reshape_2d(ctx0, cur, n_embd, n_tokens);
+
+        if (il == n_layer - 1 && inp_out_ids) {
+            ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
+            ffn_norm = ggml_get_rows(ctx0, ffn_norm, inp_out_ids);
+            x_prev = ggml_get_rows(ctx0, x_prev, inp_out_ids);
+            cur = ggml_get_rows(ctx0, cur, inp_out_ids);
+        }
+        cur = build_rwkv6_channel_mix(layer, ffn_norm, x_prev, LLM_ARCH_RWKV6);
+        cur = ggml_add(ctx0, cur, ffn_inp);
+
+        if (hparams.rescale_every_n_layers != 0 && (il + 1) % hparams.rescale_every_n_layers == 0) {
+            cur = ggml_scale(ctx0, cur, 0.5F);
+        }
+        cur = build_cvec(cur, il);
+        cb(cur, "l_out", il);
+
+        // input for next layer
+        inpL = cur;
     }
+    cur = inpL;
+    cur = build_norm(cur, model.output_norm, model.output_norm_b, LLM_NORM, -1);
+
+    cb(cur, "result_norm", -1);
+    res->t_embd = cur;
+
+    cur = build_lora_mm(model.output, cur);
+
+    cb(cur, "result_output", -1);
+    res->t_logits = cur;
+
+    ggml_build_forward_expand(gf, cur);
+}
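
For readers new to this graph code: the two `ggml_concat` calls implement RWKV's token shift. `x_prev` pairs each token with its predecessor, seeding position 0 from the state saved by the previous ubatch, and the new `token_shift` state is the last token of `att_norm`/`ffn_norm`. A minimal sketch of that data movement for one sequence, with plain vectors standing in for ggml tensors (illustrative only, not the ggml API):

    // token-shift.cpp -- what x_prev and the stored token_shift contain
    #include <cstdio>
    #include <vector>

    int main() {
        const int n_embd = 4, n_seq_tokens = 3;
        std::vector<float> shift(n_embd, 0.5f);       // state loaded per layer (att or ffn half)
        std::vector<float> x(n_embd * n_seq_tokens);  // att_norm (or ffn_norm), row-major
        for (int i = 0; i < n_embd * n_seq_tokens; ++i) x[i] = (float) i;

        // x_prev = concat(shift, x[0 .. n_seq_tokens-2]) along the token dimension
        std::vector<float> x_prev(shift);
        x_prev.insert(x_prev.end(), x.begin(), x.begin() + n_embd * (n_seq_tokens - 1));

        // next state = last token of x, i.e. the view at offset (n_seq_tokens-1)*n_embd
        std::vector<float> next_shift(x.end() - n_embd, x.end());

        printf("x_prev[0] = %.1f (from state), next_shift[0] = %.1f (last token)\n",
               x_prev[0], next_shift[0]);
        return 0;
    }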

src/models/rwkv7-base.cpp

Lines changed: 2 additions & 2 deletions

@@ -2,7 +2,7 @@

 llm_build_rwkv7_base::llm_build_rwkv7_base(const llama_model & model, const llm_graph_params & params) :
     llm_graph_context(params),
-    model(model) {};
+    model(model) {}

 ggml_tensor * llm_build_rwkv7_base::build_rwkv7_channel_mix(const llama_layer * layer,
                                                             ggml_tensor * cur,
@@ -23,7 +23,7 @@ ggml_tensor * llm_build_rwkv7_base::build_rwkv7_channel_mix(const llama_layer *
             GGML_ABORT("fatal error");
     };
     return cur;
-};
+}

 ggml_tensor * llm_build_rwkv7_base::build_rwkv7_time_mix(llm_graph_input_rs * inp,
                                                          ggml_tensor * cur,
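
Both base files keep the same dispatch shape in `build_rwkv*_channel_mix`, visible in the hunk context: a switch on the architecture with a hard abort in the default case. A reduced stand-alone sketch of that pattern; the enum and function here are illustrative, not llama.cpp definitions:

    // dispatch.cpp -- switch-on-arch with an abort()ing default
    #include <cstdio>
    #include <cstdlib>

    enum arch_t { ARCH_RWKV6, ARCH_RWKV7 };

    static const char * channel_mix_kind(arch_t arch) {
        switch (arch) {
            case ARCH_RWKV6: return "rwkv6 channel mix";
            case ARCH_RWKV7: return "rwkv7 channel mix";
            default:
                fprintf(stderr, "fatal error\n");  // stands in for GGML_ABORT
                abort();
        }
    }

    int main() {
        printf("%s\n", channel_mix_kind(ARCH_RWKV7));
        return 0;
    }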

src/models/rwkv7.cpp

Lines changed: 65 additions & 67 deletions

@@ -1,92 +1,90 @@
 #include "models.h"

-    llm_build_rwkv7::llm_build_rwkv7(const llama_model & model, const llm_graph_params & params) : llm_build_rwkv7_base(model, params) {
-        GGML_ASSERT(hparams.token_shift_count == 2);
+llm_build_rwkv7::llm_build_rwkv7(const llama_model & model, const llm_graph_params & params) :
+    llm_build_rwkv7_base(model, params) {
+    GGML_ASSERT(hparams.token_shift_count == 2);

-        ggml_tensor * cur;
-        ggml_tensor * inpL;
-        ggml_tensor * v_first = nullptr;
+    ggml_tensor * cur;
+    ggml_tensor * inpL;
+    ggml_tensor * v_first = nullptr;

-        inpL = build_inp_embd(model.tok_embd);
-        inpL = build_norm(inpL, model.tok_norm, model.tok_norm_b, LLM_NORM, -1);
+    inpL = build_inp_embd(model.tok_embd);
+    inpL = build_norm(inpL, model.tok_norm, model.tok_norm_b, LLM_NORM, -1);

-        auto * rs_inp = build_rs_inp();
+    auto * rs_inp = build_rs_inp();

-        const auto n_embd = hparams.n_embd;
-        const auto n_seq_tokens = ubatch.n_seq_tokens;
-        const auto n_seqs = ubatch.n_seqs;
+    const auto n_embd = hparams.n_embd;
+    const auto n_seq_tokens = ubatch.n_seq_tokens;
+    const auto n_seqs = ubatch.n_seqs;

-        ggml_tensor * inp_out_ids = build_inp_out_ids();
+    ggml_tensor * inp_out_ids = build_inp_out_ids();

-        for (int il = 0; il < n_layer; ++il) {
-            const llama_layer * layer = &model.layers[il];
-            inpL = ggml_reshape_3d(ctx0, inpL, n_embd, n_seq_tokens, n_seqs);
+    for (int il = 0; il < n_layer; ++il) {
+        const llama_layer * layer = &model.layers[il];
+        inpL = ggml_reshape_3d(ctx0, inpL, n_embd, n_seq_tokens, n_seqs);

-            ggml_tensor * token_shift = build_rwkv_token_shift_load(rs_inp, ubatch, il);
+        ggml_tensor * token_shift = build_rwkv_token_shift_load(rs_inp, ubatch, il);

-            ggml_tensor * att_shift = ggml_view_3d(ctx0, token_shift, n_embd, 1, n_seqs, token_shift->nb[1], token_shift->nb[2], 0);
-            ggml_tensor * ffn_shift = ggml_view_3d(ctx0, token_shift, n_embd, 1, n_seqs, token_shift->nb[1], token_shift->nb[2], n_embd * ggml_element_size(token_shift));
+        ggml_tensor * att_shift =
+            ggml_view_3d(ctx0, token_shift, n_embd, 1, n_seqs, token_shift->nb[1], token_shift->nb[2], 0);
+        ggml_tensor * ffn_shift = ggml_view_3d(ctx0, token_shift, n_embd, 1, n_seqs, token_shift->nb[1],
+                                               token_shift->nb[2], n_embd * ggml_element_size(token_shift));

-            ggml_tensor * att_norm = build_norm(inpL, layer->attn_norm, layer->attn_norm_b, LLM_NORM, il);
-            cb(att_norm, "attn_norm", il);
+        ggml_tensor * att_norm = build_norm(inpL, layer->attn_norm, layer->attn_norm_b, LLM_NORM, il);
+        cb(att_norm, "attn_norm", il);

-            ggml_tensor * x_prev = ggml_concat(
-                ctx0,
-                att_shift,
-                ggml_view_3d(ctx0, att_norm, n_embd, n_seq_tokens - 1, n_seqs, att_norm->nb[1], att_norm->nb[2], 0),
-                1
-            );
+        ggml_tensor * x_prev = ggml_concat(
+            ctx0, att_shift,
+            ggml_view_3d(ctx0, att_norm, n_embd, n_seq_tokens - 1, n_seqs, att_norm->nb[1], att_norm->nb[2], 0), 1);

-            cur = build_rwkv7_time_mix(rs_inp, att_norm, x_prev, v_first, ubatch, il);
+        cur = build_rwkv7_time_mix(rs_inp, att_norm, x_prev, v_first, ubatch, il);

-            ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpL);
-            cb(ffn_inp, "ffn_inp", il);
+        ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpL);
+        cb(ffn_inp, "ffn_inp", il);

-            ggml_tensor * ffn_norm = build_norm(ffn_inp, layer->attn_norm_2, layer->attn_norm_2_b, LLM_NORM, il);
-            cb(ffn_norm, "ffn_norm", il);
+        ggml_tensor * ffn_norm = build_norm(ffn_inp, layer->attn_norm_2, layer->attn_norm_2_b, LLM_NORM, il);
+        cb(ffn_norm, "ffn_norm", il);

-            x_prev = ggml_concat(
-                ctx0,
-                ffn_shift,
-                ggml_view_3d(ctx0, ffn_norm, n_embd, n_seq_tokens - 1, n_seqs, ffn_norm->nb[1], ffn_norm->nb[2], 0),
-                1
-            );
+        x_prev = ggml_concat(
+            ctx0, ffn_shift,
+            ggml_view_3d(ctx0, ffn_norm, n_embd, n_seq_tokens - 1, n_seqs, ffn_norm->nb[1], ffn_norm->nb[2], 0), 1);

-            token_shift = ggml_concat(ctx0,
-                ggml_view_3d(ctx0, att_norm, n_embd, 1, n_seqs, att_norm->nb[1], att_norm->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(att_norm)),
-                ggml_view_3d(ctx0, ffn_norm, n_embd, 1, n_seqs, ffn_norm->nb[1], ffn_norm->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(ffn_norm)),
-                1
-            );
-            ggml_build_forward_expand(gf, build_rwkv_token_shift_store(token_shift, ubatch, il));
+        token_shift = ggml_concat(ctx0,
+                                  ggml_view_3d(ctx0, att_norm, n_embd, 1, n_seqs, att_norm->nb[1], att_norm->nb[2],
+                                               (n_seq_tokens - 1) * n_embd * ggml_element_size(att_norm)),
+                                  ggml_view_3d(ctx0, ffn_norm, n_embd, 1, n_seqs, ffn_norm->nb[1], ffn_norm->nb[2],
+                                               (n_seq_tokens - 1) * n_embd * ggml_element_size(ffn_norm)),
+                                  1);
+        ggml_build_forward_expand(gf, build_rwkv_token_shift_store(token_shift, ubatch, il));

-            ffn_inp = ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens);
-            ffn_norm = ggml_reshape_2d(ctx0, ffn_norm, n_embd, n_tokens);
-            x_prev = ggml_reshape_2d(ctx0, x_prev, n_embd, n_tokens);
+        ffn_inp = ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens);
+        ffn_norm = ggml_reshape_2d(ctx0, ffn_norm, n_embd, n_tokens);
+        x_prev = ggml_reshape_2d(ctx0, x_prev, n_embd, n_tokens);

-            if (il == n_layer - 1 && inp_out_ids) {
-                ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
-                ffn_norm = ggml_get_rows(ctx0, ffn_norm, inp_out_ids);
-                x_prev = ggml_get_rows(ctx0, x_prev, inp_out_ids);
-            }
-            cur = build_rwkv7_channel_mix(layer, ffn_norm, x_prev, LLM_ARCH_RWKV7);
-            cur = ggml_add(ctx0, cur, ffn_inp);
+        if (il == n_layer - 1 && inp_out_ids) {
+            ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
+            ffn_norm = ggml_get_rows(ctx0, ffn_norm, inp_out_ids);
+            x_prev = ggml_get_rows(ctx0, x_prev, inp_out_ids);
+        }
+        cur = build_rwkv7_channel_mix(layer, ffn_norm, x_prev, LLM_ARCH_RWKV7);
+        cur = ggml_add(ctx0, cur, ffn_inp);

-            cur = build_cvec(cur, il);
-            cb(cur, "l_out", il);
+        cur = build_cvec(cur, il);
+        cb(cur, "l_out", il);

-            // input for next layer
-            inpL = cur;
-        }
-        cur = inpL;
-        cur = build_norm(cur, model.output_norm, model.output_norm_b, LLM_NORM, -1);
+        // input for next layer
+        inpL = cur;
+    }
+    cur = inpL;
+    cur = build_norm(cur, model.output_norm, model.output_norm_b, LLM_NORM, -1);

-        cb(cur, "result_norm", -1);
-        res->t_embd = cur;
+    cb(cur, "result_norm", -1);
+    res->t_embd = cur;

-        cur = build_lora_mm(model.output, cur);
+    cur = build_lora_mm(model.output, cur);

-        cb(cur, "result_output", -1);
-        res->t_logits = cur;
+    cb(cur, "result_output", -1);
+    res->t_logits = cur;

-        ggml_build_forward_expand(gf, cur);
-    }
+    ggml_build_forward_expand(gf, cur);
+}
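
The rwkv7 build differs from rwkv6 mainly in threading `v_first` through `build_rwkv7_time_mix` and dropping the per-layer rescale; the output trimming via `ggml_get_rows` on the last layer is the same idea in both files. A reduced sketch of that row selection over flat vectors; `get_rows` here is a hypothetical stand-in, not the ggml function:

    // get-rows.cpp -- only tokens listed in inp_out_ids keep flowing to the head
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // src holds n_tokens rows of n_embd floats; ids lists the rows to keep
    static std::vector<float> get_rows(const std::vector<float> & src, int n_embd,
                                       const std::vector<int32_t> & ids) {
        std::vector<float> dst;
        dst.reserve(ids.size() * n_embd);
        for (int32_t id : ids) {
            dst.insert(dst.end(), src.begin() + id * n_embd, src.begin() + (id + 1) * n_embd);
        }
        return dst;
    }

    int main() {
        const int n_embd = 2, n_tokens = 4;
        std::vector<float> cur(n_embd * n_tokens);
        for (int i = 0; i < (int) cur.size(); ++i) cur[i] = (float) i;

        // e.g. only the final token of the batch produces logits
        std::vector<int32_t> inp_out_ids = { n_tokens - 1 };
        std::vector<float> out = get_rows(cur, n_embd, inp_out_ids);
        printf("kept %zu row(s); out[0] = %.1f\n", out.size() / n_embd, out[0]);
        return 0;
    }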
