Commit 6c281ad

Move struct declaration to header and update allowed tensor list
1 parent 971f245 commit 6c281ad

File tree (3 files changed, +66 -52 lines):

examples/quantize/quantize.cpp
src/llama-quant.cpp
src/llama-quant.h

examples/quantize/quantize.cpp

Lines changed: 1 addition & 46 deletions
@@ -1,5 +1,6 @@
 #include "common.h"
 #include "llama.h"
+#include "llama-quant.h"
 
 #include <cstdio>
 #include <cstring>
@@ -248,52 +249,6 @@ static ggml_type parse_ggml_type(const char * arg) {
     return GGML_TYPE_COUNT;
 }
 
-// Allowed tensors for arbitrary quantization with --tensor-type option
-static const std::vector<std::string> ALLOWED_TENSOR_TYPE = {
-    "attn_k",
-    "attn_kv_a_mqa",
-    "attn_kv_b",
-    "attn_o",
-    "attn_output",
-    "attn_q",
-    "attn_q_a",
-    "attn_q_b",
-    "attn_qkv",
-    "attn_v",
-    "channel_mix_key",
-    "channel_mix_receptance",
-    "channel_mix_value",
-    "cls",
-    "cls.output",
-    "cross_attn_k",
-    "cross_attn_o",
-    "cross_attn_q",
-    "cross_attn_v",
-    "ffn_act",
-    "ffn_down",
-    "ffn_down_exps",
-    "ffn_down_shexp",
-    "ffn_gate",
-    "ffn_gate_exps",
-    "ffn_gate_shexp",
-    "ffn_up",
-    "ffn_up_exps",
-    "ffn_up_shexp",
-    "ssm_in",
-    "ssm_out",
-    "time_mix_gate",
-    "time_mix_key",
-    "time_mix_output",
-    "time_mix_receptance",
-    "time_mix_value",
-};
-
-// changes to this struct must be replicated in llama-quant.cpp
-struct tensor_quantization {
-    std::string name;
-    ggml_type quant = GGML_TYPE_COUNT;
-};
-
 static bool parse_tensor_type(const char * data, std::vector<tensor_quantization> & tensor_type) {
     const char * sep = strchr(data, '=');
     if (sep == nullptr) {
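
For context, parse_tensor_type (kept in quantize.cpp by this hunk) splits each --tensor-type argument at the first '='. Below is a minimal sketch of that split; only parse_tensor_type, the '=' separator, and the tensor/type names come from the diff, while the split_tensor_type helper is hypothetical and illustrative.

#include <cstring>
#include <string>

// Illustrative only: split a "--tensor-type" argument of the form
// "<tensor-name>=<quant-type>" into its two halves, mirroring the
// strchr(data, '=') check visible in parse_tensor_type above.
static bool split_tensor_type(const char * data, std::string & name, std::string & type) {
    const char * sep = strchr(data, '=');
    if (sep == nullptr) {
        return false; // no '=': malformed argument such as "attn_v" without a type
    }
    name.assign(data, sep - data); // text before '=', e.g. "attn_v"
    type.assign(sep + 1);          // text after '=',  e.g. "q6_k"
    return !name.empty() && !type.empty();
}

The resulting name would then presumably be checked against ALLOWED_TENSOR_TYPE and the type mapped through parse_ggml_type, but that logic is outside this hunk.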

src/llama-quant.cpp

Lines changed: 0 additions & 6 deletions
@@ -48,12 +48,6 @@ struct quantize_state_impl {
         {}
 };
 
-// changes to this struct must be replicated in quantize.cpp
-struct tensor_quantization {
-    std::string name;
-    ggml_type quant = GGML_TYPE_COUNT;
-};
-
 static void llama_tensor_dequantize_impl(
     ggml_tensor * tensor, std::vector<no_init<float>> & output, std::vector<std::thread> & workers,
     const size_t nelements, const int nthread

src/llama-quant.h

Lines changed: 65 additions & 0 deletions
@@ -1 +1,66 @@
 #pragma once
+
+#include <string>
+#include <vector>
+
+#include "ggml.h"
+
+// Allowed tensors for arbitrary quantization with --tensor-type option
+static const std::vector<std::string> ALLOWED_TENSOR_TYPE = {
+    "attn_k",
+    "attn_k_b",
+    "attn_kv_a_mqa",
+    "attn_kv_b",
+    "attn_o",
+    "attn_output",
+    "attn_q",
+    "attn_q_a",
+    "attn_q_b",
+    "attn_qkv",
+    "attn_rel_b",
+    "attn_v",
+    "attn_v_b",
+    "channel_mix_key",
+    "channel_mix_receptance",
+    "channel_mix_value",
+    "cls",
+    "cls.output",
+    "conv1",
+    "conv1d",
+    "conv2",
+    "cross_attn_k",
+    "cross_attn_o",
+    "cross_attn_q",
+    "cross_attn_rel_b",
+    "cross_attn_v",
+    "dw",
+    "ffn_down",
+    "ffn_down_exps",
+    "ffn_down_shexp",
+    "ffn_gate",
+    "ffn_gate_exps",
+    "ffn_gate_shexp",
+    "ffn_up",
+    "ffn_up_exps",
+    "ffn_up_shexp",
+    "pw1",
+    "pw1",
+    "ssm_a",
+    "ssm_conv1d",
+    "ssm_dt",
+    "ssm_in",
+    "ssm_out",
+    "ssm_x",
+    "time_mix_gate",
+    "time_mix_key",
+    "time_mix_output",
+    "time_mix_receptance",
+    "time_mix_value",
+    "token_types"
+};
+
+// Quantization types
+struct tensor_quantization {
+    std::string name;
+    ggml_type quant = GGML_TYPE_COUNT;
+};
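
With ALLOWED_TENSOR_TYPE and tensor_quantization now declared once in llama-quant.h, both quantize.cpp and llama-quant.cpp can include the same definitions instead of keeping two copies in sync. Below is a minimal sketch of how a caller might use the two together; the is_allowed_tensor and add_override helpers are hypothetical and not part of this commit.

#include <algorithm>
#include <string>
#include <vector>

#include "llama-quant.h" // ALLOWED_TENSOR_TYPE and tensor_quantization, per the diff above

// Hypothetical helper: check a tensor name against the allowed list.
static bool is_allowed_tensor(const std::string & name) {
    return std::find(ALLOWED_TENSOR_TYPE.begin(), ALLOWED_TENSOR_TYPE.end(), name) != ALLOWED_TENSOR_TYPE.end();
}

// Hypothetical usage: record one override, e.g. for "--tensor-type ffn_down=q6_k".
static bool add_override(std::vector<tensor_quantization> & overrides, const std::string & name, ggml_type type) {
    if (!is_allowed_tensor(name)) {
        return false; // name is not in ALLOWED_TENSOR_TYPE
    }
    tensor_quantization tq;
    tq.name  = name;
    tq.quant = type;
    overrides.push_back(tq);
    return true;
}

Since ALLOWED_TENSOR_TYPE is a static vector defined in the header, each translation unit that includes llama-quant.h carries its own copy of the list.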
