
Commit a787e60

gguf: run generate-llm
1 parent 9abb7f5 commit a787e60

Showing 2 changed files with 135 additions and 9 deletions.


packages/gguf/scripts/generate-llm.ts
Lines changed: 30 additions & 5 deletions

@@ -5,7 +5,10 @@

 import { writeFileSync } from "node:fs";

-const SOURCE_CPP_URL = "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/src/llama.cpp";
+const SOURCE_CPP_URLS = [
+	"https://raw.githubusercontent.com/ggerganov/llama.cpp/master/src/llama-arch.cpp",
+	"https://raw.githubusercontent.com/ggerganov/llama.cpp/master/src/llama-model.cpp",
+];
 const DEST_FILE_PATH = "./src/transformer-llm.ts";
 const DEST_COMMON_SOURCE = `
 /** This file is auto-generated by generate-llm.ts */
@@ -90,10 +93,26 @@ const KV_TYPE = {
 	LLM_KV_EXPERT_SHARED_COUNT: "number",
 	LLM_KV_EXPERT_WEIGHTS_SCALE: "number",
 	LLM_KV_ROPE_SCALING_YARN_LOG_MUL: "number",
+	LLM_KV_ROPE_DIMENSION_COUNT: "number",
+	LLM_KV_ROPE_DIMENSION_SECTIONS: "number[]",
 	LLM_KV_ATTENTION_Q_LORA_RANK: "number",
 	LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT: "number",
 	LLM_KV_DECODER_START_TOKEN_ID: "number",
 	LLM_KV_USE_PARALLEL_RESIDUAL: "boolean",
+	LLM_KV_WKV_HEAD_SIZE: "number",
+	LLM_KV_TIME_MIX_EXTRA_DIM: "number",
+	LLM_KV_TIME_DECAY_EXTRA_DIM: "number",
+	LLM_KV_RESCALE_EVERY_N_LAYERS: "boolean",
+	LLM_KV_TOKEN_SHIFT_COUNT: "boolean",
+	LLM_KV_SWIN_NORM: "boolean",
+	LLM_KV_ATTENTION_GROUPNORM_EPS: "number",
+	LLM_KV_ATTENTION_GROUPNORM_GROUPS: "number",
+	LLM_KV_ATTENTION_SCALE: "number",
+	LLM_KV_EMBEDDING_SCALE: "number",
+	LLM_KV_RESIDUAL_SCALE: "number",
+	LLM_KV_SSM_DT_B_C_RMS: "boolean",
+	LLM_KV_EXPERT_WEIGHTS_NORM: "boolean",
+	LLM_KV_EXPERT_GATING_FUNC: "boolean",
 };

 interface Arch {
@@ -105,8 +124,13 @@ interface Arch {
 }

 async function main() {
-	const res = await fetch(SOURCE_CPP_URL);
-	const cppSource = await res.text();
+	const cppSources = await Promise.all(
+		SOURCE_CPP_URLS.map(async (url) => {
+			const res = await fetch(url);
+			return await res.text();
+		})
+	);
+	const cppSource = cppSources.join("\n");

 	/////////////////////////////////////
 	// extract list of all architectures
@@ -143,6 +167,7 @@ async function main() {
 			constToKVName[matched.groups.cppConst] = matched.groups.name;
 		}
 	}
+	console.log("constToKVName", constToKVName);

 	/////////////////////////////////////
 	// extract list of tensor names based on architecture
@@ -172,8 +197,8 @@ async function main() {
 	let insideLoadHParamsFn = false;
 	currCppConst = "";
 	for (const line of cppSource.split("\n")) {
-		// check if current line is function llm_load_hparams()
-		if (line.startsWith("static void llm_load_hparams")) {
+		// check if current line is function llama_model::load_hparams()
+		if (line.startsWith("void llama_model::load_hparams")) {
 			insideLoadHParamsFn = true;
 		}
 		if (!insideLoadHParamsFn) {
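
For context, the fetching change above can be exercised on its own. The sketch below is a minimal standalone version of the pattern the script now uses, assuming a runtime with a global fetch (Node 18+): fetch each upstream llama.cpp source in parallel, concatenate the results, then scan for the renamed llama_model::load_hparams entry point. The fetchConcatenated helper name is purely illustrative, not part of the script.

// Minimal standalone sketch of the multi-source fetch used above.
// The URLs mirror SOURCE_CPP_URLS; everything else is illustrative.
const urls = [
	"https://raw.githubusercontent.com/ggerganov/llama.cpp/master/src/llama-arch.cpp",
	"https://raw.githubusercontent.com/ggerganov/llama.cpp/master/src/llama-model.cpp",
];

async function fetchConcatenated(): Promise<string> {
	// Fetch all sources in parallel and join them with newlines,
	// so downstream regexes can treat them as a single blob.
	const sources = await Promise.all(urls.map(async (url) => (await fetch(url)).text()));
	return sources.join("\n");
}

fetchConcatenated().then((cppSource) => {
	// The hparams scanner keys off the renamed C++ entry point.
	const hasLoadHparams = cppSource
		.split("\n")
		.some((line) => line.startsWith("void llama_model::load_hparams"));
	console.log("found load_hparams:", hasLoadHparams);
});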

packages/gguf/src/transformer-llm.ts
Lines changed: 105 additions & 4 deletions

@@ -54,6 +54,7 @@ export enum TransformerLLMPoolingType {

 export const LLM_ARCHITECTURES = [
 	"llama",
+	"deci",
 	"falcon",
 	"grok",
 	"gpt2",
@@ -71,34 +72,52 @@ export const LLM_ARCHITECTURES = [
 	"qwen",
 	"qwen2",
 	"qwen2moe",
+	"qwen2vl",
 	"phi2",
 	"phi3",
+	"phimoe",
 	"plamo",
 	"codeshell",
 	"orion",
 	"internlm2",
 	"minicpm",
+	"minicpm3",
 	"gemma",
 	"gemma2",
 	"starcoder2",
 	"mamba",
 	"xverse",
 	"command-r",
+	"cohere2",
 	"dbrx",
 	"olmo",
+	"olmo2",
+	"olmoe",
 	"openelm",
 	"arctic",
+	"deepseek",
 	"deepseek2",
 	"chatglm",
 	"bitnet",
 	"t5",
 	"t5encoder",
 	"jais",
+	"nemotron",
+	"exaone",
+	"rwkv6",
+	"rwkv6qwen2",
+	"granite",
+	"granitemoe",
+	"chameleon",
+	"wavtokenizer-dec",
 ] as const;
 type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];
 export type ArchLlama = TransformerLLMBase<"llama"> & {
 	"llama.attention.layer_norm_rms_epsilon": number;
 };
+export type ArchDeci = TransformerLLMBase<"deci"> & {
+	"deci.attention.layer_norm_rms_epsilon": number;
+};
 export type ArchFalcon = TransformerLLMBase<"falcon"> & {
 	"falcon.attention.layer_norm_epsilon": number;
 };
@@ -130,19 +149,16 @@ export type ArchRefact = TransformerLLMBase<"refact"> & {
 export type ArchBert = TransformerLLMBase<"bert"> & {
 	"bert.attention.layer_norm_epsilon": number;
 	"bert.attention.causal": boolean;
-	"tokenizer.ggml.token_type_count": number;
 	"bert.pooling_type": TransformerLLMPoolingType;
 };
 export type ArchNomicBert = TransformerLLMBase<"nomic-bert"> & {
 	"nomic-bert.attention.layer_norm_epsilon": number;
 	"nomic-bert.attention.causal": boolean;
-	"tokenizer.ggml.token_type_count": number;
 	"nomic-bert.pooling_type": TransformerLLMPoolingType;
 };
 export type ArchJinaBertV2 = TransformerLLMBase<"jina-bert-v2"> & {
 	"jina-bert-v2.attention.layer_norm_epsilon": number;
 	"jina-bert-v2.attention.causal": boolean;
-	"tokenizer.ggml.token_type_count": number;
 	"jina-bert-v2.pooling_type": TransformerLLMPoolingType;
 };
 export type ArchBloom = TransformerLLMBase<"bloom"> & {
@@ -162,13 +178,19 @@ export type ArchQwen2moe = TransformerLLMBase<"qwen2moe"> & {
 	"qwen2moe.expert_shared_feed_forward_length": number;
 	"qwen2moe.attention.layer_norm_rms_epsilon": number;
 };
+export type ArchQwen2vl = TransformerLLMBase<"qwen2vl"> & {
+	"qwen2vl.rope.dimension_sections": number[];
+};
 export type ArchPhi2 = TransformerLLMBase<"phi2"> & {
 	"phi2.attention.layer_norm_epsilon": number;
 };
 export type ArchPhi3 = TransformerLLMBase<"phi3"> & {
 	"phi3.attention.layer_norm_rms_epsilon": number;
 	"phi3.attention.sliding_window": number;
 };
+export type ArchPhimoe = TransformerLLMBase<"phimoe"> & {
+	"phimoe.attention.layer_norm_rms_epsilon": number;
+};
 export type ArchPlamo = TransformerLLMBase<"plamo"> & {
 	"plamo.attention.layer_norm_rms_epsilon": number;
 };
@@ -183,6 +205,14 @@ export type ArchInternlm2 = TransformerLLMBase<"internlm2"> & {
 };
 export type ArchMinicpm = TransformerLLMBase<"minicpm"> & {
 	"minicpm.attention.layer_norm_rms_epsilon": number;
+	"minicpm.embedding_scale": number;
+	"minicpm.residual_scale": number;
+	"minicpm.logit_scale": number;
+};
+export type ArchMinicpm3 = TransformerLLMBase<"minicpm3"> & {
+	"minicpm3.attention.layer_norm_rms_epsilon": number;
+	"minicpm3.attention.q_lora_rank": number;
+	"minicpm3.attention.kv_lora_rank": number;
 };
 export type ArchGemma = TransformerLLMBase<"gemma"> & {
 	"gemma.attention.layer_norm_rms_epsilon": number;
@@ -201,6 +231,7 @@ export type ArchMamba = TransformerLLMBase<"mamba"> & {
 	"mamba.ssm.inner_size": number;
 	"mamba.ssm.state_size": number;
 	"mamba.ssm.time_step_rank": number;
+	"mamba.ssm.dt_b_c_rms": boolean;
 	"mamba.attention.layer_norm_rms_epsilon": number;
 };
 export type ArchXverse = TransformerLLMBase<"xverse"> & {
@@ -210,6 +241,11 @@ export type ArchCommandR = TransformerLLMBase<"command-r"> & {
 	"command-r.logit_scale": number;
 	"command-r.attention.layer_norm_epsilon": number;
 };
+export type ArchCohere2 = TransformerLLMBase<"cohere2"> & {
+	"cohere2.attention.sliding_window": number;
+	"cohere2.logit_scale": number;
+	"cohere2.attention.layer_norm_epsilon": number;
+};
 export type ArchDbrx = TransformerLLMBase<"dbrx"> & {
 	"dbrx.attention.layer_norm_epsilon": number;
 	"dbrx.attention.clamp_kqv": number;
@@ -218,12 +254,25 @@ export type ArchOlmo = TransformerLLMBase<"olmo"> & {
 	"olmo.attention.layer_norm_epsilon": number;
 	"olmo.attention.clamp_kqv": number;
 };
+export type ArchOlmo2 = TransformerLLMBase<"olmo2"> & {
+	"olmo2.attention.layer_norm_rms_epsilon": number;
+};
+export type ArchOlmoe = TransformerLLMBase<"olmoe"> & {
+	"olmoe.attention.layer_norm_rms_epsilon": number;
+};
 export type ArchOpenelm = TransformerLLMBase<"openelm"> & {
 	"openelm.attention.layer_norm_rms_epsilon": number;
 };
 export type ArchArctic = TransformerLLMBase<"arctic"> & {
 	"arctic.attention.layer_norm_rms_epsilon": number;
 };
+export type ArchDeepseek = TransformerLLMBase<"deepseek"> & {
+	"deepseek.attention.layer_norm_rms_epsilon": number;
+	"deepseek.leading_dense_block_count": number;
+	"deepseek.expert_feed_forward_length": number;
+	"deepseek.expert_shared_count": number;
+	"deepseek.expert_weights_scale": number;
+};
 export type ArchDeepseek2 = TransformerLLMBase<"deepseek2"> & {
 	"deepseek2.attention.layer_norm_rms_epsilon": number;
 	"deepseek2.leading_dense_block_count": number;
@@ -232,6 +281,8 @@ export type ArchDeepseek2 = TransformerLLMBase<"deepseek2"> & {
 	"deepseek2.expert_feed_forward_length": number;
 	"deepseek2.expert_shared_count": number;
 	"deepseek2.expert_weights_scale": number;
+	"deepseek2.expert_weights_norm": boolean;
+	"deepseek2.expert_gating_func": boolean;
 	"deepseek2.rope.scaling.yarn_log_multiplier": number;
 };
 export type ArchChatglm = TransformerLLMBase<"chatglm"> & {
@@ -253,9 +304,44 @@ export type ArchJais = TransformerLLMBase<"jais"> & {
 	"jais.attention.layer_norm_epsilon": number;
 	"jais.attention.max_alibi_bias": number;
 };
+export type ArchNemotron = TransformerLLMBase<"nemotron"> & {
+	"nemotron.attention.layer_norm_epsilon": number;
+};
+export type ArchExaone = TransformerLLMBase<"exaone"> & {
+	"exaone.attention.layer_norm_rms_epsilon": number;
+};
+export type ArchRwkv6 = TransformerLLMBase<"rwkv6">;
+export type ArchRwkv6qwen2 = TransformerLLMBase<"rwkv6qwen2"> & {
+	"rwkv6qwen2.attention.layer_norm_epsilon": number;
+	"rwkv6qwen2.attention.layer_norm_rms_epsilon": number;
+	"rwkv6qwen2.wkv.head_size": number;
+	"rwkv6qwen2.time_mix_extra_dim": number;
+	"rwkv6qwen2.time_decay_extra_dim": number;
+	"rwkv6qwen2.rescale_every_n_layers": boolean;
+	"rwkv6qwen2.token_shift_count": boolean;
+};
+export type ArchGranite = TransformerLLMBase<"granite">;
+export type ArchGraniteMoe = TransformerLLMBase<"granitemoe"> & {
+	"granitemoe.attention.layer_norm_rms_epsilon": number;
+	"granitemoe.logit_scale": number;
+	"granitemoe.residual_scale": number;
+	"granitemoe.embedding_scale": number;
+	"granitemoe.attention.scale": number;
+};
+export type ArchChameleon = TransformerLLMBase<"chameleon"> & {
+	"chameleon.attention.layer_norm_rms_epsilon": number;
+	"chameleon.swin_norm": boolean;
+};
+export type ArchWavtokenizerDec = TransformerLLMBase<"wavtokenizer-dec"> & {
+	"wavtokenizer-dec.attention.layer_norm_epsilon": number;
+	"wavtokenizer-dec.attention.group_norm_epsilon": number;
+	"wavtokenizer-dec.attention.group_norm_groups": number;
+	"wavtokenizer-dec.attention.causal": boolean;
+};

 export type TransformerLLM =
 	| ArchLlama
+	| ArchDeci
 	| ArchFalcon
 	| ArchGrok
 	| ArchGpt2
@@ -273,26 +359,41 @@ export type TransformerLLM =
 	| ArchQwen
 	| ArchQwen2
 	| ArchQwen2moe
+	| ArchQwen2vl
 	| ArchPhi2
 	| ArchPhi3
+	| ArchPhimoe
 	| ArchPlamo
 	| ArchCodeshell
 	| ArchOrion
 	| ArchInternlm2
 	| ArchMinicpm
+	| ArchMinicpm3
 	| ArchGemma
 	| ArchGemma2
 	| ArchStarcoder2
 	| ArchMamba
 	| ArchXverse
 	| ArchCommandR
+	| ArchCohere2
 	| ArchDbrx
 	| ArchOlmo
+	| ArchOlmo2
+	| ArchOlmoe
 	| ArchOpenelm
 	| ArchArctic
+	| ArchDeepseek
 	| ArchDeepseek2
 	| ArchChatglm
 	| ArchBitnet
 	| ArchT5
 	| ArchT5encoder
-	| ArchJais;
+	| ArchJais
+	| ArchNemotron
+	| ArchExaone
+	| ArchRwkv6
+	| ArchRwkv6qwen2
+	| ArchGranite
+	| ArchGraniteMoe
+	| ArchChameleon
+	| ArchWavtokenizerDec;
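
As a usage note, the regenerated union can be consumed like any other set of typed GGUF metadata shapes. The following is a hedged sketch, assuming the generated types are imported from this module; the key names come from the types above, but the values are invented for illustration, not real model output.

import type { ArchCohere2, ArchDeepseek } from "./transformer-llm";

// Hypothetical metadata fragments using the newly generated types.
// Keys are taken from the definitions above; values are illustrative only.
const cohere2Meta: Partial<ArchCohere2> = {
	"cohere2.attention.sliding_window": 4096,
	"cohere2.logit_scale": 0.0625,
	"cohere2.attention.layer_norm_epsilon": 1e-5,
};

const deepseekMeta: Partial<ArchDeepseek> = {
	"deepseek.expert_shared_count": 2,
	"deepseek.expert_weights_scale": 1.0,
};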
