
Commit a5726ac

gguf: sync type with upstream llama.cpp (huggingface#849)

Generated by `scripts/generate-llm.ts`

1 parent 33e6316

2 files changed: +96 −9 lines

packages/gguf/scripts/generate-llm.ts

Lines changed: 21 additions & 2 deletions
@@ -5,7 +5,7 @@
 
 import { writeFileSync } from "node:fs";
 
-const SOURCE_CPP_URL = "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/llama.cpp";
+const SOURCE_CPP_URL = "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/src/llama.cpp";
 const DEST_FILE_PATH = "./src/transformer-llm.ts";
 const DEST_COMMON_SOURCE = `
 /** This file is auto-generated by generate-llm.ts */
@@ -80,6 +80,20 @@ const KV_TYPE = {
 	LLM_KV_SSM_STATE_SIZE: "number",
 	LLM_KV_SSM_TIME_STEP_RANK: "number",
 	LLM_KV_LOGIT_SCALE: "number",
+	LLM_KV_EXPERT_FEED_FORWARD_LENGTH: "number",
+	LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH: "number",
+	LLM_KV_ATTENTION_SLIDING_WINDOW: "number",
+	LLM_KV_ATTN_LOGIT_SOFTCAPPING: "number",
+	LLM_KV_FINAL_LOGIT_SOFTCAPPING: "number",
+	LLM_KV_LEADING_DENSE_BLOCK_COUNT: "number",
+	LLM_KV_ATTENTION_KV_LORA_RANK: "number",
+	LLM_KV_EXPERT_SHARED_COUNT: "number",
+	LLM_KV_EXPERT_WEIGHTS_SCALE: "number",
+	LLM_KV_ROPE_SCALING_YARN_LOG_MUL: "number",
+	LLM_KV_ATTENTION_Q_LORA_RANK: "number",
+	LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT: "number",
+	LLM_KV_DECODER_START_TOKEN_ID: "number",
+	LLM_KV_USE_PARALLEL_RESIDUAL: "boolean",
 };
 
 interface Arch {
@@ -199,7 +213,12 @@ async function main() {
 		if (a.hparams.length) {
 			code += [
 				" & {",
-				...a.hparams.map((k) => `\t${JSON.stringify(constToKVName[k].replace("%s", a.name))}: ${KV_TYPE[k]},`),
+				...a.hparams.map((k) => {
+					if (!KV_TYPE[k]) {
+						throw new Error(`Cannot find type definition of ${k}`);
+					}
+					return `\t${JSON.stringify(constToKVName[k].replace("%s", a.name))}: ${KV_TYPE[k]},`;
+				}),
 				"};",
 			].join("\n");
 		} else {
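The script change above adds a guard to the code generator: before, an hparam constant missing from KV_TYPE would be silently interpolated as the string "undefined" into the generated file; now generation fails fast. A minimal, self-contained TypeScript sketch of that pattern follows — the two KV_TYPE/constToKVName entries are taken from the diff, while renderHparams is a hypothetical stand-in for the generator's inline map call:

// Sketch of the fail-fast guard added in generate-llm.ts (illustrative only).
const KV_TYPE: Record<string, string> = {
	LLM_KV_ATTENTION_SLIDING_WINDOW: "number",
	LLM_KV_USE_PARALLEL_RESIDUAL: "boolean",
};

const constToKVName: Record<string, string> = {
	LLM_KV_ATTENTION_SLIDING_WINDOW: "%s.attention.sliding_window",
	LLM_KV_USE_PARALLEL_RESIDUAL: "%s.use_parallel_residual",
};

// Hypothetical helper mirroring the generator's a.hparams.map(...) callback.
function renderHparams(archName: string, hparams: string[]): string {
	return hparams
		.map((k) => {
			// Fail fast instead of emitting the string "undefined" into the output.
			if (!KV_TYPE[k]) {
				throw new Error(`Cannot find type definition of ${k}`);
			}
			return `\t${JSON.stringify(constToKVName[k].replace("%s", archName))}: ${KV_TYPE[k]},`;
		})
		.join("\n");
}

console.log(renderHparams("gemma2", ["LLM_KV_ATTENTION_SLIDING_WINDOW"]));
// → 	"gemma2.attention.sliding_window": number,

With the guard in place, syncing against a new llama.cpp revision that introduces an unknown hparam constant breaks the build of the generator rather than producing a subtly wrong transformer-llm.ts.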

packages/gguf/src/transformer-llm.ts

Lines changed: 75 additions & 7 deletions
@@ -62,10 +62,10 @@ export const LLM_ARCHITECTURES = [
 	"mpt",
 	"baichuan",
 	"starcoder",
-	"persimmon",
 	"refact",
 	"bert",
 	"nomic-bert",
+	"jina-bert-v2",
 	"bloom",
 	"stablelm",
 	"qwen",
@@ -79,12 +79,21 @@ export const LLM_ARCHITECTURES = [
 	"internlm2",
 	"minicpm",
 	"gemma",
+	"gemma2",
 	"starcoder2",
 	"mamba",
 	"xverse",
 	"command-r",
 	"dbrx",
 	"olmo",
+	"openelm",
+	"arctic",
+	"deepseek2",
+	"chatglm",
+	"bitnet",
+	"t5",
+	"t5encoder",
+	"jais",
 ] as const;
 type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];
 export type ArchLlama = TransformerLLMBase<"llama"> & {
@@ -100,7 +109,10 @@ export type ArchGpt2 = TransformerLLMBase<"gpt2"> & {
 	"gpt2.attention.layer_norm_epsilon": number;
 };
 export type ArchGptj = TransformerLLMBase<"gptj">;
-export type ArchGptneox = TransformerLLMBase<"gptneox">;
+export type ArchGptneox = TransformerLLMBase<"gptneox"> & {
+	"gptneox.attention.layer_norm_epsilon": number;
+	"gptneox.use_parallel_residual": boolean;
+};
 export type ArchMpt = TransformerLLMBase<"mpt"> & {
 	"mpt.attention.layer_norm_epsilon": number;
 	"mpt.attention.clamp_kqv": number;
@@ -112,9 +124,6 @@ export type ArchBaichuan = TransformerLLMBase<"baichuan"> & {
 export type ArchStarcoder = TransformerLLMBase<"starcoder"> & {
 	"starcoder.attention.layer_norm_epsilon": number;
 };
-export type ArchPersimmon = TransformerLLMBase<"persimmon"> & {
-	"persimmon.attention.layer_norm_epsilon": number;
-};
 export type ArchRefact = TransformerLLMBase<"refact"> & {
 	"refact.attention.layer_norm_rms_epsilon": number;
 };
@@ -130,6 +139,12 @@ export type ArchNomicBert = TransformerLLMBase<"nomic-bert"> & {
 	"tokenizer.ggml.token_type_count": number;
 	"nomic-bert.pooling_type": TransformerLLMPoolingType;
 };
+export type ArchJinaBertV2 = TransformerLLMBase<"jina-bert-v2"> & {
+	"jina-bert-v2.attention.layer_norm_epsilon": number;
+	"jina-bert-v2.attention.causal": boolean;
+	"tokenizer.ggml.token_type_count": number;
+	"jina-bert-v2.pooling_type": TransformerLLMPoolingType;
+};
 export type ArchBloom = TransformerLLMBase<"bloom"> & {
 	"bloom.attention.layer_norm_epsilon": number;
 };
@@ -143,13 +158,16 @@ export type ArchQwen2 = TransformerLLMBase<"qwen2"> & {
 	"qwen2.attention.layer_norm_rms_epsilon": number;
 };
 export type ArchQwen2moe = TransformerLLMBase<"qwen2moe"> & {
+	"qwen2moe.expert_feed_forward_length": number;
+	"qwen2moe.expert_shared_feed_forward_length": number;
 	"qwen2moe.attention.layer_norm_rms_epsilon": number;
 };
 export type ArchPhi2 = TransformerLLMBase<"phi2"> & {
 	"phi2.attention.layer_norm_epsilon": number;
 };
 export type ArchPhi3 = TransformerLLMBase<"phi3"> & {
 	"phi3.attention.layer_norm_rms_epsilon": number;
+	"phi3.attention.sliding_window": number;
 };
 export type ArchPlamo = TransformerLLMBase<"plamo"> & {
 	"plamo.attention.layer_norm_rms_epsilon": number;
@@ -169,6 +187,12 @@ export type ArchMinicpm = TransformerLLMBase<"minicpm"> & {
 export type ArchGemma = TransformerLLMBase<"gemma"> & {
 	"gemma.attention.layer_norm_rms_epsilon": number;
 };
+export type ArchGemma2 = TransformerLLMBase<"gemma2"> & {
+	"gemma2.attention.sliding_window": number;
+	"gemma2.attention.layer_norm_rms_epsilon": number;
+	"gemma2.attn_logit_softcapping": number;
+	"gemma2.final_logit_softcapping": number;
+};
 export type ArchStarcoder2 = TransformerLLMBase<"starcoder2"> & {
 	"starcoder2.attention.layer_norm_epsilon": number;
 };
@@ -194,6 +218,41 @@ export type ArchOlmo = TransformerLLMBase<"olmo"> & {
 	"olmo.attention.layer_norm_epsilon": number;
 	"olmo.attention.clamp_kqv": number;
 };
+export type ArchOpenelm = TransformerLLMBase<"openelm"> & {
+	"openelm.attention.layer_norm_rms_epsilon": number;
+};
+export type ArchArctic = TransformerLLMBase<"arctic"> & {
+	"arctic.attention.layer_norm_rms_epsilon": number;
+};
+export type ArchDeepseek2 = TransformerLLMBase<"deepseek2"> & {
+	"deepseek2.attention.layer_norm_rms_epsilon": number;
+	"deepseek2.leading_dense_block_count": number;
+	"deepseek2.attention.q_lora_rank": number;
+	"deepseek2.attention.kv_lora_rank": number;
+	"deepseek2.expert_feed_forward_length": number;
+	"deepseek2.expert_shared_count": number;
+	"deepseek2.expert_weights_scale": number;
+	"deepseek2.rope.scaling.yarn_log_multiplier": number;
+};
+export type ArchChatglm = TransformerLLMBase<"chatglm"> & {
+	"chatglm.attention.layer_norm_rms_epsilon": number;
+};
+export type ArchBitnet = TransformerLLMBase<"bitnet"> & {
+	"bitnet.attention.layer_norm_rms_epsilon": number;
+};
+export type ArchT5 = TransformerLLMBase<"t5"> & {
+	"t5.attention.layer_norm_rms_epsilon": number;
+	"t5.attention.relative_buckets_count": number;
+	"t5.decoder_start_token_id": number;
+};
+export type ArchT5encoder = TransformerLLMBase<"t5encoder"> & {
+	"t5encoder.attention.layer_norm_rms_epsilon": number;
+	"t5encoder.attention.relative_buckets_count": number;
+};
+export type ArchJais = TransformerLLMBase<"jais"> & {
+	"jais.attention.layer_norm_epsilon": number;
+	"jais.attention.max_alibi_bias": number;
+};
 
 export type TransformerLLM =
 	| ArchLlama
@@ -205,10 +264,10 @@ export type TransformerLLM =
 	| ArchMpt
 	| ArchBaichuan
 	| ArchStarcoder
-	| ArchPersimmon
 	| ArchRefact
 	| ArchBert
 	| ArchNomicBert
+	| ArchJinaBertV2
 	| ArchBloom
 	| ArchStablelm
 	| ArchQwen
@@ -222,9 +281,18 @@ export type TransformerLLM =
 	| ArchInternlm2
 	| ArchMinicpm
 	| ArchGemma
+	| ArchGemma2
 	| ArchStarcoder2
 	| ArchMamba
 	| ArchXverse
 	| ArchCommandR
 	| ArchDbrx
-	| ArchOlmo;
+	| ArchOlmo
+	| ArchOpenelm
+	| ArchArctic
+	| ArchDeepseek2
+	| ArchChatglm
+	| ArchBitnet
+	| ArchT5
+	| ArchT5encoder
+	| ArchJais;
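Because every generated Arch* type prefixes its hyperparameter keys with the architecture name, the TransformerLLM union can be narrowed like a discriminated union once an architecture field is present. A simplified, self-contained sketch of how a consumer might do this (shapes abridged from the diff; the "general.architecture" discriminant and the sample metadata values are assumptions for illustration, and the bracket-access narrowing requires a reasonably recent TypeScript):

// Abridged stand-ins for the generated types (not the real exports).
type ArchGemma2 = {
	"general.architecture": "gemma2";
	"gemma2.attention.sliding_window": number;
	"gemma2.attn_logit_softcapping": number;
	"gemma2.final_logit_softcapping": number;
};
type ArchJais = {
	"general.architecture": "jais";
	"jais.attention.layer_norm_epsilon": number;
	"jais.attention.max_alibi_bias": number;
};
type TransformerLLM = ArchGemma2 | ArchJais;

// Narrowing on the architecture key makes the arch-prefixed fields visible.
function attnSoftcap(metadata: TransformerLLM): number | undefined {
	if (metadata["general.architecture"] === "gemma2") {
		return metadata["gemma2.attn_logit_softcapping"];
	}
	return undefined; // jais has no softcapping keys
}

// Hypothetical metadata literal, as it might come out of a GGUF parser.
const meta: TransformerLLM = {
	"general.architecture": "gemma2",
	"gemma2.attention.sliding_window": 4096,
	"gemma2.attn_logit_softcapping": 50,
	"gemma2.final_logit_softcapping": 30,
};
console.log(attnSoftcap(meta)); // 50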
