
Commit 52b2da6

model : LiquidAI lfm2 350M/700M/1.2B dense text-only
1 parent a457551 commit 52b2da6

15 files changed, +408 −3 lines

convert_hf_to_gguf.py

Lines changed: 53 additions & 0 deletions
@@ -300,6 +300,7 @@ def prepare_tensors(self):
                     gguf.MODEL_TENSOR.POS_EMBD,
                     gguf.MODEL_TENSOR.TOKEN_TYPES,
                     gguf.MODEL_TENSOR.SSM_CONV1D,
+                    gguf.MODEL_TENSOR.SHORTCONV_CONV,
                     gguf.MODEL_TENSOR.TIME_MIX_FIRST,
                     gguf.MODEL_TENSOR.TIME_MIX_W1,
                     gguf.MODEL_TENSOR.TIME_MIX_W2,
@@ -833,6 +834,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b":
             # ref: https://huggingface.co/tiiuae/Falcon-H1-34B-Base
             res = "falcon-h1"
+        if chkhsh == "169bf0296a13c4d9b7672313f749eb36501d931022de052aad6e36f2bf34dd51":
+            # ref: https://huggingface.co/LiquidAI/LFM2-Tokenizer
+            res = "lfm2"

         if res is None:
             logger.warning("\n")
@@ -6943,6 +6947,55 @@ def set_vocab(self):
         chat_template = tokenizer.chat_template.replace("[:]", "")
         self.gguf_writer.add_chat_template(chat_template)

+@ModelBase.register("LFM2ForCausalLM")
+class LFM2Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.LFM2
+
+    def _add_feed_forward_length(self):
+        ff_dim = self.hparams["block_ff_dim"]
+
+        auto_adjust_ff_dim = self.hparams["block_auto_adjust_ff_dim"]
+        ff_dim = self.hparams["block_ff_dim"]
+        ffn_dim_multiplier = self.hparams["block_ffn_dim_multiplier"]
+        multiple_of = self.hparams["block_multiple_of"]
+
+        if auto_adjust_ff_dim:
+            ff_dim = int(2 * ff_dim / 3)
+            # custom dim factor multiplier
+            if ffn_dim_multiplier is not None:
+                ff_dim = int(ffn_dim_multiplier * ff_dim)
+            ff_dim = multiple_of * ((ff_dim + multiple_of - 1) // multiple_of)
+
+        self.gguf_writer.add_feed_forward_length(ff_dim)
+
+
+    def set_gguf_parameters(self):
+        # set only for attention layers before calling super().set_gguf_parameters()
+        self.hparams["num_key_value_heads"] = [(self.hparams["num_key_value_heads"] if x in self.hparams["full_attn_idxs"] else 0) for x in range(self.block_count)]
+
+        super().set_gguf_parameters()
+        self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
+        self.gguf_writer.add_shortconv_l_cache(self.hparams["conv_L_cache"])
+        self.gguf_writer.add_is_recurrent_layer([x not in self.hparams["full_attn_idxs"] for x in range(self.block_count)])
+        self.gguf_writer.add_layer_norm_rms_eps(self.hparams["norm_eps"])
+        self._add_feed_forward_length()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if 'operator_norm' in name:
+            name = name.replace('operator_norm', 'norm')
+        elif 'attention.k_layernorm' in name or 'attention.q_layernorm' in name:
+            name = name.replace('attention', 'self_attn')
+        elif name.startswith("model.embedding_norm"):
+            name = name.replace("model.embedding_norm", 'word_embeddings_layernorm')
+        elif 'conv.conv' in name:
+            # conv op requires 2d tensor
+            data_torch = data_torch.squeeze(1)
+        elif 'self_attn.out_proj' in name:
+            name = name.replace('out_proj', 'o_proj')
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+
 ###### CONVERSION LOGIC ######

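To make the converter hunk above easier to follow, here is a small standalone sketch of the two derivations `LFM2Model` performs: the adjusted feed-forward width and the per-layer attention/recurrent split. All hparam values below are illustrative placeholders, not the published LFM2 configs.

```python
# Sketch of the LFM2 hparam handling above; the values are assumptions, not real LFM2 configs.
hparams = {
    "block_ff_dim": 4096,
    "block_auto_adjust_ff_dim": True,
    "block_ffn_dim_multiplier": 1.0,
    "block_multiple_of": 256,
    "num_key_value_heads": 8,
    "full_attn_idxs": [2, 5, 8],   # hypothetical indices of the full-attention layers
}
block_count = 10                   # hypothetical

# Feed-forward width, mirroring _add_feed_forward_length()
ff_dim = hparams["block_ff_dim"]
if hparams["block_auto_adjust_ff_dim"]:
    ff_dim = int(2 * ff_dim / 3)
    if hparams["block_ffn_dim_multiplier"] is not None:
        ff_dim = int(hparams["block_ffn_dim_multiplier"] * ff_dim)
    multiple_of = hparams["block_multiple_of"]
    ff_dim = multiple_of * ((ff_dim + multiple_of - 1) // multiple_of)
print(ff_dim)  # 4096 -> 2730 -> rounded up to the next multiple of 256 = 2816

# Per-layer metadata, mirroring set_gguf_parameters(): attention layers keep their
# KV-head count, shortconv layers get 0 and are flagged as recurrent.
n_kv_heads = [hparams["num_key_value_heads"] if i in hparams["full_attn_idxs"] else 0
              for i in range(block_count)]
is_recurrent = [i not in hparams["full_attn_idxs"] for i in range(block_count)]
print(n_kv_heads)    # [0, 0, 8, 0, 0, 8, 0, 0, 8, 0]
print(is_recurrent)  # [True, True, False, True, True, False, True, True, False, True]
```
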
convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
@@ -129,6 +129,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
     {"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
     {"name": "a.x-4.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", },
+    {"name": "lfm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LiquidAI/LFM2-Tokenizer"},
 ]

 # some models are known to be broken upstream, so we will skip them as exceptions

ggml/src/ggml-cuda/ssm-conv.cu

Lines changed: 8 additions & 0 deletions
@@ -107,6 +107,9 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int
     if (nc == 4) {
         ssm_conv_f32<threads, 4><<<blocks, threads, 0, stream>>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1,
             dst, dst_nb0, dst_nb1, dst_nb2, n_t);
+    } else if (nc == 3) {
+        ssm_conv_f32<threads, 3><<<blocks, threads, 0, stream>>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1,
+            dst, dst_nb0, dst_nb1, dst_nb2, n_t);
     } else {
         GGML_ABORT("Only support kernel size = 4 now.");
     }
@@ -116,6 +119,11 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int
         dim3 blocks(n_s, (nr + threads - 1) / threads, (n_t + split_n_t - 1) / split_n_t);
         ssm_conv_long_token_f32<threads, 4, split_n_t><<<blocks, threads, 0, stream>>>(
             src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t);
+    } else if (nc == 3) {
+        const int64_t split_n_t = 32;
+        dim3 blocks(n_s, (nr + threads - 1) / threads, (n_t + split_n_t - 1) / split_n_t);
+        ssm_conv_long_token_f32<threads, 3, split_n_t><<<blocks, threads, 0, stream>>>(
+            src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t);
     } else {
         GGML_ABORT("Only support kernel size = 4 right now.");
     }
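
The new `nc == 3` branches matter because LFM2's short convolution uses a kernel width of 3 (its `conv_L_cache`), whereas the Mamba-style models that used this kernel before use width 4. As a rough, hedged reference for what the op computes per channel (my reading of `ggml_ssm_conv`, not code taken from the repository): a causal depthwise convolution over a window of the last `nc` inputs, with the oldest column shifted out of the cached state each step.

```python
import numpy as np

def shortconv_step(x_new, weight, state):
    """One decode step of a depthwise causal convolution (sketch; see assumptions above).

    x_new:  (d_inner,)         current input column
    weight: (d_inner, L)       per-channel kernel, L == conv_L_cache (3 for LFM2)
    state:  (d_inner, L - 1)   cached last L-1 input columns
    """
    window = np.concatenate([state, x_new[:, None]], axis=1)  # (d_inner, L)
    y = (window * weight).sum(axis=1)                         # per-channel dot product
    return y, window[:, 1:]                                   # shift the oldest column out

d_inner, L = 4, 3
rng = np.random.default_rng(0)
weight = rng.standard_normal((d_inner, L))
state = np.zeros((d_inner, L - 1))
y, state = shortconv_step(rng.standard_normal(d_inner), weight, state)
print(y.shape, state.shape)  # (4,) (4, 2)
```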

gguf-py/gguf/constants.py

Lines changed: 29 additions & 0 deletions
@@ -122,6 +122,7 @@ class LLM:
         ALTUP_ACTIVE_IDX = "{arch}.altup.active_idx"
         ALTUP_NUM_INPUTS = "{arch}.altup.num_inputs"
         EMBD_LENGTH_PER_LAYER_INP = "{arch}.embedding_length_per_layer_input"
+        IS_RECURRENT_LAYER = "{arch}.is_recurrent_layer"

     class Attention:
         HEAD_COUNT = "{arch}.attention.head_count"
@@ -187,6 +188,9 @@ class ConvNext:
     class Classifier:
         OUTPUT_LABELS = "{arch}.classifier.output_labels"

+    class ShortConv:
+        L_CACHE = "{arch}.shortconv.l_cache"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE = "tokenizer.ggml.pre"
@@ -361,6 +365,7 @@ class MODEL_ARCH(IntEnum):
     ERNIE4_5 = auto()
     HUNYUAN_MOE = auto()
     SMOLLM3 = auto()
+    LFM2 = auto()


 class VISION_PROJECTOR_TYPE(IntEnum):
@@ -532,6 +537,9 @@ class MODEL_TENSOR(IntEnum):
     POSNET_ATTN_K = auto()
     POSNET_ATTN_V = auto()
     POSNET_ATTN_OUT = auto()
+    SHORTCONV_CONV = auto()
+    SHORTCONV_INPROJ = auto()
+    SHORTCONV_OUTPROJ = auto()
     # vision
     V_MMPROJ = auto()
     V_MMPROJ_FC = auto()
@@ -671,6 +679,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.FALCON_H1: "falcon-h1",
     MODEL_ARCH.HUNYUAN_MOE: "hunyuan-moe",
     MODEL_ARCH.SMOLLM3: "smollm3",
+    MODEL_ARCH.LFM2: "lfm2",
 }

 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -842,6 +851,9 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
     MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
     MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
+    MODEL_TENSOR.SHORTCONV_CONV: "blk.{bid}.shortconv.conv",
+    MODEL_TENSOR.SHORTCONV_INPROJ: "blk.{bid}.shortconv.in_proj",
+    MODEL_TENSOR.SHORTCONV_OUTPROJ: "blk.{bid}.shortconv.out_proj",
     # vision
     MODEL_TENSOR.V_MMPROJ: "mm.{bid}",
     MODEL_TENSOR.V_MMPROJ_FC: "mm.model.fc",
@@ -2323,6 +2335,23 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.LFM2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.SHORTCONV_CONV,
+        MODEL_TENSOR.SHORTCONV_INPROJ,
+        MODEL_TENSOR.SHORTCONV_OUTPROJ,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.ATTN_NORM,  # operator_norm
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
     ],
     # TODO
 }

gguf-py/gguf/gguf_writer.py

Lines changed: 6 additions & 0 deletions
@@ -648,6 +648,12 @@ def add_convnext_embedding_length(self, length: int) -> None:
     def add_convnext_block_count(self, length: int) -> None:
         self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)

+    def add_shortconv_l_cache(self, length: int) -> None:
+        self.add_uint32(Keys.ShortConv.L_CACHE.format(arch=self.arch), length)
+
+    def add_is_recurrent_layer(self, value: Sequence[bool]) -> None:
+        self.add_array(Keys.LLM.IS_RECURRENT_LAYER.format(arch=self.arch), value)
+
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)

gguf-py/gguf/tensor_mapping.py

Lines changed: 12 additions & 0 deletions
@@ -1015,6 +1015,18 @@ class TensorNameMap:
            "backbone.posnet.{bid}.proj_out", # wavtokenizer
        ),

+        MODEL_TENSOR.SHORTCONV_CONV: (
+            "model.layers.{bid}.conv.conv",
+        ),
+
+        MODEL_TENSOR.SHORTCONV_INPROJ: (
+            "model.layers.{bid}.conv.in_proj",
+        ),
+
+        MODEL_TENSOR.SHORTCONV_OUTPROJ: (
+            "model.layers.{bid}.conv.out_proj",
+        ),
+
        #############################################################################
        ## Vision encoder
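
With these entries in place, gguf-py's tensor-name mapping resolves LFM2's Hugging Face shortconv tensors to the GGUF names declared in constants.py. A small usage sketch (the block count is an assumed placeholder; the converter passes the model's real layer count):

```python
import gguf

# Assumed block count for illustration.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LFM2, 16)
print(tmap.get_name("model.layers.0.conv.conv.weight", try_suffixes=(".weight", ".bias")))
# -> blk.0.shortconv.conv.weight
```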

src/llama-arch.cpp

Lines changed: 30 additions & 0 deletions
@@ -82,6 +82,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_ERNIE4_5, "ernie4_5" },
     { LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
     { LLM_ARCH_SMOLLM3, "smollm3" },
+    { LLM_ARCH_LFM2, "lfm2" },
     { LLM_ARCH_UNKNOWN, "(unknown)" },
 };

@@ -188,6 +189,10 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {

     { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },

+    { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
+
+    { LLM_KV_IS_RECURRENT_LAYER, "%s.is_recurrent_layer" },
+
     { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
     { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
     { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
@@ -1793,6 +1798,27 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_LFM2,
+        {
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_SHORTCONV_CONV, "blk.%d.shortconv.conv" },
+            { LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
+            { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+        }
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
@@ -1960,6 +1986,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SHORTCONV_CONV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_SHORTCONV_INPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_SHORTCONV_OUTPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
 };

 LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
@@ -2031,6 +2060,7 @@ bool llm_arch_is_hybrid(const llm_arch & arch) {
     switch (arch) {
         case LLM_ARCH_JAMBA:
         case LLM_ARCH_FALCON_H1:
+        case LLM_ARCH_LFM2:
             return true;
         default:
             return false;

src/llama-arch.h

Lines changed: 8 additions & 0 deletions
@@ -86,6 +86,7 @@ enum llm_arch {
     LLM_ARCH_ERNIE4_5,
     LLM_ARCH_HUNYUAN_MOE,
     LLM_ARCH_SMOLLM3,
+    LLM_ARCH_LFM2,
     LLM_ARCH_UNKNOWN,
 };

@@ -227,6 +228,10 @@ enum llm_kv {

     LLM_KV_CLASSIFIER_OUTPUT_LABELS,

+    LLM_KV_SHORTCONV_L_CACHE,
+
+    LLM_KV_IS_RECURRENT_LAYER,
+
     // deprecated:
     LLM_KV_TOKENIZER_PREFIX_ID,
     LLM_KV_TOKENIZER_SUFFIX_ID,
@@ -396,6 +401,9 @@ enum llm_tensor {
     LLM_TENSOR_POS_NET_ATTN_K,
     LLM_TENSOR_POS_NET_ATTN_V,
     LLM_TENSOR_POS_NET_ATTN_OUT,
+    LLM_TENSOR_SHORTCONV_CONV,
+    LLM_TENSOR_SHORTCONV_INPROJ,
+    LLM_TENSOR_SHORTCONV_OUTPROJ,
 };

 enum llm_tensor_layer {

src/llama-hparams.cpp

Lines changed: 5 additions & 0 deletions
@@ -71,6 +71,11 @@ uint32_t llama_hparams::n_embd_r() const {
         return token_shift_count * n_embd;
     }

+    if (n_shortconv_l_cache != 0) {
+        // for LFM2 models
+        return n_embd * (n_shortconv_l_cache - 1);
+    }
+
     // TODO: maybe support other convolution strides than 1
     // NOTE: since the first column of the conv_state is shifted out each time, it's not actually needed
     // Corresponds to Mamba's conv_states size
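
Worked example with an illustrative width (the real `n_embd` depends on the LFM2 variant): with `conv_L_cache = 3` and `n_embd = 2048`, each shortconv layer keeps the last `3 - 1 = 2` input columns per channel, i.e. `2048 * 2 = 4096` values of recurrent state per sequence.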

src/llama-hparams.h

Lines changed: 2 additions & 0 deletions
@@ -55,6 +55,8 @@ struct llama_hparams {
     struct llama_hparams_posnet posnet;
     struct llama_hparams_convnext convnext;

+    uint32_t n_shortconv_l_cache = 0;
+
     std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_arr;
     std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_kv_arr;
     std::array<uint32_t, LLAMA_MAX_LAYERS> n_ff_arr;
