@@ -590,6 +590,15 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "855059429035d75a914d1eda9f10a876752e281a054a7a3d421ef0533e5b6249":
             # ref: https://huggingface.co/HuggingFaceTB/SmolLM-135M
             res = "smollm"
+        if chkhsh == "3c30d3ad1d6b64202cd222813e7736c2db6e1bd6d67197090fc1211fbc612ae7":
+            # ref: https://huggingface.co/bigscience/bloom
+            res = "bloom"
+        if chkhsh == "bc01ce58980e1db43859146dc51b1758b3b88729b217a74792e9f8d43e479d21":
+            # ref: https://huggingface.co/TurkuNLP/gpt3-finnish-small
+            res = "gpt3-finnish"
+        if chkhsh == "4e2b24cc4770243d65a2c9ec19770a72f08cffc161adbb73fcbb6b7dd45a0aae":
+            # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct
+            res = "exaone"

         if res is None:
             logger.warning("\n")
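For reference, the `chkhsh` values above are fingerprints of the model's BPE pre-tokenizer: `get_vocab_base_pre()` encodes a fixed sample string with the Hugging Face tokenizer and hashes the resulting token IDs, so tokenizers that pre-tokenize identically map to the same `res` name. A minimal sketch of that idea (the sample text and helper name here are illustrative, not the exact ones used by the script):

```python
from hashlib import sha256
from transformers import AutoTokenizer

def pretokenizer_fingerprint(model_id: str, sample_text: str) -> str:
    # Encode a fixed sample and hash the token IDs; two tokenizers that
    # pre-tokenize the sample identically produce the same digest.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    ids = tokenizer.encode(sample_text)
    return sha256(str(ids).encode()).hexdigest()

# Hypothetical usage: compare the digest against the known hashes above
# ("bloom", "gpt3-finnish", "exaone", ...).
print(pretokenizer_fingerprint("bigscience/bloom", "Hello world 123"))
```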
@@ -893,7 +902,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return tensors


-@Model.register("BloomForCausalLM")
+@Model.register("BloomForCausalLM", "BloomModel")
 class BloomModel(Model):
     model_arch = gguf.MODEL_ARCH.BLOOM

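Registering the extra "BloomModel" name matters because the converter picks a `Model` subclass based on the architecture string in the checkpoint's `config.json`, and some BLOOM exports presumably declare `"BloomModel"` rather than `"BloomForCausalLM"`. A simplified sketch of that registration pattern, as an assumption about how the lookup works rather than a copy of the script's internals:

```python
class Model:
    _model_classes: dict[str, type] = {}

    @classmethod
    def register(cls, *names):
        # Decorator: map one or more HF architecture strings to a converter class.
        def wrapper(model_cls):
            for name in names:
                cls._model_classes[name] = model_cls
            return model_cls
        return wrapper

    @classmethod
    def from_model_architecture(cls, arch: str) -> type:
        try:
            return cls._model_classes[arch]
        except KeyError:
            raise NotImplementedError(f"Architecture {arch!r} not supported") from None
```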
@@ -3734,6 +3743,118 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         name = name.removeprefix("transformer.")
         return [(self.map_tensor_name(name), data_torch)]

+
+@Model.register("NemotronForCausalLM")
+class NemotronModel(Model):
+    model_arch = gguf.MODEL_ARCH.NEMOTRON
+
+    def set_vocab(self):
+        self._set_vocab_sentencepiece()
+        self.gguf_writer.add_pad_token_id(0)
+        self.gguf_writer.add_unk_token_id(1)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+
+        f_norm_eps = self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon", "norm_eps"])
+        self.gguf_writer.add_layer_norm_eps(f_norm_eps)
+
+        # * Partial RoPE
+        rot_pct = self.find_hparam(["partial_rotary_factor", "rope_pct", "rope_percent"])
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])
+        self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
+
+        # * RopeScaling for Nemotron
+        if "rope_scaling" not in self.hparams or self.hparams["rope_scaling"] is None:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+        else:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(self.hparams["factor"])
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # * Adding +1 to LayerNorm's weights here to implement layernorm1p w/o changing anything on the GGML engine side
+        # model.layers.{l}.input_layernorm.weight
+        # model.layers.{l}.post_attention_layernorm.weight
+        # model.norm.weight
+        if name.endswith("norm.weight"):
+            data_torch = data_torch + 1
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+
+@Model.register("ExaoneForCausalLM")
+class ExaoneModel(Model):
+    model_arch = gguf.MODEL_ARCH.EXAONE
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+
+        assert (hparams["activation_function"] == "silu")
+
+        max_position_embeddings = hparams["max_position_embeddings"]
+        embed_dim = hparams["hidden_size"]
+        num_heads = hparams["num_attention_heads"]
+        num_kv_heads = hparams.get("num_key_value_heads", num_heads)
+        layer_norm_eps = hparams["layer_norm_epsilon"]
+        intermediate_size = hparams["intermediate_size"] if "intermediate_size" in hparams else 4 * embed_dim
+        num_layers = hparams["num_layers"]
+        # ignore for now as EXAONE-3.0-7.8B-Instruct attention_dropout is 0.0
+        # attention_dropout_rate = hparams["attention_dropout"]
+        # ignore for now as EXAONE-3.0-7.8B-Instruct embed_dropout is 0.0
+        # embed_dropout_rate = hparams["embed_dropout"]
+        self.gguf_writer.add_embedding_length(embed_dim)
+        self.gguf_writer.add_head_count(num_heads)
+        self.gguf_writer.add_head_count_kv(num_kv_heads)
+        self.gguf_writer.add_context_length(max_position_embeddings)
+        self.gguf_writer.add_layer_norm_rms_eps(layer_norm_eps)
+        self.gguf_writer.add_feed_forward_length(intermediate_size)
+        self.gguf_writer.add_block_count(num_layers)
+        self.gguf_writer.add_file_type(self.ftype)
+
+        if (rope_theta := self.hparams.get("rope_theta")) is not None:
+            self.gguf_writer.add_rope_freq_base(rope_theta)
+        rotary_factor = self.find_hparam(["partial_rotary_factor", "rope_pct"], optional=True)
+        rotary_factor = rotary_factor if rotary_factor is not None else 1.0
+        self.gguf_writer.add_rope_dimension_count(int(rotary_factor * (hparams["hidden_size"] // hparams["num_attention_heads"])))
+        if hparams.get("rope_scaling") is not None and "factor" in hparams["rope_scaling"]:
+            if hparams["rope_scaling"].get("type") == "linear":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+                self.gguf_writer.add_rope_scaling_factor(hparams["rope_scaling"]["factor"])
+
+    def prepare_tensors(self):
+        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
+            if rope_scaling.get("rope_type", '').lower() == "llama3":
+                base = self.hparams.get("rope_theta", 10000.0)
+                dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+                freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
+
+                factor = rope_scaling.get("factor", 8.0)
+                low_freq_factor = rope_scaling.get("low_freq_factor", 1.0)
+                high_freq_factor = rope_scaling.get("high_freq_factor", 4.0)
+                old_context_len = self.hparams.get("original_max_position_embeddings", 8192)
+
+                low_freq_wavelen = old_context_len / low_freq_factor
+                high_freq_wavelen = old_context_len / high_freq_factor
+                assert low_freq_wavelen != high_freq_wavelen
+
+                rope_factors = []
+                for freq in freqs:
+                    wavelen = 2 * math.pi / freq
+                    if wavelen < high_freq_wavelen:
+                        rope_factors.append(1)
+                    elif wavelen > low_freq_wavelen:
+                        rope_factors.append(factor)
+                    else:
+                        smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
+                        rope_factors.append(1 / ((1 - smooth) / factor + smooth))
+
+                self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
+
+        super().prepare_tensors()
+
 ###### CONVERSION LOGIC ######

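A note on the Nemotron `+1` adjustment in `modify_tensors()`: Nemotron checkpoints use "layernorm1p", which scales the normalized activations by `(1 + weight)` instead of `weight`, so folding the `+1` into the exported weights lets a standard LayerNorm kernel reproduce the same output with no engine-side changes. A quick standalone check of that equivalence (illustrative values, not part of the conversion script):

```python
import torch
import torch.nn.functional as F

torch.manual_seed(0)
x = torch.randn(4, 16)
w = torch.randn(16) * 0.02   # layernorm1p weights are learned around zero
b = torch.randn(16)
eps = 1e-5

# Normalize with the biased variance, exactly as LayerNorm does.
x_hat = (x - x.mean(-1, keepdim=True)) / torch.sqrt(x.var(-1, unbiased=False, keepdim=True) + eps)

layernorm1p = x_hat * (1 + w) + b                                    # scale by (1 + w)
standard_ln = F.layer_norm(x, (16,), weight=w + 1, bias=b, eps=eps)  # stock kernel with shifted weights

print(torch.allclose(layernorm1p, standard_ln, atol=1e-5))  # True
```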
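And on the llama3-style `rope_factors` tensor that `ExaoneModel.prepare_tensors()` writes out: RoPE frequencies whose wavelength fits well inside the original context window keep a factor of 1, very low frequencies are stretched by the full `factor`, and the band in between is interpolated smoothly. A small standalone check of that behaviour with made-up hyperparameters (not taken from any real EXAONE config):

```python
import math
import numpy as np

base, dim = 10000.0, 128                                 # hypothetical head dimension
factor, low_freq_factor, high_freq_factor = 8.0, 1.0, 4.0
old_context_len = 8192

freqs = 1.0 / (base ** (np.arange(0, dim, 2, dtype=np.float32) / dim))
wavelens = 2 * math.pi / freqs

low_freq_wavelen = old_context_len / low_freq_factor     # 8192
high_freq_wavelen = old_context_len / high_freq_factor   # 2048

def rope_factor(wavelen: float) -> float:
    if wavelen < high_freq_wavelen:   # short wavelengths: leave untouched
        return 1.0
    if wavelen > low_freq_wavelen:    # long wavelengths: stretch by the full factor
        return factor
    # in between: interpolate smoothly between 1 and factor
    smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
    return 1 / ((1 - smooth) / factor + smooth)

factors = np.array([rope_factor(w) for w in wavelens], dtype=np.float32)
print(factors.min(), factors.max())   # ranges from 1.0 (high freq) to 8.0 (low freq)
```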