@@ -843,6 +843,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "169bf0296a13c4d9b7672313f749eb36501d931022de052aad6e36f2bf34dd51":
             # ref: https://huggingface.co/LiquidAI/LFM2-Tokenizer
             res = "lfm2"
+        if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
+            # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B
+            res = "exaone4"
 
         if res is None:
             logger.warning("\n")
@@ -6780,6 +6783,75 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
                 yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
 
 
+@ModelBase.register("Exaone4ForCausalLM")
+class Exaone4Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.EXAONE4
+
+    def set_vocab(self):
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+
+        if hparams.get("sliding_window") is not None:
+            self.gguf_writer.add_sliding_window(hparams["sliding_window"])
+            if "layer_types" in hparams:
+                self.gguf_writer.add_sliding_window_pattern([t == "sliding_attention" for t in hparams["layer_types"]])
+            elif "sliding_window_pattern" in hparams:
+                sliding_window_pattern = []
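+                # the pattern may be a per-layer string (e.g. "LLLG", where "L" marks a sliding layer) or an integer period (every Nth layer is global)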
+                if isinstance(hparams["sliding_window_pattern"], str):  # e.g. LLLG
+                    for i in range(hparams["num_hidden_layers"]):
+                        sliding_window_pattern.append(hparams["sliding_window_pattern"][i % len(hparams["sliding_window_pattern"])] == "L")
+                if isinstance(hparams["sliding_window_pattern"], int):  # e.g. 4
+                    for i in range(hparams["num_hidden_layers"]):
+                        sliding_window_pattern.append((i + 1) % hparams["sliding_window_pattern"] != 0)
+                if len(sliding_window_pattern) == hparams["num_hidden_layers"]:
+                    self.gguf_writer.add_sliding_window_pattern(sliding_window_pattern)
+
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
+            if rope_scaling.get("rope_type", '').lower() == "llama3":
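+                # compute per-frequency scaling factors for llama3-style RoPE and emit them as the ROPE_FREQS tensor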
+                base = self.hparams.get("rope_theta", 10_000.0)
+                if (dim := self.hparams.get("head_dim")) is None:
+                    dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+                freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
+
+                factor = rope_scaling.get("factor", 16.0)
+                low_freq_factor = rope_scaling.get("low_freq_factor", 1.0)
+                high_freq_factor = rope_scaling.get("high_freq_factor", 4.0)
+                old_context_len = self.hparams.get("original_max_position_embeddings", 8192)
+
+                low_freq_wavelen = old_context_len / low_freq_factor
+                high_freq_wavelen = old_context_len / high_freq_factor
+
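+                # short wavelengths (high frequencies) keep a factor of 1, long wavelengths get the full factor, and the band in between is smoothly interpolated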
+                rope_factors = []
+                for freq in freqs:
+                    wavelen = 2 * math.pi / freq
+                    if wavelen < high_freq_wavelen:
+                        rope_factors.append(1)
+                    elif wavelen > low_freq_wavelen:
+                        rope_factors.append(factor)
+                    else:
+                        smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
+                        rope_factors.append(1 / ((1 - smooth) / factor + smooth))
+
+                yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
+
+
 @ModelBase.register("GraniteForCausalLM")
 class GraniteModel(LlamaModel):
     """Conversion for IBM's GraniteForCausalLM"""