@@ -228,7 +228,7 @@ def __init__(
228228 rope_freq_scale : float = 1.0 ,
229229 n_gqa : Optional [int ] = None , # (TEMPORARY) must be 8 for llama2 70b
230230 rms_norm_eps : Optional [float ] = None , # (TEMPORARY)
231- mul_mat_q : Optional [bool ] = None , # (TEMPORARY)
231+ mul_mat_q : Optional [bool ] = None ,
232232 verbose : bool = True ,
233233 ):
234234 """Load a llama.cpp model from `model_path`.
@@ -290,11 +290,6 @@ def __init__(
290290 self .params .rope_freq_base = rope_freq_base
291291 self .params .rope_freq_scale = rope_freq_scale
292292
293- if n_gqa is not None :
294- self .params .n_gqa = n_gqa
295-
296- if rms_norm_eps is not None :
297- self .params .rms_norm_eps = rms_norm_eps
298293
299294 if mul_mat_q is not None :
300295 self .params .mul_mat_q = mul_mat_q
@@ -453,6 +448,8 @@ def detokenize(self, tokens: List[int]) -> bytes:
453448 buffer_size = 32
454449 buffer = (ctypes .c_char * buffer_size )()
455450 for token in tokens :
451+ if token == llama_cpp .llama_token_bos (self .ctx ):
452+ continue
456453 n = llama_cpp .llama_token_to_str (
457454 self .ctx , llama_cpp .llama_token (token ), buffer , buffer_size
458455 )
@@ -1585,13 +1582,7 @@ def __getstate__(self):
15851582 lora_base = self .lora_base ,
15861583 lora_path = self .lora_path ,
15871584 tensor_split = self .tensor_split ,
1588- ### TEMPORARY ###
1589- n_gqa = self .params .n_gqa ,
1590- rms_norm_eps = self .params .rms_norm_eps ,
1591- ### TEMPORARY ###
1592- ### DEPRECATED ###
1593- n_parts = self .n_parts ,
1594- ### DEPRECATED ###
1585+ mul_mat_q = self .params .mul_mat_q ,
15951586 )
15961587
15971588 def __setstate__ (self , state ):
@@ -1613,14 +1604,8 @@ def __setstate__(self, state):
16131604 lora_base = state ["lora_base" ],
16141605 lora_path = state ["lora_path" ],
16151606 tensor_split = state ["tensor_split" ],
1607+ mul_mat_q = state ["mul_mat_q" ],
16161608 verbose = state ["verbose" ],
1617- ### TEMPORARY ###
1618- n_gqa = state ["n_gqa" ],
1619- rms_norm_eps = state ["rms_norm_eps" ],
1620- ### TEMPORARY ###
1621- ### DEPRECATED ###
1622- n_parts = state ["n_parts" ],
1623- ### DEPRECATED ###
16241609 )
16251610
16261611 def save_state (self ) -> LlamaState :
0 commit comments