@@ -180,7 +180,8 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
             extra = sorted(tensor_names_from_parts.difference(self.tensor_names))
             missing_files = sorted(set(weight_map[n] for n in missing if n in weight_map))
             if len(extra) == 0 and len(missing_files) > 0:
-                raise ValueError(f"Missing or incomplete model files: {missing_files}")
+                raise ValueError(f"Missing or incomplete model files: {missing_files}\n"
+                                 f"Missing tensors: {missing}")
             else:
                 raise ValueError("Mismatch between weight map and model parts for tensor names:\n"
                                  f"Missing tensors: {missing}\n"
@@ -1099,13 +1100,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         tensors.append((self.map_tensor_name(name), data_torch))
 
-        if name == "word_embeddings.weight":
-            assert self.tensor_names is not None
-
-            # TODO: tie them at runtime, don't duplicate in the model file
-            if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
         return tensors
 
 
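The deleted block used to write a second copy of `word_embeddings.weight` under the output tensor whenever the checkpoint shipped no `lm_head.weight`/`output.weight`; the removed TODO already pointed at the intended fix, namely tying the two at load time instead of duplicating data in the model file. A rough sketch of that idea, in Python for brevity; the `tensors` dict and the key names are illustrative, not the loader's real API:

```python
from torch import Tensor

def resolve_output_weight(tensors: dict[str, Tensor]) -> Tensor:
    # If the file carries a distinct output matrix, use it.
    if "output.weight" in tensors:
        return tensors["output.weight"]
    # Tied case: reuse the token embedding that is stored exactly once,
    # instead of relying on a duplicated copy baked into the model file.
    return tensors["token_embd.weight"]
```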
@@ -2423,10 +2417,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         tensors.append((new_name, data_torch))
 
-        # note: GPT2 output is tied to (same as) wte in original model
-        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
-            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
         return tensors
 
 
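The removed comment states why the duplicate was harmless but wasteful: in the original GPT-2 checkpoint the output projection is the same tensor as `wte`. A quick side check of that on the Hugging Face model, not part of the converter, assuming `transformers` is installed and the `gpt2` checkpoint is reachable:

```python
from transformers import GPT2LMHeadModel

model = GPT2LMHeadModel.from_pretrained("gpt2")
# The LM head is tied to the token embedding: both names point at the same storage,
# so writing it twice into the converted file only duplicated the largest matrix in the model.
assert model.lm_head.weight.data_ptr() == model.transformer.wte.weight.data_ptr()
```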
@@ -2756,21 +2746,26 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
         self.gguf_writer.add_rope_scaling_factor(1.0)
 
+    _has_tok_embd = False
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
 
-        new_name = self.map_tensor_name(name)
-
-        tensors: list[tuple[str, Tensor]] = [(new_name, data_torch)]
+        output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
+        tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD)
 
-        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
-            assert self.tensor_names is not None
+        new_name = self.map_tensor_name(name)
 
-            if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
-                # copy tok_embd.weight to output.weight
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
+        # assuming token_embd.weight is seen before output.weight
+        if not self._has_tok_embd and new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
+            # even though the tensor file(s) does not contain the word embeddings they are still in the weight map
+            if self.tensor_names and "transformer.wte.weight" in self.tensor_names:
+                logger.debug(f"{tok_embd_name} not found before {output_name}, assuming they are tied")
+                self.tensor_names.remove("transformer.wte.weight")
+        elif new_name == tok_embd_name:
+            self._has_tok_embd = True
 
-        return tensors
+        return [(new_name, data_torch)]
 
 
 @Model.register("InternLM2ForCausalLM")
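The new `_has_tok_embd` flag encodes an ordering assumption: `token_embd.weight` is normally converted before `output.weight`, so if the output tensor shows up first, the checkpoint presumably only stores the tied copy, and `transformer.wte.weight` is dropped from the expected-name set, which keeps the completeness check from the first hunk from reporting it as missing. A stripped-down model of that bookkeeping; the class and tensor names below are placeholders, not the converter's real types:

```python
class TiedEmbeddingTracker:
    """Tracks whether the token embedding was seen before the output tensor."""

    def __init__(self, expected_names: set[str]):
        self.expected_names = expected_names  # mirrors self.tensor_names in the converter
        self.has_tok_embd = False

    def observe(self, mapped_name: str) -> None:
        if not self.has_tok_embd and mapped_name == "output.weight":
            # output.weight arrived first: assume it is tied to the (absent) word embedding
            self.expected_names.discard("transformer.wte.weight")
        elif mapped_name == "token_embd.weight":
            self.has_tok_embd = True


tracker = TiedEmbeddingTracker({"transformer.wte.weight", "output.weight"})
tracker.observe("output.weight")  # seen before any token_embd.weight
assert "transformer.wte.weight" not in tracker.expected_names
```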