1 file changed: +10 -7 lines changed
Original file line number | Diff line number | Diff line change
@@ -193,15 +193,18 @@ def quant_embedding(model):
193193
194194 quantized_token_embed = quant_embedding (llava .model_ .language_model .model )
195195
196- qval = quantized_token_embed . embedding . weight
197- scale = quantized_token_embed .embedding . scales
196+ print ( "GET ATTRS" , quantized_token_embed )
197+ print ( "GET ATTRS2" , quantized_token_embed .embed_tokens )
198198
199- qval_copy = quantized_token_embed_copy .embedding .weight .tensor_impl .get_plain ()[0 ]
200- scale_copy = quantized_token_embed_copy .embedding .weight .tensor_impl .get_plain ()[1 ]
201- zero_copy = quantized_token_embed_copy .embedding .weight .tensor_impl .get_plain ()[2 ]
199+ qval = quantized_token_embed .embed_tokens .weight
200+ scale = quantized_token_embed .embed_tokens .scales
202201
203- print ("COPY TENSOR" , quantized_token_embed_copy .embedding .weight )
204- print ("ORIGINAL DTYPE" , quantized_token_embed .embedding .dtype )
202+ qval_copy = quantized_token_embed_copy .embed_tokens .weight .tensor_impl .get_plain ()[0 ]
203+ scale_copy = quantized_token_embed_copy .embed_tokens .weight .tensor_impl .get_plain ()[1 ]
204+ zero_copy = quantized_token_embed_copy .embed_tokens .weight .tensor_impl .get_plain ()[2 ]
205+
206+ print ("COPY TENSOR" , quantized_token_embed_copy .embed_tokens .weight )
207+ print ("ORIGINAL DTYPE" , quantized_token_embed .embed_tokens .dtype )
205208
206209 print ("COMPARING" )
207210 print ("qval_copy" , qval_copy )
You can’t perform that action at this time.
0 commit comments