File tree Expand file tree Collapse file tree 1 file changed +10
-7
lines changed Expand file tree Collapse file tree 1 file changed +10
-7
lines changed Original file line number Diff line number Diff line change @@ -199,15 +199,18 @@ def quant_embedding(model):
199199
200200 quantized_token_embed = quant_embedding (llava .model_ .language_model .model )
201201
202- qval = quantized_token_embed . embedding . weight
203- scale = quantized_token_embed .embedding . scales
202+ print ( "GET ATTRS" , quantized_token_embed )
203+ print ( "GET ATTRS2" , quantized_token_embed .embed_tokens )
204204
205- qval_copy = quantized_token_embed_copy .embedding .weight .tensor_impl .get_plain ()[0 ]
206- scale_copy = quantized_token_embed_copy .embedding .weight .tensor_impl .get_plain ()[1 ]
207- zero_copy = quantized_token_embed_copy .embedding .weight .tensor_impl .get_plain ()[2 ]
205+ qval = quantized_token_embed .embed_tokens .weight
206+ scale = quantized_token_embed .embed_tokens .scales
208207
209- print ("COPY TENSOR" , quantized_token_embed_copy .embedding .weight )
210- print ("ORIGINAL DTYPE" , quantized_token_embed .embedding .dtype )
208+ qval_copy = quantized_token_embed_copy .embed_tokens .weight .tensor_impl .get_plain ()[0 ]
209+ scale_copy = quantized_token_embed_copy .embed_tokens .weight .tensor_impl .get_plain ()[1 ]
210+ zero_copy = quantized_token_embed_copy .embed_tokens .weight .tensor_impl .get_plain ()[2 ]
211+
212+ print ("COPY TENSOR" , quantized_token_embed_copy .embed_tokens .weight )
213+ print ("ORIGINAL DTYPE" , quantized_token_embed .embed_tokens .dtype )
211214
212215 print ("COMPARING" )
213216 print ("qval_copy" , qval_copy )
You can’t perform that action at this time.
0 commit comments