@@ -554,14 +554,12 @@ function default_merge_input_ids_with_image_features({
554554 input_ids,
555555 attention_mask,
556556} ) {
557- console . log ( 'input_ids' , input_ids )
558557 const image_tokens = input_ids . tolist ( ) . map ( ids =>
559558 ids . reduce ( ( acc , x , idx ) => {
560559 if ( x == image_token_id ) acc . push ( idx ) ;
561560 return acc ;
562561 } , [ ] )
563562 ) ;
564- console . log ( 'image_tokens' , image_tokens )
565563 const n_image_tokens = image_tokens . reduce ( ( acc , x ) => acc + x . length , 0 ) ;
566564 const n_image_features = image_features . dims [ 0 ] ;
567565 if ( n_image_tokens !== n_image_features ) {
@@ -617,15 +615,11 @@ async function imageTextToTextForward(self, {
617615
618616 if ( ! inputs_embeds ) {
619617 // 1. Extract the input embeddings
620- console . log ( 'before encode_text' ) ;
621618 inputs_embeds = await self . encode_text ( { input_ids, ...kwargs } ) ;
622- console . log ( 'after encode_text' , inputs_embeds . dims ) ;
623619
624620 // 2. Possibly, merge text and images
625621 if ( pixel_values && input_ids . dims [ 1 ] !== 1 ) {
626- console . log ( 'before encode_image' ) ;
627622 const image_features = await self . encode_image ( { pixel_values, ...kwargs } ) ;
628- console . log ( 'after encode_image' ) ;
629623
630624 ( { inputs_embeds, attention_mask } = self . _merge_input_ids_with_image_features ( {
631625 image_features,
0 commit comments