File tree Expand file tree Collapse file tree 1 file changed +6
-3
lines changed
vllm/model_executor/models Expand file tree Collapse file tree 1 file changed +6
-3
lines changed Original file line number Diff line number Diff line change @@ -263,6 +263,11 @@ def _call_hf_processor(
263
263
mm_data ,
264
264
mm_kwargs ,
265
265
)
266
+ if "pixel_values" in processed_outputs :
267
+ # Cast pixel values to the model dtype at this point,
268
+ # so that less data has to be transferred to the GPU
269
+ processed_outputs ["pixel_values" ] = processed_outputs [
270
+ "pixel_values" ].to (self .info .ctx .model_config .dtype )
266
271
267
272
# HF processor pops the `num_crops` kwarg, which is needed by vLLM
268
273
if (images := mm_data .get ("images" )) is not None :
@@ -543,9 +548,7 @@ def _image_pixels_to_features(
543
548
vision_tower : SiglipVisionModel ,
544
549
pixel_values : torch .Tensor ,
545
550
) -> torch .Tensor :
546
- target_dtype = vision_tower .get_input_embeddings ().weight .dtype
547
- image_features = vision_tower (pixel_values .to (dtype = target_dtype ))
548
- return image_features
551
+ return vision_tower (pixel_values )
549
552
550
553
def _process_image_input (
551
554
self ,
You can’t perform that action at this time.
0 commit comments