diff --git a/lmms_eval/models/vllm.py b/lmms_eval/models/vllm.py
index d1bedc66b..71b1f80f6 100644
--- a/lmms_eval/models/vllm.py
+++ b/lmms_eval/models/vllm.py
@@ -150,8 +150,10 @@ def encode_video(self, video_path):
     def flatten(self, input):
         new_list = []
         for i in input:
-            for j in i:
-                new_list.append(j)
+            if isinstance(i, (list, tuple)):
+                new_list.extend(i)
+            else:
+                new_list.append(i)
         return new_list
 
     def generate_until(self, requests) -> List[str]:
@@ -203,7 +205,7 @@ def generate_until(self, requests) -> List[str]:
             messages = [{"role": "user", "content": []}]
             # When there is no image token in the context, append the image to the text
             messages[0]["content"].append({"type": "text", "text": contexts})
-            for img in imgs:
+            for img in self.flatten(imgs):
                 messages[0]["content"].append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img}"}})
 
             batched_messages.append(messages)
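
For context, a minimal standalone sketch of the revised flatten behaviour (the function name and example inputs below are illustrative, not taken from the patched file; in the patch it is a method on the vllm model class):

# Flatten one level of nesting while leaving non-sequence items untouched.
def flatten(items):
    new_list = []
    for i in items:
        if isinstance(i, (list, tuple)):
            new_list.extend(i)   # nested case, e.g. a list of base64-encoded video frames
        else:
            new_list.append(i)   # already-flat case, e.g. a single base64-encoded image string
    return new_list

# The previous implementation iterated every element with `for j in i`, so a flat
# list of base64 strings would have been exploded character by character.
# With the guard, flat and one-level-nested inputs both yield the same flat list:
assert flatten(["imgA", ["f1", "f2"], "imgB"]) == ["imgA", "f1", "f2", "imgB"]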