@@ -188,6 +188,16 @@ def _collate(x):
188188 # Import utils here if flatten is moved
189189 import lmms_eval .utils as utils
190190
191+ def _ensure_list (v ):
192+ if v is None :
193+ return []
194+ if isinstance (v , list ):
195+ # incase [[img]]
196+ if len (v ) == 1 and isinstance (v [0 ], list ):
197+ return v [0 ]
198+ return v
199+ return [v ]
200+
191201 pbar = tqdm (total = len (requests ), disable = (self .rank != 0 ), desc = "Model Responding" )
192202 # we group requests by their generation_kwargs,
193203 # so that we don't try to execute e.g. greedy sampling and temp=0.8 sampling
@@ -202,13 +212,10 @@ def _collate(x):
202212 # TODO: Clarify the behavior of doc_to_visual for documents without visual info.
203213 # The current logic might incorrectly discard all visuals if one doc lacks them.
204214 # Ensure flatten is appropriate here based on doc_to_visual's return type.
205- visual_list = [doc_to_visual [0 ](self .task_dict [task ][split ][ids ]) for ids in doc_id ]
206- if None in visual_list : # This check might need refinement
207- # If a mix of visual/non-visual is possible, this needs careful handling
208- # Currently sets all visuals to empty if any doc returns None
209- visual_list = []
210- else :
211- visual_list = self .flatten (visual_list ) # Assumes doc_to_visual returns list of lists
215+ visuals_per_doc = []
216+ for fn , ids in zip (doc_to_visual , doc_id ):
217+ v = fn (self .task_dict [task ][split ][ids ])
218+ visuals_per_doc .append (_ensure_list (v ))
212219
213220 gen_kwargs = all_gen_kwargs [0 ] if all_gen_kwargs else {}
214221
@@ -249,7 +256,7 @@ def _collate(x):
249256 # Needs careful review based on doc_to_visual output structure
250257 # For simplicity, assuming visual_list contains all visuals for the batch for now
251258 # A more robust approach might map visuals back to their original context index.
252- relevant_visuals = visual_list # Placeholder: needs logic to get visuals for context 'i'
259+ relevant_visuals = visuals_per_doc [ i ] # Placeholder: needs logic to get visuals for context 'i'
253260
254261 for visual in relevant_visuals :
255262 if isinstance (visual , str ) and visual .endswith ((".mp4" , ".avi" , ".mov" )): # Video file
0 commit comments