Commit 924d426
wraps the batch_idx value in a shape [1] torch tensor to be compatible with batch_collator
Signed-off-by: Steven <skothenhill@nvidia.com>
Parent: 1267264

1 file changed: +5 additions, -2 deletions

sub-packages/bionemo-llm/src/bionemo/llm/utils/callbacks.py

Lines changed: 5 additions & 2 deletions

@@ -98,8 +98,11 @@ def write_on_batch_end(
         result_path = os.path.join(self.output_dir, f"predictions__rank_{trainer.global_rank}__batch_{batch_idx}.pt")

         # batch_indices is not captured due to a lightning bug when return_predictions = False
-        # we use input IDs in the prediction to map the result to input
-        prediction["batch_idx"] = batch_idx
+        # we use input IDs in the prediction to map the result to input.
+        # NOTE store the batch_idx so we do not need to rely on filenames for reconstruction of inputs. This is wrapped
+        # in a tensor and list container to ensure compatibility with batch_collator.
+        prediction["batch_idx"] = torch.tensor([batch_idx], dtype=torch.int64)

         torch.save(prediction, result_path)
         logging.info(f"Inference predictions are stored in {result_path}\n{prediction.keys()}")
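The motivation for the change can be illustrated with a minimal sketch. Assuming `batch_collator` behaves like a dict-wise concatenation of tensor values across per-batch prediction dicts (the `collate_predictions` helper below is a hypothetical stand-in, not the actual bionemo implementation), a bare Python int stored under `"batch_idx"` would not concatenate, whereas a shape `[1]` tensor does:

```python
import torch


def collate_predictions(batches):
    # Hypothetical stand-in for batch_collator: concatenate each key's
    # tensors across the per-batch prediction dicts along dim 0.
    return {key: torch.cat([b[key] for b in batches], dim=0) for key in batches[0]}


# Each saved prediction dict carries its batch index as a shape [1] tensor,
# mirroring the committed change, so no filename parsing is needed later.
pred_a = {"batch_idx": torch.tensor([0], dtype=torch.int64)}
pred_b = {"batch_idx": torch.tensor([1], dtype=torch.int64)}

merged = collate_predictions([pred_a, pred_b])
print(merged["batch_idx"])  # tensor([0, 1])
```

Storing the index inside the prediction dict (rather than only in the filename) means the mapping from results back to inputs survives any downstream collation or re-saving of the per-rank prediction files.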
