Commit f2860fa

use utils._convert_to_result for huggingface_inference (#36593)
* fix(huggingface_inference): use utils._convert_to_result for batch processing

  The internal _convert_to_result function was incorrectly handling batches with multiple elements by wrapping predictions in a list, which caused all predictions to be grouped into a single result. Replace it with utils._convert_to_result, which processes each element in the batch individually. Added a test case to verify correct batch processing behavior.

* test(huggingface): add batched examples test for tf inference

  Add a test case to verify batch processing with TensorFlow examples in huggingface inference.
1 parent ef07e40 commit f2860fa
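
To make the reported bug concrete, here is a minimal, self-contained sketch (plain Python with tuples standing in for PredictionResult, not the Beam helpers themselves) contrasting the removed zip(batch, [predictions]) pattern with the element-wise pairing the new test expects from utils._convert_to_result:

    # Sketch only: hypothetical stand-in values, mirroring the test added in this commit.
    batch = ["input1", "input2"]
    predictions = [{"translation_text": "output1"}, {"translation_text": "output2"}]

    # Removed local helper: wrapping predictions in a list makes zip stop after
    # one pair, so the entire prediction list is attached to the first example.
    buggy = [(x, y) for x, y in zip(batch, [predictions])]
    assert len(buggy) == 1  # one grouped result instead of two

    # Element-wise pairing, as verified by test_convert_to_result_batch_processing.
    fixed = [(x, y) for x, y in zip(batch, predictions)]
    assert len(fixed) == 2  # one result per batch element
    assert fixed[0] == ("input1", {"translation_text": "output1"})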

File tree: 2 files changed (+29 / -17 lines)

sdks/python/apache_beam/ml/inference/huggingface_inference.py

Lines changed: 1 addition & 11 deletions
@@ -563,16 +563,6 @@ def get_metrics_namespace(self) -> str:
     return 'BeamML_HuggingFaceModelHandler_Tensor'
 
 
-def _convert_to_result(
-    batch: Iterable,
-    predictions: Union[Iterable, dict[Any, Iterable]],
-    model_id: Optional[str] = None,
-) -> Iterable[PredictionResult]:
-  return [
-      PredictionResult(x, y, model_id) for x, y in zip(batch, [predictions])
-  ]
-
-
 def _default_pipeline_inference_fn(
     batch, pipeline, inference_args) -> Iterable[PredictionResult]:
   predicitons = pipeline(batch, **inference_args)
@@ -715,7 +705,7 @@ def run_inference(
     """
     inference_args = {} if not inference_args else inference_args
     predictions = self._inference_fn(batch, pipeline, inference_args)
-    return _convert_to_result(batch, predictions)
+    return utils._convert_to_result(batch, predictions)
 
   def update_model_path(self, model_path: Optional[str] = None):
     """

sdks/python/apache_beam/ml/inference/huggingface_inference_test.py

Lines changed: 28 additions & 6 deletions
@@ -121,12 +121,34 @@ def test_framework_detection_tensorflow(self):
     inference_runner = HuggingFaceModelHandlerTensor(
         model_uri='unused',
         model_class=TFAutoModel,
-        inference_fn=fake_inference_fn_tensor,
-        inference_args={"add": True})
-    batched_examples = [tf.constant([1]), tf.constant([10]), tf.constant([100])]
-    inference_runner.run_inference(
-        batched_examples, fake_model, inference_args={"add": True})
-    self.assertEqual(inference_runner._framework, "tf")
+        inference_fn=fake_inference_fn_tensor)
+    batched_examples = [tf.constant(1), tf.constant(10), tf.constant(100)]
+    inference_runner.run_inference(batched_examples, fake_model)
+    self.assertEqual(inference_runner._framework, 'tf')
+
+  def test_convert_to_result_batch_processing(self):
+    """Test that utils._convert_to_result correctly handles
+    batches with multiple elements."""
+
+    # Test case that reproduces the bug: batch size > 1
+    batch = ["input1", "input2"]
+    predictions = [{
+        "translation_text": "output1"
+    }, {
+        "translation_text": "output2"
+    }]
+
+    results = list(utils._convert_to_result(batch, predictions))
+
+    # Should return 2 results, not 1
+    self.assertEqual(
+        len(results), 2, "Should return one result per batch element")
+
+    # Check that each result has the correct input and output
+    self.assertEqual(results[0].example, "input1")
+    self.assertEqual(results[0].inference, {"translation_text": "output1"})
+    self.assertEqual(results[1].example, "input2")
+    self.assertEqual(results[1].inference, {"translation_text": "output2"})
 
 
 if __name__ == '__main__':
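
For a quick local check (this invocation is an assumption about a typical development setup, not part of the commit), the new test can be run on its own via pytest's keyword filter:

    # Sketch; assumes pytest is installed and the Beam Python SDK sources are on the path.
    import pytest
    pytest.main([
        "sdks/python/apache_beam/ml/inference/huggingface_inference_test.py",
        "-k", "test_convert_to_result_batch_processing",
    ])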
