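Fix LLM runner pybindings: import `_llm_runner` via its absolute package path, remove the `Stats` tests, and pass the third `MultimodalRunner` argument explicitly in the failure test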

larryliu0820 · larryliu0820 · commit 14d2db7eaae1 · 2025-09-20T10:55:56.000-07:00
diff --git a/extension/llm/runner/__init__.py b/extension/llm/runner/__init__.py
@@ -25,7 +25,7 @@
 
 try:
     # Import shared components from the compiled C++ extension
-    from ._llm_runner import (
+    from executorch.extension.llm.runner._llm_runner import (  # noqa: F401
         GenerationConfig,
         Image,
         make_image_input,
@@ -105,7 +105,9 @@ def create_text_input(self, text: str):
         """
         return make_text_input(text)
 
-    def create_image_input(self, image: Union[str, Path, np.ndarray, "PILImage.Image"]):
+    def create_image_input(  # noqa: C901
+        self, image: Union[str, Path, np.ndarray, "PILImage.Image"]
+    ):
         """
         Create an image input for multimodal processing.
 
diff --git a/extension/llm/runner/test/test_pybindings.py b/extension/llm/runner/test/test_pybindings.py
@@ -24,7 +24,6 @@
     make_text_input,
     MultimodalInput,
     MultimodalRunner,
-    Stats,
 )
 
 
@@ -114,94 +113,6 @@ def test_repr(self):
         self.assertIn("warming=False", repr_str)
 
 
-class TestStats(unittest.TestCase):
-    """Test the Stats class."""
-
-    def test_attributes(self):
-        """Test that Stats has all expected attributes."""
-        stats = Stats()
-
-        # Check all timing attributes exist
-        self.assertTrue(hasattr(stats, "SCALING_FACTOR_UNITS_PER_SECOND"))
-        self.assertTrue(hasattr(stats, "model_load_start_ms"))
-        self.assertTrue(hasattr(stats, "model_load_end_ms"))
-        self.assertTrue(hasattr(stats, "inference_start_ms"))
-        self.assertTrue(hasattr(stats, "token_encode_end_ms"))
-        self.assertTrue(hasattr(stats, "model_execution_start_ms"))
-        self.assertTrue(hasattr(stats, "model_execution_end_ms"))
-        self.assertTrue(hasattr(stats, "prompt_eval_end_ms"))
-        self.assertTrue(hasattr(stats, "first_token_ms"))
-        self.assertTrue(hasattr(stats, "inference_end_ms"))
-        self.assertTrue(hasattr(stats, "aggregate_sampling_time_ms"))
-        self.assertTrue(hasattr(stats, "num_prompt_tokens"))
-        self.assertTrue(hasattr(stats, "num_generated_tokens"))
-
-    def test_scaling_factor(self):
-        """Test the scaling factor constant."""
-        stats = Stats()
-        self.assertEqual(stats.SCALING_FACTOR_UNITS_PER_SECOND, 1000)
-
-    def test_methods(self):
-        """Test Stats methods."""
-        stats = Stats()
-
-        # Test on_sampling_begin and on_sampling_end
-        stats.on_sampling_begin()
-        stats.on_sampling_end()
-
-        # Test reset without all_stats
-        stats.model_load_start_ms = 100
-        stats.model_load_end_ms = 200
-        stats.inference_start_ms = 300
-        stats.num_prompt_tokens = 10
-        stats.num_generated_tokens = 20
-
-        stats.reset(False)
-
-        # Model load times should be preserved
-        self.assertEqual(stats.model_load_start_ms, 100)
-        self.assertEqual(stats.model_load_end_ms, 200)
-        # Other stats should be reset
-        self.assertEqual(stats.inference_start_ms, 0)
-        self.assertEqual(stats.num_prompt_tokens, 0)
-        self.assertEqual(stats.num_generated_tokens, 0)
-
-        # Test reset with all_stats
-        stats.reset(True)
-        self.assertEqual(stats.model_load_start_ms, 0)
-        self.assertEqual(stats.model_load_end_ms, 0)
-
-    def test_to_json_string(self):
-        """Test JSON string conversion."""
-        stats = Stats()
-        stats.num_prompt_tokens = 10
-        stats.num_generated_tokens = 20
-        stats.model_load_start_ms = 100
-        stats.model_load_end_ms = 200
-        stats.inference_start_ms = 300
-        stats.inference_end_ms = 1300
-
-        json_str = stats.to_json_string()
-        self.assertIn('"prompt_tokens":10', json_str)
-        self.assertIn('"generated_tokens":20', json_str)
-        self.assertIn('"model_load_start_ms":100', json_str)
-        self.assertIn('"model_load_end_ms":200', json_str)
-
-    def test_repr(self):
-        """Test string representation."""
-        stats = Stats()
-        stats.num_prompt_tokens = 10
-        stats.num_generated_tokens = 20
-        stats.inference_start_ms = 1000
-        stats.inference_end_ms = 2000
-
-        repr_str = repr(stats)
-        self.assertIn("Stats", repr_str)
-        self.assertIn("num_prompt_tokens=10", repr_str)
-        self.assertIn("num_generated_tokens=20", repr_str)
-        self.assertIn("tokens_per_second=20", repr_str)  # 20 tokens / 1 second
-
-
 class TestImage(unittest.TestCase):
     """Test the Image class."""
 
@@ -329,7 +240,7 @@ def tearDown(self):
     def test_initialization_failure(self):
         """Test that initialization fails gracefully with invalid files."""
         with self.assertRaises(RuntimeError) as cm:
-            runner = MultimodalRunner(self.model_path, self.tokenizer_path)
+            MultimodalRunner(self.model_path, self.tokenizer_path, None)
         # Should fail because the tokenizer file is not valid
         self.assertIn("Failed to", str(cm.exception))
 
diff --git a/extension/llm/runner/utils.py b/extension/llm/runner/utils.py
@@ -23,7 +23,7 @@
 except ImportError:
     HAS_PIL = False
 
-from ._llm_runner import GenerationConfig
+from executorch.extension.llm.runner._llm_runner import GenerationConfig  # noqa: F401
 
 
 def load_image_from_file(