@@ -15,6 +15,11 @@
 
 from executorch.extension.pybindings import portable_lib  # noqa # usort: skip
 from executorch.extension.llm.custom_ops import sdpa_with_kv_cache  # noqa # usort: skip
+from executorch.examples.models.llama3_2_vision.preprocess.export_preprocess_lib import (
+    export_preprocess,
+    get_example_inputs,
+    lower_to_executorch_preprocess,
+)
 from executorch.extension.pybindings.portable_lib import (
     _load_for_executorch_from_buffer,
 )
@@ -37,12 +42,6 @@
 )
 from torchvision.transforms.v2 import functional as F
 
-from .export_preprocess_lib import (
-    export_preprocess,
-    get_example_inputs,
-    lower_to_executorch_preprocess,
-)
-
 
 @dataclass
 class PreprocessConfig:
@@ -54,7 +53,6 @@ class PreprocessConfig:
     tile_size: int = 224
     max_num_tiles: int = 4
     possible_resolutions = None
-    pad_max_tiles: bool = True
 
 
 class TestImageTransform(unittest.TestCase):
@@ -137,17 +135,6 @@ def prepare_inputs(
                 [1.0, 1.0],  # expected_tile_max
                 [0.0, 0.0],  # expected_tile_min
                 [1, 2],  # expected_aspect_ratio
-                False,  # pad_max_tiles
-            ),
-            (
-                (100, 400, 3),  # image_size
-                torch.Size([4, 3, 224, 224]),  # expected shape
-                False,  # resize_to_max_canvas
-                [0.2230, 0.1763, 0.0, 0.0],  # expected_tile_means
-                [1.0, 1.0, 0.0, 0.0],  # expected_tile_max
-                [0.0, 0.0, 0.0, 0.0],  # expected_tile_min
-                [1, 2],  # expected_aspect_ratio
-                True,  # pad_max_tiles
             ),
             (
                 (1000, 300, 3),  # image_size
@@ -157,7 +144,6 @@ def prepare_inputs(
                 [0.9976, 0.9940, 0.9936, 0.9906],  # expected_tile_max
                 [0.0037, 0.0047, 0.0039, 0.0],  # expected_tile_min
                 [4, 1],  # expected_aspect_ratio
-                False,  # pad_max_tiles
             ),
             (
                 (200, 200, 3),  # image_size
@@ -167,7 +153,6 @@ def prepare_inputs(
                 [0.9921, 0.9925, 0.9969, 0.9908],  # expected_tile_max
                 [0.0056, 0.0069, 0.0059, 0.0032],  # expected_tile_min
                 [2, 2],  # expected_aspect_ratio
-                False,  # pad_max_tiles
             ),
             (
                 (600, 200, 3),  # image_size
@@ -177,17 +162,6 @@ def prepare_inputs(
                 [1.0, 1.0, 1.0],  # expected_tile_max
                 [0.0, 0.0, 0.0],  # expected_tile_min
                 [3, 1],  # expected_aspect_ratio
-                False,  # pad_max_tiles
-            ),
-            (
-                (600, 200, 3),  # image_size
-                torch.Size([4, 3, 224, 224]),  # expected shape
-                False,  # resize_to_max_canvas
-                [0.4472, 0.4468, 0.3031, 0.0],  # expected_tile_means
-                [1.0, 1.0, 1.0, 0.0],  # expected_tile_max
-                [0.0, 0.0, 0.0, 0.0],  # expected_tile_min
-                [3, 1],  # expected_aspect_ratio
-                True,  # pad_max_tiles
             ),
         ]
     )
@@ -200,11 +174,8 @@ def test_preprocess(
         expected_tile_max: List[float],
         expected_tile_min: List[float],
         expected_ar: List[int],
-        pad_max_tiles: bool,
     ) -> None:
-        config = PreprocessConfig(
-            resize_to_max_canvas=resize_to_max_canvas, pad_max_tiles=pad_max_tiles
-        )
+        config = PreprocessConfig(resize_to_max_canvas=resize_to_max_canvas)
 
         reference_model = CLIPImageTransform(
             image_mean=config.image_mean,
@@ -215,7 +186,6 @@ def test_preprocess(
             tile_size=config.tile_size,
             max_num_tiles=config.max_num_tiles,
             possible_resolutions=None,
-            pad_max_tiles=config.pad_max_tiles,
         )
 
         eager_model = _CLIPImageTransform(
@@ -225,7 +195,6 @@ def test_preprocess(
             antialias=config.antialias,
             tile_size=config.tile_size,
             max_num_tiles=config.max_num_tiles,
-            pad_max_tiles=config.pad_max_tiles,
         )
 
         exported_model = export_preprocess(
@@ -235,7 +204,6 @@ def test_preprocess(
             antialias=config.antialias,
             tile_size=config.tile_size,
             max_num_tiles=config.max_num_tiles,
-            pad_max_tiles=config.pad_max_tiles,
         )
 
         executorch_model = lower_to_executorch_preprocess(exported_model)
@@ -275,11 +243,8 @@ def test_preprocess(
             self.assertAlmostEqual(tile.min().item(), expected_tile_min[i], delta=1e-4)
 
         # Check num tiles matches the product of the aspect ratio.
-        if pad_max_tiles:
-            self.assertEqual(config.max_num_tiles, reference_image.shape[0])
-        else:
-            expected_num_tiles = reference_ar[0] * reference_ar[1]
-            self.assertEqual(expected_num_tiles, reference_image.shape[0])
+        expected_num_tiles = reference_ar[0] * reference_ar[1]
+        self.assertEqual(expected_num_tiles, reference_image.shape[0])
 
         # Pre-work for eager and exported models. The reference model performs these
         # calculations and passes the result to _CLIPImageTransform, the exportable model.
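
For context, the export-lower-load roundtrip this test exercises can be sketched as below. This is a minimal sketch, not the test itself: it assumes the remaining export_preprocess keyword arguments have defaults, that lower_to_executorch_preprocess returns a program manager exposing a .buffer with the serialized program, and that get_example_inputs returns a tuple suitable for forward(); none of these signatures beyond the names used in the diff are confirmed here.

# Minimal sketch of the roundtrip exercised by test_preprocess.
from executorch.examples.models.llama3_2_vision.preprocess.export_preprocess_lib import (
    export_preprocess,
    get_example_inputs,
    lower_to_executorch_preprocess,
)
from executorch.extension.pybindings.portable_lib import (
    _load_for_executorch_from_buffer,
)

# Export the eager preprocess transform with the same tiling config the test
# uses (tile_size/max_num_tiles match PreprocessConfig defaults above;
# other kwargs are assumed to default sensibly).
exported_model = export_preprocess(tile_size=224, max_num_tiles=4)

# Lower the exported graph to an ExecuTorch program.
executorch_model = lower_to_executorch_preprocess(exported_model)

# Load the serialized program from its in-memory buffer and run it.
# (.buffer is assumed to hold the serialized program bytes.)
module = _load_for_executorch_from_buffer(executorch_model.buffer)
example_inputs = get_example_inputs()
output = module.forward(example_inputs)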