25 changes: 25 additions & 0 deletions .github/workflows/pull.yml
@@ -231,6 +231,31 @@ jobs:
# run e2e (export, tokenizer and runner)
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llava.sh

test-preprocess-linux:
name: test-preprocess-linux
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
strategy:
fail-fast: false
with:
runner: linux.24xlarge
docker-image: executorch-ubuntu-22.04-clang12
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
# The generic Linux job chooses to use the base env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"

PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"

# install pybind
bash install_requirements.sh --pybind xnnpack

# run python unittest
python -m unittest examples.models.llama3_2_vision.preprocess.test_preprocess


test-quantized-aot-lib-linux:
name: test-quantized-aot-lib-linux
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
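The new job's test step can be reproduced locally with a small stdlib driver (a sketch; it assumes setup-linux.sh and install_requirements.sh --pybind xnnpack have already been run, so the pybind extensions import cleanly):

import unittest

# Load and run the same suite the workflow invokes via `python -m unittest`.
suite = unittest.defaultTestLoader.loadTestsFromName(
    "examples.models.llama3_2_vision.preprocess.test_preprocess"
)
unittest.TextTestRunner(verbosity=2).run(suite)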
51 changes: 35 additions & 16 deletions examples/models/llama3_2_vision/preprocess/export_preprocess.py
@@ -5,28 +5,47 @@
# LICENSE file in the root directory of this source tree.

import torch
from executorch.examples.models.llama3_2_vision.preprocess.export_preprocess_lib import (
export_preprocess,
get_example_inputs,
lower_to_executorch_preprocess,
from executorch.examples.models.llama3_2_vision.preprocess.model import (
CLIPImageTransformModel,
PreprocessConfig,
)
from executorch.exir import EdgeCompileConfig, to_edge


def main():
# Eager model.
model = CLIPImageTransformModel(PreprocessConfig())

# ExecuTorch
ep_et = export_preprocess()
et = lower_to_executorch_preprocess(ep_et)
with open("preprocess_et.pte", "wb") as file:
et.write_to_file(file)

# AOTInductor
ep_aoti = export_preprocess()
torch._inductor.aot_compile(
ep_aoti.module(),
get_example_inputs(),
options={"aot_inductor.output_path": "preprocess_aoti.so"},
# Export the eager model, with dynamic image height/width (non-strict tracing).
ep = torch.export.export(
model.get_eager_model(),
model.get_example_inputs(),
dynamic_shapes=model.get_dynamic_shapes(),
strict=False,
)

# Lower to ExecuTorch and serialize to a .pte file.
edge_program = to_edge(
ep, compile_config=EdgeCompileConfig(_check_ir_validity=False)
)
et_program = edge_program.to_executorch()
with open("preprocess_et.pte", "wb") as file:
et_program.write_to_file(file)
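# Optional round-trip check (a sketch, not part of this PR; it assumes the
# pybind runtime is installed so `_load_for_executorch` is importable):
#
# from executorch.extension.pybindings.portable_lib import (
#     _load_for_executorch,
# )
# module = _load_for_executorch("preprocess_et.pte")
# tiles, aspect_ratio = module.forward(model.get_example_inputs())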

# Export.
# ep = torch.export.export(
# model.get_eager_model(),
# model.get_example_inputs(),
# dynamic_shapes=model.get_dynamic_shapes(),
# strict=False,
# )
#
# # AOTInductor
# torch._inductor.aot_compile(
# ep.module(),
# model.get_example_inputs(),
# options={"aot_inductor.output_path": "preprocess_aoti.so"},
# )
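# If the AOTInductor path above is re-enabled, the resulting .so can be
# loaded and run the same way the unit test does it (a sketch):
#
# aoti_model = torch._export.aot_load("preprocess_aoti.so", "cpu")
# tiles, aspect_ratio = aoti_model(*model.get_example_inputs())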


if __name__ == "__main__":
Expand Down

This file was deleted. (Judging by the removed imports, this is export_preprocess_lib.py; its export_preprocess, lower_to_executorch_preprocess, and get_example_inputs helpers are superseded by the inlined export code above and the new model.py below.)

72 changes: 72 additions & 0 deletions examples/models/llama3_2_vision/preprocess/model.py
@@ -0,0 +1,72 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-unsafe

from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

import torch

from executorch.extension.llm.custom_ops import op_tile_crop_aot # noqa
from torch.export import Dim
from torchtune.models.clip.inference._transform import _CLIPImageTransform

from ...model_base import EagerModelBase


@dataclass
class PreprocessConfig:
image_mean: Optional[List[float]] = None
image_std: Optional[List[float]] = None
resample: str = "bilinear"
max_num_tiles: int = 4
tile_size: int = 224
antialias: bool = False
# Used only by the eager CLIPImageTransform reference, not the exportable model.
resize_to_max_canvas: bool = True
possible_resolutions: Optional[List[Tuple[int, int]]] = None


class CLIPImageTransformModel(EagerModelBase):
def __init__(
self,
config: PreprocessConfig,
):
super().__init__()

# Eager model.
self.model = _CLIPImageTransform(
image_mean=config.image_mean,
image_std=config.image_std,
resample=config.resample,
max_num_tiles=config.max_num_tiles,
tile_size=config.tile_size,
antialias=config.antialias,
)

# Replace non-exportable ops with custom ops.
self.model.tile_crop = torch.ops.preprocess.tile_crop.default

def get_eager_model(self) -> torch.nn.Module:
return self.model

def get_example_inputs(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
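# Example: a CHW image (3x800x600), the size it is resized to while
# preserving aspect ratio (448x336), and the canvas it is inscribed into
# (448x448, i.e. a 2x2 grid of 224-pixel tiles).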
image = torch.ones(3, 800, 600)
target_size = torch.tensor([448, 336])
canvas_size = torch.tensor([448, 448])
return (image, target_size, canvas_size)

def get_dynamic_shapes(self) -> Dict[str, Dict[int, Dim]]:
img_h = Dim("img_h", min=1, max=4000)
img_w = Dim("img_w", min=1, max=4000)

dynamic_shapes = {
"image": {1: img_h, 2: img_w},
"target_size": None,
"canvas_size": None,
}
return dynamic_shapes
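A minimal usage sketch of the eager model with the example inputs above (not part of this diff; the two outputs, tiled image and aspect ratio, follow the unit test's usage):

from executorch.examples.models.llama3_2_vision.preprocess.model import (
    CLIPImageTransformModel,
    PreprocessConfig,
)

model = CLIPImageTransformModel(PreprocessConfig())
image, target_size, canvas_size = model.get_example_inputs()
# The transform tiles the inscribed canvas and reports the tile aspect ratio.
tiles, aspect_ratio = model.get_eager_model()(image, target_size, canvas_size)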
78 changes: 30 additions & 48 deletions examples/models/llama3_2_vision/preprocess/test_preprocess.py
@@ -6,31 +6,30 @@

import unittest

from dataclasses import dataclass
from typing import List, Optional, Tuple
from typing import List, Tuple

import numpy as np
import PIL
import torch

from executorch.extension.pybindings import portable_lib # noqa # usort: skip
from executorch.extension.llm.custom_ops import sdpa_with_kv_cache # noqa # usort: skip
from executorch.examples.models.llama3_2_vision.preprocess.export_preprocess_lib import (
export_preprocess,
get_example_inputs,
lower_to_executorch_preprocess,
from executorch.examples.models.llama3_2_vision.preprocess.model import (
CLIPImageTransformModel,
PreprocessConfig,
)

from executorch.exir import EdgeCompileConfig, to_edge

from executorch.extension.pybindings import portable_lib # noqa # usort: skip
from executorch.extension.llm.custom_ops import op_tile_crop_aot # noqa # usort: skip

from executorch.extension.pybindings.portable_lib import (
_load_for_executorch_from_buffer,
)

from parameterized import parameterized
from PIL import Image

from torchtune.models.clip.inference._transform import (
_CLIPImageTransform,
CLIPImageTransform,
)
from torchtune.models.clip.inference._transform import CLIPImageTransform

from torchtune.modules.transforms.vision_utils.get_canvas_best_fit import (
find_supported_resolutions,
@@ -43,18 +42,6 @@
from torchvision.transforms.v2 import functional as F


@dataclass
class PreprocessConfig:
image_mean: Optional[List[float]] = None
image_std: Optional[List[float]] = None
resize_to_max_canvas: bool = True
resample: str = "bilinear"
antialias: bool = False
tile_size: int = 224
max_num_tiles: int = 4
possible_resolutions = None


class TestImageTransform(unittest.TestCase):
"""
This unittest checks that the exported image transform model produces the
@@ -188,31 +175,26 @@ def test_preprocess(
possible_resolutions=None,
)

eager_model = _CLIPImageTransform(
image_mean=config.image_mean,
image_std=config.image_std,
resample=config.resample,
antialias=config.antialias,
tile_size=config.tile_size,
max_num_tiles=config.max_num_tiles,
)
model = CLIPImageTransformModel(config)
eager_model = model.get_eager_model()

exported_model = export_preprocess(
image_mean=config.image_mean,
image_std=config.image_std,
resample=config.resample,
antialias=config.antialias,
tile_size=config.tile_size,
max_num_tiles=config.max_num_tiles,
exported_model = torch.export.export(
eager_model,
model.get_example_inputs(),
dynamic_shapes=model.get_dynamic_shapes(),
strict=False,
)

executorch_model = lower_to_executorch_preprocess(exported_model)
edge_program = to_edge(
exported_model, compile_config=EdgeCompileConfig(_check_ir_validity=False)
)
executorch_model = edge_program.to_executorch()
executorch_module = _load_for_executorch_from_buffer(executorch_model.buffer)
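# The collapsed lines below presumably invoke the loaded module; as an
# assumption, a pybindings call of the form:
# et_image, et_ar = executorch_module.forward(
#     (image_tensor, inscribed_size, best_resolution)
# )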

aoti_path = torch._inductor.aot_compile(
exported_model.module(),
get_example_inputs(),
)
# aoti_path = torch._inductor.aot_compile(
# exported_model.module(),
# get_example_inputs(),
# )

# Prepare image input.
image = (
@@ -276,7 +258,7 @@ def test_preprocess(
self.assertEqual(reference_ar, et_ar.tolist())

# Run aoti model and check it matches reference model.
aoti_model = torch._export.aot_load(aoti_path, "cpu")
aoti_image, aoti_ar = aoti_model(image_tensor, inscribed_size, best_resolution)
self.assertTrue(torch.allclose(reference_image, aoti_image))
self.assertEqual(reference_ar, aoti_ar.tolist())
# aoti_model = torch._export.aot_load(aoti_path, "cpu")
# aoti_image, aoti_ar = aoti_model(image_tensor, inscribed_size, best_resolution)
# self.assertTrue(torch.allclose(reference_image, aoti_image))
# self.assertEqual(reference_ar, aoti_ar.tolist())