diff --git a/backends/arm/test/models/stable_diffusion/test_CLIPTextModelWithProjection.py b/backends/arm/test/models/stable_diffusion/test_CLIPTextModelWithProjection.py index 49266beee63..fad31b57537 100644 --- a/backends/arm/test/models/stable_diffusion/test_CLIPTextModelWithProjection.py +++ b/backends/arm/test/models/stable_diffusion/test_CLIPTextModelWithProjection.py @@ -4,7 +4,7 @@ # LICENSE file in the root directory of this source tree. -import unittest +from typing import Tuple import torch from executorch.backends.arm._passes import ( @@ -17,11 +17,17 @@ from executorch.backends.arm.test.models.stable_diffusion.stable_diffusion_module_test_configs import ( CLIP_text_encoder_config, ) -from executorch.backends.arm.test.tester.arm_tester import ArmTester +from executorch.backends.arm.test.tester.test_pipeline import ( + TosaPipelineFP, + TosaPipelineINT, + VgfPipeline, +) from transformers import CLIPTextModelWithProjection +input_t = Tuple[torch.Tensor] + -class TestCLIPTextModelWithProjection(unittest.TestCase): +class TestCLIPTextModelWithProjection: """ Test class of CLIPTextModelWithProjection. 
CLIPTextModelWithProjection is one of the text_encoder used by Stable Diffusion 3.5 Medium @@ -69,47 +75,93 @@ def prepare_model_and_inputs(self): return text_encoder_model, text_encoder_model_inputs - def test_CLIPTextModelWithProjection_tosa_FP(self): - text_encoder_model, text_encoder_model_inputs = self.prepare_model_and_inputs() - with torch.no_grad(): - ( - ArmTester( - text_encoder_model, - example_inputs=text_encoder_model_inputs, - compile_spec=common.get_tosa_compile_spec(tosa_spec="TOSA-1.0+FP"), - transform_passes=[ - ConvertInt64ConstOpsToInt32Pass(), - ConvertInt64OutputOpsToInt32Pass(), - InsertInt32CastsAfterInt64PlaceholdersPass(), - ], - ) - .export() - .to_edge_transform_and_lower() - .dump_operator_distribution() - .check_count(self.ops_after_partitioner_FP) - .to_executorch() - .run_method_and_compare_outputs( - inputs=text_encoder_model_inputs, - ) - ) - - def test_CLIPTextModelWithProjection_tosa_INT(self): - text_encoder_model, text_encoder_model_inputs = self.prepare_model_and_inputs() - with torch.no_grad(): - ( - ArmTester( - text_encoder_model, - example_inputs=text_encoder_model_inputs, - compile_spec=common.get_tosa_compile_spec(tosa_spec="TOSA-1.0+INT"), - ) - .quantize() - .export() - .to_edge_transform_and_lower() - .dump_operator_distribution() - .check_count(self.ops_after_partitioner_INT) - .to_executorch() - .run_method_and_compare_outputs( - inputs=text_encoder_model_inputs, - atol=0.8, - ) - ) + +def test_CLIPTextModelWithProjection_tosa_FP(): + text_encoder_model, text_encoder_model_inputs = ( + TestCLIPTextModelWithProjection().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = TosaPipelineFP[input_t]( + text_encoder_model, + text_encoder_model_inputs, + aten_op=[], + exir_op=[], + use_to_edge_transform_and_lower=True, + transform_passes=[ + ConvertInt64ConstOpsToInt32Pass(), + ConvertInt64OutputOpsToInt32Pass(), + InsertInt32CastsAfterInt64PlaceholdersPass(), + ], + ) + pipeline.change_args( + 
"check_count.exir", TestCLIPTextModelWithProjection.ops_after_partitioner_FP + ) + pipeline.run() + + +def test_CLIPTextModelWithProjection_tosa_INT(): + text_encoder_model, text_encoder_model_inputs = ( + TestCLIPTextModelWithProjection().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = TosaPipelineINT[input_t]( + text_encoder_model, + text_encoder_model_inputs, + aten_op=[], + exir_op=[], + use_to_edge_transform_and_lower=True, + atol=0.8, + ) + pipeline.change_args( + "check_count.exir", + TestCLIPTextModelWithProjection.ops_after_partitioner_INT, + ) + pipeline.run() + + +@common.SkipIfNoModelConverter +def test_CLIPTextModelWithProjection_vgf_FP(): + text_encoder_model, text_encoder_model_inputs = ( + TestCLIPTextModelWithProjection().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = VgfPipeline[input_t]( + text_encoder_model, + text_encoder_model_inputs, + aten_op=[], + exir_op=[], + tosa_version="TOSA-1.0+FP", + use_to_edge_transform_and_lower=True, + atol=4, # TODO: Investigate numerical issue: MAX Diff ~50% + transform_passes=[ + ConvertInt64ConstOpsToInt32Pass(), + ConvertInt64OutputOpsToInt32Pass(), + InsertInt32CastsAfterInt64PlaceholdersPass(), + ], + ) + pipeline.change_args( + "check_count.exir", TestCLIPTextModelWithProjection.ops_after_partitioner_FP + ) + pipeline.run() + + +@common.SkipIfNoModelConverter +def test_CLIPTextModelWithProjection_vgf_INT(): + text_encoder_model, text_encoder_model_inputs = ( + TestCLIPTextModelWithProjection().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = VgfPipeline[input_t]( + text_encoder_model, + text_encoder_model_inputs, + aten_op=[], + exir_op=[], + tosa_version="TOSA-1.0+INT", + use_to_edge_transform_and_lower=True, + atol=0.8, + ) + pipeline.change_args( + "check_count.exir", + TestCLIPTextModelWithProjection.ops_after_partitioner_INT, + ) + pipeline.run() diff --git a/backends/arm/test/models/stable_diffusion/test_SD3Transformer2DModel.py 
b/backends/arm/test/models/stable_diffusion/test_SD3Transformer2DModel.py index f9d814d044b..1267c5b8e4c 100644 --- a/backends/arm/test/models/stable_diffusion/test_SD3Transformer2DModel.py +++ b/backends/arm/test/models/stable_diffusion/test_SD3Transformer2DModel.py @@ -4,7 +4,7 @@ # LICENSE file in the root directory of this source tree. -import unittest +from typing import Tuple import torch from diffusers.models.transformers import SD3Transformer2DModel @@ -13,10 +13,16 @@ from executorch.backends.arm.test.models.stable_diffusion.stable_diffusion_module_test_configs import ( SD3Transformer2DModel_init_dict, ) -from executorch.backends.arm.test.tester.arm_tester import ArmTester +from executorch.backends.arm.test.tester.test_pipeline import ( + TosaPipelineFP, + TosaPipelineINT, + VgfPipeline, +) + +input_t4 = Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor] -class TestSD3Transformer2DModel(unittest.TestCase): +class TestSD3Transformer2DModel: """ Test class of AutoenSD3Transformer2DModelcoderKL. 
SD3Transformer2DModel is the transformer model used by Stable Diffusion 3.5 Medium @@ -93,48 +99,88 @@ def forward(self, *args, **kwargs): return sd35_transformer2D_model, sd35_transformer2D_model_inputs - def test_SD3Transformer2DModel_tosa_FP(self): - sd35_transformer2D_model, sd35_transformer2D_model_inputs = ( - self.prepare_model_and_inputs() - ) - with torch.no_grad(): - ( - ArmTester( - sd35_transformer2D_model, - example_inputs=sd35_transformer2D_model_inputs, - compile_spec=common.get_tosa_compile_spec(tosa_spec="TOSA-1.0+FP"), - ) - .export() - .to_edge_transform_and_lower() - .check_count(self.ops_after_partitioner_FP) - .to_executorch() - .run_method_and_compare_outputs( - inputs=sd35_transformer2D_model_inputs, - rtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with FP and INT - atol=4.0, - ) - ) - def test_SD3Transformer2DModel_tosa_INT(self): - sd35_transformer2D_model, sd35_transformer2D_model_inputs = ( - self.prepare_model_and_inputs() +def test_SD3Transformer2DModel_tosa_FP(): + sd35_transformer2D_model, sd35_transformer2D_model_inputs = ( + TestSD3Transformer2DModel().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = TosaPipelineFP[input_t4]( + sd35_transformer2D_model, + sd35_transformer2D_model_inputs, + aten_op=[], + exir_op=[], + use_to_edge_transform_and_lower=True, + rtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with FP and INT + atol=4.0, ) - with torch.no_grad(): - ( - ArmTester( - sd35_transformer2D_model, - example_inputs=sd35_transformer2D_model_inputs, - compile_spec=common.get_tosa_compile_spec(tosa_spec="TOSA-1.0+INT"), - ) - .quantize() - .export() - .to_edge_transform_and_lower() - .check_count(self.ops_after_partitioner_INT) - .to_executorch() - .run_method_and_compare_outputs( - inputs=sd35_transformer2D_model_inputs, - qtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with FP and INT - rtol=1.0, - atol=4.0, - ) - ) + 
pipeline.change_args( + "check_count.exir", TestSD3Transformer2DModel.ops_after_partitioner_FP + ) + pipeline.run() + + +def test_SD3Transformer2DModel_tosa_INT(): + sd35_transformer2D_model, sd35_transformer2D_model_inputs = ( + TestSD3Transformer2DModel().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = TosaPipelineINT[input_t4]( + sd35_transformer2D_model, + sd35_transformer2D_model_inputs, + aten_op=[], + exir_op=[], + use_to_edge_transform_and_lower=True, + qtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with FP and INT + rtol=1.0, + atol=4.0, + ) + pipeline.change_args( + "check_count.exir", TestSD3Transformer2DModel.ops_after_partitioner_INT + ) + pipeline.run() + + +@common.SkipIfNoModelConverter +def test_SD3Transformer2DModel_vgf_FP(): + sd35_transformer2D_model, sd35_transformer2D_model_inputs = ( + TestSD3Transformer2DModel().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = VgfPipeline[input_t4]( + sd35_transformer2D_model, + sd35_transformer2D_model_inputs, + aten_op=[], + exir_op=[], + tosa_version="TOSA-1.0+FP", + use_to_edge_transform_and_lower=True, + rtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with FP and INT + atol=4.0, + ) + pipeline.change_args( + "check_count.exir", TestSD3Transformer2DModel.ops_after_partitioner_FP + ) + pipeline.run() + + +@common.SkipIfNoModelConverter +def test_SD3Transformer2DModel_vgf_INT(): + sd35_transformer2D_model, sd35_transformer2D_model_inputs = ( + TestSD3Transformer2DModel().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = VgfPipeline[input_t4]( + sd35_transformer2D_model, + sd35_transformer2D_model_inputs, + aten_op=[], + exir_op=[], + tosa_version="TOSA-1.0+INT", + use_to_edge_transform_and_lower=True, + qtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with FP and INT + rtol=1.0, + atol=4.0, + ) + pipeline.change_args( + "check_count.exir", 
TestSD3Transformer2DModel.ops_after_partitioner_INT + ) + pipeline.run() diff --git a/backends/arm/test/models/stable_diffusion/test_T5EncoderModel.py b/backends/arm/test/models/stable_diffusion/test_T5EncoderModel.py index 22a47042eb1..20b92e4a258 100644 --- a/backends/arm/test/models/stable_diffusion/test_T5EncoderModel.py +++ b/backends/arm/test/models/stable_diffusion/test_T5EncoderModel.py @@ -4,7 +4,7 @@ # LICENSE file in the root directory of this source tree. -import unittest +from typing import Tuple import torch from executorch.backends.arm._passes import ( @@ -17,11 +17,17 @@ from executorch.backends.arm.test.models.stable_diffusion.stable_diffusion_module_test_configs import ( T5_encoder_config, ) -from executorch.backends.arm.test.tester.arm_tester import ArmTester +from executorch.backends.arm.test.tester.test_pipeline import ( + TosaPipelineFP, + TosaPipelineINT, + VgfPipeline, +) from transformers import T5EncoderModel +input_t = Tuple[torch.Tensor] + -class TestT5EncoderModel(unittest.TestCase): +class TestT5EncoderModel: """ Test class of T5EncoderModel. 
T5EncoderModel is one of the text_encoder used by Stable Diffusion 3.5 Medium @@ -61,46 +67,88 @@ def prepare_model_and_inputs(self): return t5_encoder_model, t5_encoder_model_inputs - def test_T5EncoderModel_tosa_FP(self): - t5_encoder_model, t5_encoder_model_inputs = self.prepare_model_and_inputs() - with torch.no_grad(): - ( - ArmTester( - t5_encoder_model, - example_inputs=t5_encoder_model_inputs, - compile_spec=common.get_tosa_compile_spec(tosa_spec="TOSA-1.0+FP"), - transform_passes=[ - ConvertInt64ConstOpsToInt32Pass(), - ConvertInt64OutputOpsToInt32Pass(), - InsertInt32CastsAfterInt64PlaceholdersPass(), - ], - ) - .export() - .to_edge_transform_and_lower() - .dump_operator_distribution() - .check_count(self.ops_after_partitioner_FP) - .to_executorch() - .run_method_and_compare_outputs( - inputs=t5_encoder_model_inputs, - ) - ) - - def test_T5EncoderModel_tosa_INT(self): - t5_encoder_model, t5_encoder_model_inputs = self.prepare_model_and_inputs() - with torch.no_grad(): - ( - ArmTester( - t5_encoder_model, - example_inputs=t5_encoder_model_inputs, - compile_spec=common.get_tosa_compile_spec(tosa_spec="TOSA-1.0+INT"), - ) - .quantize() - .export() - .to_edge_transform_and_lower() - .dump_operator_distribution() - .check_count(self.ops_after_partitioner_INT) - .to_executorch() - .run_method_and_compare_outputs( - inputs=t5_encoder_model_inputs, - ) - ) + +def test_T5EncoderModel_tosa_FP(): + t5_encoder_model, t5_encoder_model_inputs = ( + TestT5EncoderModel().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = TosaPipelineFP[input_t]( + t5_encoder_model, + t5_encoder_model_inputs, + aten_op=[], + exir_op=[], + use_to_edge_transform_and_lower=True, + transform_passes=[ + ConvertInt64ConstOpsToInt32Pass(), + ConvertInt64OutputOpsToInt32Pass(), + InsertInt32CastsAfterInt64PlaceholdersPass(), + ], + ) + pipeline.change_args( + "check_count.exir", TestT5EncoderModel.ops_after_partitioner_FP + ) + pipeline.run() + + +def 
test_T5EncoderModel_tosa_INT(): + t5_encoder_model, t5_encoder_model_inputs = ( + TestT5EncoderModel().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = TosaPipelineINT[input_t]( + t5_encoder_model, + t5_encoder_model_inputs, + aten_op=[], + exir_op=[], + use_to_edge_transform_and_lower=True, + ) + pipeline.change_args( + "check_count.exir", TestT5EncoderModel.ops_after_partitioner_INT + ) + pipeline.run() + + +@common.SkipIfNoModelConverter +def test_T5EncoderModel_vgf_FP(): + t5_encoder_model, t5_encoder_model_inputs = ( + TestT5EncoderModel().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = VgfPipeline[input_t]( + t5_encoder_model, + t5_encoder_model_inputs, + aten_op=[], + exir_op=[], + tosa_version="TOSA-1.0+FP", + use_to_edge_transform_and_lower=True, + transform_passes=[ + ConvertInt64ConstOpsToInt32Pass(), + ConvertInt64OutputOpsToInt32Pass(), + InsertInt32CastsAfterInt64PlaceholdersPass(), + ], + ) + pipeline.change_args( + "check_count.exir", TestT5EncoderModel.ops_after_partitioner_FP + ) + pipeline.run() + + +@common.SkipIfNoModelConverter +def test_T5EncoderModel_vgf_INT(): + t5_encoder_model, t5_encoder_model_inputs = ( + TestT5EncoderModel().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = VgfPipeline[input_t]( + t5_encoder_model, + t5_encoder_model_inputs, + aten_op=[], + exir_op=[], + tosa_version="TOSA-1.0+INT", + use_to_edge_transform_and_lower=True, + ) + pipeline.change_args( + "check_count.exir", TestT5EncoderModel.ops_after_partitioner_INT + ) + pipeline.run() diff --git a/backends/arm/test/models/stable_diffusion/test_vae_AutoencoderKL.py b/backends/arm/test/models/stable_diffusion/test_vae_AutoencoderKL.py index ab0f4892fb8..a3c3a018131 100644 --- a/backends/arm/test/models/stable_diffusion/test_vae_AutoencoderKL.py +++ b/backends/arm/test/models/stable_diffusion/test_vae_AutoencoderKL.py @@ -4,7 +4,7 @@ # LICENSE file in the root directory of this source tree. 
-import unittest +from typing import Tuple import torch from diffusers.models.autoencoders import AutoencoderKL @@ -14,10 +14,16 @@ from executorch.backends.arm.test.models.stable_diffusion.stable_diffusion_module_test_configs import ( AutoencoderKL_config, ) -from executorch.backends.arm.test.tester.arm_tester import ArmTester +from executorch.backends.arm.test.tester.test_pipeline import ( + TosaPipelineFP, + TosaPipelineINT, + VgfPipeline, +) + +input_t = Tuple[torch.Tensor] -class TestAutoencoderKL(unittest.TestCase): +class TestAutoencoderKL: """ Test class of AutoencoderKL. AutoencoderKL is the encoder/decoder used by Stable Diffusion 3.5 Medium @@ -41,40 +47,68 @@ def forward(self, *args, **kwargs): return auto_encoder_model, auto_encoder_model_inputs - def test_AutoencoderKL_tosa_FP(self): - auto_encoder_model, auto_encoder_model_inputs = self.prepare_model_and_inputs() - with torch.no_grad(): - ( - ArmTester( - auto_encoder_model, - example_inputs=auto_encoder_model_inputs, - compile_spec=common.get_tosa_compile_spec(tosa_spec="TOSA-1.0+FP"), - ) - .export() - .to_edge_transform_and_lower() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs( - inputs=auto_encoder_model_inputs, - ) - ) - - def test_AutoencoderKL_tosa_INT(self): - auto_encoder_model, auto_encoder_model_inputs = self.prepare_model_and_inputs() - with torch.no_grad(): - ( - ArmTester( - auto_encoder_model, - example_inputs=auto_encoder_model_inputs, - compile_spec=common.get_tosa_compile_spec(tosa_spec="TOSA-1.0+INT"), - ) - .quantize() - .export() - .to_edge_transform_and_lower() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs( - inputs=auto_encoder_model_inputs, - atol=1.0, # TODO: MLETORCH-990 Reduce tolerance of vae(AutoencoderKL) with INT - ) - ) + +def test_AutoencoderKL_tosa_FP(): + auto_encoder_model, auto_encoder_model_inputs = ( + 
TestAutoencoderKL().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = TosaPipelineFP[input_t]( + auto_encoder_model, + auto_encoder_model_inputs, + aten_op=[], + exir_op=[], + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +def test_AutoencoderKL_tosa_INT(): + auto_encoder_model, auto_encoder_model_inputs = ( + TestAutoencoderKL().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = TosaPipelineINT[input_t]( + auto_encoder_model, + auto_encoder_model_inputs, + aten_op=[], + exir_op=[], + use_to_edge_transform_and_lower=True, + atol=1.0, # TODO: MLETORCH-990 Reduce tolerance of vae(AutoencoderKL) with INT + ) + pipeline.run() + + +@common.SkipIfNoModelConverter +def test_AutoencoderKL_vgf_FP(): + auto_encoder_model, auto_encoder_model_inputs = ( + TestAutoencoderKL().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = VgfPipeline[input_t]( + auto_encoder_model, + auto_encoder_model_inputs, + aten_op=[], + exir_op=[], + tosa_version="TOSA-1.0+FP", + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.SkipIfNoModelConverter +def test_AutoencoderKL_vgf_INT(): + auto_encoder_model, auto_encoder_model_inputs = ( + TestAutoencoderKL().prepare_model_and_inputs() + ) + with torch.no_grad(): + pipeline = VgfPipeline[input_t]( + auto_encoder_model, + auto_encoder_model_inputs, + aten_op=[], + exir_op=[], + tosa_version="TOSA-1.0+INT", + use_to_edge_transform_and_lower=True, + atol=1.0, # TODO: MLETORCH-990 Reduce tolerance of vae(AutoencoderKL) with INT + ) + pipeline.run()