@@ -4,6 +4,7 @@
 import torch
 
 from diffusers import (
+    AutoencoderKL,
     AutoPipelineForImage2Image,
     AutoPipelineForInpainting,
     AutoPipelineForText2Image,
@@ -15,7 +16,6 @@
     StableDiffusionXLControlNetPipeline,
     T2IAdapter,
     WuerstchenCombinedPipeline,
-    AutoencoderKL,
 )
 from diffusers.utils import load_image
 
@@ -31,8 +31,8 @@
     flush,
     generate_csv_dict,
    generate_csv_dict_model,
-    write_to_csv,
     write_list_to_csv,
+    write_to_csv,
 )
 
 
@@ -359,11 +359,7 @@ def __init__(self):
 
     def get_result_filepath(self, suffix):
         name = (
-            self.model_class_name
-            + "_"
-            + self.pretrained_model_name_or_path.replace("/", "_")
-            + "_"
-            + f"{suffix}.csv"
+            self.model_class_name + "_" + self.pretrained_model_name_or_path.replace("/", "_") + "_" + f"{suffix}.csv"
         )
         filepath = os.path.join(BASE_PATH, name)
         return filepath
@@ -375,7 +371,9 @@ class AutoencoderKLBenchmark(BaseBenchmarkTestCase):
     def __init__(self, pretrained_model_name_or_path, dtype, tiling, **kwargs):
         super().__init__()
         self.dtype = getattr(torch, dtype)
-        model = self.model_class.from_pretrained(pretrained_model_name_or_path, torch_dtype=self.dtype, **kwargs).eval()
+        model = self.model_class.from_pretrained(
+            pretrained_model_name_or_path, torch_dtype=self.dtype, **kwargs
+        ).eval()
         model = model.to("cuda")
         self.tiling = False
         if tiling:
@@ -389,13 +387,15 @@ def __init__(self, pretrained_model_name_or_path, dtype, tiling, **kwargs):
     def run_decode(self, model, tensor):
         _ = model.decode(tensor)
 
-    @torch.no_grad
+    @torch.no_grad()
     def _test_decode(self, **kwargs):
         batch = kwargs.get("batch")
         height = kwargs.get("height")
         width = kwargs.get("width")
 
-        tensor = torch.randn((batch, self.model.config.latent_channels, height, width), dtype=self.dtype, device="cuda")
+        tensor = torch.randn(
+            (batch, self.model.config.latent_channels, height, width), dtype=self.dtype, device="cuda"
+        )
 
         try:
             time = benchmark_fn(self.run_decode, self.model, tensor)
@@ -406,7 +406,10 @@ def _test_decode(self, **kwargs):
 
         benchmark_info = BenchmarkInfo(time=time, memory=memory)
         csv_dict = generate_csv_dict_model(
-            model_cls=self.model_class_name, ckpt=self.pretrained_model_name_or_path, benchmark_info=benchmark_info, **kwargs,
+            model_cls=self.model_class_name,
+            ckpt=self.pretrained_model_name_or_path,
+            benchmark_info=benchmark_info,
+            **kwargs,
         )
         print(f"{self.model_class_name} decode - shape: {list(tensor.shape)}, time: {time}, memory: {memory}")
         return csv_dict
@@ -416,15 +419,92 @@ def test_decode(self):
 
         batches = (1,)
         # heights = (32, 64, 128, 256,)
-        widths = (32, 64, 128, 256,)
+        widths = (
+            32,
+            64,
+            128,
+            256,
+        )
         for batch in batches:
             # for height in heights:
-                for width in widths:
-                    benchmark_info = self._test_decode(batch=batch, height=width, width=width)
-                    benchmark_infos.append(benchmark_info)
+            for width in widths:
+                benchmark_info = self._test_decode(batch=batch, height=width, width=width)
+                benchmark_infos.append(benchmark_info)
 
         suffix = "decode"
         if self.tiling:
             suffix = "tiled_decode"
         filepath = self.get_result_filepath(suffix)
         write_list_to_csv(filepath, benchmark_infos)
+
+
+class AutoencoderKLEncodeBenchmark(BaseBenchmarkTestCase):
+    model_class = AutoencoderKL
+
+    def __init__(self, pretrained_model_name_or_path, dtype, tiling, **kwargs):
+        super().__init__()
+        self.dtype = getattr(torch, dtype)
+        model = self.model_class.from_pretrained(
+            pretrained_model_name_or_path, torch_dtype=self.dtype, **kwargs
+        ).eval()
+        model = model.to("cuda")
+        self.tiling = False
+        if tiling:
+            model.enable_tiling()
+            self.tiling = True
+        self.model = model
+        self.model_class_name = str(self.model.__class__.__name__)
+        self.pretrained_model_name_or_path = pretrained_model_name_or_path
+
+    @torch.no_grad()
+    def run_encode(self, model, tensor):
+        _ = model.encode(tensor)
+
+    @torch.no_grad()
+    def _test_encode(self, **kwargs):
+        batch = kwargs.get("batch")
+        height = kwargs.get("height")
+        width = kwargs.get("width")
+
+        tensor = torch.randn(
+            (batch, self.model.config.in_channels, height, width), dtype=self.dtype, device="cuda"
+        )
+
+        try:
+            time = benchmark_fn(self.run_encode, self.model, tensor)
+            memory = bytes_to_giga_bytes(torch.cuda.max_memory_reserved())
+        except torch.OutOfMemoryError:
+            time = "OOM"
+            memory = "OOM"
+
+        benchmark_info = BenchmarkInfo(time=time, memory=memory)
+        csv_dict = generate_csv_dict_model(
+            model_cls=self.model_class_name,
+            ckpt=self.pretrained_model_name_or_path,
+            benchmark_info=benchmark_info,
+            **kwargs,
+        )
+        print(f"{self.model_class_name} encode - shape: {list(tensor.shape)}, time: {time}, memory: {memory}")
+        return csv_dict
+
+    def test_encode(self):
+        benchmark_infos = []
+
+        batches = (1,)
+        widths = (
+            256,
+            512,
+            1024,
+            2048,
+        )
+        for batch in batches:
+            # for height in heights:
+            for width in widths:
+                benchmark_info = self._test_encode(batch=batch, height=width, width=width)
+                benchmark_infos.append(benchmark_info)
+
+        suffix = "encode"
+        if self.tiling:
+            suffix = "tiled_encode"
+        filepath = self.get_result_filepath(suffix)
+        write_list_to_csv(filepath, benchmark_infos)
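
The new AutoencoderKLEncodeBenchmark mirrors the existing decode benchmark: it loads the VAE with from_pretrained, optionally enables tiling, times run_encode over a sweep of square input resolutions, and writes one CSV per (model, suffix) pair via get_result_filepath. The diff does not show how the class is invoked; as a rough sketch only (assuming the class is importable from this script, a CUDA device is available, and using stabilityai/sd-vae-ft-mse purely as an example checkpoint), it might be driven like this:

# Hypothetical driver, not part of this commit: sweeps dtype and tiling for the
# encode benchmark and writes one CSV per configuration.
import itertools

if __name__ == "__main__":
    for dtype, tiling in itertools.product(("float16", "float32"), (False, True)):
        benchmark = AutoencoderKLEncodeBenchmark(
            "stabilityai/sd-vae-ft-mse",  # example checkpoint, not taken from the diff
            dtype=dtype,
            tiling=tiling,
        )
        # Produces AutoencoderKL_stabilityai_sd-vae-ft-mse_encode.csv
        # (or *_tiled_encode.csv when tiling is enabled).
        benchmark.test_encode()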