166 changes: 165 additions & 1 deletion benchmarks/base_classes.py
@@ -4,6 +4,7 @@
import torch

from diffusers import (
AutoencoderKL,
AutoPipelineForImage2Image,
AutoPipelineForInpainting,
AutoPipelineForText2Image,
@@ -29,6 +30,8 @@
bytes_to_giga_bytes,
flush,
generate_csv_dict,
generate_csv_dict_model,
write_list_to_csv,
write_to_csv,
)

@@ -169,7 +172,7 @@ def benchmark(self, args):
print(f"[INFO] {self.pipe.__class__.__name__}: Running benchmark with: {vars(args)}\n")

time = benchmark_fn(self.run_inference, self.pipe, args) # in seconds.
- memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) # in GBs.
+ memory = bytes_to_giga_bytes(torch.cuda.max_memory_reserved()) # in GBs.
benchmark_info = BenchmarkInfo(time=time, memory=memory)

pipeline_class_name = str(self.pipe.__class__.__name__)
@@ -344,3 +347,164 @@ class T2IAdapterSDXLBenchmark(T2IAdapterBenchmark):

def __init__(self, args):
super().__init__(args)


class BaseBenchmarkTestCase:
model_class = None
pretrained_model_name_or_path = None
model_class_name = None

def __init__(self):
super().__init__()

def get_result_filepath(self, suffix):
name = (
self.model_class_name + "_" + self.pretrained_model_name_or_path.replace("/", "_") + "_" + f"{suffix}.csv"
)
filepath = os.path.join(BASE_PATH, name)
return filepath


class AutoencoderKLBenchmark(BaseBenchmarkTestCase):
@sayakpaul (Member) commented on Feb 12, 2025:
Should we let the users define dummy_inputs() per model class here? And then we could let them implement their own function that needs to be benchmarked.

So, BaseBenchmarkTestCase could then have a method benchmark():

def benchmark(...):
    time = benchmark_fn(self.run_decode, self.model, tensor)
    memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) # should this be allocated?
    benchmark_info = BenchmarkInfo(time=time, memory=memory)
    
    csv_dict = generate_csv_dict_model(
        model_cls=self.model_class_name, ckpt=self.pretrained_model_name_or_path, benchmark_info=benchmark_info, **kwargs,
    )
    print(f"{self.model_class_name} decode - shape: {list(tensor.shape)}, time: {time}, memory: {memory}")
    return csv_dict
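
A minimal sketch of the per-model dummy_inputs() hook this comment proposes; the hook name and signature are assumptions for illustration, not something this PR defines:

# Hypothetical hook; it mirrors the tensor construction in _test_decode further
# down, so benchmark() could build inputs via self.dummy_inputs(**kwargs)
# instead of each subclass constructing them inline.
def dummy_inputs(self, batch, height, width):
    return torch.randn(
        (batch, self.model.config.latent_channels, height, width),
        dtype=self.dtype,
        device="cuda",
    )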

model_class = AutoencoderKL

def __init__(self, pretrained_model_name_or_path, dtype, tiling, **kwargs):
super().__init__()
self.dtype = getattr(torch, dtype)
model = self.model_class.from_pretrained(
pretrained_model_name_or_path, torch_dtype=self.dtype, **kwargs
).eval()
model = model.to("cuda")
self.tiling = False
if tiling:
model.enable_tiling()
self.tiling = True
self.model = model
self.model_class_name = str(self.model.__class__.__name__)
self.pretrained_model_name_or_path = pretrained_model_name_or_path

@torch.no_grad()
def run_decode(self, model, tensor):
_ = model.decode(tensor)

@torch.no_grad()
def _test_decode(self, **kwargs):
batch = kwargs.get("batch")
height = kwargs.get("height")
width = kwargs.get("width")

tensor = torch.randn(
(batch, self.model.config.latent_channels, height, width), dtype=self.dtype, device="cuda"
)

try:
time = benchmark_fn(self.run_decode, self.model, tensor)
memory = bytes_to_giga_bytes(torch.cuda.max_memory_reserved())
except torch.OutOfMemoryError:
time = "OOM"
memory = "OOM"

benchmark_info = BenchmarkInfo(time=time, memory=memory)
csv_dict = generate_csv_dict_model(
model_cls=self.model_class_name,
ckpt=self.pretrained_model_name_or_path,
benchmark_info=benchmark_info,
**kwargs,
)
print(f"{self.model_class_name} decode - shape: {list(tensor.shape)}, time: {time}, memory: {memory}")
return csv_dict

def test_decode(self):
@sayakpaul (Member) commented:
Not needed for the first iteration but I would consider also including model.compile(). (A hedged sketch follows after this class's diff.)

benchmark_infos = []

batches = (1,)
# heights = (32, 64, 128, 256,)
widths = (
32,
64,
128,
256,
)
for batch in batches:
# for height in heights:
for width in widths:
benchmark_info = self._test_decode(batch=batch, height=width, width=width)
benchmark_infos.append(benchmark_info)

suffix = "decode"
if self.tiling:
suffix = "tiled_decode"
filepath = self.get_result_filepath(suffix)
write_list_to_csv(filepath, benchmark_infos)
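
As the review comment above suggests, a later iteration could also benchmark model.compile(). A minimal, hypothetical sketch of how that could be layered on top of this class; the subclass and the choice to compile only the decode path are assumptions, not part of this PR:

# Hypothetical extension (assumes PyTorch 2.x): compile the decode path
# before timing.
class CompiledAutoencoderKLBenchmark(AutoencoderKLBenchmark):
    def __init__(self, pretrained_model_name_or_path, dtype, tiling, **kwargs):
        super().__init__(pretrained_model_name_or_path, dtype, tiling, **kwargs)
        # torch.compile accepts any callable, including a bound method.
        # The first decode call pays the compilation cost, so benchmark_fn
        # would need warmup iterations for timings to reflect steady state.
        self.model.decode = torch.compile(self.model.decode)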


class AutoencoderKLEncodeBenchmark(BaseBenchmarkTestCase):
model_class = AutoencoderKL

def __init__(self, pretrained_model_name_or_path, dtype, tiling, **kwargs):
super().__init__()
self.dtype = getattr(torch, dtype)
model = self.model_class.from_pretrained(
pretrained_model_name_or_path, torch_dtype=self.dtype, **kwargs
).eval()
model = model.to("cuda")
self.tiling = False
if tiling:
model.enable_tiling()
self.tiling = True
self.model = model
self.model_class_name = str(self.model.__class__.__name__)
self.pretrained_model_name_or_path = pretrained_model_name_or_path

@torch.no_grad()
def run_encode(self, model, tensor):
_ = model.encode(tensor)

@torch.no_grad()
def _test_encode(self, **kwargs):
batch = kwargs.get("batch")
height = kwargs.get("height")
width = kwargs.get("width")

tensor = torch.randn(
(batch, self.model.config.in_channels, height, width), dtype=self.dtype, device="cuda"
)

try:
time = benchmark_fn(self.run_encode, self.model, tensor)
memory = bytes_to_giga_bytes(torch.cuda.max_memory_reserved())
except torch.OutOfMemoryError:
time = "OOM"
memory = "OOM"

benchmark_info = BenchmarkInfo(time=time, memory=memory)
csv_dict = generate_csv_dict_model(
model_cls=self.model_class_name,
ckpt=self.pretrained_model_name_or_path,
benchmark_info=benchmark_info,
**kwargs,
)
print(f"{self.model_class_name} encode - shape: {list(tensor.shape)}, time: {time}, memory: {memory}")
return csv_dict

def test_encode(self):
benchmark_infos = []

batches = (1,)
widths = (
256,
512,
1024,
2048,
)
for batch in batches:
# for height in heights:
for width in widths:
benchmark_info = self._test_encode(batch=batch, height=width, width=width)
benchmark_infos.append(benchmark_info)

suffix = "encode"
if self.tiling:
suffix = "tiled_encode"
filepath = self.get_result_filepath(suffix)
write_list_to_csv(filepath, benchmark_infos)
35 changes: 35 additions & 0 deletions benchmarks/benchmark_autoencoderkl.py
@@ -0,0 +1,35 @@
import argparse
import sys


sys.path.append(".")
from base_classes import AutoencoderKLBenchmark # noqa: E402


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--pretrained_model_name_or_path",
type=str,
default="stable-diffusion-v1-5/stable-diffusion-v1-5",
)
parser.add_argument(
"--subfolder",
type=str,
default=None,
)
parser.add_argument(
"--dtype",
type=str,
default="float16",
)
parser.add_argument("--tiling", action="store_true")
args = parser.parse_args()

benchmark = AutoencoderKLBenchmark(
pretrained_model_name_or_path=args.pretrained_model_name_or_path,
dtype=args.dtype,
tiling=args.tiling,
subfolder=args.subfolder,
)
benchmark.test_decode()
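
For reference, a hypothetical direct use of the class without the CLI wrapper. The checkpoint id matches the script's default, while subfolder="vae" is an assumption about where the AutoencoderKL weights live in that repo:

# Assumes a CUDA GPU and that this runs from benchmarks/, matching the
# sys.path.append(".") above.
from base_classes import AutoencoderKLBenchmark

benchmark = AutoencoderKLBenchmark(
    pretrained_model_name_or_path="stable-diffusion-v1-5/stable-diffusion-v1-5",
    dtype="float16",
    tiling=True,  # exercises enable_tiling(); results land in *_tiled_decode.csv
    subfolder="vae",  # assumed checkpoint subfolder
)
benchmark.test_decode()

The encode-side script below follows the same pattern with AutoencoderKLEncodeBenchmark and test_encode().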
35 changes: 35 additions & 0 deletions benchmarks/benchmark_autoencoderkl_encode.py
@@ -0,0 +1,35 @@
import argparse
import sys


sys.path.append(".")
from base_classes import AutoencoderKLEncodeBenchmark # noqa: E402


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--pretrained_model_name_or_path",
type=str,
default="stable-diffusion-v1-5/stable-diffusion-v1-5",
)
parser.add_argument(
"--subfolder",
type=str,
default=None,
)
parser.add_argument(
"--dtype",
type=str,
default="float16",
)
parser.add_argument("--tiling", action="store_true")
args = parser.parse_args()

benchmark = AutoencoderKLEncodeBenchmark(
pretrained_model_name_or_path=args.pretrained_model_name_or_path,
dtype=args.dtype,
tiling=args.tiling,
subfolder=args.subfolder,
)
benchmark.test_encode()
32 changes: 31 additions & 1 deletion benchmarks/utils.py
@@ -26,6 +26,7 @@
PROMPT = "ghibli style, a fantasy landscape with castles"
BASE_PATH = os.getenv("BASE_PATH", ".")
TOTAL_GPU_MEMORY = float(os.getenv("TOTAL_GPU_MEMORY", torch.cuda.get_device_properties(0).total_memory / (1024**3)))
DEVICE_NAME = torch.cuda.get_device_name()

REPO_ID = "diffusers/benchmarks"
FINAL_CSV_FILE = "collated_results.csv"
@@ -77,14 +78,43 @@ def generate_csv_dict(
return data_dict


def generate_csv_dict_model(
model_cls: str,
ckpt: str,
benchmark_info: BenchmarkInfo,
**kwargs,
) -> Dict[str, Union[str, bool, float]]:
"""Packs benchmarking data into a dictionary for latter serialization."""
data_dict = {
"model_cls": model_cls,
"ckpt_id": ckpt,
"time (secs)": benchmark_info.time,
"memory (gbs)": benchmark_info.memory,
"actual_gpu_memory (gbs)": f"{(TOTAL_GPU_MEMORY):.3f}",
"device": DEVICE_NAME,
"github_sha": GITHUB_SHA,
**kwargs,
}
return data_dict


def write_to_csv(file_name: str, data_dict: Dict[str, Union[str, bool, float]]):
"""Serializes a dictionary into a CSV file."""
with open(file_name, mode="w", newline="") as csvfile:
- writer = csv.DictWriter(csvfile, fieldnames=BENCHMARK_FIELDS)
+ writer = csv.DictWriter(csvfile, fieldnames=list(data_dict.keys()))
writer.writeheader()
writer.writerow(data_dict)


def write_list_to_csv(file_name: str, data_dict: List[Dict[str, Union[str, bool, float]]]):
"""Serializes a dictionary into a CSV file."""
with open(file_name, mode="w", newline="") as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=list(data_dict[0].keys()))
writer.writeheader()
for row in data_dict:
writer.writerow(row)


def collate_csv(input_files: List[str], output_file: str):
"""Collates multiple identically structured CSVs into a single CSV file."""
with open(output_file, mode="w", newline="") as outfile: