diff --git a/benchmarks/base_classes.py b/benchmarks/base_classes.py
index 45bf65c93c93..383202722de5 100644
--- a/benchmarks/base_classes.py
+++ b/benchmarks/base_classes.py
@@ -4,6 +4,7 @@
 import torch
 
 from diffusers import (
+    AutoencoderKL,
     AutoPipelineForImage2Image,
     AutoPipelineForInpainting,
     AutoPipelineForText2Image,
@@ -29,6 +30,8 @@
     bytes_to_giga_bytes,
     flush,
     generate_csv_dict,
+    generate_csv_dict_model,
+    write_list_to_csv,
     write_to_csv,
 )
 
@@ -169,7 +172,7 @@ def benchmark(self, args):
         print(f"[INFO] {self.pipe.__class__.__name__}: Running benchmark with: {vars(args)}\n")
 
         time = benchmark_fn(self.run_inference, self.pipe, args)  # in seconds.
-        memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())  # in GBs.
+        memory = bytes_to_giga_bytes(torch.cuda.max_memory_reserved())  # in GBs.
         benchmark_info = BenchmarkInfo(time=time, memory=memory)
 
         pipeline_class_name = str(self.pipe.__class__.__name__)
@@ -344,3 +347,164 @@
 class T2IAdapterSDXLBenchmark(T2IAdapterBenchmark):
     def __init__(self, args):
         super().__init__(args)
+
+
+class BaseBenchmarkTestCase:
+    model_class = None
+    pretrained_model_name_or_path = None
+    model_class_name = None
+
+    def __init__(self):
+        super().__init__()
+
+    def get_result_filepath(self, suffix):
+        name = (
+            self.model_class_name + "_" + self.pretrained_model_name_or_path.replace("/", "_") + "_" + f"{suffix}.csv"
+        )
+        filepath = os.path.join(BASE_PATH, name)
+        return filepath
+
+
+class AutoencoderKLBenchmark(BaseBenchmarkTestCase):
+    model_class = AutoencoderKL
+
+    def __init__(self, pretrained_model_name_or_path, dtype, tiling, **kwargs):
+        super().__init__()
+        self.dtype = getattr(torch, dtype)
+        model = self.model_class.from_pretrained(
+            pretrained_model_name_or_path, torch_dtype=self.dtype, **kwargs
+        ).eval()
+        model = model.to("cuda")
+        self.tiling = False
+        if tiling:
+            model.enable_tiling()
+            self.tiling = True
+        self.model = model
+        self.model_class_name = str(self.model.__class__.__name__)
+        self.pretrained_model_name_or_path = pretrained_model_name_or_path
+
+    @torch.no_grad()
+    def run_decode(self, model, tensor):
+        _ = model.decode(tensor)
+
+    @torch.no_grad()
+    def _test_decode(self, **kwargs):
+        batch = kwargs.get("batch")
+        height = kwargs.get("height")
+        width = kwargs.get("width")
+
+        tensor = torch.randn(
+            (batch, self.model.config.latent_channels, height, width), dtype=self.dtype, device="cuda"
+        )
+
+        try:
+            time = benchmark_fn(self.run_decode, self.model, tensor)
+            memory = bytes_to_giga_bytes(torch.cuda.max_memory_reserved())
+        except torch.OutOfMemoryError:
+            time = "OOM"
+            memory = "OOM"
+
+        benchmark_info = BenchmarkInfo(time=time, memory=memory)
+        csv_dict = generate_csv_dict_model(
+            model_cls=self.model_class_name,
+            ckpt=self.pretrained_model_name_or_path,
+            benchmark_info=benchmark_info,
+            **kwargs,
+        )
+        print(f"{self.model_class_name} decode - shape: {list(tensor.shape)}, time: {time}, memory: {memory}")
+        return csv_dict
+
+    def test_decode(self):
+        benchmark_infos = []
+
+        batches = (1,)
+        # heights = (32, 64, 128, 256,)
+        widths = (
+            32,
+            64,
+            128,
+            256,
+        )
+        for batch in batches:
+            # for height in heights:
+            for width in widths:
+                benchmark_info = self._test_decode(batch=batch, height=width, width=width)
+                benchmark_infos.append(benchmark_info)
+
+        suffix = "decode"
+        if self.tiling:
+            suffix = "tiled_decode"
+        filepath = self.get_result_filepath(suffix)
+        write_list_to_csv(filepath, benchmark_infos)
+
+
+class AutoencoderKLEncodeBenchmark(BaseBenchmarkTestCase):
+    model_class = AutoencoderKL
+
+    def __init__(self, pretrained_model_name_or_path, dtype, tiling, **kwargs):
+        super().__init__()
+        self.dtype = getattr(torch, dtype)
+        model = self.model_class.from_pretrained(
+            pretrained_model_name_or_path, torch_dtype=self.dtype, **kwargs
+        ).eval()
+        model = model.to("cuda")
+        self.tiling = False
+        if tiling:
+            model.enable_tiling()
+            self.tiling = True
+        self.model = model
+        self.model_class_name = str(self.model.__class__.__name__)
+        self.pretrained_model_name_or_path = pretrained_model_name_or_path
+
+    @torch.no_grad()
+    def run_encode(self, model, tensor):
+        _ = model.encode(tensor)
+
+    @torch.no_grad()
+    def _test_encode(self, **kwargs):
+        batch = kwargs.get("batch")
+        height = kwargs.get("height")
+        width = kwargs.get("width")
+
+        tensor = torch.randn(
+            (batch, self.model.config.in_channels, height, width), dtype=self.dtype, device="cuda"
+        )
+
+        try:
+            time = benchmark_fn(self.run_encode, self.model, tensor)
+            memory = bytes_to_giga_bytes(torch.cuda.max_memory_reserved())
+        except torch.OutOfMemoryError:
+            time = "OOM"
+            memory = "OOM"
+
+        benchmark_info = BenchmarkInfo(time=time, memory=memory)
+        csv_dict = generate_csv_dict_model(
+            model_cls=self.model_class_name,
+            ckpt=self.pretrained_model_name_or_path,
+            benchmark_info=benchmark_info,
+            **kwargs,
+        )
+        print(f"{self.model_class_name} encode - shape: {list(tensor.shape)}, time: {time}, memory: {memory}")
+        return csv_dict
+
+    def test_encode(self):
+        benchmark_infos = []
+
+        batches = (1,)
+        widths = (
+            256,
+            512,
+            1024,
+            2048,
+        )
+        for batch in batches:
+            # for height in heights:
+            for width in widths:
+                benchmark_info = self._test_encode(batch=batch, height=width, width=width)
+                benchmark_infos.append(benchmark_info)
+
+        suffix = "encode"
+        if self.tiling:
+            suffix = "tiled_encode"
+        filepath = self.get_result_filepath(suffix)
+        write_list_to_csv(filepath, benchmark_infos)
diff --git a/benchmarks/benchmark_autoencoderkl.py b/benchmarks/benchmark_autoencoderkl.py
new file mode 100644
index 000000000000..b32145f300ba
--- /dev/null
+++ b/benchmarks/benchmark_autoencoderkl.py
@@ -0,0 +1,35 @@
+import argparse
+import sys
+
+
+sys.path.append(".")
+from base_classes import AutoencoderKLBenchmark  # noqa: E402
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--pretrained_model_name_or_path",
+        type=str,
+        default="stable-diffusion-v1-5/stable-diffusion-v1-5",
+    )
+    parser.add_argument(
+        "--subfolder",
+        type=str,
+        default=None,
+    )
+    parser.add_argument(
+        "--dtype",
+        type=str,
+        default="float16",
+    )
+    parser.add_argument("--tiling", action="store_true")
+    args = parser.parse_args()
+
+    benchmark = AutoencoderKLBenchmark(
+        pretrained_model_name_or_path=args.pretrained_model_name_or_path,
+        dtype=args.dtype,
+        tiling=args.tiling,
+        subfolder=args.subfolder,
+    )
+    benchmark.test_decode()
diff --git a/benchmarks/benchmark_autoencoderkl_encode.py b/benchmarks/benchmark_autoencoderkl_encode.py
new file mode 100644
index 000000000000..359aba948068
--- /dev/null
+++ b/benchmarks/benchmark_autoencoderkl_encode.py
@@ -0,0 +1,35 @@
+import argparse
+import sys
+
+
+sys.path.append(".")
+from base_classes import AutoencoderKLEncodeBenchmark  # noqa: E402
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--pretrained_model_name_or_path",
+        type=str,
+        default="stable-diffusion-v1-5/stable-diffusion-v1-5",
+    )
+    parser.add_argument(
+        "--subfolder",
+        type=str,
+        default=None,
+    )
+    parser.add_argument(
+        "--dtype",
+        type=str,
+        default="float16",
+    )
+    parser.add_argument("--tiling", action="store_true")
+    args = parser.parse_args()
+
+    benchmark = AutoencoderKLEncodeBenchmark(
+        pretrained_model_name_or_path=args.pretrained_model_name_or_path,
+        dtype=args.dtype,
+        tiling=args.tiling,
+        subfolder=args.subfolder,
+    )
+    benchmark.test_encode()
diff --git a/benchmarks/utils.py b/benchmarks/utils.py
index 5fce920ac6c3..19ab4590f2ce 100644
--- a/benchmarks/utils.py
+++ b/benchmarks/utils.py
@@ -26,6 +26,7 @@
 PROMPT = "ghibli style, a fantasy landscape with castles"
 BASE_PATH = os.getenv("BASE_PATH", ".")
 TOTAL_GPU_MEMORY = float(os.getenv("TOTAL_GPU_MEMORY", torch.cuda.get_device_properties(0).total_memory / (1024**3)))
+DEVICE_NAME = torch.cuda.get_device_name()
 
 REPO_ID = "diffusers/benchmarks"
 FINAL_CSV_FILE = "collated_results.csv"
@@ -77,14 +78,43 @@ def generate_csv_dict(
     return data_dict
 
 
+def generate_csv_dict_model(
+    model_cls: str,
+    ckpt: str,
+    benchmark_info: BenchmarkInfo,
+    **kwargs,
+) -> Dict[str, Union[str, bool, float]]:
+    """Packs model-level benchmarking data (plus any extra keyword fields) into a dictionary for later serialization."""
+    data_dict = {
+        "model_cls": model_cls,
+        "ckpt_id": ckpt,
+        "time (secs)": benchmark_info.time,
+        "memory (gbs)": benchmark_info.memory,
+        "actual_gpu_memory (gbs)": f"{(TOTAL_GPU_MEMORY):.3f}",
+        "device": DEVICE_NAME,
+        "github_sha": GITHUB_SHA,
+        **kwargs,
+    }
+    return data_dict
+
+
 def write_to_csv(file_name: str, data_dict: Dict[str, Union[str, bool, float]]):
     """Serializes a dictionary into a CSV file."""
     with open(file_name, mode="w", newline="") as csvfile:
-        writer = csv.DictWriter(csvfile, fieldnames=BENCHMARK_FIELDS)
+        writer = csv.DictWriter(csvfile, fieldnames=list(data_dict.keys()))
         writer.writeheader()
         writer.writerow(data_dict)
 
 
+def write_list_to_csv(file_name: str, data_dict: List[Dict[str, Union[str, bool, float]]]):
+    """Serializes a non-empty list of dictionaries into a CSV file, one row each; the first entry's keys form the header."""
+    with open(file_name, mode="w", newline="") as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=list(data_dict[0].keys()))
+        writer.writeheader()
+        for row in data_dict:
+            writer.writerow(row)
+
+
 def collate_csv(input_files: List[str], output_file: str):
     """Collates multiple identically structured CSVs into a single CSV file."""
     with open(output_file, mode="w", newline="") as outfile: