166 changes: 165 additions & 1 deletion benchmarks/base_classes.py
@@ -4,6 +4,7 @@
import torch

from diffusers import (
AutoencoderKL,
AutoPipelineForImage2Image,
AutoPipelineForInpainting,
AutoPipelineForText2Image,
@@ -29,6 +30,8 @@
bytes_to_giga_bytes,
flush,
generate_csv_dict,
generate_csv_dict_model,
write_list_to_csv,
write_to_csv,
)

@@ -169,7 +172,7 @@ def benchmark(self, args):
print(f"[INFO] {self.pipe.__class__.__name__}: Running benchmark with: {vars(args)}\n")

time = benchmark_fn(self.run_inference, self.pipe, args) # in seconds.
- memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) # in GBs.
+ memory = bytes_to_giga_bytes(torch.cuda.max_memory_reserved()) # in GBs.
benchmark_info = BenchmarkInfo(time=time, memory=memory)

pipeline_class_name = str(self.pipe.__class__.__name__)
@@ -344,3 +347,164 @@ class T2IAdapterSDXLBenchmark(T2IAdapterBenchmark):

def __init__(self, args):
super().__init__(args)


class BaseBenchmarkTestCase:
model_class = None
pretrained_model_name_or_path = None
model_class_name = None

def __init__(self):
super().__init__()

def get_result_filepath(self, suffix):
name = (
self.model_class_name + "_" + self.pretrained_model_name_or_path.replace("/", "_") + "_" + f"{suffix}.csv"
)
filepath = os.path.join(BASE_PATH, name)
return filepath


class AutoencoderKLBenchmark(BaseBenchmarkTestCase):
@sayakpaul (Member) commented on Feb 12, 2025:
Should we let the users define dummy_inputs() per model class here? And then we could let them implement their own function that needs to be benchmarked.

So, BaseBenchmarkTestCase could then have a method benchmark():

def benchmark(...):
    time = benchmark_fn(self.run_decode, self.model, tensor)
    memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) # should this be allocated?
    benchmark_info = BenchmarkInfo(time=time, memory=memory)
    
    csv_dict = generate_csv_dict_model(
        model_cls=self.model_class_name, ckpt=self.pretrained_model_name_or_path, benchmark_info=benchmark_info, **kwargs,
    )
    print(f"{self.model_class_name} decode - shape: {list(tensor.shape)}, time: {time}, memory: {memory}")
    return csv_dict
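
A minimal sketch of the per-model dummy_inputs() hook this comment proposes; the hook name and signature are assumptions for illustration, not something this PR defines:

# Hypothetical hook; it mirrors the tensor construction in _test_decode further
# down, so benchmark() could build inputs via self.dummy_inputs(**kwargs)
# instead of each subclass constructing them inline.
def dummy_inputs(self, batch, height, width):
    return torch.randn(
        (batch, self.model.config.latent_channels, height, width),
        dtype=self.dtype,
        device="cuda",
    )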

model_class = AutoencoderKL

def __init__(self, pretrained_model_name_or_path, dtype, tiling, **kwargs):
super().__init__()
self.dtype = getattr(torch, dtype)
model = self.model_class.from_pretrained(
pretrained_model_name_or_path, torch_dtype=self.dtype, **kwargs
).eval()
model = model.to("cuda")
self.tiling = False
if tiling:
model.enable_tiling()
self.tiling = True
self.model = model
self.model_class_name = str(self.model.__class__.__name__)
self.pretrained_model_name_or_path = pretrained_model_name_or_path

@torch.no_grad()
def run_decode(self, model, tensor):
_ = model.decode(tensor)

@torch.no_grad()
def _test_decode(self, **kwargs):
batch = kwargs.get("batch")
height = kwargs.get("height")
width = kwargs.get("width")

tensor = torch.randn(
(batch, self.model.config.latent_channels, height, width), dtype=self.dtype, device="cuda"
)

try:
time = benchmark_fn(self.run_decode, self.model, tensor)
memory = bytes_to_giga_bytes(torch.cuda.max_memory_reserved())
except torch.OutOfMemoryError:
time = "OOM"
memory = "OOM"

benchmark_info = BenchmarkInfo(time=time, memory=memory)
csv_dict = generate_csv_dict_model(
model_cls=self.model_class_name,
ckpt=self.pretrained_model_name_or_path,
benchmark_info=benchmark_info,
**kwargs,
)
print(f"{self.model_class_name} decode - shape: {list(tensor.shape)}, time: {time}, memory: {memory}")
return csv_dict

def test_decode(self):
@sayakpaul (Member) commented:
Not needed for the first iteration but I would consider also including model.compile(). (A hedged sketch follows after this class's diff.)

benchmark_infos = []

batches = (1,)
# heights = (32, 64, 128, 256,)
widths = (
32,
64,
128,
256,
)
for batch in batches:
# for height in heights:
for width in widths:
benchmark_info = self._test_decode(batch=batch, height=width, width=width)
benchmark_infos.append(benchmark_info)

suffix = "decode"
if self.tiling:
suffix = "tiled_decode"
filepath = self.get_result_filepath(suffix)
write_list_to_csv(filepath, benchmark_infos)
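
As the review comment above suggests, a later iteration could also benchmark model.compile(). A minimal, hypothetical sketch of how that could be layered on top of this class; the subclass and the choice to compile only the decode path are assumptions, not part of this PR:

# Hypothetical extension (assumes PyTorch 2.x): compile the decode path
# before timing.
class CompiledAutoencoderKLBenchmark(AutoencoderKLBenchmark):
    def __init__(self, pretrained_model_name_or_path, dtype, tiling, **kwargs):
        super().__init__(pretrained_model_name_or_path, dtype, tiling, **kwargs)
        # torch.compile accepts any callable, including a bound method.
        # The first decode call pays the compilation cost, so benchmark_fn
        # would need warmup iterations for timings to reflect steady state.
        self.model.decode = torch.compile(self.model.decode)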


class AutoencoderKLEncodeBenchmark(BaseBenchmarkTestCase):
model_class = AutoencoderKL

def __init__(self, pretrained_model_name_or_path, dtype, tiling, **kwargs):
super().__init__()
self.dtype = getattr(torch, dtype)
model = self.model_class.from_pretrained(
pretrained_model_name_or_path, torch_dtype=self.dtype, **kwargs
).eval()
model = model.to("cuda")
self.tiling = False
if tiling:
model.enable_tiling()
self.tiling = True
self.model = model
self.model_class_name = str(self.model.__class__.__name__)
self.pretrained_model_name_or_path = pretrained_model_name_or_path

@torch.no_grad()
def run_encode(self, model, tensor):
_ = model.encode(tensor)

@torch.no_grad()
def _test_encode(self, **kwargs):
batch = kwargs.get("batch")
height = kwargs.get("height")
width = kwargs.get("width")

tensor = torch.randn(
(batch, self.model.config.in_channels, height, width), dtype=self.dtype, device="cuda"
)

try:
time = benchmark_fn(self.run_encode, self.model, tensor)
memory = bytes_to_giga_bytes(torch.cuda.max_memory_reserved())
except torch.OutOfMemoryError:
time = "OOM"
memory = "OOM"

benchmark_info = BenchmarkInfo(time=time, memory=memory)
csv_dict = generate_csv_dict_model(
model_cls=self.model_class_name,
ckpt=self.pretrained_model_name_or_path,
benchmark_info=benchmark_info,
**kwargs,
)
print(f"{self.model_class_name} encode - shape: {list(tensor.shape)}, time: {time}, memory: {memory}")
return csv_dict

def test_encode(self):
benchmark_infos = []

batches = (1,)
widths = (
256,
512,
1024,
2048,
)
for batch in batches:
# for height in heights:
for width in widths:
benchmark_info = self._test_encode(batch=batch, height=width, width=width)
benchmark_infos.append(benchmark_info)

suffix = "encode"
if self.tiling:
suffix = "tiled_encode"
filepath = self.get_result_filepath(suffix)
write_list_to_csv(filepath, benchmark_infos)
35 changes: 35 additions & 0 deletions benchmarks/benchmark_autoencoderkl.py
@@ -0,0 +1,35 @@
import argparse
import sys


sys.path.append(".")
from base_classes import AutoencoderKLBenchmark # noqa: E402


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--pretrained_model_name_or_path",
type=str,
default="stable-diffusion-v1-5/stable-diffusion-v1-5",
)
parser.add_argument(
"--subfolder",
type=str,
default=None,
)
parser.add_argument(
"--dtype",
type=str,
default="float16",
)
parser.add_argument("--tiling", action="store_true")
args = parser.parse_args()

benchmark = AutoencoderKLBenchmark(
pretrained_model_name_or_path=args.pretrained_model_name_or_path,
dtype=args.dtype,
tiling=args.tiling,
subfolder=args.subfolder,
)
benchmark.test_decode()
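
For reference, a hypothetical direct use of the class without the CLI wrapper. The checkpoint id matches the script's default, while subfolder="vae" is an assumption about where the AutoencoderKL weights live in that repo:

# Assumes a CUDA GPU and that this runs from benchmarks/, matching the
# sys.path.append(".") above.
from base_classes import AutoencoderKLBenchmark

benchmark = AutoencoderKLBenchmark(
    pretrained_model_name_or_path="stable-diffusion-v1-5/stable-diffusion-v1-5",
    dtype="float16",
    tiling=True,  # exercises enable_tiling(); results land in *_tiled_decode.csv
    subfolder="vae",  # assumed checkpoint subfolder
)
benchmark.test_decode()

The encode-side script below follows the same pattern with AutoencoderKLEncodeBenchmark and test_encode().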
35 changes: 35 additions & 0 deletions benchmarks/benchmark_autoencoderkl_encode.py
@@ -0,0 +1,35 @@
import argparse
import sys


sys.path.append(".")
from base_classes import AutoencoderKLEncodeBenchmark # noqa: E402


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--pretrained_model_name_or_path",
type=str,
default="stable-diffusion-v1-5/stable-diffusion-v1-5",
)
parser.add_argument(
"--subfolder",
type=str,
default=None,
)
parser.add_argument(
"--dtype",
type=str,
default="float16",
)
parser.add_argument("--tiling", action="store_true")
args = parser.parse_args()

benchmark = AutoencoderKLEncodeBenchmark(
pretrained_model_name_or_path=args.pretrained_model_name_or_path,
dtype=args.dtype,
tiling=args.tiling,
subfolder=args.subfolder,
)
benchmark.test_encode()
32 changes: 31 additions & 1 deletion benchmarks/utils.py
@@ -26,6 +26,7 @@
PROMPT = "ghibli style, a fantasy landscape with castles"
BASE_PATH = os.getenv("BASE_PATH", ".")
TOTAL_GPU_MEMORY = float(os.getenv("TOTAL_GPU_MEMORY", torch.cuda.get_device_properties(0).total_memory / (1024**3)))
DEVICE_NAME = torch.cuda.get_device_name()

REPO_ID = "diffusers/benchmarks"
FINAL_CSV_FILE = "collated_results.csv"
@@ -77,14 +78,43 @@ def generate_csv_dict(
return data_dict


def generate_csv_dict_model(
model_cls: str,
ckpt: str,
benchmark_info: BenchmarkInfo,
**kwargs,
) -> Dict[str, Union[str, bool, float]]:
"""Packs benchmarking data into a dictionary for latter serialization."""
data_dict = {
"model_cls": model_cls,
"ckpt_id": ckpt,
"time (secs)": benchmark_info.time,
"memory (gbs)": benchmark_info.memory,
"actual_gpu_memory (gbs)": f"{(TOTAL_GPU_MEMORY):.3f}",
"device": DEVICE_NAME,
"github_sha": GITHUB_SHA,
**kwargs,
}
return data_dict


def write_to_csv(file_name: str, data_dict: Dict[str, Union[str, bool, float]]):
"""Serializes a dictionary into a CSV file."""
with open(file_name, mode="w", newline="") as csvfile:
- writer = csv.DictWriter(csvfile, fieldnames=BENCHMARK_FIELDS)
+ writer = csv.DictWriter(csvfile, fieldnames=list(data_dict.keys()))
writer.writeheader()
writer.writerow(data_dict)


def write_list_to_csv(file_name: str, data_dict: List[Dict[str, Union[str, bool, float]]]):
"""Serializes a dictionary into a CSV file."""
with open(file_name, mode="w", newline="") as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=list(data_dict[0].keys()))
writer.writeheader()
for row in data_dict:
writer.writerow(row)


def collate_csv(input_files: List[str], output_file: str):
"""Collates multiple identically structured CSVs into a single CSV file."""
with open(output_file, mode="w", newline="") as outfile: