Add FLOPs and parameter counts to the benchmark results.

sayakpaul · sayakpaul · commit 4d83a478b26a · 2025-05-20T09:34:40.000+05:30
diff --git a/benchmarks/benchmarking_utils.py b/benchmarks/benchmarking_utils.py
@@ -6,6 +6,7 @@
 import pandas as pd
 import torch
 import torch.utils.benchmark as benchmark
+from torchprofile import profile_macs
 
 from diffusers.models.modeling_utils import ModelMixin
 from diffusers.utils import logging
@@ -31,6 +32,46 @@ def flush():
     torch.cuda.reset_peak_memory_stats()
 
 
+# Taken from https://github.com/lucasb-eyer/cnn_vit_benchmarks/blob/15b665ff758e8062131353076153905cae00a71f/main.py
+def calculate_flops(model, input_dict):
+    """Estimate the FLOPs of one forward pass of `model`.
+
+    Args:
+        model: Module to profile. It is switched to eval mode and left in that mode.
+        input_dict: Maps `model.forward` parameter names to example inputs.
+
+    Returns:
+        Twice the MAC count reported by `profile_macs` (1 MAC = 1 multiply + 1 add).
+
+    Raises:
+        TypeError: If `input_dict` does not satisfy the signature of `model.forward`.
+    """
+    import inspect
+
+    # `profile_macs(model, args, ...)` takes the example inputs as one positional tuple;
+    # unpacking `input_dict` into the call would pass the input names as keyword
+    # arguments of `profile_macs` itself. Bind them to `forward` to recover the order.
+    signature = inspect.signature(model.forward)
+    bound = signature.bind(**input_dict)
+    bound.apply_defaults()
+    positional_kinds = (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
+    # NOTE: keyword-only and `**kwargs` inputs cannot be expressed positionally and are dropped.
+    args = tuple(
+        bound.arguments[name]
+        for name, param in signature.parameters.items()
+        if param.kind in positional_kinds
+    )
+
+    model.eval()
+    with torch.no_grad():
+        macs = profile_macs(model, args)
+    return 2 * macs  # 1 MAC operation = 2 FLOPs (1 multiplication + 1 addition)
+
+
+def calculate_params(model):  # Total number of elements across everything in `model.parameters()`.
+    return sum(map(torch.numel, model.parameters()))
+
+
 # Users can define their own in case this doesn't suffice. For most cases,
 # it should be sufficient.
 def model_init_fn(model_cls, group_offload_kwargs=None, layerwise_upcasting=False, **init_kwargs):
@@ -69,6 +83,14 @@ def post_benchmark(self, model):
 
     @torch.no_grad()
     def run_benchmark(self, scenario: BenchmarkScenario):
+        # 0) Basic stats
+        model = model_init_fn(scenario.model_cls, **scenario.model_init_kwargs)
+        num_params = calculate_params(model)
+        flops = calculate_flops(model, input_dict=scenario.model_init_kwargs)
+        model.cpu()
+        del model
+        self.pre_benchmark()
+
         # 1) plain stats
         results = {}
         plain = None
@@ -104,6 +126,8 @@ def run_benchmark(self, scenario: BenchmarkScenario):
         result = {
             "scenario": scenario.name,
             "model_cls": scenario.model_cls.__name__,
+            "num_params": num_params,
+            "flops": flops,
             "time_plain_s": plain["time"],
             "mem_plain_GB": plain["memory"],
             "time_compile_s": compiled["time"],