Commit 646458a

Add cuda profiler
Signed-off-by: ajrasane <[email protected]>
1 parent 1aafbbc commit 646458a

File tree

1 file changed (+7 −2 lines)


examples/diffusers/quantization/diffusion_trt.py

Lines changed: 7 additions & 2 deletions
@@ -64,7 +64,10 @@ def generate_image(pipe, prompt, image_name):
 
 @torch.inference_mode()
 def benchmark_backbone_standalone(
-    pipe, num_warmup=10, num_benchmark=100, model_name="flux-dev",
+    pipe,
+    num_warmup=10,
+    num_benchmark=100,
+    model_name="flux-dev",
 ):
     """Benchmark the backbone model directly without running the full pipeline."""
     backbone = pipe.transformer if hasattr(pipe, "transformer") else pipe.unet
@@ -90,10 +93,12 @@ def benchmark_backbone_standalone(
     print(f"Benchmarking: {num_benchmark} iterations")
     times = []
     for _ in tqdm(range(num_benchmark), desc="Benchmark"):
+        torch.cuda.profiler.cudart().cudaProfilerStart()
         start_event.record()
         _ = backbone(**dummy_inputs_dict)
         end_event.record()
         torch.cuda.synchronize()
+        torch.cuda.profiler.cudart().cudaProfilerStop()
         times.append(start_event.elapsed_time(end_event))
 
     avg_latency = sum(times) / len(times)
@@ -102,7 +107,7 @@ def benchmark_backbone_standalone(
     p95 = times[int(len(times) * 0.95)]
     p99 = times[int(len(times) * 0.99)]
 
-    print(f"\nBackbone-only inference latency:")
+    print("\nBackbone-only inference latency:")
     print(f"  Average: {avg_latency:.2f} ms")
     print(f"  P50: {p50:.2f} ms")
     print(f"  P95: {p95:.2f} ms")
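For context: cudaProfilerStart/cudaProfilerStop delimit a capture range that an attached profiler (e.g. Nsight Systems) can key on, and are effectively no-ops when no profiler is attached. Below is a minimal, self-contained sketch of the same pattern added by this commit, using the public torch.cuda.profiler.start()/stop() wrappers, which invoke the same cudart entry points. The nn.Linear model is a stand-in for the pipeline backbone, not code from this file.

```python
# Minimal sketch of the profiler-range + CUDA-event timing pattern from the
# diff, on a stand-in model (assumes a CUDA-capable GPU is available).
import torch

model = torch.nn.Linear(1024, 1024).cuda()  # stand-in for pipe.transformer / pipe.unet
x = torch.randn(8, 1024, device="cuda")

start_event = torch.cuda.Event(enable_timing=True)
end_event = torch.cuda.Event(enable_timing=True)

times = []
with torch.inference_mode():
    for _ in range(100):
        torch.cuda.profiler.start()  # cudaProfilerStart: open capture range
        start_event.record()
        _ = model(x)
        end_event.record()
        torch.cuda.synchronize()  # ensure both events have completed
        torch.cuda.profiler.stop()  # cudaProfilerStop: close capture range
        times.append(start_event.elapsed_time(end_event))  # milliseconds

print(f"Average: {sum(times) / len(times):.2f} ms")
```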

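With the range markers in place, an Nsight Systems session can be limited to just the marked backbone calls via the CUDA profiler API capture range. A sketch of the invocation (script arguments omitted; flag names as in recent nsys releases):

```
nsys profile --capture-range=cudaProfilerApi --capture-range-end=stop \
    python examples/diffusers/quantization/diffusion_trt.py
```

Note that `--capture-range-end=stop` ends the session at the first cudaProfilerStop, so only the first benchmark iteration is captured; nsys also offers repeat variants of this flag if every marked range should be recorded.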