@@ -64,7 +64,10 @@ def generate_image(pipe, prompt, image_name):
 
 @torch.inference_mode()
 def benchmark_backbone_standalone(
-    pipe, num_warmup=10, num_benchmark=100, model_name="flux-dev",
+    pipe,
+    num_warmup=10,
+    num_benchmark=100,
+    model_name="flux-dev",
 ):
     """Benchmark the backbone model directly without running the full pipeline."""
     backbone = pipe.transformer if hasattr(pipe, "transformer") else pipe.unet
@@ -90,10 +93,12 @@ def benchmark_backbone_standalone(
     print(f"Benchmarking: {num_benchmark} iterations")
     times = []
     for _ in tqdm(range(num_benchmark), desc="Benchmark"):
+        torch.cuda.profiler.cudart().cudaProfilerStart()
         start_event.record()
         _ = backbone(**dummy_inputs_dict)
         end_event.record()
         torch.cuda.synchronize()
+        torch.cuda.profiler.cudart().cudaProfilerStop()
         times.append(start_event.elapsed_time(end_event))
 
     avg_latency = sum(times) / len(times)
@@ -102,7 +107,7 @@ def benchmark_backbone_standalone(
     p95 = times[int(len(times) * 0.95)]
     p99 = times[int(len(times) * 0.99)]
 
-    print(f"\nBackbone-only inference latency:")
+    print("\nBackbone-only inference latency:")
     print(f"  Average: {avg_latency:.2f} ms")
     print(f"  P50: {p50:.2f} ms")
     print(f"  P95: {p95:.2f} ms")
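
For context on the change: the added `cudaProfilerStart()`/`cudaProfilerStop()` calls mark a capture range for an external profiler such as Nsight Systems, so a trace records only the timed backbone calls rather than warmup and setup. Below is a minimal, self-contained sketch of that pattern; the names `run_backbone` and `iters` are illustrative, not from this repo, and the `nsys` flags mentioned in the comments are standard Nsight Systems options rather than anything this code requires.

```python
import torch

def benchmark_with_capture_range(run_backbone, iters=100):
    """Sketch: CUDA-event timing gated by cudaProfilerStart/Stop."""
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    times = []
    for _ in range(iters):
        # Opens an Nsight Systems capture range when run under
        #   nsys profile --capture-range=cudaProfilerApi ...
        torch.cuda.profiler.cudart().cudaProfilerStart()
        start.record()
        run_backbone()
        end.record()
        torch.cuda.synchronize()  # elapsed_time() needs completed events
        torch.cuda.profiler.cudart().cudaProfilerStop()
        times.append(start.elapsed_time(end))  # milliseconds
    times.sort()  # percentile indexing assumes sorted samples
    return sum(times) / len(times), times[int(len(times) * 0.95)]
```

With `--capture-range-end=stop`, `nsys` finishes the trace at the first `cudaProfilerStop()`; `--capture-range-end=repeat` re-arms the range so every iteration is captured.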