
Commit 4424b73

Added extra command line options to control reproducibility
1 parent ffc6bab commit 4424b73

File tree

1 file changed: +71 -30 lines changed

benchmarking/inference_benchmark.py

Lines changed: 71 additions & 30 deletions
@@ -21,6 +21,9 @@
 --batches BATCHES [BATCHES ...]
 --input-length INPUT_LENGTH
 --out-dir OUT_DIR
+--iterations ITERATIONS
+--warmup-runs WARMUP_RUNS
+--output-length OUTPUT_LENGTH
 """
 
 import argparse
@@ -30,6 +33,9 @@
 from optimum_benchmark.logging_utils import setup_logging
 import torch
 
+torch.backends.cudnn.benchmark = False
+torch.backends.cudnn.deterministic = True
+
 BFLOAT16_SUPPORT = torch.cuda.get_device_capability()[0] >= 8
 
 WEIGHTS_CONFIGS = {
@@ -73,9 +79,8 @@
     },
 }
 
-if __name__ == "__main__":
-    setup_logging(level="INFO")
 
+def parse_args():
     parser = argparse.ArgumentParser(description="bitsandbytes inference benchmark tool")
 
     parser.add_argument("model_id", type=str, help="The model checkpoint to use.")
@@ -98,37 +103,73 @@
 
     parser.add_argument("--out-dir", type=str, default="reports")
 
-    args = parser.parse_args()
+    parser.add_argument("--iterations", type=int, default=10, help="Number of iterations for each benchmark run")
+    parser.add_argument(
+        "--warmup-runs", type=int, default=10, help="Number of warmup runs to discard before measurement"
+    )
+    parser.add_argument(
+        "--output-length",
+        type=int,
+        default=64,
+        help="If set, `max_new_tokens` and `min_new_tokens` will be set to this value.",
+    )
+
+    return parser.parse_args()
+
+
+def run_benchmark(args, config, batch_size):
+    launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn", start_method="spawn")
+    scenario_config = InferenceConfig(
+        latency=True,
+        memory=True,
+        input_shapes={"batch_size": batch_size, "sequence_length": args.input_length},
+        iterations=args.iterations,
+        warmup_runs=args.warmup_runs,
+        # set duration to 0 to disable the duration-based stopping criterion
+        # this is IMPORTANT to ensure that all benchmarks run the same number of operations, regardless of hardware speed/bottlenecks
+        duration=0,
+        # for consistent results, set a fixed min and max for output tokens
+        generate_kwargs={"min_new_tokens": args.output_length, "max_new_tokens": args.output_length},
+        forward_kwargs={"min_new_tokens": args.output_length, "max_new_tokens": args.output_length},
+    )
+
+    backend_config = PyTorchConfig(
+        device="cuda",
+        device_ids="0",
+        device_map="auto",
+        no_weights=False,
+        model=args.model_id,
+        **WEIGHTS_CONFIGS[config],
+    )
+
+    test_name = (
+        f"benchmark-{config}"
+        f"-bsz-{batch_size}"
+        f"-isz-{args.input_length}"
+        f"-osz-{args.output_length}"
+        f"-iter-{args.iterations}"
+        f"-wrmup-{args.warmup_runs}"
+    )
+    benchmark_config = BenchmarkConfig(
+        name=test_name,
+        scenario=scenario_config,
+        launcher=launcher_config,
+        backend=backend_config,
+    )
+
+    out_path = out_dir / (test_name + ".json")
+    print(f"[{test_name}] Starting:")
+    benchmark_report = Benchmark.launch(benchmark_config)
+    benchmark_report.save_json(out_path)
+
+
+if __name__ == "__main__":
+    setup_logging(level="INFO")
+    args = parse_args()
 
     out_dir = Path(args.out_dir)
     out_dir.mkdir(parents=True, exist_ok=True)
 
     for batch_size in args.batches:
-        print(f"Benchmarking batch size: {batch_size}")
         for config in args.configs:
-            launcher_config = ProcessConfig(device_isolation=True, start_method="spawn")
-            scenario_config = InferenceConfig(
-                latency=True,
-                memory=True,
-                input_shapes={"batch_size": batch_size, "sequence_length": args.input_length},
-            )
-            backend_config = PyTorchConfig(
-                device="cuda",
-                device_ids="0",
-                device_map="auto",
-                no_weights=False,
-                model=args.model_id,
-                **WEIGHTS_CONFIGS[config],
-            )
-            benchmark_config = BenchmarkConfig(
-                name=f"benchmark-{config}-bsz{batch_size}",
-                scenario=scenario_config,
-                launcher=launcher_config,
-                backend=backend_config,
-            )
-
-            out_path = out_dir / f"benchmark_{config}_bsz{batch_size}.json"
-
-            benchmark_report = Benchmark.launch(benchmark_config)
-            benchmark_report.log()
-            benchmark_report.save_json(out_path)
+            run_benchmark(args, config, batch_size)
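To make the effect of the new options concrete, the sketch below estimates the fixed amount of generation each benchmark now performs. It is a minimal illustration based only on what the diff shows: the --iterations, --warmup-runs, and --output-length defaults, plus duration=0 disabling the duration-based stopping criterion. The names and arithmetic are assumptions for illustration, not part of the commit or of optimum-benchmark's API.

# Hedged sketch (not part of this commit): rough accounting of the fixed workload
# implied by the new reproducibility flags. Names and arithmetic are illustrative only;
# optimum-benchmark's internal bookkeeping may differ.
DEFAULT_ITERATIONS = 10     # --iterations: measured runs per benchmark
DEFAULT_OUTPUT_LENGTH = 64  # --output-length: min_new_tokens == max_new_tokens
# --warmup-runs (default 10) adds discarded warmup runs on top of the measured ones.


def measured_tokens(batch_size: int,
                    iterations: int = DEFAULT_ITERATIONS,
                    output_length: int = DEFAULT_OUTPUT_LENGTH) -> int:
    # With duration=0 the duration-based stop is disabled, so every measured iteration
    # generates exactly output_length tokens for each sequence in the batch.
    return iterations * batch_size * output_length


if __name__ == "__main__":
    for bsz in (1, 8, 32):
        print(f"batch_size={bsz}: {measured_tokens(bsz)} generated tokens per measured run")

An invocation exercising the new flags could look like: python benchmarking/inference_benchmark.py MODEL_ID --batches 1 8 --iterations 20 --warmup-runs 5 --output-length 128, where MODEL_ID and the numeric values are placeholders; the script's other existing options (for example --input-length and --out-dir, and its quantization config selection) work as before.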
