|
36 | 36 | # "Background_Matting": { |
37 | 37 | # "dim": 16, |
38 | 38 | # }, |
39 | | - "LearningToPaint": { |
40 | | - "dim": 1024, |
41 | | - }, |
| 39 | + # "LearningToPaint": { |
| 40 | + # "dim": 1024, |
| 41 | + # }, |
42 | 42 | "alexnet": { |
43 | 43 | "dim": 1024, |
44 | 44 | }, |
45 | | - "dcgan": { |
46 | | - "dim": 1024, |
47 | | - }, |
48 | | - "densenet121": { |
49 | | - "dim": 64, |
50 | | - }, |
| 45 | + # "densenet121": { |
| 46 | + # "dim": 64, |
| 47 | + # }, |
51 | 48 | "hf_Albert": { |
52 | 49 | "dim": 32, |
53 | 50 | "buffer_prefix": "albert" |
|
109 | 106 | "timm_resnest": { |
110 | 107 | "dim": 256, |
111 | 108 | }, |
112 | | - "timm_vision_transformer": { |
113 | | - "dim": 256, |
114 | | - }, |
| 109 | + # "timm_vision_transformer": { |
| 110 | + # "dim": 256, |
| 111 | + # "decomp_attn": True, |
| 112 | + # }, |
115 | 113 | "timm_vovnet": { |
116 | 114 | "dim": 128, |
117 | 115 | }, |
118 | | - "vgg16": { |
119 | | - "dim": 128, |
120 | | - }, |
| 116 | + # "vgg16": { |
| 117 | + # "dim": 128, |
| 118 | + # }, |
121 | 119 | } |
122 | 120 |
|
123 | 121 | # Adapted from pytorch.benchmarks.dynamo.common.main() |
@@ -213,10 +211,12 @@ def export_torchbench_model( |
213 | 211 | external_weight_path = None |
214 | 212 |
|
215 | 213 | decomp_list = [torch.ops.aten.reflection_pad2d] |
216 | | - if decomp_attn == True: |
| 214 | + if decomp_attn == True or torchbench_models_dict[model_id].get("decomp_attn"): |
| 215 | + print("decomposing attention for: " + model_id) |
217 | 216 | decomp_list.extend([ |
218 | 217 | torch.ops.aten._scaled_dot_product_flash_attention_for_cpu, |
219 | 218 | torch.ops.aten._scaled_dot_product_flash_attention.default, |
| 219 | + torch.ops.aten._scaled_dot_product_flash_attention, |
220 | 220 | torch.ops.aten.scaled_dot_product_attention, |
221 | 221 | ]) |
222 | 222 | with decompositions.extend_aot_decompositions( |
@@ -278,21 +278,37 @@ class CompiledTorchbenchModel(CompiledModule): |
278 | 278 | ) |
279 | 279 | return vmfb_path, external_weight_path, forward_args |
280 | 280 |
|
281 | | -def run_benchmark(device, vmfb_path, weights_path, example_args, model_id, csv_path): |
| 281 | + |
| 282 | +def _run_iter(runner, inputs): |
| 283 | + start = time.time() |
| 284 | + res = runner.ctx.modules.compiled_torchbench_model["main"](*inputs) |
| 285 | + return res, time.time() - start |
| 286 | + |
def run_benchmark(device, vmfb_path, weights_path, example_args, model_id, csv_path, iters):
    """Benchmark a compiled torchbench model and record its average latency.

    Args:
        device: IREE driver string; "rocm..." is rewritten to "hip..." since
            the IREE runtime registers the ROCm driver under the name "hip".
        vmfb_path: path to the compiled .vmfb module.
        weights_path: path to the external weights file (or None).
        example_args: torch example inputs (list/tuple or dict of tensors).
        model_id: model name written to the CSV row.
        csv_path: CSV file the result row is appended to.
        iters: number of timed iterations to average over.

    Side effects:
        Appends one row ("model", "avg_latency") to csv_path, writing the
        header first if the file is new or empty.
    """
    if "rocm" in device:
        device = "hip" + device.split("rocm")[-1]
    mod_runner = vmfbRunner(device, vmfb_path, weights_path)
    inputs = torch_to_iree(mod_runner, example_args)

    iter_latencies = []
    for _ in range(iters):
        _, iter_latency = _run_iter(mod_runner, inputs)
        iter_latencies.append(iter_latency)
    # Guard against iters == 0 so an empty run doesn't divide by zero.
    avg_latency = sum(iter_latencies) / len(iter_latencies) if iter_latencies else 0.0

    # Append (not "w") so results from multiple models accumulate in one CSV:
    # run_main calls this once per model, and "w" mode would clobber every
    # previously written row. Write the header only when the file is empty.
    with open(csv_path, "a", newline="") as csvfile:
        fieldnames = ["model", "avg_latency"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if csvfile.tell() == 0:
            writer.writeheader()
        writer.writerow({"model": model_id, "avg_latency": avg_latency})
294 | 303 |
|
295 | 304 |
|
def torch_to_iree(iree_runner, example_args):
    """Convert torch example inputs into IREE device arrays.

    Accepts either a dict (values are used, in insertion order) or a
    plain sequence of tensors; each tensor is detached, moved to CPU,
    and uploaded to the runner's device.
    """
    tensors = example_args.values() if isinstance(example_args, dict) else example_args
    return [
        ireert.asdevicearray(iree_runner.config.device, t.clone().detach().cpu())
        for t in tensors
    ]
| 311 | + |
296 | 312 | def run_main(model_id, args, tb_dir, tb_args): |
297 | 313 | print(f"exporting {model_id}") |
298 | 314 | mod_str, weights_path, example_args = export_torchbench_model( |
@@ -320,7 +336,7 @@ def run_main(model_id, args, tb_dir, tb_args): |
320 | 336 | f.write(mod_str) |
321 | 337 | print("Saved to", safe_name + ".mlir") |
322 | 338 | elif args.run_benchmark: |
323 | | - run_benchmark(args.device, mod_str, weights_path, example_args, model_id, args.output_csv) |
| 339 | + run_benchmark(args.device, mod_str, weights_path, example_args, model_id, args.output_csv, args.num_iters) |
324 | 340 |
|
325 | 341 | gc.collect() |
326 | 342 |
|
|
0 commit comments