Align microbench scripts with lint checks (imports, quotes, formatting)

RUIJIEZHONG66166 · RUIJIEZHONG66166 · commit 4e7733a10106 · 2025-08-07T03:01:37.000-07:00
diff --git a/test/microbench/adaptive_avg_pool2d.py b/test/microbench/adaptive_avg_pool2d.py
@@ -1,5 +1,5 @@
-import time
 import argparse
+import time
 
 import torch
 from torch.profiler import profile, ProfilerActivity
@@ -46,17 +46,20 @@ def Adaptive_AVGPool2d(shape, dtype, channels_last, backward, device):
     if backward:
         output[0].backward(grad)
 
+
 def run_profile(shape, dtype, channels_last, backward, device, num_iter):
     with profile(
-        activities=[ProfilerActivity.CPU,
-                  ProfilerActivity.XPU if device == "xpu" else ProfilerActivity.CUDA,
+        activities=[
+            ProfilerActivity.CPU,
+            ProfilerActivity.XPU if device == "xpu" else ProfilerActivity.CUDA,
         ],
         record_shapes=True,
     ) as prof:
         for i in range(num_iter):
             Adaptive_AVGPool2d(shape, dtype, channels_last, backward, device)
     print(prof.key_averages().table(sort_by=f"{device}_time_total"))
 
+
 def run_e2e(shape, dtype, channels_last, backward, device, num_iter):
     if device in ["xpu", "cuda"]:
         torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
@@ -69,6 +72,7 @@ def run_e2e(shape, dtype, channels_last, backward, device, num_iter):
     e2e_time = (t2 - t1) / num_iter
     print("E2E total time:", f"{float(e2e_time):.20f}")
 
+
 def benchmark(args):
     for shape in shape_list:
         for dtype in [torch.bfloat16, torch.float16, torch.float32]:
@@ -109,13 +113,14 @@ def benchmark(args):
                         args.num_iter,
                     )
 
+
 def parse_args():
     parser = argparse.ArgumentParser(description="OP Benchmark")
     parser.add_argument(
         "--device",
         type=str,
-        default='xpu',
-        help='Device to run on (e.g., "cpu", "cuda", "xpu")'
+        default="xpu",
+        help='Device to run on (e.g., "cpu", "cuda", "xpu")',
     )
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
@@ -125,6 +130,7 @@ def parse_args():
     parser.add_argument("--num-iter", type=int, default=20, help="Number of iterations")
     return parser.parse_args()
 
+
 if __name__ == "__main__":
     args = parse_args()
     benchmark(args)
diff --git a/test/microbench/indexing.index_fill.py b/test/microbench/indexing.index_fill.py
@@ -1,5 +1,6 @@
-import time
 import argparse
+import time
+
 import torch
 from torch.profiler import profile, ProfilerActivity
 
@@ -13,36 +14,43 @@ def Index_fill(input, indices, dim, device):
     else:
         output = input.index_fill(dim, indices, 2)
 
+
 def run_profile(input, indices, dim, cache_r, cache_w, device, num_iter):
     with profile(
-        activities=[ProfilerActivity.CPU,
-                  ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
+        activities=[
+            ProfilerActivity.CPU,
+            ProfilerActivity.XPU if device == "xpu" else ProfilerActivity.CUDA,
+        ],
         record_shapes=True,
     ) as prof:
         for i in range(num_iter):
             cache_r = cache_w * i
             Index_fill(input, indices, dim, device)
-    print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
+    print(prof.key_averages().table(sort_by=f"{device}_time_total"))
+
 
 def run_e2e(input, indices, dim, cache_r, cache_w, device, num_iter):
-    if device in ['xpu', 'cuda']:
-        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    if device in ["xpu", "cuda"]:
+        torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
     t1 = time.time()
     for i in range(num_iter):
         cache_r = cache_w * i
         Index_fill(input, indices, dim, device)
-    if device in ['xpu', 'cuda']:
-        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    if device in ["xpu", "cuda"]:
+        torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
     t2 = time.time()
     e2e_time = (t2 - t1) / num_iter
     print("E2E total time:", f"{float(e2e_time):.20f}")
 
+
 def benchmark(args):
     for shape in shape_list:
         for dtype in [torch.bfloat16, torch.float16, torch.float32]:
             for dim in [0, 1]:
                 input = torch.zeros(shape, dtype=dtype, device=args.device)
-                indices = torch.linspace(0, 1022, steps=512, device=args.device).to(torch.long)
+                indices = torch.linspace(0, 1022, steps=512, device=args.device).to(
+                    torch.long
+                )
                 y_0 = torch.ones((512, 1024), dtype=dtype, device=args.device)
                 y_1 = torch.randn((1024, 512), dtype=dtype, device=args.device)
                 cache_r = torch.randn((1024 * 1024 * 1024), device=args.device)
@@ -62,24 +70,45 @@ def benchmark(args):
                     backward,
                 )
                 if not args.e2e_only:
-                    run_profile(input, indices, dim, cache_r, cache_w, args.device, args.num_iter)
+                    run_profile(
+                        input,
+                        indices,
+                        dim,
+                        cache_r,
+                        cache_w,
+                        args.device,
+                        args.num_iter,
+                    )
 
                 if not args.profile_only:
-                    run_e2e(input, indices, dim, cache_r, cache_w, args.device, args.num_iter)
+                    run_e2e(
+                        input,
+                        indices,
+                        dim,
+                        cache_r,
+                        cache_w,
+                        args.device,
+                        args.num_iter,
+                    )
+
 
 def parse_args():
-    parser = argparse.ArgumentParser(description='OP Benchmark')
-    parser.add_argument('--device', type=str, default='xpu',
-                        help='Device to run on (e.g., "cpu", "cuda", "xpu")')
+    parser = argparse.ArgumentParser(description="OP Benchmark")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="xpu",
+        help='Device to run on (e.g., "cpu", "cuda", "xpu")',
+    )
     group = parser.add_mutually_exclusive_group()
-    group.add_argument('--profile-only', action='store_true',
-                       help='Only Run profile timing')
-    group.add_argument('--e2e-only', action='store_true',
-                       help='Only Run E2E timing')
-    parser.add_argument('--num-iter', type=int, default=20,
-                        help='Number of iterations')
+    group.add_argument(
+        "--profile-only", action="store_true", help="Only Run profile timing"
+    )
+    group.add_argument("--e2e-only", action="store_true", help="Only Run E2E timing")
+    parser.add_argument("--num-iter", type=int, default=20, help="Number of iterations")
     return parser.parse_args()
 
+
 if __name__ == "__main__":
     args = parse_args()
     benchmark(args)