@@ -2503,25 +2503,24 @@ def pointwise(
25032503 size_hints , TRITON_MAX_BLOCK ["X" ], waves_per_eu = 2
25042504 ),
25052505 triton_config_with_settings (
2506- size_hints , 4096 # wrt: better than the max_block for some kernel
2506+ size_hints ,
2507+ 4096 , # wrt: better than the max_block for some kernel
25072508 ),
25082509 * hinted_configs ,
25092510 ]
25102511 # Additional reduction configs appended for ROCm builds
25112512 if torch .version .hip :
2512- configs .append (triton_config_with_settings (
2513- size_hints ,
2514- 2048 ,
2515- num_warps = 8 ,
2516- num_stages = 2 ,
2517- waves_per_eu = 1
2518- )) # 20% improvement
2513+ configs .append (
2514+ triton_config_with_settings (
2515+ size_hints , 2048 , num_warps = 8 , num_stages = 2 , waves_per_eu = 1
2516+ )
2517+ ) # 20% improvement
25192518 if len (size_hints ) == 2 :
25202519 # Only avoiding tuning on TileHint.SQUARE if not on ROCm builds
25212520 # ROCm has observed improvement by diverging here
25222521 if (
2523- disable_pointwise_autotuning (inductor_meta )
2524- or (torch .version .hip is None and tile_hint == TileHint .SQUARE )
2522+ disable_pointwise_autotuning (inductor_meta )
2523+ or (torch .version .hip is None and tile_hint == TileHint .SQUARE )
25252524 ) and not (
25262525 inductor_meta .get ("max_autotune" )
25272526 or inductor_meta .get ("max_autotune_pointwise" )
@@ -2530,13 +2529,19 @@ def pointwise(
25302529 else :
25312530 configs = [
25322531 triton_config_with_settings (size_hints , 32 , 32 ),
2533- triton_config_with_settings (size_hints , 64 , 32 ), # better for some kernels
2532+ triton_config_with_settings (
2533+ size_hints , 64 , 32
2534+ ), # better for some kernels
25342535 triton_config_with_settings (size_hints , 64 , 64 ), # ~8% better for fp16
25352536 triton_config_with_settings (size_hints , 256 , 16 ),
25362537 triton_config_with_settings (size_hints , 16 , 256 ),
2537- triton_config_with_settings (size_hints , 128 , 16 ), # +10% for some kernels
2538- triton_config_with_settings (size_hints , 128 , 32 ), # additional 10% more
2539- triton_config_with_settings (size_hints , 32 , 512 ), # +30% for some kernels
2538+ triton_config_with_settings (
2539+ size_hints , 128 , 16
2540+ ), # +10% for some kernels
2541+ triton_config_with_settings (size_hints , 128 , 32 ), # additional 10% more
2542+ triton_config_with_settings (
2543+ size_hints , 32 , 512
2544+ ), # +30% for some kernels
25402545 triton_config_with_settings (size_hints , bs , 1 ),
25412546 triton_config_with_settings (size_hints , 1 , bs ),
25422547 * hinted_configs ,
0 commit comments