Skip to content

Commit a453dea

Browse files
committed
Block size check, etc.: pass num_stages through to the 2D tiling configs
1 parent f8d0c10 commit a453dea

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

torch/_inductor/runtime/triton_heuristics.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2572,7 +2572,7 @@ def pointwise(
25722572
"""add 2D tiling configs, but don't use triton_config_with_settings function
25732573
as it is buggy and might change the tiling randomly
25742574
"""
2575-
def addConfig__(xblock:int, yblock:int, num_warps:int):
2575+
def addConfig__(xblock:int, yblock:int, num_warps:int, num_stages:int):
25762576
# only add a tiling config if size is bigger than the tile
25772577
# also check for grid overflow
25782578
xgrid = (size_hints["x"] + xblock - 1) // xblock
@@ -2586,12 +2586,12 @@ def addConfig__(xblock:int, yblock:int, num_warps:int):
25862586
if size_hints["y"] < yblock:
25872587
return
25882588
# all good, add the config
2589-
configs.append(Config({"XBLOCK": xblock, "YBLOCK": yblock}, num_warps=num_warps))
2590-
addConfig__(512, 8, 8) # wrt1/t21 # triton_poi_fused__unsafe_view_add_addmm_cat_clone_permute_split_with_sizes_view_19
2591-
addConfig__(32, 128, 4) # wrt2: 570us : triton_poi_fused_add_transpose_view_52
2592-
addConfig__(64, 32, 8) # wrt3: 150us: triton_poi_fused__to_copy_add_native_layer_norm_native_layer_norm_backward_permute_view_103
2593-
addConfig__(64, 256, 4) # wri0: 70us: triton_poi_fused_clone_tanh_transpose_19
2594-
addConfig__(512, 64, 8) # wri0: 58us: triton_poi_fused_clone_53
2589+
configs.append(Config({"XBLOCK": xblock, "YBLOCK": yblock}, num_warps=num_warps, num_stages=num_stages))
2590+
addConfig__(512, 8, 8,1 ) # wrt1/t21 # triton_poi_fused__unsafe_view_add_addmm_cat_clone_permute_split_with_sizes_view_19
2591+
addConfig__(32, 128, 4, 1) # wrt2: 570us : triton_poi_fused_add_transpose_view_52
2592+
addConfig__(64, 32, 8, 1) # wrt3: 150us: triton_poi_fused__to_copy_add_native_layer_norm_native_layer_norm_backward_permute_view_103
2593+
addConfig__(64, 256, 4, 1) # wri0: 70us: triton_poi_fused_clone_tanh_transpose_19
2594+
addConfig__(512, 64, 8, 1) # wri0: 58us: triton_poi_fused_clone_53
25952595

25962596
if len(size_hints) == 3:
25972597
if disable_pointwise_autotuning(inductor_meta):

0 commit comments

Comments
 (0)