@@ -2568,17 +2568,30 @@ def pointwise(
25682568 * hinted_configs ,
25692569 ]
25702570 if torch .version .hip :
2571- configs += [ # add here
2572- ]
2573- # bypass triton_config_with_settings -> triton_config logic
25742571 if "x" in size_hints and "y" in size_hints :
2575- configs += [
2576- Config ({"XBLOCK" : 512 , "YBLOCK" : 8 }, num_warps = 8 ), # wrt1/t21 # triton_poi_fused__unsafe_view_add_addmm_cat_clone_permute_split_with_sizes_view_19
2577- Config ({"XBLOCK" : 32 , "YBLOCK" : 128 }, num_warps = 4 ), # wrt2: 570us : triton_poi_fused_add_transpose_view_52
2578- Config ({"XBLOCK" :64 , "YBLOCK" : 32 }, num_warps = 8 ), # wrt3: 150us: triton_poi_fused__to_copy_add_native_layer_norm_native_layer_norm_backward_permute_view_103
2579- Config ({"XBLOCK" :64 , "YBLOCK" : 256 }, num_warps = 4 ), # wri0: 70us: triton_poi_fused_clone_tanh_transpose_19
2580- Config ({"XBLOCK" :512 , "YBLOCK" : 64 }, num_warps = 8 ), # wri0: 58us: triton_poi_fused_clone_53
2581- ]
2572+ """add 2D tiling configs, but don't use triton_config_with_settings function
2573+ as it is buggy and might change the tiling randomly
2574+ """
def addConfig__(xblock: int, yblock: int, num_warps: int, num_stages: int):
    """Append one fixed 2D tiling config to ``configs`` if it suits ``size_hints``.

    Nothing is appended when either size hint is smaller than its block
    size, or when the number of blocks needed along an axis exceeds the
    grid bound checked below.
    """
    hint_x = size_hints["x"]
    hint_y = size_hints["y"]
    # Blocks needed per axis, rounded up (ceiling division).
    blocks_x = (hint_x + xblock - 1) // xblock
    blocks_y = (hint_y + yblock - 1) // yblock
    # Grid overflow guard; the bounds are presumably the launch-grid
    # limits of the target device -- TODO confirm.
    grid_fits = blocks_x <= 2147483647 and blocks_y <= 65535
    # Only tile problems that are at least as large as the tile itself.
    tile_fits = hint_x >= xblock and hint_y >= yblock
    if grid_fits and tile_fits:
        configs.append(
            Config(
                {"XBLOCK": xblock, "YBLOCK": yblock},
                num_warps=num_warps,
                num_stages=num_stages,
            )
        )
2590+ addConfig__ (512 , 8 , 8 ,1 ) # wrt1/t21 # triton_poi_fused__unsafe_view_add_addmm_cat_clone_permute_split_with_sizes_view_19
2591+ addConfig__ (32 , 128 , 4 , 1 ) # wrt2: 570us : triton_poi_fused_add_transpose_view_52
2592+ addConfig__ (64 , 32 , 8 , 1 ) # wrt3: 150us: triton_poi_fused__to_copy_add_native_layer_norm_native_layer_norm_backward_permute_view_103
2593+ addConfig__ (64 , 256 , 4 , 1 ) # wri0: 70us: triton_poi_fused_clone_tanh_transpose_19
2594+ addConfig__ (512 , 64 , 8 , 1 ) # wri0: 58us: triton_poi_fused_clone_53
25822595
25832596 if len (size_hints ) == 3 :
25842597 if disable_pointwise_autotuning (inductor_meta ):
0 commit comments