
Commit 8255e76

[FlexAttn] Set USE_TMA explicitly (#5075)
This PR removes the `use_tma.patch` patch, as there is no plan to upstream it to PyTorch; instead, `USE_TMA` is set explicitly through `kernel_options` in the FlexAttention benchmark.

Signed-off-by: Whitney Tsang <[email protected]>

1 parent 42793a2 · commit 8255e76

File tree: 3 files changed, +1 -38 lines


benchmarks/triton_kernels_benchmark/flex_attention_benchmark_causal_mask.py

Lines changed: 1 addition & 1 deletion
@@ -156,7 +156,7 @@ def benchmark(Z, H_q, H_kv, N_CTX_q, N_CTX_kv, D_HEAD_qk, D_HEAD_v, MODE, provid
                          device=DEVICE)
 
     elif provider == 'triton':
-        kernel_options = {'BLOCKS_ARE_CONTIGUOUS': True}
+        kernel_options = {'BLOCKS_ARE_CONTIGUOUS': True, 'USE_TMA': True}
         triton_fn = lambda: compiled_flex_attention(q, k, v, block_mask=block_mask, scale=sm_scale, enable_gqa=(
             not H_q == H_kv), kernel_options=kernel_options)
         if MODE == 'bwd':
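For reference, below is a minimal sketch of the call pattern after this change: `USE_TMA` is requested per call through `kernel_options` rather than being hard-wired into PyTorch by the removed patch. The device, tensor shapes, dtype, and the causal mask_mod are illustrative assumptions, not values from the benchmark, and the option only takes effect on a PyTorch build whose FlexAttention Triton template recognizes `USE_TMA`.

# Minimal sketch, assuming a PyTorch build whose FlexAttention Triton
# template recognizes the USE_TMA kernel option; shapes/dtype are illustrative.
import torch
from torch.nn.attention.flex_attention import create_block_mask, flex_attention

DEVICE = 'xpu'                    # assumption: Intel GPU backend; use 'cuda' elsewhere
B, H, SEQ, D = 4, 16, 1024, 64    # illustrative sizes, not the benchmark's

q = torch.randn(B, H, SEQ, D, device=DEVICE, dtype=torch.float16)
k = torch.randn(B, H, SEQ, D, device=DEVICE, dtype=torch.float16)
v = torch.randn(B, H, SEQ, D, device=DEVICE, dtype=torch.float16)

def causal(b, h, q_idx, kv_idx):  # simple causal mask_mod for the example
    return q_idx >= kv_idx

block_mask = create_block_mask(causal, B, H, SEQ, SEQ, device=DEVICE)
compiled_flex_attention = torch.compile(flex_attention)

# USE_TMA is now passed explicitly per call instead of being forced on
# by the deleted use_tma.patch.
kernel_options = {'BLOCKS_ARE_CONTIGUOUS': True, 'USE_TMA': True}
out = compiled_flex_attention(q, k, v, block_mask=block_mask,
                              kernel_options=kernel_options)

Keeping the flag in `kernel_options` means the benchmark controls TMA usage itself, so no out-of-tree patch to PyTorch is required.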

scripts/patch-pytorch.sh

Lines changed: 0 additions & 1 deletion
@@ -35,4 +35,3 @@ apply_patch() {
 echo "Applying PyTorch patches in $REPO_ROOT"
 
 # put your patch applies here
-apply_patch ./patch/use_tma.patch

scripts/patch/use_tma.patch

Lines changed: 0 additions & 36 deletions
This file was deleted.
