
Commit 863a713

Fix diff-train errors (#315)

* fix mis-sync caused by diff-train
* remove comments
* fix lint

1 parent 78d1d8b

File tree: 6 files changed (+11, −23 lines)

test/test_gpu/skip_tests_h100_pytorch.yaml (1 addition, 0 deletions)

@@ -35,6 +35,7 @@ jagged_layer_norm:
 jagged_mean:
 jagged_softmax:
 jagged_sum:
+gdpa:
 ragged_attention:
 # cpu-op for testing
 test_op:

test/test_gpu/skip_tests_h100_triton_main.yaml (1 addition, 0 deletions)

@@ -35,6 +35,7 @@ jagged_layer_norm:
 jagged_mean:
 jagged_softmax:
 jagged_sum:
+gdpa:
 # cpu-op for testing
 test_op:
 # TODO: decoding attention requires updated xformers and flash_attn
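
For context, these YAML files list operators whose GPU tests should be skipped; adding "gdpa:" keeps the new operator out of the H100 CI runs. A minimal sketch of how such a skip list could be consumed (the loader below is a hypothetical illustration, not the actual tritonbench test harness):

    import yaml

    # Load the skip list; top-level keys are operator names to skip.
    with open("test/test_gpu/skip_tests_h100_pytorch.yaml") as f:
        skip_list = yaml.safe_load(f) or {}

    def should_skip(op_name: str) -> bool:
        # An operator is skipped if it appears as a top-level key.
        return op_name in skip_list

    assert should_skip("gdpa")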

tritonbench/operators/gdpa/gdpa.py (0 additions, 14 deletions)

@@ -35,25 +35,11 @@
 from .math import (
     activation_string_to_int,
     fast_gelu,
-    fast_gelu_bf16,
-    fast_gelu_bf16_grad,
     fast_gelu_grad,
-    fast_silu,
-    fast_silu_grad,
     gelu,
-    gelu_approx,
-    gelu_approx_grad,
     gelu_grad,
-    leaky_relu,
-    leaky_relu_grad,
     raw,
     raw_grad,
-    relu,
-    relu_grad,
-    silu,
-    silu_grad,
-    tanh,
-    tanh_approx_bf16,
     tanh_approx_fp32,
 )
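
These removed names are presumably the mis-sync the commit message refers to: if the synced copy of math.py no longer defines them, the stale import list fails as soon as gdpa.py is loaded, since Python resolves from-imports eagerly. A minimal illustration (module and function names here are made up for demonstration):

    # math_stub.py defines only: def gelu(x): return x
    from math_stub import gelu       # fine
    from math_stub import fast_silu  # raises ImportError at import time:
                                     # cannot import name 'fast_silu'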

tritonbench/operators/gdpa/gdpa_utils.py (1 addition, 1 deletion)

@@ -1,4 +1,4 @@
-# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+# (c) Meta Platforms, Inc. and affiliates.
 
 # pyre-strict
 import math

tritonbench/operators/gdpa/math.py (2 additions, 1 deletion)

@@ -1,4 +1,4 @@
-# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+# (c) Meta Platforms, Inc. and affiliates.
 
 # pyre-unsafe
 

@@ -84,6 +84,7 @@ def gelu_grad(x):
     @triton.jit
     def tanh_approx_fp32(x):
         return tanh(x)
+
 else:
 
     @triton.jit

tritonbench/operators/gdpa/operator.py (6 additions, 7 deletions)

@@ -18,14 +18,10 @@
 
 import argparse
 import gc
-import re
 from typing import Any, Callable, Generator, List, Optional
 
 import torch
 
-from .gdpa import gdpa
-from .gdpa_utils import generate_jagged_data
-
 from tritonbench.utils.triton_op import (
     BenchmarkOperator,
     BenchmarkOperatorMetrics,

@@ -35,6 +31,9 @@
     register_x_val,
 )
 
+from .gdpa import gdpa
+from .gdpa_utils import generate_jagged_data
+
 
 def calculate_memory_size(jagged_q, jagged_k, jagged_v, real_output, run_fwd, run_bwd):
     def tensor_size(tensor):

@@ -103,19 +102,19 @@ def parse_args(args):
         "--max_seq_len",
         default=1000,
         type=str,
-        help=f"Max sequence length for Q",
+        help="Max sequence length for Q",
     )
     parser.add_argument(
         "--dim",
         default=512,
         type=str,
-        help=f"Query dimension",
+        help="Query dimension",
     )
     parser.add_argument(
         "--head",
         default=4,
         type=str,
-        help=f"Multi head number",
+        help="Multi head number",
     )
     parser.add_argument(
         "--kv_len",
