Grab txt file from huggingface as the default (#38)

PaliC · web-flow · commit d49a1671d949 · 2025-07-23T08:47:55.000-07:00
diff --git a/BackendBench/torchbench_suite.py b/BackendBench/torchbench_suite.py
@@ -4,12 +4,18 @@
 
 import math
 import re
+import tempfile
 from collections import defaultdict
 from pathlib import Path
 
+import requests
 import torch
 from torch.testing import make_tensor
 
+# The schema for this dataset is the one defined in the tritonbench traces,
+# e.g. https://github.com/pytorch-labs/tritonbench/blob/main/tritonbench/data/input_configs/hf_train/AlbertForMaskedLM_training.txt
+DEFAULT_HUGGINGFACE_URL = "https://huggingface.co/datasets/GPUMODE/huggingface_op_trace/resolve/main/tritonbench_op_trace.txt"
+
 
 dtype_abbrs = {
     torch.bfloat16: "bf16",
@@ -120,11 +126,29 @@ def _parse_inputs(filename, filter, op_inputs):
 
 
 class TorchBenchTestSuite:
-    def __init__(self, name, filename, filter=None, topn=None):
+    def __init__(self, name, filename=None, filter=None, topn=None):
         self.name = name
         self.topn = topn
         self.optests = defaultdict(list)
-        if Path(filename).is_dir():
+
+        # Use default URL if no filename provided
+        if filename is None:
+            filename = DEFAULT_HUGGINGFACE_URL
+
+        # Check if filename is a URL
+        if isinstance(filename, str) and (
+            filename.startswith("http://") or filename.startswith("https://")
+        ):
+            with (
+                tempfile.NamedTemporaryFile(mode="w+", suffix=".txt", delete=False) as tmp_file,
+                requests.get(filename) as response,
+            ):
+                response.raise_for_status()
+                tmp_file.write(response.text)
+                tmp_file.flush()
+                _parse_inputs(tmp_file.name, filter, self.optests)
+                Path(tmp_file.name).unlink(missing_ok=True)
+        elif Path(filename).is_dir():
             for file_path in Path(filename).glob("**/*.txt"):
                 _parse_inputs(str(file_path), filter, self.optests)
         else:
@@ -148,6 +172,8 @@ def __iter__(self):
                     "native_layer_norm_backward",
                     "upsample_nearest2d_backward.vec",
                     "upsample_bilinear2d_backward.vec",
+                    "_cudnn_rnn_backward.default",  # RuntimeError: cuDNN error: CUDNN_STATUS_BAD_PARAM
+                    "_fft_c2c.default",  # cuFFT only supports dimensions whose sizes are powers of two when computing in half precision
                 ]
             ):
                 # TODO: indexing ops need valid indices
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,4 +1,4 @@
 [tool.ruff]
 line-length = 100
 
-[tool.ruff.format]
+[tool.ruff.format]
diff --git a/requirements.txt b/requirements.txt
@@ -3,4 +3,5 @@ click
 numpy
 expecttest
 anthropic>=0.34.0
-pytest
+pytest
+requests
diff --git a/scripts/main.py b/scripts/main.py
@@ -10,7 +10,7 @@
 from BackendBench.llm_client import ClaudeKernelGenerator
 from BackendBench.opinfo_suite import OpInfoTestSuite
 from BackendBench.suite import SmokeTestSuite
-from BackendBench.torchbench_suite import TorchBenchTestSuite
+from BackendBench.torchbench_suite import DEFAULT_HUGGINGFACE_URL, TorchBenchTestSuite
 
 logger = logging.getLogger(__name__)
 
@@ -80,7 +80,7 @@ def setup_logging(log_level):
 )
 @click.option(
     "--torchbench-data-path",
-    default="third_party/tritonbench/tritonbench/data/input_configs",
+    default=DEFAULT_HUGGINGFACE_URL,
     type=str,
     help="Path to TorchBench operator data",
 )