diff --git a/BackendBench/huggingface_tracer/README.md b/BackendBench/huggingface_tracer/README.md
new file mode 100644
index 0000000..6461932
--- /dev/null
+++ b/BackendBench/huggingface_tracer/README.md
@@ -0,0 +1,87 @@
+# Sample Inputs Schema
+
+This directory contains outputs of the HuggingFace tracer, which store traced PyTorch operation inputs from HuggingFace models.
+
+[hf_op_trace.json](https://huggingface.co/datasets/GPUMODE/huggingface_op_trace/resolve/main/hf_op_trace.json) contains an example of what these look like, with the outputs from 20 models.
+
+## Schema Structure
+
+```json
+{
+  "operation_name": {
+    "total_calls": <int>,
+    "unique_input_count": <int>,
+    "unique_inputs": [
+      {
+        "op_name": "<operation_name>",
+        "input_shapes": [<shape_or_null>, ...],
+        "input_dtypes": ["<dtype_string>", ...],
+        "non_tensor_inputs": [<value_or_null_or_tensor_list_ref>, ...],
+        "tensor_lists": {<tensor_list_metadata>},
+        "count": <int>
+      }
+    ]
+  }
+}
+```
+
+## Field Descriptions
+
+- **`input_shapes`**: List of tensor shapes (e.g., `[1, 3, 224, 224]`) or `null` for non-tensor inputs
+- **`input_dtypes`**: List of type strings (e.g., `"torch.float32"`, `"<class 'int'>"`, `"<class 'list'>"`)
+- **`non_tensor_inputs`**: Actual non-tensor values, `null` for tensors, or `{"tensor_list_ref": <id>}` for tensor lists
+- **`tensor_lists`**: Metadata for tensor lists, keyed by string IDs:
+  ```json
+  {
+    "0": {
+      "length": <int>,
+      "shapes": [[<shape>], ...],
+      "dtypes": ["<dtype>", ...]
+    }
+  }
+  ```
+- **`count`**: Frequency of this input combination in the traced data
+
+**Note**: All dtypes (in `input_dtypes` and `tensor_lists`) are serialized as strings in the JSON file (e.g., the string `"torch.float32"` rather than the `torch.float32` object). They should be converted back to the corresponding Python/torch types before use.
+
+## Examples
+
+**Simple tensor input:**
+```json
+"input_shapes": [[2, 13]],
+"input_dtypes": ["torch.int64"],
+"non_tensor_inputs": [null]
+```
+
+**Tensor list input:**
+```json
+"input_shapes": [null, null],
+"input_dtypes": ["<class 'list'>", "<class 'int'>"],
+"non_tensor_inputs": [{"tensor_list_ref": 0}, 1],
+"tensor_lists": {
+  "0": {
+    "length": 3,
+    "shapes": [[1, 128, 20, 20], [1, 128, 20, 20], [1, 128, 20, 20]],
+    "dtypes": ["torch.float32", "torch.float32", "torch.float32"]
+  }
+}
+```
+
+**Example entry with non-tensor inputs:**
+```json
+"convolution.default": {
+  "total_calls": 108,
+  "unique_input_count": 67,
+  "unique_inputs": [
+    {
+      "op_name": "convolution.default",
+      "input_shapes": [[1, 256, 14, 14], [1024, 256, 1, 1], null, null, null, null, null, null, null],
+      "input_dtypes": ["torch.float32", "torch.float32", "<class 'NoneType'>", "<class 'list'>", "<class 'list'>", "<class 'list'>", "<class 'bool'>", "<class 'list'>", "<class 'int'>"],
+      "non_tensor_inputs": [null, null, null, [1, 1], [0, 0], [1, 1], false, [0, 0], 1],
+      "tensor_lists": {},
+      "count": 6
+    },
+    ...
+  ]
+}
+```
diff --git a/BackendBench/huggingface_tracer/__init__.py b/BackendBench/huggingface_tracer/__init__.py
new file mode 100644
index 0000000..8054c98
--- /dev/null
+++ b/BackendBench/huggingface_tracer/__init__.py
@@ -0,0 +1,20 @@
+"""
+HuggingFace Tracer Test Suite Package.
+
+This package provides functionality for creating and running test suites
+based on HuggingFace tracer data.
+"""
+
+from .suite import (
+    build_huggingface_tracer_tests,
+    HuggingFaceTracerOpTest,
+    HuggingFaceTracerTest,
+    HuggingFaceTracerTestSuite,
+)
+
+__all__ = [
+    "HuggingFaceTracerTest",
+    "HuggingFaceTracerOpTest",
+    "HuggingFaceTracerTestSuite",
+    "build_huggingface_tracer_tests",
+]
diff --git a/BackendBench/huggingface_tracer/manual_ops_mapping.json b/BackendBench/huggingface_tracer/manual_ops_mapping.json
new file mode 100644
index 0000000..f9d8843
--- /dev/null
+++ b/BackendBench/huggingface_tracer/manual_ops_mapping.json
@@ -0,0 +1,74 @@
+{
+    "lift_fresh": {
+        "cpu": ["torch.float32", "torch.int64", "torch.uint8"],
+        "cuda": ["torch.float32", "torch.int64", "torch.uint8"]
+    },
+    "_to_copy": {
+        "cpu": ["torch.bool", "torch.float32", "torch.int64", "torch.uint8"],
+        "cuda": ["torch.bool", "torch.float32", "torch.int64", "torch.uint8"]
+    },
+    "convolution": {
+        "cpu": ["torch.float32"],
+        "cuda": ["torch.float32"]
+    },
+    "_scaled_dot_product_efficient_attention": {
+        "cpu": ["torch.float32"],
+        "cuda": ["torch.float32"]
+    },
+    "detach": {
+        "cpu": ["torch.float32"],
+        "cuda": ["torch.float32"]
+    },
+    "_has_compatible_shallow_copy_type": {
+        "cpu": ["torch.float32"],
+        "cuda": ["torch.float32"]
+    },
+    "detach_": {
+        "cpu": ["torch.int64"],
+        "cuda": ["torch.int64"]
+    },
+    "is_nonzero": {
+        "cpu": ["torch.bool"],
+        "cuda": ["torch.bool"]
+    },
+    "linalg_vector_norm": {
+        "cpu": ["torch.float32"],
+        "cuda": ["torch.float32"]
+    },
+    "_local_scalar_dense": {
+        "cpu": ["torch.bool", "torch.float32"],
+        "cuda": ["torch.bool", "torch.float32"]
+    },
+    "cudnn_batch_norm": {
+        "cpu": ["torch.float32"],
+        "cuda": ["torch.float32"]
+    },
+    "max_pool2d_with_indices": {
+        "cpu": ["torch.float32"],
+        "cuda": ["torch.float32"]
+    },
+    "copy_": {
+        "cpu": ["torch.float32"],
+        "cuda": ["torch.float32"]
+    },
+    "upsample_nearest2d": {
+        "cpu": ["torch.float32"],
+        "cuda": ["torch.float32"]
+    },
+    "alias": {
+        "cpu": ["torch.float32"],
+        "cuda": ["torch.float32"]
+    },
+    "type_as": {
+        "cpu": ["torch.int32", "torch.int64"],
+        "cuda": ["torch.int32", "torch.int64"]
+    },
+    "_scaled_dot_product_flash_attention_for_cpu": {
+        "cpu": ["torch.float32"],
+        "cuda": []
+    },
+    "_softmax": {
+        "cpu": ["torch.float32"],
+        "cuda": ["torch.float32"]
+    }
+}
diff --git a/BackendBench/huggingface_tracer/suite.py b/BackendBench/huggingface_tracer/suite.py
new file mode 100644
index 0000000..08d1663
--- /dev/null
+++ b/BackendBench/huggingface_tracer/suite.py
@@ -0,0 +1,344 @@
+"""
+HuggingFace Tracer Test Suite.
+
+This module provides test suite functionality for HuggingFace tracer data,
+including test classes and the main test suite implementation.
+"""
+
+import logging
+import os
+from typing import Any, Dict, List, Optional
+
+import torch
+
+from BackendBench.suite import OpTest, TestSuite
+from torch.testing._internal.common_methods_invocations import op_db
+
+from .tracer_parser import (
+    create_single_tensor,
+    create_tensor_list,
+    load_json_data,
+    select_unique_inputs,
+    SPECIAL_CASES,
+)
+
+DEFAULT_JSON_SOURCE = (
+    "https://huggingface.co/datasets/GPUMODE/huggingface_op_trace/resolve/main/hf_op_trace.json"
+)
+logger = logging.getLogger(__name__)
+
+# todo: This is a manual mapping of the ops that are not supported by opinfo but are still present
+# in the huggingface models. This is a temporary solution until we have a better way of
+# handling these ops.
+
+MANUAL_OPS_FILE = "manual_ops_mapping.json"
+
+
+class HuggingFaceTracerTest:
+    """A single test case: the call arguments for one traced invocation."""
+
+    def __init__(self, *args, **kwargs):
+        """
+        Capture the arguments that make up this test case.
+
+        Args:
+            *args: Stored as a tuple on ``self.args``
+            **kwargs: Stored as a dict on ``self.kwargs``
+        """
+        self.kwargs = kwargs
+        self.args = args
+
+
+class HuggingFaceTracerOpTest(OpTest):
+    """Operator-level test built from traced HuggingFace input records."""
+
+    def __init__(
+        self,
+        op_name: str,
+        selected_unique_inputs: List[Dict[str, Any]],
+        device: str = "cpu",
+        dtype: torch.dtype = torch.float32,
+    ):
+        """
+        Set up the test for one traced operator.
+
+        Args:
+            op_name: Operator name, optionally with an overload suffix
+            selected_unique_inputs: Traced input records to turn into tests
+            device: Device on which generated tensors are created
+            dtype: Fallback dtype handed to the tensor factories
+        """
+        self.op_name = op_name
+        self.device = device
+        self.dtype = dtype
+        self._selected_unique_inputs = selected_unique_inputs
+        self.performance_tests = []
+        self.op = self._get_torch_op(op_name)
+
+    def _get_torch_op(self, op_name: str):
+        """
+        Resolve an operator name against ``torch.ops.aten``.
+
+        Args:
+            op_name: Either "<op>" or "<op>.<overload>"
+
+        Returns:
+            The resolved operation, or None (after a warning) if lookup fails
+        """
+        try:
+            pieces = op_name.split(".") if "." in op_name else [op_name]
+            if len(pieces) == 2:
+                # "<op>.<overload>": fetch the packet first, then the overload
+                packet_name, overload_name = pieces
+                packet = getattr(torch.ops.aten, packet_name)
+                return getattr(packet, overload_name)
+            # No overload (or an unexpected number of dots): look the name up verbatim
+            return getattr(torch.ops.aten, op_name)
+        except AttributeError:
+            logger.warning(f"Could not find torch operation for {op_name}")
+            return None
+
+    @property
+    def correctness_tests(self):
+        """Lazily yield one test case per selected input record."""
+        for record in self._selected_unique_inputs:
+            converted = self._convert_args_to_tensors(record)
+            yield HuggingFaceTracerTest(*converted)
+
+    def _convert_args_to_tensors(self, combination: Dict[str, Any]) -> List[Any]:
+        """
+        Materialize every argument described by one traced input record.
+
+        The per-argument lists are walked in lockstep with ``zip``, so entries
+        beyond the length of the shortest list are ignored.
+
+        Args:
+            combination: Record holding "input_shapes", "input_dtypes",
+                "non_tensor_inputs" and, optionally, "tensor_lists"
+
+        Returns:
+            Positional arguments (tensors and plain values) in record order
+        """
+        # Build the lockstep iterator over the three parallel lists
+        per_argument = zip(
+            combination["input_shapes"],
+            combination["input_dtypes"],
+            combination["non_tensor_inputs"],
+        )
+        list_metadata = combination.get("tensor_lists", {})
+        logger.debug(f"Converting args for {self.op_name}: {combination}")
+
+        return [
+            self._convert_single_arg(shape, dtype_str, value, list_metadata, position)
+            for position, (shape, dtype_str, value) in enumerate(per_argument)
+        ]
+
+    def _convert_single_arg(
+        self,
+        shape: Any,
+        dtype_str: str,
+        non_tensor_input: Any,
+        tensor_lists: Dict[str, Any],
+        arg_index: int,
+    ) -> Any:
+        """
+        Turn one argument's JSON description into a runtime value.
+
+        Args:
+            shape: Tensor shape as a list, or None for non-tensor arguments
+            dtype_str: Serialized type of the argument
+            non_tensor_input: Literal value or tensor-list reference, else None
+            tensor_lists: Tensor-list metadata keyed by string reference id
+            arg_index: Position of the argument, used in diagnostics
+
+        Returns:
+            A tensor, a list of tensors, None, or the literal value
+        """
+        if non_tensor_input is not None:
+            return self._handle_non_tensor_input(non_tensor_input, dtype_str, tensor_lists)
+        if dtype_str == "<class 'NoneType'>":
+            return None
+        if shape is None and dtype_str == "<class 'list'>":
+            logger.warning(
+                f"Found <class 'list'> dtype but no tensor_list_ref for argument {arg_index}"
+            )
+            return []
+        # Only a tensor description is left at this point
+        return self._handle_tensor_input(shape, dtype_str, arg_index)
+
+    def _handle_non_tensor_input(
+        self, non_tensor_input: Any, dtype_str: str, tensor_lists: Dict[str, Any]
+    ) -> Any:
+        """Resolve a non-tensor value: list reference, dtype name, or literal."""
+        # A {"tensor_list_ref": <id>} dict points at an entry of tensor_lists
+        if isinstance(non_tensor_input, dict) and "tensor_list_ref" in non_tensor_input:
+            tensor_list_ref = str(non_tensor_input["tensor_list_ref"])
+            if tensor_list_ref not in tensor_lists:
+                # Dangling reference: warn and fall back to an empty list
+                logger.warning(f"Tensor list reference {tensor_list_ref} not found in tensor_lists")
+                return []
+            metadata = tensor_lists[tensor_list_ref]
+            return create_tensor_list(metadata, self.device, self.dtype)
+
+        # A serialized torch.dtype arrives as a string such as "torch.float32"
+        if dtype_str == "<class 'torch.dtype'>" and isinstance(non_tensor_input, str):
+            dtype_name = non_tensor_input.replace("torch.", "")
+            try:
+                return getattr(torch, dtype_name)
+            except AttributeError:
+                logger.warning(f"Could not convert {non_tensor_input} to torch dtype")
+                return non_tensor_input
+
+        # Any other value is passed through unchanged
+        return non_tensor_input
+
+    def _handle_tensor_input(self, shape: Any, dtype_str: str, arg_index: int) -> torch.Tensor:
+        """Create a tensor for a list-valued shape; raise ValueError otherwise."""
+        # Shapes must be lists; anything else cannot describe a tensor here
+        if not isinstance(shape, list):
+            raise ValueError(
+                f"Invalid shape for tensor input {arg_index}: {shape}. Expected a list."
+            )
+        return create_single_tensor(shape, dtype_str, self.device, self.dtype)
+
+
+def build_huggingface_tracer_tests(
+    json_source: str,
+    op_filter: Optional[List[str]] = None,
+    device: str = "cpu",
+    dtype: torch.dtype = torch.float32,
+) -> List[HuggingFaceTracerOpTest]:
+    """
+    Build HuggingFace tracer tests from JSON data.
+
+    Ops are matched to dtype entries by base name (no overload, no trailing
+    "_"). Manual mapping entries override op_info; manual names sharing a
+    base name (e.g. "detach" and "detach_") have their dtypes merged.
+
+    Args:
+        json_source: Path to JSON file or URL containing operator data
+        op_filter: Optional list of operator names to include (None = include all)
+        device: Device to run tests on (e.g., "cuda", "cpu")
+        dtype: Default data type for tensors
+
+    Returns:
+        List of HuggingFaceTracerOpTest objects
+    """
+    data = load_json_data(json_source)
+
+    # create op_info mapping to test dtypes
+    op_dtype_filter = {op.name.split(".")[-1]: op.supported_dtypes(device) for op in op_db}
+    manual_ops = load_json_data(os.path.join(os.path.dirname(__file__), MANUAL_OPS_FILE))
+    manual_dtype_filter = {}
+    for manual_name, per_device in manual_ops.items():
+        # Convert string representations to torch dtypes (a set, like op_info's)
+        dtype_set = {
+            getattr(torch, dtype_str.replace("torch.", ""))
+            for dtype_str in per_device.get(device, [])
+            if dtype_str.startswith("torch.")
+        }
+        # todo: confirm that inplace ops and normal ops support the same dtypes
+        if manual_name.endswith("_"):
+            manual_name = manual_name[:-1]
+        # Merge rather than overwrite, so "detach_" cannot clobber "detach"
+        manual_dtype_filter.setdefault(manual_name, set()).update(dtype_set)
+    op_dtype_filter.update(manual_dtype_filter)
+    logger.info(f"op_dtype_filter: {op_dtype_filter}")
+
+    for op, supported_dtypes in op_dtype_filter.items():
+        logger.debug(f"op: {op}, dtypes: {supported_dtypes}")
+
+    op_tests = []
+    skipped_no_op_info = []
+    skipped_no_dtype_tests = []
+
+    for op_name, op_data in data.items():
+        # Apply filter if provided
+        if op_filter and op_name not in op_filter:
+            continue
+        if op_name in SPECIAL_CASES:
+            logger.warning(f"Skipping special case op {op_name}")
+            continue
+
+        # todo: confirm that inplace ops and normal ops support the same dtypes
+        op_name_no_overload = op_name.split(".")[0]
+        if op_name_no_overload.endswith("_"):
+            op_name_no_overload = op_name_no_overload[:-1]
+        # Skip if no op_info
+        if op_name_no_overload not in op_dtype_filter:
+            logger.warning(
+                f"Skipping {op_name}: op not found in op_info we should add these manually later"
+            )
+            skipped_no_op_info.append(op_name)
+            continue
+        # Skip if no unique_inputs
+        if "unique_inputs" not in op_data or not op_data["unique_inputs"]:
+            logger.debug(f"Skipping {op_name}: no unique_inputs found")
+            continue
+        # Skip if no supported dtypes
+        if dtype not in op_dtype_filter[op_name_no_overload]:
+            logger.debug(f"Skipping {op_name}: dtype {dtype} not supported according to op_info")
+            skipped_no_dtype_tests.append(op_name)
+            continue
+
+        # Select best unique_inputs
+        selected_unique_inputs = select_unique_inputs(op_data["unique_inputs"], dtype)
+
+        if selected_unique_inputs:
+            op_test = HuggingFaceTracerOpTest(
+                op_name, selected_unique_inputs, device=device, dtype=dtype
+            )
+            op_tests.append(op_test)
+            logger.debug(
+                f"Created test for {op_name} with {len(selected_unique_inputs)} unique_inputs on {device}"
+            )
+        else:
+            logger.debug(f"Skipping {op_name}: no unique_inputs found for dtype {dtype}")
+            skipped_no_dtype_tests.append(op_name)
+
+    logger.info(f"While building tests, skipped {len(skipped_no_op_info)} ops with no op_info")
+    logger.info(
+        f"While building tests, skipped {len(skipped_no_dtype_tests)} ops with no dtype tests"
+    )
+    logger.info(
+        "Skipped ops with no op_info or were manually added: \n" + "\n".join(skipped_no_op_info)
+    )
+    logger.info(
+        f"Skipped ops as they don't support testing {dtype} on {device}: \n"
+        + "\n".join(skipped_no_dtype_tests)
+    )
+
+    return op_tests
+
+
+class HuggingFaceTracerTestSuite(TestSuite):
+    """Suite of operator tests generated from HuggingFace trace data."""
+
+    def __init__(
+        self,
+        name: str,
+        device: str,
+        dtype: torch.dtype,
+        json_source: str = DEFAULT_JSON_SOURCE,
+        filter: Optional[List[str]] = None,
+    ):
+        """
+        Build the per-operator tests and register them with the base suite.
+
+        Args:
+            name: Name of the test suite
+            device: Device forwarded to the test builder (e.g., "cuda", "cpu")
+            dtype: Data type forwarded to the test builder
+            json_source: Path or URL of the trace JSON (defaults to DEFAULT_JSON_SOURCE)
+            filter: Optional list of operator names to include
+        """
+        self.device, self.dtype = device, dtype
+
+        traced_op_tests = build_huggingface_tracer_tests(
+            json_source, op_filter=filter, device=device, dtype=dtype
+        )
+        super().__init__(name, traced_op_tests)
+        logger.info(
+            f"Created HuggingFace tracer suite '{name}' with {len(traced_op_tests)} "
+            f"operator tests on {device} with dtype {dtype}"
+        )
diff --git a/BackendBench/huggingface_tracer/tracer_parser.py b/BackendBench/huggingface_tracer/tracer_parser.py
new file mode 100644
index 0000000..a2b3612
--- /dev/null
+++ b/BackendBench/huggingface_tracer/tracer_parser.py
@@ -0,0 +1,240 @@
+"""
+Helper module for parsing HuggingFace tracer data.
+
+This module contains utilities for loading, processing, and selecting
+unique inputs from HuggingFace tracer JSON data.
+"""
+
+import json
+import logging
+from typing import Any, Dict, List
+from urllib.parse import urlparse
+from urllib.request import urlopen
+
+import torch
+
+logger = logging.getLogger(__name__)
+
+# Operations that require special handling due to input constraints
+# These ops have requirements on inputs that make randomized tensors unsuitable
+SPECIAL_CASES = {
+    "embedding.default",  # requires second arg tensor to describe dims of first arg
+    "index.Tensor",  # requires list of tensors with indices within bounds of first arg
+    "meshgrid.indexing",  # requires last argument to be indexing method string
+    "empty_like.default",  # correctness testing doesn't make sense without special handling
+}
+
+
+def load_json_data(json_source: str) -> Dict[str, Any]:
+    """
+    Load operator data from JSON file or URL.
+
+    Sources whose scheme is http/https are fetched with ``urlopen``; anything
+    else is opened as a local file. Both paths decode the payload as UTF-8.
+
+    Args:
+        json_source: Path to JSON file or URL containing operator data
+
+    Returns:
+        Dictionary containing the loaded JSON data
+
+    Raises:
+        FileNotFoundError: If the JSON file doesn't exist
+        json.JSONDecodeError: If the JSON format is invalid
+        Exception: If URL cannot be accessed
+    """
+    if urlparse(json_source).scheme in ("http", "https"):
+        try:
+            logger.info(f"Loading JSON data from URL: {json_source}")
+            with urlopen(json_source) as response:
+                return json.loads(response.read().decode("utf-8"))
+        except Exception as e:
+            logger.error(f"Failed to load JSON from URL {json_source}: {e}")
+            raise
+
+    try:
+        logger.info(f"Loading JSON data from local file: {json_source}")
+        # Explicit encoding: the platform default is not UTF-8 everywhere
+        with open(json_source, "r", encoding="utf-8") as f:
+            return json.load(f)
+    except FileNotFoundError:
+        logger.error(f"JSON file not found: {json_source}")
+        raise
+    except json.JSONDecodeError as e:
+        logger.error(f"Invalid JSON format in {json_source}: {e}")
+        raise
+
+
+def calculate_tensor_shape_magnitude(combination: Dict[str, Any]) -> float:
+    """
+    Score a traced input record by the total size of its tensor arguments.
+
+    Args:
+        combination: Record whose "input_shapes" list holds one shape per argument
+
+    Returns:
+        Sum, as a float, of the element counts (product of dimensions) of every
+        non-empty all-integer shape; None, empty and other shapes add nothing
+    """
+    total_magnitude = 0.0
+    for shape in combination["input_shapes"]:
+        if not isinstance(shape, list) or not shape:
+            continue
+        if any(not isinstance(dim, int) for dim in shape):
+            continue
+        element_count = 1
+        for dim in shape:
+            element_count *= dim
+        total_magnitude += element_count
+    return total_magnitude
+
+
+def select_unique_inputs(
+    unique_inputs: List[Dict[str, Any]],
+    dtype,
+    max_popular: int = 5,
+    max_largest: int = 5,
+) -> List[Dict[str, Any]]:
+    """
+    Select the most relevant unique inputs based on popularity and size.
+
+    Records are first restricted to those whose tensors (direct arguments and
+    tensor-list members) all have ``dtype``. From the remainder, up to
+    ``max_popular`` most frequent and then up to ``max_largest`` largest records
+    are taken. Records are de-duplicated on ``input_shapes``; when two share the
+    same shapes, the one picked first is kept.
+
+    Args:
+        unique_inputs: List of unique input combinations
+        dtype: Tensor dtype to keep; ``str(dtype)`` is compared against the
+            serialized dtype strings
+        max_popular: Maximum number of popular inputs to select
+        max_largest: Maximum number of additional largest inputs to select
+
+    Returns:
+        List of selected unique input combinations (possibly empty)
+    """
+    wanted = str(dtype)
+
+    def has_only_wanted_dtype(record: Dict[str, Any]) -> bool:
+        # Non-tensor entries look like "<class 'int'>" and are not dtype-checked
+        direct = (d for d in record["input_dtypes"] if d.startswith("torch."))
+        # Every member of a tensor list is a tensor, so all of them are checked
+        nested = (
+            d for entry in record.get("tensor_lists", {}).values() for d in entry["dtypes"]
+        )
+        return all(d == wanted for d in direct) and all(d == wanted for d in nested)
+
+    candidates = [record for record in unique_inputs if has_only_wanted_dtype(record)]
+
+    # Most frequent first; sorted() is stable, so ties keep their input order
+    by_popularity = sorted(candidates, key=lambda x: x["count"], reverse=True)
+    # Largest total tensor size first
+    by_magnitude = sorted(
+        candidates, key=calculate_tensor_shape_magnitude, reverse=True
+    )
+
+    # Keyed by the string form of input_shapes to de-duplicate selections
+    selected = {}
+    for combo in by_popularity[:max_popular]:
+        # setdefault keeps the more popular record when shapes collide
+        selected.setdefault(str(combo["input_shapes"]), combo)
+
+    # Top up with the largest records not selected yet, capped at max_largest
+    added_largest = 0
+    for combo in by_magnitude:
+        if added_largest >= max_largest:
+            break
+        key = str(combo["input_shapes"])
+        if key not in selected:
+            selected[key] = combo
+            added_largest += 1
+
+    return list(selected.values())
+
+
+def create_single_tensor(
+    shape: List[int],
+    dtype_str: str,
+    device: str = "cpu",
+    default_dtype: torch.dtype = torch.float32,
+) -> torch.Tensor:
+    """
+    Create a randomly filled tensor of the requested shape and dtype.
+
+    Floating-point and complex tensors are drawn with ``torch.randn``, integer
+    tensors with ``torch.randint`` over [0, 10), and boolean tensors from
+    random 0/1 values.
+
+    Args:
+        shape: List of integers representing tensor dimensions
+        dtype_str: Serialized dtype such as "torch.float32"; strings without
+            the "torch." prefix fall back to ``default_dtype``
+        device: Device to create tensor on
+        default_dtype: Dtype used when ``dtype_str`` cannot be resolved
+
+    Returns:
+        PyTorch tensor with specified properties
+
+    Raises:
+        ValueError: If the resolved dtype is not one of the handled dtypes
+    """
+    # Resolve the serialized dtype; anything unresolvable keeps the default
+    torch_dtype = default_dtype
+    if dtype_str and isinstance(dtype_str, str) and dtype_str.startswith("torch."):
+        try:
+            torch_dtype = getattr(torch, dtype_str.replace("torch.", ""))
+        except AttributeError:
+            logger.warning(f"Could not convert {dtype_str} to torch dtype, using {torch_dtype}")
+
+    float_dtypes = (torch.float16, torch.float32, torch.float64, torch.bfloat16)
+    integer_dtypes = (torch.int8, torch.int16, torch.int32, torch.int64, torch.uint8)
+    complex_dtypes = (torch.complex64, torch.complex128)
+
+    if torch_dtype in float_dtypes:
+        return torch.randn(shape, dtype=torch_dtype, device=device)
+    if torch_dtype in integer_dtypes:
+        # Small non-negative values: a reasonable range for integer inputs
+        return torch.randint(0, 10, shape, dtype=torch_dtype, device=device)
+    if torch_dtype == torch.bool:
+        # Draw 0/1 as uint8, then cast
+        return torch.randint(0, 2, shape, dtype=torch.uint8, device=device).bool()
+    if torch_dtype in complex_dtypes:
+        # Assemble from independently drawn real and imaginary parts
+        part_dtype = torch.float64 if torch_dtype == torch.complex128 else torch.float32
+        real_part = torch.randn(shape, dtype=part_dtype, device=device)
+        imag_part = torch.randn(shape, dtype=part_dtype, device=device)
+        return torch.complex(real_part, imag_part)
+
+    raise ValueError(f"Unsupported dtype: {dtype_str}")
+
+
+def create_tensor_list(
+    tensor_list_metadata: Dict[str, Any],
+    device: str = "cpu",
+    default_dtype: torch.dtype = torch.float32,
+) -> List[torch.Tensor]:
+    """
+    Build the tensors described by one ``tensor_lists`` metadata entry.
+
+    Args:
+        tensor_list_metadata: Mapping with "length", "shapes" and "dtypes"
+        device: Device to create tensors on
+        default_dtype: Fallback dtype forwarded to ``create_single_tensor``
+
+    Returns:
+        List of ``length`` tensors
+    """
+    length = tensor_list_metadata["length"]
+    shapes = tensor_list_metadata["shapes"]
+    dtypes = tensor_list_metadata["dtypes"]
+
+    def entry_at(values: List[Any], position: int) -> Any:
+        # Reuse the final entry when fewer than ``length`` were recorded
+        return values[position] if position < len(values) else values[-1]
+
+    # One tensor per position, created in order
+    return [
+        create_single_tensor(entry_at(shapes, j), entry_at(dtypes, j), device, default_dtype)
+        for j in range(length)
+    ]
diff --git a/scripts/main.py b/scripts/main.py
index 3aa1d62..b77dc19 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -7,18 +7,33 @@
 import BackendBench.eval as eval
 import click
 import torch
+from BackendBench.huggingface_tracer import HuggingFaceTracerTestSuite
+from BackendBench.llm_client import ClaudeKernelGenerator
 from BackendBench.opinfo_suite import OpInfoTestSuite
 from BackendBench.suite import SmokeTestSuite
-from BackendBench.llm_client import ClaudeKernelGenerator
 
 logger = logging.getLogger(__name__)
 
 
+def setup_logging():
+    """Configure root logging from the LOG_LEVEL environment variable."""
+    logging_level = os.environ.get("LOG_LEVEL", "WARNING")
+    resolved_level = getattr(logging, logging_level.upper(), None)
+    if not isinstance(resolved_level, int):
+        raise ValueError(f"Invalid log level: {logging_level}")
+
+    logging.basicConfig(
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+        level=resolved_level,
+    )
+
+
 @click.command()
 @click.option(
     "--suite",
     default="smoke",
-    type=click.Choice(["smoke", "opinfo"]),
+    type=click.Choice(["smoke", "opinfo", "huggingface"]),
     help="Which suite to run",
 )
 @click.option(
@@ -40,6 +55,8 @@
     help="Maximum attempts for LLM kernel generation with feedback",
 )
 def cli(suite, backend, ops, llm_max_attempts):
+    # Setup logging first
+    setup_logging()
     if ops:
         ops = ops.split(",")
 
@@ -62,11 +79,16 @@ def cli(suite, backend, ops, llm_max_attempts):
             torch.bfloat16,
             filter=ops,
         ),
+        "huggingface": lambda: HuggingFaceTracerTestSuite(
+            name="huggingface_tracer_cuda_bfloat16",
+            device="cuda",
+            dtype=torch.float32,
+            filter=ops,
+        ),
     }[suite]()
 
     overall_correctness = []
     overall_performance = []
-
     for test in suite:
         if test.op not in backend:
             continue
@@ -102,6 +124,13 @@ def setup_llm_backend(llm_backend, llm_client, suite_name, ops_filter, max_attem
                 torch.bfloat16,
                 filter=ops_filter,
             )
+        elif suite_name == "huggingface":
+            suite = HuggingFaceTracerTestSuite(
+                name="huggingface_tracer_cuda_float32",
+                device="cuda",
+                dtype=torch.float32,
+                filter=ops_filter,
+            )
         else:
             raise ValueError(f"Unknown suite: {suite_name}")