diff --git a/BackendBench/huggingface_tracer/README.md b/BackendBench/huggingface_tracer/README.md new file mode 100644 index 0000000..6461932 --- /dev/null +++ b/BackendBench/huggingface_tracer/README.md @@ -0,0 +1,87 @@ +# Sample Inputs Schema + +This directory contains outputs of the HuggingFace tracer, which store traced PyTorch operation inputs from HuggingFace models. + +'[hf_op_trace.json](https://huggingface.co/datasets/GPUMODE/huggingface_op_trace/resolve/main/hf_op_trace.json)' contains an example of what these look like with the outputs from 20 models. + +## Schema Structure + +```json +{ + "operation_name": { + "total_calls": <int>, + "unique_input_count": <int>, + "unique_inputs": [ + { + "op_name": "<string>", + "input_shapes": [<shape or null>, ...], + "input_dtypes": ["<type string>", ...], + "non_tensor_inputs": [<value or null>, ...], + "tensor_lists": {}, + "count": <int> + } + ] + } +} +``` + +## Field Descriptions + +- **`input_shapes`**: List of tensor shapes (e.g., `[1, 3, 224, 224]`) or `null` for non-tensor inputs +- **`input_dtypes`**: List of type strings, one per input (e.g., `"torch.float32"` for a tensor) +- **`non_tensor_inputs`**: Actual non-tensor values, `null` for tensors, or `{"tensor_list_ref": <int>}` for tensor lists +- **`tensor_lists`**: Metadata for tensor lists, keyed by string IDs: + ```json + { + "0": { + "length": <int>, + "shapes": [[<int>, ...], ...], + "dtypes": ["<dtype string>", ...] + } + } + ``` +- **`count`**: Frequency of this input combination in the traced data + +**Note**: All dtypes (in input_dtypes and tensor_lists) are stored as strings rather than type objects (e.g., the string `"torch.float32"` instead of the `torch.float32` dtype), because they are serialized in the JSON file. They should be converted back to the corresponding types before use. 
+ +## Examples + +**Simple tensor input:** +```json +"input_shapes": [[2, 13]], +"input_dtypes": ["torch.int64"], +"non_tensor_inputs": [null] +``` + +**Tensor list input:** +```json +"input_shapes": [null, null], +"input_dtypes": ["", ""], +"non_tensor_inputs": [{"tensor_list_ref": 0}, 1], +"tensor_lists": { + "0": { + "length": 3, + "shapes": [[1, 128, 20, 20], [1, 128, 20, 20], [1, 128, 20, 20]], + "dtypes": ["torch.float32", "torch.float32", "torch.float32"] + } +} +``` + +**Example entry with non-tensor inputs** +```json +"convolution.default": { + "total_calls": 108, + "unique_input_count": 67, + "unique_inputs": [ + { + "op_name": "convolution.default", + "input_shapes": [[1, 256, 14, 14], [1024, 256, 1, 1], null, null, null, null, null, null, null], + "input_dtypes": ["torch.float32", "torch.float32", "", "", "", "", "", "", ""], + "non_tensor_inputs": [null, null, null, [1, 1], [0, 0], [1, 1], false, [0, 0], 1], + "tensor_lists": {}, + "count": 6 + }, + ... + ] + } +``` diff --git a/BackendBench/huggingface_tracer/__init__.py b/BackendBench/huggingface_tracer/__init__.py new file mode 100644 index 0000000..8054c98 --- /dev/null +++ b/BackendBench/huggingface_tracer/__init__.py @@ -0,0 +1,20 @@ +""" +HuggingFace Tracer Test Suite Package. + +This package provides functionality for creating and running test suites +based on HuggingFace tracer data. 
+""" + +from .suite import ( + build_huggingface_tracer_tests, + HuggingFaceTracerOpTest, + HuggingFaceTracerTest, + HuggingFaceTracerTestSuite, +) + +__all__ = [ + "HuggingFaceTracerTest", + "HuggingFaceTracerOpTest", + "HuggingFaceTracerTestSuite", + "build_huggingface_tracer_tests", +] diff --git a/BackendBench/huggingface_tracer/manual_ops_mapping.json b/BackendBench/huggingface_tracer/manual_ops_mapping.json new file mode 100644 index 0000000..f9d8843 --- /dev/null +++ b/BackendBench/huggingface_tracer/manual_ops_mapping.json @@ -0,0 +1,74 @@ +{ + "lift_fresh": { + "cpu": ["torch.float32", "torch.int64", "torch.uint8"], + "cuda": ["torch.float32", "torch.int64", "torch.uint8"] + }, + "_to_copy": { + "cpu": ["torch.bool", "torch.float32", "torch.int64", "torch.uint8"], + "cuda": ["torch.bool", "torch.float32", "torch.int64", "torch.uint8"] + }, + "convolution": { + "cpu": ["torch.float32"], + "cuda": ["torch.float32"] + }, + "_scaled_dot_product_efficient_attention": { + "cpu": ["torch.float32"], + "cuda": ["torch.float32"] + }, + "detach": { + "cpu": ["torch.float32"], + "cuda": ["torch.float32"] + }, + "_has_compatible_shallow_copy_type": { + "cpu": ["torch.float32"], + "cuda": ["torch.float32"] + }, + "detach_": { + "cpu": ["torch.int64"], + "cuda": ["torch.int64"] + }, + "is_nonzero": { + "cpu": ["torch.bool"], + "cuda": ["torch.bool"] + }, + "linalg_vector_norm": { + "cpu": ["torch.float32"], + "cuda": ["torch.float32"] + }, + "_local_scalar_dense": { + "cpu": ["torch.bool", "torch.float32"], + "cuda": ["torch.bool", "torch.float32"] + }, + "cudnn_batch_norm": { + "cpu": ["torch.float32"], + "cuda": ["torch.float32"] + }, + "max_pool2d_with_indices": { + "cpu": ["torch.float32"], + "cuda": ["torch.float32"] + }, + "copy_": { + "cpu": ["torch.float32"], + "cuda": ["torch.float32"] + }, + "upsample_nearest2d": { + "cpu": ["torch.float32"], + "cuda": ["torch.float32"] + }, + "alias": { + "cpu": ["torch.float32"], + "cuda": ["torch.float32"] + }, + "type_as": 
class HuggingFaceTracerTest:
    """A single traced test case: the call arguments for one op invocation."""

    def __init__(self, *args, **kwargs):
        """
        Store the arguments the operation under test will be called with.

        Args:
            *args: Positional arguments for the test
            **kwargs: Keyword arguments for the test
        """
        # Kept exactly as received: ``args`` is a tuple, ``kwargs`` a dict.
        self.args, self.kwargs = args, kwargs
class HuggingFaceTracerOpTest(OpTest):
    """
    OpTest implementation for HuggingFace tracer data.

    Wraps one traced operator together with the input combinations selected
    for it, and lazily materializes those combinations into real arguments
    (tensors, tensor lists, plain values) when ``correctness_tests`` is iterated.
    """

    def __init__(
        self,
        op_name: str,
        selected_unique_inputs: List[Dict[str, Any]],
        device: str = "cpu",
        dtype: torch.dtype = torch.float32,
    ):
        """
        Initialize an operation test.

        Args:
            op_name: Name of the PyTorch operation
            selected_unique_inputs: List of selected input combinations
            device: Device to run tests on
            dtype: Default data type for tensors
        """
        self.op_name = op_name
        # May be None when the name cannot be resolved under torch.ops.aten.
        self.op = self._get_torch_op(op_name)
        self._selected_unique_inputs = selected_unique_inputs
        # No performance tests are generated from tracer data.
        self.performance_tests = []
        self.device = device
        self.dtype = dtype

    def _get_torch_op(self, op_name: str):
        """
        Convert operator name to torch operation.

        A name of the form ``"base.overload"`` is resolved as
        ``torch.ops.aten.<base>.<overload>``; any other name is looked up
        directly on ``torch.ops.aten``.

        Args:
            op_name: String name of the operation

        Returns:
            PyTorch operation object or None if not found
        """
        try:
            # Handle common torch operation patterns
            if "." in op_name:
                parts = op_name.split(".")
                if len(parts) == 2:
                    op_base, overload = parts
                    op_packet = getattr(torch.ops.aten, op_base)
                    return getattr(op_packet, overload)
            # Names without a dot (or with more than one) are tried verbatim.
            return getattr(torch.ops.aten, op_name)
        except AttributeError:
            logger.warning(f"Could not find torch operation for {op_name}")
            return None

    @property
    def correctness_tests(self):
        """Generate tests from selected unique_inputs."""
        # Arguments are built on every iteration, so each pass over this
        # property creates fresh objects for every combination.
        for combination in self._selected_unique_inputs:
            args = self._convert_args_to_tensors(combination)
            yield HuggingFaceTracerTest(*args)

    def _convert_args_to_tensors(self, combination: Dict[str, Any]) -> List[Any]:
        """
        Convert JSON combination to actual tensor objects using new schema.

        Args:
            combination: Dictionary containing input metadata; must provide
                ``input_shapes``, ``input_dtypes`` and ``non_tensor_inputs``
                (``tensor_lists`` is optional)

        Returns:
            List of converted arguments (tensors and non-tensors)
        """
        input_shapes = combination["input_shapes"]
        input_dtypes = combination["input_dtypes"]
        non_tensor_inputs = combination["non_tensor_inputs"]
        tensor_lists = combination.get("tensor_lists", {})

        converted_args = []
        logger.debug(f"Converting args for {self.op_name}: {combination}")

        # zip() stops at the shortest of the three lists, so any extra
        # trailing entries in a longer list are silently ignored.
        for i, (shape, dtype_str, non_tensor_input) in enumerate(
            zip(input_shapes, input_dtypes, non_tensor_inputs)
        ):
            converted_arg = self._convert_single_arg(
                shape, dtype_str, non_tensor_input, tensor_lists, i
            )
            converted_args.append(converted_arg)

        return converted_args

    def _convert_single_arg(
        self,
        shape: Any,
        dtype_str: str,
        non_tensor_input: Any,
        tensor_lists: Dict[str, Any],
        arg_index: int,
    ) -> Any:
        """
        Convert a single argument from JSON representation to actual object.

        Args:
            shape: Shape information (list or None)
            dtype_str: String representation of dtype
            non_tensor_input: Non-tensor input value
            tensor_lists: Dictionary of tensor list metadata
            arg_index: Index of the argument for error reporting

        Returns:
            Converted argument (tensor, list of tensors, or other value)

        Raises:
            ValueError: If the argument is treated as a tensor but ``shape``
                is not a list
        """
        # Any non-None recorded value (including False and 0) is a non-tensor input.
        if non_tensor_input is not None:
            return self._handle_non_tensor_input(non_tensor_input, dtype_str, tensor_lists)
        # NOTE(review): as written, this branch and the next both compare
        # dtype_str against the empty string, so the next branch can never be
        # taken. The two literals look like they were meant to be distinct
        # type markers - confirm the intended values.
        elif dtype_str == "":
            return None
        elif dtype_str == "" and shape is None:
            logger.warning(
                f"Found dtype but no tensor_list_ref for argument {arg_index}"
            )
            return []
        else:
            return self._handle_tensor_input(shape, dtype_str, arg_index)

    def _handle_non_tensor_input(
        self, non_tensor_input: Any, dtype_str: str, tensor_lists: Dict[str, Any]
    ) -> Any:
        """
        Handle non-tensor inputs including tensor list references.

        Args:
            non_tensor_input: The recorded value (never None here)
            dtype_str: String representation of the argument's type
            tensor_lists: Dictionary of tensor list metadata, keyed by string ID

        Returns:
            A list of tensors for a tensor list reference (empty if the
            reference is unknown), a torch attribute for a recognized dtype
            name, or the value unchanged
        """
        # Check if this is a tensor list reference
        if isinstance(non_tensor_input, dict) and "tensor_list_ref" in non_tensor_input:
            # The reference is stored as a number but tensor_lists uses string keys.
            tensor_list_ref = str(non_tensor_input["tensor_list_ref"])
            if tensor_list_ref in tensor_lists:
                tensor_list_metadata = tensor_lists[tensor_list_ref]
                return create_tensor_list(tensor_list_metadata, self.device, self.dtype)
            else:
                logger.warning(f"Tensor list reference {tensor_list_ref} not found in tensor_lists")
                return []  # Empty list as fallback

        # Handle torch.dtype conversion
        # NOTE(review): the marker compared against here is the empty string as
        # written; presumably it should identify torch.dtype arguments - confirm.
        elif dtype_str == "" and isinstance(non_tensor_input, str):
            try:
                return getattr(torch, non_tensor_input.replace("torch.", ""))
            except AttributeError:
                logger.warning(f"Could not convert {non_tensor_input} to torch dtype")
                return non_tensor_input

        # Regular non-tensor input
        else:
            return non_tensor_input

    def _handle_tensor_input(self, shape: Any, dtype_str: str, arg_index: int) -> torch.Tensor:
        """
        Handle tensor inputs.

        Args:
            shape: Expected to be a list of dimensions
            dtype_str: String representation of the tensor dtype
            arg_index: Index of the argument, used in the error message

        Returns:
            Tensor produced by ``create_single_tensor`` on this test's device

        Raises:
            ValueError: If ``shape`` is not a list
        """
        if isinstance(shape, list):
            return create_single_tensor(shape, dtype_str, self.device, self.dtype)
        else:
            raise ValueError(
                f"Invalid shape for tensor input {arg_index}: {shape}. Expected a list."
            )
def _strip_inplace_suffix(op_name: str) -> str:
    """Return ``op_name`` without one trailing underscore (the in-place marker)."""
    return op_name[:-1] if op_name.endswith("_") else op_name


def build_huggingface_tracer_tests(
    json_source: str,
    op_filter: Optional[List[str]] = None,
    device: str = "cpu",
    dtype: torch.dtype = torch.float32,
) -> List[HuggingFaceTracerOpTest]:
    """
    Build HuggingFace tracer tests from JSON data.

    An op is turned into a test only if it is not a special case, its base
    name (overload and in-place suffix removed) has a known dtype set, that
    set contains ``dtype``, and at least one traced input combination is
    selected for it.

    Args:
        json_source: Path to JSON file or URL containing operator data
        op_filter: Optional list of operator names to include (None = include all)
        device: Device to run tests on (e.g., "cuda", "cpu")
        dtype: Default data type for tensors

    Returns:
        List of HuggingFaceTracerOpTest objects
    """
    data = load_json_data(json_source)
    # Build the membership set once; an empty or missing filter keeps every op.
    allowed_ops = set(op_filter) if op_filter else None

    # create op_info mapping to test dtypes
    op_dtype_filter = {op.name.split(".")[-1]: op.supported_dtypes(device) for op in op_db}

    # The manual mapping is keyed by device type ("cpu"/"cuda"), so an indexed
    # device such as "cuda:0" must be reduced to its type before the lookup.
    device_type = torch.device(device).type
    manual_ops = load_json_data(os.path.join(os.path.dirname(__file__), MANUAL_OPS_FILE))
    manual_dtype_filter = {}
    for manual_name, per_device_dtypes in manual_ops.items():
        # this might not be true, but inplace ops and normal ops should support the same dtypes
        # todo: confirm the above
        # Entries such as "detach" and "detach_" collapse to one key, so their
        # dtypes are merged instead of letting the later entry win.
        merged_dtypes = manual_dtype_filter.setdefault(_strip_inplace_suffix(manual_name), set())
        for dtype_str in per_device_dtypes.get(device_type, []):
            dtype_obj = (
                getattr(torch, dtype_str.replace("torch.", ""), None)
                if dtype_str.startswith("torch.")
                else None
            )
            if isinstance(dtype_obj, torch.dtype):
                merged_dtypes.add(dtype_obj)
            else:
                logger.warning(
                    f"Ignoring unrecognized dtype {dtype_str!r} for {manual_name} in {MANUAL_OPS_FILE}"
                )
    # Manual entries take precedence over what op_info reports.
    op_dtype_filter.update(manual_dtype_filter)
    logger.info(f"op_dtype_filter: {op_dtype_filter}")

    for known_op, known_dtypes in op_dtype_filter.items():
        logger.debug(f"op: {known_op}, dtypes: {known_dtypes}")

    op_tests = []
    skipped_no_op_info = []
    skipped_no_dtype_tests = []

    for op_name, op_data in data.items():
        # Apply filter if provided
        if allowed_ops is not None and op_name not in allowed_ops:
            continue
        if op_name in SPECIAL_CASES:
            logger.warning(f"Skipping special case op {op_name}")
            continue

        # this might not be true, but inplace ops and normal ops should support the same dtypes
        # todo: confirm the above
        op_name_no_overload = _strip_inplace_suffix(op_name.split(".")[0])

        # Skip if no op_info
        if op_name_no_overload not in op_dtype_filter:
            logger.warning(
                f"Skipping {op_name}: op not found in op_info we should add these manually later"
            )
            skipped_no_op_info.append(op_name)
            continue

        # Skip if no unique_inputs
        unique_inputs = op_data.get("unique_inputs")
        if not unique_inputs:
            logger.debug(f"Skipping {op_name}: no unique_inputs found")
            continue

        # Skip if no supported dtypes
        if dtype not in op_dtype_filter[op_name_no_overload]:
            logger.debug(f"Skipping {op_name}: dtype {dtype} not supported according to op_info")
            skipped_no_dtype_tests.append(op_name)
            continue

        # Select best unique_inputs
        selected_unique_inputs = select_unique_inputs(unique_inputs, dtype)
        if not selected_unique_inputs:
            logger.debug(f"Skipping {op_name}: no unique_inputs found for dtype {dtype}")
            skipped_no_dtype_tests.append(op_name)
            continue

        op_tests.append(
            HuggingFaceTracerOpTest(op_name, selected_unique_inputs, device=device, dtype=dtype)
        )
        logger.debug(
            f"Created test for {op_name} with {len(selected_unique_inputs)} unique_inputs on {device}"
        )

    logger.info(f"While building tests, skipped {len(skipped_no_op_info)} ops with no op_info")
    logger.info(
        f"While building tests, skipped {len(skipped_no_dtype_tests)} ops with no dtype tests"
    )
    logger.info(
        "Skipped ops with no op_info or were manually added: \n" + "\n".join(skipped_no_op_info)
    )
    logger.info(
        f"Skipped ops as they don't support testing {dtype} on {device}: \n"
        + "\n".join(skipped_no_dtype_tests)
    )

    return op_tests
class HuggingFaceTracerTestSuite(TestSuite):
    """TestSuite whose operator tests are built from HuggingFace tracer data."""

    def __init__(
        self,
        name: str,
        device: str,
        dtype: torch.dtype,
        json_source: str = DEFAULT_JSON_SOURCE,
        filter: Optional[List[str]] = None,
    ):
        """
        Build the operator tests and register them with the base suite.

        Args:
            name: Name of the test suite
            device: Device to run tests on (e.g., "cuda", "cpu")
            dtype: Default data type for tensors
            json_source: Path to JSON file or URL containing operator data (defaults to HuggingFace dataset)
            filter: Optional list of operator names to include
        """
        self.device = device
        self.dtype = dtype

        built_tests = build_huggingface_tracer_tests(
            json_source=json_source,
            op_filter=filter,
            device=device,
            dtype=dtype,
        )
        super().__init__(name, built_tests)

        summary = (
            f"Created HuggingFace tracer suite '{name}' with {len(built_tests)} "
            f"operator tests on {device} with dtype {dtype}"
        )
        logger.info(summary)
def load_json_data(json_source: str) -> Dict[str, Any]:
    """
    Load operator data from JSON file or URL.

    Sources whose scheme is ``http`` or ``https`` are downloaded; everything
    else is treated as a local file path. Both paths decode the payload as
    UTF-8 so the result does not depend on the platform's default encoding.

    Args:
        json_source: Path to JSON file or URL containing operator data

    Returns:
        Dictionary containing the loaded JSON data

    Raises:
        FileNotFoundError: If the JSON file doesn't exist
        json.JSONDecodeError: If the JSON format is invalid
        Exception: If URL cannot be accessed
    """
    # Check if the source is a URL
    if urlparse(json_source).scheme in ("http", "https"):
        try:
            logger.info(f"Loading JSON data from URL: {json_source}")
            with urlopen(json_source) as response:
                return json.loads(response.read().decode("utf-8"))
        except Exception as e:
            # Logged for context, then re-raised unchanged for the caller.
            logger.error(f"Failed to load JSON from URL {json_source}: {e}")
            raise

    # Handle as local file path
    try:
        logger.info(f"Loading JSON data from local file: {json_source}")
        with open(json_source, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        logger.error(f"JSON file not found: {json_source}")
        raise
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON format in {json_source}: {e}")
        raise
def calculate_tensor_shape_magnitude(combination: Dict[str, Any]) -> float:
    """
    Calculate a magnitude metric for tensor arguments to determine 'largest'.

    Only entries of ``input_shapes`` that are non-empty lists of ints are
    counted; ``None`` entries (non-tensor inputs) and empty shapes contribute
    nothing.

    Args:
        combination: Dictionary containing input_shapes and other metadata

    Returns:
        Float representing the total "magnitude": the sum, over all counted
        shapes, of the product of that shape's dimensions
    """
    total_magnitude = 0.0

    for shape in combination["input_shapes"]:
        if isinstance(shape, list) and len(shape) > 0 and all(isinstance(x, int) for x in shape):
            # Calculate product of dimensions (total tensor size)
            magnitude = 1
            for dim in shape:
                magnitude *= dim
            total_magnitude += magnitude

    return total_magnitude


def _uses_only_dtype(combination: Dict[str, Any], dtype_name: str) -> bool:
    """Return True if every tensor in ``combination`` has the dtype string ``dtype_name``."""
    # In input_dtypes only "torch.*" entries describe tensors; other entries
    # describe non-tensor arguments and never disqualify a combination.
    tensor_dtypes = [
        entry
        for entry in combination["input_dtypes"]
        if isinstance(entry, str) and entry.startswith("torch.")
    ]
    # tensor_lists is optional; every dtype listed there belongs to a tensor.
    for metadata in (combination.get("tensor_lists") or {}).values():
        tensor_dtypes.extend(metadata["dtypes"])
    return all(entry == dtype_name for entry in tensor_dtypes)


def select_unique_inputs(
    unique_inputs: List[Dict[str, Any]],
    dtype,
    max_popular: int = 5,
    max_largest: int = 5,
) -> List[Dict[str, Any]]:
    """
    Select the most relevant unique inputs based on popularity and size.

    Combinations containing a tensor (direct or inside a tensor list) whose
    dtype differs from ``dtype`` are discarded first. From the remainder, up
    to max_popular most popular unique_inputs are taken, then the largest
    unique_inputs are added until max_popular + max_largest entries are
    selected. Entries are de-duplicated by their ``input_shapes``.

    Args:
        unique_inputs: List of unique input combinations
        dtype: Data type to use for tensors, we will filter to only those with this dtype
        max_popular: Maximum number of popular inputs to select
        max_largest: Maximum number of largest inputs to select

    Returns:
        List of selected unique input combinations (empty if none match ``dtype``)
    """
    # Filter to only those with the specified dtype in the cases of tensors
    dtype_name = str(dtype)
    candidates = [combo for combo in unique_inputs if _uses_only_dtype(combo, dtype_name)]

    # Sort by count (popularity) descending
    popular_unique_inputs = sorted(candidates, key=lambda x: x["count"], reverse=True)[
        :max_popular
    ]

    # Sort by magnitude descending
    largest_unique_inputs = sorted(
        candidates,
        key=calculate_tensor_shape_magnitude,
        reverse=True,
    )

    # Selected unique_inputs, keyed by the string form of input_shapes for uniqueness
    selected = {}

    # Add popular unique_inputs first
    for combo in popular_unique_inputs:
        selected[str(combo["input_shapes"])] = combo

    # Add largest unique_inputs, skipping duplicates. The cap is checked before
    # inserting so that max_largest=0 really adds nothing.
    selection_limit = max_popular + max_largest
    for combo in largest_unique_inputs:
        if len(selected) >= selection_limit:
            break
        selected.setdefault(str(combo["input_shapes"]), combo)

    return list(selected.values())
def create_single_tensor(
    shape: List[int],
    dtype_str: str,
    device: str = "cpu",
    default_dtype: torch.dtype = torch.float32,
) -> torch.Tensor:
    """
    Build one randomly filled tensor of the requested shape and dtype.

    Args:
        shape: List of integers representing tensor dimensions
        dtype_str: String representation of the desired dtype; only strings
            starting with "torch." are resolved, anything else uses the default
        device: Device to create tensor on
        default_dtype: Fallback dtype if conversion fails

    Returns:
        PyTorch tensor with specified properties

    Raises:
        ValueError: If the resolved dtype is not one of the handled
            floating point, integer, boolean or complex dtypes
    """
    floating_dtypes = (torch.float16, torch.float32, torch.float64, torch.bfloat16)
    integer_dtypes = (torch.int8, torch.int16, torch.int32, torch.int64, torch.uint8)
    complex_dtypes = (torch.complex64, torch.complex128)

    # Resolve the dtype name; unknown names keep the fallback and are reported.
    resolved_dtype = default_dtype
    if isinstance(dtype_str, str) and dtype_str.startswith("torch."):
        try:
            resolved_dtype = getattr(torch, dtype_str.replace("torch.", ""))
        except AttributeError:
            logger.warning(f"Could not convert {dtype_str} to torch dtype, using {resolved_dtype}")

    if resolved_dtype in floating_dtypes:
        # Normally distributed values for floating point tensors
        return torch.randn(shape, dtype=resolved_dtype, device=device)

    if resolved_dtype in integer_dtypes:
        # Small non-negative integers in [0, 10)
        return torch.randint(0, 10, shape, dtype=resolved_dtype, device=device)

    if resolved_dtype == torch.bool:
        # Draw 0/1 as uint8, then cast to bool
        as_bytes = torch.randint(0, 2, shape, dtype=torch.uint8, device=device)
        return as_bytes.bool()

    if resolved_dtype in complex_dtypes:
        # Real part is drawn first, then the imaginary part
        component_dtype = torch.float64 if resolved_dtype != torch.complex64 else torch.float32
        real_part = torch.randn(shape, dtype=component_dtype, device=device)
        imag_part = torch.randn(shape, dtype=component_dtype, device=device)
        return torch.complex(real_part, imag_part)

    raise ValueError(f"Unsupported dtype: {dtype_str}")
def create_tensor_list(
    tensor_list_metadata: Dict[str, Any],
    device: str = "cpu",
    default_dtype: torch.dtype = torch.float32,
) -> List[torch.Tensor]:
    """
    Materialize the tensors described by one tensor list metadata entry.

    Args:
        tensor_list_metadata: Dictionary containing length, shapes, and dtypes
        device: Device to create tensors on
        default_dtype: Fallback dtype if conversion fails

    Returns:
        List of PyTorch tensors, one per position up to ``length``
    """
    shapes = tensor_list_metadata["shapes"]
    dtypes = tensor_list_metadata["dtypes"]
    last_shape_index = len(shapes) - 1
    last_dtype_index = len(dtypes) - 1

    # Positions beyond the provided shapes/dtypes reuse the last entry.
    return [
        create_single_tensor(
            shapes[min(position, last_shape_index)],
            dtypes[min(position, last_dtype_index)],
            device,
            default_dtype,
        )
        for position in range(tensor_list_metadata["length"])
    ]
def setup_logging():
    """
    Configure root logging from the LOG_LEVEL environment variable.

    The variable holds a level name (case-insensitive) and defaults to
    "WARNING" when unset.

    Raises:
        ValueError: If the name does not resolve to an integer level on the
            ``logging`` module
    """
    level_name = os.environ.get("LOG_LEVEL", "WARNING")
    resolved_level = getattr(logging, level_name.upper(), None)
    if isinstance(resolved_level, int):
        logging.basicConfig(
            level=resolved_level,
            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
        return
    raise ValueError(f"Invalid log level: {level_name}")
setup_logging() if ops: ops = ops.split(",") @@ -62,11 +79,16 @@ def cli(suite, backend, ops, llm_max_attempts): torch.bfloat16, filter=ops, ), + "huggingface": lambda: HuggingFaceTracerTestSuite( + name="huggingface_tracer_cuda_bfloat16", + device="cuda", + dtype=torch.float32, + filter=ops, + ), }[suite]() overall_correctness = [] overall_performance = [] - for test in suite: if test.op not in backend: continue @@ -102,6 +124,13 @@ def setup_llm_backend(llm_backend, llm_client, suite_name, ops_filter, max_attem torch.bfloat16, filter=ops_filter, ) + elif suite_name == "huggingface": + suite = HuggingFaceTracerTestSuite( + name="huggingface_tracer_cuda_float32", + device="cuda", + dtype=torch.float32, + filter=ops_filter, + ) else: raise ValueError(f"Unknown suite: {suite_name}")