diff --git a/.gitignore b/.gitignore index 1592432..4e3f765 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,6 @@ __pycache__/ .claude/ .vscode/ .ruff_cache/ -generated_kernels/ backendbench.egg-info/ CLAUDE.md venv/ @@ -10,3 +9,6 @@ ops/ uv.lock pytorch_operator_coverage.csv .pre-commit-cache/ +generated_kernels/ +internal_operators.csv +torchbench_operator_folder_mapping.csv \ No newline at end of file diff --git a/BackendBench/__init__.py b/BackendBench/__init__.py index f59deee..cbac6f5 100644 --- a/BackendBench/__init__.py +++ b/BackendBench/__init__.py @@ -5,125 +5,7 @@ # LICENSE file in the root directory of this source tree. """ -BackendBench: A PyTorch backend evaluation framework with monkey patching support. - -Import this module to automatically monkey patch PyTorch operations with custom backends. +BackendBench: A PyTorch backend evaluation framework. """ -import os - -from .backends import AtenBackend, FlagGemsBackend - - -class BackendRegistry: - """Registry for managing different PyTorch backends.""" - - def __init__(self): - self._current_backend = None - self._original_ops = {} - self._patched = False - - def register_backend(self, backend_name: str, backend_instance=None): - """Register and activate a backend.""" - if backend_instance is None: - backend_instance = self._create_backend(backend_name) - - if self._patched: - self.unpatch() - - self._current_backend = backend_instance - self._patch_torch_ops() - - def _create_backend(self, backend_name: str): - """Create a backend instance.""" - backends = {"aten": AtenBackend, "flag_gems": FlagGemsBackend} - - if backend_name not in backends: - raise ValueError(f"Unknown backend: {backend_name}. 
Available: {list(backends.keys())}") - - return backends[backend_name]() - - def _patch_torch_ops(self): - """Monkey patch torch operations with current backend.""" - if self._current_backend is None: - return - - # Get all torch ops that the backend supports - if hasattr(self._current_backend, "ops"): - for torch_op, backend_impl in self._current_backend.ops.items(): - if torch_op not in self._original_ops: - self._original_ops[torch_op] = torch_op.default - torch_op.default = backend_impl - - self._patched = True - print( - f"BackendBench: Monkey patched {len(self._original_ops)} operations with {self._current_backend.name} backend" - ) - - def unpatch(self): - """Restore original torch operations.""" - if not self._patched: - return - - for torch_op, original_impl in self._original_ops.items(): - torch_op.default = original_impl - - self._original_ops.clear() - self._patched = False - print("BackendBench: Restored original PyTorch operations") - - def get_current_backend(self): - """Get the currently active backend.""" - return self._current_backend - - def is_patched(self): - """Check if operations are currently patched.""" - return self._patched - - -# Global registry instance -_registry = BackendRegistry() - - -def use_backend(backend_name: str, backend_instance=None): - """ - Switch to a different backend. 
- - Args: - backend_name: Name of the backend ('aten', 'flag_gems') - backend_instance: Optional pre-configured backend instance - """ - _registry.register_backend(backend_name, backend_instance) - - -def get_backend(): - """Get the currently active backend.""" - return _registry.get_current_backend() - - -def restore_pytorch(): - """Restore original PyTorch operations.""" - _registry.unpatch() - - -def is_patched(): - """Check if BackendBench is currently patching operations.""" - return _registry.is_patched() - - -# Auto-configuration based on environment variables -def _auto_configure(): - """Auto-configure backend based on environment variables.""" - backend_name = os.getenv("BACKENDBENCH_BACKEND", "aten") - - try: - use_backend(backend_name) - except Exception as e: - print(f"Warning: Failed to initialize {backend_name} backend: {e}") - print("Falling back to aten backend") - use_backend("aten") - - -# Auto-configure on import unless explicitly disabled -if os.getenv("BACKENDBENCH_NO_AUTO_PATCH", "").lower() not in ("1", "true", "yes"): - _auto_configure() +__version__ = "0.1.0" diff --git a/BackendBench/backends/directory.py b/BackendBench/backends/directory.py index 6da0956..ef70eb7 100644 --- a/BackendBench/backends/directory.py +++ b/BackendBench/backends/directory.py @@ -34,22 +34,28 @@ def _load_kernels(self): if not os.path.isdir(op_dir): continue - impl_files = [f for f in os.listdir(op_dir) if f.endswith(".py")] + impl_files = [ + f + for f in os.listdir(op_dir) + if f.endswith(".py") and f.startswith(f"{op_name}_implementation") + ] if not impl_files: - logger.warning(f"No Python files found in {op_dir}") + logger.debug(f"No implementation files found in {op_dir}") continue # Use the first implementation file - impl_file = impl_files[0] + impl_file = sorted(impl_files)[0] # Sort to ensure consistent selection impl_path = os.path.join(op_dir, impl_file) try: # Load the implementation and map to PyTorch operation kernel_func = 
def _find_pytorch_ops(self, op_name: str):
    """Map an operator directory name to the ``torch.ops.aten`` overloads it implements.

    A plain directory name such as ``add`` maps to every *common* overload
    (``default``, ``Tensor``, ``Scalar``, ``int``, ``float``) that exists on
    ``torch.ops.aten.add``.  A directory name ending in ``_out``,
    ``_inplace`` or ``_scalar`` is treated as ``<base>_<overload>`` and maps
    to that single overload when the base operator has it.

    Args:
        op_name: Name of the operator directory, e.g. ``"relu"``,
            ``"add_out"`` or ``"add_scalar"``.

    Returns:
        A list of matching overload objects.  The list is empty when
        ``torch.ops.aten`` has no operator with the derived base name, or
        when that operator has none of the common overloads.
    """
    common_overloads = ("default", "Tensor", "Scalar", "int", "float")

    # Split "<base>_<suffix>" only for the suffixes the directory layout uses.
    base_name, suffix = op_name, None
    head, sep, tail = op_name.rpartition("_")
    if sep and tail in ("out", "inplace", "scalar"):
        base_name, suffix = head, tail

    if not hasattr(torch.ops.aten, base_name):
        return []
    aten_op = getattr(torch.ops.aten, base_name)

    if suffix:
        # Overload names are case-sensitive and the scalar overload is
        # spelled "Scalar", so also try the capitalised form.  Without this,
        # "add_scalar" fell through to *every* common overload of "add".
        for candidate in (suffix, suffix.capitalize()):
            if hasattr(aten_op, candidate):
                return [getattr(aten_op, candidate)]
        # The requested overload does not exist: keep the previous behaviour
        # and fall back to the common overloads of the base operator.

    return [getattr(aten_op, name) for name in common_overloads if hasattr(aten_op, name)]
f.write('''import torch def add_kernel_impl(input, other): @@ -56,7 +56,7 @@ def add_kernel_impl(input, other): def create_mul(): os.makedirs("generated_kernels/mul", exist_ok=True) - with open("generated_kernels/mul/mul_implementation_1.py", "w") as f: + with open("generated_kernels/mul/mul_implementation_v1.py", "w") as f: f.write('''import torch def mul_kernel_impl(input, other): @@ -75,7 +75,7 @@ def mul_kernel_impl(input, other): def create_abs(): os.makedirs("generated_kernels/abs", exist_ok=True) - with open("generated_kernels/abs/abs_implementation_1.py", "w") as f: + with open("generated_kernels/abs/abs_implementation_v1.py", "w") as f: f.write('''import torch def abs_kernel_impl(input): @@ -93,7 +93,7 @@ def abs_kernel_impl(input): def create_sum(): os.makedirs("generated_kernels/sum", exist_ok=True) - with open("generated_kernels/sum/sum_implementation_1.py", "w") as f: + with open("generated_kernels/sum/sum_implementation_v1.py", "w") as f: f.write('''import torch def sum_kernel_impl(input, *args, **kwargs): @@ -122,8 +122,8 @@ def main(): logger.info("Created 5 simple kernel implementations in generated_kernels/") logger.info("Test them individually:") - logger.info(" python generated_kernels/relu/relu_implementation_1.py") - logger.info(" python generated_kernels/add/add_implementation_1.py") + logger.info(" python generated_kernels/relu/relu_implementation_v1.py") + logger.info(" python generated_kernels/add/add_implementation_v1.py") logger.info(" etc.") logger.info("Or test all with the backend:") logger.info(" python test/test_simple_directory_backend.py") diff --git a/BackendBench/scripts/create_watermarked_operators.py b/BackendBench/scripts/create_watermarked_operators.py new file mode 100755 index 0000000..282c226 --- /dev/null +++ b/BackendBench/scripts/create_watermarked_operators.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
# Constant every watermarked kernel fills its output with, unless overridden.
WATERMARK_VALUE = 42.0


def create_watermarked_impl(op_name: str, watermark_value: float = WATERMARK_VALUE) -> str:
    """Return the source code of a watermarked kernel for ``op_name``.

    The generated module defines ``<op_name>_kernel_impl(*args, **kwargs)``,
    which ignores the real computation and returns a tensor filled with
    ``watermark_value`` (shaped like the first tensor argument, or a scalar
    tensor when there is none).

    Args:
        op_name: Operator directory name; it is used verbatim as the prefix
            of the generated function name.
        watermark_value: Constant written into the generated code.

    Returns:
        The Python source of the implementation file as a string.
    """
    return f'''# Watermarked implementation for {op_name} operator
# This implementation returns a constant tensor to verify monkey patching

import torch

def {op_name}_kernel_impl(*args, **kwargs):
    """Watermarked implementation of {op_name}.

    Returns a tensor filled with {watermark_value} to verify the operator
    is being called through DirectoryBackend. This will fail correctness
    tests but confirms the monkey patching mechanism is working.
    """
    # Find the first tensor argument to determine output shape and device
    tensor_arg = None
    for arg in args:
        if isinstance(arg, torch.Tensor):
            tensor_arg = arg
            break

    if tensor_arg is not None:
        # Return a tensor with same shape, dtype, and device as input
        result = torch.full_like(tensor_arg, {watermark_value})
        return result
    else:
        # Fallback for operators without tensor inputs
        # Return a scalar tensor
        return torch.tensor({watermark_value})
'''


def create_watermarked_operators(
    base_dir: str = "generated_kernels",
    watermark_value: float = WATERMARK_VALUE,
    overwrite: bool = False,
):
    """Write a watermarked implementation into every operator directory.

    For each sub-directory ``<op>`` of ``base_dir`` (except ``__pycache__``)
    the file ``<op>/<op>_implementation_v1.py`` is created.  A helper script
    ``verify_watermarks.py`` is then written into ``base_dir`` and made
    executable.

    Args:
        base_dir: Directory containing one sub-directory per operator.
        watermark_value: Constant the generated kernels return.
        overwrite: When false, existing implementation files are left
            untouched and counted as skipped.

    Returns:
        None.  If ``base_dir`` does not exist an error is printed and
        nothing is written.
    """
    base_path = Path(base_dir)
    if not base_path.exists():
        print(f"Error: Directory {base_path} does not exist.")
        print("Please run setup_operator_directories.py first.")
        return

    created_count = 0
    skipped_count = 0

    # Sorted so the run is reproducible regardless of filesystem order.
    for op_dir in sorted(base_path.iterdir()):
        if not op_dir.is_dir() or op_dir.name == "__pycache__":
            continue

        op_name = op_dir.name
        impl_file = op_dir / f"{op_name}_implementation_v1.py"

        if impl_file.exists() and not overwrite:
            skipped_count += 1
            continue

        impl_file.write_text(create_watermarked_impl(op_name, watermark_value), encoding="utf-8")
        created_count += 1

    print("\nWatermarked operator creation complete:")
    print(f"- Created {created_count} watermarked implementations")
    print(f"- Skipped {skipped_count} existing implementations")
    print(f"- Watermark value: {watermark_value}")
    print(f"- Base directory: {base_path.absolute()}")

    verification_script = base_path / "verify_watermarks.py"
    # The directory is embedded with repr() so quotes or backslashes in the
    # path (e.g. Windows paths) cannot break the generated string literal.
    verification_content = f'''#!/usr/bin/env python3
"""Verify that watermarked operators are being loaded correctly."""

import torch
from BackendBench.backends import DirectoryBackend

# Expected watermark value
WATERMARK_VALUE = {watermark_value}

# Load the backend
backend = DirectoryBackend({str(base_dir)!r})

# Test a few operators
test_ops = ["relu", "add", "mul", "sub", "div"]

print(f"Testing watermarked operators (expected value: {{WATERMARK_VALUE}})...")
print(f"Loaded {{len(backend.compiled_kernels)}} operators\\n")

for op_name in test_ops:
    # Try to find the operator
    found = False
    for torch_op in backend.compiled_kernels:
        if op_name in str(torch_op):
            # Test the operator
            try:
                x = torch.tensor([1.0, 2.0, 3.0])
                result = backend[torch_op](x)

                if torch.allclose(result, torch.full_like(x, WATERMARK_VALUE)):
                    print(f"✓ {{op_name}}: Watermark detected correctly")
                else:
                    print(f"✗ {{op_name}}: Unexpected result {{result}}")

                found = True
                break
            except Exception as e:
                print(f"✗ {{op_name}}: Error - {{e}}")
                found = True
                break

    if not found:
        print(f"? {{op_name}}: Not found in loaded operators")
'''

    # The script contains non-ASCII check marks; an explicit encoding keeps
    # the write from failing on platforms whose default codec lacks them.
    verification_script.write_text(verification_content, encoding="utf-8")
    os.chmod(verification_script, 0o755)

    print(f"\nCreated verification script: {verification_script}")
    print("\nTo verify watermarks are working:")
    print(f" python {verification_script}")
    print("\nTo test with evaluation harness (should fail correctness):")
    print(" python -m BackendBench.scripts.main --backend directory --ops relu,add --suite smoke")


def main():
    """Parse the command line and create the watermarked implementations."""
    parser = argparse.ArgumentParser(
        description="Create watermarked operator implementations for testing"
    )
    parser.add_argument(
        "--base-dir",
        default="generated_kernels",
        help="Base directory containing operator subdirectories",
    )
    parser.add_argument(
        "--watermark-value",
        type=float,
        default=WATERMARK_VALUE,
        help=f"Value to use for watermarking (default: {WATERMARK_VALUE})",
    )
    parser.add_argument(
        "--overwrite", action="store_true", help="Overwrite existing implementation files"
    )

    args = parser.parse_args()

    create_watermarked_operators(args.base_dir, args.watermark_value, args.overwrite)
def get_operator_mapping():
    """Describe which ``generated_kernels/`` folder provides each loaded operator.

    A ``DirectoryBackend`` is created for ``generated_kernels`` and every
    operator in its ``compiled_kernels`` is paired with the first folder (in
    sorted order) whose name resolves to that operator through
    ``DirectoryBackend._find_pytorch_ops``.

    Returns:
        A list of dicts with the keys ``pytorch_operator``, ``base_name``,
        ``overload``, ``folder_name`` (``"NOT_FOUND"`` when no folder
        matches) and ``is_mapped``.  The list is empty when the
        ``generated_kernels`` directory does not exist.
    """
    backend = DirectoryBackend("generated_kernels")
    print(f"DirectoryBackend loaded {len(backend.compiled_kernels)} operators")

    generated_kernels = Path("generated_kernels")
    if not generated_kernels.exists():
        print("No generated_kernels directory found")
        return []

    # Sorted so that ties between folders are resolved the same way on
    # every filesystem.
    folder_names = sorted(d.name for d in generated_kernels.iterdir() if d.is_dir())
    print(f"Found {len(folder_names)} folders in generated_kernels/")

    # Resolve every folder exactly once instead of once per loaded operator.
    # A bare instance is enough: the resolver is called without running
    # __init__, exactly as the per-operator lookup did before.
    resolver = DirectoryBackend.__new__(DirectoryBackend)
    folder_for_op = {}
    for folder in folder_names:
        for resolved_op in resolver._find_pytorch_ops(folder):
            # setdefault keeps the first folder that claims an operator.
            folder_for_op.setdefault(resolved_op, folder)

    mappings = []
    for pytorch_op in sorted(backend.compiled_kernels, key=str):
        op_str = str(pytorch_op)

        # Split "aten.<base>.<overload>" into its parts when the string has
        # that form; anything else is reported as unknown.
        base_name = "unknown"
        overload = "unknown"
        if "aten." in op_str:
            base_name = op_str.split("aten.")[1].split(".")[0]
            parts = op_str.split(".")
            if len(parts) >= 3:
                overload = parts[2]

        folder_name = folder_for_op.get(pytorch_op)
        mappings.append(
            {
                "pytorch_operator": op_str,
                "base_name": base_name,
                "overload": overload,
                "folder_name": folder_name or "NOT_FOUND",
                "is_mapped": folder_name is not None,
            }
        )

    return mappings
def clean_op_name_for_directory(op_name: str) -> str:
    """Convert an operator name to a valid directory name.

    Namespace prefixes are dropped, the overload part is either folded into
    the name (``out``, ``inplace``, ``scalar``) or discarded, and characters
    that are unsafe in a path component are replaced by ``_``.

    Examples:
        - aten::add.Tensor -> add
        - aten::add.out -> add_out
        - aten::native_batch_norm -> native_batch_norm
        - torch.ops.aten.add.default -> add
        - aten.add.Tensor -> add

    Args:
        op_name: Operator name in any of the spellings shown above.

    Returns:
        The directory name; an empty string when nothing is left of the name.
    """
    # "aten." is the spelling produced by str() on an overload object; it
    # used to be mistaken for the operator's base name.
    for prefix in ("aten::", "torch.ops.aten.", "aten."):
        if op_name.startswith(prefix):
            op_name = op_name[len(prefix):]

    base, dot, rest = op_name.partition(".")
    if dot:
        overload = rest.partition(".")[0]
        # Only these overloads get their own directory; every other overload
        # (.default, .Tensor, ...) shares the base operator's directory.
        if overload in ("out", "inplace", "scalar"):
            op_name = f"{base}_{overload}"
        else:
            op_name = base

    # Replace any remaining characters that are invalid in a directory name.
    return op_name.translate(str.maketrans({":": "_", "/": "_", "\\": "_"}))
+""" + + readme_path.write_text(content) + + +def setup_operator_directories(base_dir: str = "generated_kernels", include_all: bool = False): + """Set up directory structure for PyTorch operators.""" + + # First, generate the coverage CSV if it doesn't exist + csv_path = "pytorch_operator_coverage.csv" + if not os.path.exists(csv_path): + print("Generating operator coverage CSV...") + csv_path = generate_coverage_csv() + + # Create base directory + base_path = Path(base_dir) + base_path.mkdir(exist_ok=True) + + # Read operator data from CSV + operators = [] + with open(csv_path, "r") as f: + reader = csv.DictReader(f) + for row in reader: + operators.append( + { + "name": row["op_name"], + "is_core": row["is_core"] == "True", + "is_opinfo": row["is_in_opinfo"] == "True", + "is_torchbench": row["is_in_torchbench"] == "True", + } + ) + + # Filter operators based on criteria + if not include_all: + # By default, only include operators that are in TorchBench + operators = [op for op in operators if op["is_torchbench"]] + print(f"Setting up directories for {len(operators)} TorchBench operators") + else: + print(f"Setting up directories for all {len(operators)} operators") + + # Create directories + created_count = 0 + skipped_count = 0 + + for op in operators: + op_name = op["name"] + dir_name = clean_op_name_for_directory(op_name) + + if not dir_name: # Skip if we couldn't clean the name + print(f"Skipping operator with invalid name: {op_name}") + skipped_count += 1 + continue + + op_dir = base_path / dir_name + + if op_dir.exists(): + skipped_count += 1 + continue + + op_dir.mkdir(exist_ok=True) + create_readme_for_op(op_dir, op_name, op["is_core"], op["is_opinfo"], op["is_torchbench"]) + created_count += 1 + + print("\nDirectory setup complete:") + print(f"- Created {created_count} new directories") + print(f"- Skipped {skipped_count} existing directories") + print(f"- Base directory: {base_path.absolute()}") + + # Create a main README + main_readme = base_path / 
"README.md" + main_readme.write_text("""# Generated Kernels Directory + +This directory contains subdirectories for PyTorch operators that need kernel implementations. + +## Structure + +Each subdirectory corresponds to a PyTorch operator and should contain: +- Implementation files: `{op_name}_implementation_*.py` +- README.md with operator information + +## Usage + +1. Navigate to the operator directory you want to implement +2. Create your kernel implementation following the template in the README +3. Test with DirectoryBackend: `python -m BackendBench.scripts.main --backend directory --ops {op_name}` + +## Operator Mapping + +The DirectoryBackend maps directory names to PyTorch operations as follows: +- Directory `add` → `torch.ops.aten.add.default` +- Directory `mul` → `torch.ops.aten.mul.default` +- etc. + +For operators with multiple overloads (e.g., add.out), use suffixes: +- Directory `add_out` → `torch.ops.aten.add.out` +""") + + +def main(): + parser = argparse.ArgumentParser( + description="Set up directory structure for PyTorch operator implementations" + ) + parser.add_argument( + "--base-dir", + default="generated_kernels", + help="Base directory for operator implementations (default: generated_kernels)", + ) + parser.add_argument( + "--include-all", + action="store_true", + help="Include all operators, not just TorchBench operators", + ) + parser.add_argument( + "--regenerate-csv", + action="store_true", + help="Force regeneration of the operator coverage CSV", + ) + + args = parser.parse_args() + + # Remove existing CSV if regeneration is requested + if args.regenerate_csv and os.path.exists("pytorch_operator_coverage.csv"): + os.remove("pytorch_operator_coverage.csv") + print("Removed existing CSV, will regenerate...") + + setup_operator_directories(args.base_dir, args.include_all) + + +if __name__ == "__main__": + main() diff --git a/test/test_backend_evaluation.py b/test/test_backend_evaluation.py new file mode 100644 index 0000000..3412ae0 --- 
class TestBackendEvaluation(unittest.TestCase):
    """Comprehensive test for backend evaluation system.

    The tests share the ``generated_kernels`` directory that ``setUpClass``
    populates with watermarked implementations, i.e. kernels that return a
    constant and are therefore expected to fail correctness checks.
    """

    @classmethod
    def setUpClass(cls):
        """Generate required directory structure and operators."""
        # Generate the directory structure
        subprocess.run(
            [sys.executable, "-m", "BackendBench.scripts.setup_operator_directories"], check=True
        )
        # Create watermarked implementations
        subprocess.run(
            [
                sys.executable,
                "-m",
                "BackendBench.scripts.create_watermarked_operators",
                "--overwrite",
            ],
            check=True,
        )

    @staticmethod
    def _banner(title):
        """Print the framed heading that introduces each test's output."""
        print("\n" + "=" * 60)
        print(title)
        print("=" * 60)

    def test_1_directory_backend_loads_operators(self):
        """Test 1: Verify DirectoryBackend loads operators correctly."""
        self._banner("TEST 1: DirectoryBackend Operator Loading")

        backend = DirectoryBackend("generated_kernels")
        operator_count = len(backend.compiled_kernels)

        print(f"\n📊 Loaded {operator_count} operators")

        # List some examples
        print("\n📋 Sample operators:")
        sample = list(backend.compiled_kernels)[:5]
        for position, op in enumerate(sample, start=1):
            print(f" {position}. {op}")
        # Only mention a remainder when there is one; with five or fewer
        # operators this used to print a zero or negative count.
        remaining = operator_count - len(sample)
        if remaining > 0:
            print(f" ... and {remaining} more")

        # Verify we loaded a substantial number
        self.assertGreater(operator_count, 100, "Should load many operators from generated_kernels")

        print(f"\n✅ SUCCESS: DirectoryBackend loaded {operator_count} total operators")

    def test_2_watermarked_implementations_fail_correctness(self):
        """Test 2: Verify watermarked operators fail eval_correctness (proving monkey patching)."""
        self._banner("TEST 2: Watermarked Implementation Correctness")

        backend = DirectoryBackend("generated_kernels")

        print("\n🧪 Testing watermarked operators with eval_correctness:")

        failed_count = 0
        total_tested = 0

        # Test several operators that should have watermarked implementations
        test_ops = [
            (
                torch.ops.aten.bitwise_and.Tensor,
                lambda: torch.tensor([1, 2, 3]),
                lambda: torch.tensor([2, 3, 4]),
            ),
            (
                torch.ops.aten.fmod.Tensor,
                lambda: torch.tensor([5.0, 7.0]),
                lambda: torch.tensor([2.0, 3.0]),
            ),
        ]

        for op, *arg_generators in test_ops:
            if op not in backend:
                continue
            label = str(op).split(".")[-2]
            try:
                impl = backend[op]
                test = Test(*arg_generators)
                correctness = eval_correctness(op, impl, [test])

                total_tested += 1
                if correctness == 0.0:
                    failed_count += 1
                    print(f" ✓ {label}: Failed correctness (watermarked)")
                else:
                    print(f" ✗ {label}: Passed correctness unexpectedly")

            except Exception as e:
                # Reported rather than raised so the remaining operators are
                # still exercised; the assertions below decide the outcome.
                print(f" ? {label}: Error testing - {e}")

        print(f"\n📊 Results: {failed_count}/{total_tested} operators failed correctness")

        # Distinguish "nothing could be evaluated" from "watermarks passed".
        self.assertGreater(total_tested, 0, "No watermarked operator could be evaluated")
        self.assertGreater(failed_count, 0, "At least some watermarked ops should fail")

        print(" This proves our watermarked implementations are being used!")

    def test_3_main_script_evaluation(self):
        """Test 3: Verify main.py script works with DirectoryBackend."""
        self._banner("TEST 3: Main Script Evaluation")

        cmd = [
            sys.executable,
            "-m",
            "BackendBench.scripts.main",
            "--backend",
            "directory",
            "--suite",
            "smoke",
            "--log-level",
            "ERROR",
        ]

        print("\n🚀 Running: " + " ".join(cmd))
        print(" (This uses eval.py internally for correctness evaluation)")

        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)

        print("\n📊 Evaluation Results:")
        if result.stdout:
            for line in result.stdout.strip().split("\n"):
                if "score" in line:
                    print(f" {line}")

        # Should complete without crashing; stderr is attached because the
        # captured output is otherwise lost when the assertion fails.
        self.assertEqual(
            result.returncode,
            0,
            f"Main script should complete successfully\n{result.stderr}",
        )

        print("\n✅ SUCCESS: Main script evaluation completed")

    def test_4_eval_integration(self):
        """Test 4: Verify eval.py functions work correctly."""
        self._banner("TEST 4: eval.py Integration")

        backend = DirectoryBackend("generated_kernels")

        print("\n🔧 Testing eval_one_op function:")

        # Find a watermarked operator to test
        test_op = next(
            (
                op
                for op in backend.compiled_kernels
                if "bitwise_and" in str(op) and "Tensor" in str(op)
            ),
            None,
        )

        if test_op is None:
            # Report a skip instead of a pass: nothing was verified.
            self.skipTest("No suitable test operator found, skipping detailed test")

        impl = backend[test_op]
        test = Test(lambda: torch.tensor([1, 2, 3]), lambda: torch.tensor([2, 3, 4]))

        correctness, performance = eval_one_op(test_op, impl, [test], [test])

        print(f" Operation: {test_op}")
        print(f" Correctness: {correctness}")
        print(f" Performance: {performance}")

        # Watermarked implementation should fail correctness
        self.assertEqual(correctness, 0.0, "Watermarked implementation should fail correctness")

        print(" ✓ eval_one_op works correctly with watermarked implementation")

        print("\n✅ SUCCESS: eval.py integration verified")