@@ -6,8 +6,9 @@
 
 # pyre-strict
 
+import logging
 from math import prod
-from typing import Optional, Tuple
+from typing import Callable, Optional, Tuple
 
 import torch
 from executorch.backends.cadence.aot.utils import (
@@ -21,6 +22,61 @@
 
 lib = Library("cadence", "DEF")
 
+# Track meta kernels that have been registered
+_REGISTERED_META_KERNELS: set[str] = set()
+
+
+# Original register_fake function to use for registrations
+_register_fake_original = register_fake
+
+_OUTPUTS_TYPE = torch.Tensor | tuple[torch.Tensor, ...]
+
+
+def _validate_ref_impl_exists() -> None:
+    """
+    Validates that all registered meta kernels have corresponding reference implementations.
+    This is called at module initialization time after both files have been imported.
+    """
+    # Import here after module initialization to ensure ref_implementations has been loaded
+    from executorch.backends.cadence.aot.ref_implementations import (
+        get_registered_ref_implementations,
+    )
+
+    ref_impls = get_registered_ref_implementations()
+    missing_impls = []
+
+    for op_name in _REGISTERED_META_KERNELS:
+        # Strip the namespace prefix if present (e.g., "cadence::foo" -> "foo")
+        op_name_clean = op_name.split("::")[-1] if "::" in op_name else op_name
+
+        if op_name_clean not in ref_impls:
+            missing_impls.append(op_name)
+
+    if missing_impls:
+        error_msg = (
+            "The following meta kernel registrations are missing reference implementations:\n"
+            + "\n".join(f"  - {op}" for op in missing_impls)
+            + "\n\nPlease add reference implementations in ref_implementations.py using "
+            + "@impl_tracked(m, '<op_name>')."
+        )
+
+        # TODO: T241466288 Make this an error once all meta kernels have reference implementations
+        logging.warning(error_msg)
+
+
+# Wrap register_fake to track all registrations
+def register_fake(
+    op_name: str,
+) -> Callable[[Callable[..., _OUTPUTS_TYPE]], Callable[..., _OUTPUTS_TYPE]]:
+    """
+    Wrapped version of register_fake that tracks all meta kernel registrations.
+    This enables validation that all meta kernels have reference implementations.
+    """
+    global _REGISTERED_META_KERNELS
+    _REGISTERED_META_KERNELS.add(op_name)
+    return _register_fake_original(op_name)
+
+
 lib.define(
     "quantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
 )
@@ -2406,7 +2462,9 @@ def idma_load_impl(
     task_num: int = 0,
     channel: int = 0,
 ) -> torch.Tensor:
-    return copy_idma_copy_impl(src, task_num, channel)
+    res = copy_idma_copy_impl(src, task_num, channel)
+    assert isinstance(res, torch.Tensor)
+    return res
 
 
 @register_fake("cadence::idma_store")
@@ -2415,7 +2473,9 @@ def idma_store_impl(
     task_num: int = 0,
     channel: int = 0,
 ) -> torch.Tensor:
-    return copy_idma_copy_impl(src, task_num, channel)
+    res = copy_idma_copy_impl(src, task_num, channel)
+    assert isinstance(res, torch.Tensor)
+    return res
 
 
 @register_fake("cadence::roi_align_box_processor")
@@ -2671,3 +2731,8 @@ def quantized_w8a32_gru_meta(
     b_h_scale: float,
 ) -> torch.Tensor:
     return inputs.new_empty((2, hidden.shape[-1]), dtype=inputs.dtype)
+
+
+# Validate that all meta kernels have reference implementations
+# This is called at module import time to catch missing implementations early
+_validate_ref_impl_exists()
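
For context: the validation above relies on ref_implementations.py exposing a registry of its own registrations. The names impl_tracked and get_registered_ref_implementations come from this patch, but their bodies are not shown here, so the following is only a minimal sketch of what that counterpart might look like, assuming impl_tracked wraps torch.library.Library.impl:

# Hypothetical sketch of the tracking side of ref_implementations.py.
# Only the names impl_tracked and get_registered_ref_implementations appear
# in the patch above; everything else here is an illustrative assumption.
from typing import Callable

import torch

# Names of ops that have reference implementations, queried by the validator.
_REGISTERED_REF_IMPLEMENTATIONS: set[str] = set()


def impl_tracked(
    lib: torch.library.Library, op_name: str
) -> Callable[[Callable[..., torch.Tensor]], Callable[..., torch.Tensor]]:
    """Record op_name, then register fn as its reference implementation."""
    _REGISTERED_REF_IMPLEMENTATIONS.add(op_name)

    def decorator(fn: Callable[..., torch.Tensor]) -> Callable[..., torch.Tensor]:
        lib.impl(op_name, fn, "CompositeExplicitAutograd")
        return fn

    return decorator


def get_registered_ref_implementations() -> set[str]:
    return _REGISTERED_REF_IMPLEMENTATIONS

Under this reading, the deferred import inside _validate_ref_impl_exists() ensures the registry is fully populated before it is compared against _REGISTERED_META_KERNELS (and, plausibly, avoids a circular import between the two modules).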