
Commit fb8a6a0

Andrew Grebenisan authored and facebook-github-bot committed
Migrate uncommon cadence custom ops from Jarvis.nn.ref_implementations -> executorch ref_implementations
Summary: It turns out there was duplication in the cadence custom op ref implementation files, which could lead to an op-name registry collision ("op name was already registered"). Resolved by migrating the uncommon ops from Jarvis.nn.ref_implementations to the executorch ref_implementations, deleting the Jarvis file, and updating all of the dependencies.

Reviewed By: mcremon-meta

Differential Revision: D82566217
1 parent 108d29d commit fb8a6a0
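The collision described in the summary comes from torch.library registration: two files that register a reference implementation for the same op name, namespace, and dispatch key cannot both be imported. A minimal sketch of how this typically surfaces, using a hypothetical `demo_ns` namespace and `my_op` operator rather than the actual cadence ops:

import torch
from torch.library import Library, impl

# Hypothetical namespace/op, used only to illustrate the failure mode.
defs = Library("demo_ns", "DEF")
defs.define("my_op(Tensor x) -> Tensor")

m = Library("demo_ns", "IMPL", "CompositeExplicitAutograd")

@impl(m, "my_op")
def my_op_ref(x: torch.Tensor) -> torch.Tensor:
    return x + 1

# A second registration for the same op and dispatch key -- e.g. from a
# duplicated ref-implementation file imported elsewhere -- is rejected.
try:
    @impl(m, "my_op")
    def my_op_ref_dup(x: torch.Tensor) -> torch.Tensor:
        return x + 1
except RuntimeError as exc:
    print(f"duplicate registration rejected: {exc}")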

File tree

3 files changed (+75, −29 lines)


backends/cadence/aot/TARGETS

Lines changed: 2 additions & 0 deletions
@@ -130,6 +130,8 @@ runtime.python_library(
     deps = [
         "fbcode//caffe2:torch",
         "fbcode//executorch/exir:scalar_type",
+        "fbcode//on_device_ai/Assistant/Jarvis/nn:roi_align_utils",
+        "fbcode//executorch/kernels/quantized:custom_ops_generated_lib",
     ],
 )

backends/cadence/aot/ref_implementations.py

Lines changed: 70 additions & 25 deletions
@@ -6,16 +6,18 @@

 # pyre-strict

-
 from typing import Callable

 import torch
+import torch.nn as nn
+import torch.nn.functional as F

 from executorch.exir.scalar_type import ScalarType
+from on_device_ai.Assistant.Jarvis.nn.roi_align_utils import convertBoxPosToTuringConfig
 from torch.library import impl, Library

-
 m = Library("cadence", "IMPL", "CompositeExplicitAutograd")
+torch.ops.load_library("//executorch/kernels/quantized:custom_ops_generated_lib")

 qdtype_map: dict[ScalarType, torch.dtype] = {
     ScalarType.QINT8: torch.qint8,
@@ -38,7 +40,7 @@ def quantize_per_tensor(

     Args:
         - input_tensor (Tensor): input tensor
-        - scale (float): Inverse of quantization scale. Derived from the ratio
+        - scale (float): Quantization scale. Derived from the ratio
            between the min/max of the floating-point tensor and the
            min/max of the quantized range, and then inverted.
         - zero_point (int): The point which represents 0 in the quantized
@@ -64,7 +66,8 @@ def quantize_per_tensor(
             f"Unsupported dtype to quantize to. Supported dtypes must be one of {supported_quant_types}"
         )

-    quantized = torch.round(input_tensor * scale + zero_point).to(dtype)
+    inv_scale = 1.0 / scale
+    quantized = torch.round(input_tensor * inv_scale + zero_point).to(dtype)
     return torch.max(
         torch.min(quantized, torch.tensor(quant_max)),
         torch.tensor(quant_min),
@@ -97,7 +100,7 @@ def dequantize_per_tensor(
            is already provided.
         - quant_max (int): The largest value in the quantized domain. Unused since scale
            is already provided.
-        - dtype (torch.dtype): The type of the output tensor. Must be a floating point type.
+        - dtype (torch.dtype): The type of the input tensor.
     """
     supported_quant_types = [
         torch.int8,
@@ -108,23 +111,15 @@
     ]
     if input_tensor.dtype not in supported_quant_types:
         raise ValueError(f"Input dtype must be one of {supported_quant_types}")
-    supported_dequant_types = [
-        torch.float,
-        torch.float32,
-        torch.float16,
-        torch.bfloat16,
-    ]
-    if dtype not in supported_dequant_types:
-        raise ValueError(
-            f"Unsupported dtype to dequantize to. Supported dtypes must be one of {supported_dequant_types}"
-        )
+    if input_tensor.dtype != dtype:
+        raise ValueError("Input dtype must match dtype")

     # Needed to prevent underflow in cases where the zero_point is larger than
     # the quantized value.
     if not input_tensor.dtype.is_signed:
         input_tensor = input_tensor.to(torch.int32)

-    return (input_tensor - zero_point).to(dtype) * scale
+    return ((input_tensor - zero_point) * scale).to(torch.float32)


 @impl(m, "quantized_add.per_tensor")
@@ -180,12 +175,10 @@ def quantized_add_per_tensor(
     dequant_X = X_scale * (X - X_zero_point)
     dequant_Y = Y_scale * (Y - Y_zero_point)

-    out_scale_inv = 1 / out_scale
-
     # q_min/q_max are unused args
     return quantize_per_tensor(
         dequant_X + dequant_Y,
-        out_scale_inv,
+        out_scale,
         out_zero_point,
         torch.iinfo(dtype).min,
         torch.iinfo(dtype).max,
@@ -260,7 +253,6 @@ def quantized_linear_common(
        - offset (Tensor): Unused
     """
     out_scale = -out_multiplier * (1 / (1 << 31)) * (2**out_shift)
-    out_scale_inv = 1 / out_scale

     N, K = weight.shape

@@ -281,7 +273,7 @@
     )
     return quantize_per_tensor(
         out,
-        out_scale_inv,
+        out_scale,
         out_zero_point,
         torch.iinfo(dtype).min,
         torch.iinfo(dtype).max,
@@ -399,6 +391,17 @@ def quantized_fully_connected_asym8sxasym8s_asym8s_per_tensor() -> torch.Tensor:
 def quantized_fully_connected_asym8uxasym8u_asym8u_per_tensor() -> torch.Tensor: ...


+@impl(m, "fully_connected")
+def fully_connected(
+    input_tensor: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+) -> torch.Tensor:
+    if input_tensor.shape[0] != 1:
+        raise ValueError("Fully connected linear only supports batch size of 1")
+    return F.linear(input_tensor, weight, bias)
+
+
 @impl(m, "quantized_matmul")
 def quantized_matmul(
     X: torch.Tensor,
@@ -538,15 +541,15 @@ def quantized_layer_norm_per_tensor(
     )

     float_input_tensor = dequantize_per_tensor(
-        input_tensor, X_scale, X_zero_point, -128, 127, torch.float32
+        input_tensor, X_scale, X_zero_point, -128, 127, input_tensor.dtype
     )
     out = torch.nn.functional.layer_norm(
         float_input_tensor, normalized_shape, weight, bias, eps=eps
     )

     return quantize_per_tensor(
         out,
-        1 / output_scale,
+        output_scale,
         output_zero_point,
         torch.iinfo(input_tensor.dtype).min,
         torch.iinfo(input_tensor.dtype).max,
@@ -615,7 +618,7 @@ def quantized_conv_per_tensor(

     return quantize_per_tensor(
         float_out,
-        1.0 / output_scale,
+        output_scale,
         output_zero_point,
         torch.iinfo(input_tensor.dtype).min,
         torch.iinfo(input_tensor.dtype).max,
@@ -942,7 +945,7 @@ def quantized_relu_common(
     if X.dtype not in supported_dtypes:
         raise ValueError(f"X dtype must be one of {supported_dtypes}. Got {X.dtype}")

-    out_scale = -out_multiplier * (1 / (1 << 31)) * (2**out_shift)
+    out_scale = 1.0 / (-out_multiplier * (1 / (1 << 31)) * (2**out_shift))
     dequantized_X = torch.where(X > X_zero_point, X - X_zero_point, torch.zeros_like(X))
     return quantize_per_tensor(
         dequantized_X,
@@ -1068,3 +1071,45 @@ def requantize(
         out_quant_max,
         dtype,
     )
+
+
+@impl(m, "roi_align_box_processor")
+def roi_align_box_processor(
+    rois: torch.Tensor,
+    output_size_h: int,
+    output_size_w: int,
+    sampling_ratio: int,
+    aligned: bool,
+) -> torch.Tensor:
+    K = rois.shape[0]
+    turing_rois = []
+    for i in range(K):
+        x1 = rois[i][1].item()
+        y1 = rois[i][2].item()
+        x2 = rois[i][3].item()
+        y2 = rois[i][4].item()
+        topLeftXY = (x1, y1)
+        bottomRightXY = (x2, y2)
+        turing_roi = convertBoxPosToTuringConfig(
+            topLeftXY,
+            bottomRightXY,
+            K,
+            output_size_h,
+            output_size_w,
+            sampling_ratio,
+            aligned,
+        )
+        turing_rois.append(torch.frombuffer(turing_roi, dtype=torch.uint8))
+
+    out = torch.stack(turing_rois)
+    return out
+
+
+@impl(m, "rms_norm")
+def rms_norm(
+    X: torch.Tensor,
+    normalized_shape: tuple[int],
+    W: torch.Tensor,
+    eps: float,
+) -> torch.Tensor:
+    return W * nn.RMSNorm(list(normalized_shape), eps=eps, dtype=X.dtype)(X)
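
The net effect of the ref_implementations.py changes above is that quantize_per_tensor (and the ops that call it) now receives the real quantization scale and inverts it internally, while dequantize_per_tensor now expects dtype to describe the input and always returns float32. A standalone sketch of the updated round-trip semantics, using plain torch helpers (quantize_ref / dequantize_ref are illustrative names, not the registered cadence ops):

import torch


def quantize_ref(
    x: torch.Tensor,
    scale: float,
    zero_point: int,
    quant_min: int,
    quant_max: int,
    dtype: torch.dtype,
) -> torch.Tensor:
    # Mirrors the updated reference op: the caller passes the real scale,
    # and the inverse is computed internally before rounding and clamping.
    inv_scale = 1.0 / scale
    q = torch.round(x * inv_scale + zero_point).to(dtype)
    return torch.clamp(q, quant_min, quant_max)


def dequantize_ref(q: torch.Tensor, scale: float, zero_point: int) -> torch.Tensor:
    # Mirrors the updated reference op: widen, subtract the zero point,
    # rescale, and return float32.
    return ((q.to(torch.int32) - zero_point) * scale).to(torch.float32)


x = torch.tensor([0.0, 0.5, 1.0])
scale, zero_point = 1.0 / 127, 0      # roughly maps [-1.0, 1.0] onto int8
q = quantize_ref(x, scale, zero_point, -128, 127, torch.int8)
print(q)                               # 0, 64, 127 as int8
print(dequantize_ref(q, scale, zero_point))  # approximately recovers the inputs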

backends/cadence/aot/tests/test_ref_implementations.py

Lines changed: 3 additions & 4 deletions
@@ -36,12 +36,11 @@ def test_quantize_per_tensor(
     ) -> None:
         input_tensor = torch.tensor([input_value])
         scale = (f_max - f_min) / (q_max - q_min)
-        inv_scale = 1.0 / scale
-        zero_point = round(-f_min * inv_scale) + q_min
+        zero_point = round(-f_min * 1 / scale) + q_min
         expected_output = torch.tensor([expected_value], dtype=target_dtype)

         output = torch.ops.cadence.quantize_per_tensor(
-            input_tensor, inv_scale, zero_point, q_min, q_max, target_dtype
+            input_tensor, scale, zero_point, q_min, q_max, target_dtype
         )

         self.assertEqual(
@@ -85,7 +84,7 @@ def test_dequantize_per_tensor(
         expected_output = torch.tensor([expected_value], dtype=torch.float32)

         output = torch.ops.cadence.dequantize_per_tensor(
-            input_tensor, scale, zero_point, q_min, q_max, torch.float32
+            input_tensor, scale, zero_point, q_min, q_max, input_tensor.dtype
         )

         self.assertEqual(
