
Commit 9cb2e22

joeycarter and rmoyard authored
[MLIR] Add type checking in JVPOp::verifySymbolUses (#1020)
**Context:** Add type checking in the JVP MLIR operation like we have in the equivalent VJP operation.

**Description of the Change:** In `JVPOp::verifySymbolUses()`, gather up the data types of the tangent parameters and check them one by one against the data types of the corresponding callee input types, in a similar manner as is currently done in `VJPOp::verifySymbolUses()`.

**Benefits:** The goal is to avoid triggering [this assert](https://github.com/PennyLaneAI/catalyst/blob/6c0ed0b528119b78bc32172780350ff1bc760424/mlir/lib/Gradient/Utils/EinsumLinalgGeneric.cpp#L100) by doing the type checking earlier and printing a more descriptive error message.

**Possible Drawbacks:** None.

**Related GitHub Issues:** [sc-48792]

---------

Co-authored-by: Romain Moyard <[email protected]>
1 parent e1e12fe commit 9cb2e22
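
The check described in the commit message boils down to a pairwise comparison of each tangent operand's type against the corresponding callee input type. The committed implementation appears in the `GradientOps.cpp` diff below; the helper here is only a distilled, standalone sketch of that comparison, and the function name `findFirstTypeMismatch` is hypothetical rather than part of the change.

```cpp
// Illustrative sketch only, not part of this commit: return the index of the
// first tangent whose type differs from the corresponding callee input type,
// or std::nullopt if every checked pair matches. The caller is expected to
// have already validated that there are no more tangents than callee inputs.
#include <cstddef>
#include <optional>

#include "llvm/ADT/ArrayRef.h"
#include "mlir/IR/Types.h"

static std::optional<size_t> findFirstTypeMismatch(llvm::ArrayRef<mlir::Type> calleeInputTypes,
                                                   llvm::ArrayRef<mlir::Type> tangentTypes)
{
    for (size_t i = 0; i < tangentTypes.size(); ++i) {
        // mlir::Type is a uniqued value type, so != is an exact (and cheap)
        // type identity comparison.
        if (calleeInputTypes[i] != tangentTypes[i]) {
            return i;
        }
    }
    return std::nullopt;
}
```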

File tree

- doc/changelog.md
- mlir/lib/Gradient/IR/GradientOps.cpp
- mlir/test/Gradient/VerifierTest.mlir

3 files changed: +92 -0 lines changed

doc/changelog.md

Lines changed: 6 additions & 0 deletions
@@ -290,6 +290,11 @@
   are no longer decomposed when using Catalyst, improving compilation & runtime performance.
   [(#955)](https://github.com/PennyLaneAI/catalyst/pull/955)
 
+* Improve error messaging for `catalyst.jvp` when the callee input type and the tangent
+  type are not compatible by performing type-checking at the MLIR level. Note that the
+  equivalent type checking is already performed in `catalyst.vjp`.
+  [(#1020)](https://github.com/PennyLaneAI/catalyst/pull/1020)
+
 <h3>Breaking changes</h3>
 
 * Return values of qjit-compiled functions that were previously `numpy.ndarray` are now of type
@@ -468,6 +473,7 @@
 
 This release contains contributions from (in alphabetical order):
 
+Joey Carter,
 Alessandro Cosentino,
 Lillian M. A. Frederiksen,
 Josh Izaac,

mlir/lib/Gradient/IR/GradientOps.cpp

Lines changed: 22 additions & 0 deletions
@@ -317,6 +317,28 @@ LogicalResult JVPOp::verifySymbolUses(SymbolTableCollection &symbolTable)
         }
     }
 
+    std::vector<Type> tanTypes;
+    {
+        auto tanOperands = OperandRange(
+            this->operand_begin() + callee.getFunctionType().getNumInputs(), this->operand_end());
+        for (auto c : tanOperands) {
+            tanTypes.push_back(c.getType());
+        }
+    }
+
+    auto calleeInputTypes = callee.getFunctionType().getInputs();
+
+    // Check that callee inputs have the same types as tangent inputs
+    for (size_t i = 0; i < tanTypes.size(); i++) {
+        auto tanType = tanTypes[i];
+        auto cIType = calleeInputTypes[i];
+        if (tanType != cIType) {
+            return this->emitOpError("callee input type does not match the tangent type")
+                   << " callee input " << i << " was expected to be of type " << tanType
+                   << " but got " << cIType;
+        }
+    }
+
     return success();
 }
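
For comparison, the same pairwise check can be written with the LLVM ADT range helpers that ship with MLIR (`llvm::zip` and `llvm::enumerate` from `llvm/ADT/STLExtras.h`), avoiding the temporary `std::vector<Type>` and the manual indexing; `llvm::zip` stops at the shorter range, which matches the original loop bound of `tanTypes.size()`. This is only a sketch of an alternative formulation, not the committed code, and it reuses the accessor expressions from the diff above.

```cpp
// Sketch only: an equivalent way to write the check inside
// JVPOp::verifySymbolUses(), iterating tangent operands and callee input
// types in lockstep instead of building a temporary vector of types.
#include "llvm/ADT/STLExtras.h" // llvm::enumerate, llvm::zip

auto tanOperands = OperandRange(
    this->operand_begin() + callee.getFunctionType().getNumInputs(), this->operand_end());
auto calleeInputTypes = callee.getFunctionType().getInputs();

for (const auto &it : llvm::enumerate(llvm::zip(tanOperands, calleeInputTypes))) {
    auto [tanOperand, calleeInputType] = it.value();
    if (tanOperand.getType() != calleeInputType) {
        return this->emitOpError("callee input type does not match the tangent type")
               << " callee input " << it.index() << " was expected to be of type "
               << tanOperand.getType() << " but got " << calleeInputType;
    }
}
```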

mlir/test/Gradient/VerifierTest.mlir

Lines changed: 64 additions & 0 deletions
@@ -268,6 +268,37 @@ gradient.jvp "fd" @measure(%cst0) tangents(%cst1) : (tensor<2xf64>, tensor<2xf64
 
 // -----
 
+func.func @foo(%arg0: tensor<f64>) -> (tensor<f64>, tensor<f64>) {
+
+    %cst = stablehlo.constant dense<2.000000e+00> : tensor<f64>
+    %0 = stablehlo.multiply %cst, %arg0 : tensor<f64>
+    %1 = stablehlo.multiply %arg0, %arg0 : tensor<f64>
+    return %0, %1 : tensor<f64>, tensor<f64>
+
+}
+
+%cst0 = arith.constant dense<1.0> : tensor<f64>
+%cst1 = arith.constant dense<1.0> : tensor<f64>
+gradient.jvp "auto" @foo(%cst0) tangents(%cst1) : (tensor<f64>, tensor<f64>) -> (tensor<f64>, tensor<f64>, tensor<f64>, tensor<f64>)
+
+// -----
+
+func.func @foo(%arg0: tensor<f64>) -> (tensor<f64>, tensor<f64>) {
+
+    %cst = stablehlo.constant dense<2.000000e+00> : tensor<f64>
+    %0 = stablehlo.multiply %cst, %arg0 : tensor<f64>
+    %1 = stablehlo.multiply %arg0, %arg0 : tensor<f64>
+    return %0, %1 : tensor<f64>, tensor<f64>
+
+}
+
+%cst0 = arith.constant dense<1.0> : tensor<f64>
+%cst1 = arith.constant dense<1> : tensor<i64>
+// expected-error@+1 {{callee input type does not match the tangent type}}
+gradient.jvp "auto" @foo(%cst0) tangents(%cst1) : (tensor<f64>, tensor<i64>) -> (tensor<f64>, tensor<f64>, tensor<f64>, tensor<f64>)
+
+// -----
+
 func.func @measure(%arg0: tensor<2xf64>) -> tensor<2xf64> {
 
     %c0 = arith.constant 0 : i64
@@ -303,6 +334,39 @@ gradient.vjp "fd" @measure(%cst0) cotangents(%cst1) {resultSegmentSizes = array<
 
 // -----
 
+func.func @foo(%arg0: tensor<f64>) -> (tensor<f64>, tensor<f64>) {
+
+    %cst = stablehlo.constant dense<2.000000e+00> : tensor<f64>
+    %0 = stablehlo.multiply %cst, %arg0 : tensor<f64>
+    %1 = stablehlo.multiply %arg0, %arg0 : tensor<f64>
+    return %0, %1 : tensor<f64>, tensor<f64>
+
+}
+
+%cst0 = arith.constant dense<1.0> : tensor<f64>
+%cst1 = arith.constant dense<1.0> : tensor<f64>
+%cst2 = arith.constant dense<1.0> : tensor<f64>
+gradient.vjp "auto" @foo(%cst0) cotangents(%cst1, %cst2) {resultSegmentSizes = array<i32: 2, 1>} : (tensor<f64>, tensor<f64>, tensor<f64>) -> (tensor<f64>, tensor<f64>, tensor<f64>)
+
+// -----
+
+func.func @foo(%arg0: tensor<f64>) -> (tensor<f64>, tensor<f64>) {
+
+    %cst = stablehlo.constant dense<2.000000e+00> : tensor<f64>
+    %0 = stablehlo.multiply %cst, %arg0 : tensor<f64>
+    %1 = stablehlo.multiply %arg0, %arg0 : tensor<f64>
+    return %0, %1 : tensor<f64>, tensor<f64>
+
+}
+
+%cst0 = arith.constant dense<1.0> : tensor<f64>
+%cst1 = arith.constant dense<1> : tensor<i64>
+%cst2 = arith.constant dense<1> : tensor<i64>
+// expected-error@+1 {{callee result type does not match the cotangent type}}
+gradient.vjp "auto" @foo(%cst0) cotangents(%cst1, %cst2) {resultSegmentSizes = array<i32: 2, 1>} : (tensor<f64>, tensor<i64>, tensor<i64>) -> (tensor<f64>, tensor<f64>, tensor<f64>)
+
+// -----
+
 module @grad.wrapper {
   func.func public @jit_grad.wrapper(%arg0: tensor<2xf64>) -> tensor<2xf64> attributes {llvm.emit_c_interface} {
     %0 = gradient.grad "auto" @wrapper(%arg0) {diffArgIndices = dense<0> : tensor<1xi64>} : (tensor<2xf64>) -> tensor<2xf64>
