Commit 45f4bd2
Add fix for finite difference jacobian (#1769)
**Context:** The new bufferization pipeline does not appear to correctly bufferize the `tensor.generate` operation. We see it generate the following code inside `tensor.generate`:

```mlir
%8 = linalg.index 0 : index
%9 = linalg.index 1 : index
%10 = memref.load %arg0[%9] : memref<2xf64>
%11 = arith.addf %10, %cst : f64
memref.store %11, %arg0[%9] : memref<2xf64>
%12 = func.call @circuit_0(%arg0) : (memref<2xf64>) -> memref<2xf64>
```

This code clearly modifies the value stored in memref `%arg0` during each execution of `tensor.generate` (or `linalg.map` after bufferization). Before the new bufferization, the correct code was as follows:

```mlir
%8 = linalg.index 0 : index
%9 = linalg.index 1 : index
%10 = memref.load %arg0[%9] : memref<2xf64>
%11 = arith.addf %10, %cst : f64
%alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<2xf64>
memref.copy %arg0, %alloc_1 : memref<2xf64> to memref<2xf64>
memref.store %11, %alloc_1[%9] : memref<2xf64>
%12 = func.call @circuit_0(%alloc_1) : (memref<2xf64>) -> memref<2xf64>
```

**Description of the Change:** An explicit copy of the differentiated argument is now made before the finite-difference shift is added to it.

**Benefits:** Correct code generation.

Upstream bug report: llvm/llvm-project#141667

[sc-92105]
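The failure mode is easy to reproduce outside MLIR. Below is a minimal NumPy sketch (illustrative only: `f`, `jacobian_fd`, and the `copy` flag are made-up names, not Catalyst code). The `copy=False` path mutates the shared input exactly like the `memref.store` into `%arg0` above, so every Jacobian column after the first is computed from a drifted base point; the `copy=True` path corresponds to the `memref.alloc` + `memref.copy` pair in the correct code.

```python
import numpy as np

def f(x):
    # Stand-in for the compiled function being differentiated.
    return np.array([x[0] * x[1], x[0] + x[1]])

def jacobian_fd(x, h=0.3, copy=True):
    """Forward-difference Jacobian: column j is (f(x + h*e_j) - f(x)) / h."""
    base = f(x)
    jac = np.zeros((base.size, x.size))
    for j in range(x.size):
        shifted = x.copy() if copy else x  # the fix: shift a private copy
        shifted[j] += h                    # without the copy, x itself mutates
        jac[:, j] = (f(shifted) - base) / h
    return jac

x = np.array([2.0, 1.0])
print(jacobian_fd(x.copy(), copy=True))   # correct: [[1.0, 2.0], [1.0, 1.0]]
print(jacobian_fd(x.copy(), copy=False))  # column 1 is wrong: column 0's shift leaked into x
```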

4 files changed (+101, −12 lines)

doc/releases/changelog-dev.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -255,6 +255,7 @@
   [(#1708)](https://github.com/PennyLaneAI/catalyst/pull/1708)
   [(#1740)](https://github.com/PennyLaneAI/catalyst/pull/1740)
   [(#1751)](https://github.com/PennyLaneAI/catalyst/pull/1751)
+  [(#1769)](https://github.com/PennyLaneAI/catalyst/pull/1769)

 * Redundant `OptionalAttr` is removed from `adjoint` argument in `QuantumOps.td` TableGen file
   [(#1746)](https://github.com/PennyLaneAI/catalyst/pull/1746)
```

frontend/test/pytest/test_gradient.py

Lines changed: 52 additions & 0 deletions

````diff
@@ -2274,5 +2274,57 @@ def circuit(x, y):
     assert np.allclose(expected, observed)


+def test_bufferization_inside_tensor_generate(backend):
+    """This tests specifically for a bug already
+    filed in LLVM: https://github.com/llvm/llvm-project/issues/141667
+    The issue is that linalg structured operations cannot be nested,
+    but finite differences will generate code like:
+
+    ```
+    %h_val
+    %arg
+    tensor.generate {
+      %shifted = arith.addf %h_val, %arg
+      func.call @func(%shifted)
+    }
+    ```
+
+    which during bufferization will be:
+
+    ```
+    linalg.map {
+      memref.store %arg0, %shifted
+      func.call @func(arg0)
+    }
+    ```
+
+    which means the value of arg0 will be modified
+    after each iteration of linalg.map.
+
+    To prevent this, we inserted copies. See
+    https://github.com/PennyLaneAI/catalyst/pull/1769
+    for the implementation.
+    """
+
+    inp = np.array([2.0, 1.0])
+
+    @qjit
+    def workflow(x):
+        @qml.qnode(qml.device(backend, wires=1))
+        def circuit(x):
+            qml.RX(np.pi * x[0], wires=0)
+            qml.RY(x[1], wires=0)
+            return qml.probs()
+
+        g = qml.jacobian(circuit, method="fd", h=0.3)
+        return g(x)
+
+    result = workflow(inp)
+    reference = np.array([[-0.37120096, -0.45467246], [0.37120096, 0.45467246]])
+    assert np.allclose(result, reference)
+    # Also check that the input has not been modified
+    assert np.allclose([2.0, 1.0], inp)
+
+
 if __name__ == "__main__":
     pytest.main(["-x", __file__])
````
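The reference values can be sanity-checked outside Catalyst. Here is a minimal NumPy sketch, assuming `method="fd"` is the one-sided forward difference `(f(x + h*e_j) - f(x)) / h` suggested by the generated MLIR in the commit message (the `probs` statevector simulator below is illustrative, not PennyLane code):

```python
import numpy as np

def probs(x):
    # Statevector simulation of the tested circuit: RX(pi * x[0]) then RY(x[1]) on |0>.
    a, b = np.pi * x[0] / 2, x[1] / 2
    rx = np.array([[np.cos(a), -1j * np.sin(a)],
                   [-1j * np.sin(a), np.cos(a)]])
    ry = np.array([[np.cos(b), -np.sin(b)],
                   [np.sin(b), np.cos(b)]])
    state = ry @ (rx @ np.array([1.0, 0.0]))
    return np.abs(state) ** 2

x = np.array([2.0, 1.0])
h = 0.3
base = probs(x)
jac = np.zeros((2, 2))
for j in range(2):
    shifted = x.copy()  # the explicit copy this commit guarantees
    shifted[j] += h
    jac[:, j] = (probs(shifted) - base) / h
print(jac)  # ~[[-0.3712, -0.4547], [0.3712, 0.4547]], matching `reference` above
```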

mlir/lib/Gradient/Transforms/GradMethods/FiniteDifference.cpp

Lines changed: 23 additions & 2 deletions

```diff
@@ -20,6 +20,7 @@
 #include <vector>

 #include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"

@@ -156,12 +157,32 @@ void FiniteDiffLowering::computeFiniteDiff(PatternRewriter &rewriter, Location l
     else {
         auto bodyBuilder = [&](OpBuilder &rewriter, Location loc,
                                ValueRange tensorIndices) -> void {
+            // We need to do this to guarantee a copy here.
+            // Otherwise, each time we enter this scope, we will have a different
+            // value for diffArgElem:
+            //
+            // %memref = bufferization.to_memref %arg0 : memref<2xf64>
+            // %copy = bufferization.clone %memref : memref<2xf64> to memref<2xf64>
+            // %tensor = bufferization.to_tensor %copy restrict : memref<2xf64>
+            auto tensorTy = diffArg.getType();
+            auto memrefTy = bufferization::getMemRefTypeWithStaticIdentityLayout(
+                cast<TensorType>(tensorTy));
+            auto toMemrefOp =
+                rewriter.create<bufferization::ToMemrefOp>(loc, memrefTy, diffArg);
+
+            auto cloneOp = rewriter.create<bufferization::CloneOp>(loc, toMemrefOp);
+
+            auto toTensorOp =
+                rewriter.create<bufferization::ToTensorOp>(loc, cloneOp, true);
+
+            auto diffArgCopy = toTensorOp.getResult();
+
             Value diffArgElem = rewriter.create<tensor::ExtractOp>(
-                loc, diffArg, tensorIndices.take_back(operandRank));
+                loc, diffArgCopy, tensorIndices.take_back(operandRank));
             Value diffArgElemShifted =
                 rewriter.create<arith::AddFOp>(loc, diffArgElem, hForOperand);
             Value diffArgShifted = rewriter.create<tensor::InsertOp>(
-                loc, diffArgElemShifted, diffArg, tensorIndices.take_back(operandRank));
+                loc, diffArgElemShifted, diffArgCopy, tensorIndices.take_back(operandRank));

             std::vector<Value> callArgsForward(callArgs.begin(), callArgs.end());
             callArgsForward[diffArgIdx] = diffArgShifted;
```
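In effect, each execution of the `tensor.generate` body now extracts from and inserts into a private clone of `diffArg` rather than the shared original. The `true` passed to the `bufferization::ToTensorOp` builder appears to set the `restrict` flag shown in the inline comment; `restrict` asserts that the resulting tensor aliases no other buffer, which keeps One-Shot Bufferize from folding the clone away.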

mlir/test/Gradient/FiniteDifferenceTest.mlir

Lines changed: 25 additions & 10 deletions

```diff
@@ -161,9 +161,12 @@ func.func private @funcMultiArg(%arg0: tensor<7xf64>, %arg1: f64) -> tensor<2xf6
 // CHECK: [[BASE:%.+]] = call @funcMultiArg(%arg0, %arg1)
 // CHECK: [[DIFF:%.+]] = tensor.generate
 // CHECK-NEXT: ^bb0(%arg2: index, %arg3: index):
-// CHECK: [[VAL:%.+]] = tensor.extract %arg0[%arg3]
+// CHECK: [[MEMREF:%.+]] = bufferization.to_memref %arg0
+// CHECK: [[COPY:%.+]] = bufferization.clone [[MEMREF]]
+// CHECK: [[TENSOR:%.+]] = bufferization.to_tensor [[COPY]]
+// CHECK: [[VAL:%.+]] = tensor.extract [[TENSOR]][%arg3]
 // CHECK: [[ADD:%.+]] = arith.addf [[VAL]]
-// CHECK: [[SHIFTED:%.+]] = tensor.insert [[ADD]] into %arg0[%arg3]
+// CHECK: [[SHIFTED:%.+]] = tensor.insert [[ADD]] into [[TENSOR]][%arg3]
 // CHECK: [[EVAL:%.+]] = func.call @funcMultiArg([[SHIFTED]], %arg1)
 // CHECK: [[SUB:%.+]] = arith.subf [[EVAL]], [[BASE]]
 // CHECK: [[RES:%.+]] = tensor.extract [[SUB]][%arg2]
@@ -185,9 +188,12 @@ func.func private @funcMultiArg(%arg0: tensor<7xf64>, %arg1: f64) -> tensor<2xf6
 // CHECK: [[BASE:%.+]] = call @funcMultiArg(%arg0, %arg1)
 // CHECK: [[DIFF:%.+]] = tensor.generate
 // CHECK-NEXT: ^bb0(%arg2: index, %arg3: index):
-// CHECK: [[VAL:%.+]] = tensor.extract %arg0[%arg3]
+// CHECK: [[MEMREF:%.+]] = bufferization.to_memref %arg0
+// CHECK: [[COPY:%.+]] = bufferization.clone [[MEMREF]]
+// CHECK: [[TENSOR:%.+]] = bufferization.to_tensor [[COPY]]
+// CHECK: [[VAL:%.+]] = tensor.extract [[TENSOR]][%arg3]
 // CHECK: [[ADD:%.+]] = arith.addf [[VAL]]
-// CHECK: [[SHIFTED:%.+]] = tensor.insert [[ADD]] into %arg0[%arg3]
+// CHECK: [[SHIFTED:%.+]] = tensor.insert [[ADD]] into [[TENSOR]][%arg3]
 // CHECK: [[EVAL:%.+]] = func.call @funcMultiArg([[SHIFTED]], %arg1)
 // CHECK: [[SUB:%.+]] = arith.subf [[EVAL]], [[BASE]]
 // CHECK: [[RES:%.+]] = tensor.extract [[SUB]][%arg2]
@@ -221,18 +227,24 @@ func.func private @funcMultiRes(%arg0: tensor<7xf64>) -> (f64, tensor<2xf64>) at
 // CHECK: [[BASE:%.+]]:2 = call @funcMultiRes(%arg0)
 // CHECK: [[DIFF:%.+]] = tensor.generate
 // CHECK-NEXT: ^bb0(%arg1: index):
-// CHECK: [[VAL:%.+]] = tensor.extract %arg0[%arg1]
+// CHECK: [[MEMREF:%.+]] = bufferization.to_memref %arg0
+// CHECK: [[COPY:%.+]] = bufferization.clone [[MEMREF]]
+// CHECK: [[TENSOR:%.+]] = bufferization.to_tensor [[COPY]]
+// CHECK: [[VAL:%.+]] = tensor.extract [[TENSOR]][%arg1]
 // CHECK: [[ADD:%.+]] = arith.addf [[VAL]]
-// CHECK: [[SHIFTED:%.+]] = tensor.insert [[ADD]] into %arg0[%arg1]
+// CHECK: [[SHIFTED:%.+]] = tensor.insert [[ADD]] into [[TENSOR]][%arg1]
 // CHECK: [[EVAL:%.+]]:2 = func.call @funcMultiRes([[SHIFTED]])
 // CHECK: [[RES:%.+]] = arith.subf [[EVAL]]#0, [[BASE]]#0
 // CHECK: tensor.yield [[RES]]
 // CHECK: [[R0:%.+]] = arith.divf [[DIFF]]
 // CHECK: [[DIFF:%.+]] = tensor.generate
 // CHECK-NEXT: ^bb0(%arg1: index, %arg2: index):
-// CHECK: [[VAL:%.+]] = tensor.extract %arg0[%arg2]
+// CHECK: [[MEMREF:%.+]] = bufferization.to_memref %arg0
+// CHECK: [[COPY:%.+]] = bufferization.clone [[MEMREF]]
+// CHECK: [[TENSOR:%.+]] = bufferization.to_tensor [[COPY]]
+// CHECK: [[VAL:%.+]] = tensor.extract [[TENSOR]][%arg2]
 // CHECK: [[ADD:%.+]] = arith.addf [[VAL]]
-// CHECK: [[SHIFTED:%.+]] = tensor.insert [[ADD]] into %arg0[%arg2]
+// CHECK: [[SHIFTED:%.+]] = tensor.insert [[ADD]] into [[TENSOR]][%arg2]
 // CHECK: [[EVAL:%.+]]:2 = func.call @funcMultiRes([[SHIFTED]])
 // CHECK: [[SUB:%.+]] = arith.subf [[EVAL]]#1, [[BASE]]#1
 // CHECK: [[RES:%.+]] = tensor.extract [[SUB]][%arg1]
@@ -267,9 +279,12 @@ func.func private @funcDynamicTensor(%arg0: tensor<?x3xf64>) -> tensor<2x?xf64>

 // CHECK: [[DIFF:%.+]] = tensor.generate [[DDIM0]], [[DDIM1]]
 // CHECK-NEXT: ^bb0([[i0:%.+]]: index, [[i1:%.+]]: index, [[i2:%.+]]: index, [[i3:%.+]]: index):
-// CHECK: [[VAL:%.+]] = tensor.extract %arg0[[[i2]], [[i3]]]
+// CHECK: [[MEMREF:%.+]] = bufferization.to_memref %arg0
+// CHECK: [[COPY:%.+]] = bufferization.clone [[MEMREF]]
+// CHECK: [[TENSOR:%.+]] = bufferization.to_tensor [[COPY]]
+// CHECK: [[VAL:%.+]] = tensor.extract [[TENSOR]][[[i2]], [[i3]]]
 // CHECK: [[ADD:%.+]] = arith.addf [[VAL]], [[F64]]
-// CHECK: [[SHIFTED:%.+]] = tensor.insert [[ADD]] into %arg0[[[i2]], [[i3]]]
+// CHECK: [[SHIFTED:%.+]] = tensor.insert [[ADD]] into [[TENSOR]][[[i2]], [[i3]]]
 // CHECK: [[EVAL:%.+]] = func.call @funcDynamicTensor([[SHIFTED]])
 // CHECK: [[SUB:%.+]] = arith.subf [[EVAL]], [[BASE]]
 // CHECK: [[RES:%.+]] = tensor.extract [[SUB]][[[i0]], [[i1]]]
```
