Commit bb22ea2

Add ConcatOp lowering, fix lit test
Need to find another cafe to work...
1 parent d7c66f3 commit bb22ea2

File tree: 3 files changed (+198, -3 lines)
src/enzyme_ad/jax/Passes/EnzymeBatchToStableHLOPass.cpp

Lines changed: 54 additions & 3 deletions
@@ -9,11 +9,11 @@
 // This file implements a pass to print the MLIR module
 //===----------------------------------------------------------------------===//

-#include "src/enzyme_ad/jax/Passes/Passes.h"
-#include "stablehlo/dialect/StablehloOps.h"
 #include "Enzyme/MLIR/Dialect/Dialect.h"
 #include "Enzyme/MLIR/Dialect/Ops.h"
+#include "src/enzyme_ad/jax/Passes/Passes.h"
 #include "src/enzyme_ad/jax/Utils.h"
+#include "stablehlo/dialect/StablehloOps.h"

 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Transforms/DialectConversion.h"
@@ -29,16 +29,67 @@ using namespace mlir;
 using namespace mlir::enzyme;
 using namespace enzyme;
 namespace {
+
+struct ExtractOpConversion : public OpConversionPattern<enzyme::ExtractOp> {
+  using OpConversionPattern<enzyme::ExtractOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(enzyme::ExtractOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto outTy = op.getOutput().getType();
+    // stablehlo always has tensor type
+    auto outRankTy = dyn_cast<RankedTensorType>(outTy);
+    auto rank = outRankTy.getRank();
+    // TODO: lower to a stablehlo.dynamic_slice op
+    return failure();
+  }
+};
+
+struct ConcatOpConversion : public OpConversionPattern<enzyme::ConcatOp> {
+  using OpConversionPattern<enzyme::ConcatOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(enzyme::ConcatOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    SmallVector<Value> inputs = op.getInputs();
+    if (inputs.empty())
+      return failure();
+
+    auto firstInTy = inputs.front().getType();
+
+    // stablehlo always has tensor type:
+    // reshape each input to 1x<input shape> and concatenate on dim = 0
+    SmallVector<Value> expandedInputs;
+    for (Value in : inputs) {
+      auto inRankTy = cast<RankedTensorType>(in.getType());
+      auto inShape = inRankTy.getShape();
+      SmallVector<int64_t> newInShape = {1};
+      newInShape.append(inShape.begin(), inShape.end());
+      auto newInTy = inRankTy.clone(newInShape);
+      Value newInput = rewriter.create<stablehlo::ReshapeOp>(
+          op->getLoc(), newInTy, in, op->getAttrs());
+      expandedInputs.push_back(newInput);
+    }
+
+    // concatenate on dim = 0
+    rewriter.replaceOpWithNewOp<stablehlo::ConcatenateOp>(
+        op, op->getResultTypes(), expandedInputs, /*dim=*/0);
+    return success();
+  }
+};
 struct EnzymeBatchToStableHLOPass
     : public enzyme::impl::EnzymeBatchToStableHLOPassBase<
           EnzymeBatchToStableHLOPass> {
   void runOnOperation() override {
     MLIRContext *context = &getContext();
     RewritePatternSet patterns(context);
+    patterns.add<ConcatOpConversion, ExtractOpConversion>(context);
+
     ConversionTarget target(*context);
     target.addLegalDialect<stablehlo::StablehloDialect>();
     target.addLegalDialect<enzyme::EnzymeDialect>();
-    target.addIllegalOp<enzyme::ConcatOp, enzyme::ExtractOp>();
+    target.addIllegalOp<enzyme::ConcatOp>();

     if (failed(applyPartialConversion(getOperation(), target,
                                       std::move(patterns)))) {
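For orientation, here is a minimal sketch of the rewrite that ConcatOpConversion performs, using hypothetical values %dr1 and %dr2 of type tensor<10xf64> (the shape is borrowed from the test2 cases below); the printed IR is an approximation of what the pattern would emit, not captured pass output:

// Hypothetical input, as produced by --enzyme-diff-batch:
//   %concat = enzyme.concat(%dr1, %dr2) : (tensor<10xf64>, tensor<10xf64>) -> tensor<2x10xf64>
// Approximate result of ConcatOpConversion: reshape each operand to a
// leading-1 shape, then concatenate all operands along dimension 0.
%e1 = stablehlo.reshape %dr1 : (tensor<10xf64>) -> tensor<1x10xf64>
%e2 = stablehlo.reshape %dr2 : (tensor<10xf64>) -> tensor<1x10xf64>
%concat = stablehlo.concatenate %e1, %e2, dim = 0 : (tensor<1x10xf64>, tensor<1x10xf64>) -> tensor<2x10xf64>

For rank-0 operands (the scalar tests), the same pattern would reshape each tensor<f64> to tensor<1xf64> and concatenate them into tensor<2xf64>. ExtractOpConversion, by contrast, is still a stub that returns failure(); its comment marks stablehlo.dynamic_slice as the intended lowering, and enzyme::ExtractOp is accordingly no longer declared illegal in the conversion target.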
Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
// RUN: enzymexlamlir-opt --split-input-file --enzyme-diff-batch %s | FileCheck %s
// RUN: enzymexlamlir-opt --split-input-file --enzyme-diff-batch --enzyme-batch-to-tensor %s | FileCheck %s --check-prefix=LEGAL

// 1. Scalar test
module {
  func.func @square(%x : tensor<f64>) -> tensor<f64> {
    %y = stablehlo.multiply %x, %x : tensor<f64>
    return %y : tensor<f64>
  }
  func.func @test1(%x : tensor<f64>, %dr1 : tensor<f64>, %dr2 : tensor<f64>) -> (tensor<f64>, tensor<f64>) {
    %r, %dx1 = enzyme.autodiff @square(%x, %dr1) { activity=[#enzyme<activity enzyme_active>], ret_activity=[#enzyme<activity enzyme_active>] } : (tensor<f64>, tensor<f64>) -> (tensor<f64>, tensor<f64>)
    %r2, %dx2 = enzyme.autodiff @square(%x, %dr2) { activity=[#enzyme<activity enzyme_active>], ret_activity=[#enzyme<activity enzyme_active>] } : (tensor<f64>, tensor<f64>) -> (tensor<f64>, tensor<f64>)
    return %dx1, %dx2 : tensor<f64>, tensor<f64>
  }
}

// CHECK-LABEL: func.func @test1
// CHECK-SAME: (%[[PRIMAL:.*]]: tensor<f64>, %[[DIFF1:.*]]: tensor<f64>, %[[DIFF2:.*]]: tensor<f64>) -> (tensor<f64>, tensor<f64>)
// CHECK: %[[CONCAT:.*]] = enzyme.concat(%[[DIFF1]], %[[DIFF2]]) : (tensor<f64>, tensor<f64>) -> tensor<2xf64>
// CHECK: %[[BATCHED_RES_BASE:.*]]:2 = enzyme.autodiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<f64>, tensor<2xf64>) -> (tensor<f64>, tensor<2xf64>)
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[RES0:.*]] = enzyme.extract %[[BATCHED_RES_BASE]]#1[%[[C0]]] : (tensor<2xf64>) -> tensor<f64>
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[RES1:.*]] = enzyme.extract %[[BATCHED_RES_BASE]]#1[%[[C1]]] : (tensor<2xf64>) -> tensor<f64>
// CHECK-NEXT: return %[[RES0]], %[[RES1]]

// LEGAL-LABEL: func.func @test1
// LEGAL-SAME: (%[[PRIMAL:.*]]: f64, %[[DIFF1:.*]]: f64, %[[DIFF2:.*]]: f64) -> (f64, f64)
// LEGAL: %[[CONCAT:.*]] = tensor.from_elements %[[DIFF1]], %[[DIFF2]] : tensor<2xf64>
// LEGAL: %[[BATCHED_RES_BASE:.*]]:2 = enzyme.autodiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (f64, tensor<2xf64>) -> (f64, tensor<2xf64>)
// LEGAL: %[[C0:.*]] = arith.constant 0 : index
// LEGAL-NEXT: %[[RES0:.*]] = tensor.extract %[[BATCHED_RES_BASE]]#1[%[[C0]]] : tensor<2xf64>
// LEGAL-NEXT: %[[C1:.*]] = arith.constant 1 : index
// LEGAL-NEXT: %[[RES1:.*]] = tensor.extract %[[BATCHED_RES_BASE]]#1[%[[C1]]] : tensor<2xf64>
// LEGAL-NEXT: return %[[RES0]], %[[RES1]]

// -----

// 2. Tensor test
module {
  func.func @square(%x : tensor<10xf64>) -> tensor<10xf64> {
    %y = stablehlo.multiply %x, %x : tensor<10xf64>
    return %y : tensor<10xf64>
  }
  func.func @test2(%x : tensor<10xf64>, %dr1 : tensor<10xf64>, %dr2 : tensor<10xf64>) -> (tensor<10xf64>, tensor<10xf64>) {
    %r, %dx1 = enzyme.autodiff @square(%x, %dr1) { activity=[#enzyme<activity enzyme_active>], ret_activity=[#enzyme<activity enzyme_active>] } : (tensor<10xf64>, tensor<10xf64>) -> (tensor<10xf64>, tensor<10xf64>)
    %r2, %dx2 = enzyme.autodiff @square(%x, %dr2) { activity=[#enzyme<activity enzyme_active>], ret_activity=[#enzyme<activity enzyme_active>] } : (tensor<10xf64>, tensor<10xf64>) -> (tensor<10xf64>, tensor<10xf64>)
    return %dx1, %dx2 : tensor<10xf64>, tensor<10xf64>
  }
}

// CHECK-LABEL: func.func @test2
// CHECK-SAME: (%[[PRIMAL:.*]]: tensor<10xf64>, %[[DIFF1:.*]]: tensor<10xf64>, %[[DIFF2:.*]]: tensor<10xf64>) -> (tensor<10xf64>, tensor<10xf64>)
// CHECK: %[[CONCAT:.*]] = enzyme.concat(%[[DIFF1]], %[[DIFF2]]) : (tensor<10xf64>, tensor<10xf64>) -> tensor<2x10xf64>
// CHECK: %[[BATCHED_RES_BASE:.*]]:2 = enzyme.autodiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<10xf64>, tensor<2x10xf64>) -> (tensor<10xf64>, tensor<2x10xf64>)
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[RES0:.*]] = enzyme.extract %[[BATCHED_RES_BASE]]#1[%[[C0]]] : (tensor<2x10xf64>) -> tensor<10xf64>
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[RES1:.*]] = enzyme.extract %[[BATCHED_RES_BASE]]#1[%[[C1]]] : (tensor<2x10xf64>) -> tensor<10xf64>
// CHECK-NEXT: return %[[RES0]], %[[RES1]]

// LEGAL-LABEL: func.func @test2
// LEGAL-SAME: (%[[PRIMAL:.*]]: tensor<10xf64>, %[[DIFF1:.*]]: tensor<10xf64>, %[[DIFF2:.*]]: tensor<10xf64>) -> (tensor<10xf64>, tensor<10xf64>)
// LEGAL: %[[EDIFF1:.*]] = tensor.expand_shape %[[DIFF1]] {{\[\[0, 1\]\]}} output_shape [1, 10] : tensor<10xf64> into tensor<1x10xf64>
// LEGAL: %[[EDIFF2:.*]] = tensor.expand_shape %[[DIFF2]] {{\[\[0, 1\]\]}} output_shape [1, 10] : tensor<10xf64> into tensor<1x10xf64>
// LEGAL: %[[CONCAT:.*]] = tensor.concat dim(0) %[[EDIFF1]], %[[EDIFF2]] : (tensor<1x10xf64>, tensor<1x10xf64>) -> tensor<2x10xf64>
// LEGAL: %[[BATCHED_RES_BASE:.*]]:2 = enzyme.autodiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<10xf64>, tensor<2x10xf64>) -> (tensor<10xf64>, tensor<2x10xf64>)
// LEGAL: %[[C0:.*]] = arith.constant 0 : index
// LEGAL-NEXT: %[[RES0:.*]] = tensor.extract_slice %[[BATCHED_RES_BASE]]#1[%[[C0]], 0] [1, 10] [1, 1] : tensor<2x10xf64> to tensor<10xf64>
// LEGAL-NEXT: %[[C1:.*]] = arith.constant 1 : index
// LEGAL-NEXT: %[[RES1:.*]] = tensor.extract_slice %[[BATCHED_RES_BASE]]#1[%[[C1]], 0] [1, 10] [1, 1] : tensor<2x10xf64> to tensor<10xf64>
// LEGAL-NEXT: return %[[RES0]], %[[RES1]]
Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
// RUN: enzymexlamlir-opt --split-input-file --enzyme-diff-batch %s | FileCheck %s
// RUN: enzymexlamlir-opt --split-input-file --enzyme-diff-batch --enzyme-batch-to-tensor %s | FileCheck %s --check-prefix=LEGAL

// 1. Scalar test
module {
  func.func @square(%x : tensor<f64>) -> tensor<f64> {
    %y = stablehlo.multiply %x, %x : tensor<f64>
    return %y : tensor<f64>
  }
  func.func @test1(%x : tensor<f64>, %dx1 : tensor<f64>, %dx2 : tensor<f64>) -> (tensor<f64>, tensor<f64>) {
    %r1 = enzyme.fwddiff @square(%x, %dx1) { activity=[#enzyme<activity enzyme_dup>], ret_activity=[#enzyme<activity enzyme_dupnoneed>] } : (tensor<f64>, tensor<f64>) -> (tensor<f64>)
    %r2 = enzyme.fwddiff @square(%x, %dx2) { activity=[#enzyme<activity enzyme_dup>], ret_activity=[#enzyme<activity enzyme_dupnoneed>] } : (tensor<f64>, tensor<f64>) -> (tensor<f64>)
    return %r1, %r2 : tensor<f64>, tensor<f64>
  }
}

// CHECK-LABEL: func.func @test1
// CHECK-SAME: (%[[PRIMAL:.*]]: tensor<f64>, %[[DIFF1:.*]]: tensor<f64>, %[[DIFF2:.*]]: tensor<f64>) -> (tensor<f64>, tensor<f64>)
// CHECK: %[[CONCAT:.*]] = enzyme.concat(%[[DIFF1]], %[[DIFF2]]) : (tensor<f64>, tensor<f64>) -> tensor<2xf64>
// CHECK: %[[BATCHED_RES:.*]] = enzyme.fwddiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<f64>, tensor<2xf64>) -> tensor<2xf64>
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[RES0:.*]] = enzyme.extract %[[BATCHED_RES]][%[[C0]]] : (tensor<2xf64>) -> tensor<f64>
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[RES1:.*]] = enzyme.extract %[[BATCHED_RES]][%[[C1]]] : (tensor<2xf64>) -> tensor<f64>
// CHECK-NEXT: return %[[RES0]], %[[RES1]]

// LEGAL-LABEL: func.func @test1
// LEGAL-SAME: (%[[PRIMAL:.*]]: tensor<f64>, %[[DIFF1:.*]]: tensor<f64>, %[[DIFF2:.*]]: tensor<f64>) -> (tensor<f64>, tensor<f64>)
// LEGAL: %[[CONCAT:.*]] = tensor.from_elements %[[DIFF1]], %[[DIFF2]] : tensor<2xf64>
// LEGAL: %[[BATCHED_RES:.*]] = enzyme.fwddiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<f64>, tensor<2xf64>) -> tensor<2xf64>
// LEGAL: %[[C0:.*]] = arith.constant 0 : index
// LEGAL-NEXT: %[[RES0:.*]] = tensor.extract %[[BATCHED_RES]][%[[C0]]] : tensor<2xf64>
// LEGAL-NEXT: %[[C1:.*]] = arith.constant 1 : index
// LEGAL-NEXT: %[[RES1:.*]] = tensor.extract %[[BATCHED_RES]][%[[C1]]] : tensor<2xf64>
// LEGAL-NEXT: return %[[RES0]], %[[RES1]]

// -----

// 2. Tensor test
module {
  func.func @square(%x : tensor<10xf64>) -> tensor<10xf64> {
    %y = stablehlo.multiply %x, %x : tensor<10xf64>
    return %y : tensor<10xf64>
  }
  func.func @test2(%x : tensor<10xf64>, %dx : tensor<10xf64>, %dx2 : tensor<10xf64>) -> (tensor<10xf64>, tensor<10xf64>) {
    %r = enzyme.fwddiff @square(%x, %dx) { activity=[#enzyme<activity enzyme_dup>], ret_activity=[#enzyme<activity enzyme_dupnoneed>] } : (tensor<10xf64>, tensor<10xf64>) -> (tensor<10xf64>)
    %r2 = enzyme.fwddiff @square(%x, %dx2) { activity=[#enzyme<activity enzyme_dup>], ret_activity=[#enzyme<activity enzyme_dupnoneed>] } : (tensor<10xf64>, tensor<10xf64>) -> (tensor<10xf64>)
    return %r, %r2 : tensor<10xf64>, tensor<10xf64>
  }
}

// CHECK-LABEL: func.func @test2
// CHECK-SAME: (%[[PRIMAL:.*]]: tensor<10xf64>, %[[DIFF1:.*]]: tensor<10xf64>, %[[DIFF2:.*]]: tensor<10xf64>) -> (tensor<10xf64>, tensor<10xf64>)
// CHECK: %[[CONCAT:.*]] = enzyme.concat(%[[DIFF1]], %[[DIFF2]]) : (tensor<10xf64>, tensor<10xf64>) -> tensor<2x10xf64>
// CHECK: %[[BATCHED_RES:.*]] = enzyme.fwddiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<10xf64>, tensor<2x10xf64>) -> tensor<2x10xf64>
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[RES0:.*]] = enzyme.extract %[[BATCHED_RES]][%[[C0]]] : (tensor<2x10xf64>) -> tensor<10xf64>
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[RES1:.*]] = enzyme.extract %[[BATCHED_RES]][%[[C1]]] : (tensor<2x10xf64>) -> tensor<10xf64>
// CHECK-NEXT: return %[[RES0]], %[[RES1]]

// LEGAL-LABEL: func.func @test2
// LEGAL-SAME: (%[[PRIMAL:.*]]: tensor<10xf64>, %[[DIFF1:.*]]: tensor<10xf64>, %[[DIFF2:.*]]: tensor<10xf64>) -> (tensor<10xf64>, tensor<10xf64>)
// LEGAL: %[[EDIFF1:.*]] = tensor.expand_shape %[[DIFF1]] {{\[\[0, 1\]\]}} output_shape [1, 10] : tensor<10xf64> into tensor<1x10xf64>
// LEGAL: %[[EDIFF2:.*]] = tensor.expand_shape %[[DIFF2]] {{\[\[0, 1\]\]}} output_shape [1, 10] : tensor<10xf64> into tensor<1x10xf64>
// LEGAL: %[[CONCAT:.*]] = tensor.concat dim(0) %[[EDIFF1]], %[[EDIFF2]] : (tensor<1x10xf64>, tensor<1x10xf64>) -> tensor<2x10xf64>
// LEGAL: %[[BATCHED_RES:.*]] = enzyme.fwddiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<10xf64>, tensor<2x10xf64>) -> tensor<2x10xf64>
// LEGAL: %[[C0:.*]] = arith.constant 0 : index
// LEGAL-NEXT: %[[RES0:.*]] = tensor.extract_slice %[[BATCHED_RES]][%[[C0]], 0] [1, 10] [1, 1] : tensor<2x10xf64> to tensor<10xf64>
// LEGAL-NEXT: %[[C1:.*]] = arith.constant 1 : index
// LEGAL-NEXT: %[[RES1:.*]] = tensor.extract_slice %[[BATCHED_RES]][%[[C1]], 0] [1, 10] [1, 1] : tensor<2x10xf64> to tensor<10xf64>
// LEGAL-NEXT: return %[[RES0]], %[[RES1]]
