
Commit a9db3a2

Finalize changes

1 parent 1406731 · commit a9db3a2
File tree

3 files changed: +43 −55 lines changed

src/enzyme_ad/jax/Passes/EnzymeBatchToStableHLOPass.cpp

Lines changed: 16 additions & 21 deletions
@@ -47,26 +47,23 @@ struct ExtractOpConversion : public OpConversionPattern<enzyme::ExtractOp> {
     if (ndims < 1)
       return failure();

-    // dynamic_slice followed by reshape
-    auto i64Ty = IntegerType::get(rewriter.getContext(), 64);
-    auto tensor0i64Ty = RankedTensorType::get({}, i64Ty);
-    auto zero = rewriter.create<stablehlo::ConstantOp>(
-        op.getLoc(), rewriter.getZeroAttr(tensor0i64Ty));
-
-    SmallVector<Value> dynamicSliceStartSlices(ndims, zero);
-    dynamicSliceStartSlices[0] = op.getIndex(); // assume its legal for no
-
-    SmallVector<int64_t> localRetShape = {1};
-    localRetShape.append(outRankTy.getShape().begin(),
+    // static slice
+    SmallVector<int64_t> start_indices;
+    start_indices.push_back(op.getIndex());
+    for (int i = 1; i < ndims; ++i) {
+      start_indices.push_back(0);
+    }
+    SmallVector<int64_t> limit_indices;
+    limit_indices.push_back(op.getIndex() + 1);
+    limit_indices.append(outRankTy.getShape().begin(),
                          outRankTy.getShape().end());
-    ;
-    auto slicedOut = rewriter.create<stablehlo::DynamicSliceOp>(
-        op->getLoc(), op.getInput(), dynamicSliceStartSlices, localRetShape);
+    SmallVector<int64_t> strides(ndims, 1);

+    Value slicedOut =
+        stablehlo::SliceOp::create(rewriter, op->getLoc(), op.getInput(),
+                                   start_indices, limit_indices, strides);
     // reshape slicedOut to our final Op
-    rewriter.replaceOpWithNewOp<stablehlo::ReshapeOp>(op, op->getLoc(), outTy,
-                                                      slicedOut);
-
+    rewriter.replaceOpWithNewOp<stablehlo::ReshapeOp>(op, outTy, slicedOut);
     return success();
   }
 };
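Note on the new lowering: extracting batch entry k from a batched input of shape [width, d1, ..., dm] now uses purely static bounds (start = {k, 0, ..., 0}, limit = {k+1, d1, ..., dm}, unit strides), and the resulting [1, d1, ..., dm] slice is reshaped to the un-batched [d1, ..., dm] result. A minimal standalone C++ sketch of that index arithmetic follows; the helper name and types are illustrative, not part of the pass:

#include <cstdint>
#include <vector>

// Hypothetical helper mirroring ExtractOpConversion: given the batch index
// and the un-batched result shape, compute static slice bounds that select
// one entry along the leading (batch) dimension.
struct SliceBounds {
  std::vector<int64_t> start, limit, strides;
};

SliceBounds extractSliceBounds(int64_t index,
                               const std::vector<int64_t> &outShape) {
  SliceBounds b;
  b.start.push_back(index);      // batch dim: start at the requested entry
  b.limit.push_back(index + 1);  // and stop right after it
  for (int64_t d : outShape) {
    b.start.push_back(0);        // all other dims are taken in full
    b.limit.push_back(d);
  }
  b.strides.assign(outShape.size() + 1, 1); // unit stride everywhere
  return b;
}

Because the bounds are compile-time constants, no arith.constant index values need to be materialized, which is what the updated lit tests below check: stablehlo.slice [0:1] or [1:2] followed by stablehlo.reshape.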
@@ -81,8 +78,6 @@ struct ConcatOpConversion : public OpConversionPattern<enzyme::ConcatOp> {
     if (inputs.empty())
       return failure();

-    auto firstInTy = inputs.front().getType();
-
     // stablehlo always has tensor type
     // reshape each input to 1xinput_rank and concatenate on dim=0

@@ -94,7 +89,7 @@ struct ConcatOpConversion : public OpConversionPattern<enzyme::ConcatOp> {
       newInShape.append(inShape.begin(), inShape.end());
       auto newInTy = inRankTy.clone(newInShape);
       Value newInput =
-          rewriter.create<stablehlo::ReshapeOp>(op->getLoc(), newInTy, in);
+          stablehlo::ReshapeOp::create(rewriter, op->getLoc(), newInTy, in);
       expandedInputs.push_back(newInput);
     }
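For context, ConcatOpConversion batches its operands by reshaping each input of shape [d1, ..., dm] to [1, d1, ..., dm] and concatenating the expanded inputs along dim 0, giving [n, d1, ..., dm]. A small C++ sketch of the shape bookkeeping, with hypothetical helper names:

#include <cstdint>
#include <vector>

// Hypothetical shape helpers mirroring the reshape-then-concatenate scheme.
std::vector<int64_t> expandedShape(const std::vector<int64_t> &inShape) {
  std::vector<int64_t> s{1};                        // new leading batch dim
  s.insert(s.end(), inShape.begin(), inShape.end());
  return s;                                         // [1, d1, ..., dm]
}

std::vector<int64_t> concatShape(const std::vector<int64_t> &inShape,
                                 int64_t numInputs) {
  std::vector<int64_t> s = expandedShape(inShape);
  s[0] = numInputs;                                 // [n, d1, ..., dm]
  return s;
}

For example, two tensor<10xf64> operands expand to tensor<1x10xf64> each and concatenate to tensor<2x10xf64>, matching the LEGAL checks in the tests below.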

@@ -116,7 +111,7 @@ struct EnzymeBatchToStableHLOPass
     ConversionTarget target(*context);
     target.addLegalDialect<stablehlo::StablehloDialect>();
     target.addLegalDialect<enzyme::EnzymeDialect>();
-    target.addIllegalOp<enzyme::ConcatOp>();
+    target.addIllegalOp<enzyme::ConcatOp, enzyme::ExtractOp>();

     if (failed(applyPartialConversion(getOperation(), target,
                                       std::move(patterns)))) {
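Marking enzyme.extract illegal alongside enzyme.concat means applyPartialConversion must rewrite every such op through one of the registered patterns; any occurrence left unconverted makes the pass fail. A minimal sketch of how this kind of partial-conversion driver is typically wired up in MLIR; dialect and pattern names follow the diff above, while the surrounding boilerplate (headers, error handling) is assumed rather than taken from this commit:

#include "mlir/Transforms/DialectConversion.h"
// Headers declaring the enzyme and stablehlo dialects and the two
// conversion patterns are omitted here for brevity.

using namespace mlir;

// Hypothetical driver mirroring runOnOperation in the pass above.
void lowerBatchOps(Operation *root, MLIRContext *context) {
  RewritePatternSet patterns(context);
  patterns.add<ConcatOpConversion, ExtractOpConversion>(context);

  ConversionTarget target(*context);
  target.addLegalDialect<stablehlo::StablehloDialect>();
  target.addLegalDialect<enzyme::EnzymeDialect>();
  // Ops listed here must be rewritten by some pattern or conversion fails.
  target.addIllegalOp<enzyme::ConcatOp, enzyme::ExtractOp>();

  if (failed(applyPartialConversion(root, target, std::move(patterns))))
    root->emitError("failed to lower enzyme batch ops to StableHLO");
}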

test/lit_tests/adbatching/bwd_batch.mlir renamed to test/lit_tests/OptimizeAD/bwd_batch.mlir

Lines changed: 14 additions & 18 deletions
@@ -18,22 +18,20 @@ module {
 // CHECK-SAME: (%[[PRIMAL:.*]]: tensor<f64>, %[[DIFF1:.*]]: tensor<f64>, %[[DIFF2:.*]]: tensor<f64>) -> (tensor<f64>, tensor<f64>)
 // CHECK: %[[CONCAT:.*]] = enzyme.concat(%[[DIFF1]], %[[DIFF2]]) : (tensor<f64>, tensor<f64>) -> tensor<2xf64>
 // CHECK: %[[BATCHED_RES_BASE:.*]]:2 = enzyme.autodiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<f64>, tensor<2xf64>) -> (tensor<f64>, tensor<2xf64>)
-// CHECK: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-NEXT: %[[RES0:.*]] = enzyme.extract %[[BATCHED_RES_BASE]]#1[%[[C0]]] : (tensor<2xf64>) -> tensor<f64>
-// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
-// CHECK-NEXT: %[[RES1:.*]] = enzyme.extract %[[BATCHED_RES_BASE]]#1[%[[C1]]] : (tensor<2xf64>) -> tensor<f64>
+// CHECK: %[[RES0:.*]] = enzyme.extract %[[BATCHED_RES_BASE]]#1[0] : (tensor<2xf64>) -> tensor<f64>
+// CHECK-NEXT: %[[RES1:.*]] = enzyme.extract %[[BATCHED_RES_BASE]]#1[1] : (tensor<2xf64>) -> tensor<f64>
 // CHECK-NEXT: return %[[RES0]], %[[RES1]]

 // LEGAL-LABEL: func.func @test1
-// LEGAL-SAME: (%[[PRIMAL:.*]]: f64, %[[DIFF1:.*]]: f64, %[[DIFF2:.*]]: f64) -> (f64, f64)
+// LEGAL-SAME: (%[[PRIMAL:.*]]: tensor<f64>, %[[DIFF1:.*]]: tensor<f64>, %[[DIFF2:.*]]: tensor<f64>) -> (tensor<f64>, tensor<f64>)
 // LEGAL: %[[EDIFF1:.*]] = stablehlo.reshape %[[DIFF1]] : (tensor<f64>) -> tensor<1xf64>
 // LEGAL: %[[EDIFF2:.*]] = stablehlo.reshape %[[DIFF2]] : (tensor<f64>) -> tensor<1xf64>
 // LEGAL: %[[CONCAT:.*]] = stablehlo.concatenate %[[EDIFF1]], %[[EDIFF2]], dim = 0 : (tensor<1xf64>, tensor<1xf64>) -> tensor<2xf64>
-// LEGAL: %[[BATCHED_RES_BASE:.*]]:2 = enzyme.autodiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (f64, tensor<2xf64>) -> (f64, tensor<2xf64>)
-// LEGAL: %[[C0:.*]] = arith.constant 0 : index
-// LEGAL-NEXT: %[[RES0:.*]] = tensor.extract %[[BATCHED_RES_BASE]]#1[%[[C0]]] : tensor<2xf64>
-// LEGAL-NEXT: %[[C1:.*]] = arith.constant 1 : index
-// LEGAL-NEXT: %[[RES1:.*]] = tensor.extract %[[BATCHED_RES_BASE]]#1[%[[C1]]] : tensor<2xf64>
+// LEGAL: %[[BATCHED_RES_BASE:.*]]:2 = enzyme.autodiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<f64>, tensor<2xf64>) -> (tensor<f64>, tensor<2xf64>)
+// LEGAL: %[[R0:.*]] = stablehlo.slice %[[BATCHED_RES_BASE]]#1 [0:1] : (tensor<2xf64>) -> tensor<1xf64>
+// LEGAL-NEXT: %[[RES0:.*]] = stablehlo.reshape %[[R0]] : (tensor<1xf64>) -> tensor<f64>
+// LEGAL-NEXT: %[[R1:.*]] = stablehlo.slice %[[BATCHED_RES_BASE]]#1 [1:2] : (tensor<2xf64>) -> tensor<1xf64>
+// LEGAL-NEXT: %[[RES1:.*]] = stablehlo.reshape %[[R1]] : (tensor<1xf64>) -> tensor<f64>
 // LEGAL-NEXT: return %[[RES0]], %[[RES1]]

 // -----
@@ -56,10 +54,8 @@ module {
 // CHECK-SAME: (%[[PRIMAL:.*]]: tensor<10xf64>, %[[DIFF1:.*]]: tensor<10xf64>, %[[DIFF2:.*]]: tensor<10xf64>) -> (tensor<10xf64>, tensor<10xf64>)
 // CHECK: %[[CONCAT:.*]] = enzyme.concat(%[[DIFF1]], %[[DIFF2]]) : (tensor<10xf64>, tensor<10xf64>) -> tensor<2x10xf64>
 // CHECK: %[[BATCHED_RES_BASE:.*]]:2 = enzyme.autodiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<10xf64>, tensor<2x10xf64>) -> (tensor<10xf64>, tensor<2x10xf64>)
-// CHECK: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-NEXT: %[[RES0:.*]] = enzyme.extract %[[BATCHED_RES_BASE]]#1[%[[C0]]] : (tensor<2x10xf64>) -> tensor<10xf64>
-// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
-// CHECK-NEXT: %[[RES1:.*]] = enzyme.extract %[[BATCHED_RES_BASE]]#1[%[[C1]]] : (tensor<2x10xf64>) -> tensor<10xf64>
+// CHECK-NEXT: %[[RES0:.*]] = enzyme.extract %[[BATCHED_RES_BASE]]#1[0] : (tensor<2x10xf64>) -> tensor<10xf64>
+// CHECK-NEXT: %[[RES1:.*]] = enzyme.extract %[[BATCHED_RES_BASE]]#1[1] : (tensor<2x10xf64>) -> tensor<10xf64>
 // CHECK-NEXT: return %[[RES0]], %[[RES1]]

 // LEGAL-LABEL: func.func @test2
@@ -68,8 +64,8 @@ module {
 // LEGAL: %[[EDIFF2:.*]] = stablehlo.reshape %[[DIFF2]] : (tensor<10xf64>) -> tensor<1x10xf64>
 // LEGAL: %[[CONCAT:.*]] = stablehlo.concatenate %[[EDIFF1]], %[[EDIFF2]], dim = 0 : (tensor<1x10xf64>, tensor<1x10xf64>) -> tensor<2x10xf64>
 // LEGAL: %[[BATCHED_RES_BASE:.*]]:2 = enzyme.autodiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<10xf64>, tensor<2x10xf64>) -> (tensor<10xf64>, tensor<2x10xf64>)
-// LEGAL: %[[C0:.*]] = arith.constant 0 : index
-// LEGAL-NEXT: %[[RES0:.*]] = tensor.extract_slice %[[BATCHED_RES_BASE]]#1[%[[C0]], 0] [1, 10] [1, 1] : tensor<2x10xf64> to tensor<10xf64>
-// LEGAL-NEXT: %[[C1:.*]] = arith.constant 1 : index
-// LEGAL-NEXT: %[[RES1:.*]] = tensor.extract_slice %[[BATCHED_RES_BASE]]#1[%[[C1]], 0] [1, 10] [1, 1] : tensor<2x10xf64> to tensor<10xf64>
+// LEGAL: %[[R0:.*]] = stablehlo.slice %[[BATCHED_RES_BASE]]#1 [0:1, 0:10] : (tensor<2x10xf64>) -> tensor<1x10xf64>
+// LEGAL-NEXT: %[[RES0:.*]] = stablehlo.reshape %[[R0]] : (tensor<1x10xf64>) -> tensor<10xf64>
+// LEGAL-NEXT: %[[R1:.*]] = stablehlo.slice %[[BATCHED_RES_BASE]]#1 [1:2, 0:10] : (tensor<2x10xf64>) -> tensor<1x10xf64>
+// LEGAL-NEXT: %[[RES1:.*]] = stablehlo.reshape %[[R1]] : (tensor<1x10xf64>) -> tensor<10xf64>
 // LEGAL-NEXT: return %[[RES0]], %[[RES1]]

test/lit_tests/adbatching/fwd_batch.mlir renamed to test/lit_tests/OptimizeAD/fwd_batch.mlir

Lines changed: 13 additions & 16 deletions
@@ -17,10 +17,8 @@ module {
 // CHECK-SAME: (%[[PRIMAL:.*]]: tensor<f64>, %[[DIFF1:.*]]: tensor<f64>, %[[DIFF2:.*]]: tensor<f64>) -> (tensor<f64>, tensor<f64>)
 // CHECK: %[[CONCAT:.*]] = enzyme.concat(%[[DIFF1]], %[[DIFF2]]) : (tensor<f64>, tensor<f64>) -> tensor<2xf64>
 // CHECK: %[[BATCHED_RES:.*]] = enzyme.fwddiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<f64>, tensor<2xf64>) -> tensor<2xf64>
-// CHECK: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-NEXT: %[[RES0:.*]] = enzyme.extract %[[BATCHED_RES]][%[[C0]]] : (tensor<2xf64>) -> tensor<f64>
-// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
-// CHECK-NEXT: %[[RES1:.*]] = enzyme.extract %[[BATCHED_RES]][%[[C1]]] : (tensor<2xf64>) -> tensor<f64>
+// CHECK: %[[RES0:.*]] = enzyme.extract %[[BATCHED_RES]][0] : (tensor<2xf64>) -> tensor<f64>
+// CHECK-NEXT: %[[RES1:.*]] = enzyme.extract %[[BATCHED_RES]][1] : (tensor<2xf64>) -> tensor<f64>
 // CHECK-NEXT: return %[[RES0]], %[[RES1]]

 // LEGAL-LABEL: func.func @test1
@@ -29,10 +27,10 @@ module {
 // LEGAL: %[[EDIFF2:.*]] = stablehlo.reshape %[[DIFF2]] : (tensor<f64>) -> tensor<1xf64>
 // LEGAL: %[[CONCAT:.*]] = stablehlo.concatenate %[[EDIFF1]], %[[EDIFF2]], dim = 0 : (tensor<1xf64>, tensor<1xf64>) -> tensor<2xf64>
 // LEGAL: %[[BATCHED_RES:.*]] = enzyme.fwddiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<f64>, tensor<2xf64>) -> tensor<2xf64>
-// LEGAL: %[[C0:.*]] = arith.constant 0 : index
-// LEGAL-NEXT: %[[RES0:.*]] = tensor.extract %[[BATCHED_RES]][%[[C0]]] : tensor<2xf64>
-// LEGAL-NEXT: %[[C1:.*]] = arith.constant 1 : index
-// LEGAL-NEXT: %[[RES1:.*]] = tensor.extract %[[BATCHED_RES]][%[[C1]]] : tensor<2xf64>
+// LEGAL: %[[R0:.*]] = stablehlo.slice %[[BATCHED_RES]] [0:1] : (tensor<2xf64>) -> tensor<1xf64>
+// LEGAL-NEXT: %[[RES0:.*]] = stablehlo.reshape %[[R0]] : (tensor<1xf64>) -> tensor<f64>
+// LEGAL-NEXT: %[[R1:.*]] = stablehlo.slice %[[BATCHED_RES]] [1:2] : (tensor<2xf64>) -> tensor<1xf64>
+// LEGAL-NEXT: %[[RES1:.*]] = stablehlo.reshape %[[R1]] : (tensor<1xf64>) -> tensor<f64>
 // LEGAL-NEXT: return %[[RES0]], %[[RES1]]

 // -----
@@ -50,14 +48,13 @@ module {
 }
 }

+
 // CHECK-LABEL: func.func @test2
 // CHECK-SAME: (%[[PRIMAL:.*]]: tensor<10xf64>, %[[DIFF1:.*]]: tensor<10xf64>, %[[DIFF2:.*]]: tensor<10xf64>) -> (tensor<10xf64>, tensor<10xf64>)
 // CHECK: %[[CONCAT:.*]] = enzyme.concat(%[[DIFF1]], %[[DIFF2]]) : (tensor<10xf64>, tensor<10xf64>) -> tensor<2x10xf64>
 // CHECK: %[[BATCHED_RES:.*]] = enzyme.fwddiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<10xf64>, tensor<2x10xf64>) -> tensor<2x10xf64>
-// CHECK: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-NEXT: %[[RES0:.*]] = enzyme.extract %[[BATCHED_RES]][%[[C0]]] : (tensor<2x10xf64>) -> tensor<10xf64>
-// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
-// CHECK-NEXT: %[[RES1:.*]] = enzyme.extract %[[BATCHED_RES]][%[[C1]]] : (tensor<2x10xf64>) -> tensor<10xf64>
+// CHECK: %[[RES0:.*]] = enzyme.extract %[[BATCHED_RES]][0] : (tensor<2x10xf64>) -> tensor<10xf64>
+// CHECK-NEXT: %[[RES1:.*]] = enzyme.extract %[[BATCHED_RES]][1] : (tensor<2x10xf64>) -> tensor<10xf64>
 // CHECK-NEXT: return %[[RES0]], %[[RES1]]

 // LEGAL-LABEL: func.func @test2
@@ -66,8 +63,8 @@ module {
 // LEGAL: %[[EDIFF2:.*]] = stablehlo.reshape %[[DIFF2]] : (tensor<10xf64>) -> tensor<1x10xf64>
 // LEGAL: %[[CONCAT:.*]] = stablehlo.concatenate %[[EDIFF1]], %[[EDIFF2]], dim = 0 : (tensor<1x10xf64>, tensor<1x10xf64>) -> tensor<2x10xf64>
 // LEGAL: %[[BATCHED_RES:.*]] = enzyme.fwddiff @square(%[[PRIMAL]], %[[CONCAT]]) {{.*}} width = 2 {{.*}} : (tensor<10xf64>, tensor<2x10xf64>) -> tensor<2x10xf64>
-// LEGAL: %[[C0:.*]] = arith.constant 0 : index
-// LEGAL-NEXT: %[[RES0:.*]] = tensor.extract_slice %[[BATCHED_RES]][%[[C0]], 0] [1, 10] [1, 1] : tensor<2x10xf64> to tensor<10xf64>
-// LEGAL-NEXT: %[[C1:.*]] = arith.constant 1 : index
-// LEGAL-NEXT: %[[RES1:.*]] = tensor.extract_slice %[[BATCHED_RES]][%[[C1]], 0] [1, 10] [1, 1] : tensor<2x10xf64> to tensor<10xf64>
+// LEGAL: %[[R0:.*]] = stablehlo.slice %[[BATCHED_RES]] [0:1, 0:10] : (tensor<2x10xf64>) -> tensor<1x10xf64>
+// LEGAL-NEXT: %[[RES0:.*]] = stablehlo.reshape %[[R0]] : (tensor<1x10xf64>) -> tensor<10xf64>
+// LEGAL-NEXT: %[[R1:.*]] = stablehlo.slice %[[BATCHED_RES]] [1:2, 0:10] : (tensor<2x10xf64>) -> tensor<1x10xf64>
+// LEGAL-NEXT: %[[RES1:.*]] = stablehlo.reshape %[[R1]] : (tensor<1x10xf64>) -> tensor<10xf64>
 // LEGAL-NEXT: return %[[RES0]], %[[RES1]]
