Commit b7ae6d3

fix: remove SelectIfActive

1 parent 055e07f

File tree: 2 files changed, 78 additions and 15 deletions

src/enzyme_ad/jax/Implementations/HLODerivatives.td (46 additions, 15 deletions)
@@ -1459,15 +1459,31 @@ def getBroadcastDimensionsWithBatch : GlobalExpr</*needsprimal*/0, /*needsshadow
 
 def BroadcastDimsToReductionDims : GlobalExpr</*needsprimal*/0, /*needsshadow*/0, [{
     SmallVector<int64_t> reduceDims;
-    auto outRank = cast<RankedTensorType>(op.getType()).getRank();
-    for (int64_t i = 0; i < outRank; i++) {
-      if (!llvm::is_contained(op.getBroadcastDimensions(), i)) {
-        reduceDims.push_back(i);
+    auto outTy = cast<RankedTensorType>(op.getType());
+    auto bcastDims = op.getBroadcastDimensions();
+    auto inTy = cast<RankedTensorType>(op.getOperand().getType());
+
+    for (auto en : llvm::enumerate(outTy.getShape())) {
+      ssize_t bcastIdx = -1;
+      for (auto en2 : llvm::enumerate(bcastDims)) {
+        if (en2.value() == en.index()) {
+          bcastIdx = en2.index();
+          break;
+        }
       }
+      if (bcastIdx != -1) {
+        if (en.value() != inTy.getShape()[bcastIdx]) {
+          reduceDims.push_back(en.index());
+          assert(inTy.getShape()[bcastIdx] == 1);
+        }
+        continue;
+      }
+      reduceDims.push_back(en.index());
     }
+
     if (gutils->width > 1) {
-      for (int64_t i = 0; i < reduceDims.size(); i++) {
-        reduceDims[i] += 1;
+      for (int i = 0; i < reduceDims.size(); i++) {
+        reduceDims[i]++;
       }
     }
     getI64Attr(builder, reduceDims);
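The old loop reduced only over output dimensions absent from broadcast_dimensions, so a mapped dimension that stretches a size-1 input extent (e.g. tensor<1xf32> broadcast with dims = [0] to tensor<3xf32>, as in the new test below) was wrongly left out of the gradient reduction. The rewritten loop also reduces those stretched dimensions. A minimal standalone C++ sketch of the same computation, with the hypothetical helper name reductionDims and plain std::vector standing in for the MLIR types:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Standalone sketch of the new reduce-dimension computation, using plain
    // std::vector in place of RankedTensorType / llvm::enumerate.
    std::vector<int64_t> reductionDims(const std::vector<int64_t> &outShape,
                                       const std::vector<int64_t> &inShape,
                                       const std::vector<int64_t> &bcastDims) {
      std::vector<int64_t> reduceDims;
      for (int64_t i = 0; i < (int64_t)outShape.size(); ++i) {
        // Find which input dimension (if any) maps to output dimension i.
        int64_t bcastIdx = -1;
        for (size_t j = 0; j < bcastDims.size(); ++j) {
          if (bcastDims[j] == i) {
            bcastIdx = (int64_t)j;
            break;
          }
        }
        if (bcastIdx != -1) {
          // Mapped dimension: reduce only when it was expanded from extent 1.
          if (outShape[i] != inShape[bcastIdx]) {
            assert(inShape[bcastIdx] == 1);
            reduceDims.push_back(i);
          }
          continue;
        }
        // Output dimension not mapped from the input: introduced by the
        // broadcast, so it is always reduced in the gradient.
        reduceDims.push_back(i);
      }
      return reduceDims;
    }

For outShape = {3}, inShape = {1}, bcastDims = {0} this returns {0}, whereas the old membership test returned the empty set.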
@@ -1503,11 +1519,33 @@ def BroadcastDimensionsToInversePermutation : GlobalExpr</*needsprimal*/0, /*nee
 }]>;
 
 def InsertDeletedReduceDimsType : GlobalExpr</*needsprimal*/0, /*needsshadow*/0, [{
+    SmallVector<int64_t> reduceDims;
     auto outTy = cast<RankedTensorType>(op.getType());
+    auto bcastDims = op.getBroadcastDimensions();
+    auto inTy = cast<RankedTensorType>(op.getOperand().getType());
     auto outShape = outTy.getShape();
+
+    for (auto en : llvm::enumerate(outTy.getShape())) {
+      ssize_t bcastIdx = -1;
+      for (auto en2 : llvm::enumerate(bcastDims)) {
+        if (en2.value() == en.index()) {
+          bcastIdx = en2.index();
+          break;
+        }
+      }
+      if (bcastIdx != -1) {
+        if (en.value() != inTy.getShape()[bcastIdx]) {
+          reduceDims.push_back(en.index());
+          assert(inTy.getShape()[bcastIdx] == 1);
+        }
+        continue;
+      }
+      reduceDims.push_back(en.index());
+    }
+
     SmallVector<int64_t> reshapeShape(outTy.getRank(), -1);
     for (auto [i, sz] : llvm::enumerate(outShape)) {
-      if (!llvm::is_contained(op.getBroadcastDimensions(), i)) {
+      if (llvm::is_contained(reduceDims, i)) {
         reshapeShape[i] = 1;
       } else {
         reshapeShape[i] = sz;
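InsertDeletedReduceDimsType now recomputes the same reduceDims set and keys the reshape shape off it, instead of off membership in broadcast_dimensions, so the stretched size-1 dimensions also collapse back to extent 1. A sketch of that second step under the same std::vector stand-ins (insertDeletedDims is a hypothetical name):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Sketch of the reshape-shape computation: dimensions that were reduced
    // away come back with extent 1; all others keep their output extent.
    std::vector<int64_t> insertDeletedDims(const std::vector<int64_t> &outShape,
                                           const std::vector<int64_t> &reduceDims) {
      std::vector<int64_t> shape(outShape.size(), -1);
      for (size_t i = 0; i < outShape.size(); ++i) {
        bool reduced = std::find(reduceDims.begin(), reduceDims.end(),
                                 (int64_t)i) != reduceDims.end();
        shape[i] = reduced ? 1 : outShape[i];
      }
      return shape;
    }

For outShape = {3} and reduceDims = {0} this yields {1}: the summed gradient is reshaped back to the tensor<1xf32> shape of the broadcast input.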
@@ -1559,12 +1597,5 @@ def : HLODerivative<"BroadcastInDimOp", (Op $x),
       )
     ],
     (
-      SelectIfActive $x,
-      (
-        BroadcastInDim
-        (ResultTypeWithBatch),
-        (Shadow $x),
-        (getBroadcastDimensionsWithBatch)
-      ),
-      (HLOConstantFP<"0">)
+      BroadcastInDim (ResultTypeWithBatch), (Shadow $x), (getBroadcastDimensionsWithBatch)
     )>;
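The derivative pattern itself drops the SelectIfActive wrapper and always emits the broadcast of the shadow. The commit does not spell out the rationale; presumably an inactive $x carries an all-zero shadow, in which case broadcasting it reproduces the zero constant the removed HLOConstantFP<"0"> branch supplied. A toy C++ model of that equivalence (broadcastShadow is hypothetical, not the Enzyme-JAX API):

    #include <array>
    #include <cstdio>

    // broadcast_in_dim of a tensor<1xf32> shadow to tensor<3xf32>, dims = [0].
    static std::array<float, 3> broadcastShadow(std::array<float, 1> dx) {
      return {dx[0], dx[0], dx[0]};
    }

    int main() {
      // An inactive argument carries an all-zero shadow, so the old
      // SelectIfActive fallback and the new unconditional broadcast agree.
      std::array<float, 1> zeroShadow = {0.0f};
      std::array<float, 3> oldFallback = {0.0f, 0.0f, 0.0f};
      std::array<float, 3> newResult = broadcastShadow(zeroShadow);
      std::printf("branches agree: %d\n", (int)(newResult == oldFallback));
      return 0;
    }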
New test file (32 additions, 0 deletions)

@@ -0,0 +1,32 @@
+// RUN: enzymexlamlir-opt --enzyme --canonicalize --remove-unnecessary-enzyme-ops --arith-raise --inline --enzyme-hlo-opt %s | FileCheck %s
+
+module {
+  func.func private @"Const{typeof(slicing)}(Main.slicing)_autodiff"(%arg0: tensor<1x4x1xf32>) -> (tensor<f32>, tensor<1x4x1xf32>) {
+    %cst = stablehlo.constant dense<0.000000e+00> : tensor<f32>
+    %cst_0 = stablehlo.constant dense<1.000000e+00> : tensor<3xf32>
+    %0 = stablehlo.slice %arg0 [0:1, 0:1, 0:1] : (tensor<1x4x1xf32>) -> tensor<1x1x1xf32>
+    %1 = stablehlo.reshape %0 : (tensor<1x1x1xf32>) -> tensor<1xf32>
+    %2 = stablehlo.broadcast_in_dim %1, dims = [0] : (tensor<1xf32>) -> tensor<3xf32>
+    %3 = stablehlo.multiply %2, %cst_0 : tensor<3xf32>
+    %4 = stablehlo.multiply %3, %3 : tensor<3xf32>
+    %5 = stablehlo.reduce(%4 init: %cst) applies stablehlo.add across dimensions = [0] : (tensor<3xf32>, tensor<f32>) -> tensor<f32>
+    return %5, %arg0 : tensor<f32>, tensor<1x4x1xf32>
+  }
+  func.func @main(%arg0: tensor<1x4x1xf32>) -> (tensor<1x4x1xf32>, tensor<1x4x1xf32>) {
+    %cst = stablehlo.constant dense<1.000000e+00> : tensor<f32>
+    %0:2 = enzyme.autodiff @"Const{typeof(slicing)}(Main.slicing)_autodiff"(%arg0, %cst) {activity = [#enzyme<activity enzyme_active>], ret_activity = [#enzyme<activity enzyme_activenoneed>, #enzyme<activity enzyme_const>]} : (tensor<1x4x1xf32>, tensor<f32>) -> (tensor<1x4x1xf32>, tensor<1x4x1xf32>)
+    return %0#1, %0#0 : tensor<1x4x1xf32>, tensor<1x4x1xf32>
+  }
+}
+
+// CHECK: func.func @main(%arg0: tensor<1x4x1xf32>) -> (tensor<1x4x1xf32>, tensor<1x4x1xf32>) {
+// CHECK-NEXT: %cst = stablehlo.constant dense<0.000000e+00> : tensor<f32>
+// CHECK-NEXT: %0 = stablehlo.slice %arg0 [0:1, 0:1, 0:1] : (tensor<1x4x1xf32>) -> tensor<1x1x1xf32>
+// CHECK-NEXT: %1 = stablehlo.reshape %0 : (tensor<1x1x1xf32>) -> tensor<1xf32>
+// CHECK-NEXT: %2 = stablehlo.broadcast_in_dim %1, dims = [0] : (tensor<1xf32>) -> tensor<3xf32>
+// CHECK-NEXT: %3 = stablehlo.add %2, %2 : tensor<3xf32>
+// CHECK-NEXT: %4 = stablehlo.reduce(%3 init: %cst) applies stablehlo.add across dimensions = [0] : (tensor<3xf32>, tensor<f32>) -> tensor<f32>
+// CHECK-NEXT: %5 = stablehlo.reshape %4 : (tensor<f32>) -> tensor<1x1x1xf32>
+// CHECK-NEXT: %6 = stablehlo.pad %5, %cst, low = [0, 0, 0], high = [0, 3, 0], interior = [0, 0, 0] : (tensor<1x1x1xf32>, tensor<f32>) -> tensor<1x4x1xf32>
+// CHECK-NEXT: return %6, %arg0 : tensor<1x4x1xf32>, tensor<1x4x1xf32>
+// CHECK-NEXT: }
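As a sanity check on the CHECK expectations (my own arithmetic, not part of the commit): the traced function computes f(x) = Σᵢ₌₀² (x[0,0,0]·1)² = 3·x₀², so the only nonzero gradient entry is ∂f/∂x₀ = 6·x₀ at position [0,0,0], which is exactly what the reduce, reshape, and zero pad in the expected output assemble. A quick finite-difference confirmation in C++:

    #include <cmath>
    #include <cstdio>

    // f collapses the test body to a scalar: three copies of x0 squared.
    static double f(double x0) { return 3.0 * x0 * x0; }

    int main() {
      double x0 = 0.7;                            // arbitrary sample point
      double analytic = 6.0 * x0;                 // what the CHECK IR computes
      double h = 1e-6;
      double numeric = (f(x0 + h) - f(x0 - h)) / (2.0 * h);
      std::printf("analytic %.6f vs numeric %.6f\n", analytic, numeric);
      return std::fabs(analytic - numeric) < 1e-4 ? 0 : 1;
    }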
