Commit 9b2ebc0

yashk2810 authored and Google-ML-Automation committed
Fix reduce_sum_transpose_rule so that its broadcast_in_dim uses out_sharding=operand.aval.to_cotangent_aval().sharding instead of operand.aval.sharding. This is because if the operand is reduced, then on the backward pass we want the cotangent type to become unreduced.
PiperOrigin-RevId: 834511302
1 parent b37b6c0 commit 9b2ebc0

File tree

2 files changed: +19 -2 lines changed

jax/_src/lax/lax.py

Lines changed: 3 additions & 2 deletions
@@ -7824,8 +7824,9 @@ def _reduce_sum_transpose_rule(cotangent, operand, *, axes, out_sharding):
   assert ad.is_undefined_primal(operand)
   input_shape = operand.aval.shape
   broadcast_dimensions = tuple(np.delete(np.arange(len(input_shape)), axes))
-  result = broadcast_in_dim(cotangent, input_shape, broadcast_dimensions,
-                            out_sharding=operand.aval.sharding)
+  result = broadcast_in_dim(
+      cotangent, input_shape, broadcast_dimensions,
+      out_sharding=operand.aval.to_cotangent_aval().sharding)
   assert result.shape == input_shape
   return [result]
 
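For readers skimming the hunk above, the unchanged broadcast_dimensions line is the part that maps each surviving axis of the cotangent back to its position in the operand shape. A minimal sketch with made-up shapes (the names below are local to the sketch, not JAX internals):

import numpy as np

# Made-up example: a 3-D operand summed over axes (0, 2); only axis 1
# survives in the reduce_sum output, so the cotangent is broadcast back
# along that position.
input_shape = (4, 8, 2)   # stands in for operand.aval.shape
axes = (0, 2)             # stands in for the reduce_sum axes
broadcast_dimensions = tuple(np.delete(np.arange(len(input_shape)), axes))
assert broadcast_dimensions == (1,)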

tests/pjit_test.py

Lines changed: 16 additions & 0 deletions
@@ -9631,6 +9631,22 @@ def test_jnp_repeat_arraylike(self, mesh):
     jnp.repeat(positions, 5, axis=0, total_repeat_length=num_electrons,
                out_sharding=P())  # doesn't crash
 
+  @jtu.with_explicit_mesh((2,), 'x')
+  def test_mul_inputs_both_reduced(self, mesh):
+    arr1 = jax.device_put(np.arange(8.), P(reduced={'x'}))
+    arr2 = jax.device_put(np.arange(8.), P(reduced={'x'}))
+
+    @jax.jit
+    def f(x, y):
+      z = x * y
+      return z.sum()
+
+    out1, out2 = jax.jit(jax.grad(f, argnums=(0, 1)))(arr1, arr2)
+    self.assertEqual(out1.sharding,
+                     NamedSharding(mesh, P(None, unreduced={'x'})))
+    self.assertEqual(out2.sharding,
+                     NamedSharding(mesh, P(None, unreduced={'x'})))
+
 
 @jtu.pytest_mark_if_available('multiaccelerator')
 class PJitErrorTest(jtu.JaxTestCase):
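For anyone who wants to reproduce this outside the test harness, here is a sketch under stated assumptions: a recent JAX build that has explicit mesh axis types and the reduced=/unreduced= PartitionSpec arguments used in the diff, plus at least two devices. The test's jtu.with_explicit_mesh decorator is test-only; jax.make_mesh and jax.sharding.use_mesh stand in for it here.

import jax
import numpy as np
from jax.sharding import AxisType, NamedSharding, PartitionSpec as P

# Two-device mesh with an explicitly typed 'x' axis (requires >= 2 devices).
mesh = jax.make_mesh((2,), ('x',), axis_types=(AxisType.Explicit,))
with jax.sharding.use_mesh(mesh):
  # Both inputs carry a pending (reduced) sum over 'x'.
  arr1 = jax.device_put(np.arange(8.), P(reduced={'x'}))
  arr2 = jax.device_put(np.arange(8.), P(reduced={'x'}))

  def f(x, y):
    return (x * y).sum()

  g1, g2 = jax.jit(jax.grad(f, argnums=(0, 1)))(arr1, arr2)
  # With the fix, each cotangent comes back unreduced over 'x' rather than
  # inheriting the operands' reduced sharding.
  print(g1.sharding, g2.sharding)

The design point is the one from the commit message: transposition flips a reduced operand type into an unreduced cotangent type, which is exactly what operand.aval.to_cotangent_aval() encodes.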
