
Commit 8999ba5

Make assume_pure able to work with functions that depend on randomness (#9460)
1 parent 1ccfede commit 8999ba5

File tree

2 files changed: +125 -2 lines changed


test/test_assume_pure.py

Lines changed: 73 additions & 0 deletions
@@ -1,4 +1,5 @@
 from copy import deepcopy
+import functools
 import glob
 import os
 from absl.testing import absltest
@@ -178,6 +179,36 @@ def simple_torch_function(a, b):
     # Check that we only lower to HLO twice (once for forward, once for backward).
     self.assertEqual(ending_lowerings - starting_lowerings, 2)

+  def test_assume_pure_avoid_retracing_avoid_rejit_rand(self):
+    """Tests that we avoid retracing and re-jitting when using assume_pure."""
+
+    # Arrange: first clear the cache to prevent contamination from other tests.
+    xb._JAX_TO_XLA_COMPUTATION_CACHE.clear()
+    starting_lowerings = xb._jax_to_xla_computation_cache_elements()
+    trace_counter = 0
+
+    @functools.partial(assume_pure, add_rng_seed_argument=True)
+    def simple_torch_function(a, b):
+      nonlocal trace_counter
+      trace_counter += 1
+      return torch.sin(a @ b)
+
+    # Act: simulate a training loop.
+    for i in range(5):
+      a = torch.ones((3, 3), device='xla', requires_grad=True)
+      o = simple_torch_function(a, a, rng_seed=i)
+      o.sum().backward()
+      torch_xla.sync()
+
+    # Assert
+    ending_lowerings = xb._jax_to_xla_computation_cache_elements()
+
+    # Check that we only trace once.
+    self.assertEqual(trace_counter, 1)
+
+    # Check that we only lower to HLO twice (once for forward, once for backward).
+    self.assertEqual(ending_lowerings - starting_lowerings, 2)
+
   def test_assume_pure_matmul_grads(self):
     """Tests matmul with all inputs requiring gradients."""

@@ -445,6 +476,48 @@ def torch_func(a, b):
     self.assertTrue(MAGIC_STRING in proto_str,
                     f'Expected "{MAGIC_STRING}" trace in: {path}')

+  def test_assume_pure_with_rng(self):
+
+    def add_randn(a):
+      return a + torch.rand_like(a)
+
+    add_randn_p = assume_pure(add_randn, add_rng_seed_argument=True)
+
+    a = torch.randn((2, 2), device='xla')
+    with self.assertRaises(AssertionError):
+      # did not pass rng key
+      add_randn_p(a)
+
+    res1 = add_randn_p(a, rng_seed=0)
+    res2 = add_randn_p(a, rng_seed=1)
+    # different keys yield different result
+    self.assertFalse(torch.allclose(res1, res2))
+
+    res1_again = add_randn_p(a, rng_seed=0)
+    # same key yields same result
+    self.assertTrue(torch.allclose(res1, res1_again))
+
+  def test_assume_pure_with_many_random(self):
+
+    def many_rand(a):
+      a = torch.rand_like(a)
+      b = torch.rand_like(a)
+      c = torch.rand_like(a)
+      return c
+
+    randn_p = assume_pure(many_rand, add_rng_seed_argument=True)
+
+    a = torch.randn((2, 2), device='xla')
+
+    res1 = randn_p(a, rng_seed=0)
+    res2 = randn_p(a, rng_seed=1)
+    # different keys yield different result
+    self.assertFalse(torch.allclose(res1, res2))
+
+    res1_again = randn_p(a, rng_seed=0)
+    # same key yields same result
+    self.assertTrue(torch.allclose(res1, res1_again))
+

 FLAGS = flags.FLAGS
 flags.DEFINE_integer(

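For context, here is a minimal, self-contained sketch of the usage pattern the new tests exercise. The names are illustrative, and the import path assumes `assume_pure` is exposed from `torch_xla.experimental.assume_pure`, the module changed below:

```python
import torch
import torch_xla
from torch_xla.experimental.assume_pure import assume_pure


def add_noise(a):
  # Not pure on its own: the output depends on the global RNG state.
  return a + torch.rand_like(a)


# With add_rng_seed_argument=True, the wrapped function requires an explicit
# integer `rng_seed` keyword argument, which makes the randomness reproducible.
add_noise_p = assume_pure(add_noise, add_rng_seed_argument=True)

a = torch.zeros((2, 2), device='xla')
out_a = add_noise_p(a, rng_seed=0)        # fixed seed -> fixed noise
out_a_again = add_noise_p(a, rng_seed=0)  # same seed -> same result
out_b = add_noise_p(a, rng_seed=1)        # different seed -> different result
torch_xla.sync()

assert torch.allclose(out_a, out_a_again)
assert not torch.allclose(out_a, out_b)
```

Calling the wrapped function without `rng_seed` raises an AssertionError, and without `add_rng_seed_argument=True` the RNG key captured at tracing time would be hardcoded into the cached computation.
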
torch_xla/experimental/assume_pure.py

Lines changed: 52 additions & 2 deletions
@@ -12,7 +12,7 @@


 @requires_jax
-def assume_pure(fn):
+def assume_pure(fn, *, add_rng_seed_argument=False):
   """Decorates a pure PyTorch/XLA function to skip expensive re-tracing.

   Returns a new function that will only be traced once for each unique
@@ -30,9 +30,59 @@ def assume_pure(fn):

   - Other custom PyTorch/XLA operations such as `flash_attention` are not
     supported. This limitation may be lifted in the future.
+
+  Args:
+    fn: Callable, the function that is assumed to be pure.
+      A pure function means that if the inputs are fixed, the output is also
+      fixed, i.e. a mathematical function. NOTE: functions that generate random
+      numbers are NOT pure by this definition.
+
+    add_rng_seed_argument: bool, if True, the returned function will take an
+      extra 'rng_seed' keyword argument. Different rng_seed values can produce
+      different results, so the lifted function becomes pure. rng_seed must be
+      an int.
+
+  Example:
+
+    ```
+    def add_randn(a):
+      return a + torch.randn_like(a)
+    ```
+
+    add_randn is not a pure function, but assume_pure(add_randn) assumes it is
+    pure and hardcodes the rng key at tracing time, making add_randn behave
+    differently (and thus incorrectly).
+
+    If instead we use add_randn_p = assume_pure(add_randn, add_rng_seed_argument=True),
+    then we can call add_randn_p(a, rng_seed=0) to get one result and
+    add_randn_p(a, rng_seed=1) to get a different result.
   """
   from torchax.interop import jax_view
-  return j2t_autograd(jax_view(fn))
+  import torchax
+  if add_rng_seed_argument:
+
+    def new_fn(*args, **kwargs):
+      env = torchax.default_env()
+      rng_seed = args[0]
+      args = args[1:]
+      env.manual_seed(rng_seed._elem)
+      return fn(*args, **kwargs)
+
+    jitted = j2t_autograd(jax_view(new_fn))
+
+    def func_to_return(*args, **kwargs):
+      rng_seed = kwargs.get('rng_seed')
+      assert rng_seed is not None, 'Missing keyword argument rng_seed.'
+      kwargs.pop('rng_seed')
+      if isinstance(rng_seed, int):
+        rng_seed = torch.tensor(rng_seed, dtype=torch.uint32, device='xla')
+      args = (rng_seed, *args)
+      result = jitted(*args, **kwargs)
+      return result
+
+    return func_to_return
+  else:
+    return j2t_autograd(jax_view(fn))


 @requires_jax
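
A note on the design above: func_to_return converts the integer seed into a uint32 tensor on the XLA device and prepends it to the positional arguments, while new_fn pops it back off and seeds the torchax environment before calling the original function. Because the seed enters the jitted computation as a tensor input rather than a Python constant, different seeds reuse the same trace and HLO, which is what test_assume_pure_avoid_retracing_avoid_rejit_rand verifies (one trace, two lowerings across five seeds). A minimal JAX-only sketch of the same effect; the names here are illustrative and not part of this commit:

```python
import jax
import jax.numpy as jnp

trace_count = 0


@jax.jit
def add_noise(x, seed):
  # `seed` arrives as a traced uint32 value, so different seeds reuse the
  # same trace and compiled executable.
  global trace_count
  trace_count += 1
  key = jax.random.PRNGKey(seed)
  return x + jax.random.uniform(key, x.shape)


x = jnp.zeros((2, 2))
for i in range(5):
  add_noise(x, jnp.uint32(i))

# The Python body ran only once: jax.jit cached the computation and fed the
# five different seeds in as data, analogous to the rng_seed tensor above.
assert trace_count == 1
```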
