Add DTensor prim and torch symbol for exp (#2496)

kshitij12345 · web-flow · commit 65193b0f2c3a · 2025-09-30T13:04:51.000+02:00
diff --git a/thunder/clang/__init__.py b/thunder/clang/__init__.py
@@ -9,6 +9,7 @@
 import warnings
 
 from thunder.clang.langctx import register_method
+from thunder.clang.utils import create_maybe_convert_to_dtype_with_prim, _elementwise_unary_wrapper
 from thunder.core import utils
 from thunder.core.baseutils import run_once
 from thunder.core.langctxs import langctx, Languages
@@ -140,39 +141,7 @@ def construct_tuple(tup: tuple, /) -> tuple:
 
 
 # TODO Review revising enforce_safe_casting to be more like NumPy's
-@clangop()
-def maybe_convert_to_dtype(a, dtype, *, enforce_safe_casting=False):
-    """If a has the same dtype as the given dtype, returns a unmodified.
-
-    Otherwise returns a converted to the given dtype.
-    """
-
-    utils.check(utils.is_dtype(dtype), lambda: f"Unknown dtype {dtype}!")
-
-    if isinstance(a, Sequence):
-        return tuple(maybe_convert_to_dtype(x, dtype) for x in a)
-    if isinstance(a, TensorProxy):
-        # Translates numbertypes to dtypes
-        if dtypes.is_numbertype(dtype):
-            dtype = dtypes.numbertype_to_dtype(dtype)
-    elif isinstance(a, (Number, NumberProxy)):
-        # NOTE This allows conversions like (5, float32) -> 5., which is a little odd
-        dtype = utils.dtype_to_numbertype(dtype)
-    else:
-        raise ValueError(
-            f"Trying to convert the type of the data of an unknown object {a} of {type(a)} that is neither a tensor, number, or sequence!"
-        )
-
-    if not utils.are_same_dtypes(a, dtype):
-        if enforce_safe_casting:
-            utils.check(
-                utils.can_safe_cast_to(cast_from=utils.to_dtype(a), cast_to=dtype),
-                lambda: f"Can't safe case from a={a} with dtype {utils.to_dtype(a)} to {dtype}!",
-            )
-
-        return prims.convert_element_type(a, dtype)
-
-    return a
+maybe_convert_to_dtype = clangop()(create_maybe_convert_to_dtype_with_prim(prims.convert_element_type))
 
 
 # TODO Consider maybe_device_put analogous to maybe_convert_to_dtype above
@@ -1212,22 +1181,7 @@ def _maybe_broadcast(x, shape):
 # Elementwise unary operations
 #
 # TODO Consider annotating these operators with kind and type promotion information
-
-
-# TODO Add supported dtypes
-def _elementwise_unary_wrapper(
-    a,
-    *,
-    prim,
-    type_promotion_kind=utils.ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
-):
-    computation_dtype, result_dtype = utils.elementwise_type_promotion(a, type_promotion_kind=type_promotion_kind)
-
-    a = maybe_convert_to_dtype(a, computation_dtype)
-    result = prim(a)
-    result = maybe_convert_to_dtype(result, result_dtype)
-
-    return result
+_elementwise_unary_wrapper = partial(_elementwise_unary_wrapper, dtype_conversion_fn=maybe_convert_to_dtype)
 
 
 # TODO Return self for bool and uint datatypes?
diff --git a/thunder/clang/utils.py b/thunder/clang/utils.py
@@ -0,0 +1,68 @@
+from numbers import Number
+from collections.abc import Sequence
+from collections.abc import Callable
+
+from thunder.core import utils
+import thunder.core.dtypes as dtypes
+from thunder.core.symbol import Symbol
+
+from thunder.core.proxies import (
+    NumberProxy,
+    TensorProxy,
+)
+
+
+def create_maybe_convert_to_dtype_with_prim(conversion_prim: Symbol):
+    """Builds a ``maybe_convert_to_dtype`` function whose casts are performed by ``conversion_prim``."""
+    assert isinstance(conversion_prim, Symbol)
+
+    def maybe_convert_to_dtype(a, dtype, *, enforce_safe_casting=False):
+        """If a has the same dtype as the given dtype, returns a unmodified.
+
+        Otherwise returns a converted to the given dtype. Sequences are converted elementwise into a tuple.
+        If enforce_safe_casting is True, the conversion must pass utils.can_safe_cast_to (checked per element).
+        """
+        utils.check(utils.is_dtype(dtype), lambda: f"Unknown dtype {dtype}!")
+
+        if isinstance(a, Sequence):
+            # Forwards enforce_safe_casting so elements of a sequence are validated like a lone value
+            return tuple(maybe_convert_to_dtype(x, dtype, enforce_safe_casting=enforce_safe_casting) for x in a)
+        if isinstance(a, TensorProxy):
+            # Translates numbertypes to dtypes
+            if dtypes.is_numbertype(dtype):
+                dtype = dtypes.numbertype_to_dtype(dtype)
+        elif isinstance(a, (Number, NumberProxy)):
+            # NOTE This allows conversions like (5, float32) -> 5., which is a little odd
+            dtype = utils.dtype_to_numbertype(dtype)
+        else:
+            raise ValueError(
+                f"Trying to convert the type of the data of an unknown object {a} of {type(a)} that is neither a tensor, number, or sequence!"
+            )
+        if not utils.are_same_dtypes(a, dtype):
+            if enforce_safe_casting:
+                utils.check(
+                    utils.can_safe_cast_to(cast_from=utils.to_dtype(a), cast_to=dtype),
+                    lambda: f"Can't safe cast from a={a} with dtype {utils.to_dtype(a)} to {dtype}!",
+                )
+
+            return conversion_prim(a, dtype)
+
+        return a
+
+    return maybe_convert_to_dtype
+
+
+# TODO Add supported dtypes
+def _elementwise_unary_wrapper(
+    a,
+    *,
+    prim,
+    type_promotion_kind=utils.ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    dtype_conversion_fn: Callable[[TensorProxy | NumberProxy, dtypes.dtype], TensorProxy | NumberProxy],
+):
+    """Applies ``prim`` to ``a`` in the promoted computation dtype and returns the result in the result dtype.
+
+    Both dtypes come from ``utils.elementwise_type_promotion``; casts go through ``dtype_conversion_fn``.
+    """
+    compute_dtype, out_dtype = utils.elementwise_type_promotion(a, type_promotion_kind=type_promotion_kind)
+    computed = prim(dtype_conversion_fn(a, compute_dtype))
+    return dtype_conversion_fn(computed, out_dtype)
diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py
@@ -1541,6 +1541,7 @@ def exp(a: TensorProxy | Number, *, fd: FusionDefinition, lc_to_nv_map: dict) ->
 
 
 register_supported(PrimIDs.EXP, exp, _elementwise_unary_check)
+register_supported(DTensorPrimIDs.EXP, exp, _elementwise_unary_check)
 
 
 def exp2(a: TensorProxy | Number, *, fd: FusionDefinition, lc_to_nv_map: dict) -> Any:
diff --git a/thunder/tests/distributed/test_dtensor.py b/thunder/tests/distributed/test_dtensor.py
@@ -39,7 +39,7 @@
 #       to choose between DTensor supported symbol (from `dtensor_torch_and_prims.py`) or the usual `ltorch` symbol.
 #       This is why we need to make sure that the OpInfo uses PyTorch native op as `op` which is passed to thunder.jit.
 class DTensorOpInfo:
-    def __init__(self, *, name, op, torch_reference, supports_grad, sample_inputs):
+    def __init__(self, *, name, op, torch_reference, supports_grad, sample_inputs, skip_noncontiguous_for_executor=()):
         self.name = name
         assert "torch" in op.__module__, "OpInfo must use PyTorch native op as `op` which is passed to thunder.jit"
         self.op = op
@@ -49,6 +49,10 @@ def __init__(self, *, name, op, torch_reference, supports_grad, sample_inputs):
         # NOTE: This should generally reuse the sample_inputs from the OpInfo
         self.sample_inputs = sample_inputs
 
+        # In some cases, non-contiguous inputs are not supported by the executor.
+        assert isinstance(skip_noncontiguous_for_executor, tuple), "skip_noncontiguous_for_executor must be a tuple"
+        self.skip_noncontiguous_for_executor = skip_noncontiguous_for_executor
+
 
 # DTensor supported ops
 dtensor_supported_opinfos = (
@@ -66,6 +70,15 @@ def __init__(self, *, name, op, torch_reference, supports_grad, sample_inputs):
         supports_grad=False,
         sample_inputs=get_opinfo("linear").sample_inputs,
     ),
+    DTensorOpInfo(
+        name="exp",
+        op=torch.exp,
+        torch_reference=torch.exp,
+        supports_grad=True,
+        sample_inputs=get_opinfo("exp").sample_inputs,
+        # Ref: https://github.com/NVIDIA/Fuser/pull/5124
+        skip_noncontiguous_for_executor=("nvfuser",),
+    ),
 )
 
 skip_opinfos = (
@@ -238,6 +251,10 @@ def test_dtensor_opinfo(self, op: OpInfo, executor):
         tested_sample_count = 0
 
         for sample in op.sample_inputs("cpu", dtypes.float32, requires_grad=op.supports_grad):
+            # Skip if non-contiguous inputs are not supported by the executor.
+            if executor in op.skip_noncontiguous_for_executor and not sample.args[0].is_contiguous():
+                continue
+
             # DTensorConverter converts inputs tensors to DTensor and creates DTensor
             # with possible placements based on the input shapes.
             # See - https://github.com/pytorch/pytorch/blob/eaa5d9d3d3dc642832b269b184f0c3ab8c990274/torch/testing/_internal/distributed/_tensor/common_dtensor.py#L521
diff --git a/thunder/torch/experimental/dtensor_torch_and_prims.py b/thunder/torch/experimental/dtensor_torch_and_prims.py
@@ -6,6 +6,7 @@
 import thunder.torch as ltorch
 from thunder.core.pytree import tree_flatten
 from thunder import clang
+from thunder.clang.utils import create_maybe_convert_to_dtype_with_prim, _elementwise_unary_wrapper
 from thunder.torch.experimental.dtensor_utils import run_with_fake_tensor
 from thunder.torch.experimental.dtensor_proxy import DTensorProxy, create_dtensor_proxy_from_proxies
 from thunder.torch.langctx import register_method
@@ -35,6 +36,7 @@ class DTensorPrimIDs(Enum):
     RESHAPE = auto()
     CONVERT_ELEMENT_TYPE = auto()
     BROADCAST_IN_DIM = auto()
+    EXP = auto()
     LINEAR = auto()
 
 
@@ -242,6 +244,10 @@ def dtensor_broadcast_in_dim_meta(a, shape, broadcast_dimensions):
 pytorchex.register_implementation(dtensor_broadcast_in_dim_prim, dtensor_broadcast_in_dim_prim_impl)
 
 
+maybe_convert_to_dtype = create_maybe_convert_to_dtype_with_prim(dtensor_convert_element_type_prim)
+_elementwise_unary_wrapper = partial(_elementwise_unary_wrapper, dtype_conversion_fn=maybe_convert_to_dtype)
+
+
 def dtensor_linear_meta(a, w, bias):
     output = run_with_fake_tensor(torch.nn.functional.linear, a, w, bias)
     local_tensor_proxy = TensorProxy(like=a.local_tensor)
@@ -268,7 +274,45 @@ def dtensor_linear(a: TensorLike, w: TensorLike, bias: None | TensorLike = None)
     return dtensor_linear_prim(a, w, bias)
 
 
+def dtensor_exp_meta(a):
+    """Meta function for the DTensor exp prim: builds the output proxy from a ``torch.exp`` run on ``a``."""
+    fake_out = run_with_fake_tensor(torch.exp, a)
+    local_proxy = TensorProxy(like=a.local_tensor)
+    out_spec = AnyProxy(fake_out._spec, history=a.history)
+    return create_dtensor_proxy_from_proxies(local_proxy, out_spec, False)
+
+
+dtensor_exp_prim = make_prim(DTensorPrimIDs.EXP, "dtensor_exp_prim", meta=dtensor_exp_meta)
+# Registers a pytorchex operator for the prim, backed by torch.exp
+dtensor_exp_prim_impl = pytorchex.register_operator("dtensor_exp_prim", like=dtensor_exp_prim, fn=torch.exp)
+# Registers that operator as the implementation of dtensor_exp_prim on pytorchex
+pytorchex.register_implementation(dtensor_exp_prim, dtensor_exp_prim_impl)
+
+
+def _dtensor_exp_prim_grad(a: TensorLike) -> TensorLike:
+    """Grad rule registered for ``dtensor_exp_prim``; returns the forward result."""
+    out = dtensor_exp_prim(a)
+
+    # d/da exp(a) = exp(a), so the input grad is the output grad scaled by the forward result
+    grad_out = get_grad(out)
+    put_grad(a, grad_out * out)
+    return out
+
+
+register_grad(dtensor_exp_prim, _dtensor_exp_prim_grad)
+
+
+@dtensor_torchsymbol(torch.exp, id="dtensor.torch.exp")
+def dtensor_exp(a: TensorLike) -> TensorLike:
+    """DTensor variant of ``torch.exp`` built on ``dtensor_exp_prim``."""
+    # The wrapper casts ``a`` to the computation dtype, applies the prim, then casts to the result dtype;
+    # both dtypes are derived from the INT_TO_FLOAT promotion kind
+    int_to_float = utils.ELEMENTWISE_TYPE_PROMOTION_KIND.INT_TO_FLOAT
+    return _elementwise_unary_wrapper(a, prim=dtensor_exp_prim, type_promotion_kind=int_to_float)
+
+
 def register_dtensor_torch_and_prims():
     register_function_for_dtensor(torch.mul, ltorch.mul, dtensor_mul, is_method=True)
     register_function_for_dtensor(torch.reshape, ltorch.reshape, dtensor_reshape, is_method=True)
     register_function_for_dtensor(torch.nn.functional.linear, ltorch.linear, dtensor_linear, is_method=False)
+    register_function_for_dtensor(torch.exp, ltorch.exp, dtensor_exp, is_method=True)
diff --git a/thunder/torch/experimental/dtensor_utils.py b/thunder/torch/experimental/dtensor_utils.py
@@ -58,14 +58,17 @@ def materialize_fake_tensors(t):
                 return t
 
             if isinstance(t, DTensorProxy):
-                i_t = torch.randn(
+                i_t = torch.ones(
                     t.local_tensor.shape,
                     device=to_torch_device(t.local_tensor.device),
                     dtype=to_torch_dtype(t.local_tensor.dtype),
                 )
-                return DTensor.from_local(i_t, t.spec._o.device_mesh, t.spec._o.placements)
 
-            return torch.randn(t.shape, device=to_torch_device(t.device), dtype=to_torch_dtype(t.dtype))
+                shape = t.spec._o.tensor_meta.shape if t.spec._o.tensor_meta is not None else None
+                stride = t.spec._o.tensor_meta.stride if t.spec._o.tensor_meta is not None else None
+                return DTensor.from_local(i_t, t.spec._o.device_mesh, t.spec._o.placements, shape=shape, stride=stride)
+
+            return torch.ones(t.shape, device=to_torch_device(t.device), dtype=to_torch_dtype(t.dtype))
 
         args, kwargs = tree_map(materialize_fake_tensors, (args, kwargs))
 

Original file line number	Diff line number	Diff line change
`@@ -1541,6 +1541,7 @@ def exp(a: TensorProxy \| Number, *, fd: FusionDefinition, lc_to_nv_map: dict) ->`
`1541`	`1541`
`1542`	`1542`
`1543`	`1543`	`register_supported(PrimIDs.EXP, exp, _elementwise_unary_check)`
	`1544`	`+register_supported(DTensorPrimIDs.EXP, exp, _elementwise_unary_check)`
`1544`	`1545`
`1545`	`1546`
`1546`	`1547`	`def exp2(a: TensorProxy \| Number, *, fd: FusionDefinition, lc_to_nv_map: dict) -> Any:`