
Commit e4a05de

Skylion007 authored and pytorchmergebot committed
[BE][Ez]: Fix docs recommending inefficient tensor op order (pytorch#144270)
`detach().clone()` is faster than `.clone().detach()` since the gradients are not cloned. Let's update all the documentation and tests so that users do not use the inefficient op ordering.

Pull Request resolved: pytorch#144270
Approved by: https://github.com/awgu, https://github.com/XuehaiPan
1 parent 8d35333 commit e4a05de
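
For context on why the ordering matters: detaching first means the subsequent `clone()` runs outside the autograd graph, so no gradient bookkeeping is created and then discarded. A minimal sketch of the two spellings (illustrative, not part of the commit):

```python
import torch

x = torch.randn(1024, 1024, requires_grad=True)

# clone() first: the clone is recorded in x's autograd graph,
# and detach() then discards that bookkeeping.
slow = x.clone().detach()

# detach() first: the clone happens outside the autograd graph entirely.
fast = x.detach().clone()

# Both spellings produce the same grad-free leaf tensor.
assert torch.equal(slow, fast)
assert not slow.requires_grad and not fast.requires_grad
```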

7 files changed (+17, -15)

torch/_refs/__init__.py

Lines changed: 4 additions & 2 deletions

```diff
@@ -6607,8 +6607,10 @@ def tensor(data, *, dtype=None, device=None, pin_memory=False, requires_grad=Fal
     # TODO (or not): support names kwarg
     if isinstance(data, torch.Tensor):
         warnings.warn(
-            "To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() "
-            "or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor)"
+            "To copy construct from a tensor, it is recommended to use sourceTensor.detach().clone() "
+            "or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor)",
+            UserWarning,
+            stacklevel=2,
         )
     type_inference = dtype is None
     new_tensor = _internal_new_from_data(
```
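
Beyond the reordered message, this hunk adds an explicit `UserWarning` category and `stacklevel=2`, which makes Python attribute the warning to the caller of `tensor()` rather than to the `warnings.warn` call site. A generic illustration of `stacklevel`, using a hypothetical `api` function:

```python
import warnings

def api(x):
    # stacklevel=2 blames the line that called api(),
    # not this warnings.warn() line.
    warnings.warn("use the new spelling", UserWarning, stacklevel=2)
    return x

api(1)  # the reported filename/lineno point at this call site
```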

torch/_tensor_docs.py

Lines changed: 3 additions & 3 deletions

```diff
@@ -57,9 +57,9 @@ def add_docstr_all(method, docstr):
 .. warning::
 
     When data is a tensor `x`, :func:`new_tensor()` reads out 'the data' from whatever it is passed,
-    and constructs a leaf variable. Therefore ``tensor.new_tensor(x)`` is equivalent to ``x.clone().detach()``
-    and ``tensor.new_tensor(x, requires_grad=True)`` is equivalent to ``x.clone().detach().requires_grad_(True)``.
-    The equivalents using ``clone()`` and ``detach()`` are recommended.
+    and constructs a leaf variable. Therefore ``tensor.new_tensor(x)`` is equivalent to ``x.detach().clone()``
+    and ``tensor.new_tensor(x, requires_grad=True)`` is equivalent to ``x.detach().clone().requires_grad_(True)``.
+    The equivalents using ``detach()`` and ``clone()`` are recommended.
 
 Args:
     data (array_like): The returned Tensor copies :attr:`data`.
```
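
A short sketch of the equivalence the docstring states, using placeholder tensors:

```python
import torch

x = torch.arange(3.0, requires_grad=True)
base = torch.empty(0)

a = base.new_tensor(x)  # reads the data out of x, builds a leaf
b = x.detach().clone()  # the recommended equivalent

assert torch.equal(a, b)
assert a.is_leaf and b.is_leaf and not a.requires_grad
```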

torch/_torch_docs.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -9034,8 +9034,8 @@ def merge_dicts(*dicts):
     When working with tensors prefer using :func:`torch.Tensor.clone`,
     :func:`torch.Tensor.detach`, and :func:`torch.Tensor.requires_grad_` for
     readability. Letting `t` be a tensor, ``torch.tensor(t)`` is equivalent to
-    ``t.clone().detach()``, and ``torch.tensor(t, requires_grad=True)``
-    is equivalent to ``t.clone().detach().requires_grad_(True)``.
+    ``t.detach().clone()``, and ``torch.tensor(t, requires_grad=True)``
+    is equivalent to ``t.detach().clone().requires_grad_(True)``.
 
 .. seealso::
```
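
The recommended spelling also makes the autograd semantics clear: the copy is a fresh leaf, so gradients do not flow back to the source. A small illustrative check:

```python
import torch

t = torch.ones(2, requires_grad=True)
copy = t.detach().clone().requires_grad_(True)

copy.sum().backward()
assert copy.grad is not None  # gradient lands on the copy...
assert t.grad is None         # ...and does not flow back to t
```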

torch/csrc/utils/tensor_new.cpp

Lines changed: 2 additions & 2 deletions

```diff
@@ -1458,8 +1458,8 @@ Tensor tensor_ctor(
   if (THPVariable_Check(data)) {
     auto ret = PyErr_WarnEx(
         PyExc_UserWarning,
-        "To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() "
-        "or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).",
+        "To copy construct from a tensor, it is recommended to use sourceTensor.detach().clone() "
+        "or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).",
         1);
     if (ret != 0)
       throw python_error();
```
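
This C++ path is where the warning users actually see from eager `torch.tensor(sourceTensor)` calls is emitted. A sketch of observing it from Python, assuming a build that emits the copy-construct warning:

```python
import warnings
import torch

src = torch.ones(3)
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    torch.tensor(src)  # copy-constructing from a tensor warns

assert any(issubclass(w.category, UserWarning) for w in caught)
```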

torch/masked/maskedtensor/core.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -170,7 +170,7 @@ def __new__(cls, data, mask, requires_grad=False):
         if data.requires_grad:
             warnings.warn(
                 "It is not recommended to create a MaskedTensor with a tensor that requires_grad. "
-                "To avoid this, you can use data.clone().detach()",
+                "To avoid this, you can use data.detach().clone()",
                 UserWarning,
                 stacklevel=2,
             )
```
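
A sketch of following the warning's advice, assuming the prototype `torch.masked.masked_tensor` factory:

```python
import torch
from torch.masked import masked_tensor

data = torch.randn(4, requires_grad=True)
mask = torch.tensor([True, False, True, True])

# Passing `data` directly would trigger the warning above; detach and
# clone first so construction starts from a grad-free copy.
mt = masked_tensor(data.detach().clone(), mask)
```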

torch/testing/_internal/common_optimizers.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -2308,7 +2308,7 @@ def __init__(self, assert_eq_kwargs=None):
 
     def add(self, tensor):
         """
-        Add a clone().detach()'d version of the tensor
+        Add a detach().clone()'d version of the tensor
         """
         self.tensors.append(tensor.detach().clone())
 
```

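The helper stores defensive snapshots; a stripped-down re-creation of the pattern (hypothetical `Snapshots` class, not the actual test helper):

```python
import torch

class Snapshots:
    """Keep detached copies so later in-place updates don't affect them."""

    def __init__(self):
        self.tensors = []

    def add(self, tensor):
        # detach() first so clone() runs outside the autograd graph
        self.tensors.append(tensor.detach().clone())

snaps = Snapshots()
p = torch.zeros(3, requires_grad=True)
snaps.add(p)
with torch.no_grad():
    p.add_(1.0)
assert torch.equal(snaps.tensors[0], torch.zeros(3))  # snapshot unchanged
```
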
torch/testing/_internal/opinfo/definitions/nested.py

Lines changed: 4 additions & 4 deletions

```diff
@@ -226,7 +226,7 @@ def _raggedness_matches(nt1, nt2):
 # as this causes autograd problems.
 def _clone(t):
     requires_grad = t.requires_grad
-    return t.clone().detach().requires_grad_(requires_grad)
+    return t.detach().clone().requires_grad_(requires_grad)
 
 
 # Helper function to update a sample with new kwargs / name
@@ -1316,10 +1316,10 @@ def _get_njts():
     # non-contiguous transposed
     yield njt.transpose(1, 3)
     # non-contiguous with holes
-    values = njt.values().clone().detach()
-    offsets = njt.offsets().clone().detach()
+    values = njt.values().detach().clone()
+    offsets = njt.offsets().detach().clone()
     # subtract 1 to cause holes
-    lengths = (offsets.diff() - 1).clone().detach()
+    lengths = (offsets.diff() - 1).detach().clone()
     yield torch.nested.nested_tensor_from_jagged(
         values=values,
         offsets=offsets,
```

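The `_clone` helper copies a tensor while preserving its `requires_grad` flag; a standalone sketch of the same idea (hypothetical name):

```python
import torch

def clone_preserving_grad(t):
    # detach() first, then clone(), then restore the original flag
    return t.detach().clone().requires_grad_(t.requires_grad)

a = torch.randn(2, requires_grad=True)
b = clone_preserving_grad(a)
assert b.requires_grad and b.is_leaf  # fresh autograd leaf, same flag
```
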