
Commit 8b14055

Merge pull request #1299 from yueyinqiu/gradient
property grad
2 parents fddd06a + 3074b2a commit 8b14055

23 files changed: +114 −113 lines changed

RELEASENOTES.md

Lines changed: 4 additions & 0 deletions

@@ -11,8 +11,12 @@ __Breaking Changes__:
 
 __API Changes__:
 
+- #1291 `Tensor.grad()` and `Tensor.set_grad()` have been replaced by a new property `Tensor.grad`.
+- A potential memory leak caused by `set_grad` has been resolved.
+
 __Bug Fixes__:
 
+- #1300 `Adadelta`, `Adam` and `AdamW` will no longer throw `NullReferenceException` when `maximize` is `true` and `grad` is `null`.
 - `torch.normal` will now correctly return a leaf tensor.
 
 # NuGet Version 0.102.4
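
For code that needs updating for the #1291 change, here is a minimal migration sketch; the tensor `t` and the use of `torch.rand` are illustrative only and not part of this commit:

    // Illustrative setup: a tensor that will receive a gradient.
    using var t = torch.rand(3, requires_grad: true);
    using var s = t.sum();
    s.backward();

    // Before #1291:
    //   var g = t.grad();
    //   t.set_grad(null);

    // After #1291: grad is a property with a getter and a setter.
    using var g = t.grad;    // may be null if no gradient has been computed
    t.grad = null;           // replaces set_grad(null)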

src/Examples/AdversarialExampleGeneration.cs

Lines changed: 1 addition & 1 deletion

@@ -133,7 +133,7 @@ private static double Test(
 model.zero_grad();
 loss.backward();
 
-var perturbed = Attack(data, ε, data.grad());
+var perturbed = Attack(data, ε, data.grad);
 
 using (var final = model.call(perturbed)) {

src/FSharp.Examples/AdversarialExampleGeneration.fs

Lines changed: 1 addition & 1 deletion

@@ -79,7 +79,7 @@ let test (model:MNIST.Model) (eps:float) (data:Dataset) size =
 model.zero_grad()
 loss.backward()
 
-use perturbed = attack input (eps.ToScalar()) (input.grad())
+use perturbed = attack input (eps.ToScalar()) (input.grad)
 use final = perturbed --> model
 correct <- correct + final.argmax(1L).eq(labels).sum().ToInt32()
 end

src/TorchSharp/NN/Module.cs

Lines changed: 12 additions & 12 deletions

@@ -239,8 +239,10 @@ private void _toEpilog(ScalarType? dtype, Device device)
     .ToDictionary(field => field.ComponentName());
 
 foreach (var (name, param) in named_parameters(false).ToList()) {
+    using var grad = param.grad;
+
     if (!param.toWillCopy(dtype ?? param.dtype, device ?? param.device) &&
-        (param.grad() is null || !param.grad().toWillCopy(dtype ?? param.dtype, device ?? param.device)))
+        (grad is null || !grad.toWillCopy(dtype ?? param.dtype, device ?? param.device)))
         continue;
 
     Parameter p;
@@ -252,20 +254,19 @@ private void _toEpilog(ScalarType? dtype, Device device)
 // disable grad we would need to call .detach() on the moved tensor.
 using (var d = torch.no_grad()) {
     p = new Parameter(
-        param.to(paramType, device ?? param.device).DetachFromDisposeScope(), param.requires_grad)
-        .DetachFromDisposeScope() as Parameter;
+        data: param.to(paramType, device ?? param.device),
+        requires_grad: param.requires_grad);
+    _ = p.DetachFromDisposeScope();
 
     // Copy the gradient over as well, if it exists
-    var grad = param.grad();
     if (grad is not null) {
-        p.set_grad(grad.to(paramType, device ?? param.device)
-            .with_requires_grad(grad.requires_grad)
-            .MoveToOtherDisposeScope(p));
+        using var newGrad = grad.to(paramType, device ?? param.device)
+            .with_requires_grad(grad.requires_grad);
+        p.grad = newGrad;
     }
 
-    // Dispose the param and gradient
+    // Dispose the param
     param.Dispose();
-    grad?.Dispose();
 }
 ConditionallyRegisterParameter(name, p);
 
@@ -360,11 +361,10 @@ public virtual void zero_grad(bool set_to_none = true)
 CheckForErrors();
 
 foreach (var (_, p) in named_parameters()) {
-    var grad = p.grad();
+    using var grad = p.grad;
     if (grad is not null) {
         if (set_to_none) {
-            p.set_grad(null);
-            grad.DetachFromDisposeScope().Dispose();
+            p.grad = null;
         } else {
             grad.zero_();
         }
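
One detail worth noting in the Module changes: the gradient is now read exactly once through the property, held in a `using var`, and never detached or manually disposed afterwards. Below is a minimal sketch of the same idiom in user code; it assumes only that the post-#1291 `grad` getter returns a handle the caller owns, and the `model` variable is illustrative rather than part of this commit:

    // Illustrative: zero all gradients by hand with the new property.
    foreach (var (_, p) in model.named_parameters()) {
        using var grad = p.grad;   // read the property once; disposed at end of scope
        if (grad is not null)
            grad.zero_();          // zero in place instead of detaching and disposing
    }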

src/TorchSharp/Optimizers/ASGD.cs

Lines changed: 1 addition & 1 deletion

@@ -145,7 +145,7 @@ public override Tensor step(Func<Tensor> closure = null)
 
 foreach (var param in group.Parameters) {
 
-    var grad = param.grad();
+    var grad = param.grad;
 
     if (grad is null) continue;
 

src/TorchSharp/Optimizers/Adadelta.cs

Lines changed: 3 additions & 1 deletion

@@ -136,10 +136,12 @@ public override Tensor step(Func<Tensor> closure = null)
 
 foreach (var param in group.Parameters) {
 
-    var grad = (maximize) ? -param.grad() : param.grad();
+    var grad = param.grad;
 
     if (grad is null) continue;
 
+    if (maximize) grad = -grad;
+
     if (grad.is_sparse) throw new ArgumentException("Adadelta does not support sparse gradients");
 
     var state = (State)_state[param.handle];
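
The reordering above (and the matching changes in Adam and AdamW below) is what closes #1300: the old ternary negated `param.grad()` before the null check, so a parameter that never received a gradient would hit a `NullReferenceException` whenever `maximize` was `true`. A minimal sketch of the corrected ordering, written as a free-standing helper for illustration only (the helper name is not part of this commit):

    using TorchSharp;
    using static TorchSharp.torch;

    // Illustrative: fetch a parameter's gradient for an optimizer step,
    // negating it for maximization only after the null check has passed.
    static Tensor PrepareGrad(TorchSharp.Modules.Parameter param, bool maximize)
    {
        var grad = param.grad;            // may be null if backward() never reached this parameter
        if (grad is null) return null;    // caller skips the parameter, mirroring `continue`
        return maximize ? -grad : grad;   // safe to negate once we know the gradient exists
    }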

src/TorchSharp/Optimizers/Adagrad.cs

Lines changed: 1 addition & 1 deletion

@@ -147,7 +147,7 @@ public override Tensor step(Func<Tensor> closure = null)
 
 var state = (State)_state[param.handle];
 
-var grad = param.grad();
+var grad = param.grad;
 
 if (grad is null) continue;
 

src/TorchSharp/Optimizers/Adam.cs

Lines changed: 3 additions & 1 deletion

@@ -164,10 +164,12 @@ public override Tensor step(Func<Tensor> closure = null)
 
 var state = (State)_state[param.handle];
 
-var grad = (maximize) ? -param.grad() : param.grad();
+var grad = param.grad;
 
 if (grad is null) continue;
 
+if (maximize) grad = -grad;
+
 state.step += 1;
 
 var bias_correction1 = 1 - Math.Pow(beta1, state.step);

src/TorchSharp/Optimizers/AdamW.cs

Lines changed: 3 additions & 1 deletion

@@ -164,10 +164,12 @@ public override Tensor step(Func<Tensor> closure = null)
 
 var state = (State)_state[param.handle];
 
-var grad = (maximize) ? -param.grad() : param.grad();
+var grad = param.grad;
 
 if (grad is null) continue;
 
+if (maximize) grad = -grad;
+
 state.step += 1;
 
 param.mul_(1 - lr * weight_decay);

src/TorchSharp/Optimizers/Adamax.cs

Lines changed: 1 addition & 1 deletion

@@ -148,7 +148,7 @@ public override Tensor step(Func<Tensor> closure = null)
 
 foreach (var param in group.Parameters) {
 
-    var grad = param.grad();
+    var grad = param.grad;
 
     if (grad is null) continue;
 
