
Commit 1736f0f

Added bespoke _to() implementations for a number of built-in modules. This is a minor performance improvement.
1 parent 2b01a71 commit 1736f0f
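For orientation (not part of the commit), a minimal usage sketch of the path these overrides speed up: calling .to() on one of the affected built-in modules now dispatches to that module's own _to() override, which swaps just the known weight/bias parameters instead of reflecting over every property in the generic _toEpilog(). The module, dtype, and device below are illustrative.

    using TorchSharp;
    using static TorchSharp.torch;

    var lin = nn.Linear(128, 64);

    // dtype conversion: handled by the bespoke _to(ScalarType, bool) override on Linear.
    lin.to(ScalarType.Float16);

    // device move: handled by the bespoke _to(DeviceType, int, bool) override on Linear.
    if (cuda.is_available())
        lin.to(CUDA);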

7 files changed (+286, -42 lines)


src/TorchSharp/NN/Bilinear.cs

Lines changed: 33 additions & 0 deletions
@@ -69,6 +69,39 @@ public Parameter weight {
             }
         }
 
+        // Rather than spending cycles discovering what parameters exist, we can just hardcode it.
+        protected internal override nn.Module _to(Device device, ScalarType dtype, bool non_blocking) {
+            if (_weight is not null && ReplaceParameter(dtype, device, _weight, out Parameter? w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(dtype, device, _bias, out Parameter? b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
+        protected internal override nn.Module _to(DeviceType deviceType, int deviceIndex, bool non_blocking)
+        {
+            var device = new Device(deviceType, deviceIndex);
+            if (_weight is not null && ReplaceParameter(_weight.dtype, device, _weight, out Parameter? w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(_bias.dtype, device, _bias, out Parameter? b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
+        protected internal override nn.Module _to(ScalarType dtype, bool non_blocking) {
+            if (_weight is not null && ReplaceParameter(dtype, _weight.device, _weight, out Parameter? w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(dtype, _bias.device, _bias, out Parameter? b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
         [ComponentName(Name = BiasComponentName)]
         private Parameter? _bias;
         [ComponentName(Name = WeightComponentName)]

src/TorchSharp/NN/Convolution/Convolution.cs

Lines changed: 33 additions & 0 deletions
@@ -154,6 +154,39 @@ public Parameter weight {
             }
         }
 
+        // Rather than spending cycles discovering what parameters exist, we can just hardcode it.
+        protected internal override nn.Module _to(Device device, ScalarType dtype, bool non_blocking) {
+            if (_weight is not null && ReplaceParameter(dtype, device, _weight, out Parameter? w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(dtype, device, _bias, out Parameter? b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
+        protected internal override nn.Module _to(DeviceType deviceType, int deviceIndex, bool non_blocking)
+        {
+            var device = new Device(deviceType, deviceIndex);
+            if (_weight is not null && ReplaceParameter(_weight.dtype, device, _weight, out Parameter? w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(_bias.dtype, device, _bias, out Parameter? b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
+        protected internal override nn.Module _to(ScalarType dtype, bool non_blocking) {
+            if (_weight is not null && ReplaceParameter(dtype, _weight.device, _weight, out Parameter? w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(dtype, _bias.device, _bias, out Parameter? b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
         [ComponentName(Name = BiasComponentName)]
         protected Parameter? _bias;
         [ComponentName(Name = WeightComponentName)]

src/TorchSharp/NN/Linear.cs

Lines changed: 33 additions & 0 deletions
@@ -79,6 +79,39 @@ public Parameter weight {
             }
         }
 
+        // Rather than spending cycles discovering what parameters exist, we can just hardcode it.
+        protected internal override nn.Module _to(Device device, ScalarType dtype, bool non_blocking) {
+            if (_weight is not null && ReplaceParameter(dtype, device, _weight, out var w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(dtype, device, _bias, out var b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
+        protected internal override nn.Module _to(DeviceType deviceType, int deviceIndex, bool non_blocking)
+        {
+            var device = new Device(deviceType, deviceIndex);
+            if (_weight is not null && ReplaceParameter(_weight.dtype, device, _weight, out var w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(_bias.dtype, device, _bias, out var b)) {
+                bias = b!;
+            }
+            return this;
+        }
+        protected internal override nn.Module _to(ScalarType dtype, bool non_blocking) {
+            if (_weight is not null && ReplaceParameter(dtype, _weight.device, _weight, out var w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(dtype, _bias.device, _bias, out var b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
+
         [ComponentName(Name = BiasComponentName)]
         private Parameter? _bias;
         [ComponentName(Name = WeightComponentName)]

src/TorchSharp/NN/Module.cs

Lines changed: 57 additions & 41 deletions
@@ -253,44 +253,21 @@ protected virtual void _toEpilog(ScalarType? dtype, Device? device, bool non_blo
 
             var props = GetType().GetProperties(BindingFlags.Public | BindingFlags.Instance);
 
-            var propsByName = new Dictionary<string, PropertyInfo>();
-            foreach (var p in props) {
-                // There may be duplicates, and this just overwrites it.
-                propsByName[p.Name] = p;
-            }
+            // var propsByName = new Dictionary<string, PropertyInfo>();
+            // foreach (var p in props) {
+            //     // There may be duplicates, and this just overwrites it.
+            //     propsByName[p.Name] = p;
+            // }
+
+            var propsByName = props.ToDictionary(prop => prop.Name);
 
             foreach (var (name, param) in named_parameters(false).ToList()) {
-                using var grad = param.grad;
-
-                if (!param.toWillCopy(dtype ?? param.dtype, device ?? param.device) &&
-                    (grad is null || !grad.toWillCopy(dtype ?? param.dtype, device ?? param.device)))
-                    continue;
-
-                Parameter p;
-                ScalarType paramType =
-                    dtype != null && (param.dtype.IsFloatingPoint() || param.dtype.IsComplex()) ? dtype.Value : param.dtype;
-
-                // When moving the parameter, we don't want the autograd to track this movement on the graph.
-                // In addition, we need the new tensor to be a leaf to accumulate gradients, so if we didn't
-                // disable grad we would need to call .detach() on the moved tensor.
-                using (var d = torch.no_grad()) {
-                    p = new Parameter(
-                        data: param.to(paramType, device ?? param.device),
-                        requires_grad: param.requires_grad);
-                    _ = p.DetachFromDisposeScope();
-
-                    // Copy the gradient over as well, if it exists
-                    if (grad is not null) {
-                        using var newGrad = grad.to(paramType, device ?? param.device)
-                            .with_requires_grad(grad.requires_grad);
-                        p.grad = newGrad;
-                    }
-                }
+
+                if (!ReplaceParameter(dtype, device, param, out var p)) continue;
 
                 if (propsByName.TryGetValue(name, out var property)) {
                     property.SetValue(this, p);
-                }
-                else {
+                } else {
                     param?.Dispose();
 
                     ConditionallyRegisterParameter(name, p);
@@ -304,17 +281,11 @@ protected virtual void _toEpilog(ScalarType? dtype, Device? device, bool non_blo
 
             foreach (var (name, buffer) in named_buffers(false).ToList()) {
 
-                if (!buffer.toWillCopy(dtype ?? buffer.dtype, device ?? buffer.device)) continue;
+                if (!ReplaceBuffer(dtype, device, buffer, out var t)) continue;
 
-                ScalarType bufferType =
-                    dtype != null && (buffer.dtype.IsFloatingPoint() || buffer.dtype.IsComplex()) ? dtype.Value : buffer.dtype;
-
-                // Buffers don't get grads so we don't need to detach them afterwards
-                var t = buffer.to(bufferType, device ?? buffer.device, disposeAfter: true).DetachFromDisposeScope();
                 if (propsByName.TryGetValue(name, out var property)) {
                     property.SetValue(this, t);
-                }
-                else {
+                } else {
                     ConditionallyRegisterBuffer(name, t);
                     if (fieldsByComponentName.TryGetValue(name, out var field))
                         field.SetValue(this, t);
@@ -327,6 +298,51 @@ protected virtual void _toEpilog(ScalarType? dtype, Device? device, bool non_blo
             }
         }
 
+        protected static bool ReplaceBuffer(ScalarType? dtype, Device? device, Tensor buffer, out Tensor? result)
+        {
+            result = null;
+
+            if (!buffer.toWillCopy(dtype ?? buffer.dtype, device ?? buffer.device)) return false;
+
+            ScalarType bufferType =
+                dtype != null && (buffer.dtype.IsFloatingPoint() || buffer.dtype.IsComplex()) ? dtype.Value : buffer.dtype;
+
+            // Buffers don't get grads so we don't need to detach them afterwards
+            result = buffer.to(bufferType, device ?? buffer.device, disposeAfter: true).DetachFromDisposeScope();
+            return true;
+        }
+
+        protected static bool ReplaceParameter(ScalarType? dtype, Device? device, Parameter param, out Parameter? p)
+        {
+            Tensor? grad = param.grad;
+            p = null;
+
+            if (!param.toWillCopy(dtype ?? param.dtype, device ?? param.device) &&
+                (grad is null || !grad.toWillCopy(dtype ?? param.dtype, device ?? param.device)))
+                return false;
+
+            ScalarType paramType =
+                dtype != null && (param.dtype.IsFloatingPoint() || param.dtype.IsComplex()) ? dtype.Value : param.dtype;
+
+            // When moving the parameter, we don't want the autograd to track this movement on the graph.
+            // In addition, we need the new tensor to be a leaf to accumulate gradients, so if we didn't
+            // disable grad we would need to call .detach() on the moved tensor.
+            using (var d = torch.no_grad()) {
+                p = new Parameter(
+                    data: param.to(paramType, device ?? param.device),
+                    requires_grad: param.requires_grad);
+                _ = p.DetachFromDisposeScope();
+
+                // Copy the gradient over as well, if it exists
+                if (grad is not null) {
+                    using var newGrad = grad.to(paramType, device ?? param.device)
+                        .with_requires_grad(grad.requires_grad);
+                    p.grad = newGrad;
+                }
+            }
+            return true;
+        }
+
         private static IEnumerable<FieldInfo> GetFieldsRecursive(Type type, BindingFlags bindingFlags) {
 
             Type? currentType = type;
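One detail worth noting from ReplaceParameter above: an existing gradient is converted and carried over along with the parameter itself, inside a no_grad scope so the move is not tracked by autograd. A hedged illustration, not from the commit, assuming a backward pass has already populated the gradient:

    using System;
    using TorchSharp;
    using static TorchSharp.torch;

    var lin = nn.Linear(4, 2);
    lin.forward(torch.rand(8, 4)).sum().backward();   // populate lin.weight.grad

    lin.to(ScalarType.Float64);

    // Both the parameter and its gradient were converted by ReplaceParameter.
    Console.WriteLine(lin.weight.dtype);        // Float64
    Console.WriteLine(lin.weight.grad?.dtype);  // Float64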

src/TorchSharp/NN/Normalization/GroupNorm.cs

Lines changed: 34 additions & 1 deletion
@@ -33,7 +33,7 @@ internal GroupNorm(long num_groups, long num_channels, double eps, bool affine,
 
         public override Tensor forward(Tensor tensor)
         {
-            if (tensor.Dimensions < 3)
+            if (tensor.Dimensions < 3)
                 throw new ArgumentException($"Invalid number of dimensions for GroupNorm argument: {tensor.Dimensions}");
             return F.group_norm(tensor, num_groups, weight, bias, eps);
         }
@@ -66,6 +66,39 @@ public Parameter weight {
             }
         }
 
+        // Rather than spending cycles discovering what parameters exist, we can just hardcode it.
+        protected internal override nn.Module _to(Device device, ScalarType dtype, bool non_blocking) {
+            if (_weight is not null && ReplaceParameter(dtype, device, _weight, out Parameter? w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(dtype, device, _bias, out Parameter? b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
+        protected internal override nn.Module _to(DeviceType deviceType, int deviceIndex, bool non_blocking)
+        {
+            var device = new Device(deviceType, deviceIndex);
+            if (_weight is not null && ReplaceParameter(_weight.dtype, device, _weight, out Parameter? w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(_bias.dtype, device, _bias, out Parameter? b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
+        protected internal override nn.Module _to(ScalarType dtype, bool non_blocking) {
+            if (_weight is not null && ReplaceParameter(dtype, _weight.device, _weight, out Parameter? w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(dtype, _bias.device, _bias, out Parameter? b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
         [ComponentName(Name = nameof(bias))]
         private Parameter? _bias;
         [ComponentName(Name = nameof(weight))]

src/TorchSharp/NN/Normalization/LayerNorm.cs

Lines changed: 33 additions & 0 deletions
@@ -84,6 +84,39 @@ public Parameter weight {
             }
         }
 
+        // Rather than spending cycles discovering what parameters exist, we can just hardcode it.
+        protected internal override nn.Module _to(Device device, ScalarType dtype, bool non_blocking) {
+            if (_weight is not null && ReplaceParameter(dtype, device, _weight, out Parameter? w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(dtype, device, _bias, out Parameter? b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
+        protected internal override nn.Module _to(DeviceType deviceType, int deviceIndex, bool non_blocking)
+        {
+            var device = new Device(deviceType, deviceIndex);
+            if (_weight is not null && ReplaceParameter(_weight.dtype, device, _weight, out Parameter? w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(_bias.dtype, device, _bias, out Parameter? b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
+        protected internal override nn.Module _to(ScalarType dtype, bool non_blocking) {
+            if (_weight is not null && ReplaceParameter(dtype, _weight.device, _weight, out Parameter? w)) {
+                weight = w!;
+            }
+            if (_bias is not null && ReplaceParameter(dtype, _bias.device, _bias, out Parameter? b)) {
+                bias = b!;
+            }
+            return this;
+        }
+
         [ComponentName(Name = BiasComponentName)]
         private Parameter? _bias;
         [ComponentName(Name = WeightComponentName)]
