//! Neural Network Layers

use linalg::{Matrix, MatrixSlice, BaseMatrix};

use learning::LearningResult;
use learning::error::{Error, ErrorKind};
use learning::toolkit::activ_fn::ActivationFunc;

use rand::thread_rng;
use rand::distributions::{Distribution, Normal};

use std::fmt::Debug;

/// Trait for neural net layers
pub trait NetLayer: Debug {
    /// The result of propagating data forward through this layer
    fn forward(&self, input: &Matrix<f64>, params: MatrixSlice<f64>) -> LearningResult<Matrix<f64>>;

    /// The gradient of the output of this layer with respect to its input
    fn back_input(&self, out_grad: &Matrix<f64>, input: &Matrix<f64>, output: &Matrix<f64>, params: MatrixSlice<f64>) -> Matrix<f64>;

    /// The gradient of the output of this layer with respect to its parameters
    fn back_params(&self, out_grad: &Matrix<f64>, input: &Matrix<f64>, output: &Matrix<f64>, params: MatrixSlice<f64>) -> Matrix<f64>;

    /// The default value of the parameters of this layer before training
    fn default_params(&self) -> Vec<f64>;

    /// The shape of the parameters used by this layer
    fn param_shape(&self) -> (usize, usize);

    /// The number of parameters used by this layer
    fn num_params(&self) -> usize {
        let shape = self.param_shape();
        shape.0 * shape.1
    }
}
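
// A rough sketch of how a trainer might drive these methods for a single
// layer during backpropagation (hypothetical variable names, not part of
// this trait):
//
//     let output = layer.forward(&input, params)?;
//     // ... compute `out_grad`, the loss gradient w.r.t. `output` ...
//     let param_grad = layer.back_params(&out_grad, &input, &output, params);
//     let input_grad = layer.back_input(&out_grad, &input, &output, params);
//     // `input_grad` becomes the `out_grad` of the previous layer.
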
/// Linear network layer
///
/// Represents a fully connected layer with optional bias term
///
/// The parameters are a matrix of weights of size I x N,
/// where N is the dimensionality of the output and I the dimensionality
/// of the input (plus one for the bias row when a bias term is included)
#[derive(Debug, Clone, Copy)]
pub struct Linear {
    /// The number of dimensions of the input
    input_size: usize,
    /// The number of dimensions of the output
    output_size: usize,
    /// Whether or not to include a bias term
    has_bias: bool,
}

impl Linear {
    /// Construct a new Linear layer with a bias term
    pub fn new(input_size: usize, output_size: usize) -> Linear {
        Linear {
            // One extra input dimension for the bias term
            input_size: input_size + 1,
            output_size: output_size,
            has_bias: true,
        }
    }

    /// Construct a Linear layer without a bias term
    pub fn without_bias(input_size: usize, output_size: usize) -> Linear {
        Linear {
            input_size: input_size,
            output_size: output_size,
            has_bias: false,
        }
    }
}
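
/// Removes the first column of `mat`, shifting the remaining entries left
/// within the row-major backing vector and truncating the tail. Used in
/// `back_input` below to drop the gradient term for the prepended bias
/// column, which should not propagate back to the layer's input.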
fn remove_first_col(mat: Matrix<f64>) -> Matrix<f64> {
    let rows = mat.rows();
    let cols = mat.cols();
    let mut data = mat.into_vec();
    let len = data.len();
    let mut del = 0;
    {
        let v = &mut *data;
        for i in 0..len {
            if i % cols == 0 {
                // Start of a row: skip this first-column element
                del += 1;
            } else if del > 0 {
                // Shift the element left over the deleted slots
                v[i - del] = v[i];
            }
        }
    }
    if del > 0 {
        data.truncate(len - del);
    }
    Matrix::new(rows, cols - 1, data)
}

impl NetLayer for Linear {
    /// Computes a matrix product
    ///
    /// input should have dimensions N x I
    /// where N is the number of samples and I is the dimensionality of the input
    fn forward(&self, input: &Matrix<f64>, params: MatrixSlice<f64>) -> LearningResult<Matrix<f64>> {
        if self.has_bias {
            if input.cols() + 1 != params.rows() {
                Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns"))
            } else {
                // Prepend a column of ones to account for the bias term
                Ok(&Matrix::ones(input.rows(), 1).hcat(input) * &params)
            }
        } else {
            if input.cols() != params.rows() {
                Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns"))
            } else {
                Ok(input * &params)
            }
        }
    }

    fn back_input(&self, out_grad: &Matrix<f64>, _: &Matrix<f64>, _: &Matrix<f64>, params: MatrixSlice<f64>) -> Matrix<f64> {
        debug_assert_eq!(out_grad.cols(), params.cols());
        let gradient = out_grad * &params.transpose();
        if self.has_bias {
            // Drop the gradient for the constant bias column
            remove_first_col(gradient)
        } else {
            gradient
        }
    }

    fn back_params(&self, out_grad: &Matrix<f64>, input: &Matrix<f64>, _: &Matrix<f64>, _: MatrixSlice<f64>) -> Matrix<f64> {
        debug_assert_eq!(input.rows(), out_grad.rows());
        if self.has_bias {
            &Matrix::ones(input.rows(), 1).hcat(input).transpose() * out_grad
        } else {
            &input.transpose() * out_grad
        }
    }

    /// Initializes weights using Xavier initialization
    ///
    /// Weights are drawn from a Gaussian distribution with mean 0 and
    /// variance 2 / (input_size + output_size)
    fn default_params(&self) -> Vec<f64> {
        let distro = Normal::new(0.0, (2.0 / (self.input_size + self.output_size) as f64).sqrt());
        let mut rng = thread_rng();
        (0..self.input_size * self.output_size)
            .map(|_| distro.sample(&mut rng))
            .collect()
    }

    fn param_shape(&self) -> (usize, usize) {
        (self.input_size, self.output_size)
    }
}
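
// For example, `Linear::new(3, 2)` stores `input_size = 4` (3 inputs plus
// the bias row), so `param_shape()` is `(4, 2)` and `num_params()` is 8.
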
impl<T: ActivationFunc> NetLayer for T {
    /// Applies the activation function to each element of the input
    fn forward(&self, input: &Matrix<f64>, _: MatrixSlice<f64>) -> LearningResult<Matrix<f64>> {
        let mut output = Vec::with_capacity(input.rows() * input.cols());
        for val in input.data() {
            output.push(T::func(*val));
        }
        Ok(Matrix::new(input.rows(), input.cols(), output))
    }

    fn back_input(&self, out_grad: &Matrix<f64>, _: &Matrix<f64>, output: &Matrix<f64>, _: MatrixSlice<f64>) -> Matrix<f64> {
        let mut in_grad = Vec::with_capacity(output.rows() * output.cols());
        for (y, g) in output.data().iter().zip(out_grad.data()) {
            // Compute the derivative from the already-computed output
            // rather than re-evaluating the activation on the input
            in_grad.push(T::func_grad_from_output(*y) * g);
        }
        Matrix::new(output.rows(), output.cols(), in_grad)
    }

    /// Activation layers have no parameters, so the gradient is empty
    fn back_params(&self, _: &Matrix<f64>, _: &Matrix<f64>, _: &Matrix<f64>, _: MatrixSlice<f64>) -> Matrix<f64> {
        Matrix::new(0, 0, Vec::new())
    }

    fn default_params(&self) -> Vec<f64> {
        Vec::new()
    }

    fn param_shape(&self) -> (usize, usize) {
        (0, 0)
    }
}
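
// A minimal sketch of a test exercising the Linear layer, assuming this
// file is rusty-machine's net_layer.rs, where `linalg` re-exports rulinalg
// and `MatrixSlice::from_matrix` is available.
#[cfg(test)]
mod tests {
    use super::{Linear, NetLayer};
    use linalg::{Matrix, MatrixSlice, BaseMatrix};

    #[test]
    fn linear_forward_shape() {
        // Two samples, three input features
        let input = Matrix::new(2, 3, vec![1.0; 6]);
        let layer = Linear::new(3, 2);

        // The bias adds a row: params are (3 + 1) x 2
        let (rows, cols) = layer.param_shape();
        assert_eq!((rows, cols), (4, 2));

        let params = Matrix::new(rows, cols, layer.default_params());
        let slice = MatrixSlice::from_matrix(&params, [0, 0], rows, cols);
        let output = layer.forward(&input, slice).unwrap();
        assert_eq!((output.rows(), output.cols()), (2, 2));
    }
}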