Commit 55134f1

Add linear_ layer for neural networks (#3074)
1 parent 3ac03fa commit 55134f1

File tree

3 files changed (+535, -0 lines)

dlib/dnn/layers.h

Lines changed: 200 additions & 0 deletions
@@ -2143,6 +2143,206 @@ namespace dlib
        >
    using fc_no_bias = add_layer<fc_<num_outputs,FC_NO_BIAS>, SUBNET>;

// ----------------------------------------------------------------------------------------

    enum linear_bias_mode { LINEAR_HAS_BIAS = 0, LINEAR_NO_BIAS = 1 };

    template <
        unsigned long num_outputs_,
        linear_bias_mode bias_mode_
        >
    class linear_
    {
        static_assert(num_outputs_ > 0, "The number of outputs from a linear_ layer must be > 0");

    public:
        linear_() :
            num_outputs(num_outputs_),
            num_inputs(0),
            learning_rate_multiplier(1),
            bias_mode(bias_mode_) {
        }

        double get_learning_rate_multiplier() const { return learning_rate_multiplier; }
        void set_learning_rate_multiplier(double val) { learning_rate_multiplier = val; }

        unsigned long get_num_inputs() const { return num_inputs; }
        unsigned long get_num_outputs() const { return num_outputs; }
        void set_num_outputs(long num)
        {
            DLIB_CASSERT(num > 0, "The number of outputs must be > 0, but num == " << num);
            if (num != (long)num_outputs)
            {
                DLIB_CASSERT(get_layer_params().size() == 0,
                    "You can't change the number of filters in linear_ if the parameter tensor has already been allocated.");
                num_outputs = num;
            }
        }
        linear_bias_mode get_bias_mode() const { return bias_mode; }

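        // setup() allocates a single parameter tensor with num_outputs columns and
        // num_inputs rows (plus one extra row when bias_mode == LINEAR_HAS_BIAS).
        // The first num_inputs rows are viewed through the `weights` alias; the
        // optional last row is viewed through the `biases` alias and zero-initialized.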
        template <typename SUBNET>
        void setup(const SUBNET& sub)
        {
            num_inputs = sub.get_output().nc();
            if (bias_mode == LINEAR_HAS_BIAS)
                params.set_size(num_inputs + 1, num_outputs);
            else
                params.set_size(num_inputs, num_outputs);

            dlib::rand rnd(std::rand());
            randomize_parameters(params, num_inputs + num_outputs, rnd);
            weights = alias_tensor(num_inputs, num_outputs);

            if (bias_mode == LINEAR_HAS_BIAS) {
                biases = alias_tensor(1, num_outputs);
                biases(params, weights.size()) = 0;
            }
        }

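        // forward() applies the linear map along the last (nc) dimension only: the 4-D
        // input tensor (num_samples, k, nr, nc) is viewed as a 2-D matrix with
        // num_samples*k*nr rows and nc == num_inputs columns, then multiplied by the
        // (num_inputs x num_outputs) weight matrix; the bias row, if present, is
        // broadcast-added to every row.  num_samples, k, and nr pass through unchanged.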
        template <typename SUBNET>
        void forward(const SUBNET& sub, resizable_tensor& output)
        {
            const auto& prev_output = sub.get_output();
            DLIB_CASSERT((long)num_inputs == prev_output.nc(),
                "The size of the input tensor to this linear layer doesn't match the size the linear layer was trained with.");
            output.set_size(prev_output.num_samples(), prev_output.k(), prev_output.nr(), num_outputs);

            auto o = alias_tensor(output.num_samples() * output.k() * output.nr(), num_outputs)(output, 0);
            auto so = alias_tensor(prev_output.num_samples() * prev_output.k() * prev_output.nr(), num_inputs)(prev_output, 0);

            auto w = weights(params, 0);
            tt::gemm(0, (tensor&)o, 1, so, false, w, false);

            if (bias_mode == LINEAR_HAS_BIAS)
            {
                auto b = biases(params, weights.size());
                tt::add(1, (tensor&)o, 1, b);
            }
        }

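        // backward() reuses the same 2-D views: the weight gradient is
        // learning_rate_multiplier * input^T * gradient_input, the bias gradient sums
        // gradient_input over rows, and gradient_input * weights^T is accumulated into
        // the previous layer's gradient_input tensor.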
        template <typename SUBNET>
        void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
        {
            auto gi = alias_tensor(gradient_input.num_samples() * gradient_input.k() * gradient_input.nr(), num_outputs)(gradient_input, 0);
            if (learning_rate_multiplier != 0)
            {
                const auto& prev_output = sub.get_output();
                auto pw = weights(params_grad, 0);
                auto so = alias_tensor(prev_output.num_samples() * prev_output.k() * prev_output.nr(), num_inputs)(prev_output, 0);
                tt::gemm(0, pw, learning_rate_multiplier, so, true, gi, false);

                if (bias_mode == LINEAR_HAS_BIAS)
                {
                    auto pb = biases(params_grad, weights.size());
                    tt::assign_bias_gradient(pb, gi);
                }
            }

            const auto& prev_gradient = sub.get_gradient_input();
            auto sgi = alias_tensor(prev_gradient.num_samples() * prev_gradient.k() * prev_gradient.nr(), num_inputs)(prev_gradient, 0);
            auto w = weights(params, 0);
            tt::gemm(1, (tensor&)sgi, 1, gi, false, w, true);
        }

        alias_tensor_instance get_weights() { return weights(params, 0); }
        alias_tensor_const_instance get_weights() const { return weights(params, 0); }
        alias_tensor_instance get_biases()
        {
            static_assert(bias_mode_ == LINEAR_HAS_BIAS, "This linear_ layer doesn't have a bias vector "
                "to be retrieved, as per template parameter 'bias_mode_'.");
            return biases(params, weights.size());
        }
        alias_tensor_const_instance get_biases() const
        {
            static_assert(bias_mode_ == LINEAR_HAS_BIAS, "This linear_ layer doesn't have a bias vector "
                "to be retrieved, as per template parameter 'bias_mode_'.");
            return biases(params, weights.size());
        }

        inline dpoint map_input_to_output(const dpoint& p) const { return p; }
        inline dpoint map_output_to_input(const dpoint& p) const { return p; }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const linear_& item, std::ostream& out)
        {
            serialize("linear_", out);
            serialize(item.num_outputs, out);
            serialize(item.num_inputs, out);
            serialize(item.params, out);
            serialize(item.weights, out);
            serialize(item.biases, out);
            serialize((int)item.bias_mode, out);
            serialize(item.learning_rate_multiplier, out);
        }

        friend void deserialize(linear_& item, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version == "linear_")
            {
                deserialize(item.num_outputs, in);
                deserialize(item.num_inputs, in);
                deserialize(item.params, in);
                deserialize(item.weights, in);
                deserialize(item.biases, in);
                int bmode;
                deserialize(bmode, in);
                item.bias_mode = static_cast<linear_bias_mode>(bmode);
                if (bias_mode_ != item.bias_mode) throw serialization_error("Wrong bias_mode found while deserializing dlib::linear_");
                deserialize(item.learning_rate_multiplier, in);
            }
            else
            {
                throw serialization_error("Unexpected version '" + version + "' found while deserializing dlib::linear_.");
            }
        }

        friend std::ostream& operator<<(std::ostream& out, const linear_& item)
        {
            out << "linear\t (num_outputs=" << item.num_outputs;
            if (item.bias_mode == LINEAR_HAS_BIAS)
                out << ", bias=true";
            else
                out << ", bias=false";
            out << ")";
            out << " learning_rate_mult=" << item.learning_rate_multiplier;
            return out;
        }

        friend void to_xml(const linear_& item, std::ostream& out)
        {
            out << "<linear"
                << " num_outputs='" << item.num_outputs << "'"
                << " bias='" << ((item.bias_mode == LINEAR_HAS_BIAS) ? "true" : "false") << "'"
                << " learning_rate_mult='" << item.learning_rate_multiplier << "'>\n";
            out << mat(item.params);
            out << "</linear>\n";
        }

    private:
        unsigned long num_inputs;
        unsigned long num_outputs;
        double learning_rate_multiplier;
        linear_bias_mode bias_mode;
        resizable_tensor params;
        alias_tensor weights, biases;
    };

    template <
        unsigned long num_outputs,
        typename SUBNET
        >
    using linear = add_layer<linear_<num_outputs, LINEAR_HAS_BIAS>, SUBNET>;

    template <
        unsigned long num_outputs,
        typename SUBNET
        >
    using linear_no_bias = add_layer<linear_<num_outputs, LINEAR_NO_BIAS>, SUBNET>;

// ----------------------------------------------------------------------------------------

    class dropout_
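
A rough usage sketch (an editor illustration, not part of this commit): the new linear and linear_no_bias aliases compose like any other dlib layer; the surrounding layers, sizes, and input shape below are arbitrary examples.

    #include <dlib/dnn.h>
    #include <iostream>

    using namespace dlib;

    // Toy network: linear<8,...> maps the 5 columns of each input matrix to 8 outputs
    // (applied independently to every row), then an fc layer reduces everything to 3.
    using toy_net = loss_mean_squared_multioutput<
                        fc<3,
                        relu<linear<8,
                        input<matrix<float>>>>>>;

    int main()
    {
        toy_net net;
        matrix<float> x(2, 5);          // 2 rows, 5 columns -> linear_ sees num_inputs == 5
        x = 1;
        matrix<float> out = net(x);     // forward pass; out holds the 3 network outputs
        std::cout << trans(out);
    }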

0 commit comments
