@@ -2143,6 +2143,206 @@ namespace dlib
         >
     using fc_no_bias = add_layer<fc_<num_outputs,FC_NO_BIAS>, SUBNET>;
 
+// ----------------------------------------------------------------------------------------
+
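+    // Controls whether a linear_ layer learns an additive bias row in addition to its
+    // weight matrix (LINEAR_HAS_BIAS), or applies the weight matrix alone (LINEAR_NO_BIAS).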
+    enum linear_bias_mode { LINEAR_HAS_BIAS = 0, LINEAR_NO_BIAS = 1 };
+
+    template <
+        unsigned long num_outputs_,
+        linear_bias_mode bias_mode_
+        >
+    class linear_
+    {
+        static_assert(num_outputs_ > 0, "The number of outputs from a linear_ layer must be > 0");
+
+    public:
+        linear_() :
+            num_inputs(0),
+            num_outputs(num_outputs_),
+            learning_rate_multiplier(1),
+            bias_mode(bias_mode_) {
+        }
+
+        double get_learning_rate_multiplier() const { return learning_rate_multiplier; }
+        void set_learning_rate_multiplier(double val) { learning_rate_multiplier = val; }
+
+        unsigned long get_num_inputs() const { return num_inputs; }
+        unsigned long get_num_outputs() const { return num_outputs; }
+        void set_num_outputs(long num)
+        {
+            DLIB_CASSERT(num > 0, "The number of outputs must be > 0, but num == " << num);
+            if (num != (long)num_outputs)
+            {
+                DLIB_CASSERT(get_layer_params().size() == 0,
+                    "You can't change the number of outputs in linear_ if the parameter tensor has already been allocated.");
+                num_outputs = num;
+            }
+        }
+        linear_bias_mode get_bias_mode() const { return bias_mode; }
+
+        template <typename SUBNET>
+        void setup(const SUBNET& sub)
+        {
+            num_inputs = sub.get_output().nc();
+            if (bias_mode == LINEAR_HAS_BIAS)
+                params.set_size(num_inputs + 1, num_outputs);
+            else
+                params.set_size(num_inputs, num_outputs);
+
+            dlib::rand rnd(std::rand());
+            randomize_parameters(params, num_inputs + num_outputs, rnd);
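+            // weights and biases are aliases into the single params blob: the
+            // (num_inputs x num_outputs) weight matrix comes first, followed, when
+            // bias_mode == LINEAR_HAS_BIAS, by a (1 x num_outputs) bias row that is
+            // zero-initialized below.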
+            weights = alias_tensor(num_inputs, num_outputs);
+
+            if (bias_mode == LINEAR_HAS_BIAS) {
+                biases = alias_tensor(1, num_outputs);
+                biases(params, weights.size()) = 0;
+            }
+        }
+
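+        // Applies the linear map to the last dimension only: the (N x K x NR x num_inputs)
+        // input is viewed as a (N*K*NR x num_inputs) matrix and multiplied by the weight
+        // matrix, so the output has shape (N x K x NR x num_outputs).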
+        template <typename SUBNET>
+        void forward(const SUBNET& sub, resizable_tensor& output)
+        {
+            const auto& prev_output = sub.get_output();
+            DLIB_CASSERT((long)num_inputs == prev_output.nc(),
+                "The size of the input tensor to this linear layer doesn't match the size the linear layer was trained with.");
+            output.set_size(prev_output.num_samples(), prev_output.k(), prev_output.nr(), num_outputs);
+
+            auto o = alias_tensor(output.num_samples() * output.k() * output.nr(), num_outputs)(output, 0);
+            auto so = alias_tensor(prev_output.num_samples() * prev_output.k() * prev_output.nr(), num_inputs)(prev_output, 0);
+
+            auto w = weights(params, 0);
+            tt::gemm(0, (tensor&)o, 1, so, false, w, false);
+
+            if (bias_mode == LINEAR_HAS_BIAS)
+            {
+                auto b = biases(params, weights.size());
+                tt::add(1, (tensor&)o, 1, b);
+            }
+        }
+
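+        // Accumulates three gradients: the weight gradient (input^T * gradient_input,
+        // scaled by the learning rate multiplier), the bias gradient (gradient_input
+        // summed over rows), and the data gradient (gradient_input * weights^T), which
+        // is added into sub.get_gradient_input().  Parameter gradients are skipped
+        // entirely when learning_rate_multiplier == 0.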
+        template <typename SUBNET>
+        void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
+        {
+            auto gi = alias_tensor(gradient_input.num_samples() * gradient_input.k() * gradient_input.nr(), num_outputs)(gradient_input, 0);
+            if (learning_rate_multiplier != 0)
+            {
+                const auto& prev_output = sub.get_output();
+                auto pw = weights(params_grad, 0);
+                auto so = alias_tensor(prev_output.num_samples() * prev_output.k() * prev_output.nr(), num_inputs)(prev_output, 0);
+                tt::gemm(0, pw, learning_rate_multiplier, so, true, gi, false);
+
+                if (bias_mode == LINEAR_HAS_BIAS)
+                {
+                    auto pb = biases(params_grad, weights.size());
+                    tt::assign_bias_gradient(pb, gi);
+                }
+            }
+
+            const auto& prev_gradient = sub.get_gradient_input();
+            auto sgi = alias_tensor(prev_gradient.num_samples() * prev_gradient.k() * prev_gradient.nr(), num_inputs)(prev_gradient, 0);
+            auto w = weights(params, 0);
+            tt::gemm(1, (tensor&)sgi, 1, gi, false, w, true);
+        }
+
+        alias_tensor_instance get_weights() { return weights(params, 0); }
+        alias_tensor_const_instance get_weights() const { return weights(params, 0); }
+        alias_tensor_instance get_biases()
+        {
+            static_assert(bias_mode_ == LINEAR_HAS_BIAS, "This linear_ layer doesn't have a bias vector "
+                "to be retrieved, as per template parameter 'bias_mode'.");
+            return biases(params, weights.size());
+        }
+        alias_tensor_const_instance get_biases() const
+        {
+            static_assert(bias_mode_ == LINEAR_HAS_BIAS, "This linear_ layer doesn't have a bias vector "
+                "to be retrieved, as per template parameter 'bias_mode'.");
+            return biases(params, weights.size());
+        }
+
+        inline dpoint map_input_to_output(const dpoint& p) const { return p; }
+        inline dpoint map_output_to_input(const dpoint& p) const { return p; }
+
+        const tensor& get_layer_params() const { return params; }
+        tensor& get_layer_params() { return params; }
+
+        friend void serialize(const linear_& item, std::ostream& out)
+        {
+            serialize("linear_", out);
+            serialize(item.num_outputs, out);
+            serialize(item.num_inputs, out);
+            serialize(item.params, out);
+            serialize(item.weights, out);
+            serialize(item.biases, out);
+            serialize((int)item.bias_mode, out);
+            serialize(item.learning_rate_multiplier, out);
+        }
+
+        friend void deserialize(linear_& item, std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version == "linear_")
+            {
+                deserialize(item.num_outputs, in);
+                deserialize(item.num_inputs, in);
+                deserialize(item.params, in);
+                deserialize(item.weights, in);
+                deserialize(item.biases, in);
+                int bmode;
+                deserialize(bmode, in);
+                item.bias_mode = static_cast<linear_bias_mode>(bmode);
+                if (bias_mode_ != item.bias_mode) throw serialization_error("Wrong bias_mode found while deserializing dlib::linear_");
+                deserialize(item.learning_rate_multiplier, in);
+            }
+            else
+            {
+                throw serialization_error("Unexpected version '" + version + "' found while deserializing dlib::linear_.");
+            }
+        }
+
+        friend std::ostream& operator<<(std::ostream& out, const linear_& item)
+        {
+            out << "linear\t (num_outputs=" << item.num_outputs;
+            if (item.bias_mode == LINEAR_HAS_BIAS)
+                out << ", bias=true";
+            else
+                out << ", bias=false";
+            out << ")";
+            out << " learning_rate_mult=" << item.learning_rate_multiplier;
+            return out;
+        }
+
+        friend void to_xml(const linear_& item, std::ostream& out)
+        {
+            out << "<linear"
+                << " num_outputs='" << item.num_outputs << "'"
+                << " bias='" << ((item.bias_mode == LINEAR_HAS_BIAS) ? "true" : "false") << "'"
+                << " learning_rate_mult='" << item.learning_rate_multiplier << "'>\n";
+            out << mat(item.params);
+            out << "</linear>\n";
+        }
2324+
2325+ private:
2326+ unsigned long num_inputs;
2327+ unsigned long num_outputs;
2328+ double learning_rate_multiplier;
2329+ linear_bias_mode bias_mode;
2330+ resizable_tensor params;
2331+ alias_tensor weights, biases;
2332+ };
+
+    template <
+        unsigned long num_outputs,
+        typename SUBNET
+        >
+    using linear = add_layer<linear_<num_outputs, LINEAR_HAS_BIAS>, SUBNET>;
+
+    template <
+        unsigned long num_outputs,
+        typename SUBNET
+        >
+    using linear_no_bias = add_layer<linear_<num_outputs, LINEAR_NO_BIAS>, SUBNET>;
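+
+    // Usage sketch (illustrative only, not part of the library): these aliases compose
+    // like any other dlib layer.  The mlp_block alias below is hypothetical and just
+    // shows the intended pattern of mapping the last tensor dimension, here from 128
+    // hidden values to 64 outputs:
+    //
+    //     template <typename SUBNET>
+    //     using mlp_block = linear<64, relu<linear_no_bias<128, SUBNET>>>;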
+
 // ----------------------------------------------------------------------------------------
 
     class dropout_