Skip to content

Commit 7889cf3

Browse files
Cydral, davisking, and Copilot
authored
Add transformer example with RoPE and MoE-like mechanisms (#3078)
* Implementation of linear_ layer for neural networks. This layer provides an optimized linear transformation for multi-dimensional inputs.
* Minor change
* Update dlib/dnn/layers.h
  Co-authored-by: Copilot <[email protected]>
* Add reshape_to and flatten layers to Dlib's DNN module
* Missing update to "visitors.h"
* format fixing for reshape_to
* Update dlib/test/dnn.cpp
* Vocabulary size fixed for learning, and function added for transformation-free tokenization
* Added a new example for learning a “complex” Transformer model.
* Added a new example for learning a “complex” Transformer model.
* Updated example for training a Transformer model.
* fix for gcc/ffmpeg compilation
* Fix a warning message for Ubuntu compilation.
* Update for Linux environment.
* Fix batch building
* Slight improvement in model definition.
* linear_ layer implementation improvement
* finalizing the example
* Fixing break condition in training method.
* Fixing declaration order of variables.
* bpe_tokenizer improvements.
* Example updated.
* bpe_tokenizer class refactoring.
* Example updated.
* bpe_tokenizer class updated.
* Decoding part of the bpe_tokenizer updated.
* Network definition update

---------

Co-authored-by: Davis E. King <[email protected]>
Co-authored-by: Copilot <[email protected]>
1 parent 131e46e commit 7889cf3

File tree

5 files changed

+1812
-331
lines changed

5 files changed

+1812
-331
lines changed

dlib/dnn/layers.h

Lines changed: 30 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2329,24 +2329,46 @@ namespace dlib
23292329

23302330
template <
23312331
unsigned long num_outputs_,
2332-
linear_bias_mode bias_mode_
2332+
linear_bias_mode bias_mode_ = LINEAR_HAS_BIAS
23332333
>
23342334
class linear_
23352335
{
23362336
static_assert(num_outputs_ > 0, "The number of outputs from a linear_ layer must be > 0");
23372337

23382338
public:
2339-
linear_() :
2339+
explicit linear_() :
23402340
num_outputs(num_outputs_),
2341-
num_inputs(0),
2341+
num_inputs(0),
23422342
learning_rate_multiplier(1),
23432343
bias_mode(bias_mode_) {
23442344
}
23452345

2346+
linear_(const linear_& other) :
2347+
num_outputs(other.num_outputs),
2348+
num_inputs(other.num_inputs),
2349+
learning_rate_multiplier(other.learning_rate_multiplier),
2350+
bias_mode(other.bias_mode),
2351+
params(other.params),
2352+
weights(other.weights),
2353+
biases(other.biases) {
2354+
}
2355+
2356+
linear_& operator=(const linear_& other) {
2357+
if (this != &other) {
2358+
num_outputs = other.num_outputs;
2359+
num_inputs = other.num_inputs;
2360+
learning_rate_multiplier = other.learning_rate_multiplier;
2361+
bias_mode = other.bias_mode;
2362+
params = other.params;
2363+
weights = other.weights;
2364+
biases = other.biases;
2365+
}
2366+
return *this;
2367+
}
2368+
23462369
double get_learning_rate_multiplier() const { return learning_rate_multiplier; }
23472370
void set_learning_rate_multiplier(double val) { learning_rate_multiplier = val; }
2348-
2349-
unsigned long get_num_inputs() const { return num_inputs; }
2371+
23502372
unsigned long get_num_outputs() const { return num_outputs; }
23512373
void set_num_outputs(long num)
23522374
{
@@ -2358,6 +2380,7 @@ namespace dlib
23582380
num_outputs = num;
23592381
}
23602382
}
2383+
unsigned long get_num_inputs() const { return num_inputs; }
23612384
linear_bias_mode get_bias_mode() const { return bias_mode; }
23622385

23632386
template <typename SUBNET>
@@ -2503,8 +2526,8 @@ namespace dlib
25032526
}
25042527

25052528
private:
2506-
unsigned long num_inputs;
25072529
unsigned long num_outputs;
2530+
unsigned long num_inputs;
25082531
double learning_rate_multiplier;
25092532
linear_bias_mode bias_mode;
25102533
resizable_tensor params;
@@ -2515,7 +2538,7 @@ namespace dlib
25152538
unsigned long num_outputs,
25162539
typename SUBNET
25172540
>
2518-
using linear = add_layer<linear_<num_outputs, LINEAR_HAS_BIAS>, SUBNET>;
2541+
using linear = add_layer<linear_<num_outputs>, SUBNET>;
25192542

25202543
template <
25212544
unsigned long num_outputs,

0 commit comments

Comments
 (0)