@@ -4166,152 +4166,6 @@ namespace dlib
41664166 template <long diag, long num, long den, typename SUBNET>
41674167 using tril_diag = add_layer<tril_<diag, void , num, den>, SUBNET>;
41684168
4169- // ----------------------------------------------------------------------------------------
4170-
4171- class positional_encodings_
4172- {
4173- /*!
4174- WHAT THIS OBJECT REPRESENTS
4175- This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
4176- It defines a positional encoding layer that adds position information to
4177- the input tensor. This is particularly useful in transformer architectures
4178- where the order of the sequence matters.
4179-
4180- The dimensions of the tensors output by this layer are the same as the input
4181- tensor dimensions.
4182-
4183- This implementation is based on the positional encoding described in:
4184- Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N.,
4185- Kaiser, Ł., & Polosukhin, I. (2017). Attention is all you need. In Advances
4186- in neural information processing systems (pp. 5998-6008).
4187-
4188- The encoding uses sine and cosine functions of different frequencies:
4189- PE(pos, 2i) = sin(pos / 10000^(2i/d_model))
4190- PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model))
4191- where pos is the position in the sequence, i indexes each sine/cosine pair of dimensions, and d_model is the embedding dimensionality.
4192- !*/
4193-
4194- public:
4195-
4196- positional_encodings_ (
4197- unsigned long sequence_dim_ = 1 ,
4198- unsigned long embedding_dim_ = 1
4199- );
4200- /*!
4201- ensures
4202- - #sequence_dim == sequence_dim_
4203- - #embedding_dim == embedding_dim_
4204- !*/
4205-
4206- positional_encodings_ (
4207- const positional_encodings_& item
4208- );
4209- /*!
4210- ensures
4211- - EXAMPLE_COMPUTATIONAL_LAYER_ objects are copy constructible
4212- !*/
4213-
4214- positional_encodings_& operator =(
4215- const positional_encodings_& item
4216- );
4217- /*!
4218- ensures
4219- - EXAMPLE_COMPUTATIONAL_LAYER_ objects are assignable
4220- !*/
4221-
4222- template <typename SUBNET>
4223- void setup (
4224- const SUBNET& sub
4225- );
4226- /*!
4227- requires
4228- - SUBNET implements the SUBNET interface defined at the top of this file.
4229- ensures
4230- - performs any necessary setup for the layer, including the calculation
4231- of positional encodings based on the dimensions of the input.
4232- !*/
4233-
4234- template <typename SUBNET>
4235- void forward (
4236- const SUBNET& sub,
4237- resizable_tensor& output
4238- );
4239- /*!
4240- requires
4241- - SUBNET implements the SUBNET interface defined at the top of this file.
4242- - setup() has been called.
4243- ensures
4244- - Adds the positional encodings to the output of the subnetwork and
4245- stores the results into #output.
4246- !*/
4247-
4248- template <typename SUBNET>
4249- void backward (
4250- const tensor& gradient_input,
4251- SUBNET& sub,
4252- tensor& params_grad
4253- );
4254- /*!
4255- requires
4256- - SUBNET implements the SUBNET interface defined at the top of this file.
4257- - setup() has been called.
4258- ensures
4259- - Computes the gradient of the layer with respect to the input, which
4260- is simply the input gradient itself as positional encodings are constant.
4261- - params_grad is unused in this layer as there are no learnable parameters.
4262- !*/
4263-
4264- const tensor& get_layer_params (
4265- ) const ;
4266- /*!
4267- ensures
4268- - returns the parameters that define the behavior of forward().
4269- Note: this layer has no learnable parameters, so the returned tensor is empty.
4270- !*/
4271-
4272- tensor& get_layer_params (
4273- );
4274- /*!
4275- ensures
4276- - returns the parameters that define the behavior of forward().
4277- Note: this layer has no learnable parameters, so the returned tensor is empty.
4278- !*/
4279-
4280- const tensor& get_positional_encodings (
4281- ) const ;
4282- /*!
4283- ensures
4284- - returns the computed positional encodings.
4285- !*/
4286-
4287- tensor& get_positional_encodings (
4288- );
4289- /*!
4290- ensures
4291- - returns the computed positional encodings.
4292- !*/
4293-
4294- friend void serialize (const positional_encodings_& item, std::ostream& out);
4295- friend void deserialize (positional_encodings_& item, std::istream& in);
4296- /*!
4297- provides serialization support
4298- !*/
4299-
4300- friend std::ostream& operator <<(std::ostream& out, const positional_encodings_& item);
4301- /*!
4302- print a string describing this layer.
4303- !*/
4304-
4305- friend void to_xml (const positional_encodings_& item, std::ostream& out);
4306- /*!
4307- This function is optional, but is required if you want to print your networks with
4308- net_to_xml(). It prints a layer as XML.
4309- !*/
4310- };
4311-
4312- template <typename SUBNET>
4313- using positional_encodings = add_layer<positional_encodings_, SUBNET>;
4314-
43154169// ----------------------------------------------------------------------------------------
43164170
43174171}
0 commit comments