
Commit 1e59d89

up

1 parent 22e355f · commit 1e59d89

14 files changed: +275 -198 lines changed

src/dueling_categorical_dqn.c

Lines changed: 21 additions & 0 deletions
@@ -24,6 +24,7 @@ SOFTWARE.
 
 #include "dueling_categorical_dqn.h"
 
+
 dueling_categorical_dqn* dueling_categorical_dqn_init(int input_size, int action_size, int n_atoms, float v_min, float v_max, model* shared_hidden_layers, model* v_hidden_layers, model* a_hidden_layers, model* v_linear_last_layer, model* a_linear_last_layer){
     if(shared_hidden_layers == NULL || v_hidden_layers == NULL || a_hidden_layers == NULL || v_linear_last_layer == NULL || a_linear_last_layer == NULL){
         fprintf(stderr,"Error: you cannot have null model passed as input!\n");
@@ -1360,6 +1361,10 @@ float compute_kl_dueling_categorical_dqn_opt(dueling_categorical_dqn* online_net
     // used to rescale alpha that is the most important part)
     * */
     clip_vector(error,-clip,clip,online_net->action_size);
+    /*for(i = 0; i < online_net->action_size; i++){
+        printf("%f ",error[i]);
+    }
+    printf("\n");*/
     // we got the partial derivatives of the q functions, now we need to compute the partial derivatives respect to the softmax final layer of the network
     for(i = 0; i < online_net->action_size; i++){
         for(j = 0; j < online_net->n_atoms; j++){
@@ -1508,3 +1513,19 @@ int get_input_layer_size_dueling_categorical_dqn(dueling_categorical_dqn* dqn){
         return 0;
     return get_input_layer_size(dqn->shared_hidden_layers);
 }
+
+void inference_dqn(dueling_categorical_dqn* dqn){
+    inference_model(dqn->shared_hidden_layers);
+    inference_model(dqn->a_hidden_layers);
+    inference_model(dqn->v_hidden_layers);
+    inference_model(dqn->a_linear_last_layer);
+    inference_model(dqn->v_linear_last_layer);
+}
+
+void train_dqn(dueling_categorical_dqn* dqn){
+    train_model(dqn->shared_hidden_layers);
+    train_model(dqn->a_hidden_layers);
+    train_model(dqn->v_hidden_layers);
+    train_model(dqn->a_linear_last_layer);
+    train_model(dqn->v_linear_last_layer);
+}
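
A minimal usage sketch of the new pair, assuming the llab.h umbrella header; the wrapper below and the commented act_greedily() call are hypothetical, not part of this commit:

#include "llab.h"

/* Hypothetical wrapper: run a deterministic evaluation step with dropout
 * disabled, then restore training mode. Only inference_dqn()/train_dqn()
 * come from this commit. */
void evaluate_then_resume_training(dueling_categorical_dqn* dqn, float* state){
    inference_dqn(dqn);   /* each sub-model's fcls switch DROPOUT -> DROPOUT_TEST */
    /* act_greedily(dqn, state);   hypothetical action selection */
    (void)state;          /* silence unused-parameter warning in this sketch */
    train_dqn(dqn);       /* switch back before the next gradient update */
}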

src/dueling_categorical_dqn.h

Lines changed: 2 additions & 0 deletions
@@ -94,5 +94,7 @@ int get_input_layer_size_dueling_categorical_dqn(dueling_categorical_dqn* dqn);
 float* get_loss_for_dueling_categorical_dqn_opt_with_error(dueling_categorical_dqn* online_net,dueling_categorical_dqn* online_net_wlp, dueling_categorical_dqn* target_net, dueling_categorical_dqn* target_net_wlp, float* state_t, int action_t, float reward_t, float* state_t_1, float lambda_value, int state_sizes, int nonterminal_s_t_1, float* new_error, float weight_error);
 float compute_kl_dueling_categorical_dqn(dueling_categorical_dqn* online_net, float* state_t, float* q_functions, float weight, float alpha, float clip);
 float compute_kl_dueling_categorical_dqn_opt(dueling_categorical_dqn* online_net,dueling_categorical_dqn* online_net_wlp, float* state_t, float* q_functions, float weight, float alpha, float clip);
+void inference_dqn(dueling_categorical_dqn* dqn);
+void train_dqn(dueling_categorical_dqn* dqn);
 
 #endif

src/fully_connected_layers.c

Lines changed: 18 additions & 0 deletions
@@ -2061,3 +2061,21 @@ void make_the_fcl_only_for_ff(fcl* f){
         f->dropout_threshold = 1-f->dropout_threshold;
     }
 }
+
+void inference_fcl(fcl* f){
+    if(f == NULL)
+        return;
+    if(f->dropout_flag == DROPOUT){
+        f->dropout_flag = DROPOUT_TEST;
+        f->dropout_threshold = 1-f->dropout_threshold;
+    }
+}
+
+void train_fcl(fcl* f){
+    if(f == NULL)
+        return;
+    if(f->dropout_flag == DROPOUT_TEST){
+        f->dropout_flag = DROPOUT;
+        f->dropout_threshold = 1-f->dropout_threshold;
+    }
+}
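
A sketch of the invariant behind these toggles, using only the fields and flags visible in this diff (dropout_flag, dropout_threshold, DROPOUT, DROPOUT_TEST); the demo function itself is hypothetical:

#include "llab.h"

/* The guard on dropout_flag makes the two toggles mutually inverse and
 * individually idempotent: the threshold is inverted at most once per
 * direction, so repeated calls cannot drift it. */
void toggle_demo(fcl* f){
    /* suppose f->dropout_flag == DROPOUT and f->dropout_threshold == 0.3f */
    inference_fcl(f);   /* flag -> DROPOUT_TEST, threshold -> 0.7f */
    inference_fcl(f);   /* no-op: the guard fails, nothing changes */
    train_fcl(f);       /* flag -> DROPOUT, threshold restored to 0.3f */
}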

src/fully_connected_layers.h

Lines changed: 2 additions & 0 deletions
@@ -82,6 +82,8 @@ uint64_t count_weights_fcl(fcl* f);
 void make_the_fcl_only_for_ff(fcl* f);
 fcl* fully_connected_without_arrays(int input, int output, int layer, int dropout_flag, int activation_flag, float dropout_threshold, int n_groups, int normalization_flag, int training_mode, int feed_forward_flag);
 void free_fully_connected_without_arrays(fcl* f);
+void inference_fcl(fcl* f);
+void train_fcl(fcl* f);
 
 
 
src/llab.h

Lines changed: 1 addition & 2 deletions
@@ -809,6 +809,7 @@ typedef struct rainbow{
     float** diversity_driven_states;
     float* last_errors_dqn;
     float* last_errors_diversity_driven;
+    int* uniform_sampling_indices;
     int* positive_rewards;
     int* negative_rewards;
     int* neutral_rewards;
@@ -852,7 +853,6 @@ typedef struct rainbow{
 
 #include "attention.h"
 #include "batch_norm_layers.h"
-#include "client.h"
 #include "clipping_gradient.h"
 #include "convolutional.h"
 #include "convolutional_layers.h"
@@ -882,7 +882,6 @@ typedef struct rainbow{
 #include "rmodel.h"
 #include "positional_encoding.h"
 #include "scaled_l2_norm_layers.h"
-#include "server.h"
 #include "struct_conn.h"
 #include "struct_conn_handler.h"
 #include "training.h"

src/model.c

Lines changed: 22 additions & 108 deletions
@@ -7360,36 +7360,7 @@ int model_tensor_input_ff_without_arrays(model* m, int tensor_depth, int tensor_
         return 0;
     }
 
-    if(k3-count == m->rls[z]->n_cl-1){
-        if(m->rls[z]->cls[k3-count]->pooling_flag){
-            float* pooltemp_prev = m->rls[z]->cls[k3-count]->post_pooling;
-            if(m->rls[z]->cls[k3-count]->convolutional_flag == NO_CONVOLUTION && m->rls[z]->cls[k3-count]->stride2_cols == 1 && m->rls[z]->cls[k3-count]->stride2_rows == 1 && m->rls[z]->cls[k3-count]->padding2_rows == 0 && m->rls[z]->cls[k3-count]->padding2_cols == 0 && m->rls[z]->cls[k3-count]->pooling_rows == 1 && m->rls[z]->cls[k3-count]->pooling_cols == 1){
-                pooltemp_prev = m->rls[z]->cls[k3-count]->pooltemp;
-            }
-            sum1D(m->rls[z]->input,pooltemp_prev,m->rls[z]->cl_output->pre_activation,m->rls[z]->cls[k3-count]->n_kernels*m->rls[z]->cls[k3-count]->rows2*m->rls[z]->cls[k3-count]->cols2);
-        }
-        else if(m->rls[z]->cls[k3-count]->normalization_flag)
-            sum1D(m->rls[z]->input,m->rls[z]->cls[k3-count]->post_normalization,m->rls[z]->cl_output->pre_activation,m->rls[z]->cls[k3-count]->n_kernels*m->rls[z]->cls[k3-count]->rows1*m->rls[z]->cls[k3-count]->cols1);
-        else if(m->rls[z]->cls[k3-count]->activation_flag){
-            sum1D(m->rls[z]->input,m->rls[z]->cls[k3-count]->post_activation,m->rls[z]->cl_output->pre_activation,m->rls[z]->cls[k3-count]->n_kernels*m->rls[z]->cls[k3-count]->rows1*m->rls[z]->cls[k3-count]->cols1);
-        }
-        else
-            sum1D(m->rls[z]->input,m->rls[z]->cls[k3-count]->pre_activation,m->rls[z]->cl_output->pre_activation,m->rls[z]->cls[k3-count]->n_kernels*m->rls[z]->cls[k3-count]->rows1*m->rls[z]->cls[k3-count]->cols1);
-
-        if(m->rls[z]->cl_output->activation_flag == LEAKY_RELU){
-            leaky_relu_array(m->rls[z]->cl_output->pre_activation,m->rls[z]->cl_output->post_activation, m->rls[z]->cl_output->n_kernels*m->rls[z]->cl_output->rows1*m->rls[z]->cl_output->cols1);
-        }
-        else if(m->rls[z]->cl_output->activation_flag == RELU){
-            relu_array(m->rls[z]->cl_output->pre_activation,m->rls[z]->cl_output->post_activation, m->rls[z]->cl_output->n_kernels*m->rls[z]->cl_output->rows1*m->rls[z]->cl_output->cols1);
-        }
-        else if(m->rls[z]->cl_output->activation_flag == ELU)
-            elu_array(m->rls[z]->cl_output->pre_activation,m->rls[z]->cl_output->post_activation, m->rls[z]->cl_output->n_kernels*m->rls[z]->cl_output->rows1*m->rls[z]->cl_output->cols1,ELU_THRESHOLD);
-        else if(m->rls[z]->cl_output->activation_flag == SIGMOID)
-            sigmoid_array(m->rls[z]->cl_output->pre_activation,m->rls[z]->cl_output->post_activation, m->rls[z]->cl_output->n_kernels*m->rls[z]->cl_output->rows1*m->rls[z]->cl_output->cols1);
-        else if(m->rls[z]->cl_output->activation_flag == TANH)
-            tanhh_array(m->rls[z]->cl_output->pre_activation,m->rls[z]->cl_output->post_activation, m->rls[z]->cl_output->n_kernels*m->rls[z]->cl_output->rows1*m->rls[z]->cl_output->cols1);
-    }
+
 
     k3++;
 
@@ -7508,31 +7479,7 @@ int model_tensor_input_ff_without_arrays(model* m, int tensor_depth, int tensor_
 
 
     if(m->sla[i-1][0] == FCLS){
-        if(k3-count == 0){
-            if(m->fcls[k1-1]->dropout_flag){
-                if(m->fcls[k1-1]->activation_flag){
-                    dot1D(m->fcls[k1-1]->post_activation,m->fcls[k1-1]->dropout_mask,m->fcls[k1-1]->dropout_temp,m->rls[z]->channels*m->rls[z]->input_rows*m->rls[z]->input_cols);
-                    m->rls[z]->input = m->fcls[k1-1]->dropout_temp;
-                }
-                else{
-                    dot1D(m->fcls[k1-1]->pre_activation,m->fcls[k1-1]->dropout_mask,m->fcls[k1-1]->dropout_temp,m->rls[z]->channels*m->rls[z]->input_rows*m->rls[z]->input_cols);
-                    m->rls[z]->input = m->fcls[k1-1]->dropout_temp;
-                }
-            }
-            else{
-
-                if(m->fcls[k1-1]->normalization_flag){
-                    m->rls[z]->input = m->fcls[k1-1]->post_normalization;
-                }
-
-                else if(m->fcls[k1-1]->activation_flag){
-                    m->rls[z]->input = m->fcls[k1-1]->post_activation;
-                }
-                else{
-                    m->rls[z]->input = m->fcls[k1-1]->pre_activation;
-                }
-            }
-        }
+
 
         if(!ff_fcl_cl_without_arrays(m->fcls[k1-1],m->rls[z]->cls[k3-count])){
             free(temp);
@@ -7541,25 +7488,7 @@ int model_tensor_input_ff_without_arrays(model* m, int tensor_depth, int tensor_
     }
 
     else if(m->sla[i-1][0] == CLS){
-        if(k3-count == 0){
-            if(m->cls[k2-1]->pooling_flag){
-                float* pooltemp_prev = m->cls[k2-1]->post_pooling;
-                if(m->cls[k2-1]->convolutional_flag == NO_CONVOLUTION && m->cls[k2-1]->stride2_cols == 1 && m->cls[k2-1]->stride2_rows == 1 && m->cls[k2-1]->padding2_rows == 0 && m->cls[k2-1]->padding2_cols == 0 && m->cls[k2-1]->pooling_rows == 1 && m->cls[k2-1]->pooling_cols == 1){
-                    pooltemp_prev = m->cls[k2-1]->pooltemp;
-                }
-                m->rls[z]->input = pooltemp_prev;
-            }
-            else if(m->cls[k2-1]->normalization_flag){
-                m->rls[z]->input = m->cls[k2-1]->post_normalization;
-            }
-
-            else if(m->cls[k2-1]->activation_flag){
-                m->rls[z]->input = m->cls[k2-1]->post_activation;
-            }
-            else{
-                m->rls[z]->input = m->cls[k2-1]->pre_activation;
-            }
-        }
+
         if(!ff_cl_cl_without_arrays(m->cls[k2-1],m->rls[z]->cls[k3-count])){
             free(temp);
             return 0;
@@ -7596,40 +7525,7 @@ int model_tensor_input_ff_without_arrays(model* m, int tensor_depth, int tensor_
     }
 }
 
-    if(k3-count == m->rls[z]->n_cl-1){
-        if(m->rls[z]->cls[k3-count]->pooling_flag){
-            float* pooltemp_prev = m->rls[z]->cls[k3-count]->post_pooling;
-            if(m->rls[z]->cls[k3-count]->convolutional_flag == NO_CONVOLUTION && m->rls[z]->cls[k3-count]->stride2_cols == 1 && m->rls[z]->cls[k3-count]->stride2_rows == 1 && m->rls[z]->cls[k3-count]->padding2_rows == 0 && m->rls[z]->cls[k3-count]->padding2_cols == 0 && m->rls[z]->cls[k3-count]->pooling_rows == 1 && m->rls[z]->cls[k3-count]->pooling_cols == 1){
-                pooltemp_prev = m->rls[z]->cls[k3-count]->pooltemp;
-            }
-            sum1D(m->rls[z]->input,pooltemp_prev,m->rls[z]->cl_output->pre_activation,m->rls[z]->cls[k3-count]->n_kernels*m->rls[z]->cls[k3-count]->rows2*m->rls[z]->cls[k3-count]->cols2);
-            //printf("summing from previous pooling\n");
-        }
-        else if(m->rls[z]->cls[k3-count]->normalization_flag)
-            sum1D(m->rls[z]->input,m->rls[z]->cls[k3-count]->post_normalization,m->rls[z]->cl_output->pre_activation,m->rls[z]->cls[k3-count]->n_kernels*m->rls[z]->cls[k3-count]->rows1*m->rls[z]->cls[k3-count]->cols1);
-        else if(m->rls[z]->cls[k3-count]->activation_flag){
-            sum1D(m->rls[z]->input,m->rls[z]->cls[k3-count]->post_activation,m->rls[z]->cl_output->pre_activation,m->rls[z]->cls[k3-count]->n_kernels*m->rls[z]->cls[k3-count]->rows1*m->rls[z]->cls[k3-count]->cols1);
-            //printf("summing from previous activation\n");
-        }
-        else
-            sum1D(m->rls[z]->input,m->rls[z]->cls[k3-count]->pre_activation,m->rls[z]->cl_output->pre_activation,m->rls[z]->cls[k3-count]->n_kernels*m->rls[z]->cls[k3-count]->rows1*m->rls[z]->cls[k3-count]->cols1);
-
-        if(m->rls[z]->cl_output->activation_flag == LEAKY_RELU){
-            leaky_relu_array(m->rls[z]->cl_output->pre_activation,m->rls[z]->cl_output->post_activation, m->rls[z]->cl_output->n_kernels*m->rls[z]->cl_output->rows1*m->rls[z]->cl_output->cols1);
-            //printf("applying leaky relu rls\n");
-        }
-        else if(m->rls[z]->cl_output->activation_flag == RELU){
-            relu_array(m->rls[z]->cl_output->pre_activation,m->rls[z]->cl_output->post_activation, m->rls[z]->cl_output->n_kernels*m->rls[z]->cl_output->rows1*m->rls[z]->cl_output->cols1);
-            //printf("applying relu rls\n");
-        }
-        else if(m->rls[z]->cl_output->activation_flag == ELU)
-            elu_array(m->rls[z]->cl_output->pre_activation,m->rls[z]->cl_output->post_activation, m->rls[z]->cl_output->n_kernels*m->rls[z]->cl_output->rows1*m->rls[z]->cl_output->cols1,ELU_THRESHOLD);
-        else if(m->rls[z]->cl_output->activation_flag == SIGMOID)
-            sigmoid_array(m->rls[z]->cl_output->pre_activation,m->rls[z]->cl_output->post_activation, m->rls[z]->cl_output->n_kernels*m->rls[z]->cl_output->rows1*m->rls[z]->cl_output->cols1);
-        else if(m->rls[z]->cl_output->activation_flag == TANH)
-            tanhh_array(m->rls[z]->cl_output->pre_activation,m->rls[z]->cl_output->post_activation, m->rls[z]->cl_output->n_kernels*m->rls[z]->cl_output->rows1*m->rls[z]->cl_output->cols1);
-
-    }
+
 
     k3++;
 
@@ -9208,6 +9104,24 @@ void make_the_model_only_for_ff(model* m){
     }
 }
 
+void inference_model(model* m){
+    if(m == NULL)
+        return;
+    int i,j;
+    for(i = 0; i < m->n_fcl; i++){
+        inference_fcl(m->fcls[i]);
+    }
+}
+
+void train_model(model* m){
+    if(m == NULL)
+        return;
+    int i,j;
+    for(i = 0; i < m->n_fcl; i++){
+        train_fcl(m->fcls[i]);
+    }
+}
+
 void set_model_beta(model* m, float beta1, float beta2){
     if(m == NULL)
         return;
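
The model-level pair simply fans the toggle out over the fully connected layers, dropout being the only train/test-dependent behavior handled here. A hedged caller-side sketch (with_inference_mode and forward_pass are hypothetical; only inference_model()/train_model() come from this commit):

#include "llab.h"

/* Hypothetical helper: run any forward-only routine with dropout in test
 * mode, then restore training mode afterwards. */
void with_inference_mode(model* m, void (*forward_pass)(model*)){
    inference_model(m);   /* calls inference_fcl() on each of m->fcls */
    forward_pass(m);      /* caller-supplied forward pass */
    train_model(m);       /* calls train_fcl() on each of m->fcls */
}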

src/model.h

Lines changed: 2 additions & 0 deletions
@@ -121,5 +121,7 @@ int ff_fcl_cl_without_arrays(fcl* f1, cl* f2);
 int ff_cl_fcl_without_arrays(cl* f1, fcl* f2);
 int ff_cl_cl_without_arrays(cl* f1, cl* f2);
 int model_tensor_input_ff_without_arrays(model* m, int tensor_depth, int tensor_i, int tensor_j, float* input);
+void inference_model(model* m);
+void train_model(model* m);
 
 #endif

src/neat.c

Lines changed: 1 addition & 0 deletions
@@ -429,6 +429,7 @@ void neat_generation_run(neat* nes){
     nes->count+=nes->actual_genomes;
     for(nes->i = 0; nes->i < nes->actual_genomes; nes->i++){
         gg[nes->i]->fitness = 0;
+        adjust_genome(gg[nes->i]);
     }
 }
 

src/neat_functions.h

Lines changed: 1 addition & 0 deletions
@@ -47,6 +47,7 @@ int round_up(float num);
 char* get_genome_array(genome* g, int global_inn_numb_connections);
 genome* init_genome_from_array(int global_inn_numb_connections, char* g_array);
 int get_genome_array_size(genome* g, int global_inn_numb_connections);
+void adjust_genome(genome* g);
 
 
 // Functions defined in mutations.c
