import numpy as np
from collections import deque
import random

99class SLAMAgent :
1010 def __init__ (self , state_size , action_size ):
@@ -20,6 +20,7 @@ def __init__(self, state_size, action_size):
2020 self .learning_rate_decay = 0.01
2121 self .randomActions = [0 ,1 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ]
2222 self .model = self ._build_model ()
23+
2324 def _build_model (self ):
2425 # Neural Net for Deep-Q learning Model
2526 model = Sequential ()
@@ -33,11 +34,13 @@ def _build_model(self):
3334 model .compile (loss = 'mse' ,
3435 optimizer = Adam (lr = self .learning_rate ,decay = self .learning_rate_decay ))
3536 return model
37+
3638 def remember (self , state , action , reward , next_state , done ):
3739 if reward == 0 :
3840 self .memory .append ((state , action , reward , next_state , done ))
3941 else :
4042 self .tempMemory .append ((state , action , reward , next_state , done ))
43+
4144 def act (self , state ):
4245 currentMinDistance = 1
4346 for i in range (len (state [0 ])):
@@ -51,6 +54,7 @@ def act(self, state):
5154 return self .randomActions [random .randrange (len (self .randomActions )- 1 )], True
5255 act_values = self .model .predict (state )
5356 return np .argmax (act_values [0 ]), False # returns action
57+
5458 def replay (self , batch_size ):
5559 minibatch = []
5660
@@ -77,9 +81,11 @@ def replay(self, batch_size):
7781 if self .epsilon > self .epsilon_min :
7882 self .epsilon *= self .epsilon_decay
7983 print ("epsilon attuale: " + str (self .epsilon ))
84+
8085 def save (self , fn ):
8186 self .model .save (fn ) #fn è il file name del file dei pesi dei neuroni alla fine del training
8287 print ("epsilon attuale: " + str (self .epsilon ))
88+
8389 def load (self , name , lastRandomValue ):
8490 self .model .load_weights (name )
8591 self .epsilon = lastRandomValue