@@ -61,6 +61,7 @@ def export_graph(model_path, env_name="env", target_nodes="action,value_estimate
 class PPOModel(object):
     def __init__(self):
         self.normalize = False
+        self.observation_in = []

     def create_global_steps(self):
         """Creates TF ops to track and increment global training step."""
@@ -89,11 +90,11 @@ def create_visual_encoder(self, o_size_h, o_size_w, bw, h_size, num_streams, act
         else:
             c_channels = 3

-        self.observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, c_channels], dtype=tf.float32,
-                                             name='observation_0')
+        self.observation_in.append(tf.placeholder(shape=[None, o_size_h, o_size_w, c_channels], dtype=tf.float32,
+                                                  name='observation_%d' % len(self.observation_in)))
         streams = []
         for i in range(num_streams):
-            self.conv1 = tf.layers.conv2d(self.observation_in, 16, kernel_size=[8, 8], strides=[4, 4],
+            self.conv1 = tf.layers.conv2d(self.observation_in[-1], 16, kernel_size=[8, 8], strides=[4, 4],
                                           use_bias=False, activation=activation)
             self.conv2 = tf.layers.conv2d(self.conv1, 32, kernel_size=[4, 4], strides=[2, 2],
                                           use_bias=False, activation=activation)
@@ -213,10 +214,12 @@ def __init__(self, lr, brain, h_size, epsilon, max_step, normalize, num_layers):
         self.create_reward_encoder()

         hidden_state, hidden_visual, hidden_policy, hidden_value = None, None, None, None
-        if brain.number_observations > 0:
-            height_size, width_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width']
-            bw = brain.camera_resolutions[0]['blackAndWhite']
-            hidden_visual = self.create_visual_encoder(height_size, width_size, bw, h_size, 2, tf.nn.tanh, num_layers)
+        encoders = []
+        for i in range(brain.number_observations):
+            height_size, width_size = brain.camera_resolutions[i]['height'], brain.camera_resolutions[i]['width']
+            bw = brain.camera_resolutions[i]['blackAndWhite']
+            encoders.append(self.create_visual_encoder(height_size, width_size, bw, h_size, 2, tf.nn.tanh, num_layers))
+        hidden_visual = tf.concat(encoders, axis=2)
         if brain.state_space_size > 0:
             s_size = brain.state_space_size
             if brain.state_space_type == "continuous":
@@ -275,10 +278,12 @@ def __init__(self, lr, brain, h_size, epsilon, beta, max_step, normalize, num_la
         self.normalize = normalize

         hidden_state, hidden_visual, hidden = None, None, None
-        if brain.number_observations > 0:
-            height_size, width_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width']
-            bw = brain.camera_resolutions[0]['blackAndWhite']
-            hidden_visual = self.create_visual_encoder(height_size, width_size, bw, h_size, 1, tf.nn.elu, num_layers)[0]
+        encoders = []
+        for i in range(brain.number_observations):
+            height_size, width_size = brain.camera_resolutions[i]['height'], brain.camera_resolutions[i]['width']
+            bw = brain.camera_resolutions[i]['blackAndWhite']
+            encoders.append(self.create_visual_encoder(height_size, width_size, bw, h_size, 1, tf.nn.elu, num_layers)[0])
+        hidden_visual = tf.concat(encoders, axis=1)
         if brain.state_space_size > 0:
             s_size = brain.state_space_size
             if brain.state_space_type == "continuous":
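A minimal sketch of how the list-valued self.observation_in introduced above might be fed at run time. It is not part of this commit: the names sess, model, and visual_obs, the 84x84x3 batch shape, and the model.output fetch are assumptions about the surrounding trainer code.

import numpy as np

# Sketch only: one batch of pixels per camera, in the same order in which
# create_visual_encoder() appended placeholders to model.observation_in.
# Resolution and channel count (84x84x3) are assumed, not taken from the diff.
visual_obs = [np.zeros((1, 84, 84, 3), dtype=np.float32)
              for _ in model.observation_in]

# Feed every observation placeholder; each was created with shape
# [None, height, width, channels] and name 'observation_<index>'.
feed_dict = {placeholder: batch
             for placeholder, batch in zip(model.observation_in, visual_obs)}

# model.output and sess are assumed to come from the surrounding trainer code.
actions = sess.run(model.output, feed_dict=feed_dict)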