Skip to content

Commit 8f6d0f8

Browse files
author
Ervin T
authored
Fix naming conflict between Curiosity and GAIL (#2406)
1 parent 2a76490 commit 8f6d0f8

File tree

2 files changed

+16
-16
lines changed
  • ml-agents/mlagents/trainers/components/reward_signals

2 files changed

+16
-16
lines changed

ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
4242
# Create input ops for next (t+1) visual observations.
4343
next_visual_input = LearningModel.create_visual_input(
4444
self.policy_model.brain.camera_resolutions[i],
45-
name="next_visual_observation_" + str(i),
45+
name="curiosity_next_visual_observation_" + str(i),
4646
)
4747
self.next_visual_in.append(next_visual_input)
4848

@@ -53,7 +53,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
5353
self.encoding_size,
5454
LearningModel.swish,
5555
1,
56-
"stream_{}_visual_obs_encoder".format(i),
56+
"curiosity_stream_{}_visual_obs_encoder".format(i),
5757
False,
5858
)
5959

@@ -62,7 +62,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
6262
self.encoding_size,
6363
LearningModel.swish,
6464
1,
65-
"stream_{}_visual_obs_encoder".format(i),
65+
"curiosity_stream_{}_visual_obs_encoder".format(i),
6666
True,
6767
)
6868
visual_encoders.append(encoded_visual)
@@ -80,23 +80,23 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
8080
self.next_vector_in = tf.placeholder(
8181
shape=[None, self.policy_model.vec_obs_size],
8282
dtype=tf.float32,
83-
name="next_vector_observation",
83+
name="curiosity_next_vector_observation",
8484
)
8585

8686
encoded_vector_obs = self.policy_model.create_vector_observation_encoder(
8787
self.policy_model.vector_in,
8888
self.encoding_size,
8989
LearningModel.swish,
9090
2,
91-
"vector_obs_encoder",
91+
"curiosity_vector_obs_encoder",
9292
False,
9393
)
9494
encoded_next_vector_obs = self.policy_model.create_vector_observation_encoder(
9595
self.next_vector_in,
9696
self.encoding_size,
9797
LearningModel.swish,
9898
2,
99-
"vector_obs_encoder",
99+
"curiosity_vector_obs_encoder",
100100
True,
101101
)
102102
encoded_state_list.append(encoded_vector_obs)

ml-agents/mlagents/trainers/components/reward_signals/gail/model.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@ def make_inputs(self) -> None:
112112
# Create input ops for next (t+1) visual observations.
113113
visual_input = self.policy_model.create_visual_input(
114114
self.policy_model.brain.camera_resolutions[i],
115-
name="visual_observation_" + str(i),
115+
name="gail_visual_observation_" + str(i),
116116
)
117117
self.expert_visual_in.append(visual_input)
118118

@@ -121,7 +121,7 @@ def make_inputs(self) -> None:
121121
self.encoding_size,
122122
LearningModel.swish,
123123
1,
124-
"stream_{}_visual_obs_encoder".format(i),
124+
"gail_stream_{}_visual_obs_encoder".format(i),
125125
False,
126126
)
127127

@@ -130,7 +130,7 @@ def make_inputs(self) -> None:
130130
self.encoding_size,
131131
LearningModel.swish,
132132
1,
133-
"stream_{}_visual_obs_encoder".format(i),
133+
"gail_stream_{}_visual_obs_encoder".format(i),
134134
True,
135135
)
136136
visual_policy_encoders.append(encoded_policy_visual)
@@ -163,15 +163,15 @@ def create_encoder(
163163
concat_input,
164164
self.h_size,
165165
activation=LearningModel.swish,
166-
name="d_hidden_1",
166+
name="gail_d_hidden_1",
167167
reuse=reuse,
168168
)
169169

170170
hidden_2 = tf.layers.dense(
171171
hidden_1,
172172
self.h_size,
173173
activation=LearningModel.swish,
174-
name="d_hidden_2",
174+
name="gail_d_hidden_2",
175175
reuse=reuse,
176176
)
177177

@@ -182,7 +182,7 @@ def create_encoder(
182182
hidden_2,
183183
self.z_size,
184184
reuse=reuse,
185-
name="z_mean",
185+
name="gail_z_mean",
186186
kernel_initializer=LearningModel.scaled_init(0.01),
187187
)
188188

@@ -198,7 +198,7 @@ def create_encoder(
198198
estimate_input,
199199
1,
200200
activation=tf.nn.sigmoid,
201-
name="d_estimate",
201+
name="gail_d_estimate",
202202
reuse=reuse,
203203
)
204204
return estimate, z_mean, concat_input
@@ -209,15 +209,15 @@ def create_network(self) -> None:
209209
"""
210210
if self.use_vail:
211211
self.z_sigma = tf.get_variable(
212-
"sigma_vail",
212+
"gail_sigma_vail",
213213
self.z_size,
214214
dtype=tf.float32,
215215
initializer=tf.ones_initializer(),
216216
)
217217
self.z_sigma_sq = self.z_sigma * self.z_sigma
218218
self.z_log_sigma_sq = tf.log(self.z_sigma_sq + EPSILON)
219219
self.use_noise = tf.placeholder(
220-
shape=[1], dtype=tf.float32, name="NoiseLevel"
220+
shape=[1], dtype=tf.float32, name="gail_NoiseLevel"
221221
)
222222
self.expert_estimate, self.z_mean_expert, _ = self.create_encoder(
223223
self.encoded_expert, self.expert_action, self.done_expert, reuse=False
@@ -229,7 +229,7 @@ def create_network(self) -> None:
229229
reuse=True,
230230
)
231231
self.discriminator_score = tf.reshape(
232-
self.policy_estimate, [-1], name="GAIL_reward"
232+
self.policy_estimate, [-1], name="gail_reward"
233233
)
234234
self.intrinsic_reward = -tf.log(1.0 - self.discriminator_score + EPSILON)
235235

0 commit comments

Comments
 (0)