Skip to content

Commit 2b50129

Browse files
committed
Add W&B replication run for the 512-hidden G2048 model
1 parent 888a16d commit 2b50129

File tree

2 files changed

+11
-13
lines changed

2 files changed

+11
-13
lines changed

pufferlib/config/ocean/g2048.ini

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,9 @@ policy_name = G2048
55
rnn_name = Recurrent
66

77
[policy]
8-
; hidden_size = 256
98
hidden_size = 512
109

1110
[rnn]
12-
; input_size = 256
13-
; hidden_size = 256
1411
input_size = 512
1512
hidden_size = 512
1613

@@ -26,10 +23,8 @@ use_heuristic_rewards = True
2623
snake_reward_weight = 0.0005
2724

2825
[train]
29-
# 256 hidden: https://wandb.ai/kywch/pufferlib/runs/nvd0pfuj?nw=nwuserkywch
30-
# 512 hidden: https://wandb.ai/kywch/pufferlib/runs/2ch3my60?nw=nwuserkywch
26+
# 512 hidden: https://wandb.ai/kywch/pufferlib/runs/5thsjr61?nw=nwuserkywch
3127
total_timesteps = 6_767_676_767
32-
; total_timesteps = 1_000_000_000
3328
anneal_lr = True
3429
min_learning_rate = 0.00005
3530
batch_size = auto
@@ -43,11 +38,6 @@ gamma = 0.99567
4338
vf_clip_coef = 0.167
4439
vf_coef = 2.0
4540

46-
# for 256 hidden
47-
; learning_rate = 0.0005
48-
; max_grad_norm = 0.5
49-
50-
# for 512 hidden
5141
learning_rate = 0.000467
5242
max_grad_norm = 0.5
5343

pufferlib/ocean/g2048/eval.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,15 @@ def evaluate(env_name, load_model_path):
5757
Episode length -- Avg: 21539.7, Max: 29680.3
5858
Merge score -- Avg: 618011.8, Max: 918755.8
5959
Reached 32768 prob: 68.25 %
60-
Reached 65536 prob: 13.09 %
60+
Reached 65536 prob: 13.09 %
61+
62+
# hidden 512 (replication): https://wandb.ai/kywch/pufferlib/runs/5thsjr61?nw=nwuserkywch
63+
Num episodes: 115652
64+
Max tile avg: 31773.2
65+
Episode length -- Avg: 22196.4, Max: 30316.5
66+
Merge score -- Avg: 639395.6, Max: 909969.8
67+
Reached 32768 prob: 71.22 %
68+
Reached 65536 prob: 14.75 %
6169
"""
6270

6371
def finetune(env_name, load_model_path):
@@ -80,5 +88,5 @@ def finetune(env_name, load_model_path):
8088
pufferl.train(env_name, args)
8189

8290
if __name__ == '__main__':
83-
evaluate('puffer_g2048', load_model_path='puffer_g2048_2ch3my60.pt')
91+
evaluate('puffer_g2048', load_model_path='puffer_g2048_5thsjr61.pt')
8492
# finetune('puffer_g2048', load_model_path='puffer_g2048_256_base.pt')

0 commit comments

Comments
 (0)