Skip to content

Commit ba34433

Browse files
authored
Merge pull request #3 from y-hesse/3.0
g2048 fix 1
2 parents 753914d + da6e2f9 commit ba34433

File tree

3 files changed

+61
-52
lines changed

3 files changed

+61
-52
lines changed

pufferlib/config/ocean/g2048.ini

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ policy_name = Policy
55
rnn_name = Recurrent
66

77
[policy]
8-
hidden_size = 128
8+
hidden_size = 1024
99

1010
[rnn]
11-
input_size = 128
12-
hidden_size = 128
11+
input_size = 1024
12+
hidden_size = 1024
1313

1414
[vec]
1515
num_envs = 4
@@ -19,22 +19,21 @@ num_envs = 4096
1919

2020
[train]
2121
total_timesteps = 5_000_000_000
22-
adam_beta1 = 0.9529488439604378
23-
adam_beta2 = 0.9993901829477296
24-
adam_eps = 2.745365927413118e-7
22+
adam_beta1 = 0.982603624444803
23+
adam_beta2 = 0.982603624444803
24+
adam_eps = 3.2888696338626164e-11
2525
bptt_horizon = 64
26-
clip_coef = 0.596573170393339
27-
ent_coef = 0.02107417730003862
28-
gae_lambda = 0.9940613415815854
29-
gamma = 0.9889857974154952
26+
clip_coef = 0.2709219986085283
27+
ent_coef = 0.09221187601118314
28+
gae_lambda = 0.5999999999999999
29+
gamma = 0.9913033082924563
3030
#learning_rate = 0.0032402460796988127
31-
learning_rate = 0.001
32-
max_grad_norm = 1.0752406726589745
33-
minibatch_size = 16384
34-
prio_alpha = 0.25297099593586336
35-
prio_beta0 = 0.940606268942572
36-
vf_clip_coef = 0.1
37-
vf_coef = 1.6362878279900643
38-
vtrace_c_clip = 0
39-
vtrace_rho_clip = 1.2917509971869054
40-
anneal_lr = False
31+
learning_rate = 0.001370087925623787
32+
max_grad_norm = 3.382578348055827
33+
minibatch_size = 32768
34+
prio_alpha = 0.09999999999999998
35+
prio_beta0 = 0.941336023531629
36+
vf_clip_coef = 0.3229933703598912
37+
vf_coef = 3.591594736259073
38+
vtrace_c_clip = 1.405090934486193
39+
vtrace_rho_clip = 0.836535302835556

pufferlib/ocean/g2048/2048.h

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#define RIGHT 4
1515

1616
// Precomputed constants
17-
#define REWARD_MULTIPLIER 0.09090909f
17+
#define REWARD_MULTIPLIER 0.0625f
1818
#define INVALID_MOVE_PENALTY -0.05f
1919
#define GAME_OVER_PENALTY -1.0f
2020

@@ -93,9 +93,24 @@ static inline void update_empty_count(Game* game) {
9393
game->empty_count = count;
9494
}
9595

96+
// Returns the largest value stored in any cell of game->grid
97+
static inline unsigned char calc_score(Game* game) {
98+
unsigned char max_tile = 0;
99+
// Scan every cell of the SIZE x SIZE grid, keeping the maximum seen
100+
for (int i = 0; i < SIZE; i++) {
101+
for (int j = 0; j < SIZE; j++) {
102+
if (game->grid[i][j] > max_tile) {
103+
max_tile = game->grid[i][j];
104+
}
105+
}
106+
}
107+
return max_tile;
108+
}
109+
96110
void add_log(Game* game) {
97-
game->log.score = (float)(1 << game->score);
98-
game->log.perf += ((float)game->score) * REWARD_MULTIPLIER;
111+
unsigned char s = calc_score(game);
112+
game->log.score = (float)(1 << s);
113+
game->log.perf += ((float)s) * 0.0909f;
99114
game->log.episode_length += game->tick;
100115
game->log.episode_return += game->episode_reward;
101116
game->log.n += 1;
@@ -162,7 +177,7 @@ void add_random_tile(Game* game) {
162177
}
163178

164179
// Optimized slide and merge with fewer memory operations
165-
static inline bool slide_and_merge(unsigned char* row, float* reward) {
180+
static inline bool slide_and_merge(unsigned char* row, float* reward, float* score_increase) {
166181
bool moved = false;
167182
int write_pos = 0;
168183

@@ -183,6 +198,7 @@ static inline bool slide_and_merge(unsigned char* row, float* reward) {
183198
if (row[i] != EMPTY && row[i] == row[i + 1]) {
184199
row[i]++;
185200
*reward += ((float)row[i]) * REWARD_MULTIPLIER;
201+
*score_increase += (float)(1 << (int)row[i]);
186202
// Shift remaining elements left
187203
for (int j = i + 1; j < SIZE - 1; j++) {
188204
row[j] = row[j + 1];
@@ -195,7 +211,7 @@ static inline bool slide_and_merge(unsigned char* row, float* reward) {
195211
return moved;
196212
}
197213

198-
bool move(Game* game, int direction, float* reward) {
214+
bool move(Game* game, int direction, float* reward, float* score_increase) {
199215
bool moved = false;
200216
unsigned char temp[SIZE];
201217

@@ -207,7 +223,7 @@ bool move(Game* game, int direction, float* reward) {
207223
temp[i] = game->grid[idx][col];
208224
}
209225

210-
if (slide_and_merge(temp, reward)) {
226+
if (slide_and_merge(temp, reward, score_increase)) {
211227
moved = true;
212228
// Write back column
213229
for (int i = 0; i < SIZE; i++) {
@@ -224,7 +240,7 @@ bool move(Game* game, int direction, float* reward) {
224240
temp[i] = game->grid[row][idx];
225241
}
226242

227-
if (slide_and_merge(temp, reward)) {
243+
if (slide_and_merge(temp, reward, score_increase)) {
228244
moved = true;
229245
// Write back row
230246
for (int i = 0; i < SIZE; i++) {
@@ -280,28 +296,15 @@ bool is_game_over(Game* game) {
280296
return true;
281297
}
282298

283-
// Returns the largest value stored in any cell of game->grid
284-
static inline unsigned char calc_score(Game* game) {
285-
unsigned char max_tile = 0;
286-
// Scan every cell of the SIZE x SIZE grid, keeping the maximum seen
287-
for (int i = 0; i < SIZE; i++) {
288-
for (int j = 0; j < SIZE; j++) {
289-
if (game->grid[i][j] > max_tile) {
290-
max_tile = game->grid[i][j];
291-
}
292-
}
293-
}
294-
return max_tile;
295-
}
296-
297299
void c_step(Game* game) {
298300
float reward = 0.0f;
299-
bool did_move = move(game, game->actions[0] + 1, &reward);
301+
float score_add = 0.0f;
302+
bool did_move = move(game, game->actions[0] + 1, &reward, &score_add);
300303
game->tick++;
301304

302305
if (did_move) {
303306
add_random_tile(game);
304-
game->score = calc_score(game);
307+
game->score += score_add;
305308
update_empty_count(game); // Update after adding tile
306309
}
307310

@@ -369,7 +372,7 @@ void c_render(Game* game) {
369372
}
370373

371374
// Draw score (format once per frame)
372-
snprintf(score_text, sizeof(score_text), "Score: %d", 1 << game->score);
375+
snprintf(score_text, sizeof(score_text), "Score: %d", game->score);
373376
DrawText(score_text, 10, px * SIZE + 10, 24, PUFF_WHITE);
374377

375378
EndDrawing();

pufferlib/ocean/g2048/g2048.c

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,19 @@ int main() {
2222

2323
// Main game loop
2424
int frame = 0;
25+
int action = -1;
2526
while (!WindowShouldClose()) {
2627
c_render(&env);
2728
frame++;
28-
29-
int action = 0;
29+
3030
if (IsKeyDown(KEY_LEFT_SHIFT)) {
31-
if (IsKeyPressed(KEY_W) || IsKeyPressed(KEY_UP)) action = UP;
32-
else if (IsKeyPressed(KEY_S) || IsKeyPressed(KEY_DOWN)) action = DOWN;
33-
else if (IsKeyPressed(KEY_A) || IsKeyPressed(KEY_LEFT)) action = LEFT;
34-
else if (IsKeyPressed(KEY_D) || IsKeyPressed(KEY_RIGHT)) action = RIGHT;
31+
action = -1;
32+
if (IsKeyDown(KEY_W) || IsKeyDown(KEY_UP)) action = UP;
33+
else if (IsKeyDown(KEY_S) || IsKeyDown(KEY_DOWN)) action = DOWN;
34+
else if (IsKeyDown(KEY_A) || IsKeyDown(KEY_LEFT)) action = LEFT;
35+
else if (IsKeyDown(KEY_D) || IsKeyDown(KEY_RIGHT)) action = RIGHT;
3536
env.actions[0] = action - 1;
36-
} else if (frame % 10 != 0) {
37+
} else if (frame % 1 != 0) {
3738
continue;
3839
} else {
3940
action = 1;
@@ -43,9 +44,15 @@ int main() {
4344
forward_linearlstm(net, net->obs, env.actions);
4445
}
4546

46-
if (action != 0) {
47+
if (action > 0) {
4748
c_step(&env);
4849
}
50+
51+
if (IsKeyDown(KEY_LEFT_SHIFT) && action > 0) {
52+
WaitTime(0.05);
53+
}
54+
55+
4956
}
5057

5158
free_linearlstm(net);

0 commit comments

Comments
 (0)