Skip to content

Commit 34a4d53

Browse files
author
Joseph Suarez
committed
2048
1 parent 57ade97 commit 34a4d53

File tree

3 files changed

+53
-28
lines changed

3 files changed

+53
-28
lines changed

pufferlib/config/ocean/g2048.ini

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,24 @@
11
[base]
22
package = ocean
33
env_name = puffer_g2048
4-
policy_name = G2048
4+
policy_name = Policy
55
rnn_name = Recurrent
66

77
[policy]
8-
hidden_size = 256
8+
hidden_size = 128
99

1010
[rnn]
11-
input_size = 256
12-
hidden_size = 256
11+
input_size = 128
12+
hidden_size = 128
1313

1414
[vec]
1515
num_envs = 4
1616

1717
[env]
18-
num_envs = 4024
18+
num_envs = 4096
1919

2020
[train]
21-
total_timesteps = 600_000_000
21+
total_timesteps = 5_000_000_000
2222
adam_beta1 = 0.9529488439604378
2323
adam_beta2 = 0.9993901829477296
2424
adam_eps = 2.745365927413118e-7
@@ -27,7 +27,8 @@ clip_coef = 0.596573170393339
2727
ent_coef = 0.02107417730003862
2828
gae_lambda = 0.9940613415815854
2929
gamma = 0.9889857974154952
30-
learning_rate = 0.0032402460796988127
30+
#learning_rate = 0.0032402460796988127
31+
learning_rate = 0.001
3132
max_grad_norm = 1.0752406726589745
3233
minibatch_size = 16384
3334
prio_alpha = 0.25297099593586336
@@ -36,4 +37,4 @@ vf_clip_coef = 0.1
3637
vf_coef = 1.6362878279900643
3738
vtrace_c_clip = 0
3839
vtrace_rho_clip = 1.2917509971869054
39-
anneal_lr = False
40+
anneal_lr = False

pufferlib/ocean/g2048/2048.h

Lines changed: 26 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -44,19 +44,24 @@ typedef struct {
4444
} Game;
4545

4646
// Precomputed color table for rendering optimization
47+
const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255};
48+
const Color PUFF_WHITE = (Color){241, 241, 241, 241};
49+
const Color PUFF_RED = (Color){187, 0, 0, 255};
50+
const Color PUFF_CYAN = (Color){0, 187, 187, 255};
51+
4752
static Color tile_colors[12] = {
48-
{205, 193, 180, 255}, // Empty/background
49-
{238, 228, 218, 255}, // 2
50-
{237, 224, 200, 255}, // 4
51-
{242, 177, 121, 255}, // 8
52-
{245, 149, 99, 255}, // 16
53-
{246, 124, 95, 255}, // 32
54-
{246, 94, 59, 255}, // 64
55-
{237, 207, 114, 255}, // 128
56-
{237, 204, 97, 255}, // 256
57-
{237, 200, 80, 255}, // 512
58-
{237, 197, 63, 255}, // 1024
59-
{237, 194, 46, 255} // 2048+
53+
{6, 24, 24, 255}, // Empty/background
54+
{187, 187, 187, 255}, // 2
55+
{170, 187, 187, 255}, // 4
56+
{150, 187, 187, 255}, // 8
57+
{130, 187, 187, 255}, // 16
58+
{110, 187, 187, 255}, // 32
59+
{90, 187, 187, 255}, // 64
60+
{70, 187, 187, 255}, // 128
61+
{50, 187, 187, 255}, // 256
62+
{30, 187, 187, 255}, // 512
63+
{10, 187, 187, 255}, // 1024
64+
{0, 187, 187, 255} // 2048+
6065
};
6166

6267
// --- Logging ---
@@ -326,7 +331,7 @@ void c_render(Game* game) {
326331

327332
if (!window_initialized) {
328333
InitWindow(px * SIZE, px * SIZE + 50, "2048");
329-
SetTargetFPS(10); // Increased for smoother rendering
334+
SetTargetFPS(30); // Increased for smoother rendering
330335
window_initialized = true;
331336
}
332337

@@ -336,7 +341,7 @@ void c_render(Game* game) {
336341
}
337342

338343
BeginDrawing();
339-
ClearBackground(RAYWHITE);
344+
ClearBackground(PUFF_BACKGROUND);
340345

341346
// Draw grid
342347
for (int i = 0; i < SIZE; i++) {
@@ -354,14 +359,18 @@ void c_render(Game* game) {
354359
int display_val = 1 << val; // Power of 2
355360
// Pre-format text to avoid repeated formatting
356361
snprintf(score_text, sizeof(score_text), "%d", display_val);
357-
DrawText(score_text, j * px + 30, i * px + 40, 32, BLACK);
362+
if (display_val < 1000) {
363+
DrawText(score_text, j * px + 30, i * px + 40, 32, PUFF_WHITE);
364+
} else {
365+
DrawText(score_text, j * px + 20, i * px + 40, 32, PUFF_WHITE);
366+
}
358367
}
359368
}
360369
}
361370

362371
// Draw score (format once per frame)
363372
snprintf(score_text, sizeof(score_text), "Score: %d", 1 << game->score);
364-
DrawText(score_text, 10, px * SIZE + 10, 24, DARKGRAY);
373+
DrawText(score_text, 10, px * SIZE + 10, 24, PUFF_WHITE);
365374

366375
EndDrawing();
367376
}
@@ -370,4 +379,4 @@ void c_close(Game* game) {
370379
if (IsWindowReady()) {
371380
CloseWindow();
372381
}
373-
}
382+
}

pufferlib/ocean/g2048/g2048.c

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
#include "2048.h"
2+
#include "puffernet.h"
23
#include <stdio.h>
34
#include <stdlib.h>
45
#include <termios.h>
@@ -18,25 +19,38 @@ int main() {
1819
env.actions = actions;
1920
env.rewards = rewards;
2021

22+
Weights* weights = load_weights("resources/g2048/g2048_weights.bin", 134917);
23+
int logit_sizes[1] = {4};
24+
LinearLSTM* net = make_linearlstm(weights, 1, 16, logit_sizes, 1);
2125
c_reset(&env);
2226

2327
// Main game loop
28+
int frame = 0;
2429
while (1) {
2530
c_render(&env);
31+
frame++;
2632

2733
int action = 0;
28-
if (IsWindowReady()) {
34+
if (IsKeyDown(KEY_LEFT_SHIFT)) {
2935
if (IsKeyPressed(KEY_W) || IsKeyPressed(KEY_UP)) action = UP;
3036
else if (IsKeyPressed(KEY_S) || IsKeyPressed(KEY_DOWN)) action = DOWN;
3137
else if (IsKeyPressed(KEY_A) || IsKeyPressed(KEY_LEFT)) action = LEFT;
3238
else if (IsKeyPressed(KEY_D) || IsKeyPressed(KEY_RIGHT)) action = RIGHT;
39+
env.actions[0] = action - 1;
40+
} else if (frame % 10 != 0) {
41+
continue;
42+
} else {
43+
action = 1;
44+
for (int i = 0; i < 16; i++) {
45+
net->obs[i] = env.observations[i];
46+
}
47+
forward_linearlstm(net, net->obs, env.actions);
3348
}
3449

3550
if (action != 0) {
36-
env.actions[0] = action - 1;
3751
c_step(&env);
3852
if (!IsWindowReady()) {
39-
print_grid(&env);
53+
//print_grid(&env);
4054
printf("Reward: %.0f\n", env.rewards[0]);
4155
}
4256
}
@@ -46,6 +60,7 @@ int main() {
4660
}
4761
}
4862

63+
free_linearlstm(net);
4964
c_close(&env);
5065
printf("Game Over! Final Max Tile: %d\n", env.score);
5166
return 0;

0 commit comments

Comments (0)