|
4 | 4 | #define min(a, b) (((a) < (b)) ? (a) : (b)) |
5 | 5 | #define max(a, b) (((a) > (b)) ? (a) : (b)) |
6 | 6 |
|
| 7 | +// Network with hidden size 256. Should go to puffernet |
| 8 | +LinearLSTM* make_linearlstm_256(Weights* weights, int num_agents, int input_dim, int logit_sizes[], int num_actions) { |
| 9 | + LinearLSTM* net = calloc(1, sizeof(LinearLSTM)); |
| 10 | + net->num_agents = num_agents; |
| 11 | + net->obs = calloc(num_agents*input_dim, sizeof(float)); |
| 12 | + int hidden_dim = 256; |
| 13 | + net->encoder = make_linear(weights, num_agents, input_dim, hidden_dim); |
| 14 | + net->gelu1 = make_gelu(num_agents, hidden_dim); |
| 15 | + int atn_sum = 0; |
| 16 | + for (int i = 0; i < num_actions; i++) { |
| 17 | + atn_sum += logit_sizes[i]; |
| 18 | + } |
| 19 | + net->actor = make_linear(weights, num_agents, hidden_dim, atn_sum); |
| 20 | + net->value_fn = make_linear(weights, num_agents, hidden_dim, 1); |
| 21 | + net->lstm = make_lstm(weights, num_agents, hidden_dim, hidden_dim); |
| 22 | + net->multidiscrete = make_multidiscrete(num_agents, logit_sizes, num_actions); |
| 23 | + return net; |
| 24 | +} |
| 25 | + |
| 26 | + |
7 | 27 | void demo() { |
8 | 28 | Tetris env = { |
9 | 29 | .n_rows = 20, |
10 | 30 | .n_cols = 10, |
11 | | - .deck_size=3, |
| 31 | + .use_deck_obs = true, |
| 32 | + .n_noise_obs = 0, |
| 33 | + .n_init_garbage = 0, |
12 | 34 | }; |
13 | 35 | allocate(&env); |
14 | 36 | env.client = make_client(&env); |
15 | 37 | c_reset(&env); |
16 | 38 |
|
17 | | - Weights* weights = load_weights("resources/tetris/tetris_weights.bin", 163208); |
| 39 | + Weights* weights = load_weights("resources/tetris/tetris_weights.bin", 588552); |
18 | 40 | int logit_sizes[1] = {7}; |
19 | | - LinearLSTM* net = make_linearlstm(weights, 1, 234, logit_sizes, 1); |
| 41 | + LinearLSTM* net = make_linearlstm_256(weights, 1, 234, logit_sizes, 1); |
20 | 42 |
|
| 43 | + // State tracking for single-press actions to avoid using IsKeyPressed |
| 44 | + // because IsKeyPressed doesn't work well in web browsers |
| 45 | + static bool rotate_key_was_down = false; |
| 46 | + static bool hard_drop_key_was_down = false; |
| 47 | + static bool swap_key_was_down = false; |
| 48 | + |
| 49 | + int frame = 0; |
| 50 | + env.actions[0] = 0; |
21 | 51 | while (!WindowShouldClose()) { |
| 52 | + bool process_logic = true; |
| 53 | + frame++; |
| 54 | + |
22 | 55 | if (IsKeyDown(KEY_LEFT_SHIFT)) { |
23 | | - if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)){ |
24 | | - env.actions[0] = 1; |
25 | | - } |
26 | | - if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)){ |
27 | | - env.actions[0] = 2; |
28 | | - } |
29 | | - if (IsKeyPressed(KEY_UP) || IsKeyDown(KEY_W)) { |
30 | | - env.actions[0] = 3; |
31 | | - } |
32 | | - if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S)) { |
33 | | - env.actions[0] = 4; |
34 | | - } |
35 | | - if (IsKeyPressed(KEY_SPACE)) { |
36 | | - env.actions[0] = 5; |
37 | | - } |
38 | | - if (IsKeyPressed(KEY_C)) { |
39 | | - env.actions[0] = 6; |
| 56 | + if (frame % 3 != 0) { |
| 57 | + // This effectively slows down the client by 3x |
| 58 | + process_logic = false; |
| 59 | + } else { |
| 60 | + // Use KeyDown for left, right, down to allow continuous input |
| 61 | + // Though, IsKeyDown can overshoot ... |
| 62 | + if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) { |
| 63 | + env.actions[0] = 1; |
| 64 | + } else if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) { |
| 65 | + env.actions[0] = 2; |
| 66 | + } else if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S)) { |
| 67 | + env.actions[0] = 4; // Soft drop |
| 68 | + } |
| 69 | + // Manual state tracking for single-press actions, mutually exclusive |
| 70 | + else if ((IsKeyDown(KEY_UP) || IsKeyDown(KEY_W)) && !rotate_key_was_down) { |
| 71 | + env.actions[0] = 3; // Rotate |
| 72 | + } else if (IsKeyDown(KEY_SPACE) && !hard_drop_key_was_down) { |
| 73 | + env.actions[0] = 5; // Hard drop |
| 74 | + } else if (IsKeyDown(KEY_C) && !swap_key_was_down) { |
| 75 | + env.actions[0] = 6; // Swap |
| 76 | + } |
40 | 77 | } |
41 | 78 | } else { |
42 | 79 | forward_linearlstm(net, env.observations, env.actions); |
43 | 80 | } |
44 | 81 |
|
45 | | - c_step(&env); |
46 | | - env.actions[0] = 0; |
| 82 | + if (process_logic) { |
| 83 | + // Update key state flags after processing actions for the frame |
| 84 | + rotate_key_was_down = IsKeyDown(KEY_UP) || IsKeyDown(KEY_W); |
| 85 | + hard_drop_key_was_down = IsKeyDown(KEY_SPACE); |
| 86 | + swap_key_was_down = IsKeyDown(KEY_C); |
| 87 | + |
| 88 | + c_step(&env); |
| 89 | + |
| 90 | + env.actions[0] = 0; |
| 91 | + } |
| 92 | + |
47 | 93 | c_render(&env); |
48 | 94 | } |
| 95 | + |
49 | 96 | free_linearlstm(net); |
50 | 97 | free_allocated(&env); |
51 | 98 | close_client(env.client); |
|
0 commit comments