55#include <math.h>
66#include <string.h>
77#include "raylib.h"
8+ #define max (a , b ) (((a) > (b)) ? (a) : (b))
89
910#define SIZE 4
1011#define EMPTY 0
1112#define UP 1
1213#define DOWN 2
1314#define LEFT 3
1415#define RIGHT 4
16+ #define BASE_MAX_TICKS 2000
1517
1618// Precomputed constants
17- #define REWARD_MULTIPLIER 0.09090909f
19+ #define REWARD_MULTIPLIER 0.0625f
1820#define INVALID_MOVE_PENALTY -0.05f
1921#define GAME_OVER_PENALTY -1.0f
2022
23+ // To normalize perf from 0 to 1. Reachable with hidden size 256.
24+ #define OBSERVED_MAX_TILE 4096.0f
25+
2126typedef struct {
2227 float perf ;
2328 float score ;
29+ float merge_score ;
2430 float episode_return ;
2531 float episode_length ;
2632 float n ;
@@ -36,6 +42,8 @@ typedef struct {
3642 int tick ;
3743 unsigned char grid [SIZE ][SIZE ];
3844 float episode_reward ; // Accumulate episode reward
45+ int moves_made ;
46+ int max_episode_ticks ; // Dynamic max_ticks based on score
3947
4048 // Cached values to avoid recomputation
4149 int empty_count ;
@@ -93,9 +101,24 @@ static inline void update_empty_count(Game* game) {
93101 game -> empty_count = count ;
94102}
95103
104+ static inline unsigned char get_max_tile (Game * game ) {
105+ unsigned char max_tile = 0 ;
106+ // Unroll loop for better performance
107+ for (int i = 0 ; i < SIZE ; i ++ ) {
108+ for (int j = 0 ; j < SIZE ; j ++ ) {
109+ if (game -> grid [i ][j ] > max_tile ) {
110+ max_tile = game -> grid [i ][j ];
111+ }
112+ }
113+ }
114+ return max_tile ;
115+ }
116+
96117void add_log (Game * game ) {
97- game -> log .score = (float )(1 << game -> score );
98- game -> log .perf += ((float )game -> score ) * REWARD_MULTIPLIER ;
118+ unsigned char s = get_max_tile (game );
119+ game -> log .score += (float )(1 << s );
120+ game -> log .perf += (float )(1 << s ) / OBSERVED_MAX_TILE ;
121+ game -> log .merge_score += (float )game -> score ;
99122 game -> log .episode_length += game -> tick ;
100123 game -> log .episode_return += game -> episode_reward ;
101124 game -> log .n += 1 ;
@@ -114,6 +137,8 @@ void c_reset(Game* game) {
114137 game -> empty_count = SIZE * SIZE ;
115138 game -> game_over_cached = false;
116139 game -> grid_changed = true;
140+ game -> moves_made = 0 ;
141+ game -> max_episode_ticks = BASE_MAX_TICKS ;
117142
118143 if (game -> terminals ) game -> terminals [0 ] = 0 ;
119144
@@ -153,6 +178,7 @@ void add_random_tile(Game* game) {
153178 if (chosen_pos >= 0 ) {
154179 int i = chosen_pos / SIZE ;
155180 int j = chosen_pos % SIZE ;
181+ // Spawn a 2-tile 90% of the time and a 4-tile 10% of the time (cells store the exponent: 1 -> 2, 2 -> 4)
156182 game -> grid [i ][j ] = (rand () % 10 == 0 ) ? 2 : 1 ;
157183 game -> empty_count -- ;
158184 game -> grid_changed = true;
@@ -162,7 +188,7 @@ void add_random_tile(Game* game) {
162188}
163189
164190// Optimized slide and merge with fewer memory operations
165- static inline bool slide_and_merge (unsigned char * row , float * reward ) {
191+ static inline bool slide_and_merge (unsigned char * row , float * reward , float * score_increase ) {
166192 bool moved = false;
167193 int write_pos = 0 ;
168194
@@ -183,6 +209,7 @@ static inline bool slide_and_merge(unsigned char* row, float* reward) {
183209 if (row [i ] != EMPTY && row [i ] == row [i + 1 ]) {
184210 row [i ]++ ;
185211 * reward += ((float )row [i ]) * REWARD_MULTIPLIER ;
212+ * score_increase += (float )(1 << (int )row [i ]);
186213 // Shift remaining elements left
187214 for (int j = i + 1 ; j < SIZE - 1 ; j ++ ) {
188215 row [j ] = row [j + 1 ];
@@ -195,7 +222,7 @@ static inline bool slide_and_merge(unsigned char* row, float* reward) {
195222 return moved ;
196223}
197224
198- bool move (Game * game , int direction , float * reward ) {
225+ bool move (Game * game , int direction , float * reward , float * score_increase ) {
199226 bool moved = false;
200227 unsigned char temp [SIZE ];
201228
@@ -207,7 +234,7 @@ bool move(Game* game, int direction, float* reward) {
207234 temp [i ] = game -> grid [idx ][col ];
208235 }
209236
210- if (slide_and_merge (temp , reward )) {
237+ if (slide_and_merge (temp , reward , score_increase )) {
211238 moved = true;
212239 // Write back column
213240 for (int i = 0 ; i < SIZE ; i ++ ) {
@@ -224,7 +251,7 @@ bool move(Game* game, int direction, float* reward) {
224251 temp [i ] = game -> grid [row ][idx ];
225252 }
226253
227- if (slide_and_merge (temp , reward )) {
254+ if (slide_and_merge (temp , reward , score_increase )) {
228255 moved = true;
229256 // Write back row
230257 for (int i = 0 ; i < SIZE ; i ++ ) {
@@ -235,9 +262,7 @@ bool move(Game* game, int direction, float* reward) {
235262 }
236263 }
237264
238- if (!moved ) {
239- * reward = INVALID_MOVE_PENALTY ;
240- } else {
265+ if (moved ) {
241266 game -> grid_changed = true;
242267 game -> game_over_cached = false; // Invalidate cache
243268 }
@@ -280,34 +305,28 @@ bool is_game_over(Game* game) {
280305 return true;
281306}
282307
283- // Optimized score calculation
284- static inline unsigned char calc_score (Game * game ) {
285- unsigned char max_tile = 0 ;
286- // Unroll loop for better performance
287- for (int i = 0 ; i < SIZE ; i ++ ) {
288- for (int j = 0 ; j < SIZE ; j ++ ) {
289- if (game -> grid [i ][j ] > max_tile ) {
290- max_tile = game -> grid [i ][j ];
291- }
292- }
293- }
294- return max_tile ;
295- }
296-
297308void c_step (Game * game ) {
298309 float reward = 0.0f ;
299- bool did_move = move (game , game -> actions [0 ] + 1 , & reward );
310+ float score_add = 0.0f ;
311+ bool did_move = move (game , game -> actions [0 ] + 1 , & reward , & score_add );
300312 game -> tick ++ ;
301313
302314 if (did_move ) {
315+ game -> moves_made ++ ;
303316 add_random_tile (game );
304- game -> score = calc_score ( game ) ;
317+ game -> score += score_add ;
305318 update_empty_count (game ); // Update after adding tile
319+ // Cap the episode length so that repeated invalid moves cannot run forever during eval.
320+ // The cap does not need to be tight. Open question: does it need to be shown to the user?
321+ game -> max_episode_ticks = max (BASE_MAX_TICKS , game -> score / 10 );
322+ } else {
323+ reward = INVALID_MOVE_PENALTY ;
306324 }
307-
325+
308326 bool game_over = is_game_over (game );
309- game -> terminals [0 ] = game_over ? 1 : 0 ;
310-
327+ bool max_ticks_reached = game -> tick >= game -> max_episode_ticks ;
328+ game -> terminals [0 ] = (game_over || max_ticks_reached ) ? 1 : 0 ;
329+
311330 if (game_over ) {
312331 reward = GAME_OVER_PENALTY ;
313332 }
@@ -369,8 +388,11 @@ void c_render(Game* game) {
369388 }
370389
371390 // Draw score (format once per frame)
372- snprintf (score_text , sizeof (score_text ), "Score: %d" , 1 << game -> score );
391+ snprintf (score_text , sizeof (score_text ), "Score: %d" , game -> score );
373392 DrawText (score_text , 10 , px * SIZE + 10 , 24 , PUFF_WHITE );
393+
394+ snprintf (score_text , sizeof (score_text ), "Moves: %d" , game -> moves_made );
395+ DrawText (score_text , 210 , px * SIZE + 10 , 24 , PUFF_WHITE );
374396
375397 EndDrawing ();
376398}
0 commit comments