2020#define INVALID_MOVE_PENALTY -0.05f
2121#define GAME_OVER_PENALTY -1.0f
2222
23- // To normalize perf from 0 to 1. Update when beaten .
24- #define OBSERVED_MAX_SCORE 100000 .0f
23+ // Used to normalize perf to the 0-1 range. A 4096 tile is reachable with hidden size 256.
24+ #define OBSERVED_MAX_TILE 4096 .0f
2525
2626typedef struct {
2727 float perf ;
2828 float score ;
29- float max_tile ;
29+ float merge_score ;
3030 float episode_return ;
3131 float episode_length ;
3232 float n ;
@@ -42,6 +42,7 @@ typedef struct {
4242 int tick ;
4343 unsigned char grid [SIZE ][SIZE ];
4444 float episode_reward ; // Accumulate episode reward
45+ int moves_made ;
4546 int max_episode_ticks ; // Dynamic max_ticks based on score
4647
4748 // Cached values to avoid recomputation
@@ -115,9 +116,9 @@ static inline unsigned char get_max_tile(Game* game) {
115116
116117void add_log (Game * game ) {
117118 unsigned char s = get_max_tile (game );
118- game -> log .max_tile += (float )(1 << s );
119- game -> log .score += (float )game -> score ;
120- game -> log .perf += (float )game -> score / OBSERVED_MAX_SCORE ;
119+ game -> log .score += (float )(1 << s );
120+ game -> log .perf += (float )( 1 << s ) / OBSERVED_MAX_TILE ;
121+ game -> log .merge_score += (float )game -> score ;
121122 game -> log .episode_length += game -> tick ;
122123 game -> log .episode_return += game -> episode_reward ;
123124 game -> log .n += 1 ;
@@ -136,6 +137,7 @@ void c_reset(Game* game) {
136137 game -> empty_count = SIZE * SIZE ;
137138 game -> game_over_cached = false;
138139 game -> grid_changed = true;
140+ game -> moves_made = 0 ;
139141 game -> max_episode_ticks = BASE_MAX_TICKS ;
140142
141143 if (game -> terminals ) game -> terminals [0 ] = 0 ;
@@ -176,6 +178,7 @@ void add_random_tile(Game* game) {
176178 if (chosen_pos >= 0 ) {
177179 int i = chosen_pos / SIZE ;
178180 int j = chosen_pos % SIZE ;
181+ // Implement the 90% 2, 10% 4 rule
179182 game -> grid [i ][j ] = (rand () % 10 == 0 ) ? 2 : 1 ;
180183 game -> empty_count -- ;
181184 game -> grid_changed = true;
@@ -309,11 +312,13 @@ void c_step(Game* game) {
309312 game -> tick ++ ;
310313
311314 if (did_move ) {
315+ game -> moves_made ++ ;
312316 add_random_tile (game );
313317 game -> score += score_add ;
314318 update_empty_count (game ); // Update after adding tile
315319 // This is to limit infinite invalid moves during eval
316- game -> max_episode_ticks = max (BASE_MAX_TICKS , game -> score / 20 );
320+ // This limit doesn't need to be tight. It may not need to be shown to the user either?
321+ game -> max_episode_ticks = max (BASE_MAX_TICKS , game -> score / 10 );
317322 } else {
318323 reward = INVALID_MOVE_PENALTY ;
319324 }
@@ -385,6 +390,9 @@ void c_render(Game* game) {
385390 // Draw score (format once per frame)
386391 snprintf (score_text , sizeof (score_text ), "Score: %d" , game -> score );
387392 DrawText (score_text , 10 , px * SIZE + 10 , 24 , PUFF_WHITE );
393+
394+ snprintf (score_text , sizeof (score_text ), "Moves: %d" , game -> moves_made );
395+ DrawText (score_text , 210 , px * SIZE + 10 , 24 , PUFF_WHITE );
388396
389397 EndDrawing ();
390398}
0 commit comments