55#include <math.h>
66#include <string.h>
77#include "raylib.h"
8+ #define max (a , b ) (((a) > (b)) ? (a) : (b)) // a and b each appear twice in the expansion: pass side-effect-free expressions only
89
910#define SIZE 4 // board dimension: the grid is declared as grid[SIZE][SIZE]
1011#define EMPTY 0
1112#define UP 1 // UP/DOWN/LEFT/RIGHT: presumably the values taken by the `direction` argument of move() -- confirm
1213#define DOWN 2
1314#define LEFT 3
1415#define RIGHT 4
16+ #define BASE_MAX_TICKS 2000 // initial value of max_episode_ticks on reset, and its lower bound when c_step rescales it
1517
1618// Precomputed reward constants (the two penalties below are assigned to `reward` in c_step)
1719#define REWARD_MULTIPLIER 0.0625f
1820#define INVALID_MOVE_PENALTY -0.05f // reward set in c_step's else branch -- presumably the "move changed nothing" case; confirm
1921#define GAME_OVER_PENALTY -1.0f // reward set in c_step when is_game_over() returns true
2022

23+ // Divisor used to normalize the logged perf toward [0, 1] (add_log adds score / OBSERVED_MAX_SCORE). Raise it when a run scores higher.
24+ #define OBSERVED_MAX_SCORE 100000.0f
25+
2126typedef struct {
2227 float perf ;
2328 float score ;
29+ float max_tile ;
2430 float episode_return ;
2531 float episode_length ;
2632 float n ;
@@ -36,6 +42,7 @@ typedef struct {
3642 int tick ;
3743 unsigned char grid [SIZE ][SIZE ];
3844 float episode_reward ; // Accumulate episode reward
45+ int max_episode_ticks ; // Dynamic max_ticks based on score
3946
4047 // Cached values to avoid recomputation
4148 int empty_count ;
@@ -93,8 +100,7 @@ static inline void update_empty_count(Game* game) {
93100 game -> empty_count = count ;
94101}
95102
96- // Optimized score calculation
97- static inline unsigned char calc_score (Game * game ) {
103+ static inline unsigned char get_max_tile (Game * game ) {
98104 unsigned char max_tile = 0 ;
99105 // Unroll loop for better performance
100106 for (int i = 0 ; i < SIZE ; i ++ ) {
@@ -108,9 +114,10 @@ static inline unsigned char calc_score(Game* game) {
108114}
109115
110116void add_log (Game * game ) {
111- unsigned char s = calc_score (game );
112- game -> log .score = (float )(1 << s );
113- game -> log .perf += ((float )s ) * 0.0909f ;
117+ unsigned char s = get_max_tile (game );
118+ game -> log .max_tile += (float )(1 << s );
119+ game -> log .score += (float )game -> score ;
120+ game -> log .perf += (float )game -> score / OBSERVED_MAX_SCORE ;
114121 game -> log .episode_length += game -> tick ;
115122 game -> log .episode_return += game -> episode_reward ;
116123 game -> log .n += 1 ;
@@ -129,6 +136,7 @@ void c_reset(Game* game) {
129136 game -> empty_count = SIZE * SIZE ;
130137 game -> game_over_cached = false;
131138 game -> grid_changed = true;
139+ game -> max_episode_ticks = BASE_MAX_TICKS ;
132140
133141 if (game -> terminals ) game -> terminals [0 ] = 0 ;
134142
@@ -251,9 +259,7 @@ bool move(Game* game, int direction, float* reward, float* score_increase) {
251259 }
252260 }
253261
254- if (!moved ) {
255- * reward = INVALID_MOVE_PENALTY ;
256- } else {
262+ if (moved ) {
257263 game -> grid_changed = true;
258264 game -> game_over_cached = false; // Invalidate cache
259265 }
@@ -306,11 +312,16 @@ void c_step(Game* game) {
306312 add_random_tile (game );
307313 game -> score += score_add ;
308314 update_empty_count (game ); // Update after adding tile
315+ // This is to limit infinite invalid moves during eval
316+ game -> max_episode_ticks = max (BASE_MAX_TICKS , game -> score / 20 );
317+ } else {
318+ reward = INVALID_MOVE_PENALTY ;
309319 }
310-
320+
311321 bool game_over = is_game_over (game );
312- game -> terminals [0 ] = game_over ? 1 : 0 ;
313-
322+ bool max_ticks_reached = game -> tick >= game -> max_episode_ticks ;
323+ game -> terminals [0 ] = (game_over || max_ticks_reached ) ? 1 : 0 ;
324+
314325 if (game_over ) {
315326 reward = GAME_OVER_PENALTY ;
316327 }
0 commit comments