@@ -21,16 +21,15 @@ typedef struct {
2121 float n ;
2222} Log ;
2323
24- // Required struct for env_binding.h compatibility
2524typedef struct {
2625 Log log ; // Required
27- unsigned char * observations ; // Required (flattened 256 floats: 16 tiles * 16 one-hot)
26+ unsigned char * observations ; // Cheaper in memory if encoded in uint_8
2827 int * actions ; // Required
2928 float * rewards ; // Required
3029 unsigned char * terminals ; // Required
3130 int score ;
3231 int tick ;
33- unsigned char grid [SIZE ][SIZE ]; // Store tile values directly as floats
32+ unsigned char grid [SIZE ][SIZE ];
3433 float episode_reward ; // Accumulate episode reward
3534} Game ;
3635
@@ -43,27 +42,15 @@ void c_step(Game* env);
4342void c_render (Game * env );
4443void c_close (Game * env );
4544
46- // Update the observation vector to be one-hot encoded for all tiles
4745static void update_observations (Game * game ) {
48- for (int i = 0 ; i < SIZE ; i ++ ) {
49- for (int j = 0 ; j < SIZE ; j ++ ) {
50- game -> observations [i * SIZE + j ] = game -> grid [i ][j ];
51- }
52- }
46+ memcpy (game -> observations , game -> grid , sizeof (unsigned char ) * SIZE * SIZE );
5347}
5448
5549// --- Implementation ---
5650
5751void add_log (Game * game ) {
58- int max_tile = 0 ;
59- for (int i = 0 ; i < SIZE ; i ++ ) {
60- for (int j = 0 ; j < SIZE ; j ++ ) {
61- int tile = (int )(game -> grid [i ][j ]);
62- if (tile > max_tile ) max_tile = tile ;
63- }
64- }
65- game -> log .score = (float )pow (2 ,max_tile );
66- game -> log .perf += (game -> rewards [0 ] > 0 ) ? 1 : 0 ;
52+ game -> log .score = pow (2 ,(float )game -> score );
53+ game -> log .perf += (float )game -> score /11. ;
6754 game -> log .episode_length += game -> tick ;
6855 game -> log .episode_return += game -> episode_reward ;
6956 game -> log .n += 1 ;
@@ -103,22 +90,12 @@ void add_random_tile(Game* game) {
10390 }
10491 if (count > 0 ) {
10592 int random_index = rand () % count ;
106- int value = (rand () % 10 == 0 ) ? 4 : 2 ;
107- game -> grid [empty_cells [random_index ][0 ]][empty_cells [random_index ][1 ]] = ( float ) value ;
93+ int value = (rand () % 10 == 0 ) ? 2 : 1 ;
94+ game -> grid [empty_cells [random_index ][0 ]][empty_cells [random_index ][1 ]] = value ;
10895 }
10996 update_observations (game );
11097}
11198
112- void print_grid (Game * game ) {
113- for (int i = 0 ; i < SIZE ; i ++ ) {
114- for (int j = 0 ; j < SIZE ; j ++ ) {
115- printf ("%4.0f " , game -> grid [i ][j ]);
116- }
117- printf ("\n" );
118- }
119- printf ("Score: %d\n" , game -> score );
120- }
121-
12299bool slide_and_merge_row (float * row , float * reward ) {
123100 bool moved = false;
124101 // Slide left
@@ -231,11 +208,11 @@ bool is_game_over(Game* game) {
231208 return true;
232209}
233210
234- int calc_score (Game * game ) {
235- int max_tile = 0 ;
211+ unsigned char calc_score (Game * game ) {
212+ unsigned char max_tile = 0 ;
236213 for (int i = 0 ; i < SIZE ; i ++ ) {
237214 for (int j = 0 ; j < SIZE ; j ++ ) {
238- int tile = (int )( game -> grid [i ][j ]);
215+ int tile = (game -> grid [i ][j ]);
239216 if (tile > max_tile ) max_tile = tile ;
240217 }
241218 }
@@ -248,7 +225,7 @@ void c_step(Game* game) {
248225 game -> tick += 1 ;
249226 if (did_move ) {
250227 add_random_tile (game );
251- game -> score = calc_score (game );
228+ game -> score = ( float ) calc_score (game );
252229 }
253230
254231 game -> terminals [0 ] = is_game_over (game ) ? 1 : 0 ;
0 commit comments