66#include < cassert>
77
// Number of values held by one persistent boundary record. It sizes the
// `values` and `hash` arrays of PersistentBoundaryCols and is the divisor used
// to derive `leaf_label` from a record's `ptr`.
inline constexpr size_t PERSISTENT_CHUNK = 8;

// Number of timestamps stored per persistent boundary record / trace row.
// NOTE(review): presumably each chunk is split into this many blocks, each with
// its own timestamp — confirm against the record producer.
inline constexpr size_t BLOCKS_PER_CHUNK = 2;

// Number of values held by one volatile boundary record (the volatile kernel
// only reads `values[0]`).
inline constexpr size_t VOLATILE_CHUNK = 1;
1011
/// One boundary record as consumed by the boundary trace generators.
///
/// @tparam CHUNK   Number of 32-bit values carried by the record.
/// @tparam BLOCKS  Number of timestamps carried by the record.
///
/// The host entry points obtain these records by `reinterpret_cast`-ing a raw
/// device buffer, so the field order and widths below define the expected
/// binary layout; do not reorder or resize fields without updating whatever
/// fills that buffer.
template <size_t CHUNK, size_t BLOCKS> struct BoundaryRecord {
    // Zero-length arrays are not valid C++; fail early with a readable message.
    static_assert(CHUNK > 0, "BoundaryRecord requires at least one value");
    static_assert(BLOCKS > 0, "BoundaryRecord requires at least one timestamp");

    uint32_t address_space;      // address space the record belongs to
    uint32_t ptr;                // pointer (offset) within the address space
    uint32_t timestamps[BLOCKS]; // one timestamp per block
    uint32_t values[CHUNK];      // the record's values
};
1718
@@ -21,7 +22,7 @@ template <typename T> struct PersistentBoundaryCols {
2122 T leaf_label;
2223 T values[PERSISTENT_CHUNK];
2324 T hash[PERSISTENT_CHUNK];
24- T timestamp ;
25+ T timestamps[BLOCKS_PER_CHUNK] ;
2526};
2627
// Number of elements in an address tuple; the volatile boundary kernel builds
// `{address_space, ptr}` arrays of this length when comparing adjacent records.
inline constexpr size_t ADDR_ELTS = 2;
@@ -42,7 +43,7 @@ __global__ void cukernel_persistent_boundary_tracegen(
4243 size_t height,
4344 size_t width,
4445 uint8_t const *const *initial_mem,
45- BoundaryRecord<PERSISTENT_CHUNK> *records,
46+ BoundaryRecord<PERSISTENT_CHUNK, BLOCKS_PER_CHUNK > *records,
4647 size_t num_records,
4748 FpArray<16 > *poseidon2_buffer,
4849 uint32_t *poseidon2_buffer_idx,
@@ -53,7 +54,7 @@ __global__ void cukernel_persistent_boundary_tracegen(
5354 RowSlice row = RowSlice (trace + row_idx, height);
5455
5556 if (record_idx < num_records) {
56- BoundaryRecord<PERSISTENT_CHUNK> record = records[record_idx];
57+ BoundaryRecord<PERSISTENT_CHUNK, BLOCKS_PER_CHUNK > record = records[record_idx];
5758 Poseidon2Buffer poseidon2 (poseidon2_buffer, poseidon2_buffer_idx, poseidon2_capacity);
5859 COL_WRITE_VALUE (row, PersistentBoundaryCols, address_space, record.address_space );
5960 COL_WRITE_VALUE (row, PersistentBoundaryCols, leaf_label, record.ptr / PERSISTENT_CHUNK);
@@ -77,24 +78,32 @@ __global__ void cukernel_persistent_boundary_tracegen(
7778 }
7879 FpArray<8 > init_hash = poseidon2.hash_and_record (init_values);
7980 COL_WRITE_VALUE (row, PersistentBoundaryCols, expand_direction, Fp::one ());
80- COL_WRITE_VALUE (row, PersistentBoundaryCols, timestamp, Fp::zero ());
8181 COL_WRITE_ARRAY (
8282 row, PersistentBoundaryCols, values, reinterpret_cast <Fp const *>(init_values.v )
8383 );
8484 COL_WRITE_ARRAY (
8585 row, PersistentBoundaryCols, hash, reinterpret_cast <Fp const *>(init_hash.v )
8686 );
87+ Fp ts_values[BLOCKS_PER_CHUNK];
88+ for (int i = 0 ; i < BLOCKS_PER_CHUNK; ++i) {
89+ ts_values[i] = Fp::zero ();
90+ }
91+ COL_WRITE_ARRAY (row, PersistentBoundaryCols, timestamps, ts_values);
8792 } else {
8893 FpArray<8 > final_values = FpArray<8 >::from_raw_array (record.values );
8994 FpArray<8 > final_hash = poseidon2.hash_and_record (final_values);
9095 COL_WRITE_VALUE (row, PersistentBoundaryCols, expand_direction, Fp::neg_one ());
91- COL_WRITE_VALUE (row, PersistentBoundaryCols, timestamp, record.timestamp );
9296 COL_WRITE_ARRAY (
9397 row, PersistentBoundaryCols, values, reinterpret_cast <Fp const *>(final_values.v )
9498 );
9599 COL_WRITE_ARRAY (
96100 row, PersistentBoundaryCols, hash, reinterpret_cast <Fp const *>(final_hash.v )
97101 );
102+ Fp ts_values[BLOCKS_PER_CHUNK];
103+ for (int i = 0 ; i < BLOCKS_PER_CHUNK; ++i) {
104+ ts_values[i] = Fp (record.timestamps [i]);
105+ }
106+ COL_WRITE_ARRAY (row, PersistentBoundaryCols, timestamps, ts_values);
98107 }
99108 } else {
100109 row.fill_zero (0 , width);
@@ -105,7 +114,7 @@ __global__ void cukernel_volatile_boundary_tracegen(
105114 Fp *trace,
106115 size_t height,
107116 size_t width,
108- BoundaryRecord<VOLATILE_CHUNK> const *records,
117+ BoundaryRecord<VOLATILE_CHUNK, 1 > const *records,
109118 size_t num_records,
110119 uint32_t *range_checker,
111120 size_t range_checker_num_bins,
@@ -122,7 +131,7 @@ __global__ void cukernel_volatile_boundary_tracegen(
122131 // For the sake of always filling `addr_lt_aux`
123132 row.fill_zero (0 , width);
124133 }
125- BoundaryRecord<VOLATILE_CHUNK> record = records[idx];
134+ BoundaryRecord<VOLATILE_CHUNK, 1 > record = records[idx];
126135 rc.decompose (
127136 record.address_space ,
128137 as_max_bits,
@@ -137,11 +146,11 @@ __global__ void cukernel_volatile_boundary_tracegen(
137146 );
138147 COL_WRITE_VALUE (row, VolatileBoundaryCols, initial_data, Fp::zero ());
139148 COL_WRITE_VALUE (row, VolatileBoundaryCols, final_data, record.values [0 ]);
140- COL_WRITE_VALUE (row, VolatileBoundaryCols, final_timestamp, record.timestamp );
149+ COL_WRITE_VALUE (row, VolatileBoundaryCols, final_timestamp, record.timestamps [ 0 ] );
141150 COL_WRITE_VALUE (row, VolatileBoundaryCols, is_valid, Fp::one ());
142151
143152 if (idx != num_records - 1 ) {
144- BoundaryRecord<VOLATILE_CHUNK> next_record = records[idx + 1 ];
153+ BoundaryRecord<VOLATILE_CHUNK, 1 > next_record = records[idx + 1 ];
145154 uint32_t curr[ADDR_ELTS] = {record.address_space , record.ptr };
146155 uint32_t next[ADDR_ELTS] = {next_record.address_space , next_record.ptr };
147156 IsLessThanArray::generate_subrow (
@@ -189,8 +198,8 @@ extern "C" int _persistent_boundary_tracegen(
189198 size_t poseidon2_capacity
190199) {
191200 auto [grid, block] = kernel_launch_params (height);
192- BoundaryRecord<PERSISTENT_CHUNK> *d_records =
193- reinterpret_cast <BoundaryRecord<PERSISTENT_CHUNK> *>(d_raw_records);
201+ BoundaryRecord<PERSISTENT_CHUNK, BLOCKS_PER_CHUNK > *d_records =
202+ reinterpret_cast <BoundaryRecord<PERSISTENT_CHUNK, BLOCKS_PER_CHUNK > *>(d_raw_records);
194203 FpArray<16 > *d_poseidon2_buffer = reinterpret_cast <FpArray<16 > *>(d_poseidon2_raw_buffer);
195204 cukernel_persistent_boundary_tracegen<<<grid, block>>> (
196205 d_trace,
@@ -218,7 +227,7 @@ extern "C" int _volatile_boundary_tracegen(
218227 size_t ptr_max_bits
219228) {
220229 auto [grid, block] = kernel_launch_params (height, 512 );
221- auto d_records = reinterpret_cast <BoundaryRecord<VOLATILE_CHUNK> const *>(d_raw_records);
230+ auto d_records = reinterpret_cast <BoundaryRecord<VOLATILE_CHUNK, 1 > const *>(d_raw_records);
222231 cukernel_volatile_boundary_tracegen<<<grid, block>>> (
223232 d_trace,
224233 height,
0 commit comments