@@ -28,6 +28,21 @@ using namespace CHERI;
// Size of the memory block exercised by the random write/read-back block test.
2828const int RandTestBlockSize = 256 ;
// (1024 * 1024) / 4; presumably the number of 32-bit words in a 1MiB HyperRAM
// region - TODO confirm the unit against the code that uses this constant.
2929const int HyperramSize = (1024 * 1024 ) / 4 ;
3030
// Ensure that all writing of code to memory has completed before commencing execution
// of that code. Code has been written to [start, end] with both addresses being
// inclusive.
//
// Parameters:
//   start  first word of the newly-written code; it is read and written back.
//   end    last word of the newly-written code; not currently needed by the
//          implementation, but part of the interface so that callers describe
//          the whole range.
static inline void instr_fence(volatile uint32_t *start, volatile uint32_t *end) {
  // Only the first word is rewritten below, so `end` is deliberately unused.
  (void)end;

  // CPU fence instruction, but this does not guarantee the ordering of transactions
  // with the two TL-UL crossbars and a memory (SRAM or HyperRAM) presenting two
  // separated ports onto those two crossbars.
  //
  // The clobber must be spelled exactly "memory" for the compiler to treat this
  // statement as a barrier against reordering the preceding code stores.
  asm volatile(" fence.i" : : : "memory");

  // By writing the first word of the code again we can ensure that the code is
  // flushed out to the HyperRAM and will thus be coherent with instruction
  // fetching when the code is executed.
  *start = *start;
}

45+
3146// Write random values to a block of memory (size given by 'RandTestBlockSize'
3247// global constant). Reads them all back and checks read values matched written
3348// values.
@@ -199,12 +214,7 @@ void write_prog(Capability<volatile uint32_t> &hyperram_area, uint32_t addr) {
199214 hyperram_area[addr + 3 ] = 0x00000517 ;
200215 hyperram_area[addr + 4 ] = 0x8082 ;
201216
202- asm volatile (" fence.i" : : : " memory" );
203-
204- // By writing the first word of the code again we can ensure that the code is
205- // flushed out to the HyperRAM and will thus be coherent with instruction
206- // fetching when the code is executed.
207- hyperram_area[addr] = hyperram_area[addr];
217+ instr_fence (&hyperram_area[addr], &hyperram_area[addr + 4 ]);
208218}
209219
210220// Writes a short function to a random area of hyperram and executes it checking
@@ -453,6 +463,7 @@ int write_tests(Capability<volatile uint8_t> hyperram_b_area, Capability<volatil
453463 if (UINT32_MAX == src_off) {
454464 src_off = prng () & 0x3ffu ;
455465 }
466+ dst_off = 0x26c ;
456467 // Control area must not be overlapped by any write operation.
457468 const uint32_t ctrl_off = 0x800u ;
458469
@@ -582,6 +593,12 @@ int write_tests(Capability<volatile uint8_t> hyperram_b_area, Capability<volatil
582593 // at this point in the test?
583594 if (rd_off >= written_start && rd_off < written_end) exp_data = ~exp_data;
584595 failures += (rd_data != exp_data);
596+ if (failures) {
597+ log.println (" rd_off1 {:#x} {:#x} {:#x} {:#x} {:#x} {:#x} {:#x}" , rd_off, (int )test_type, written_start,
598+ written_end, rd_data, exp_data, dst_off);
599+ log.println (" src_off {:#x} len {:#x}" , src_off, len);
600+ while (1 ) asm (" " );
601+ }
585602 }
586603
587604 // Advance the destination pointers for the next chunk, maintaining natural alignment
@@ -600,6 +617,11 @@ int write_tests(Capability<volatile uint8_t> hyperram_b_area, Capability<volatil
600617 // read buffer within the controller interface.
601618 for (uint32_t i = 0u ; i < 0x80 / 4 ; i++) {
602619 failures += (hyperram_w_area[i + (ctrl_off >> 2 )] != 0u );
620+ if (failures) {
621+ log.println (" ctrl_off1 {:#x} {:#x} {:#x} {:#x}" , ctrl_off, (int )test_type, i,
622+ hyperram_w_area[i + (ctrl_off >> 2 )]);
623+ while (1 ) asm (" " );
624+ }
603625 }
604626
605627 // Read and check the entire target area.
@@ -609,12 +631,62 @@ int write_tests(Capability<volatile uint8_t> hyperram_b_area, Capability<volatil
609631 // Does the byte that we're checking lie within the range that should have been overwritten?
610632 if (i >= exp_start && (i - exp_start) < len) exp_data = ~exp_data;
611633 failures += (hyperram_b_area[i + dst_off] != exp_data);
634+ if (failures) {
635+ log.println (" rd_off2 {:#x} {:#x} {:#x} {:#x} {:#x} {:#x} {:#x}" , i, (int )test_type, exp_start,
636+ hyperram_b_area[i + dst_off], exp_data, dst_off, len);
637+ while (1 ) asm (" " );
638+ }
612639 }
613640 }
614641
615642 return failures;
616643}
617644
645+ // Simple performance test of linear code execution from the HyperRAM.
646+ // - build a sequence of repeated instructions at the given address
647+ int linear_execution_test (Capability<volatile uint32_t > hyperram_w_area, ds::xoroshiro::P64R32 &prng, Log &log,
648+ bool report_times = true , uint32_t prog_addr = UINT32_MAX, uint32_t prog_len = 0x2000u ,
649+ int iterations = 1 ) {
650+ int failures = 0 ;
651+
652+ // Choose a target address if not specified.
653+ if (prog_addr == UINT32_MAX) {
654+ prog_addr = prng () & 0x3fcu ; // This is sufficient to achieve all valid alignments.
655+ }
656+
657+ // Emit code; 8KiB (2048 instructions) of repeated 'cincoffset ca0, ca0, 0x4' instructions.
658+ // 0045155b cincoffset ca0, ca0, 0x4
659+ // 00008067 cret
660+ const uint32_t cret_instr = 0x00008067u ;
661+ const uint32_t inc_instr = 0x0045155bu ;
662+ uint32_t cret_idx = (prog_addr + prog_len) >> 2 ;
663+ uint32_t prog_idx = prog_addr >> 2 ;
664+ for (uint32_t idx = prog_idx; idx < cret_idx; ++idx) {
665+ hyperram_w_area[idx] = inc_instr;
666+ }
667+ // Complete the code.
668+ hyperram_w_area[cret_idx] = cret_instr;
669+ test_fn_t test_fn = get_hyperram_fn_ptr (HYPERRAM_ADDRESS + prog_addr);
670+
671+ instr_fence (&hyperram_w_area[prog_idx], &hyperram_w_area[cret_idx]);
672+
673+ // Start timing the execution.
674+ uint32_t start_time = get_mcycle ();
675+ for (int iter = 0 ; iter < iterations; ++iter) {
676+ // Invoke the function with a pointer to itself; each instruction advances the pointer
677+ // by one instruction.
678+ void *ret_ptr = test_fn ((uint32_t *)&hyperram_w_area[prog_idx]);
679+ // Check the returned pointer indicates the `cret` instruction.
680+ failures += (ret_ptr != &hyperram_w_area[cret_idx]);
681+ }
682+
683+ if (report_times) {
684+ log.println (" {} iteration(s) took {} cycles" , iterations, get_mcycle () - start_time);
685+ }
686+
687+ return failures;
688+ }
689+
618690/* *
619691 * C++ entry point for the loader. This is called from assembly, with the
620692 * read-write root in the first argument.
@@ -739,6 +811,25 @@ extern "C" [[noreturn]] void entry_point(void *rwRoot) {
739811 failures += buffering_test (hyperram_area, prng, 0x1000u );
740812 write_test_result (log, failures);
741813
814+ // Linear code sequence executing from HyperRAM.
815+ //
816+ // Executing with the icache disabled places more strain on the HyperRAM controller because
817+ // it will receive many more instruction fetches.
818+ const uint32_t lin_exec_len = 0x2000u ; // 8KiB of code is larger than the icache.
819+ const int lin_exec_iters = 25 ;
820+ bool cache_enabled = false ;
821+ do {
822+ cache_enabled = !cache_enabled;
823+ icache_enabled_set (cache_enabled);
824+ icache_invalidate ();
825+ log.println (" Running linear execution test with icache {:s}..." , cache_enabled ? " enabled" : " disabled" );
826+ failures += linear_execution_test (hyperram_area, prng, log, true , 0u , lin_exec_len, lin_exec_iters);
827+ log.print (" result..." );
828+ write_test_result (log, failures);
829+ } while (cache_enabled);
830+ // Reinstate the normal icache operation.
831+ icache_enabled_set (true );
832+
742833 // Write tests exercise the write coalescing logic of the HyperRAM controller interface.
743834 log.println (" Running write tests..." );
744835 for (int test_type = WriteTestType_B; test_type <= WriteTestType_CD; ++test_type) {
0 commit comments