Skip to content

Commit 6d0cd0a

Browse files
committed
Linear execution test of HyperRAM interface
Add a new test that both checks the correctness and measures the performance of executing a linear sequence of repeated instructions from the HyperRAM. This is perhaps more relevant to most current uses of the HyperRAM, because it is largely intended to hold additional code rather than to be the target of data loads and stores.
1 parent ed86a2e commit 6d0cd0a

File tree

1 file changed

+97
-6
lines changed

1 file changed

+97
-6
lines changed

sw/cheri/checks/hyperram_test.cc

Lines changed: 97 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,21 @@ using namespace CHERI;
2828
const int RandTestBlockSize = 256;
2929
const int HyperramSize = (1024 * 1024) / 4;
3030

31+
// Ensure that all writing of code to memory has completed before commencing execution
// of that code. Code has been written to [start, end] with both addresses being
// inclusive.
//
// - start: first word of the freshly-written code; it is read and written back.
// - end:   last word of the freshly-written code; not currently used by this
//          implementation, but kept so that callers describe the complete range.
static inline void instr_fence(volatile uint32_t *start, [[maybe_unused]] volatile uint32_t *end) {
  // CPU fence instruction, but this does not guarantee the ordering of transactions
  // with the two TL-UL crossbars and a memory (SRAM or HyperRAM) presenting two
  // separated ports onto those two crossbars.
  asm volatile("fence.i" : : : "memory");

  // By writing the first word of the code again we can ensure that the code is
  // flushed out to the HyperRAM and will thus be coherent with instruction
  // fetching when the code is executed.
  *start = *start;
}
45+
3146
// Write random values to a block of memory (size given by 'RandTestBlockSize'
3247
// global constant). Reads them all back and checks read values matched written
3348
// values.
@@ -199,12 +214,7 @@ void write_prog(Capability<volatile uint32_t> &hyperram_area, uint32_t addr) {
199214
hyperram_area[addr + 3] = 0x00000517;
200215
hyperram_area[addr + 4] = 0x8082;
201216

202-
asm volatile("fence.i" : : : "memory");
203-
204-
// By writing the first word of the code again we can ensure that the code is
205-
// flushed out to the HyperRAM and will thus be coherent with instruction
206-
// fetching when the code is executed.
207-
hyperram_area[addr] = hyperram_area[addr];
217+
instr_fence(&hyperram_area[addr], &hyperram_area[addr + 4]);
208218
}
209219

210220
// Writes a short function to a random area of hyperram and executes it checking
@@ -453,6 +463,7 @@ int write_tests(Capability<volatile uint8_t> hyperram_b_area, Capability<volatil
453463
if (UINT32_MAX == src_off) {
454464
src_off = prng() & 0x3ffu;
455465
}
466+
dst_off = 0x26c;
456467
// Control area must not be overlapped by any write operation.
457468
const uint32_t ctrl_off = 0x800u;
458469

@@ -582,6 +593,12 @@ int write_tests(Capability<volatile uint8_t> hyperram_b_area, Capability<volatil
582593
// at this point in the test?
583594
if (rd_off >= written_start && rd_off < written_end) exp_data = ~exp_data;
584595
failures += (rd_data != exp_data);
596+
if (failures) {
597+
log.println("rd_off1 {:#x} {:#x} {:#x} {:#x} {:#x} {:#x} {:#x}", rd_off, (int)test_type, written_start,
598+
written_end, rd_data, exp_data, dst_off);
599+
log.println(" src_off {:#x} len {:#x}", src_off, len);
600+
while (1) asm(" ");
601+
}
585602
}
586603

587604
// Advance the destination pointers for the next chunk, maintaining natural alignment
@@ -600,6 +617,11 @@ int write_tests(Capability<volatile uint8_t> hyperram_b_area, Capability<volatil
600617
// read buffer within the controller interface.
601618
for (uint32_t i = 0u; i < 0x80 / 4; i++) {
602619
failures += (hyperram_w_area[i + (ctrl_off >> 2)] != 0u);
620+
if (failures) {
621+
log.println("ctrl_off1 {:#x} {:#x} {:#x} {:#x}", ctrl_off, (int)test_type, i,
622+
hyperram_w_area[i + (ctrl_off >> 2)]);
623+
while (1) asm(" ");
624+
}
603625
}
604626

605627
// Read and check the entire target area.
@@ -609,12 +631,62 @@ int write_tests(Capability<volatile uint8_t> hyperram_b_area, Capability<volatil
609631
// Does the byte that we're checking lie within the range that should have been overwritten?
610632
if (i >= exp_start && (i - exp_start) < len) exp_data = ~exp_data;
611633
failures += (hyperram_b_area[i + dst_off] != exp_data);
634+
if (failures) {
635+
log.println("rd_off2 {:#x} {:#x} {:#x} {:#x} {:#x} {:#x} {:#x}", i, (int)test_type, exp_start,
636+
hyperram_b_area[i + dst_off], exp_data, dst_off, len);
637+
while (1) asm(" ");
638+
}
612639
}
613640
}
614641

615642
return failures;
616643
}
617644

645+
// Simple performance test of linear code execution from the HyperRAM.
646+
// - build a sequence of repeated instructions at the given address
647+
int linear_execution_test(Capability<volatile uint32_t> hyperram_w_area, ds::xoroshiro::P64R32 &prng, Log &log,
648+
bool report_times = true, uint32_t prog_addr = UINT32_MAX, uint32_t prog_len = 0x2000u,
649+
int iterations = 1) {
650+
int failures = 0;
651+
652+
// Choose a target address if not specified.
653+
if (prog_addr == UINT32_MAX) {
654+
prog_addr = prng() & 0x3fcu; // This is sufficient to achieve all valid alignments.
655+
}
656+
657+
// Emit code; 8KiB (2048 instructions) of repeated 'cincoffset ca0, ca0, 0x4' instructions.
658+
// 0045155b cincoffset ca0, ca0, 0x4
659+
// 00008067 cret
660+
const uint32_t cret_instr = 0x00008067u;
661+
const uint32_t inc_instr = 0x0045155bu;
662+
uint32_t cret_idx = (prog_addr + prog_len) >> 2;
663+
uint32_t prog_idx = prog_addr >> 2;
664+
for (uint32_t idx = prog_idx; idx < cret_idx; ++idx) {
665+
hyperram_w_area[idx] = inc_instr;
666+
}
667+
// Complete the code.
668+
hyperram_w_area[cret_idx] = cret_instr;
669+
test_fn_t test_fn = get_hyperram_fn_ptr(HYPERRAM_ADDRESS + prog_addr);
670+
671+
instr_fence(&hyperram_w_area[prog_idx], &hyperram_w_area[cret_idx]);
672+
673+
// Start timing the execution.
674+
uint32_t start_time = get_mcycle();
675+
for (int iter = 0; iter < iterations; ++iter) {
676+
// Invoke the function with a pointer to itself; each instruction advances the pointer
677+
// by one instruction.
678+
void *ret_ptr = test_fn((uint32_t *)&hyperram_w_area[prog_idx]);
679+
// Check the returned pointer indicates the `cret` instruction.
680+
failures += (ret_ptr != &hyperram_w_area[cret_idx]);
681+
}
682+
683+
if (report_times) {
684+
log.println(" {} iteration(s) took {} cycles", iterations, get_mcycle() - start_time);
685+
}
686+
687+
return failures;
688+
}
689+
618690
/**
619691
* C++ entry point for the loader. This is called from assembly, with the
620692
* read-write root in the first argument.
@@ -739,6 +811,25 @@ extern "C" [[noreturn]] void entry_point(void *rwRoot) {
739811
failures += buffering_test(hyperram_area, prng, 0x1000u);
740812
write_test_result(log, failures);
741813

814+
// Linear code sequence executing from HyperRAM.
815+
//
816+
// Executing with the icache disabled places more strain on the HyperRAM controller because
817+
// it will receive many more instruction fetches.
818+
const uint32_t lin_exec_len = 0x2000u; // 8KiB of code is larger than the icache.
819+
const int lin_exec_iters = 25;
820+
bool cache_enabled = false;
821+
do {
822+
cache_enabled = !cache_enabled;
823+
icache_enabled_set(cache_enabled);
824+
icache_invalidate();
825+
log.println("Running linear execution test with icache {:s}...", cache_enabled ? "enabled" : "disabled");
826+
failures += linear_execution_test(hyperram_area, prng, log, true, 0u, lin_exec_len, lin_exec_iters);
827+
log.print(" result...");
828+
write_test_result(log, failures);
829+
} while (cache_enabled);
830+
// Reinstate the normal icache operation.
831+
icache_enabled_set(true);
832+
742833
// Write tests exercise the write coalescing logic of the HyperRAM controller interface.
743834
log.println("Running write tests...");
744835
for (int test_type = WriteTestType_B; test_type <= WriteTestType_CD; ++test_type) {

0 commit comments

Comments
 (0)