Skip to content

Commit 1b731b7

Browse files
committed
Fixes for getting the "test-app" booting
1 parent 5f9278c commit 1b731b7

File tree

14 files changed

+539
-113
lines changed

14 files changed

+539
-113
lines changed

config/examples/versal_vmk180.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ DEBUG?=1
2727
DEBUG_SYMBOLS=1
2828
DEBUG_UART=1
2929

30+
# Boot Benchmarking (optional):
31+
# Enables timing of boot operations (flash read, integrity, signature).
32+
BOOT_BENCHMARK?=1
33+
3034
VTOR?=1
3135
CORTEX_M0?=0
3236
NO_ASM?=0

docs/Targets.md

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1862,7 +1862,7 @@ qemu-system-aarch64 -machine xlnx-zcu102 -cpu cortex-a53 -serial stdio -display
18621862

18631863
## Versal Gen 1 VMK180
18641864

1865-
AMD Versal Prime VM1802 ACAP - Dual ARM Cortex-A72 (VMK180 Evaluation Board)
1865+
AMD Versal Prime Series VMK180 Evaluation Kit - Versal Prime XCVM1802-2MSEVSVA2197 Adaptive SoC - Dual ARM Cortex-A72
18661866

18671867
wolfBoot replaces U-Boot in the Versal boot flow:
18681868
```
@@ -1961,6 +1961,29 @@ Flash `BOOT.BIN` to QSPI flash using one of the following methods:
19611961
sf write ${loadaddr} 0 ${filesize}
19621962
```
19631963

1964+
### QSPI Flash
1965+
1966+
VMK180 uses dual parallel MT25QU01GBBB flash (128MB each, 256MB total). The QSPI driver supports:
1967+
- DMA mode (default) or IO polling mode (`GQSPI_MODE_IO`)
1968+
- Quad SPI (4-bit) for faster reads
1969+
- 4-byte addressing for full flash access
1970+
- Hardware striping for dual parallel operation
1971+
- 75MHz default clock (configurable via `GQSPI_CLK_DIV`)
1972+
1973+
### Building and Signing Test Application
1974+
1975+
```sh
1976+
# Build and sign the test application
1977+
make test-app/image.bin
1978+
make test-app/image_v1_signed.bin
1979+
```
1980+
1981+
The signed test application will be at `test-app/image_v1_signed.bin`.
1982+
1983+
### Flashing Test Application
1984+
1985+
After flashing `BOOT.BIN` to QSPI offset 0x0, flash the signed test app to the boot partition at offset `0x800000` using your preferred method.
1986+
19641987
### Example Boot Output
19651988

19661989
```
@@ -1969,11 +1992,30 @@ wolfBoot Secure Boot - AMD Versal
19691992
========================================
19701993
Current EL: 2
19711994
Timer Freq: 99999904 Hz
1972-
ext_flash_read: STUB
1973-
ext_flash_read: STUB
1974-
Versions: Boot 0, Update 0
1975-
No valid image found!
1976-
wolfBoot: PANIC!
1995+
QSPI: Lower ID: 20 BB 21
1996+
QSPI: Upper ID: 20 BB 21
1997+
QSPI: 75MHz, Quad mode, DMA
1998+
Versions: Boot 1, Update 0
1999+
Trying Boot partition at 0x800000
2000+
Loading header 512 bytes from 0x800000 to 0xFFFFE00
2001+
Loading image 664 bytes from 0x800200 to 0x10000000...done
2002+
Boot partition: 0xFFFFE00 (sz 664, ver 0x1, type 0x601)
2003+
Checking integrity...done
2004+
Verifying signature...done
2005+
Successfully selected image in part: 0
2006+
Firmware Valid
2007+
Loading elf at 0x10000000
2008+
Invalid elf, falling back to raw binary
2009+
Loading DTB (size 24894) from 0x1000 to RAM at 0x1000
2010+
Booting at 0x10000000
2011+
2012+
===========================================
2013+
wolfBoot Test Application - AMD Versal
2014+
===========================================
2015+
2016+
Application running successfully!
2017+
2018+
Entering idle loop...
19772019
```
19782020

19792021

hal/versal.c

Lines changed: 94 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,21 @@ void hal_delay_us(uint32_t us)
272272
;
273273
}
274274

275+
/**
276+
* Get current time in microseconds (for benchmarking)
277+
*/
278+
uint64_t hal_get_timer_us(void)
279+
{
280+
uint64_t cntpct = timer_get_count();
281+
uint64_t cntfrq = timer_get_freq();
282+
283+
if (cntfrq == 0)
284+
cntfrq = TIMER_CLK_FREQ;
285+
286+
/* Convert to microseconds: (count * 1000000) / freq */
287+
return (cntpct * 1000000ULL) / cntfrq;
288+
}
289+
275290

276291
/* ============================================================================
277292
* QSPI Flash Driver (GQSPI)
@@ -473,7 +488,9 @@ static void flush_dcache_range(uintptr_t start, uintptr_t end)
473488
__asm__ volatile("dsb sy" : : : "memory");
474489
}
475490

476-
/* Wait for DMA completion */
491+
/* Wait for DMA completion
492+
* Returns: 0 on success, -1 on timeout
493+
*/
477494
static int qspi_dma_wait(void)
478495
{
479496
uint32_t timeout = GQSPIDMA_TIMEOUT_TRIES;
@@ -483,6 +500,8 @@ static int qspi_dma_wait(void)
483500

484501
if (timeout == 0) {
485502
QSPI_DEBUG_PRINTF("QSPI: DMA timeout\n");
503+
/* Clear any pending interrupts */
504+
GQSPIDMA_ISR = GQSPIDMA_ISR_ALL_MASK;
486505
return -1;
487506
}
488507

@@ -782,10 +801,12 @@ static int qspi_transfer_qread_dma(QspiDev_t *dev, const uint8_t *cmd, uint32_t
782801

783802
/* DMA RX Phase */
784803
if (ret == 0 && rxLen > 0) {
785-
uint32_t remaining = rxLen;
804+
uint32_t remaining;
786805
uint32_t xferSz;
787806

788-
/* Check alignment - DMA requires cache-line aligned buffer */
807+
/* Check alignment - DMA requires cache-line aligned buffer.
808+
* If unaligned or not a multiple of 4 bytes, use temp buffer.
809+
* CRITICAL: GenFIFO transfer size must match DMA size! */
789810
if (((uintptr_t)rxData & (GQSPI_DMA_ALIGN - 1)) || (rxLen & 3)) {
790811
/* Use temp buffer for unaligned data */
791812
dmaPtr = dma_tmpbuf;
@@ -799,6 +820,9 @@ static int qspi_transfer_qread_dma(QspiDev_t *dev, const uint8_t *cmd, uint32_t
799820
dmaLen = rxLen;
800821
}
801822

823+
/* GenFIFO must request the same number of bytes as DMA expects */
824+
remaining = dmaLen;
825+
802826
/* Setup DMA destination */
803827
GQSPIDMA_DST = ((uintptr_t)dmaPtr & 0xFFFFFFFFUL);
804828
GQSPIDMA_DST_MSB = ((uintptr_t)dmaPtr >> 32);
@@ -839,7 +863,7 @@ static int qspi_transfer_qread_dma(QspiDev_t *dev, const uint8_t *cmd, uint32_t
839863
/* Invalidate cache after DMA */
840864
flush_dcache_range((uintptr_t)dmaPtr, (uintptr_t)dmaPtr + dmaLen);
841865

842-
/* Copy from temp buffer if needed */
866+
/* Copy from temp buffer if needed (only copy requested bytes) */
843867
if (ret == 0 && useTemp) {
844868
memcpy(rxData, dmaPtr, rxLen);
845869
}
@@ -1224,11 +1248,12 @@ static void qspi_init(void)
12241248
GQSPI_ISR = GQSPI_IXR_ALL_MASK;
12251249
dsb();
12261250

1227-
/* Preserve PLM's CFG but switch to IO mode for our transfers
1251+
/* Preserve PLM's CFG but set IO mode for initial commands (ID read, etc.)
12281252
* PLM: 0xA0080010 = DMA mode | manual start | WP_HOLD | CLK_POL
1229-
* Key: Keep manual start mode (bit 29) and clock settings */
1253+
* Key: Keep manual start mode (bit 29) and clock settings
1254+
* Note: qspi_transfer_qread_dma() will switch to DMA mode for reads */
12301255
cfg = (cfg & ~GQSPI_CFG_MODE_EN_MASK); /* Clear mode bits */
1231-
cfg |= GQSPI_CFG_MODE_EN_IO; /* Set IO mode */
1256+
cfg |= GQSPI_CFG_MODE_EN_IO; /* Set IO mode for init */
12321257
GQSPI_CFG = cfg;
12331258
dsb();
12341259

@@ -1237,6 +1262,18 @@ static void qspi_init(void)
12371262
GQSPI_RX_THRESH = 1;
12381263
GQSPI_GF_THRESH = 16;
12391264

1265+
#ifndef GQSPI_MODE_IO
1266+
/* Initialize DMA controller - this was missing compared to zynq.c!
1267+
* Without this, DMA transfers can hang or timeout because the DMA
1268+
* controller is in an undefined state after PLM handoff.
1269+
*/
1270+
GQSPIDMA_CTRL = GQSPIDMA_CTRL_DEF;
1271+
GQSPIDMA_CTRL2 = GQSPIDMA_CTRL2_DEF;
1272+
GQSPIDMA_ISR = GQSPIDMA_ISR_ALL_MASK; /* Clear all pending interrupts */
1273+
GQSPIDMA_IER = GQSPIDMA_ISR_ALL_MASK; /* Enable all interrupts */
1274+
dsb();
1275+
#endif
1276+
12401277
QSPI_DEBUG_PRINTF("QSPI: After config - CFG=0x%08x\n", GQSPI_CFG);
12411278

12421279
/* Configure device for single flash (lower) first */
@@ -1353,9 +1390,35 @@ void hal_prepare_boot(void)
13531390
}
13541391
#endif
13551392

1356-
/* Memory barriers before jumping to application */
1357-
dsb();
1358-
isb();
1393+
/* Clean and invalidate caches for the loaded application.
1394+
* The application was written to RAM via D-cache, but the CPU will
1395+
* fetch instructions via I-cache from main memory. We must:
1396+
* 1. Clean D-cache (flush dirty data to memory)
1397+
* 2. Invalidate I-cache (ensure fresh instruction fetch)
1398+
*/
1399+
1400+
/* Barrier: ensure all prior writes to the application image have completed before cache maintenance */
1401+
__asm__ volatile("dsb sy");
1402+
1403+
/* Clean D-cache for application region (0x10000000, 1MB should be enough) */
1404+
{
1405+
uintptr_t addr;
1406+
uintptr_t end = 0x10000000 + (1 * 1024 * 1024);
1407+
for (addr = 0x10000000; addr < end; addr += 64) {
1408+
/* DC CVAC - Clean data cache line by VA to PoC */
1409+
__asm__ volatile("dc cvac, %0" : : "r"(addr));
1410+
}
1411+
}
1412+
1413+
/* Data synchronization barrier - ensure clean completes */
1414+
__asm__ volatile("dsb sy");
1415+
1416+
/* Invalidate instruction cache to ensure fresh code is fetched */
1417+
__asm__ volatile("ic iallu");
1418+
1419+
/* Ensure cache invalidation completes before jumping */
1420+
__asm__ volatile("dsb sy");
1421+
__asm__ volatile("isb");
13591422
}
13601423

13611424
#ifdef MMU
@@ -1504,9 +1567,14 @@ int ext_flash_read(uintptr_t address, uint8_t *data, int len)
15041567
return -1;
15051568
}
15061569

1570+
QSPI_DEBUG_PRINTF("ext_flash_read: addr=0x%lx len=%d\n",
1571+
(unsigned long)address, len);
1572+
15071573
if (qspiDev.stripe) {
15081574
/* For dual parallel the address is divided by 2 */
15091575
addr /= 2;
1576+
QSPI_DEBUG_PRINTF(" stripe mode: flash_addr=0x%lx\n",
1577+
(unsigned long)addr);
15101578
}
15111579

15121580
/* Use Quad Read command (0x6C) with 4-byte address */
@@ -1523,7 +1591,22 @@ int ext_flash_read(uintptr_t address, uint8_t *data, int len)
15231591
ret = qspi_transfer_qread_dma(&qspiDev, cmd, 5, data, len, GQSPI_DUMMY_READ);
15241592
#endif
15251593

1526-
return ret;
1594+
/* On any read failure (e.g. DMA timeout), fill buffer with 0xFF to simulate unwritten flash.
1595+
* This handles reads to partition trailer areas that haven't been written.
1596+
* wolfBoot will see 0xFF (not magic) and handle appropriately. */
1597+
if (ret != 0) {
1598+
memset(data, 0xFF, len);
1599+
}
1600+
1601+
QSPI_DEBUG_PRINTF("ext_flash_read: ret=%d data[0-7]=%02x %02x %02x %02x %02x %02x %02x %02x\n",
1602+
ret,
1603+
len > 0 ? data[0] : 0, len > 1 ? data[1] : 0,
1604+
len > 2 ? data[2] : 0, len > 3 ? data[3] : 0,
1605+
len > 4 ? data[4] : 0, len > 5 ? data[5] : 0,
1606+
len > 6 ? data[6] : 0, len > 7 ? data[7] : 0);
1607+
1608+
/* Return bytes read on success (like zynq.c) */
1609+
return (ret == 0) ? len : ret;
15271610
}
15281611

15291612
int ext_flash_erase(uintptr_t address, int len)

hal/versal.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,21 @@
6161

6262
#endif /* USE_BUILTIN_STARTUP */
6363

64+
/* ============================================================================
65+
* Versal-specific Boot Defaults
66+
* ============================================================================
67+
* SKIP_GIC_INIT: Versal uses GICv3 (not GICv2 like ZynqMP).
68+
* BL31 handles GIC initialization, so skip gicv2_init_secure().
69+
* BOOT_EL1: wolfBoot runs at EL2, but applications (Linux, test-app)
70+
* expect EL1. Transition from EL2 to EL1 before jumping to app.
71+
*/
72+
#ifndef SKIP_GIC_INIT
73+
#define SKIP_GIC_INIT 1
74+
#endif
75+
#ifndef BOOT_EL1
76+
#define BOOT_EL1 1
77+
#endif
78+
6479

6580
/* ============================================================================
6681
* Memory Map

include/hal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ void hal_deinit();
5353

5454
void hal_init(void);
5555

56-
#ifdef WOLFBOOT_UPDATE_DISK
57-
/* Timer functions (platform-specific) */
56+
/* Timer functions (platform-specific, used for benchmarking) */
57+
#if defined(WOLFBOOT_UPDATE_DISK) || defined(BOOT_BENCHMARK)
5858
uint64_t hal_get_timer_us(void);
5959
#endif
6060

options.mk

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,15 @@ endif
604604
ifeq ($(NO_QNX),1)
605605
CFLAGS+=-D"NO_QNX"
606606
endif
607+
ifeq ($(SKIP_GIC_INIT),1)
608+
CFLAGS+=-D"SKIP_GIC_INIT"
609+
endif
610+
ifeq ($(BOOT_EL1),1)
611+
CFLAGS+=-D"BOOT_EL1"
612+
endif
613+
ifeq ($(BOOT_BENCHMARK),1)
614+
CFLAGS+=-D"BOOT_BENCHMARK"
615+
endif
607616

608617
ifeq ($(ALLOW_DOWNGRADE),1)
609618
CFLAGS+= -D"ALLOW_DOWNGRADE"

0 commit comments

Comments
 (0)