Skip to content

Commit 864e2b3

Browse files
committed
pbdrv/cache.h: Ensure DMA/uncached buffers don't share cache lines
If DMA RX buffers share cache lines with other data, it can lead to race conditions which corrupt data. If we flush and invalidate the cache before performing the DMA, reading the unrelated data can cause the cache line to be loaded back into cache, cancelling out the invalidation. If we wait until after performing the DMA and perform a flush and invalidate, the flush can corrupt part of the data which was written by the DMA. If we only perform an invalidate, writes to the unrelated data can be lost. A similar race condition can happen if an uncached alias is used to write to data which sits next to unrelated data. This currently only applies to small buffers in the SPI block driver. The solution we use here is to place each buffer in its own cache-line-aligned block of memory and only perform an invalidate upon DMA completion. This makes sure that no unrelated data can get caught up in the same cache lines. This issue does not affect DMA TX buffers written using the normal cached mapping. Those are handled by flushing the cache.
1 parent f899810 commit 864e2b3

File tree

4 files changed

+46
-36
lines changed

4 files changed

+46
-36
lines changed

lib/pbio/drv/block_device/block_device_ev3.c

Lines changed: 39 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -94,21 +94,27 @@ enum {
9494
static struct {
9595
/** HAL Transfer status */
9696
volatile spi_status_t status;
97-
// This is used when SPI only needs to receive. It should always stay as 0.
98-
uint8_t tx_dummy_byte;
99-
// This is used when received data is to be discarded. Its value should be ignored.
100-
uint8_t rx_dummy_byte;
97+
// This stores the RX buffer address so that we clean the cache when DMA is complete
98+
uint32_t rx_user_buf_addr;
99+
// This stores the RX buffer size;
100+
uint32_t rx_user_buf_sz;
101+
} spi_dev;
102+
103+
/**
104+
* SPI small DMA buffers
105+
*/
106+
static struct {
101107
// This is used when transmitting so that the last byte clears CSHOLD.
102108
uint32_t tx_last_word;
103109
// This is used to hold the initial command to the SPI peripheral.
104110
uint8_t spi_cmd_buf_tx[SPI_CMD_BUF_SZ];
105111
// This is used to hold the replies to commands to the SPI peripheral.
106112
uint8_t spi_cmd_buf_rx[SPI_CMD_BUF_SZ];
107-
// This stores the RX buffer address so that we clean the cache when DMA is complete
108-
uint32_t rx_user_buf_addr;
109-
// This stores the RX buffer size;
110-
uint32_t rx_user_buf_sz;
111-
} spi_dev;
113+
// This is used when SPI only needs to receive. It should always stay as 0.
114+
uint8_t tx_dummy_byte;
115+
// This is used when received data is to be discarded. Its value should be ignored.
116+
uint8_t rx_dummy_byte;
117+
} spi_dev_bufs PBDRV_DMA_BUF;
112118

113119
static uint32_t last_spi_dma_complete_time;
114120

@@ -161,7 +167,7 @@ static void spi0_isr(void) {
161167
continue;
162168
}
163169

164-
PBDRV_UNCACHED(spi_dev.spi_cmd_buf_rx[0]) = HWREG(SOC_SPI_0_REGS + SPI_SPIBUF);
170+
PBDRV_UNCACHED(spi_dev_bufs.spi_cmd_buf_rx[0]) = HWREG(SOC_SPI_0_REGS + SPI_SPIBUF);
165171
spi_dev.status &= ~SPI_STATUS_WAIT_RX;
166172
SPIIntDisable(SOC_SPI_0_REGS, SPI_RECV_INT);
167173
pbio_os_request_poll();
@@ -366,17 +372,17 @@ static pbio_error_t spi_begin_for_flash(
366372
SPIIntEnable(SOC_SPI_0_REGS, SPI_RECV_INT);
367373
HWREG(SOC_SPI_0_REGS + SPI_SPIDAT1) = tx;
368374
} else {
369-
memcpy(PBDRV_UNCACHED_ADDR(spi_dev.spi_cmd_buf_tx), cmd, cmd_len);
375+
memcpy(PBDRV_UNCACHED_ADDR(spi_dev_bufs.spi_cmd_buf_tx), cmd, cmd_len);
370376
spi_dev.rx_user_buf_addr = (uint32_t)user_data_rx;
371377
spi_dev.rx_user_buf_sz = user_data_len;
372378

373379
if (user_data_len == 0) {
374380
// Only a command, no user data
375381

376-
PBDRV_UNCACHED(spi_dev.tx_last_word) = spi0_last_dat1_for_flash(cmd[cmd_len - 1]);
382+
PBDRV_UNCACHED(spi_dev_bufs.tx_last_word) = spi0_last_dat1_for_flash(cmd[cmd_len - 1]);
377383

378384
// TX everything except last byte
379-
ps.p.srcAddr = (unsigned int)(&spi_dev.spi_cmd_buf_tx);
385+
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_tx);
380386
ps.p.destAddr = SOC_SPI_0_REGS + SPI_SPIDAT1;
381387
ps.p.aCnt = 1;
382388
ps.p.bCnt = cmd_len - 1;
@@ -391,7 +397,7 @@ static pbio_error_t spi_begin_for_flash(
391397
edma3_set_param(EDMA3_CHA_SPI0_TX, &ps);
392398

393399
// TX last byte, clearing CSHOLD
394-
ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word);
400+
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word);
395401
ps.p.aCnt = 4;
396402
ps.p.bCnt = 1;
397403
ps.p.linkAddr = 0xffff;
@@ -400,7 +406,7 @@ static pbio_error_t spi_begin_for_flash(
400406

401407
// RX all bytes
402408
ps.p.srcAddr = SOC_SPI_0_REGS + SPI_SPIBUF;
403-
ps.p.destAddr = (unsigned int)(&spi_dev.spi_cmd_buf_rx);
409+
ps.p.destAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_rx);
404410
ps.p.aCnt = 1;
405411
ps.p.bCnt = cmd_len;
406412
ps.p.cCnt = 1;
@@ -416,7 +422,7 @@ static pbio_error_t spi_begin_for_flash(
416422
// Command *and* user data
417423

418424
// TX the command
419-
ps.p.srcAddr = (unsigned int)(&spi_dev.spi_cmd_buf_tx);
425+
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_tx);
420426
ps.p.destAddr = SOC_SPI_0_REGS + SPI_SPIDAT1;
421427
ps.p.aCnt = 1;
422428
ps.p.bCnt = cmd_len;
@@ -432,7 +438,7 @@ static pbio_error_t spi_begin_for_flash(
432438

433439
if (user_data_tx) {
434440
pbdrv_cache_prepare_before_dma(user_data_tx, user_data_len);
435-
PBDRV_UNCACHED(spi_dev.tx_last_word) = spi0_last_dat1_for_flash(user_data_tx[user_data_len - 1]);
441+
PBDRV_UNCACHED(spi_dev_bufs.tx_last_word) = spi0_last_dat1_for_flash(user_data_tx[user_data_len - 1]);
436442

437443
// TX all but the last byte
438444
ps.p.srcAddr = (unsigned int)(user_data_tx);
@@ -441,24 +447,24 @@ static pbio_error_t spi_begin_for_flash(
441447
edma3_set_param(126, &ps);
442448

443449
// TX the last byte, clearing CSHOLD
444-
ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word);
450+
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word);
445451
ps.p.aCnt = 4;
446452
ps.p.bCnt = 1;
447453
ps.p.linkAddr = 0xffff;
448454
ps.p.opt = EDMA3CC_OPT_TCINTEN | (EDMA3_CHA_SPI0_TX << EDMA3CC_OPT_TCC_SHIFT);
449455
edma3_set_param(127, &ps);
450456
} else {
451-
PBDRV_UNCACHED(spi_dev.tx_last_word) = spi0_last_dat1_for_flash(0);
457+
PBDRV_UNCACHED(spi_dev_bufs.tx_last_word) = spi0_last_dat1_for_flash(0);
452458

453459
// TX all but the last byte
454-
ps.p.srcAddr = (unsigned int)(&spi_dev.tx_dummy_byte);
460+
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_dummy_byte);
455461
ps.p.bCnt = user_data_len - 1;
456462
ps.p.srcBIdx = 0;
457463
ps.p.linkAddr = 127 * 32;
458464
edma3_set_param(126, &ps);
459465

460466
// TX the last byte, clearing CSHOLD
461-
ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word);
467+
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word);
462468
ps.p.aCnt = 4;
463469
ps.p.bCnt = 1;
464470
ps.p.linkAddr = 0xffff;
@@ -468,7 +474,7 @@ static pbio_error_t spi_begin_for_flash(
468474

469475
// RX the command
470476
ps.p.srcAddr = SOC_SPI_0_REGS + SPI_SPIBUF;
471-
ps.p.destAddr = (unsigned int)(&spi_dev.spi_cmd_buf_rx);
477+
ps.p.destAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_rx);
472478
ps.p.aCnt = 1;
473479
ps.p.bCnt = cmd_len;
474480
ps.p.cCnt = 1;
@@ -490,7 +496,7 @@ static pbio_error_t spi_begin_for_flash(
490496
edma3_set_param(125, &ps);
491497
} else {
492498
// RX dummy
493-
ps.p.destAddr = (unsigned int)(&spi_dev.rx_dummy_byte);
499+
ps.p.destAddr = (unsigned int)(&spi_dev_bufs.rx_dummy_byte);
494500
ps.p.bCnt = user_data_len;
495501
ps.p.destBIdx = 0;
496502
ps.p.linkAddr = 0xffff;
@@ -614,7 +620,7 @@ static pbio_error_t flash_wait_write(pbio_os_state_t *state) {
614620
}
615621
PBIO_OS_AWAIT_WHILE(state, spi_dev.status & SPI_STATUS_WAIT_ANY);
616622

617-
status = PBDRV_UNCACHED(spi_dev.spi_cmd_buf_rx[1]);
623+
status = PBDRV_UNCACHED(spi_dev_bufs.spi_cmd_buf_rx[1]);
618624
} while (status & FLASH_STATUS_BUSY);
619625

620626
PBIO_OS_ASYNC_END(PBIO_SUCCESS);
@@ -736,7 +742,7 @@ static const uint32_t channel_cmd[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_
736742
MANUAL_ADC_CHANNEL(15),
737743
MANUAL_ADC_CHANNEL(15),
738744
};
739-
static volatile uint16_t channel_data[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES];
745+
static volatile uint16_t channel_data[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES] PBDRV_DMA_BUF;
740746

741747
pbio_error_t pbdrv_adc_get_ch(uint8_t ch, uint16_t *value) {
742748
if (ch >= PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS) {
@@ -747,8 +753,8 @@ pbio_error_t pbdrv_adc_get_ch(uint8_t ch, uint16_t *value) {
747753
uint16_t a, b;
748754
do {
749755
// Values for the requested channel are received several samples later.
750-
a = channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES];
751-
b = channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES];
756+
a = PBDRV_UNCACHED(channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]);
757+
b = PBDRV_UNCACHED(channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]);
752758
} while (a != b);
753759

754760
// Mask the data to 10 bits
@@ -818,10 +824,10 @@ static pbio_error_t pbdrv_block_device_ev3_spi_begin_for_adc(const uint32_t *cmd
818824
ps.p.opt = EDMA3CC_OPT_TCINTEN | (EDMA3_CHA_SPI0_RX << EDMA3CC_OPT_TCC_SHIFT);
819825
edma3_set_param(EDMA3_CHA_SPI0_RX, &ps);
820826

821-
// We play dangerously and don't flush the cache for commands (since they're const)
822-
// but we do need to flush the cache for the data which is read.
823-
spi_dev.rx_user_buf_addr = (uint32_t)data;
824-
spi_dev.rx_user_buf_sz = sizeof(uint16_t) * len;
827+
// We play dangerously and don't flush the cache for the ADC.
828+
// The commands are const, and the values are read through an uncached mapping.
829+
spi_dev.rx_user_buf_addr = 0;
830+
spi_dev.rx_user_buf_sz = 0;
825831

826832
spi_dev.status = SPI_STATUS_WAIT_TX | SPI_STATUS_WAIT_RX;
827833

@@ -855,7 +861,7 @@ static struct {
855861
pbsys_storage_data_map_t data_map;
856862
uint8_t data[PBDRV_CONFIG_BLOCK_DEVICE_RAM_SIZE];
857863
};
858-
} ramdisk __attribute__((section(".noinit"), used));
864+
} ramdisk __attribute__((aligned(PBDRV_CACHE_LINE_SZ), section(".noinit"), used));
859865

860866
uint32_t pbdrv_block_device_get_writable_size(void) {
861867
return PBDRV_CONFIG_BLOCK_DEVICE_EV3_SIZE - sizeof(ramdisk.saved_size);
@@ -888,7 +894,7 @@ pbio_error_t ev3_spi_process_thread(pbio_os_state_t *state, void *context) {
888894
return err;
889895
}
890896
PBIO_OS_AWAIT_WHILE(state, spi_dev.status & SPI_STATUS_WAIT_ANY);
891-
if (memcmp(device_id, PBDRV_UNCACHED_ADDR(spi_dev.spi_cmd_buf_rx[1]), sizeof(device_id))) {
897+
if (memcmp(device_id, PBDRV_UNCACHED_ADDR(spi_dev_bufs.spi_cmd_buf_rx[1]), sizeof(device_id))) {
892898
return PBIO_ERROR_FAILED;
893899
}
894900

lib/pbio/drv/usb/usb_ev3.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ static bool pbdrv_usb_is_usb_hs;
221221
static bool pbdrv_usb_is_events_subscribed;
222222

223223
// Buffers, used for different logical flows on the data endpoint
224-
static uint8_t ep1_rx_buf[PYBRICKS_EP_PKT_SZ_HS];
224+
static uint8_t ep1_rx_buf[PYBRICKS_EP_PKT_SZ_HS] PBDRV_DMA_BUF;
225225
static uint8_t ep1_tx_response_buf[PYBRICKS_EP_PKT_SZ_HS];
226226
static uint8_t ep1_tx_status_buf[PYBRICKS_EP_PKT_SZ_HS];
227227
static uint8_t ep1_tx_stdout_buf[PYBRICKS_EP_PKT_SZ_HS];

lib/pbio/include/pbdrv/cache.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@ void pbdrv_cache_prepare_after_dma(const void *buf, size_t sz);
2424
#define PBDRV_CACHE_LINE_SZ 32
2525

2626
// Align data to a cache line, which is needed for clean RX DMA
27-
#define PBDRV_CACHE_LINE_ALIGNED __attribute__((aligned(PBDRV_CACHE_LINE_SZ)))
27+
#define PBDRV_DMA_BUF __attribute__((aligned(PBDRV_CACHE_LINE_SZ), section(".dma")))
2828

2929
#endif

lib/pbio/platform/ev3/platform.ld

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,11 @@ SECTIONS
6666
_bss_start = .;
6767
*(.bss)
6868
*(.bss.*)
69-
. = ALIGN(4);
69+
/* Put DMA RX buffers here so that they never share cache lines with
70+
unrelated data. This makes sure that they can be invalidated properly. */
71+
. = ALIGN(32);
72+
*(.dma)
73+
. = ALIGN(32);
7074
_bss_end = .;
7175
} > DDR
7276

0 commit comments

Comments
 (0)