Skip to content

Commit a2e3b7a

Browse files
committed
pbdrv/cache.h: Ensure DMA/uncached buffers don't share cache lines
If DMA RX buffers share cache lines with other data, it can lead to race conditions which corrupt data. If we flush and invalidate the cache before performing the DMA, reading the unrelated data can cause the cache line to be loaded back into cache, cancelling out the invalidation. If we wait until after performing the DMA and perform a flush and invalidate, the flush can corrupt part of the data which was written by the DMA. If we only perform an invalidate, writes to the unrelated data can be lost. A similar race condition can happen if an uncached alias is used to read/write to data which sits next to unrelated data. This currently applies to the ADC RX buffer. The solution we use here is to place each buffer in its own cache-line-aligned block of memory and only perform an invalidate upon DMA completion. This makes sure that no unrelated data can get caught up in the same cache lines. This issue does not affect DMA TX buffers written using the normal cached mapping. Those are handled by flushing the cache.
1 parent f899810 commit a2e3b7a

File tree

4 files changed

+50
-38
lines changed

4 files changed

+50
-38
lines changed

lib/pbio/drv/block_device/block_device_ev3.c

Lines changed: 43 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -94,21 +94,27 @@ enum {
9494
static struct {
9595
/** HAL Transfer status */
9696
volatile spi_status_t status;
97-
// This is used when SPI only needs to receive. It should always stay as 0.
98-
uint8_t tx_dummy_byte;
99-
// This is used when received data is to be discarded. Its value should be ignored.
100-
uint8_t rx_dummy_byte;
97+
// This stores the RX buffer address so that we clean the cache when DMA is complete
98+
uint32_t rx_user_buf_addr;
99+
// This stores the RX buffer size;
100+
uint32_t rx_user_buf_sz;
101+
} spi_dev;
102+
103+
/**
104+
* SPI small DMA buffers
105+
*/
106+
static struct {
101107
// This is used when transmitting so that the last byte clears CSHOLD.
102108
uint32_t tx_last_word;
103109
// This is used to hold the initial command to the SPI peripheral.
104110
uint8_t spi_cmd_buf_tx[SPI_CMD_BUF_SZ];
105111
// This is used to hold the replies to commands to the SPI peripheral.
106112
uint8_t spi_cmd_buf_rx[SPI_CMD_BUF_SZ];
107-
// This stores the RX buffer address so that we clean the cache when DMA is complete
108-
uint32_t rx_user_buf_addr;
109-
// This stores the RX buffer size;
110-
uint32_t rx_user_buf_sz;
111-
} spi_dev;
113+
// This is used when SPI only needs to receive. It should always stay as 0.
114+
uint8_t tx_dummy_byte;
115+
// This is used when received data is to be discarded. Its value should be ignored.
116+
uint8_t rx_dummy_byte;
117+
} spi_dev_bufs PBDRV_DMA_BUF;
112118

113119
static uint32_t last_spi_dma_complete_time;
114120

@@ -119,6 +125,7 @@ static void spi_dma_complete(void) {
119125
}
120126
SPIIntDisable(SOC_SPI_0_REGS, SPI_DMA_REQUEST_ENA_INT);
121127
pbio_os_request_poll();
128+
pbdrv_cache_prepare_after_dma(&spi_dev_bufs, sizeof(spi_dev_bufs));
122129
if (spi_dev.rx_user_buf_addr && spi_dev.rx_user_buf_sz) {
123130
pbdrv_cache_prepare_after_dma((void *)spi_dev.rx_user_buf_addr, spi_dev.rx_user_buf_sz);
124131
}
@@ -161,7 +168,7 @@ static void spi0_isr(void) {
161168
continue;
162169
}
163170

164-
PBDRV_UNCACHED(spi_dev.spi_cmd_buf_rx[0]) = HWREG(SOC_SPI_0_REGS + SPI_SPIBUF);
171+
spi_dev_bufs.spi_cmd_buf_rx[0] = HWREG(SOC_SPI_0_REGS + SPI_SPIBUF);
165172
spi_dev.status &= ~SPI_STATUS_WAIT_RX;
166173
SPIIntDisable(SOC_SPI_0_REGS, SPI_RECV_INT);
167174
pbio_os_request_poll();
@@ -366,17 +373,17 @@ static pbio_error_t spi_begin_for_flash(
366373
SPIIntEnable(SOC_SPI_0_REGS, SPI_RECV_INT);
367374
HWREG(SOC_SPI_0_REGS + SPI_SPIDAT1) = tx;
368375
} else {
369-
memcpy(PBDRV_UNCACHED_ADDR(spi_dev.spi_cmd_buf_tx), cmd, cmd_len);
376+
memcpy(spi_dev_bufs.spi_cmd_buf_tx, cmd, cmd_len);
370377
spi_dev.rx_user_buf_addr = (uint32_t)user_data_rx;
371378
spi_dev.rx_user_buf_sz = user_data_len;
372379

373380
if (user_data_len == 0) {
374381
// Only a command, no user data
375382

376-
PBDRV_UNCACHED(spi_dev.tx_last_word) = spi0_last_dat1_for_flash(cmd[cmd_len - 1]);
383+
spi_dev_bufs.tx_last_word = spi0_last_dat1_for_flash(cmd[cmd_len - 1]);
377384

378385
// TX everything except last byte
379-
ps.p.srcAddr = (unsigned int)(&spi_dev.spi_cmd_buf_tx);
386+
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_tx);
380387
ps.p.destAddr = SOC_SPI_0_REGS + SPI_SPIDAT1;
381388
ps.p.aCnt = 1;
382389
ps.p.bCnt = cmd_len - 1;
@@ -391,7 +398,7 @@ static pbio_error_t spi_begin_for_flash(
391398
edma3_set_param(EDMA3_CHA_SPI0_TX, &ps);
392399

393400
// TX last byte, clearing CSHOLD
394-
ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word);
401+
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word);
395402
ps.p.aCnt = 4;
396403
ps.p.bCnt = 1;
397404
ps.p.linkAddr = 0xffff;
@@ -400,7 +407,7 @@ static pbio_error_t spi_begin_for_flash(
400407

401408
// RX all bytes
402409
ps.p.srcAddr = SOC_SPI_0_REGS + SPI_SPIBUF;
403-
ps.p.destAddr = (unsigned int)(&spi_dev.spi_cmd_buf_rx);
410+
ps.p.destAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_rx);
404411
ps.p.aCnt = 1;
405412
ps.p.bCnt = cmd_len;
406413
ps.p.cCnt = 1;
@@ -416,7 +423,7 @@ static pbio_error_t spi_begin_for_flash(
416423
// Command *and* user data
417424

418425
// TX the command
419-
ps.p.srcAddr = (unsigned int)(&spi_dev.spi_cmd_buf_tx);
426+
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_tx);
420427
ps.p.destAddr = SOC_SPI_0_REGS + SPI_SPIDAT1;
421428
ps.p.aCnt = 1;
422429
ps.p.bCnt = cmd_len;
@@ -432,7 +439,7 @@ static pbio_error_t spi_begin_for_flash(
432439

433440
if (user_data_tx) {
434441
pbdrv_cache_prepare_before_dma(user_data_tx, user_data_len);
435-
PBDRV_UNCACHED(spi_dev.tx_last_word) = spi0_last_dat1_for_flash(user_data_tx[user_data_len - 1]);
442+
spi_dev_bufs.tx_last_word = spi0_last_dat1_for_flash(user_data_tx[user_data_len - 1]);
436443

437444
// TX all but the last byte
438445
ps.p.srcAddr = (unsigned int)(user_data_tx);
@@ -441,24 +448,24 @@ static pbio_error_t spi_begin_for_flash(
441448
edma3_set_param(126, &ps);
442449

443450
// TX the last byte, clearing CSHOLD
444-
ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word);
451+
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word);
445452
ps.p.aCnt = 4;
446453
ps.p.bCnt = 1;
447454
ps.p.linkAddr = 0xffff;
448455
ps.p.opt = EDMA3CC_OPT_TCINTEN | (EDMA3_CHA_SPI0_TX << EDMA3CC_OPT_TCC_SHIFT);
449456
edma3_set_param(127, &ps);
450457
} else {
451-
PBDRV_UNCACHED(spi_dev.tx_last_word) = spi0_last_dat1_for_flash(0);
458+
spi_dev_bufs.tx_last_word = spi0_last_dat1_for_flash(0);
452459

453460
// TX all but the last byte
454-
ps.p.srcAddr = (unsigned int)(&spi_dev.tx_dummy_byte);
461+
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_dummy_byte);
455462
ps.p.bCnt = user_data_len - 1;
456463
ps.p.srcBIdx = 0;
457464
ps.p.linkAddr = 127 * 32;
458465
edma3_set_param(126, &ps);
459466

460467
// TX the last byte, clearing CSHOLD
461-
ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word);
468+
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word);
462469
ps.p.aCnt = 4;
463470
ps.p.bCnt = 1;
464471
ps.p.linkAddr = 0xffff;
@@ -468,7 +475,7 @@ static pbio_error_t spi_begin_for_flash(
468475

469476
// RX the command
470477
ps.p.srcAddr = SOC_SPI_0_REGS + SPI_SPIBUF;
471-
ps.p.destAddr = (unsigned int)(&spi_dev.spi_cmd_buf_rx);
478+
ps.p.destAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_rx);
472479
ps.p.aCnt = 1;
473480
ps.p.bCnt = cmd_len;
474481
ps.p.cCnt = 1;
@@ -490,7 +497,7 @@ static pbio_error_t spi_begin_for_flash(
490497
edma3_set_param(125, &ps);
491498
} else {
492499
// RX dummy
493-
ps.p.destAddr = (unsigned int)(&spi_dev.rx_dummy_byte);
500+
ps.p.destAddr = (unsigned int)(&spi_dev_bufs.rx_dummy_byte);
494501
ps.p.bCnt = user_data_len;
495502
ps.p.destBIdx = 0;
496503
ps.p.linkAddr = 0xffff;
@@ -501,8 +508,9 @@ static pbio_error_t spi_begin_for_flash(
501508

502509
spi_dev.status = SPI_STATUS_WAIT_TX | SPI_STATUS_WAIT_RX;
503510

504-
// Prevent write to spi_dev.status from being reordered
505-
pbdrv_compiler_memory_barrier();
511+
// Make sure writes to tx buffer leave cache
512+
// (we already flush the user buffer earlier)
513+
pbdrv_cache_prepare_before_dma(&spi_dev_bufs, sizeof(spi_dev_bufs));
506514

507515
EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, EDMA3_CHA_SPI0_TX, EDMA3_TRIG_MODE_EVENT);
508516
EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, EDMA3_CHA_SPI0_RX, EDMA3_TRIG_MODE_EVENT);
@@ -614,7 +622,7 @@ static pbio_error_t flash_wait_write(pbio_os_state_t *state) {
614622
}
615623
PBIO_OS_AWAIT_WHILE(state, spi_dev.status & SPI_STATUS_WAIT_ANY);
616624

617-
status = PBDRV_UNCACHED(spi_dev.spi_cmd_buf_rx[1]);
625+
status = PBDRV_UNCACHED(spi_dev_bufs.spi_cmd_buf_rx[1]);
618626
} while (status & FLASH_STATUS_BUSY);
619627

620628
PBIO_OS_ASYNC_END(PBIO_SUCCESS);
@@ -736,7 +744,7 @@ static const uint32_t channel_cmd[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_
736744
MANUAL_ADC_CHANNEL(15),
737745
MANUAL_ADC_CHANNEL(15),
738746
};
739-
static volatile uint16_t channel_data[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES];
747+
static volatile uint16_t channel_data[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES] PBDRV_DMA_BUF;
740748

741749
pbio_error_t pbdrv_adc_get_ch(uint8_t ch, uint16_t *value) {
742750
if (ch >= PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS) {
@@ -747,8 +755,8 @@ pbio_error_t pbdrv_adc_get_ch(uint8_t ch, uint16_t *value) {
747755
uint16_t a, b;
748756
do {
749757
// Values for the requested channel are received several samples later.
750-
a = channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES];
751-
b = channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES];
758+
a = PBDRV_UNCACHED(channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]);
759+
b = PBDRV_UNCACHED(channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]);
752760
} while (a != b);
753761

754762
// Mask the data to 10 bits
@@ -818,10 +826,10 @@ static pbio_error_t pbdrv_block_device_ev3_spi_begin_for_adc(const uint32_t *cmd
818826
ps.p.opt = EDMA3CC_OPT_TCINTEN | (EDMA3_CHA_SPI0_RX << EDMA3CC_OPT_TCC_SHIFT);
819827
edma3_set_param(EDMA3_CHA_SPI0_RX, &ps);
820828

821-
// We play dangerously and don't flush the cache for commands (since they're const)
822-
// but we do need to flush the cache for the data which is read.
823-
spi_dev.rx_user_buf_addr = (uint32_t)data;
824-
spi_dev.rx_user_buf_sz = sizeof(uint16_t) * len;
829+
// We play dangerously and don't flush the cache for the ADC.
830+
// The commands are const, and the values are read through an uncached mapping.
831+
spi_dev.rx_user_buf_addr = 0;
832+
spi_dev.rx_user_buf_sz = 0;
825833

826834
spi_dev.status = SPI_STATUS_WAIT_TX | SPI_STATUS_WAIT_RX;
827835

@@ -855,7 +863,7 @@ static struct {
855863
pbsys_storage_data_map_t data_map;
856864
uint8_t data[PBDRV_CONFIG_BLOCK_DEVICE_RAM_SIZE];
857865
};
858-
} ramdisk __attribute__((section(".noinit"), used));
866+
} ramdisk __attribute__((aligned(PBDRV_CACHE_LINE_SZ), section(".noinit"), used));
859867

860868
uint32_t pbdrv_block_device_get_writable_size(void) {
861869
return PBDRV_CONFIG_BLOCK_DEVICE_EV3_SIZE - sizeof(ramdisk.saved_size);
@@ -888,7 +896,7 @@ pbio_error_t ev3_spi_process_thread(pbio_os_state_t *state, void *context) {
888896
return err;
889897
}
890898
PBIO_OS_AWAIT_WHILE(state, spi_dev.status & SPI_STATUS_WAIT_ANY);
891-
if (memcmp(device_id, PBDRV_UNCACHED_ADDR(spi_dev.spi_cmd_buf_rx[1]), sizeof(device_id))) {
899+
if (memcmp(device_id, PBDRV_UNCACHED_ADDR(spi_dev_bufs.spi_cmd_buf_rx[1]), sizeof(device_id))) {
892900
return PBIO_ERROR_FAILED;
893901
}
894902

lib/pbio/drv/usb/usb_ev3.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ static bool pbdrv_usb_is_usb_hs;
221221
static bool pbdrv_usb_is_events_subscribed;
222222

223223
// Buffers, used for different logical flows on the data endpoint
224-
static uint8_t ep1_rx_buf[PYBRICKS_EP_PKT_SZ_HS];
224+
static uint8_t ep1_rx_buf[PYBRICKS_EP_PKT_SZ_HS] PBDRV_DMA_BUF;
225225
static uint8_t ep1_tx_response_buf[PYBRICKS_EP_PKT_SZ_HS];
226226
static uint8_t ep1_tx_status_buf[PYBRICKS_EP_PKT_SZ_HS];
227227
static uint8_t ep1_tx_stdout_buf[PYBRICKS_EP_PKT_SZ_HS];

lib/pbio/include/pbdrv/cache.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@ void pbdrv_cache_prepare_after_dma(const void *buf, size_t sz);
2424
#define PBDRV_CACHE_LINE_SZ 32
2525

2626
// Align data to a cache line, which is needed for clean RX DMA
27-
#define PBDRV_CACHE_LINE_ALIGNED __attribute__((aligned(PBDRV_CACHE_LINE_SZ)))
27+
#define PBDRV_DMA_BUF __attribute__((aligned(PBDRV_CACHE_LINE_SZ), section(".dma")))
2828

2929
#endif

lib/pbio/platform/ev3/platform.ld

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,11 @@ SECTIONS
6666
_bss_start = .;
6767
*(.bss)
6868
*(.bss.*)
69-
. = ALIGN(4);
69+
/* Put DMA RX buffers here so that they never share cache lines with
70+
unrelated data. This makes sure that they can be invalidated properly. */
71+
. = ALIGN(32);
72+
*(.dma)
73+
. = ALIGN(32);
7074
_bss_end = .;
7175
} > DDR
7276

0 commit comments

Comments
 (0)