From 81afa660785ceb18a501b059cb3f0133d97e4db7 Mon Sep 17 00:00:00 2001 From: Scott Shawcroft Date: Fri, 29 Aug 2025 11:56:36 -0700 Subject: [PATCH] Fix display glitch during neopixel update The DMA can get stalled by DMA to/from PSRAM. So, copy the data into SRAM when we need to. Fail if we can't. Fixes #10564 --- .../common-hal/rp2pio/StateMachine.c | 90 +++++++++++++++---- 1 file changed, 71 insertions(+), 19 deletions(-) diff --git a/ports/raspberrypi/common-hal/rp2pio/StateMachine.c b/ports/raspberrypi/common-hal/rp2pio/StateMachine.c index fefecf05ff715..74b9a7f0c31b0 100644 --- a/ports/raspberrypi/common-hal/rp2pio/StateMachine.c +++ b/ports/raspberrypi/common-hal/rp2pio/StateMachine.c @@ -853,25 +853,64 @@ static bool _transfer(rp2pio_statemachine_obj_t *self, // This implementation is based on SPI but varies because the tx and rx buffers // may be different lengths and occur at different times or speeds. - // Use DMA for large transfers if channels are available. - // Don't exceed FIFO size. - const size_t dma_min_size_threshold = self->fifo_depth; int chan_tx = -1; int chan_rx = -1; size_t len = MAX(out_len, in_len); bool tx = data_out != NULL; bool rx = data_in != NULL; - bool use_dma = len >= dma_min_size_threshold || swap_out || swap_in; + bool free_data_out = false; + bool free_data_in = false; + uint8_t *sram_data_out = (uint8_t *)data_out; + uint8_t *sram_data_in = data_in; + bool tx_fits_in_fifo = (out_len / out_stride_in_bytes) <= self->fifo_depth; + bool rx_fits_in_fifo = (in_len / in_stride_in_bytes) <= self->fifo_depth; + bool use_dma = !(tx_fits_in_fifo && rx_fits_in_fifo) || swap_out || swap_in; + if (use_dma) { - // Use DMA channels to service the two FIFOs + // We can only reliably use DMA for SRAM buffers. So, if we're given PSRAM buffers, + // then copy them to SRAM first. If we can't, then fail. + // Use DMA channels to service the two FIFOs. Fail if we can't allocate DMA channels. if (tx) { + if (data_out < (uint8_t *)SRAM_BASE) { + // Try to allocate a temporary buffer for DMA transfer + uint8_t *temp_buffer = (uint8_t *)port_malloc(len, true); + if (temp_buffer == NULL) { + mp_printf(&mp_plat_print, "Failed to allocate temporary buffer for DMA tx\n"); + return false; + } + memcpy(temp_buffer, data_out, len); + sram_data_out = temp_buffer; + free_data_out = true; + } chan_tx = dma_claim_unused_channel(false); // DMA allocation failed... if (chan_tx < 0) { + if (free_data_out) { + port_free(sram_data_out); + } + if (free_data_in) { + port_free(sram_data_in); + } return false; } } if (rx) { + if (data_in < (uint8_t *)SRAM_BASE) { + // Try to allocate a temporary buffer for DMA transfer + uint8_t *temp_buffer = (uint8_t *)port_malloc(len, true); + if (temp_buffer == NULL) { + mp_printf(&mp_plat_print, "Failed to allocate temporary buffer for DMA rx\n"); + if (chan_tx >= 0) { + dma_channel_unclaim(chan_tx); + } + if (free_data_out) { + port_free(sram_data_out); + } + return false; + } + sram_data_in = temp_buffer; + free_data_in = true; + } chan_rx = dma_claim_unused_channel(false); // DMA allocation failed... if (chan_rx < 0) { @@ -879,6 +918,12 @@ static bool _transfer(rp2pio_statemachine_obj_t *self, if (chan_tx >= 0) { dma_channel_unclaim(chan_tx); } + if (free_data_out) { + port_free(sram_data_out); + } + if (free_data_in) { + port_free(sram_data_in); + } return false; } } @@ -910,7 +955,7 @@ static bool _transfer(rp2pio_statemachine_obj_t *self, channel_config_set_bswap(&c, swap_out); dma_channel_configure(chan_tx, &c, tx_destination, - data_out, + sram_data_out, out_len / out_stride_in_bytes, false); channel_mask |= 1u << chan_tx; @@ -923,7 +968,7 @@ static bool _transfer(rp2pio_statemachine_obj_t *self, channel_config_set_write_increment(&c, true); channel_config_set_bswap(&c, swap_in); dma_channel_configure(chan_rx, &c, - data_in, + sram_data_in, rx_source, in_len / in_stride_in_bytes, false); @@ -950,8 +995,7 @@ static bool _transfer(rp2pio_statemachine_obj_t *self, self->pio->fdebug = stall_mask; } - // If we have claimed only one channel successfully, we should release immediately. This also - // releases the DMA after use_dma has been done. + // Release the DMA channels after use_dma has been done. if (chan_rx >= 0) { dma_channel_unclaim(chan_rx); } @@ -960,31 +1004,31 @@ static bool _transfer(rp2pio_statemachine_obj_t *self, } if (!use_dma && !(self->user_interruptible && mp_hal_is_interrupted())) { - // Use software for small transfers, or if couldn't claim two DMA channels + // Use software for small transfers size_t rx_remaining = in_len / in_stride_in_bytes; size_t tx_remaining = out_len / out_stride_in_bytes; while (rx_remaining || tx_remaining) { while (tx_remaining && !pio_sm_is_tx_fifo_full(self->pio, self->state_machine)) { if (out_stride_in_bytes == 1) { - *tx_destination = *data_out; + *tx_destination = *sram_data_out; } else if (out_stride_in_bytes == 2) { - *((uint16_t *)tx_destination) = *((uint16_t *)data_out); + *((uint16_t *)tx_destination) = *((uint16_t *)sram_data_out); } else if (out_stride_in_bytes == 4) { - *((uint32_t *)tx_destination) = *((uint32_t *)data_out); + *((uint32_t *)tx_destination) = *((uint32_t *)sram_data_out); } - data_out += out_stride_in_bytes; + sram_data_out += out_stride_in_bytes; --tx_remaining; } while (rx_remaining && !pio_sm_is_rx_fifo_empty(self->pio, self->state_machine)) { if (in_stride_in_bytes == 1) { - *data_in = (uint8_t)*rx_source; + *sram_data_in = (uint8_t)*rx_source; } else if (in_stride_in_bytes == 2) { - *((uint16_t *)data_in) = *((uint16_t *)rx_source); + *((uint16_t *)sram_data_in) = *((uint16_t *)rx_source); } else if (in_stride_in_bytes == 4) { - *((uint32_t *)data_in) = *((uint32_t *)rx_source); + *((uint32_t *)sram_data_in) = *((uint32_t *)rx_source); } - data_in += in_stride_in_bytes; + sram_data_in += in_stride_in_bytes; --rx_remaining; } RUN_BACKGROUND_TASKS; @@ -996,7 +1040,7 @@ static bool _transfer(rp2pio_statemachine_obj_t *self, self->pio->fdebug = stall_mask; } // Wait for the state machine to finish transmitting the data we've queued - // up. + // up (either from the CPU or via DMA.) if (tx) { while (!pio_sm_is_tx_fifo_empty(self->pio, self->state_machine) || (self->wait_for_txstall && (self->pio->fdebug & stall_mask) == 0)) { @@ -1006,6 +1050,14 @@ static bool _transfer(rp2pio_statemachine_obj_t *self, } } } + if (free_data_out) { + port_free(sram_data_out); + } + if (free_data_in) { + // Copy the data from the SRAM buffer to the user PSRAM buffer. + memcpy(data_in, sram_data_in, len); + port_free(sram_data_in); + } return true; }