Skip to content

Commit 2d53a75

Browse files
authored
Merge branch 'adafruit:main' into bls_coin_m0
2 parents 059b0e7 + 1eb0ae6 commit 2d53a75

File tree

4 files changed

+78
-22
lines changed

4 files changed

+78
-22
lines changed

ports/raspberrypi/common-hal/busio/SPI.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,10 @@ static bool _transfer(busio_spi_obj_t *self,
182182
chan_tx = dma_claim_unused_channel(false);
183183
chan_rx = dma_claim_unused_channel(false);
184184
}
185-
bool use_dma = chan_rx >= 0 && chan_tx >= 0;
185+
bool has_dma_channels = chan_rx >= 0 && chan_tx >= 0;
186+
// Only use DMA if both data buffers are in SRAM. Otherwise, we'll stall the DMA with PSRAM or flash cache misses.
187+
bool data_in_sram = data_in >= (uint8_t *)SRAM_BASE && data_out >= (uint8_t *)SRAM_BASE;
188+
bool use_dma = has_dma_channels && data_in_sram;
186189
if (use_dma) {
187190
dma_channel_config c = dma_channel_get_default_config(chan_tx);
188191
channel_config_set_transfer_data_size(&c, DMA_SIZE_8);

ports/raspberrypi/common-hal/rp2pio/StateMachine.c

Lines changed: 71 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -853,32 +853,77 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
853853
// This implementation is based on SPI but varies because the tx and rx buffers
854854
// may be different lengths and occur at different times or speeds.
855855

856-
// Use DMA for large transfers if channels are available.
857-
// Don't exceed FIFO size.
858-
const size_t dma_min_size_threshold = self->fifo_depth;
859856
int chan_tx = -1;
860857
int chan_rx = -1;
861858
size_t len = MAX(out_len, in_len);
862859
bool tx = data_out != NULL;
863860
bool rx = data_in != NULL;
864-
bool use_dma = len >= dma_min_size_threshold || swap_out || swap_in;
861+
bool free_data_out = false;
862+
bool free_data_in = false;
863+
uint8_t *sram_data_out = (uint8_t *)data_out;
864+
uint8_t *sram_data_in = data_in;
865+
bool tx_fits_in_fifo = (out_len / out_stride_in_bytes) <= self->fifo_depth;
866+
bool rx_fits_in_fifo = (in_len / in_stride_in_bytes) <= self->fifo_depth;
867+
bool use_dma = !(tx_fits_in_fifo && rx_fits_in_fifo) || swap_out || swap_in;
868+
865869
if (use_dma) {
866-
// Use DMA channels to service the two FIFOs
870+
// We can only reliably use DMA for SRAM buffers. So, if we're given PSRAM buffers,
871+
// then copy them to SRAM first. If we can't, then fail.
872+
// Use DMA channels to service the two FIFOs. Fail if we can't allocate DMA channels.
867873
if (tx) {
874+
if (data_out < (uint8_t *)SRAM_BASE) {
875+
// Try to allocate a temporary buffer for DMA transfer
876+
uint8_t *temp_buffer = (uint8_t *)port_malloc(len, true);
877+
if (temp_buffer == NULL) {
878+
mp_printf(&mp_plat_print, "Failed to allocate temporary buffer for DMA tx\n");
879+
return false;
880+
}
881+
memcpy(temp_buffer, data_out, len);
882+
sram_data_out = temp_buffer;
883+
free_data_out = true;
884+
}
868885
chan_tx = dma_claim_unused_channel(false);
869886
// DMA allocation failed...
870887
if (chan_tx < 0) {
888+
if (free_data_out) {
889+
port_free(sram_data_out);
890+
}
891+
if (free_data_in) {
892+
port_free(sram_data_in);
893+
}
871894
return false;
872895
}
873896
}
874897
if (rx) {
898+
if (data_in < (uint8_t *)SRAM_BASE) {
899+
// Try to allocate a temporary buffer for DMA transfer
900+
uint8_t *temp_buffer = (uint8_t *)port_malloc(len, true);
901+
if (temp_buffer == NULL) {
902+
mp_printf(&mp_plat_print, "Failed to allocate temporary buffer for DMA rx\n");
903+
if (chan_tx >= 0) {
904+
dma_channel_unclaim(chan_tx);
905+
}
906+
if (free_data_out) {
907+
port_free(sram_data_out);
908+
}
909+
return false;
910+
}
911+
sram_data_in = temp_buffer;
912+
free_data_in = true;
913+
}
875914
chan_rx = dma_claim_unused_channel(false);
876915
// DMA allocation failed...
877916
if (chan_rx < 0) {
878917
// may need to free tx channel
879918
if (chan_tx >= 0) {
880919
dma_channel_unclaim(chan_tx);
881920
}
921+
if (free_data_out) {
922+
port_free(sram_data_out);
923+
}
924+
if (free_data_in) {
925+
port_free(sram_data_in);
926+
}
882927
return false;
883928
}
884929
}
@@ -910,7 +955,7 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
910955
channel_config_set_bswap(&c, swap_out);
911956
dma_channel_configure(chan_tx, &c,
912957
tx_destination,
913-
data_out,
958+
sram_data_out,
914959
out_len / out_stride_in_bytes,
915960
false);
916961
channel_mask |= 1u << chan_tx;
@@ -923,7 +968,7 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
923968
channel_config_set_write_increment(&c, true);
924969
channel_config_set_bswap(&c, swap_in);
925970
dma_channel_configure(chan_rx, &c,
926-
data_in,
971+
sram_data_in,
927972
rx_source,
928973
in_len / in_stride_in_bytes,
929974
false);
@@ -950,8 +995,7 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
950995
self->pio->fdebug = stall_mask;
951996
}
952997

953-
// If we have claimed only one channel successfully, we should release immediately. This also
954-
// releases the DMA after use_dma has been done.
998+
// Release the DMA channels once the DMA transfer is finished.
955999
if (chan_rx >= 0) {
9561000
dma_channel_unclaim(chan_rx);
9571001
}
@@ -960,31 +1004,31 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
9601004
}
9611005

9621006
if (!use_dma && !(self->user_interruptible && mp_hal_is_interrupted())) {
963-
// Use software for small transfers, or if couldn't claim two DMA channels
1007+
// Use software for small transfers
9641008
size_t rx_remaining = in_len / in_stride_in_bytes;
9651009
size_t tx_remaining = out_len / out_stride_in_bytes;
9661010

9671011
while (rx_remaining || tx_remaining) {
9681012
while (tx_remaining && !pio_sm_is_tx_fifo_full(self->pio, self->state_machine)) {
9691013
if (out_stride_in_bytes == 1) {
970-
*tx_destination = *data_out;
1014+
*tx_destination = *sram_data_out;
9711015
} else if (out_stride_in_bytes == 2) {
972-
*((uint16_t *)tx_destination) = *((uint16_t *)data_out);
1016+
*((uint16_t *)tx_destination) = *((uint16_t *)sram_data_out);
9731017
} else if (out_stride_in_bytes == 4) {
974-
*((uint32_t *)tx_destination) = *((uint32_t *)data_out);
1018+
*((uint32_t *)tx_destination) = *((uint32_t *)sram_data_out);
9751019
}
976-
data_out += out_stride_in_bytes;
1020+
sram_data_out += out_stride_in_bytes;
9771021
--tx_remaining;
9781022
}
9791023
while (rx_remaining && !pio_sm_is_rx_fifo_empty(self->pio, self->state_machine)) {
9801024
if (in_stride_in_bytes == 1) {
981-
*data_in = (uint8_t)*rx_source;
1025+
*sram_data_in = (uint8_t)*rx_source;
9821026
} else if (in_stride_in_bytes == 2) {
983-
*((uint16_t *)data_in) = *((uint16_t *)rx_source);
1027+
*((uint16_t *)sram_data_in) = *((uint16_t *)rx_source);
9841028
} else if (in_stride_in_bytes == 4) {
985-
*((uint32_t *)data_in) = *((uint32_t *)rx_source);
1029+
*((uint32_t *)sram_data_in) = *((uint32_t *)rx_source);
9861030
}
987-
data_in += in_stride_in_bytes;
1031+
sram_data_in += in_stride_in_bytes;
9881032
--rx_remaining;
9891033
}
9901034
RUN_BACKGROUND_TASKS;
@@ -996,7 +1040,7 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
9961040
self->pio->fdebug = stall_mask;
9971041
}
9981042
// Wait for the state machine to finish transmitting the data we've queued
999-
// up.
1043+
// up (either from the CPU or via DMA).
10001044
if (tx) {
10011045
while (!pio_sm_is_tx_fifo_empty(self->pio, self->state_machine) ||
10021046
(self->wait_for_txstall && (self->pio->fdebug & stall_mask) == 0)) {
@@ -1006,6 +1050,14 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
10061050
}
10071051
}
10081052
}
1053+
if (free_data_out) {
1054+
port_free(sram_data_out);
1055+
}
1056+
if (free_data_in) {
1057+
// Copy the data from the SRAM buffer to the user PSRAM buffer.
1058+
memcpy(data_in, sram_data_in, len);
1059+
port_free(sram_data_in);
1060+
}
10091061
return true;
10101062
}
10111063

shared-module/lvfontio/OnDiskFont.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,9 +253,10 @@ static bool load_font_header(lvfontio_ondiskfont_t *self, FIL *file, size_t *max
253253
*max_slots = advance_count[1] * 2 + advance_count[0];
254254
}
255255
}
256+
} else {
257+
*max_slots = advance_count[0] + advance_count[1];
256258
}
257259

258-
259260
found_glyf = true;
260261
}
261262

0 commit comments

Comments
 (0)