Skip to content

Commit 818bda1

Browse files
committed
Fix FIFO transfer and buffer alignment.
1 parent 2f2c8ce commit 818bda1

File tree

2 files changed

+142
-26
lines changed

2 files changed

+142
-26
lines changed

src/portable/st/stm32_fsdev/dcd_stm32_fsdev.c

Lines changed: 141 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,12 @@ static void dcd_ep_ctr_handler(void);
186186
static uint8_t open_ep_count;
187187
static uint16_t ep_buf_ptr; ///< Points to first free memory location
188188
static void dcd_pma_alloc_reset(void);
189-
static uint16_t dcd_pma_alloc(uint8_t ep_addr, size_t length);
189+
static uint16_t dcd_pma_alloc(uint8_t ep_addr, uint16_t length);
190190
static void dcd_pma_free(uint8_t ep_addr);
191191
static void dcd_ep_free(uint8_t ep_addr);
192192
static uint8_t dcd_ep_alloc(uint8_t ep_addr, uint8_t ep_type);
193-
static bool dcd_write_packet_memory(uint16_t dst, const void *__restrict src, size_t wNBytes);
194-
static bool dcd_read_packet_memory(void *__restrict dst, uint16_t src, size_t wNBytes);
193+
static bool dcd_write_packet_memory(uint16_t dst, const void *__restrict src, uint16_t wNBytes);
194+
static bool dcd_read_packet_memory(void *__restrict dst, uint16_t src, uint16_t wNBytes);
195195

196196
static bool dcd_write_packet_memory_ff(tu_fifo_t * ff, uint16_t dst, uint16_t wNBytes);
197197
static bool dcd_read_packet_memory_ff(tu_fifo_t * ff, uint16_t src, uint16_t wNBytes);
@@ -792,7 +792,7 @@ static void dcd_pma_alloc_reset(void)
792792
*
793793
* During failure, TU_ASSERT is used. If this happens, rework/reallocate memory manually.
794794
*/
795-
static uint16_t dcd_pma_alloc(uint8_t ep_addr, size_t length)
795+
static uint16_t dcd_pma_alloc(uint8_t ep_addr, uint16_t length)
796796
{
797797
xfer_ctl_t* epXferCtl = xfer_ctl_ptr(ep_addr);
798798

@@ -804,6 +804,13 @@ static uint16_t dcd_pma_alloc(uint8_t ep_addr, size_t length)
804804
return epXferCtl->pma_ptr;
805805
}
806806

807+
// Ensure allocated buffer is aligned
808+
#ifdef PMA_32BIT_ACCESS
809+
length = (length + 3) & ~0x03;
810+
#else
811+
length = (length + 1) & ~0x01;
812+
#endif
813+
807814
open_ep_count++;
808815

809816
uint16_t addr = ep_buf_ptr;
@@ -814,7 +821,7 @@ static uint16_t dcd_pma_alloc(uint8_t ep_addr, size_t length)
814821

815822
epXferCtl->pma_ptr = addr;
816823
epXferCtl->pma_alloc_size = length;
817-
//TU_LOG2("dcd_pma_alloc(%x,%x)=%x\r\n",ep_addr,length,addr);
824+
//TU_LOG1("dcd_pma_alloc(%x,%x)=%x\r\n",ep_addr,length,addr);
818825

819826
return addr;
820827
}
@@ -1240,15 +1247,36 @@ void dcd_edpt_clear_stall (uint8_t rhport, uint8_t ep_addr)
12401247
}
12411248

12421249
#ifdef PMA_32BIT_ACCESS
1243-
static bool dcd_write_packet_memory(uint16_t dst, const void *__restrict src, size_t wNBytes)
1250+
static bool dcd_write_packet_memory(uint16_t dst, const void *__restrict src, uint16_t wNBytes)
12441251
{
1245-
// FIXME original function uses byte-access to source memory (to support non-aligned buffers)
1246-
const uint32_t* src32 = (const uint32_t*)(src);
1252+
const uint8_t* srcVal = src;
12471253
volatile uint32_t* dst32 = (volatile uint32_t*)(USB_PMAADDR + dst);
1248-
for (unsigned n=wNBytes/4; n>0; --n) {
1249-
*dst32++ = *src32++;
1254+
1255+
for (uint32_t n = wNBytes / 4; n > 0; --n) {
1256+
*dst32++ = tu_unaligned_read32(srcVal);
1257+
srcVal += 4;
1258+
}
1259+
1260+
wNBytes = wNBytes & 0x03;
1261+
if (wNBytes)
1262+
{
1263+
uint32_t wrVal = *srcVal;
1264+
wNBytes--;
1265+
1266+
if (wNBytes)
1267+
{
1268+
wrVal |= *++srcVal << 8;
1269+
wNBytes--;
1270+
1271+
if (wNBytes)
1272+
{
1273+
wrVal |= *++srcVal << 16;
1274+
}
1275+
}
1276+
1277+
*dst32 = wrVal;
12501278
}
1251-
*dst32 = (*src32) & ((1<<8*(wNBytes % 4)) - 1);
1279+
12521280
return true;
12531281
}
12541282
#else
@@ -1263,7 +1291,7 @@ static bool dcd_write_packet_memory(uint16_t dst, const void *__restrict src, si
12631291
* @param wNBytes no. of bytes to be copied.
12641292
* @retval None
12651293
*/
1266-
static bool dcd_write_packet_memory(uint16_t dst, const void *__restrict src, size_t wNBytes)
1294+
static bool dcd_write_packet_memory(uint16_t dst, const void *__restrict src, uint16_t wNBytes)
12671295
{
12681296
uint32_t n = (uint32_t)wNBytes >> 1U;
12691297
uint16_t temp1, temp2;
@@ -1286,7 +1314,7 @@ static bool dcd_write_packet_memory(uint16_t dst, const void *__restrict src, si
12861314
srcVal++;
12871315
}
12881316

1289-
if (wNBytes & 0x01)
1317+
if (wNBytes)
12901318
{
12911319
temp1 = *srcVal;
12921320
*pdwVal = temp1;
@@ -1313,7 +1341,37 @@ static bool dcd_write_packet_memory_ff(tu_fifo_t * ff, uint16_t dst, uint16_t wN
13131341

13141342
// We want to read from the FIFO and write it into the PMA, if LIN part is ODD and has WRAPPED part,
13151343
// last lin byte will be combined with wrapped part
1316-
// To ensure PMA is always access 16bit aligned (dst aligned to 16 bit)
1344+
// To ensure PMA access is always aligned (dst aligned to 16 or 32 bit)
1345+
#ifdef PMA_32BIT_ACCESS
1346+
if((cnt_lin & 0x03) && cnt_wrap)
1347+
{
1348+
// Copy first linear part
1349+
dcd_write_packet_memory(dst, info.ptr_lin, cnt_lin &~0x03);
1350+
dst += cnt_lin &~0x03;
1351+
1352+
// Copy last linear bytes & first wrapped bytes to buffer
1353+
uint32_t i;
1354+
uint8_t tmp[4];
1355+
for (i = 0; i < (cnt_lin & 0x03); i++)
1356+
{
1357+
tmp[i] = ((uint8_t*)info.ptr_lin)[(cnt_lin &~0x03) + i];
1358+
}
1359+
uint32_t wCnt = cnt_wrap;
1360+
for (; i < 4 && wCnt > 0; i++, wCnt--)
1361+
{
1362+
tmp[i] = *(uint8_t*)info.ptr_wrap;
1363+
info.ptr_wrap = (uint8_t*)info.ptr_wrap + 1;
1364+
}
1365+
1366+
// Write unaligned buffer
1367+
dcd_write_packet_memory(dst, &tmp, 4);
1368+
dst += 4;
1369+
1370+
// Copy the rest of the wrapped bytes
1371+
if (wCnt)
1372+
dcd_write_packet_memory(dst, info.ptr_wrap, wCnt);
1373+
}
1374+
#else
13171375
if((cnt_lin & 0x01) && cnt_wrap)
13181376
{
13191377
// Copy first linear part
@@ -1328,6 +1386,7 @@ static bool dcd_write_packet_memory_ff(tu_fifo_t * ff, uint16_t dst, uint16_t wN
13281386
// Copy rest of wrapped byte
13291387
dcd_write_packet_memory(dst, ((uint8_t*)info.ptr_wrap) + 1, cnt_wrap - 1);
13301388
}
1389+
#endif
13311390
else
13321391
{
13331392
// Copy linear part
@@ -1347,11 +1406,37 @@ static bool dcd_write_packet_memory_ff(tu_fifo_t * ff, uint16_t dst, uint16_t wN
13471406
}
13481407

13491408
#ifdef PMA_32BIT_ACCESS
1350-
static bool dcd_read_packet_memory(void *__restrict dst, uint16_t src, size_t wNBytes)
1409+
static bool dcd_read_packet_memory(void *__restrict dst, uint16_t src, uint16_t wNBytes)
13511410
{
1352-
// FIXME this should probably be modified for possible unaligned access?
1353-
memcpy(dst, (void*)(USB_PMAADDR+src), wNBytes);
1354-
return true;
1411+
uint8_t* dstVal = dst;
1412+
volatile uint32_t* src32 = (volatile uint32_t*)(USB_PMAADDR + src);
1413+
1414+
for (uint32_t n = wNBytes / 4; n > 0; --n) {
1415+
tu_unaligned_write32(dstVal, *src32++);
1416+
dstVal += 4;
1417+
}
1418+
1419+
wNBytes = wNBytes & 0x03;
1420+
if (wNBytes)
1421+
{
1422+
uint32_t rdVal = *src32;
1423+
1424+
*dstVal = tu_u32_byte0(rdVal);
1425+
wNBytes--;
1426+
1427+
if (wNBytes)
1428+
{
1429+
*++dstVal = tu_u32_byte1(rdVal);
1430+
wNBytes--;
1431+
1432+
if (wNBytes)
1433+
{
1434+
*++dstVal = tu_u32_byte2(rdVal);
1435+
}
1436+
}
1437+
}
1438+
1439+
return true;
13551440
}
13561441
#else
13571442
/**
@@ -1360,7 +1445,7 @@ static bool dcd_read_packet_memory(void *__restrict dst, uint16_t src, size_t wN
13601445
* @param wNBytes no. of bytes to be copied.
13611446
* @retval None
13621447
*/
1363-
static bool dcd_read_packet_memory(void *__restrict dst, uint16_t src, size_t wNBytes)
1448+
static bool dcd_read_packet_memory(void *__restrict dst, uint16_t src, uint16_t wNBytes)
13641449
{
13651450
uint32_t n = (uint32_t)wNBytes >> 1U;
13661451
// The GCC optimizer will combine access to 32-bit sizes if we let it. Force
@@ -1405,26 +1490,57 @@ static bool dcd_read_packet_memory_ff(tu_fifo_t * ff, uint16_t src, uint16_t wNB
14051490
uint16_t cnt_lin = TU_MIN(wNBytes, info.len_lin);
14061491
uint16_t cnt_wrap = TU_MIN(wNBytes - cnt_lin, info.len_wrap);
14071492

1493+
14081494
// We want to read from PMA and write it into the FIFO, if LIN part is ODD and has WRAPPED part,
14091495
// last lin byte will be combined with wrapped part
1410-
// To ensure PMA is always access 16bit aligned (src aligned to 16 bit)
1496+
// To ensure PMA access is always aligned (src aligned to 16 or 32 bit)
1497+
#ifdef PMA_32BIT_ACCESS
1498+
if((cnt_lin & 0x03) && cnt_wrap)
1499+
{
1500+
// Copy first linear part
1501+
dcd_read_packet_memory(info.ptr_lin, src, cnt_lin &~0x03);
1502+
src += cnt_lin &~0x03;
1503+
1504+
// Copy last linear bytes & first wrapped bytes
1505+
uint8_t tmp[4];
1506+
dcd_read_packet_memory(tmp, src, 4);
1507+
src += 4;
1508+
1509+
uint32_t i;
1510+
for (i = 0; i < (cnt_lin & 0x03); i++)
1511+
{
1512+
((uint8_t*)info.ptr_lin)[(cnt_lin &~0x03) + i] = tmp[i];
1513+
}
1514+
uint32_t wCnt = cnt_wrap;
1515+
for (; i < 4 && wCnt > 0; i++, wCnt--)
1516+
{
1517+
*(uint8_t*)info.ptr_wrap = tmp[i];
1518+
info.ptr_wrap = (uint8_t*)info.ptr_wrap + 1;
1519+
}
1520+
1521+
// Copy the rest of the wrapped bytes
1522+
if (wCnt)
1523+
dcd_read_packet_memory(info.ptr_wrap, src, wCnt);
1524+
}
1525+
#else
14111526
if((cnt_lin & 0x01) && cnt_wrap)
14121527
{
14131528
// Copy first linear part
14141529
dcd_read_packet_memory(info.ptr_lin, src, cnt_lin &~0x01);
14151530
src += cnt_lin &~0x01;
14161531

14171532
// Copy last linear byte & first wrapped byte
1418-
uint16_t tmp;
1419-
dcd_read_packet_memory(&tmp, src, 2);
1420-
1421-
((uint8_t*)info.ptr_lin)[cnt_lin - 1] = (uint8_t)tmp;
1422-
((uint8_t*)info.ptr_wrap)[0] = (uint8_t)(tmp >> 8U);
1533+
uint8_t tmp[2];
1534+
dcd_read_packet_memory(tmp, src, 2);
14231535
src += 2;
14241536

1537+
((uint8_t*)info.ptr_lin)[cnt_lin - 1] = tmp[0];
1538+
((uint8_t*)info.ptr_wrap)[0] = tmp[1];
1539+
14251540
// Copy rest of wrapped byte
14261541
dcd_read_packet_memory(((uint8_t*)info.ptr_wrap) + 1, src, cnt_wrap - 1);
14271542
}
1543+
#endif
14281544
else
14291545
{
14301546
// Copy linear part

src/portable/st/stm32_fsdev/dcd_stm32_fsdev_pvt_st.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@
8585
#elif CFG_TUSB_MCU == OPT_MCU_STM32G0
8686
#include "stm32g0xx.h"
8787
#define PMA_32BIT_ACCESS
88-
#define PMA_LENGTH (1024u) // FIXME it is 2048, really
88+
#define PMA_LENGTH (2048u)
8989
#undef USB_PMAADDR
9090
#define USB_PMAADDR USB_DRD_PMAADDR
9191
#define USB_TypeDef USB_DRD_TypeDef

0 commit comments

Comments
 (0)