@@ -186,12 +186,12 @@ static void dcd_ep_ctr_handler(void);
186
186
// File-scope state and forward declarations for the packet-memory helpers.
// In this diff view, lines marked "-" are the removed declarations and lines
// marked "+" are their replacements.
static uint8_t open_ep_count ;
187
187
static uint16_t ep_buf_ptr ; ///< Points to first free memory location
188
188
static void dcd_pma_alloc_reset (void );
189
// The length parameter is narrowed from size_t to uint16_t.
- static uint16_t dcd_pma_alloc (uint8_t ep_addr , size_t length );
189
+ static uint16_t dcd_pma_alloc (uint8_t ep_addr , uint16_t length );
190
190
static void dcd_pma_free (uint8_t ep_addr );
191
191
static void dcd_ep_free (uint8_t ep_addr );
192
192
static uint8_t dcd_ep_alloc (uint8_t ep_addr , uint8_t ep_type );
193
// The byte count of the plain copy helpers is narrowed from size_t to uint16_t,
// which matches the wNBytes type already used by the FIFO variants declared below.
- static bool dcd_write_packet_memory (uint16_t dst , const void * __restrict src , size_t wNBytes );
194
- static bool dcd_read_packet_memory (void * __restrict dst , uint16_t src , size_t wNBytes );
193
+ static bool dcd_write_packet_memory (uint16_t dst , const void * __restrict src , uint16_t wNBytes );
194
+ static bool dcd_read_packet_memory (void * __restrict dst , uint16_t src , uint16_t wNBytes );
195
195
196
196
// FIFO-based variants: same dst/src offset and byte count, with a tu_fifo_t as the other endpoint.
static bool dcd_write_packet_memory_ff (tu_fifo_t * ff , uint16_t dst , uint16_t wNBytes );
197
197
static bool dcd_read_packet_memory_ff (tu_fifo_t * ff , uint16_t src , uint16_t wNBytes );
@@ -792,7 +792,7 @@ static void dcd_pma_alloc_reset(void)
792
792
*
793
793
* During failure, TU_ASSERT is used. If this happens, rework/reallocate memory manually.
794
794
*/
795
- static uint16_t dcd_pma_alloc (uint8_t ep_addr , size_t length )
795
+ static uint16_t dcd_pma_alloc (uint8_t ep_addr , uint16_t length )
796
796
{
797
797
xfer_ctl_t * epXferCtl = xfer_ctl_ptr (ep_addr );
798
798
@@ -804,6 +804,13 @@ static uint16_t dcd_pma_alloc(uint8_t ep_addr, size_t length)
804
804
return epXferCtl -> pma_ptr ;
805
805
}
806
806
807
+ // Ensure allocated buffer is aligned
808
+ #ifdef PMA_32BIT_ACCESS
809
+ length = (length + 3 ) & ~0x03 ;
810
+ #else
811
+ length = (length + 1 ) & ~0x01 ;
812
+ #endif
813
+
807
814
open_ep_count ++ ;
808
815
809
816
uint16_t addr = ep_buf_ptr ;
@@ -814,7 +821,7 @@ static uint16_t dcd_pma_alloc(uint8_t ep_addr, size_t length)
814
821
815
822
epXferCtl -> pma_ptr = addr ;
816
823
epXferCtl -> pma_alloc_size = length ;
817
- //TU_LOG2 ("dcd_pma_alloc(%x,%x)=%x\r\n",ep_addr,length,addr);
824
+ //TU_LOG1 ("dcd_pma_alloc(%x,%x)=%x\r\n",ep_addr,length,addr);
818
825
819
826
return addr ;
820
827
}
@@ -1240,15 +1247,36 @@ void dcd_edpt_clear_stall (uint8_t rhport, uint8_t ep_addr)
1240
1247
}
1241
1248
1242
1249
#ifdef PMA_32BIT_ACCESS
1243
/**
 * @brief Copy a buffer from user memory into USB packet memory
 *        (variant compiled when PMA_32BIT_ACCESS is defined).
 *
 * In this diff view, lines marked "-" are the removed implementation and
 * lines marked "+" are its replacement.
 *
 * @param dst      Offset added to USB_PMAADDR to form the destination address.
 * @param src      Source buffer; the new code walks it through a byte pointer.
 * @param wNBytes  no. of bytes to be copied.
 * @retval true    Returned unconditionally.
 */
- static bool dcd_write_packet_memory (uint16_t dst , const void * __restrict src , size_t wNBytes )
1250
+ static bool dcd_write_packet_memory (uint16_t dst , const void * __restrict src , uint16_t wNBytes )
1244
1251
{
1245
- // FIXME original function uses byte-access to source memory (to support non-aligned buffers)
1246
// Removed: the source was dereferenced directly as 32-bit words.
- const uint32_t * src32 = (const uint32_t * )(src );
1252
// New: the source is tracked as a byte pointer and advanced manually.
+ const uint8_t * srcVal = src ;
1247
1253
// The destination is always written through a volatile 32-bit pointer.
volatile uint32_t * dst32 = (volatile uint32_t * )(USB_PMAADDR + dst );
1248
- for (unsigned n = wNBytes /4 ; n > 0 ; -- n ) {
1249
- * dst32 ++ = * src32 ++ ;
1254
+
1255
// Whole words: one 32-bit store per four source bytes.
// NOTE(review): tu_unaligned_read32() presumably tolerates a source that is not
// 4-byte aligned, which is the concern raised by the removed FIXME - confirm.
+ for (uint32_t n = wNBytes / 4 ; n > 0 ; -- n ) {
1256
+ * dst32 ++ = tu_unaligned_read32 (srcVal );
1257
+ srcVal += 4 ;
1258
+ }
1259
+
1260
// Tail: the 1..3 leftover bytes are packed into one word (first byte in bits 0-7,
// second in bits 8-15, third in bits 16-23; remaining bits stay zero) and stored
// with a single 32-bit write. Nothing is stored when wNBytes is a multiple of 4.
+ wNBytes = wNBytes & 0x03 ;
1261
+ if (wNBytes )
1262
+ {
1263
+ uint32_t wrVal = * srcVal ;
1264
+ wNBytes -- ;
1265
+
1266
+ if (wNBytes )
1267
+ {
1268
+ wrVal |= * ++ srcVal << 8 ;
1269
+ wNBytes -- ;
1270
+
1271
+ if (wNBytes )
1272
+ {
1273
+ wrVal |= * ++ srcVal << 16 ;
1274
+ }
1275
+ }
1276
+
1277
+ * dst32 = wrVal ;
1250
1278
}
1251
// Removed: a masked trailing word was stored on every call; when wNBytes % 4 == 0
// the mask evaluates to 0, so a zero word was written after the copied data.
- * dst32 = ( * src32 ) & (( 1 << 8 * ( wNBytes % 4 )) - 1 );
1279
+
1252
1280
return true;
1253
1281
}
1254
1282
#else
@@ -1263,7 +1291,7 @@ static bool dcd_write_packet_memory(uint16_t dst, const void *__restrict src, si
1263
1291
* @param wNBytes no. of bytes to be copied.
1264
1292
* @retval None
1265
1293
*/
1266
- static bool dcd_write_packet_memory (uint16_t dst , const void * __restrict src , size_t wNBytes )
1294
+ static bool dcd_write_packet_memory (uint16_t dst , const void * __restrict src , uint16_t wNBytes )
1267
1295
{
1268
1296
uint32_t n = (uint32_t )wNBytes >> 1U ;
1269
1297
uint16_t temp1 , temp2 ;
@@ -1286,7 +1314,7 @@ static bool dcd_write_packet_memory(uint16_t dst, const void *__restrict src, si
1286
1314
srcVal ++ ;
1287
1315
}
1288
1316
1289
- if (wNBytes & 0x01 )
1317
+ if (wNBytes )
1290
1318
{
1291
1319
temp1 = * srcVal ;
1292
1320
* pdwVal = temp1 ;
@@ -1313,7 +1341,37 @@ static bool dcd_write_packet_memory_ff(tu_fifo_t * ff, uint16_t dst, uint16_t wN
1313
1341
1314
1342
// We want to read from the FIFO and write it into the PMA, if LIN part is ODD and has WRAPPED part,
1315
1343
// last lin byte will be combined with wrapped part
1316
- // To ensure PMA is always access 16bit aligned (dst aligned to 16 bit)
1344
+ // To ensure PMA is always access aligned (dst aligned to 16 or 32 bit)
1345
+ #ifdef PMA_32BIT_ACCESS
1346
+ if ((cnt_lin & 0x03 ) && cnt_wrap )
1347
+ {
1348
+ // Copy first linear part
1349
+ dcd_write_packet_memory (dst , info .ptr_lin , cnt_lin & ~0x03 );
1350
+ dst += cnt_lin & ~0x03 ;
1351
+
1352
+ // Copy last linear bytes & first wrapped bytes to buffer
1353
+ uint32_t i ;
1354
+ uint8_t tmp [4 ];
1355
+ for (i = 0 ; i < (cnt_lin & 0x03 ); i ++ )
1356
+ {
1357
+ tmp [i ] = ((uint8_t * )info .ptr_lin )[(cnt_lin & ~0x03 ) + i ];
1358
+ }
1359
+ uint32_t wCnt = cnt_wrap ;
1360
+ for (; i < 4 && wCnt > 0 ; i ++ , wCnt -- )
1361
+ {
1362
+ tmp [i ] = * (uint8_t * )info .ptr_wrap ;
1363
+ info .ptr_wrap = (uint8_t * )info .ptr_wrap + 1 ;
1364
+ }
1365
+
1366
+ // Write unaligned buffer
1367
+ dcd_write_packet_memory (dst , & tmp , 4 );
1368
+ dst += 4 ;
1369
+
1370
+ // Copy rest of wrapped byte
1371
+ if (wCnt )
1372
+ dcd_write_packet_memory (dst , info .ptr_wrap , wCnt );
1373
+ }
1374
+ #else
1317
1375
if ((cnt_lin & 0x01 ) && cnt_wrap )
1318
1376
{
1319
1377
// Copy first linear part
@@ -1328,6 +1386,7 @@ static bool dcd_write_packet_memory_ff(tu_fifo_t * ff, uint16_t dst, uint16_t wN
1328
1386
// Copy rest of wrapped byte
1329
1387
dcd_write_packet_memory (dst , ((uint8_t * )info .ptr_wrap ) + 1 , cnt_wrap - 1 );
1330
1388
}
1389
+ #endif
1331
1390
else
1332
1391
{
1333
1392
// Copy linear part
@@ -1347,11 +1406,37 @@ static bool dcd_write_packet_memory_ff(tu_fifo_t * ff, uint16_t dst, uint16_t wN
1347
1406
}
1348
1407
1349
1408
#ifdef PMA_32BIT_ACCESS
1350
/**
 * @brief Copy a buffer from USB packet memory into user memory
 *        (variant compiled when PMA_32BIT_ACCESS is defined).
 *
 * In this diff view, lines marked "-" are the removed implementation and
 * lines marked "+" are its replacement.
 *
 * @param dst      Destination buffer; the new code writes it through a byte pointer.
 * @param src      Offset added to USB_PMAADDR to form the source address.
 * @param wNBytes  no. of bytes to be copied.
 * @retval true    Returned unconditionally.
 */
- static bool dcd_read_packet_memory (void * __restrict dst , uint16_t src , size_t wNBytes )
1409
+ static bool dcd_read_packet_memory (void * __restrict dst , uint16_t src , uint16_t wNBytes )
1351
1410
{
1352
- // FIXME this should probably be modified for possible unaligned access?
1353
// Removed: a plain memcpy() straight from the packet memory address.
- memcpy (dst , (void * )(USB_PMAADDR + src ), wNBytes );
1354
- return true;
1411
// New: packet memory is read only through a volatile 32-bit pointer, while the
// destination is tracked as a byte pointer and advanced manually.
+ uint8_t * dstVal = dst ;
1412
+ volatile uint32_t * src32 = (volatile uint32_t * )(USB_PMAADDR + src );
1413
+
1414
// Whole words: one 32-bit load per four destination bytes.
// NOTE(review): tu_unaligned_write32() presumably tolerates a destination that is
// not 4-byte aligned - confirm against its definition.
+ for (uint32_t n = wNBytes / 4 ; n > 0 ; -- n ) {
1415
+ tu_unaligned_write32 (dstVal , * src32 ++ );
1416
+ dstVal += 4 ;
1417
+ }
1418
+
1419
// Tail: for 1..3 leftover bytes one more full word is loaded, and only that many
// bytes of it are stored, so dst is never written beyond wNBytes bytes.
// NOTE(review): tu_u32_byte0/1/2 presumably select bits 0-7 / 8-15 / 16-23 - confirm.
+ wNBytes = wNBytes & 0x03 ;
1420
+ if (wNBytes )
1421
+ {
1422
+ uint32_t rdVal = * src32 ;
1423
+
1424
+ * dstVal = tu_u32_byte0 (rdVal );
1425
+ wNBytes -- ;
1426
+
1427
+ if (wNBytes )
1428
+ {
1429
+ * ++ dstVal = tu_u32_byte1 (rdVal );
1430
+ wNBytes -- ;
1431
+
1432
+ if (wNBytes )
1433
+ {
1434
+ * ++ dstVal = tu_u32_byte2 (rdVal );
1435
+ }
1436
+ }
1437
+ }
1438
+
1439
+ return true;
1355
1440
}
1356
1441
#else
1357
1442
/**
@@ -1360,7 +1445,7 @@ static bool dcd_read_packet_memory(void *__restrict dst, uint16_t src, size_t wN
1360
1445
* @param wNBytes no. of bytes to be copied.
1361
1446
* @retval None
1362
1447
*/
1363
- static bool dcd_read_packet_memory (void * __restrict dst , uint16_t src , size_t wNBytes )
1448
+ static bool dcd_read_packet_memory (void * __restrict dst , uint16_t src , uint16_t wNBytes )
1364
1449
{
1365
1450
uint32_t n = (uint32_t )wNBytes >> 1U ;
1366
1451
// The GCC optimizer will combine access to 32-bit sizes if we let it. Force
@@ -1405,26 +1490,57 @@ static bool dcd_read_packet_memory_ff(tu_fifo_t * ff, uint16_t src, uint16_t wNB
1405
1490
uint16_t cnt_lin = TU_MIN (wNBytes , info .len_lin );
1406
1491
uint16_t cnt_wrap = TU_MIN (wNBytes - cnt_lin , info .len_wrap );
1407
1492
1493
+
1408
1494
// We want to read from PMA and write it into the FIFO, if LIN part is ODD and has WRAPPED part,
1409
1495
// last lin byte will be combined with wrapped part
1410
- // To ensure PMA is always access 16bit aligned (src aligned to 16 bit)
1496
+ // To ensure PMA is always access aligned (src aligned to 16 or 32 bit)
1497
+ #ifdef PMA_32BIT_ACCESS
1498
+ if ((cnt_lin & 0x03 ) && cnt_wrap )
1499
+ {
1500
+ // Copy first linear part
1501
+ dcd_read_packet_memory (info .ptr_lin , src , cnt_lin & ~0x03 );
1502
+ src += cnt_lin & ~0x03 ;
1503
+
1504
+ // Copy last linear bytes & first wrapped bytes
1505
+ uint8_t tmp [4 ];
1506
+ dcd_read_packet_memory (tmp , src , 4 );
1507
+ src += 4 ;
1508
+
1509
+ uint32_t i ;
1510
+ for (i = 0 ; i < (cnt_lin & 0x03 ); i ++ )
1511
+ {
1512
+ ((uint8_t * )info .ptr_lin )[(cnt_lin & ~0x03 ) + i ] = tmp [i ];
1513
+ }
1514
+ uint32_t wCnt = cnt_wrap ;
1515
+ for (; i < 4 && wCnt > 0 ; i ++ , wCnt -- )
1516
+ {
1517
+ * (uint8_t * )info .ptr_wrap = tmp [i ];
1518
+ info .ptr_wrap = (uint8_t * )info .ptr_wrap + 1 ;
1519
+ }
1520
+
1521
+ // Copy rest of wrapped byte
1522
+ if (wCnt )
1523
+ dcd_read_packet_memory (info .ptr_wrap , src , wCnt );
1524
+ }
1525
+ #else
1411
1526
if ((cnt_lin & 0x01 ) && cnt_wrap )
1412
1527
{
1413
1528
// Copy first linear part
1414
1529
dcd_read_packet_memory (info .ptr_lin , src , cnt_lin & ~0x01 );
1415
1530
src += cnt_lin & ~0x01 ;
1416
1531
1417
1532
// Copy last linear byte & first wrapped byte
1418
- uint16_t tmp ;
1419
- dcd_read_packet_memory (& tmp , src , 2 );
1420
-
1421
- ((uint8_t * )info .ptr_lin )[cnt_lin - 1 ] = (uint8_t )tmp ;
1422
- ((uint8_t * )info .ptr_wrap )[0 ] = (uint8_t )(tmp >> 8U );
1533
+ uint8_t tmp [2 ];
1534
+ dcd_read_packet_memory (tmp , src , 2 );
1423
1535
src += 2 ;
1424
1536
1537
+ ((uint8_t * )info .ptr_lin )[cnt_lin - 1 ] = tmp [0 ];
1538
+ ((uint8_t * )info .ptr_wrap )[0 ] = tmp [1 ];
1539
+
1425
1540
// Copy rest of wrapped byte
1426
1541
dcd_read_packet_memory (((uint8_t * )info .ptr_wrap ) + 1 , src , cnt_wrap - 1 );
1427
1542
}
1543
+ #endif
1428
1544
else
1429
1545
{
1430
1546
// Copy linear part
0 commit comments