@@ -1544,6 +1544,181 @@ void xe_migrate_wait(struct xe_migrate *m)
 		dma_fence_wait(m->fence, false);
 }
 
+static u32 pte_update_cmd_size(u64 size)
+{
+	u32 num_dword;
+	u64 entries = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+
+	XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
+	/*
+	 * The MI_STORE_DATA_IMM command is used to update the page table.
+	 * Each instruction can update at most 0x1ff pte entries. To update
+	 * n (n <= 0x1ff) pte entries, we need:
+	 * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc)
+	 * 2 dwords for the page table's physical location
+	 * 2*n dwords for the pte values to fill in (each pte entry is 2 dwords)
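+	 *
+	 * For example (illustrative numbers only): updating 1024 ptes takes
+	 * DIV_ROUND_UP(1024, 0x1ff) = 3 commands, i.e.
+	 * 3 * 3 + 1024 * 2 = 2057 dwords.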
+	 */
+	num_dword = (1 + 2) * DIV_ROUND_UP(entries, 0x1ff);
+	num_dword += entries * 2;
+
+	return num_dword;
+}
+
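+/*
+ * Emit MI_STORE_DATA_IMM commands that write one 8-byte pte per SRAM page
+ * into the migrate VM's page table at @pt_offset, in chunks of at most
+ * 0x1ff qwords per command. The ptes are encoded with the WB pat index.
+ */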
+static void build_pt_update_batch_sram(struct xe_migrate *m,
+				       struct xe_bb *bb, u32 pt_offset,
+				       dma_addr_t *sram_addr, u32 size)
+{
+	u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
+	u32 ptes;
+	int i = 0;
+
+	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+	while (ptes) {
+		u32 chunk = min(0x1ffU, ptes);
+
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+		bb->cs[bb->len++] = pt_offset;
+		bb->cs[bb->len++] = 0;
+
+		pt_offset += chunk * 8;
+		ptes -= chunk;
+
+		while (chunk--) {
+			u64 addr = sram_addr[i++] & PAGE_MASK;
+
+			xe_tile_assert(m->tile, addr);
+			addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
+								 addr, pat_index,
+								 0, false, 0);
+			bb->cs[bb->len++] = lower_32_bits(addr);
+			bb->cs[bb->len++] = upper_32_bits(addr);
+		}
+	}
+}
+
+enum xe_migrate_copy_dir {
+	XE_MIGRATE_COPY_TO_VRAM,
+	XE_MIGRATE_COPY_TO_SRAM,
+};
+
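+/*
+ * Build and submit a single migration job: the first half of the batch
+ * points pt slot 0 of the migrate VM at the SRAM pages, the second half
+ * copies between that mapping and @vram_addr in the direction given by
+ * @dir. Transfers are limited to MAX_PREEMPTDISABLE_TRANSFER.
+ */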
+static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
+					 unsigned long npages,
+					 dma_addr_t *sram_addr, u64 vram_addr,
+					 const enum xe_migrate_copy_dir dir)
+{
+	struct xe_gt *gt = m->tile->primary_gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u32 batch_size = 2;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u64 round_update_size;
+	struct xe_sched_job *job;
+	struct xe_bb *bb;
+	u32 update_idx, pt_slot = 0;
+	int err;
+
+	if (npages * PAGE_SIZE > MAX_PREEMPTDISABLE_TRANSFER)
+		return ERR_PTR(-EINVAL);
+
+	round_update_size = npages * PAGE_SIZE;
+	batch_size += pte_update_cmd_size(round_update_size);
+	batch_size += EMIT_COPY_DW;
+
+	bb = xe_bb_new(gt, batch_size, true);
+	if (IS_ERR(bb)) {
+		err = PTR_ERR(bb);
+		return ERR_PTR(err);
+	}
+
+	build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
+				   sram_addr, round_update_size);
+
+	if (dir == XE_MIGRATE_COPY_TO_VRAM) {
+		src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0);
+		dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
+
+	} else {
+		src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
+		dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0);
+	}
+
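+	/*
+	 * End the pte update section of the batch here; update_idx marks
+	 * where the copy commands start, so the job can run the pte writes
+	 * before the copy itself.
+	 */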
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+	update_idx = bb->len;
+
+	emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, round_update_size,
+		  XE_PAGE_SIZE);
+
+	job = xe_bb_create_migration_job(m->q, bb,
+					 xe_migrate_batch_base(m, true),
+					 update_idx);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto err;
+	}
+
+	xe_sched_job_add_migrate_flush(job, 0);
+
+	mutex_lock(&m->job_mutex);
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	dma_fence_put(m->fence);
+	m->fence = dma_fence_get(fence);
+	mutex_unlock(&m->job_mutex);
+
+	xe_bb_free(bb, fence);
+
+	return fence;
+
+err:
+	xe_bb_free(bb, NULL);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_migrate_to_vram() - Migrate to VRAM
+ * @m: The migration context.
+ * @npages: Number of pages to migrate.
+ * @src_addr: Array of dma addresses (source of migrate)
+ * @dst_addr: Device physical address of VRAM (destination of migrate)
+ *
+ * Copy from an array of dma addresses to a VRAM device physical address
+ *
+ * Return: dma fence for migrate to signal completion on success, ERR_PTR on
+ *	   failure
+ */
+struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
+				     unsigned long npages,
+				     dma_addr_t *src_addr,
+				     u64 dst_addr)
+{
+	return xe_migrate_vram(m, npages, src_addr, dst_addr,
+			       XE_MIGRATE_COPY_TO_VRAM);
+}
+
+/**
+ * xe_migrate_from_vram() - Migrate from VRAM
+ * @m: The migration context.
+ * @npages: Number of pages to migrate.
+ * @src_addr: Device physical address of VRAM (source of migrate)
+ * @dst_addr: Array of dma addresses (destination of migrate)
+ *
+ * Copy from a VRAM device physical address to an array of dma addresses
+ *
+ * Return: dma fence for migrate to signal completion on success, ERR_PTR on
+ *	   failure
+ */
+struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
+				       unsigned long npages,
+				       u64 src_addr,
+				       dma_addr_t *dst_addr)
+{
+	return xe_migrate_vram(m, npages, dst_addr, src_addr,
+			       XE_MIGRATE_COPY_TO_SRAM);
+}
+
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 #include "tests/xe_migrate.c"
 #endif
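Usage sketch (illustrative only, not part of the patch): a caller that has already dma-mapped npages of system pages into a src_addr[] array could migrate them to a VRAM region starting at vram_dpa and wait for the copy to complete roughly as follows; tile, npages, src_addr and vram_dpa are placeholders for the caller's context.

	struct dma_fence *fence;

	/* Kick off the SRAM -> VRAM blit on the tile's migrate context. */
	fence = xe_migrate_to_vram(tile->migrate, npages, src_addr, vram_dpa);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Wait for the copy job to signal, then drop our fence reference. */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);
	return 0;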