
Commit 9c44fd5

drm/xe: Add migrate layer functions for SVM support
Add functions which migrate to / from VRAM, accepting a single DPA
argument (VRAM) and an array of dma addresses (SRAM). Used for SVM
migrations.

v2:
- Don't unlock job_mutex in error path of xe_migrate_vram
v3:
- Kernel doc (Thomas)
- Better commit message (Thomas)
- s/dword/num_dword (Thomas)
- Return error on too large of a migration (Thomas)

Signed-off-by: Oak Zeng <[email protected]>
Signed-off-by: Matthew Brost <[email protected]>
Reviewed-by: Thomas Hellström <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent: 77613a2
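As a usage sketch (not part of this commit; the caller-side helper below is hypothetical), the new entry points pair an array of dma-mapped SRAM pages with a single VRAM device physical address and return a fence that signals when the copy completes:

/*
 * Hypothetical caller sketch: migrate npages of system memory, already
 * dma-mapped into dma_addrs[], into a contiguous VRAM allocation
 * starting at vram_dpa, then wait for the copy before updating CPU
 * page tables. Only xe_migrate_to_vram() comes from this patch.
 */
static int svm_copy_to_vram_example(struct xe_migrate *m,
				    unsigned long npages,
				    dma_addr_t *dma_addrs, u64 vram_dpa)
{
	struct dma_fence *fence;

	fence = xe_migrate_to_vram(m, npages, dma_addrs, vram_dpa);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* The migration is complete once the returned fence signals. */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return 0;
}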

2 files changed: +185 −0

drivers/gpu/drm/xe/xe_migrate.c

Lines changed: 175 additions & 0 deletions
@@ -1544,6 +1544,181 @@ void xe_migrate_wait(struct xe_migrate *m)
 		dma_fence_wait(m->fence, false);
 }
 
+static u32 pte_update_cmd_size(u64 size)
+{
+	u32 num_dword;
+	u64 entries = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+
+	XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
+	/*
+	 * The MI_STORE_DATA_IMM command is used to update the page table.
+	 * Each instruction can update at most 0x1ff pte entries. To update
+	 * n (n <= 0x1ff) pte entries, we need:
+	 * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc.)
+	 * 2 dwords for the page table's physical location
+	 * 2*n dwords for the pte values to fill (each pte entry is 2 dwords)
+	 */
+	num_dword = (1 + 2) * DIV_ROUND_UP(entries, 0x1ff);
+	num_dword += entries * 2;
+
+	return num_dword;
+}
+
+static void build_pt_update_batch_sram(struct xe_migrate *m,
+				       struct xe_bb *bb, u32 pt_offset,
+				       dma_addr_t *sram_addr, u32 size)
+{
+	u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
+	u32 ptes;
+	int i = 0;
+
+	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+	while (ptes) {
+		u32 chunk = min(0x1ffU, ptes);
+
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+		bb->cs[bb->len++] = pt_offset;
+		bb->cs[bb->len++] = 0;
+
+		pt_offset += chunk * 8;
+		ptes -= chunk;
+
+		while (chunk--) {
+			u64 addr = sram_addr[i++] & PAGE_MASK;
+
+			xe_tile_assert(m->tile, addr);
+			addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
+								 addr, pat_index,
+								 0, false, 0);
+			bb->cs[bb->len++] = lower_32_bits(addr);
+			bb->cs[bb->len++] = upper_32_bits(addr);
+		}
+	}
+}
+
+enum xe_migrate_copy_dir {
+	XE_MIGRATE_COPY_TO_VRAM,
+	XE_MIGRATE_COPY_TO_SRAM,
+};
+
+static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
+					 unsigned long npages,
+					 dma_addr_t *sram_addr, u64 vram_addr,
+					 const enum xe_migrate_copy_dir dir)
+{
+	struct xe_gt *gt = m->tile->primary_gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u32 batch_size = 2;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u64 round_update_size;
+	struct xe_sched_job *job;
+	struct xe_bb *bb;
+	u32 update_idx, pt_slot = 0;
+	int err;
+
+	if (npages * PAGE_SIZE > MAX_PREEMPTDISABLE_TRANSFER)
+		return ERR_PTR(-EINVAL);
+
+	round_update_size = npages * PAGE_SIZE;
+	batch_size += pte_update_cmd_size(round_update_size);
+	batch_size += EMIT_COPY_DW;
+
+	bb = xe_bb_new(gt, batch_size, true);
+	if (IS_ERR(bb)) {
+		err = PTR_ERR(bb);
+		return ERR_PTR(err);
+	}
+
+	build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
+				   sram_addr, round_update_size);
+
+	if (dir == XE_MIGRATE_COPY_TO_VRAM) {
+		src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0);
+		dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
+	} else {
+		src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
+		dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0);
+	}
+
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+	update_idx = bb->len;
+
+	emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, round_update_size,
+		  XE_PAGE_SIZE);
+
+	job = xe_bb_create_migration_job(m->q, bb,
+					 xe_migrate_batch_base(m, true),
+					 update_idx);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto err;
+	}
+
+	xe_sched_job_add_migrate_flush(job, 0);
+
+	mutex_lock(&m->job_mutex);
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	dma_fence_put(m->fence);
+	m->fence = dma_fence_get(fence);
+	mutex_unlock(&m->job_mutex);
+
+	xe_bb_free(bb, fence);
+
+	return fence;
+
+err:
+	xe_bb_free(bb, NULL);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_migrate_to_vram() - Migrate to VRAM
+ * @m: The migration context.
+ * @npages: Number of pages to migrate.
+ * @src_addr: Array of dma addresses (source of migrate)
+ * @dst_addr: Device physical address of VRAM (destination of migrate)
+ *
+ * Copy from an array of dma addresses to a VRAM device physical address
+ *
+ * Return: dma fence for migrate to signal completion on success, ERR_PTR on
+ * failure
+ */
+struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
+				     unsigned long npages,
+				     dma_addr_t *src_addr,
+				     u64 dst_addr)
+{
+	return xe_migrate_vram(m, npages, src_addr, dst_addr,
+			       XE_MIGRATE_COPY_TO_VRAM);
+}
+
+/**
+ * xe_migrate_from_vram() - Migrate from VRAM
+ * @m: The migration context.
+ * @npages: Number of pages to migrate.
+ * @src_addr: Device physical address of VRAM (source of migrate)
+ * @dst_addr: Array of dma addresses (destination of migrate)
+ *
+ * Copy from a VRAM device physical address to an array of dma addresses
+ *
+ * Return: dma fence for migrate to signal completion on success, ERR_PTR on
+ * failure
+ */
+struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
+				       unsigned long npages,
+				       u64 src_addr,
+				       dma_addr_t *dst_addr)
+{
+	return xe_migrate_vram(m, npages, dst_addr, src_addr,
+			       XE_MIGRATE_COPY_TO_SRAM);
+}
+
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 #include "tests/xe_migrate.c"
 #endif
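As a sanity check on pte_update_cmd_size(), here is a worked instance of the dword accounting. It assumes XE_PAGE_SIZE is 4 KiB and MAX_PREEMPTDISABLE_TRANSFER is SZ_8M, which matches xe_migrate.c at the time of this patch but is an assumption of this example, not part of the diff:

/*
 * Worked example for the largest transfer this path accepts:
 *   size    = SZ_8M
 *   entries = DIV_ROUND_UP(SZ_8M, SZ_4K) = 2048 PTEs to write
 *   packets = DIV_ROUND_UP(2048, 0x1ff)  = 5 MI_STORE_DATA_IMM commands
 *   dwords  = 5 * (1 + 2) + 2048 * 2     = 4111
 * xe_migrate_vram() then adds 2 dwords of batch overhead plus
 * EMIT_COPY_DW for the blit when sizing the batch buffer.
 */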

drivers/gpu/drm/xe/xe_migrate.h

Lines changed: 10 additions & 0 deletions
@@ -95,6 +95,16 @@ struct xe_migrate_pt_update {
 
 struct xe_migrate *xe_migrate_init(struct xe_tile *tile);
 
+struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
+				     unsigned long npages,
+				     dma_addr_t *src_addr,
+				     u64 dst_addr);
+
+struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
+				       unsigned long npages,
+				       u64 src_addr,
+				       dma_addr_t *dst_addr);
+
 struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 				  struct xe_bo *src_bo,
 				  struct xe_bo *dst_bo,