Skip to content

Commit e1bed9a

Browse files
yishaihr authored and leon committed
RDMA/mlx5: Add DMAH support for reg_user_mr/reg_user_dmabuf_mr
As part of this enhancement, allow the creation of an MKEY associated with a DMA handle.

Additional notes:

MKEYs with TPH (i.e. TLP Processing Hints) attributes are currently not UMR-capable unless explicitly enabled by firmware or hardware. Therefore, to maintain such MKEYs in the MR cache, the TPH fields have been added to the rb_key structure, with a dedicated hash bucket.

The ability to bypass the kernel verbs flow and create an MKEY with TPH attributes using DEVX has been restricted. TPH must follow the standard InfiniBand flow, where a DMAH is created with the appropriate security checks and management mechanisms in place.

DMA handles are currently not supported in conjunction with On-Demand Paging (ODP).

Re-registration of memory regions originally created with TPH attributes is currently not supported.

Signed-off-by: Yishai Hadas <[email protected]>
Reviewed-by: Edward Srouji <[email protected]>
Link: https://patch.msgid.link/1c485651cf8417694ddebb80446c5093d5a791a9.1752752567.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <[email protected]>
1 parent a272019 commit e1bed9a

File tree

4 files changed

+94
-20
lines changed

4 files changed

+94
-20
lines changed

drivers/infiniband/hw/mlx5/devx.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1393,6 +1393,10 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
13931393
}
13941394

13951395
MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
1396+
/* TPH is not allowed to bypass the regular kernel's verbs flow */
1397+
MLX5_SET(mkc, mkc, pcie_tph_en, 0);
1398+
MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index,
1399+
MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX);
13961400
return 0;
13971401
}
13981402

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -638,8 +638,13 @@ enum mlx5_mkey_type {
638638
MLX5_MKEY_IMPLICIT_CHILD,
639639
};
640640

641+
/* Used for non-existent ph value */
642+
#define MLX5_IB_NO_PH 0xff
643+
641644
struct mlx5r_cache_rb_key {
642645
u8 ats:1;
646+
u8 ph;
647+
u16 st_index;
643648
unsigned int access_mode;
644649
unsigned int access_flags;
645650
unsigned int ndescs;

drivers/infiniband/hw/mlx5/mr.c

Lines changed: 84 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "mlx5_ib.h"
4545
#include "umr.h"
4646
#include "data_direct.h"
47+
#include "dmah.h"
4748

4849
enum {
4950
MAX_PENDING_REG_MR = 8,
@@ -57,7 +58,7 @@ create_mkey_callback(int status, struct mlx5_async_work *context);
5758
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
5859
u64 iova, int access_flags,
5960
unsigned long page_size, bool populate,
60-
int access_mode);
61+
int access_mode, u16 st_index, u8 ph);
6162
static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr);
6263

6364
static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
@@ -256,6 +257,14 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
256257
get_mkc_octo_size(ent->rb_key.access_mode,
257258
ent->rb_key.ndescs));
258259
MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
260+
261+
if (ent->rb_key.ph != MLX5_IB_NO_PH) {
262+
MLX5_SET(mkc, mkc, pcie_tph_en, 1);
263+
MLX5_SET(mkc, mkc, pcie_tph_ph, ent->rb_key.ph);
264+
if (ent->rb_key.st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
265+
MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index,
266+
ent->rb_key.st_index);
267+
}
259268
}
260269

261270
/* Asynchronously schedule new MRs to be populated in the cache. */
@@ -641,6 +650,14 @@ static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1,
641650
if (res)
642651
return res;
643652

653+
res = key1.st_index - key2.st_index;
654+
if (res)
655+
return res;
656+
657+
res = key1.ph - key2.ph;
658+
if (res)
659+
return res;
660+
644661
/*
645662
* keep ndescs the last in the compare table since the find function
646663
* searches for an exact match on all properties and only closest
@@ -712,6 +729,8 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
712729
smallest->rb_key.access_mode == rb_key.access_mode &&
713730
smallest->rb_key.access_flags == rb_key.access_flags &&
714731
smallest->rb_key.ats == rb_key.ats &&
732+
smallest->rb_key.st_index == rb_key.st_index &&
733+
smallest->rb_key.ph == rb_key.ph &&
715734
smallest->rb_key.ndescs <= ndescs_limit) ?
716735
smallest :
717736
NULL;
@@ -786,7 +805,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
786805
struct mlx5r_cache_rb_key rb_key = {
787806
.ndescs = ndescs,
788807
.access_mode = access_mode,
789-
.access_flags = get_unchangeable_access_flags(dev, access_flags)
808+
.access_flags = get_unchangeable_access_flags(dev, access_flags),
809+
.ph = MLX5_IB_NO_PH,
790810
};
791811
struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key);
792812

@@ -943,6 +963,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
943963
struct rb_root *root = &dev->cache.rb_root;
944964
struct mlx5r_cache_rb_key rb_key = {
945965
.access_mode = MLX5_MKC_ACCESS_MODE_MTT,
966+
.ph = MLX5_IB_NO_PH,
946967
};
947968
struct mlx5_cache_ent *ent;
948969
struct rb_node *node;
@@ -1119,7 +1140,8 @@ static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
11191140

11201141
static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
11211142
struct ib_umem *umem, u64 iova,
1122-
int access_flags, int access_mode)
1143+
int access_flags, int access_mode,
1144+
u16 st_index, u8 ph)
11231145
{
11241146
struct mlx5_ib_dev *dev = to_mdev(pd->device);
11251147
struct mlx5r_cache_rb_key rb_key = {};
@@ -1139,14 +1161,17 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
11391161
rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size);
11401162
rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags);
11411163
rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
1164+
rb_key.st_index = st_index;
1165+
rb_key.ph = ph;
11421166
ent = mkey_cache_ent_from_rb_key(dev, rb_key);
11431167
/*
11441168
* If the MR can't come from the cache then synchronously create an uncached
11451169
* one.
11461170
*/
11471171
if (!ent) {
11481172
mutex_lock(&dev->slow_path_mutex);
1149-
mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode);
1173+
mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode,
1174+
st_index, ph);
11501175
mutex_unlock(&dev->slow_path_mutex);
11511176
if (IS_ERR(mr))
11521177
return mr;
@@ -1231,7 +1256,7 @@ reg_create_crossing_vhca_mr(struct ib_pd *pd, u64 iova, u64 length, int access_f
12311256
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
12321257
u64 iova, int access_flags,
12331258
unsigned long page_size, bool populate,
1234-
int access_mode)
1259+
int access_mode, u16 st_index, u8 ph)
12351260
{
12361261
struct mlx5_ib_dev *dev = to_mdev(pd->device);
12371262
struct mlx5_ib_mr *mr;
@@ -1241,7 +1266,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
12411266
u32 *in;
12421267
int err;
12431268
bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)) &&
1244-
(access_mode == MLX5_MKC_ACCESS_MODE_MTT);
1269+
(access_mode == MLX5_MKC_ACCESS_MODE_MTT) &&
1270+
(ph == MLX5_IB_NO_PH);
12451271
bool ksm_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
12461272

12471273
if (!page_size)
@@ -1305,6 +1331,13 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
13051331
get_octo_len(iova, umem->length, mr->page_shift));
13061332
}
13071333

1334+
if (ph != MLX5_IB_NO_PH) {
1335+
MLX5_SET(mkc, mkc, pcie_tph_en, 1);
1336+
MLX5_SET(mkc, mkc, pcie_tph_ph, ph);
1337+
if (st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
1338+
MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, st_index);
1339+
}
1340+
13081341
err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
13091342
if (err) {
13101343
mlx5_ib_warn(dev, "create mkey failed\n");
@@ -1424,24 +1457,37 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
14241457
}
14251458

14261459
static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
1427-
u64 iova, int access_flags)
1460+
u64 iova, int access_flags,
1461+
struct ib_dmah *dmah)
14281462
{
14291463
struct mlx5_ib_dev *dev = to_mdev(pd->device);
14301464
struct mlx5_ib_mr *mr = NULL;
14311465
bool xlt_with_umr;
1466+
u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX;
1467+
u8 ph = MLX5_IB_NO_PH;
14321468
int err;
14331469

1470+
if (dmah) {
1471+
struct mlx5_ib_dmah *mdmah = to_mdmah(dmah);
1472+
1473+
ph = dmah->ph;
1474+
if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
1475+
st_index = mdmah->st_index;
1476+
}
1477+
14341478
xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
14351479
if (xlt_with_umr) {
14361480
mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
1437-
MLX5_MKC_ACCESS_MODE_MTT);
1481+
MLX5_MKC_ACCESS_MODE_MTT,
1482+
st_index, ph);
14381483
} else {
14391484
unsigned long page_size = mlx5_umem_mkc_find_best_pgsz(
14401485
dev, umem, iova, MLX5_MKC_ACCESS_MODE_MTT);
14411486

14421487
mutex_lock(&dev->slow_path_mutex);
14431488
mr = reg_create(pd, umem, iova, access_flags, page_size,
1444-
true, MLX5_MKC_ACCESS_MODE_MTT);
1489+
true, MLX5_MKC_ACCESS_MODE_MTT,
1490+
st_index, ph);
14451491
mutex_unlock(&dev->slow_path_mutex);
14461492
}
14471493
if (IS_ERR(mr)) {
@@ -1505,7 +1551,9 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
15051551
return ERR_CAST(odp);
15061552

15071553
mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags,
1508-
MLX5_MKC_ACCESS_MODE_MTT);
1554+
MLX5_MKC_ACCESS_MODE_MTT,
1555+
MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX,
1556+
MLX5_IB_NO_PH);
15091557
if (IS_ERR(mr)) {
15101558
ib_umem_release(&odp->umem);
15111559
return ERR_CAST(mr);
@@ -1536,7 +1584,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
15361584
struct ib_umem *umem;
15371585
int err;
15381586

1539-
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || dmah)
1587+
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
1588+
((access_flags & IB_ACCESS_ON_DEMAND) && dmah))
15401589
return ERR_PTR(-EOPNOTSUPP);
15411590

15421591
mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
@@ -1552,7 +1601,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
15521601
umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
15531602
if (IS_ERR(umem))
15541603
return ERR_CAST(umem);
1555-
return create_real_mr(pd, umem, iova, access_flags);
1604+
return create_real_mr(pd, umem, iova, access_flags, dmah);
15561605
}
15571606

15581607
static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
@@ -1577,12 +1626,15 @@ static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
15771626
static struct ib_mr *
15781627
reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
15791628
u64 offset, u64 length, u64 virt_addr,
1580-
int fd, int access_flags, int access_mode)
1629+
int fd, int access_flags, int access_mode,
1630+
struct ib_dmah *dmah)
15811631
{
15821632
bool pinned_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
15831633
struct mlx5_ib_dev *dev = to_mdev(pd->device);
15841634
struct mlx5_ib_mr *mr = NULL;
15851635
struct ib_umem_dmabuf *umem_dmabuf;
1636+
u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX;
1637+
u8 ph = MLX5_IB_NO_PH;
15861638
int err;
15871639

15881640
err = mlx5r_umr_resource_init(dev);
@@ -1605,8 +1657,17 @@ reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
16051657
return ERR_CAST(umem_dmabuf);
16061658
}
16071659

1660+
if (dmah) {
1661+
struct mlx5_ib_dmah *mdmah = to_mdmah(dmah);
1662+
1663+
ph = dmah->ph;
1664+
if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
1665+
st_index = mdmah->st_index;
1666+
}
1667+
16081668
mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
1609-
access_flags, access_mode);
1669+
access_flags, access_mode,
1670+
st_index, ph);
16101671
if (IS_ERR(mr)) {
16111672
ib_umem_release(&umem_dmabuf->umem);
16121673
return ERR_CAST(mr);
@@ -1663,7 +1724,8 @@ reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset,
16631724
access_flags &= ~IB_ACCESS_RELAXED_ORDERING;
16641725
crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev,
16651726
offset, length, virt_addr, fd,
1666-
access_flags, MLX5_MKC_ACCESS_MODE_KSM);
1727+
access_flags, MLX5_MKC_ACCESS_MODE_KSM,
1728+
NULL);
16671729
if (IS_ERR(crossed_mr)) {
16681730
ret = PTR_ERR(crossed_mr);
16691731
goto end;
@@ -1698,7 +1760,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
16981760
int err;
16991761

17001762
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
1701-
!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) || dmah)
1763+
!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
17021764
return ERR_PTR(-EOPNOTSUPP);
17031765

17041766
if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS)) {
@@ -1723,7 +1785,8 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
17231785

17241786
return reg_user_mr_dmabuf(pd, pd->device->dma_device,
17251787
offset, length, virt_addr,
1726-
fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT);
1788+
fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT,
1789+
dmah);
17271790
}
17281791

17291792
/*
@@ -1821,7 +1884,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
18211884
struct mlx5_ib_mr *mr = to_mmr(ib_mr);
18221885
int err;
18231886

1824-
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct)
1887+
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct ||
1888+
mr->mmkey.rb_key.ph != MLX5_IB_NO_PH)
18251889
return ERR_PTR(-EOPNOTSUPP);
18261890

18271891
mlx5_ib_dbg(
@@ -1865,7 +1929,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
18651929
atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
18661930

18671931
return create_real_mr(new_pd, umem, mr->ibmr.iova,
1868-
new_access_flags);
1932+
new_access_flags, NULL);
18691933
}
18701934

18711935
/*
@@ -1896,7 +1960,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
18961960
}
18971961
return NULL;
18981962
}
1899-
return create_real_mr(new_pd, new_umem, iova, new_access_flags);
1963+
return create_real_mr(new_pd, new_umem, iova, new_access_flags, NULL);
19001964
}
19011965

19021966
/*

drivers/infiniband/hw/mlx5/odp.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1883,6 +1883,7 @@ int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
18831883
struct mlx5r_cache_rb_key rb_key = {
18841884
.access_mode = MLX5_MKC_ACCESS_MODE_KSM,
18851885
.ndescs = mlx5_imr_ksm_entries,
1886+
.ph = MLX5_IB_NO_PH,
18861887
};
18871888
struct mlx5_cache_ent *ent;
18881889

0 commit comments

Comments (0)