Skip to content

Commit 6827738

Browse files
nikhilprao authored and fenrus75 committed
dmaengine: idxd: add a write() method for applications to submit work
After the patch to restrict the use of mmap() to CAP_SYS_RAWIO for the currently existing devices, most applications can no longer make use of the accelerators as in production "you don't run things as root". To keep the DSA and IAA accelerators usable, hook up a write() method so that applications can still submit work. In the write method, sufficient input validation is performed to avoid the security issue that required the mmap CAP_SYS_RAWIO check. One complication is that the DSA device allows for indirect ("batched") descriptors. There is no reasonable way to do the input validation on these indirect descriptors so the write() method will not allow these to be submitted to the hardware on affected hardware, and the sysfs enumeration of support for the opcode is also removed. Early performance data shows that the performance delta for most common cases is within the noise. Signed-off-by: Nikhil Rao <[email protected]> Signed-off-by: Arjan van de Ven <[email protected]>
1 parent e11452e commit 6827738

File tree

2 files changed

+90
-2
lines changed

2 files changed

+90
-2
lines changed

drivers/dma/idxd/cdev.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,70 @@ static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
426426
vma->vm_page_prot);
427427
}
428428

429+
static int idxd_submit_user_descriptor(struct idxd_user_context *ctx,
430+
struct dsa_hw_desc __user *udesc)
431+
{
432+
struct idxd_wq *wq = ctx->wq;
433+
struct idxd_dev *idxd_dev = &wq->idxd->idxd_dev;
434+
const uint64_t comp_addr_align = is_dsa_dev(idxd_dev) ? 0x20 : 0x40;
435+
void __iomem *portal = idxd_wq_portal_addr(wq);
436+
struct dsa_hw_desc descriptor __aligned(64);
437+
int rc;
438+
439+
rc = copy_from_user(&descriptor, udesc, sizeof(descriptor));
440+
if (rc)
441+
return -EFAULT;
442+
443+
/*
444+
* DSA devices are capable of indirect ("batch") command submission.
445+
* On devices where direct user submissions are not safe, we cannot
446+
* allow this since there is no good way for us to verify these
447+
* indirect commands.
448+
*/
449+
if (is_dsa_dev(idxd_dev) && descriptor.opcode == DSA_OPCODE_BATCH &&
450+
!wq->idxd->user_submission_safe)
451+
return -EINVAL;
452+
/*
453+
* As per the programming specification, the completion address must be
454+
* aligned to 32 or 64 bytes. If this is violated the hardware
455+
* engine can get very confused (security issue).
456+
*/
457+
if (!IS_ALIGNED(descriptor.completion_addr, comp_addr_align))
458+
return -EINVAL;
459+
460+
if (wq_dedicated(wq))
461+
iosubmit_cmds512(portal, &descriptor, 1);
462+
else {
463+
descriptor.priv = 0;
464+
descriptor.pasid = ctx->pasid;
465+
rc = idxd_enqcmds(wq, portal, &descriptor);
466+
if (rc < 0)
467+
return rc;
468+
}
469+
470+
return 0;
471+
}
472+
473+
/*
 * write() handler for the idxd character device.
 *
 * The user buffer is interpreted as an array of struct dsa_hw_desc; each
 * full descriptor is validated and submitted in order.  Any trailing
 * partial descriptor is ignored.  Returns the number of bytes consumed,
 * or a negative error code if the very first submission fails.
 */
static ssize_t idxd_cdev_write(struct file *filp, const char __user *buf, size_t len,
			       loff_t *unused)
{
	struct dsa_hw_desc __user *udesc = (struct dsa_hw_desc __user *)buf;
	struct idxd_user_context *ctx = filp->private_data;
	size_t ndesc = len / sizeof(struct dsa_hw_desc);
	ssize_t written = 0;
	size_t idx;

	for (idx = 0; idx < ndesc; idx++) {
		int rc = idxd_submit_user_descriptor(ctx, &udesc[idx]);

		/* On failure, report partial progress if any was made. */
		if (rc)
			return written ? written : rc;

		written += sizeof(struct dsa_hw_desc);
	}

	return written;
}
492+
429493
static __poll_t idxd_cdev_poll(struct file *filp,
430494
struct poll_table_struct *wait)
431495
{
@@ -448,6 +512,7 @@ static const struct file_operations idxd_cdev_fops = {
448512
.open = idxd_cdev_open,
449513
.release = idxd_cdev_release,
450514
.mmap = idxd_cdev_mmap,
515+
.write = idxd_cdev_write,
451516
.poll = idxd_cdev_poll,
452517
};
453518

drivers/dma/idxd/sysfs.c

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,12 +1197,35 @@ static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attrib
11971197
static struct device_attribute dev_attr_wq_enqcmds_retries =
11981198
__ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store);
11991199

1200+
/*
 * Format an operation-capability bitmap into a sysfs buffer.
 *
 * The 64-bit words of @opcap_bmap are emitted most-significant word
 * first as "%*pb" bitmap strings, separated by commas, with a newline
 * after the last word.  Returns the number of bytes written.
 */
static ssize_t op_cap_show_common(struct device *dev, char *buf, unsigned long *opcap_bmap)
{
	ssize_t pos;
	int i;

	pos = 0;
	/* Walk words from most significant to least significant. */
	for (i = IDXD_MAX_OPCAP_BITS/64 - 1; i >= 0; i--) {
		unsigned long val = opcap_bmap[i];

		/* On systems where direct user submissions are not safe, we need to clear out
		 * the BATCH capability from the capability mask in sysfs since we cannot support
		 * that command on such systems.
		 */
		if (i == DSA_OPCODE_BATCH/64 && !confdev_to_idxd(dev)->user_submission_safe)
			clear_bit(DSA_OPCODE_BATCH % 64, &val);

		/* Emit the word, then ',' between words or '\n' after the last. */
		pos += sysfs_emit_at(buf, pos, "%*pb", 64, &val);
		pos += sysfs_emit_at(buf, pos, "%c", i == 0 ? '\n' : ',');
	}

	return pos;
}
1222+
12001223
/*
 * sysfs "op_config" show for a work queue: report the WQ-level
 * opcode-capability bitmap, with unsupported capabilities filtered.
 */
static ssize_t wq_op_config_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct idxd_wq *wq = confdev_to_wq(dev);

	return op_cap_show_common(dev, buf, wq->opcap_bmap);
}
12071230

12081231
static int idxd_verify_supported_opcap(struct idxd_device *idxd, unsigned long *opmask)
/*
 * sysfs "op_cap" show for the device: report the device-level
 * opcode-capability bitmap, with unsupported capabilities filtered.
 */
static ssize_t op_cap_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct idxd_device *idxd = confdev_to_idxd(dev);

	return op_cap_show_common(dev, buf, idxd->opcap_bmap);
}
14601483
static DEVICE_ATTR_RO(op_cap);
14611484

0 commit comments

Comments
 (0)