
Commit 24fe962

Bernd Schubert authored and Miklos Szeredi committed

fuse: {io-uring} Handle SQEs - register commands

This adds basic support for ring SQEs (with opcode=IORING_OP_URING_CMD).
For now only FUSE_IO_URING_CMD_REGISTER is handled to register queue
entries.

Signed-off-by: Bernd Schubert <[email protected]>
Reviewed-by: Pavel Begunkov <[email protected]> # io_uring
Reviewed-by: Luis Henriques <[email protected]>
Signed-off-by: Miklos Szeredi <[email protected]>

1 parent 7ccd86b commit 24fe962
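
As a hedged illustration of the userspace side of this command (not part of the commit): a server might register one ring entry roughly as follows, assuming liburing and the uapi definitions added elsewhere in this series. The struct fuse_uring_cmd_req layout, the FUSE_IO_URING_CMD_REGISTER value, and the helper name fuse_uring_register_entry are assumptions here, not taken from this commit.

#include <liburing.h>
#include <stdint.h>
#include <string.h>
#include <sys/uio.h>

/* Assumed uapi layout, added by another patch of this series. */
struct fuse_uring_cmd_req {
        uint64_t flags;
        uint64_t commit_id;
        uint16_t qid;
        uint8_t  padding[6];
};

#define FUSE_IO_URING_CMD_REGISTER 1    /* assumed value */

/*
 * Register one ring entry for queue `qid` on an opened /dev/fuse fd.
 * iov[0] is the request-header buffer, iov[1] the payload buffer; both
 * must stay mapped for the lifetime of the ring entry, and the kernel
 * (fuse_uring_create_ring_ent() in this patch) rejects a payload buffer
 * smaller than the connection's max payload size.
 */
static int fuse_uring_register_entry(struct io_uring *uring, int fuse_dev_fd,
                                     uint16_t qid, struct iovec iov[2])
{
        struct io_uring_sqe *sqe = io_uring_get_sqe(uring);
        struct fuse_uring_cmd_req *req;

        if (!sqe)
                return -1;

        memset(sqe, 0, 2 * sizeof(*sqe));       /* SQE128: 128-byte entry */
        sqe->opcode = IORING_OP_URING_CMD;
        sqe->fd = fuse_dev_fd;
        sqe->cmd_op = FUSE_IO_URING_CMD_REGISTER;
        sqe->addr = (uint64_t)(uintptr_t)iov;   /* iovec pair, see patch */
        sqe->len = 2;                           /* FUSE_URING_IOV_SEGS */

        /* The command payload lives in the extra half of the 128-byte SQE. */
        req = (struct fuse_uring_cmd_req *)sqe->cmd;
        req->qid = qid;

        return 0;
}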

File tree

7 files changed: +542 -1 lines changed

fs/fuse/Kconfig

Lines changed: 12 additions & 0 deletions
@@ -63,3 +63,15 @@ config FUSE_PASSTHROUGH
 	  to be performed directly on a backing file.
 
 	  If you want to allow passthrough operations, answer Y.
+
+config FUSE_IO_URING
+	bool "FUSE communication over io-uring"
+	default y
+	depends on FUSE_FS
+	depends on IO_URING
+	help
+	  This allows sending FUSE requests over the io-uring interface and
+	  also adds request core affinity.
+
+	  If you want to allow fuse server/client communication through io-uring,
+	  answer Y

fs/fuse/Makefile

Lines changed: 1 addition & 0 deletions
@@ -15,5 +15,6 @@ fuse-y += iomode.o
 fuse-$(CONFIG_FUSE_DAX) += dax.o
 fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
 fuse-$(CONFIG_SYSCTL) += sysctl.o
+fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o
 
 virtiofs-y := virtio_fs.o

fs/fuse/dev_uring.c

Lines changed: 326 additions & 0 deletions
@@ -0,0 +1,326 @@ (new file)
// SPDX-License-Identifier: GPL-2.0
/*
 * FUSE: Filesystem in Userspace
 * Copyright (c) 2023-2024 DataDirect Networks.
 */

#include "fuse_i.h"
#include "dev_uring_i.h"
#include "fuse_dev_i.h"

#include <linux/fs.h>
#include <linux/io_uring/cmd.h>

static bool __read_mostly enable_uring;
module_param(enable_uring, bool, 0644);
MODULE_PARM_DESC(enable_uring,
                 "Enable userspace communication through io-uring");

#define FUSE_URING_IOV_SEGS 2 /* header and payload */


bool fuse_uring_enabled(void)
{
        return enable_uring;
}

void fuse_uring_destruct(struct fuse_conn *fc)
{
        struct fuse_ring *ring = fc->ring;
        int qid;

        if (!ring)
                return;

        for (qid = 0; qid < ring->nr_queues; qid++) {
                struct fuse_ring_queue *queue = ring->queues[qid];

                if (!queue)
                        continue;

                WARN_ON(!list_empty(&queue->ent_avail_queue));
                WARN_ON(!list_empty(&queue->ent_commit_queue));

                kfree(queue);
                ring->queues[qid] = NULL;
        }

        kfree(ring->queues);
        kfree(ring);
        fc->ring = NULL;
}

/*
 * Basic ring setup for this connection based on the provided configuration
 */
static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
{
        struct fuse_ring *ring;
        size_t nr_queues = num_possible_cpus();
        struct fuse_ring *res = NULL;
        size_t max_payload_size;

        ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT);
        if (!ring)
                return NULL;

        ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *),
                               GFP_KERNEL_ACCOUNT);
        if (!ring->queues)
                goto out_err;

        max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write);
        max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE);

        spin_lock(&fc->lock);
        if (fc->ring) {
                /* race, another thread created the ring in the meantime */
                spin_unlock(&fc->lock);
                res = fc->ring;
                goto out_err;
        }

        fc->ring = ring;
        ring->nr_queues = nr_queues;
        ring->fc = fc;
        ring->max_payload_sz = max_payload_size;

        spin_unlock(&fc->lock);
        return ring;

out_err:
        kfree(ring->queues);
        kfree(ring);
        return res;
}

static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
                                                       int qid)
{
        struct fuse_conn *fc = ring->fc;
        struct fuse_ring_queue *queue;

        queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
        if (!queue)
                return NULL;
        queue->qid = qid;
        queue->ring = ring;
        spin_lock_init(&queue->lock);

        INIT_LIST_HEAD(&queue->ent_avail_queue);
        INIT_LIST_HEAD(&queue->ent_commit_queue);

        spin_lock(&fc->lock);
        if (ring->queues[qid]) {
                spin_unlock(&fc->lock);
                kfree(queue);
                return ring->queues[qid];
        }

        /*
         * write_once and lock as the caller mostly doesn't take the lock at all
         */
        WRITE_ONCE(ring->queues[qid], queue);
        spin_unlock(&fc->lock);

        return queue;
}

/*
 * Make a ring entry available for fuse_req assignment
 */
static void fuse_uring_ent_avail(struct fuse_ring_ent *ent,
                                 struct fuse_ring_queue *queue)
{
        WARN_ON_ONCE(!ent->cmd);
        list_move(&ent->list, &queue->ent_avail_queue);
        ent->state = FRRS_AVAILABLE;
}

/*
 * fuse_uring_req_fetch command handling
 */
static void fuse_uring_do_register(struct fuse_ring_ent *ent,
                                   struct io_uring_cmd *cmd,
                                   unsigned int issue_flags)
{
        struct fuse_ring_queue *queue = ent->queue;

        spin_lock(&queue->lock);
        ent->cmd = cmd;
        fuse_uring_ent_avail(ent, queue);
        spin_unlock(&queue->lock);
}

/*
 * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
 * the payload
 */
static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
                                         struct iovec iov[FUSE_URING_IOV_SEGS])
{
        struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr));
        struct iov_iter iter;
        ssize_t ret;

        if (sqe->len != FUSE_URING_IOV_SEGS)
                return -EINVAL;

        /*
         * Direction for buffer access will actually be READ and WRITE,
         * using write for the import should include READ access as well.
         */
        ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS,
                           FUSE_URING_IOV_SEGS, &iov, &iter);
        if (ret < 0)
                return ret;

        return 0;
}
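
/*
 * Illustration (editor's sketch, not part of the patch): userspace is
 * expected to pass something like
 *
 *      struct iovec iov[2] = {
 *              { .iov_base = &headers, .iov_len = sizeof(struct fuse_uring_req_header) },
 *              { .iov_base = payload,  .iov_len = max_payload_sz },
 *      };
 *
 * with sqe->addr pointing at iov and sqe->len == 2; smaller buffers are
 * rejected by the size checks in fuse_uring_create_ring_ent() below.
 */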

static struct fuse_ring_ent *
fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
                           struct fuse_ring_queue *queue)
{
        struct fuse_ring *ring = queue->ring;
        struct fuse_ring_ent *ent;
        size_t payload_size;
        struct iovec iov[FUSE_URING_IOV_SEGS];
        int err;

        err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
        if (err) {
                pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
                                    err);
                return ERR_PTR(err);
        }

        err = -EINVAL;
        if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
                pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
                return ERR_PTR(err);
        }

        payload_size = iov[1].iov_len;
        if (payload_size < ring->max_payload_sz) {
                pr_info_ratelimited("Invalid req payload len %zu\n",
                                    payload_size);
                return ERR_PTR(err);
        }

        err = -ENOMEM;
        ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
        if (!ent)
                return ERR_PTR(err);

        INIT_LIST_HEAD(&ent->list);

        ent->queue = queue;
        ent->headers = iov[0].iov_base;
        ent->payload = iov[1].iov_base;

        return ent;
}

/*
 * Register header and payload buffer with the kernel and put the
 * entry as "ready to get fuse requests" on the queue
 */
static int fuse_uring_register(struct io_uring_cmd *cmd,
                               unsigned int issue_flags, struct fuse_conn *fc)
{
        const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
        struct fuse_ring *ring = fc->ring;
        struct fuse_ring_queue *queue;
        struct fuse_ring_ent *ent;
        int err;
        unsigned int qid = READ_ONCE(cmd_req->qid);

        err = -ENOMEM;
        if (!ring) {
                ring = fuse_uring_create(fc);
                if (!ring)
                        return err;
        }

        if (qid >= ring->nr_queues) {
                pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid);
                return -EINVAL;
        }

        queue = ring->queues[qid];
        if (!queue) {
                queue = fuse_uring_create_queue(ring, qid);
                if (!queue)
                        return err;
        }

        /*
         * The queue created above does not need to be destructed in
         * case of entry errors below; that will be done at ring
         * destruction time.
         */

        ent = fuse_uring_create_ring_ent(cmd, queue);
        if (IS_ERR(ent))
                return PTR_ERR(ent);

        fuse_uring_do_register(ent, cmd, issue_flags);

        return 0;
}

/*
 * Entry function from io_uring to handle the given passthrough command
 * (op code IORING_OP_URING_CMD)
 */
int __maybe_unused fuse_uring_cmd(struct io_uring_cmd *cmd,
                                  unsigned int issue_flags)
{
        struct fuse_dev *fud;
        struct fuse_conn *fc;
        u32 cmd_op = cmd->cmd_op;
        int err;

        if (!enable_uring) {
                pr_info_ratelimited("fuse-io-uring is disabled\n");
                return -EOPNOTSUPP;
        }

        /* This extra SQE size holds struct fuse_uring_cmd_req */
        if (!(issue_flags & IO_URING_F_SQE128))
                return -EINVAL;

        fud = fuse_get_dev(cmd->file);
        if (!fud) {
                pr_info_ratelimited("No fuse device found\n");
                return -ENOTCONN;
        }
        fc = fud->fc;

        if (fc->aborted)
                return -ECONNABORTED;
        if (!fc->connected)
                return -ENOTCONN;

        /*
         * fuse_uring_register() needs the ring to be initialized,
         * we need to know the max payload size
         */
        if (!fc->initialized)
                return -EAGAIN;

        switch (cmd_op) {
        case FUSE_IO_URING_CMD_REGISTER:
                err = fuse_uring_register(cmd, issue_flags, fc);
                if (err) {
                        pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n",
                                     err);
                        return err;
                }
                break;
        default:
                return -EINVAL;
        }

        return -EIOCBQUEUED;
}
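
Continuing the hedged userspace sketch from above (fuse_uring_register_entry() is the assumed helper shown after the commit message): queue setup and submission could then look like this. Only IORING_SETUP_SQE128 is checked by this patch; IORING_SETUP_CQE32 is an assumption carried over from later patches of the series.

#include <liburing.h>
#include <stdint.h>
#include <sys/uio.h>

static int fuse_uring_setup_queue(struct io_uring *uring, int fuse_dev_fd,
                                  uint16_t qid, struct iovec iov[2])
{
        /* 128-byte SQEs are mandatory, fuse_uring_cmd() returns -EINVAL
         * otherwise; big CQEs are an assumption for the rest of the series.
         */
        struct io_uring_params params = {
                .flags = IORING_SETUP_SQE128 | IORING_SETUP_CQE32,
        };
        int err;

        err = io_uring_queue_init_params(64, uring, &params);
        if (err)
                return err;

        err = fuse_uring_register_entry(uring, fuse_dev_fd, qid, iov);
        if (err) {
                io_uring_queue_exit(uring);
                return err;
        }

        /*
         * Submit without waiting: the registration stays queued in the
         * kernel (fuse_uring_cmd() returns -EIOCBQUEUED) and its CQE is
         * only posted once a request is assigned to the entry.
         */
        err = io_uring_submit(uring);
        return err < 0 ? err : 0;
}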
