Skip to content

Commit 1144da5

Browse files
authored
Merge pull request #15645 from daos-stack/wangdi/google_26_dfuse
patches series for dfuse to support simultaneous reads
2 parents 86279b1 + c171530 commit 1144da5

File tree

16 files changed

+1004
-233
lines changed

16 files changed

+1004
-233
lines changed

docs/user/filesystem.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,6 @@ Additionally, there are several optional command-line options:
228228
| --container=<label\|uuid\> | container label or uuid to open |
229229
| --sys-name=<name\> | DAOS system name |
230230
| --foreground | run in foreground |
231-
| --singlethreaded | run single threaded |
232231
| --thread-count=<count> | Number of threads to use |
233232
| --multi-user | Run in multi user mode |
234233
| --read-only | Mount in read-only mode |

src/client/dfuse/SConscript

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ DFUSE_SRC = ['dfuse_core.c',
77
'dfuse_main.c',
88
'dfuse_fuseops.c',
99
'inval.c',
10+
'file.c',
1011
'dfuse_cont.c',
1112
'dfuse_thread.c',
1213
'dfuse_pool.c']

src/client/dfuse/dfuse.h

Lines changed: 67 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ struct dfuse_info {
2929
char *di_mountpoint;
3030
int32_t di_thread_count;
3131
uint32_t di_eq_count;
32-
bool di_threaded;
3332
bool di_foreground;
3433
bool di_caching;
3534
bool di_multi_user;
@@ -96,6 +95,9 @@ struct dfuse_eq {
9695
* memory consumption */
9796
#define DFUSE_MAX_PRE_READ (1024 * 1024 * 4)
9897

98+
/* Maximum file-size for pre-read in all cases */
99+
#define DFUSE_MAX_PRE_READ_ONCE (1024 * 1024 * 1)
100+
99101
/* Launch fuse, and do not return until complete */
100102
int
101103
dfuse_launch_fuse(struct dfuse_info *dfuse_info, struct fuse_args *args);
@@ -137,9 +139,10 @@ struct dfuse_inode_entry;
137139
* when EOF is returned to the kernel. If it's still present on release then it's freed then.
138140
*/
139141
struct dfuse_pre_read {
140-
pthread_mutex_t dra_lock;
142+
d_list_t req_list;
141143
struct dfuse_event *dra_ev;
142144
int dra_rc;
145+
bool complete;
143146
};
144147

145148
/** what is returned as the handle for fuse fuse_file_info on create/open/opendir */
@@ -149,8 +152,6 @@ struct dfuse_obj_hdl {
149152
/** the DFS object handle. Not created for directories. */
150153
dfs_obj_t *doh_obj;
151154

152-
struct dfuse_pre_read *doh_readahead;
153-
154155
/** the inode entry for the file */
155156
struct dfuse_inode_entry *doh_ie;
156157

@@ -169,17 +170,24 @@ struct dfuse_obj_hdl {
169170
/* Pointer to the last returned drc entry */
170171
struct dfuse_readdir_c *doh_rd_nextc;
171172

172-
/* Linear read function, if a file is read from start to end then this normally requires
173-
* a final read request at the end of the file that returns zero bytes. Detect this case
174-
* and when the final read is detected then just return without a round trip.
175-
* Store a flag for this being enabled (starts as true, but many I/O patterns will set it
176-
* to false), the expected position of the next read and a boolean for if EOF has been
177-
* detected.
173+
/* Linear read tracking. If a file is opened and read from start to finish then this is
174+
* called a linear read, linear reads however may or may not read EOF at the end of a file,
175+
* as the reader may be checking the file size.
176+
*
177+
* Detect this case and track it at the file handle level, this is then used in two places:
178+
* For read of EOF it means the round-trip can be avoided.
179+
* On release we can use this flag to apply a setting to the directory inode.
180+
*
181+
* This flag starts enabled and many I/O patterns will disable it. We also store the next
182+
* expected read position and if EOF has been reached.
178183
*/
184+
179185
off_t doh_linear_read_pos;
180186
bool doh_linear_read;
181187
bool doh_linear_read_eof;
182188

189+
bool doh_set_linear_read;
190+
183191
/** True if caching is enabled for this file. */
184192
bool doh_caching;
185193

@@ -197,6 +205,8 @@ struct dfuse_obj_hdl {
197205
bool doh_kreaddir_finished;
198206

199207
bool doh_evict_on_close;
208+
/* the handle is doing readahead for the moment */
209+
bool doh_readahead_inflight;
200210
};
201211

202212
/* Readdir support.
@@ -401,11 +411,20 @@ struct dfuse_event {
401411
d_iov_t de_iov;
402412
d_sg_list_t de_sgl;
403413
d_list_t de_list;
414+
415+
/* Position in a list of events, this will either be off active->open_reads or
416+
* de->de_read_slaves.
417+
*/
418+
d_list_t de_read_list;
419+
/* List of slave events */
420+
d_list_t de_read_slaves;
404421
struct dfuse_eq *de_eqt;
405422
union {
406423
struct dfuse_obj_hdl *de_oh;
407424
struct dfuse_inode_entry *de_ie;
425+
struct read_chunk_data *de_cd;
408426
};
427+
struct dfuse_info *de_di;
409428
off_t de_req_position; /**< The file position requested by fuse */
410429
union {
411430
size_t de_req_len;
@@ -1009,10 +1028,32 @@ struct dfuse_inode_entry {
10091028
*/
10101029
ATOMIC bool ie_linear_read;
10111030

1031+
struct active_inode *ie_active;
1032+
10121033
/* Entry on the evict list */
10131034
d_list_t ie_evict_entry;
10141035
};
10151036

1037+
struct active_inode {
1038+
d_list_t chunks;
1039+
d_list_t open_reads;
1040+
pthread_spinlock_t lock;
1041+
ATOMIC uint64_t read_count;
1042+
struct dfuse_pre_read *readahead;
1043+
};
1044+
1045+
/* Increase active count on inode. This takes a reference and allocates ie->ie_active as required */
1046+
int
1047+
active_ie_init(struct dfuse_inode_entry *ie);
1048+
1049+
/* Mark an oh as closing and drop the ref on inode active */
1050+
void
1051+
active_oh_decref(struct dfuse_info *dfuse_info, struct dfuse_obj_hdl *oh);
1052+
1053+
/* Decrease active count on inode, called on error where there is no oh */
1054+
void
1055+
active_ie_decref(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie);
1056+
10161057
/* Flush write-back cache writes to an inode. It does this by waiting for and then releasing an
10171058
* exclusive lock on the inode. Writes take a shared lock so this will block until all pending
10181059
* writes are complete.
@@ -1108,6 +1149,13 @@ dfuse_compute_inode(struct dfuse_cont *dfs,
11081149
void
11091150
dfuse_cache_evict_dir(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie);
11101151

1152+
/* Free any read chunk data for an inode.
1153+
*
1154+
* Returns true if feature was used.
1155+
*/
1156+
bool
1157+
read_chunk_close(struct dfuse_inode_entry *ie);
1158+
11111159
/* Metadata caching functions. */
11121160

11131161
/* Mark the cache as up-to-date from now */
@@ -1171,7 +1219,15 @@ bool
11711219
dfuse_dcache_get_valid(struct dfuse_inode_entry *ie, double max_age);
11721220

11731221
void
1174-
dfuse_pre_read(struct dfuse_info *dfuse_info, struct dfuse_obj_hdl *oh);
1222+
dfuse_pre_read(struct dfuse_info *dfuse_info, struct dfuse_obj_hdl *oh, struct dfuse_event *ev);
1223+
1224+
int
1225+
dfuse_pre_read_init(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie,
1226+
struct dfuse_event **evp);
1227+
1228+
void
1229+
dfuse_pre_read_abort(struct dfuse_info *dfuse_info, struct dfuse_obj_hdl *oh,
1230+
struct dfuse_event *ev, int rc);
11751231

11761232
int
11771233
check_for_uns_ep(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, char *attr,

src/client/dfuse/dfuse_core.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,6 +1274,7 @@ dfuse_ie_close(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie)
12741274
atomic_load_relaxed(&ie->ie_il_count));
12751275
D_ASSERTF(atomic_load_relaxed(&ie->ie_open_count) == 0, "open_count is %d",
12761276
atomic_load_relaxed(&ie->ie_open_count));
1277+
D_ASSERT(!ie->ie_active);
12771278

12781279
if (ie->ie_obj) {
12791280
rc = dfs_release(ie->ie_obj);
@@ -1317,6 +1318,8 @@ dfuse_read_event_size(void *arg, size_t size)
13171318
ev->de_sgl.sg_nr = 1;
13181319
}
13191320

1321+
D_INIT_LIST_HEAD(&ev->de_read_slaves);
1322+
13201323
rc = daos_event_init(&ev->de_ev, ev->de_eqt->de_eq, NULL);
13211324
if (rc != -DER_SUCCESS) {
13221325
return false;

src/client/dfuse/dfuse_fuseops.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* (C) Copyright 2016-2023 Intel Corporation.
2+
* (C) Copyright 2016-2024 Intel Corporation.
33
*
44
* SPDX-License-Identifier: BSD-2-Clause-Patent
55
*/
@@ -88,7 +88,12 @@ dfuse_fuse_init(void *arg, struct fuse_conn_info *conn)
8888
DFUSE_TRA_INFO(dfuse_info, "kernel readdir cache support compiled in");
8989

9090
conn->want |= FUSE_CAP_READDIRPLUS;
91-
conn->want |= FUSE_CAP_READDIRPLUS_AUTO;
91+
/* Temporarily force readdir plus for all cases now, which can
92+
* help to save some lookup RPCs in some cases. This can be
93+
* removed once we use object enumeration to replace the normal key
94+
* enumeration for readdir. XXX
95+
*/
96+
conn->want &= ~FUSE_CAP_READDIRPLUS_AUTO;
9297

9398
#ifdef FUSE_CAP_CACHE_SYMLINKS
9499
conn->want |= FUSE_CAP_CACHE_SYMLINKS;

src/client/dfuse/dfuse_main.c

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ dfuse_bg(struct dfuse_info *dfuse_info)
166166
*
167167
* Should be called from the post_start plugin callback and creates
168168
* a filesystem.
169+
* Returns a DAOS error code.
169170
* Returns zero on success, a DAOS error code on failure.
170171
*/
171172
int
@@ -204,18 +205,17 @@ dfuse_launch_fuse(struct dfuse_info *dfuse_info, struct fuse_args *args)
204205
DFUSE_TRA_ERROR(dfuse_info, "Error sending signal to fg: "DF_RC, DP_RC(rc));
205206

206207
/* Blocking */
207-
if (dfuse_info->di_threaded)
208-
rc = dfuse_loop(dfuse_info);
209-
else
210-
rc = fuse_session_loop(dfuse_info->di_session);
211-
if (rc != 0)
208+
rc = dfuse_loop(dfuse_info);
209+
if (rc != 0) {
212210
DHS_ERROR(dfuse_info, rc, "Fuse loop exited");
211+
rc = daos_errno2der(rc);
212+
}
213213

214214
umount:
215215

216216
fuse_session_unmount(dfuse_info->di_session);
217217

218-
return daos_errno2der(rc);
218+
return rc;
219219
}
220220

221221
#define DF_POOL_PREFIX "pool="
@@ -279,7 +279,6 @@ show_help(char *name)
279279
" --path=<path> Path to load UNS pool/container data\n"
280280
" --sys-name=STR DAOS system name context for servers\n"
281281
"\n"
282-
" -S --singlethread Single threaded (deprecated)\n"
283282
" -t --thread-count=count Total number of threads to use\n"
284283
" -e --eq-count=count Number of event queues to use\n"
285284
" -f --foreground Run in foreground\n"
@@ -423,7 +422,6 @@ main(int argc, char **argv)
423422
{"pool", required_argument, 0, 'p'},
424423
{"container", required_argument, 0, 'c'},
425424
{"sys-name", required_argument, 0, 'G'},
426-
{"singlethread", no_argument, 0, 'S'},
427425
{"thread-count", required_argument, 0, 't'},
428426
{"eq-count", required_argument, 0, 'e'},
429427
{"foreground", no_argument, 0, 'f'},
@@ -447,13 +445,12 @@ main(int argc, char **argv)
447445
if (dfuse_info == NULL)
448446
D_GOTO(out_debug, rc = -DER_NOMEM);
449447

450-
dfuse_info->di_threaded = true;
451448
dfuse_info->di_caching = true;
452449
dfuse_info->di_wb_cache = true;
453450
dfuse_info->di_eq_count = 1;
454451

455452
while (1) {
456-
c = getopt_long(argc, argv, "Mm:St:o:fhe:v", long_options, NULL);
453+
c = getopt_long(argc, argv, "Mm:t:o:fhe:v", long_options, NULL);
457454

458455
if (c == -1)
459456
break;
@@ -491,13 +488,6 @@ main(int argc, char **argv)
491488
case 'P':
492489
path = optarg;
493490
break;
494-
case 'S':
495-
/* Set it to be single threaded, but allow an extra one
496-
* for the event queue processing
497-
*/
498-
dfuse_info->di_threaded = false;
499-
dfuse_info->di_thread_count = 2;
500-
break;
501491
case 'e':
502492
dfuse_info->di_eq_count = atoi(optarg);
503493
break;
@@ -564,7 +554,7 @@ main(int argc, char **argv)
564554
* check CPU binding. If bound to a number of cores then launch that number of threads,
565555
* if not bound then limit to 16.
566556
*/
567-
if (dfuse_info->di_threaded && !have_thread_count) {
557+
if (!have_thread_count) {
568558
struct hwloc_topology *hwt;
569559
hwloc_const_cpuset_t hw;
570560
int total;

0 commit comments

Comments
 (0)