Skip to content

Commit b81bcf0

Browse files
committed
core: Improve access tracking performance for read/write operations
Currently, read and write operations to the VirtualFileSystem are "stateless" in the sense that there is no corresponding "open" and "close" exposed. At the NFS4 level, this exists, and nfs4j tracks these internally already (`stateid4.opaque` byte sequences), but they're not exposed to `VirtualFileSystem`. This is a problem not only for scenarios where an explicit open/close is required, but even for currently implemented scenarios it's a performance problem: PseudoFs calls `checkAccess(inode, ACE4_READ_DATA/ACE4_WRITE_DATA)` for each `read`/`write`, which in turn triggers a Stat for each read/write operation. This incurs an unnecessary performance penalty of more than 20%. The access check is unnecessary because a successful check upon "open" will be valid for the entire lifetime of the call, just as it is when opening a file descriptor in POSIX. In order to properly track granted access, we can leverage data stored in stateid4 and the existing work in FileTracker. Since stateid4 exists in NFSv4 only, we make sure there is no performance degradation in NFSv3 and no exposure of NFSv4-only internals in the VirtualFileSystem API: 1. Expose "Opaque" stateids to VirtualFileSystem read/write/commit; add open/close for custom FS interop. 2. Rework stateid4, which currently conflates stateid and seqid -- directly reference stateid4.other Opaque when possible. 3. In PseudoFS, check SharedAccess state from FileTracker and use this to determine access during read/write when available. With the change, for sustained reads I'm seeing 854 MB/s instead of 712 MB/s on LocalFileSystem, and 2000 MB/s instead of 1666 MB/s on a custom implementation. Signed-off-by: Christian Kohlschütter <[email protected]>
1 parent f3ae713 commit b81bcf0

19 files changed

+330
-127
lines changed

core/src/main/java/org/dcache/nfs/v4/FileTracker.java

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -315,9 +315,9 @@ public OpenRecord addOpen(NFS4Client client, StateOwner owner, Inode inode, int
315315
os.shareDenySeen |= 1 << ((shareDeny & nfs4_prot.OPEN4_SHARE_ACCESS_BOTH) - 1);
316316
}
317317

318-
os.stateid.seqid++;
318+
os.stateid.bumpSeqid();
319319
// we need to return copy to avoid modification by concurrent opens
320-
var openStateid = new stateid4(os.stateid.other, os.stateid.seqid);
320+
var openStateid = os.stateid.clone();
321321

322322
// yet another open from the same client. Let's check if we can delegate.
323323
if (canDelegateRead && (os.shareAccess
@@ -341,11 +341,12 @@ public OpenRecord addOpen(NFS4Client client, StateOwner owner, Inode inode, int
341341
stateid = state.stateid();
342342
OpenState openState = new OpenState(client, owner, stateid, shareAccess, shareDeny);
343343
opens.add(openState);
344-
state.addDisposeListener(s -> removeOpen(inode, stateid));
345-
stateid.seqid++;
344+
Opaque fileIdKey = inode.getFileIdKey();
345+
state.addDisposeListener(s -> removeOpen(fileIdKey, stateid));
346+
stateid.bumpSeqid();
346347

347348
// we need to return copy to avoid modification by concurrent opens
348-
var openStateid = new stateid4(stateid.other, stateid.seqid);
349+
var openStateid = stateid.clone();
349350

350351
// REVISIT: currently only read-delegations are supported
351352
if (canDelegateRead && (wantReadDelegation || adlHeuristic.shouldDelegate(client, inode))) {
@@ -408,9 +409,9 @@ public stateid4 downgradeOpen(NFS4Client client, stateid4 stateid, Inode inode,
408409
os.shareAccess = shareAccess;
409410
os.shareDeny = shareDeny;
410411

411-
os.stateid.seqid++;
412+
os.stateid.bumpSeqid();
412413
// we need to return copy to avoid modification by concurrent opens
413-
return new stateid4(os.stateid.other, os.stateid.seqid);
414+
return os.stateid.clone();
414415
} finally {
415416
lock.unlock();
416417
}
@@ -463,16 +464,21 @@ public void delegationReturn(NFS4Client client, stateid4 stateid, Inode inode)
463464
*/
464465
public int getShareAccess(NFS4Client client, Inode inode, stateid4 stateid)
465466
throws ChimeraNFSException {
467+
Opaque stateIdOpaque = stateid.getOpaque();
468+
return getShareAccess(client, inode, stateIdOpaque);
469+
}
466470

471+
public int getShareAccess(NFS4Client client, Inode inode, Opaque stateid)
472+
throws ChimeraNFSException {
467473
Opaque fileId = inode.getFileIdKey();
468474
Lock lock = filesLock.get(fileId);
469475
lock.lock();
470476
try {
471477

472-
switch (stateid.other[11]) {
478+
switch (stateid.byteAt(11)) {
473479
case Stateids.LOCK_STATE_ID:
474480
NFS4State lockState = client.state(stateid);
475-
stateid = lockState.getOpenState().stateid();
481+
stateid = lockState.getOpenState().stateid().getOpaque();
476482
// fall through
477483
case Stateids.OPEN_STATE_ID: {
478484
final List<OpenState> opens = files.get(fileId);
@@ -481,10 +487,10 @@ public int getShareAccess(NFS4Client client, Inode inode, stateid4 stateid)
481487
throw new BadStateidException("no matching open");
482488
}
483489

484-
final stateid4 openStateid = stateid;
490+
final Opaque openStateid = stateid;
485491
return opens.stream()
486492
.filter(s -> client.getId() == s.client.getId())
487-
.filter(s -> s.stateid.equals(openStateid))
493+
.filter(s -> s.stateid.getOpaque().equals(openStateid))
488494
.mapToInt(OpenState::getShareAccess)
489495
.findAny()
490496
.orElseThrow(BadStateidException::new);
@@ -496,11 +502,11 @@ public int getShareAccess(NFS4Client client, Inode inode, stateid4 stateid)
496502
throw new BadStateidException("no delegation found");
497503
}
498504

499-
stateid4 delegationStateid = stateid;
505+
Opaque delegationStateid = stateid;
500506

501507
var delegation = fileDelegations.stream()
502508
.filter(d -> d.client().getId().equals(client.getId()))
503-
.filter(d -> d.delegationStateid().stateid().equals(delegationStateid))
509+
.filter(d -> d.delegationStateid().stateid().getOpaque().equals(delegationStateid))
504510
.findAny()
505511
.orElseThrow(BadStateidException::new);
506512

@@ -526,8 +532,10 @@ public int getShareAccess(NFS4Client client, Inode inode, stateid4 stateid)
526532
* @param stateid associated with the open.
527533
*/
528534
void removeOpen(Inode inode, stateid4 stateid) {
535+
removeOpen(inode.getFileIdKey(), stateid);
536+
}
529537

530-
Opaque fileId = inode.getFileIdKey();
538+
void removeOpen(Opaque fileId, stateid4 stateid) {
531539
Lock lock = filesLock.get(fileId);
532540
lock.lock();
533541
try {

core/src/main/java/org/dcache/nfs/v4/NFS4Client.java

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ public class NFS4Client {
117117
*/
118118
private int _sessionSequence = 1;
119119

120-
private final Map<stateid4, NFS4State> _clientStates = new ConcurrentHashMap<>();
120+
private final Map<Opaque, NFS4State> _clientStates = new ConcurrentHashMap<>();
121121

122122
/**
123123
* sessions associated with the client
@@ -334,18 +334,20 @@ private NFS4State createState(StateOwner stateOwner, byte type, NFS4State openSt
334334

335335
NFS4State state = new NFS4State(openState, stateOwner, _stateHandler.createStateId(this, type, _stateIdCounter
336336
.incrementAndGet()));
337+
stateid4 stateId = state.stateid();
338+
Opaque opaque = stateId.getOpaque();
337339
if (openState != null) {
338340
openState.addDisposeListener(s -> {
339341
// remove and dispose derived states.
340-
NFS4State nfsState = _clientStates.get(state.stateid());
342+
NFS4State nfsState = _clientStates.get(opaque);
341343
if (nfsState != null) {
342344
_log.debug("removing derived state {}", nfsState);
343345
nfsState.tryDispose();
344346
}
345-
_clientStates.remove(state.stateid());
347+
_clientStates.remove(opaque);
346348
});
347349
}
348-
_clientStates.put(state.stateid(), state);
350+
_clientStates.put(opaque, state);
349351
return state;
350352
}
351353

@@ -410,29 +412,34 @@ public NFS4State createServerSideCopyState(StateOwner stateOwner, NFS4State open
410412
}
411413

412414
public void releaseState(stateid4 stateid) throws ChimeraNFSException {
415+
Opaque opaque = stateid.getOpaque();
413416

414-
NFS4State state = _clientStates.get(stateid);
417+
NFS4State state = _clientStates.get(opaque);
415418
if (state == null) {
416419
throw new BadStateidException("State not known to the client: " + stateid);
417420
}
418421
state.disposeIgnoreFailures();
419-
_clientStates.remove(stateid);
422+
_clientStates.remove(opaque);
420423
}
421424

422425
public void tryReleaseState(stateid4 stateid) throws ChimeraNFSException {
423426

424-
NFS4State state = _clientStates.get(stateid);
427+
NFS4State state = _clientStates.get(stateid.getOpaque());
425428
if (state == null) {
426429
throw new BadStateidException("State not known to the client: " + stateid);
427430
}
428431
state.tryDispose();
429-
_clientStates.remove(stateid);
432+
_clientStates.remove(stateid.getOpaque());
430433
}
431434

432435
public NFS4State state(stateid4 stateid) throws ChimeraNFSException {
433-
NFS4State state = _clientStates.get(stateid);
436+
return state(stateid.getOpaque());
437+
}
438+
439+
public NFS4State state(Opaque stateidOpaque) throws ChimeraNFSException {
440+
NFS4State state = _clientStates.get(stateidOpaque);
434441
if (state == null) {
435-
throw new BadStateidException("State not known to the client: " + stateid);
442+
throw new BadStateidException("State not known to the client: " + stateidOpaque);
436443
}
437444
return state;
438445
}
@@ -534,7 +541,7 @@ public boolean hasState() {
534541
* @param state to attach
535542
*/
536543
public void attachState(NFS4State state) {
537-
_clientStates.put(state.stateid(), state);
544+
_clientStates.put(state.stateid().getOpaque(), state);
538545
}
539546

540547
/**
@@ -543,7 +550,7 @@ public void attachState(NFS4State state) {
543550
* @param state to detach
544551
*/
545552
public void detachState(NFS4State state) {
546-
_clientStates.remove(state.stateid());
553+
_clientStates.remove(state.stateid().getOpaque());
547554
}
548555

549556
@GuardedBy("this")

core/src/main/java/org/dcache/nfs/v4/NFS4State.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ public NFS4State(NFS4State openState, StateOwner owner, stateid4 stateid) {
6868
}
6969

7070
public void bumpSeqid() {
71-
++_stateid.seqid;
71+
_stateid.bumpSeqid();
7272
}
7373

7474
public stateid4 stateid() {

core/src/main/java/org/dcache/nfs/v4/NFSServerV41.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ public COMPOUND4res NFSPROC4_COMPOUND_4(RpcCall call$, COMPOUND4args arg1) {
134134
}
135135
res.resarray = new ArrayList<>(arg1.argarray.length);
136136

137-
VirtualFileSystem fs = new PseudoFs(_fs, call$, _exportTable);
137+
VirtualFileSystem fs = new PseudoFs(_fs, call$, _exportTable, _statHandler);
138138

139139
CompoundContextBuilder builder = new CompoundContextBuilder()
140140
.withMinorversion(arg1.minorversion.value)

core/src/main/java/org/dcache/nfs/v4/NFSv4StateHandler.java

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -237,13 +237,22 @@ public NFS4Client getClient(clientid4 clientid) throws StaleClientidException {
237237
}
238238
}
239239

240+
public NFS4Client getClientIfExists(long clientId) {
241+
_readLock.lock();
242+
try {
243+
return _clientsByServerId.get(new clientid4(clientId));
244+
} finally {
245+
_readLock.unlock();
246+
}
247+
}
248+
240249
public NFS4Client getClientIdByStateId(stateid4 stateId) throws ChimeraNFSException {
241250

242251
_readLock.lock();
243252
try {
244253
checkState(_running, "NFS state handler not running");
245254

246-
clientid4 clientId = new clientid4(Bytes.getLong(stateId.other, 0));
255+
clientid4 clientId = new clientid4(stateId.getClientId());
247256
NFS4Client client = _clientsByServerId.get(clientId);
248257
if (client == null) {
249258
throw new BadStateidException("no client for stateid: " + stateId);
@@ -439,7 +448,7 @@ public int getInstanceId() {
439448
* @return state hander id.
440449
*/
441450
public static int getInstanceId(stateid4 stateid) {
442-
long clientid = Bytes.getLong(stateid.other, 0);
451+
long clientid = stateid.getClientId();
443452
return (int) (clientid >> 16) & 0xFFFF;
444453
}
445454

@@ -472,7 +481,7 @@ public stateid4 createStateId(NFS4Client client, byte type, int count) {
472481
// we eat the first 8 bits if the counter, however, we don't expect 16M states be active at the same time,
473482
// thus the probability of a collision is too low
474483
Bytes.putInt(other, 8, count << 8 | (type & 0xFF));
475-
return new stateid4(other, STATE_INITIAL_SEQUENCE);
484+
return stateid4.forBytes(other, STATE_INITIAL_SEQUENCE);
476485
}
477486

478487
/**

core/src/main/java/org/dcache/nfs/v4/OperationCLOSE.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ public void process(CompoundContext context, nfs_resop4 result)
5959
NFS4State nfsState = client.state(stateid);
6060
Stateids.checkStateId(nfsState.stateid(), stateid);
6161

62+
context.getFs().close(stateid.getOpaque());
63+
6264
if (context.getMinorversion() == 0) {
6365
nfsState.getStateOwner().acceptAsNextSequence(_args.opclose.seqid);
6466
client.updateLeaseTime();
@@ -68,6 +70,5 @@ public void process(CompoundContext context, nfs_resop4 result)
6870

6971
res.open_stateid = Stateids.invalidStateId();
7072
res.status = nfsstat.NFS_OK;
71-
7273
}
7374
}

core/src/main/java/org/dcache/nfs/v4/OperationCOMMIT.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.dcache.nfs.v4.xdr.nfs_argop4;
2929
import org.dcache.nfs.v4.xdr.nfs_opnum4;
3030
import org.dcache.nfs.v4.xdr.nfs_resop4;
31+
import org.dcache.nfs.v4.xdr.stateid4;
3132
import org.dcache.nfs.vfs.Inode;
3233

3334
public class OperationCOMMIT extends AbstractNFSv4Operation {
@@ -42,8 +43,11 @@ public void process(CompoundContext context, nfs_resop4 result) throws ChimeraNF
4243
final COMMIT4res res = result.opcommit;
4344
Inode inode = context.currentInode();
4445

46+
stateid4 stateid = Stateids.getCurrentStateidIfNeeded(context, _args.opwrite.stateid);
47+
4548
_args.opcommit.offset.checkOverflow(_args.opcommit.count.value, "offset + length overflow");
46-
context.getFs().commit(inode, _args.opcommit.offset.value, _args.opcommit.count.value);
49+
context.getFs().commit(stateid.getOpaque(), inode, _args.opcommit.offset.value,
50+
_args.opcommit.count.value);
4751

4852
res.resok4 = new COMMIT4resok();
4953
res.resok4.writeverf = context.getRebootVerifier();

0 commit comments

Comments
 (0)