Skip to content

Commit 8f4b342

Browse files
committed
Handle ListBlobs returning less items than requested with cont token
Both in s3 and azure, it is possible that when we call listPrefix with limit=N, we might get a result whose size is smaller than N, but still has a continuation token. This behaviour does not hurt when we are listing the fuse files under a directory. But when doing dir checks i.e. 1) testing if the given path is a directory 2) if a given directory is empty, this can make goofys wrongly think a directory is empty or a given prefix is not a directory. Add a wrapper in list.go, that does this: If the backend returns less items than requested and has a continuation token, it will use the continuation token to fetch more items.
1 parent dc63112 commit 8f4b342

File tree

1 file changed

+57
-6
lines changed

1 file changed

+57
-6
lines changed

internal/dir.go

Lines changed: 57 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ func (dh *DirHandle) listObjects(prefix string) (resp *ListBlobsOutput, err erro
392392
// is nothing left to list or the last listed entry has all characters > "/"
393393
// Relavant test case: TestReadDirDash
394394
func listBlobsSafe(cloud StorageBackend, param *ListBlobsInput) (*ListBlobsOutput, error) {
395-
res, err := cloud.ListBlobs(param)
395+
res, err := listBlobsWrapper(cloud, param)
396396
if err != nil {
397397
return nil, err
398398
}
@@ -406,7 +406,7 @@ func listBlobsSafe(cloud StorageBackend, param *ListBlobsInput) (*ListBlobsOutpu
406406
// Get the continuation token from the result.
407407
ContinuationToken: res.NextContinuationToken,
408408
}
409-
nextRes, err := cloud.ListBlobs(nextReq)
409+
nextRes, err := listBlobsWrapper(cloud, nextReq)
410410
if err != nil {
411411
return nil, err
412412
}
@@ -426,6 +426,54 @@ func listBlobsSafe(cloud StorageBackend, param *ListBlobsInput) (*ListBlobsOutpu
426426
return res, nil
427427
}
428428

429+
// Both in s3 and azure, it is possible that when we call listPrefix with limit=N, we might get a
430+
// result whose size is smaller than N, but still has a continuation token. This behaviour does not
431+
// hurt when we are listing the fuse files under a directory. But when doing dir checks i.e.
432+
// 1) testing if the given path is a directory 2) if a given directory is empty, this can make
433+
// goofys wrongly think a directory is empty or a given prefix is not a directory.
434+
//
435+
// If the backend returns less items than requested and has a continuation token, this will use the
436+
// continuation token to fetch more items.
437+
func listBlobsWrapper(cloud StorageBackend, param *ListBlobsInput) (*ListBlobsOutput, error) {
438+
targetNumElements := NilUint32(param.MaxKeys)
439+
ret, err := cloud.ListBlobs(param)
440+
if targetNumElements == 0 {
441+
// If MaxKeys is not specified (or is 0), we don't need any of the following special handling.
442+
return ret, err
443+
} else if err != nil {
444+
return nil, err
445+
}
446+
447+
for {
448+
curNumElements := uint32(len(ret.Prefixes) + len(ret.Items))
449+
if curNumElements >= targetNumElements {
450+
break // We got all we want. Nothing else to do.
451+
} else if ret.NextContinuationToken == nil {
452+
break // We got all blobs under the prefix.
453+
}
454+
455+
internalResp, err := cloud.ListBlobs(&ListBlobsInput{
456+
Prefix: param.Prefix,
457+
Delimiter: param.Delimiter,
458+
MaxKeys: PUInt32(targetNumElements - curNumElements),
459+
ContinuationToken: ret.NextContinuationToken,
460+
// We will not set StartAfter for page requests. Only the first request might have it.
461+
})
462+
if err != nil {
463+
return nil, err
464+
}
465+
466+
ret = &ListBlobsOutput{
467+
Prefixes: append(ret.Prefixes, internalResp.Prefixes...),
468+
Items: append(ret.Items, internalResp.Items...),
469+
NextContinuationToken: internalResp.NextContinuationToken,
470+
IsTruncated: internalResp.IsTruncated,
471+
RequestId: internalResp.RequestId,
472+
}
473+
}
474+
return ret, nil
475+
}
476+
429477
// LOCKS_REQUIRED(dh.mu)
430478
// LOCKS_EXCLUDED(dh.inode.mu)
431479
// LOCKS_EXCLUDED(dh.inode.fs)
@@ -972,7 +1020,7 @@ func (parent *Inode) isEmptyDir(fs *Goofys, name string) (isDir bool, err error)
9721020
cloud, key := parent.cloud()
9731021
key = appendChildName(key, name) + "/"
9741022

975-
resp, err := cloud.ListBlobs(&ListBlobsInput{
1023+
resp, err := listBlobsWrapper(cloud, &ListBlobsInput{
9761024
Delimiter: aws.String("/"),
9771025
MaxKeys: PUInt32(2),
9781026
Prefix: &key,
@@ -1306,10 +1354,13 @@ func (parent *Inode) LookUpInodeDir(name string, c chan ListBlobsOutput, errc ch
13061354
cloud, key := parent.cloud()
13071355
key = appendChildName(key, name) + "/"
13081356

1309-
resp, err := cloud.ListBlobs(&ListBlobsInput{
1357+
resp, err := listBlobsWrapper(cloud, &ListBlobsInput{
13101358
Delimiter: aws.String("/"),
1311-
MaxKeys: PUInt32(1),
1312-
Prefix: &key,
1359+
// Ideally one result should be sufficient. But when azure hierarchical
1360+
// namespaces are enabled, azblob returns "a" when we list blobs under "a/".
1361+
// In such cases we remove "a" from the result. So request for 2 blobs.
1362+
MaxKeys: PUInt32(1),
1363+
Prefix: &key,
13131364
})
13141365

13151366
if err != nil {

0 commit comments

Comments
 (0)