Skip to content

Commit bbf24cd

Browse files
committed
Optional uid, gid, UNIX permissions, special file and mtime support
1 parent 57f72c8 commit bbf24cd

File tree

7 files changed

+343
-72
lines changed

7 files changed

+343
-72
lines changed

README.md

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,35 +21,41 @@ Also check out our CSI S3 driver (GeeseFS-based): https://github.com/yandex-clou
2121
| Read after write | + | + | - | + | + |
2222
| Partial writes | + | + | - | + | + |
2323
| Truncate | + | - | - | + | + |
24-
| chmod/chown | - | - | - | + | - |
24+
| chmod/chown | Y | - | - | + | - |
2525
| fsync | + | - | - | + | + |
26-
| Symlinks | + | - | - | + | + |
26+
| Symlinks | Y | - | - | + | + |
27+
| Socket files | Y | - | - | + | - |
28+
| Device files | Y | - | - | - | - |
29+
| Custom mtime | Y | + | - | + | + |
2730
| xattr | + | - | + | + | - |
2831
| Directory renames | + | + | * | + | + |
2932
| readdir & changes | + | + | - | + | + |
3033

31-
\* Directory renames are allowed in Goofys for directories with no more than 1000 entries and the limit is hardcoded
34+
**Y** Only works correctly with Yandex S3.
35+
36+
**\*** Directory renames are allowed in Goofys for directories with no more than 1000 entries and the limit is hardcoded.
3237

3338
List of non-POSIX behaviors/limitations for GeeseFS:
34-
* symbolic links are only restored correctly when using Yandex S3 because standard S3
35-
doesn't return user metadata in listings and detecting symlinks in standard S3 would
36-
require an additional HEAD request for every file in listing which would make listings
37-
too slow
38-
* does not store file mode/owner/group, use `--(dir|file)-mode` or `--(uid|gid)` options
39-
* does not support hard links
40-
* does not support special files (block/character devices, named pipes, UNIX sockets)
41-
* does not support locking
42-
* `ctime`, `atime` is always the same as `mtime`
43-
* file modification time can't be set by user (for example with `cp --preserve` or utimes(2))
39+
* File mode/owner/group, symbolic links, custom mtimes and special files (block/character devices,
40+
named pipes, UNIX sockets) are supported, but they are restored correctly only when
41+
using Yandex S3 because standard S3 doesn't return user metadata in listings and
42+
reading all this metadata in standard S3 would require an additional HEAD request
43+
for every file in listing which would make listings too slow.
44+
* Special file support is enabled by default for Yandex S3 (disable with `--no-specials`) and disabled for others.
45+
* File mode/owner/group are disabled by default even for Yandex S3 (enable with `--enable-perms`).
46+
When disabled, global permissions can be set with `--(dir|file)-mode` and `--(uid|gid)` options.
47+
* Custom modification times are also disabled by default even for Yandex S3 (enable with `--enable-mtime`).
48+
When disabled:
49+
- `ctime`, `atime` and `mtime` are always the same
50+
- file modification time can't be set by user (for example with `cp --preserve` or utimes(2))
51+
* Does not support hard links
52+
* Does not support locking
4453

4554
In addition to the items above:
46-
* default file size limit is 1.03 TB, achieved by splitting the file into 1000x 5MB parts,
55+
* Default file size limit is 1.03 TB, achieved by splitting the file into 1000x 5 MB parts,
4756
1000x 25 MB parts and 8000x 125 MB parts. You can change part sizes, but AWS's own limit
4857
is 5 TB anyway.
4958

50-
Owner & group, modification times and special files are in fact supportable with Yandex S3
51-
because it has listings with metadata. Feel free to post issues if you want it. :-)
52-
5359
# Stability
5460

5561
GeeseFS is stable enough to pass most of `xfstests` which are applicable,

api/common/config.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,14 @@ type FlagStorage struct {
7171
SinglePartMB uint64
7272
MaxMergeCopyMB uint64
7373
IgnoreFsync bool
74+
EnablePerms bool
75+
EnableSpecials bool
76+
EnableMtime bool
77+
UidAttr string
78+
GidAttr string
79+
FileModeAttr string
80+
RdevAttr string
81+
MtimeAttr string
7482
SymlinkAttr string
7583
CachePopularThreshold int64
7684
CacheMaxHits int64

internal/dir.go

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ func (inode *Inode) sealDir() {
330330
inode.dir.listDone = true
331331
inode.dir.lastFromCloud = nil
332332
inode.dir.DirTime = time.Now()
333-
inode.Attributes.Mtime = inode.findChildMaxTime()
333+
inode.Attributes.Mtime, inode.Attributes.Ctime = inode.findChildMaxTime()
334334
}
335335

336336
// Sorting order of entries in directories is slightly inconsistent between goofys
@@ -1014,7 +1014,11 @@ func (parent *Inode) Create(name string) (inode *Inode, fh *FileHandle) {
10141014
defer inode.mu.Unlock()
10151015
inode.Attributes = InodeAttributes{
10161016
Size: 0,
1017+
Ctime: now,
10171018
Mtime: now,
1019+
Uid: fs.flags.Uid,
1020+
Gid: fs.flags.Gid,
1021+
Mode: fs.flags.FileMode,
10181022
}
10191023
// one ref is for lookup
10201024
inode.Ref()
@@ -1085,6 +1089,10 @@ func (parent *Inode) doMkDir(name string) (inode *Inode) {
10851089
oldInode.refcnt = 0
10861090
oldInode.Ref()
10871091
oldInode.SetCacheState(ST_MODIFIED)
1092+
oldInode.Attributes.Ctime = time.Now()
1093+
if parent.Attributes.Ctime.Before(oldInode.Attributes.Ctime) {
1094+
parent.Attributes.Ctime = oldInode.Attributes.Ctime
1095+
}
10881096
oldInode.Attributes.Mtime = time.Now()
10891097
if parent.Attributes.Mtime.Before(oldInode.Attributes.Mtime) {
10901098
parent.Attributes.Mtime = oldInode.Attributes.Mtime
@@ -1102,7 +1110,10 @@ func (parent *Inode) doMkDir(name string) (inode *Inode) {
11021110
inode.ToDir()
11031111
inode.touch()
11041112
// Record dir as actual
1105-
inode.dir.DirTime = inode.Attributes.Mtime
1113+
inode.dir.DirTime = inode.Attributes.Ctime
1114+
if parent.Attributes.Ctime.Before(inode.Attributes.Ctime) {
1115+
parent.Attributes.Ctime = inode.Attributes.Ctime
1116+
}
11061117
if parent.Attributes.Mtime.Before(inode.Attributes.Mtime) {
11071118
parent.Attributes.Mtime = inode.Attributes.Mtime
11081119
}
@@ -1139,6 +1150,10 @@ func (parent *Inode) CreateSymlink(
11391150
inode.Attributes = InodeAttributes{
11401151
Size: 0,
11411152
Mtime: now,
1153+
Ctime: now,
1154+
Uid: fs.flags.Uid,
1155+
Gid: fs.flags.Gid,
1156+
Mode: fs.flags.FileMode,
11421157
}
11431158
// one ref is for lookup
11441159
inode.Ref()
@@ -1587,20 +1602,24 @@ func (parent *Inode) insertSubTree(path string, obj *BlobItemOutput, dirs map[*I
15871602
}
15881603
}
15891604

1590-
func (parent *Inode) findChildMaxTime() time.Time {
1591-
maxTime := parent.Attributes.Mtime
1605+
func (parent *Inode) findChildMaxTime() (maxMtime, maxCtime time.Time) {
1606+
maxCtime = parent.Attributes.Ctime
1607+
maxMtime = parent.Attributes.Mtime
15921608

15931609
for i, c := range parent.dir.Children {
15941610
if i < 2 {
15951611
// skip . and ..
15961612
continue
15971613
}
1598-
if c.Attributes.Mtime.After(maxTime) {
1599-
maxTime = c.Attributes.Mtime
1614+
if c.Attributes.Ctime.After(maxCtime) {
1615+
maxCtime = c.Attributes.Ctime
1616+
}
1617+
if c.Attributes.Mtime.After(maxMtime) {
1618+
maxMtime = c.Attributes.Mtime
16001619
}
16011620
}
16021621

1603-
return maxTime
1622+
return
16041623
}
16051624

16061625
func (parent *Inode) LookUp(name string) (*Inode, error) {

internal/file.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,7 @@ func (fh *FileHandle) WriteFile(offset int64, data []byte, copyData bool) (err e
435435
// FIXME: Don't activate the flusher immediately for small writes
436436
fh.inode.fs.WakeupFlusher()
437437
fh.inode.Attributes.Mtime = time.Now()
438+
fh.inode.Attributes.Ctime = fh.inode.Attributes.Mtime
438439

439440
fh.inode.mu.Unlock()
440441

@@ -2005,7 +2006,7 @@ func (inode *Inode) updateFromFlush(size uint64, etag *string, lastModified *tim
20052006
inode.s3Metadata["storage-class"] = []byte(*storageClass)
20062007
}
20072008
if lastModified != nil {
2008-
inode.Attributes.Mtime = *lastModified
2009+
inode.Attributes.Ctime = *lastModified
20092010
}
20102011
inode.knownSize = size
20112012
inode.knownETag = *etag

internal/flags.go

Lines changed: 86 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -222,13 +222,13 @@ func NewApp() (app *cli.App) {
222222

223223
cli.StringFlag{
224224
Name: "multipart-age",
225-
Usage: "Multipart uploads older than this value will be deleted on start (default: 48 hours)",
225+
Usage: "Multipart uploads older than this value will be deleted on start",
226226
Value: "48h",
227227
},
228228

229229
cli.IntFlag{
230230
Name: "multipart-copy-threshold",
231-
Usage: "Threshold for switching from single-part to multipart object copy in MB. Maximum for AWS S3 is 5 GB (default: 128 MB)",
231+
Usage: "Threshold for switching from single-part to multipart object copy in MB. Maximum for AWS S3 is 5 GB",
232232
Value: 128,
233233
},
234234

@@ -280,27 +280,27 @@ func NewApp() (app *cli.App) {
280280
cli.IntFlag{
281281
Name: "max-flushers",
282282
Value: 16,
283-
Usage: "How much parallel requests should be used for flushing changes to server (default: 16)",
283+
Usage: "How much parallel requests should be used for flushing changes to server",
284284
},
285285

286286
cli.IntFlag{
287287
Name: "max-parallel-parts",
288288
Value: 8,
289289
Usage: "How much parallel requests out of the total number can be used for large part uploads."+
290-
" Large parts take more bandwidth so they usually require less parallelism (default: 8)",
290+
" Large parts take more bandwidth so they usually require less parallelism",
291291
},
292292

293293
cli.IntFlag{
294294
Name: "max-parallel-copy",
295295
Value: 16,
296296
Usage: "How much parallel unmodified part copy requests should be used."+
297-
" This limit is separate from max-flushers (default: 16)",
297+
" This limit is separate from max-flushers",
298298
},
299299

300300
cli.IntFlag{
301301
Name: "read-ahead",
302302
Value: 5*1024,
303-
Usage: "How much data in KB should be pre-loaded with every read by default (default: 5 MB)",
303+
Usage: "How much data in KB should be pre-loaded with every read by default",
304304
},
305305

306306
cli.IntFlag{
@@ -312,45 +312,45 @@ func NewApp() (app *cli.App) {
312312
cli.IntFlag{
313313
Name: "small-read-cutoff",
314314
Value: 128,
315-
Usage: "Maximum average size of last reads in KB to trigger \"small\" readahead (default: 128 KB)",
315+
Usage: "Maximum average size of last reads in KB to trigger \"small\" readahead",
316316
},
317317

318318
cli.IntFlag{
319319
Name: "read-ahead-small",
320320
Value: 128,
321-
Usage: "Smaller readahead size in KB to be used when small random reads are detected (default: 128 KB)",
321+
Usage: "Smaller readahead size in KB to be used when small random reads are detected",
322322
},
323323

324324
cli.IntFlag{
325325
Name: "large-read-cutoff",
326326
Value: 20*1024,
327-
Usage: "Amount of linear read in KB after which the \"large\" readahead should be triggered (default: 20 MB)",
327+
Usage: "Amount of linear read in KB after which the \"large\" readahead should be triggered",
328328
},
329329

330330
cli.IntFlag{
331331
Name: "read-ahead-large",
332332
Value: 100*1024,
333-
Usage: "Larger readahead size in KB to be used when long linear reads are detected (default: 100 MB)",
333+
Usage: "Larger readahead size in KB to be used when long linear reads are detected",
334334
},
335335

336336
cli.IntFlag{
337337
Name: "read-ahead-parallel",
338338
Value: 20*1024,
339-
Usage: "Larger readahead will be triggered in parallel chunks of this size in KB (default: 20 MB)",
339+
Usage: "Larger readahead will be triggered in parallel chunks of this size in KB",
340340
},
341341

342342
cli.IntFlag{
343343
Name: "read-merge",
344344
Value: 512,
345345
Usage: "Two HTTP requests required to satisfy a read will be merged into one" +
346-
" if they're at most this number of KB away (default: 512)",
346+
" if they're at most this number of KB away",
347347
},
348348

349349
cli.IntFlag{
350350
Name: "single-part",
351351
Value: 5,
352352
Usage: "Maximum size of an object in MB to upload it as a single part." +
353-
" Can't be less than 5 MB (default: 5 MB)",
353+
" Can't be less than 5 MB",
354354
},
355355

356356
cli.StringFlag{
@@ -366,18 +366,72 @@ func NewApp() (app *cli.App) {
366366
Value: 0,
367367
Usage: "If non-zero, allow to compose larger parts up to this number of megabytes" +
368368
" in size from existing unchanged parts when doing server-side part copy."+
369-
" Must be left at 0 for Yandex S3 (default: 0)",
369+
" Must be left at 0 for Yandex S3",
370370
},
371371

372372
cli.BoolFlag{
373373
Name: "ignore-fsync",
374374
Usage: "Do not wait until changes are persisted to the server on fsync() call (default: off)",
375375
},
376376

377+
cli.BoolFlag{
378+
Name: "enable-perms",
379+
Usage: "Enable permissions, user and group ID." +
380+
" Only works correctly if your S3 returns UserMetadata in listings (default: off)",
381+
},
382+
383+
cli.BoolFlag{
384+
Name: "enable-specials",
385+
Usage: "Enable special file support (sockets, devices, named pipes)." +
386+
" Only works correctly if your S3 returns UserMetadata in listings (default: on for Yandex, off for others)",
387+
},
388+
389+
cli.BoolFlag{
390+
Name: "no-specials",
391+
Usage: "Disable special file support (sockets, devices, named pipes).",
392+
},
393+
394+
cli.BoolFlag{
395+
Name: "enable-mtime",
396+
Usage: "Enable modification time preservation." +
397+
" Only works correctly if your S3 returns UserMetadata in listings (default: off)",
398+
},
399+
400+
cli.StringFlag{
401+
Name: "uid-attr",
402+
Value: "uid",
403+
Usage: "User ID metadata attribute name",
404+
},
405+
406+
cli.StringFlag{
407+
Name: "gid-attr",
408+
Value: "gid",
409+
Usage: "Group ID metadata attribute name",
410+
},
411+
412+
cli.StringFlag{
413+
Name: "mode-attr",
414+
Value: "mode",
415+
Usage: "File mode (permissions & special file flags) metadata attribute name",
416+
},
417+
418+
cli.StringFlag{
419+
Name: "rdev-attr",
420+
Value: "rdev",
421+
Usage: "Block/character device number metadata attribute name",
422+
},
423+
424+
cli.StringFlag{
425+
Name: "mtime-attr",
426+
Value: "mtime",
427+
Usage: "File modification time (UNIX time) metadata attribute name",
428+
},
429+
377430
cli.StringFlag{
378431
Name: "symlink-attr",
379432
Value: "--symlink-target",
380-
Usage: "Symbolic link target metadata attribute (default: --symlink-target)",
433+
Usage: "Symbolic link target metadata attribute name." +
434+
" Only works correctly if your S3 returns UserMetadata in listings",
381435
},
382436

383437
cli.DurationFlag{
@@ -617,6 +671,14 @@ func PopulateFlags(c *cli.Context) (ret *FlagStorage) {
617671
SinglePartMB: uint64(singlePart),
618672
MaxMergeCopyMB: uint64(c.Int("max-merge-copy")),
619673
IgnoreFsync: c.Bool("ignore-fsync"),
674+
EnablePerms: c.Bool("enable-perms"),
675+
EnableSpecials: c.Bool("enable-specials"),
676+
EnableMtime: c.Bool("enable-mtime"),
677+
UidAttr: c.String("uid-attr"),
678+
GidAttr: c.String("gid-attr"),
679+
FileModeAttr: c.String("mode-attr"),
680+
RdevAttr: c.String("rdev-attr"),
681+
MtimeAttr: c.String("mtime-attr"),
620682
SymlinkAttr: c.String("symlink-attr"),
621683
CachePopularThreshold: int64(c.Int("cache-popular-threshold")),
622684
CacheMaxHits: int64(c.Int("cache-max-hits")),
@@ -662,8 +724,12 @@ func PopulateFlags(c *cli.Context) (ret *FlagStorage) {
662724
config.IAMHeader = c.String("iam-header")
663725
config.MultipartAge = c.Duration("multipart-age")
664726
listType := c.String("list-type")
727+
isYandex := strings.Index(flags.Endpoint, "yandex") != -1
728+
if isYandex && !c.IsSet("no-specials") {
729+
flags.EnableSpecials = true
730+
}
665731
if listType == "" {
666-
if idx := strings.Index(flags.Endpoint, "yandex"); idx != -1 {
732+
if isYandex {
667733
listType = "ext-v1"
668734
} else {
669735
listType = "1"
@@ -680,6 +746,10 @@ func PopulateFlags(c *cli.Context) (ret *FlagStorage) {
680746
}
681747
}
682748

749+
if c.IsSet("no-specials") {
750+
flags.EnableSpecials = false
751+
}
752+
683753
// Handle the repeated "-o" flag.
684754
for _, o := range c.StringSlice("o") {
685755
parseOptions(flags.MountOptions, o)

0 commit comments

Comments
 (0)