Skip to content

Commit b99527d

Browse files
committed
Implement fallocate
1 parent aa5d8cf commit b99527d

File tree

3 files changed

+147
-30
lines changed

3 files changed

+147
-30
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Also check out our CSI S3 driver (GeeseFS-based): https://github.com/yandex-clou
2121
| Read after write | + | + | - | + | + |
2222
| Partial writes | + | + | - | + | + |
2323
| Truncate | + | - | - | + | + |
24+
| fallocate | + | - | - | - | - |
2425
| chmod/chown | Y | - | - | + | - |
2526
| fsync | + | - | - | + | + |
2627
| Symlinks | Y | - | - | + | + |

internal/file.go

Lines changed: 64 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -205,20 +205,54 @@ func insertBuffer(buffers []*FileBuffer, pos int, add ...*FileBuffer) []*FileBuf
205205
}
206206

207207
func (inode *Inode) addBuffer(offset uint64, data []byte, state int16, copyData bool) int64 {
208-
allocated := int64(0)
209-
210-
start := locateBuffer(inode.buffers, offset)
211208
dataLen := uint64(len(data))
212209
endOffset := offset+dataLen
213210

214211
// Remove intersecting parts as they're being overwritten
215-
// If we're inserting a clean buffer, don't remove dirty ones
212+
allocated := inode.removeRange(offset, dataLen, state)
213+
214+
// Insert non-overlapping parts of the buffer
215+
curOffset := offset
216+
dataPtr := &BufferPointer{
217+
mem: data,
218+
refs: 0,
219+
}
220+
start := locateBuffer(inode.buffers, offset)
221+
pos := start
222+
for ; pos < len(inode.buffers) && curOffset < endOffset; pos++ {
223+
b := inode.buffers[pos]
224+
if b.offset + b.length <= offset {
225+
continue
226+
}
227+
if b.offset > curOffset {
228+
// insert curOffset->min(b.offset,endOffset)
229+
nextEnd := b.offset
230+
if nextEnd > endOffset {
231+
nextEnd = endOffset
232+
}
233+
allocated += inode.insertBuffer(pos, curOffset, data[curOffset-offset : nextEnd-offset], state, copyData, dataPtr)
234+
}
235+
curOffset = b.offset + b.length
236+
}
237+
if curOffset < endOffset {
238+
// Insert curOffset->endOffset
239+
allocated += inode.insertBuffer(pos, curOffset, data[curOffset-offset : ], state, copyData, dataPtr)
240+
}
241+
242+
return allocated
243+
}
244+
245+
// Remove buffered data intersecting the range [offset, offset+size)
246+
func (inode *Inode) removeRange(offset, size uint64, state int16) (allocated int64) {
247+
start := locateBuffer(inode.buffers, offset)
248+
endOffset := offset+size
216249
for pos := start; pos < len(inode.buffers); pos++ {
217250
b := inode.buffers[pos]
218251
if b.offset >= endOffset {
219252
break
220253
}
221254
bufEnd := b.offset+b.length
255+
// If we're inserting a clean buffer, don't remove dirty ones
222256
if (state >= BUF_DIRTY || b.state < BUF_DIRTY) && bufEnd > offset && endOffset > b.offset {
223257
if offset <= b.offset {
224258
if endOffset >= bufEnd {
@@ -284,35 +318,35 @@ func (inode *Inode) addBuffer(offset uint64, data []byte, state int16, copyData
284318
}
285319
}
286320
}
321+
return
322+
}
287323

288-
// Insert non-overlapping parts of the buffer
289-
curOffset := offset
290-
dataPtr := &BufferPointer{
291-
mem: data,
292-
refs: 0,
293-
}
294-
pos := start
295-
for ; pos < len(inode.buffers) && curOffset < endOffset; pos++ {
296-
b := inode.buffers[pos]
297-
if b.offset + b.length <= offset {
298-
continue
299-
}
300-
if b.offset > curOffset {
301-
// insert curOffset->min(b.offset,endOffset)
302-
nextEnd := b.offset
303-
if nextEnd > endOffset {
304-
nextEnd = endOffset
305-
}
306-
allocated += inode.insertBuffer(pos, curOffset, data[curOffset-offset : nextEnd-offset], state, copyData, dataPtr)
307-
}
308-
curOffset = b.offset + b.length
309-
}
310-
if curOffset < endOffset {
311-
// Insert curOffset->endOffset
312-
allocated += inode.insertBuffer(pos, curOffset, data[curOffset-offset : ], state, copyData, dataPtr)
324+
func (inode *Inode) zeroRange(offset, size uint64) (bool, int64) {
325+
// Check if it's already zeroed
326+
pos := locateBuffer(inode.buffers, offset)
327+
if pos < len(inode.buffers) && inode.buffers[pos].zero &&
328+
inode.buffers[pos].offset == offset && inode.buffers[pos].length == size {
329+
return false, 0
313330
}
314331

315-
return allocated
332+
// Remove intersecting parts as they're being overwritten
333+
allocated := inode.removeRange(offset, size, BUF_DIRTY)
334+
335+
// Insert a zero buffer
336+
pos = locateBuffer(inode.buffers, offset)
337+
inode.buffers = insertBuffer(inode.buffers, pos, &FileBuffer{
338+
offset: offset,
339+
dirtyID: atomic.AddUint64(&inode.fs.bufferPool.curDirtyID, 1),
340+
state: BUF_DIRTY,
341+
onDisk: false,
342+
zero: true,
343+
recency: 0,
344+
length: size,
345+
data: nil,
346+
ptr: nil,
347+
})
348+
349+
return true, allocated
316350
}
317351

318352
func (inode *Inode) ResizeUnlocked(newSize uint64, zeroFill bool, finalizeFlushed bool) {

internal/goofys.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1805,3 +1805,85 @@ func (fs *Goofys) SyncFS(parent *Inode) (err error) {
18051805
}
18061806
return
18071807
}
1808+
1809+
// Mode bits checked by Fallocate in op.Mode. The names and values follow the
// FALLOC_FL_* flags of Linux fallocate(2).
const (
	// FALLOC_FL_KEEP_SIZE: do not change the file size.
	FALLOC_FL_KEEP_SIZE uint32 = 0x01
	// FALLOC_FL_PUNCH_HOLE: handled by zero-filling the range.
	FALLOC_FL_PUNCH_HOLE uint32 = 0x02
	// FALLOC_FL_COLLAPSE_RANGE: rejected by Fallocate as unsupported.
	FALLOC_FL_COLLAPSE_RANGE uint32 = 0x08
	// FALLOC_FL_ZERO_RANGE: handled by zero-filling the range.
	FALLOC_FL_ZERO_RANGE uint32 = 0x10
	// FALLOC_FL_INSERT_RANGE: rejected by Fallocate as unsupported.
	FALLOC_FL_INSERT_RANGE uint32 = 0x20
)
1816+
1817+
func (fs *Goofys) Fallocate(
1818+
ctx context.Context,
1819+
op *fuseops.FallocateOp) (err error) {
1820+
1821+
atomic.AddInt64(&fs.stats.metadataWrites, 1)
1822+
1823+
fs.mu.RLock()
1824+
inode := fs.getInodeOrDie(op.Inode)
1825+
fs.mu.RUnlock()
1826+
1827+
if atomic.LoadInt32(&inode.refreshed) == -1 {
1828+
// Stale inode
1829+
return syscall.ESTALE
1830+
}
1831+
1832+
if op.Length == 0 {
1833+
return nil
1834+
}
1835+
1836+
inode.mu.Lock()
1837+
1838+
modified := false
1839+
1840+
if (op.Mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)) != 0 {
1841+
// Insert range/remove range operations are not supported
1842+
// It's possible to support them, but it will require buffer remapping support.
1843+
// I.e. if you open a file, insert/collapse a range and then read past the
1844+
// affected offset you should get data from the old offset! And it's probably
1845+
// wise to use UploadPartCopy with the corresponding ranges to optimize copying
1846+
// on the server side in this case. Some day we might even be able to preserve
1847+
// multipart part IDs if cutting a non-finalized upload across part boundaries,
1848+
// but now we can't - part offsets are always fixed.
1849+
inode.mu.Unlock()
1850+
return syscall.ENOTSUP
1851+
}
1852+
1853+
if op.Offset+op.Length > inode.Attributes.Size {
1854+
if (op.Mode & FALLOC_FL_KEEP_SIZE) == 0 {
1855+
// Resize
1856+
if op.Offset+op.Length > fs.getMaxFileSize() {
1857+
// File size too large
1858+
log.Warnf(
1859+
"Maximum file size exceeded when trying to extend %v to %v bytes using fallocate",
1860+
inode.FullName(), op.Offset+op.Length,
1861+
)
1862+
inode.mu.Unlock()
1863+
return syscall.EFBIG
1864+
}
1865+
inode.ResizeUnlocked(op.Offset+op.Length, true, true)
1866+
modified = true
1867+
} else {
1868+
if op.Offset > inode.Attributes.Size {
1869+
op.Offset = inode.Attributes.Size
1870+
}
1871+
op.Length = inode.Attributes.Size-op.Offset
1872+
}
1873+
}
1874+
1875+
if (op.Mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) != 0 {
1876+
// Zero fill
1877+
mod, _ := inode.zeroRange(op.Offset, op.Length)
1878+
modified = modified || mod
1879+
}
1880+
1881+
if modified && inode.CacheState == ST_CACHED {
1882+
inode.SetCacheState(ST_MODIFIED)
1883+
inode.fs.WakeupFlusher()
1884+
}
1885+
1886+
inode.mu.Unlock()
1887+
1888+
return
1889+
}

0 commit comments

Comments
 (0)