Skip to content

Commit 9fc65f1

Browse files
committed
os/bluestore: Make truncate() drop unused allocations
truncate() now drops unused allocations. Modified Close() in BlueRocksEnv to unconditionally call truncate. Fixes: https://tracker.ceph.com/issues/68385 Signed-off-by: Adam Kupczyk <[email protected]>
1 parent 7ecad6d commit 9fc65f1

File tree

2 files changed

+56
-23
lines changed

2 files changed

+56
-23
lines changed

src/os/bluestore/BlueFS.cc

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3760,15 +3760,16 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/
37603760
{
37613761
auto t0 = mono_clock::now();
37623762
std::lock_guard hl(h->lock);
3763+
auto& fnode = h->file->fnode;
37633764
dout(10) << __func__ << " 0x" << std::hex << offset << std::dec
3764-
<< " file " << h->file->fnode << dendl;
3765+
<< " file " << fnode << dendl;
37653766
if (h->file->deleted) {
37663767
dout(10) << __func__ << " deleted, no-op" << dendl;
37673768
return 0;
37683769
}
37693770

37703771
// we never truncate internal log files
3771-
ceph_assert(h->file->fnode.ino > 1);
3772+
ceph_assert(fnode.ino > 1);
37723773

37733774
// truncate off unflushed data?
37743775
if (h->pos < offset &&
@@ -3782,20 +3783,58 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/
37823783
if (r < 0)
37833784
return r;
37843785
}
3785-
if (offset == h->file->fnode.size) {
3786-
return 0; // no-op!
3787-
}
3788-
if (offset > h->file->fnode.size) {
3786+
if (offset > fnode.size) {
37893787
ceph_abort_msg("truncate up not supported");
37903788
}
3791-
ceph_assert(h->file->fnode.size >= offset);
3789+
ceph_assert(offset <= fnode.size);
37923790
_flush_bdev(h);
3793-
3794-
std::lock_guard ll(log.lock);
3795-
vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size - offset);
3796-
h->file->fnode.size = offset;
3797-
h->file->is_dirty = true;
3798-
log.t.op_file_update_inc(h->file->fnode);
3791+
{
3792+
std::lock_guard ll(log.lock);
3793+
std::lock_guard dl(dirty.lock);
3794+
bool changed_extents = false;
3795+
vselector->sub_usage(h->file->vselector_hint, fnode);
3796+
uint64_t x_off = 0;
3797+
auto p = fnode.seek(offset, &x_off);
3798+
uint64_t cut_off =
3799+
(p == fnode.extents.end()) ? 0 : p2roundup(x_off, alloc_size[p->bdev]);
3800+
uint64_t new_allocated;
3801+
if (0 == cut_off) {
3802+
// whole pextent to remove
3803+
changed_extents = true;
3804+
new_allocated = offset;
3805+
} else if (cut_off < p->length) {
3806+
dirty.pending_release[p->bdev].insert(p->offset + cut_off, p->length - cut_off);
3807+
new_allocated = (offset - x_off) + cut_off;
3808+
p->length = cut_off;
3809+
changed_extents = true;
3810+
++p;
3811+
} else {
3812+
ceph_assert(cut_off >= p->length);
3813+
new_allocated = (offset - x_off) + p->length;
3814+
// just leave it here
3815+
++p;
3816+
}
3817+
while (p != fnode.extents.end()) {
3818+
dirty.pending_release[p->bdev].insert(p->offset, p->length);
3819+
p = fnode.extents.erase(p);
3820+
changed_extents = true;
3821+
}
3822+
if (changed_extents) {
3823+
fnode.size = offset;
3824+
fnode.allocated = new_allocated;
3825+
fnode.reset_delta();
3826+
log.t.op_file_update(fnode);
3827+
// sad, but is_dirty must be set to signal flushing of the log
3828+
h->file->is_dirty = true;
3829+
} else {
3830+
if (offset != fnode.size) {
3831+
fnode.size = offset;
3832+
//skipping log.t.op_file_update_inc, it will be done by flush()
3833+
h->file->is_dirty = true;
3834+
}
3835+
}
3836+
vselector->add_usage(h->file->vselector_hint, fnode);
3837+
}
37993838
logger->tinc(l_bluefs_truncate_lat, mono_clock::now() - t0);
38003839
return 0;
38013840
}

src/os/bluestore/BlueRocksEnv.cc

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -221,18 +221,12 @@ class BlueRocksWritableFile : public rocksdb::WritableFile {
221221
}
222222

223223
rocksdb::Status Close() override {
224-
fs->fsync(h);
225224

226-
// mimic posix env, here. shrug.
227-
size_t block_size;
228-
size_t last_allocated_block;
229-
GetPreallocationStatus(&block_size, &last_allocated_block);
230-
if (last_allocated_block > 0) {
231-
int r = fs->truncate(h, h->pos);
232-
if (r < 0)
233-
return err_to_status(r);
225+
int r = fs->truncate(h, h->pos);
226+
if (r < 0) {
227+
return err_to_status(r);
234228
}
235-
229+
fs->fsync(h);
236230
return rocksdb::Status::OK();
237231
}
238232

0 commit comments

Comments
 (0)