Skip to content

Commit fe2bc06

Browse files
authored
Merge pull request ceph#57369 from YiteGu/bluestore-offline-trim
tools/bluestore: Add command 'trim' to ceph-bluestore-tool
2 parents 2ee3def + 0441dfc commit fe2bc06

File tree

8 files changed

+107
-3
lines changed

8 files changed

+107
-3
lines changed

doc/man/8/ceph-bluestore-tool.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Synopsis
2929
| **ceph-bluestore-tool** free-dump|free-score --path *osd path* [ --allocator block/bluefs-wal/bluefs-db/bluefs-slow ]
3030
| **ceph-bluestore-tool** reshard --path *osd path* --sharding *new sharding* [ --sharding-ctrl *control string* ]
3131
| **ceph-bluestore-tool** show-sharding --path *osd path*
32+
| **ceph-bluestore-tool** trim --path *osd path*
3233
3334

3435
Description
@@ -131,6 +132,13 @@ Commands
131132

132133
Show sharding that is currently applied to BlueStore's RocksDB.
133134

135+
:command:`trim` --path *osd path*
136+
137+
An SSD that has been used heavily may experience performance degradation.
138+
This operation uses TRIM / discard to free unused blocks from BlueStore and BlueFS block devices,
139+
and allows the drive to perform more efficient internal housekeeping.
140+
If BlueStore already runs with discard enabled, this command may not be useful.
141+
134142
Options
135143
=======
136144

src/blk/BlockDevice.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ class BlockDevice {
233233
uint64_t get_size() const { return size; }
234234
uint64_t get_block_size() const { return block_size; }
235235
uint64_t get_optimal_io_size() const { return optimal_io_size; }
236+
/// Report the value of this device's `support_discard` flag.
bool is_discard_supported() const
{
  return support_discard;
}
236237

237238
/// hook to provide utilization of thinly-provisioned device
238239
virtual int get_ebd_state(ExtBlkDevState &state) const {

src/blk/kernel/KernelDevice.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,8 +1131,8 @@ int KernelDevice::_discard(uint64_t offset, uint64_t len)
11311131
return 0;
11321132
}
11331133
dout(10) << __func__
1134-
<< " 0x" << std::hex << offset << "~" << len << std::dec
1135-
<< dendl;
1134+
<< " 0x" << std::hex << offset << "~" << len << std::dec
1135+
<< dendl;
11361136
r = BlkDev{fd_directs[WRITE_LIFE_NOT_SET]}.discard((int64_t)offset, (int64_t)len);
11371137
return r;
11381138
}

src/os/bluestore/BlueFS.cc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4778,6 +4778,37 @@ size_t BlueFS::probe_alloc_avail(int dev, uint64_t alloc_size)
47784778
}
47794779
return total;
47804780
}
4781+
4782+
void BlueFS::trim_free_space(const string& type, std::ostream& outss)
4783+
{
4784+
unsigned bdev_id;
4785+
if(type == "bdev-wal") {
4786+
bdev_id = BDEV_WAL;
4787+
} else if (type == "bdev-db") {
4788+
bdev_id = BDEV_DB;
4789+
} else {
4790+
derr << __func__ << " unknown bdev type " << type << dendl;
4791+
return;
4792+
}
4793+
auto iterated_allocation = [&](size_t off, size_t len) {
4794+
ceph_assert(len > 0);
4795+
interval_set<uint64_t> to_discard;
4796+
to_discard.union_insert(off, len);
4797+
bdev[bdev_id]->try_discard(to_discard, false);
4798+
};
4799+
if (!bdev[bdev_id]) {
4800+
outss << "device " << type << " is not configured";
4801+
return;
4802+
}
4803+
if (alloc[bdev_id] && !is_shared_alloc(bdev_id)) {
4804+
if (!bdev[bdev_id]->is_discard_supported()) {
4805+
outss << "device " << type << " does not support trim";
4806+
return;
4807+
}
4808+
alloc[bdev_id]->foreach(iterated_allocation);
4809+
outss << "device " << type << " trim done";
4810+
}
4811+
}
47814812
// ===============================================
47824813
// OriginalVolumeSelector
47834814

src/os/bluestore/BlueFS.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,7 @@ class BlueFS {
778778
}
779779
uint64_t debug_get_dirty_seq(FileWriter *h);
780780
bool debug_get_is_dev_dirty(FileWriter *h, uint8_t dev);
781+
/// Discard (TRIM) the free extents of the BlueFS device selected by
/// `type` ("bdev-wal" or "bdev-db"). Outcome text, if any, is appended
/// to `outss`.
void trim_free_space(const std::string& type, std::ostream& outss);
781782

782783
private:
783784
// Wrappers for BlockDevice::read(...) and BlockDevice::read_random(...)

src/os/bluestore/BlueStore.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8639,6 +8639,26 @@ int BlueStore::dump_bluefs_sizes(ostream& out)
86398639
return r;
86408640
}
86418641

8642+
void BlueStore::trim_free_space(const string& type, std::ostream& outss)
8643+
{
8644+
auto iterated_allocation = [&](size_t off, size_t len) {
8645+
ceph_assert(len > 0);
8646+
interval_set<uint64_t> to_discard;
8647+
to_discard.union_insert(off, len);
8648+
bdev->try_discard(to_discard, false);
8649+
};
8650+
if (type == "bdev-block") {
8651+
if (!bdev->is_discard_supported()) {
8652+
outss << "device " << type << " does not support trim";
8653+
return;
8654+
}
8655+
shared_alloc.a->foreach(iterated_allocation);
8656+
outss << "device " << type << " trim done";
8657+
} else {
8658+
bluefs->trim_free_space(type, outss);
8659+
}
8660+
}
8661+
86428662
void BlueStore::set_cache_shards(unsigned num)
86438663
{
86448664
dout(10) << __func__ << " " << num << dendl;

src/os/bluestore/BlueStore.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3085,6 +3085,7 @@ class BlueStore : public ObjectStore,
30853085
std::string get_device_path(unsigned id);
30863086

30873087
int dump_bluefs_sizes(std::ostream& out);
3088+
/// Discard (TRIM) free space on the device selected by `type`:
/// "bdev-block" is handled here, any other value is passed on to BlueFS.
/// Outcome text is appended to `outss`.
void trim_free_space(const std::string& type, std::ostream& outss);
30883089

30893090
public:
30903091
int statfs(struct store_statfs_t *buf,

src/os/bluestore/bluestore_tool.cc

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,9 +285,11 @@ int main(int argc, char **argv)
285285
string dest_file;
286286
string key, value;
287287
vector<string> allocs_name;
288+
vector<string> bdev_type;
288289
string empty_sharding(1, '\0');
289290
string new_sharding = empty_sharding;
290291
string resharding_ctrl;
292+
string really;
291293
int log_level = 30;
292294
bool fsck_deep = false;
293295
po::options_description po_options("Options");
@@ -309,6 +311,8 @@ int main(int argc, char **argv)
309311
("key,k", po::value<string>(&key), "label metadata key name")
310312
("value,v", po::value<string>(&value), "label metadata value")
311313
("allocator", po::value<vector<string>>(&allocs_name), "allocator to inspect: 'block'/'bluefs-wal'/'bluefs-db'")
314+
("bdev-type", po::value<vector<string>>(&bdev_type), "bdev type to inspect: 'bdev-block'/'bdev-wal'/'bdev-db'")
315+
("really", po::value<string>(&really), "--yes-i-really-really-mean-it")
312316
("sharding", po::value<string>(&new_sharding), "new sharding to apply")
313317
("resharding-ctrl", po::value<string>(&resharding_ctrl), "gives control over resharding procedure details")
314318
("op", po::value<string>(&action_aux),
@@ -340,7 +344,8 @@ int main(int argc, char **argv)
340344
"free-fragmentation, "
341345
"bluefs-stats, "
342346
"reshard, "
343-
"show-sharding")
347+
"show-sharding, "
348+
"trim")
344349
;
345350
po::options_description po_all("All options");
346351
po_all.add(po_options).add(po_positional);
@@ -572,6 +577,29 @@ int main(int argc, char **argv)
572577
exit(EXIT_FAILURE);
573578
}
574579
}
580+
if (action == "trim") {
581+
if (path.empty()) {
582+
cerr << "must specify bluestore path" << std::endl;
583+
exit(EXIT_FAILURE);
584+
}
585+
if (really.empty() || strcmp(really.c_str(), "--yes-i-really-really-mean-it") != 0) {
586+
cerr << "Trimming a non healthy bluestore is a dangerous operation which could cause data loss, "
587+
<< "please run fsck and confirm with --yes-i-really-really-mean-it option"
588+
<< std::endl;
589+
exit(EXIT_FAILURE);
590+
}
591+
for (auto type : bdev_type) {
592+
if (!type.empty() &&
593+
type != "bdev-block" &&
594+
type != "bdev-db" &&
595+
type != "bdev-wal") {
596+
cerr << "unknown bdev type '" << type << "'" << std::endl;
597+
exit(EXIT_FAILURE);
598+
}
599+
}
600+
if (bdev_type.empty())
601+
bdev_type = vector<string>{"bdev-block", "bdev-db", "bdev-wal"};
602+
}
575603

576604
if (action == "restore_cfb") {
577605
#ifndef CEPH_BLUESTORE_TOOL_RESTORE_ALLOCATION
@@ -1175,6 +1203,20 @@ int main(int argc, char **argv)
11751203
exit(EXIT_FAILURE);
11761204
}
11771205
cout << sharding << std::endl;
1206+
} else if (action == "trim") {
1207+
BlueStore bluestore(cct.get(), path);
1208+
int r = bluestore.cold_open();
1209+
if (r < 0) {
1210+
cerr << "error from cold_open: " << cpp_strerror(r) << std::endl;
1211+
exit(EXIT_FAILURE);
1212+
}
1213+
for (auto type : bdev_type) {
1214+
cout << "trimming: " << type << std::endl;
1215+
ostringstream outss;
1216+
bluestore.trim_free_space(type, outss);
1217+
cout << "status: " << outss.str() << std::endl;
1218+
}
1219+
bluestore.cold_close();
11781220
} else {
11791221
cerr << "unrecognized action " << action << std::endl;
11801222
return 1;

0 commit comments

Comments
 (0)