Skip to content

Commit 09e8bd4

Browse files
authored
Merge pull request ceph#62224 from aclamk/wip-aclamk-pere-wal-fsync
os/bluestore: Fast WAL for RocksDB
2 parents f826318 + 014528d commit 09e8bd4

File tree

13 files changed

+1256
-97
lines changed

13 files changed

+1256
-97
lines changed

doc/man/8/ceph-bluestore-tool.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ Synopsis
3434
| **ceph-bluestore-tool** show-sharding --path *osd path*
3535
| **ceph-bluestore-tool** trim --path *osd path*
3636
| **ceph-bluestore-tool** zap-device --dev *dev path*
37+
| **ceph-bluestore-tool** revert-wal-to-plain --path *osd path*
3738
3839

3940
Description
@@ -165,6 +166,11 @@ Commands
165166

166167
Zeros all device label locations. This effectively makes device appear empty.
167168

169+
:command:`revert-wal-to-plain` --path *osd path*
170+
171+
Changes WAL files from envelope mode to the legacy plain mode.
172+
Useful for downgrades, or if you want to disable the WAL envelope mode feature (bluefs_wal_envelope_mode).
173+
168174
Options
169175
=======
170176

src/common/options/global.yaml.in

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4248,6 +4248,15 @@ options:
42484248
level: advanced
42494249
default: false
42504250
with_legacy: true
4251+
- name: bluefs_wal_envelope_mode
4252+
type: bool
4253+
level: advanced
4254+
desc: Enables a faster backend in BlueFS for WAL writes.
4255+
long_desc: In envelope mode BlueFS files do not need to update metadata. When applied to RocksDB WAL files,
4256+
it reduces the number of fdatasync syscalls by ~50%.
4257+
Downgrading from envelope mode to legacy mode requires running `ceph-bluestore-tool revert-wal-to-plain --path <osd path>`.
4258+
default: true
4259+
with_legacy: false
42514260
- name: bluefs_allocator
42524261
type: str
42534262
level: dev

src/include/buffer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,7 @@ struct error_code;
834834
contiguous_filler(char* const pos) : pos(pos) {}
835835

836836
public:
837+
contiguous_filler() : pos(nullptr) {}
837838
void advance(const unsigned len) {
838839
pos += len;
839840
}

src/include/denc.h

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1835,8 +1835,6 @@ struct StructVChecker
18351835
p += 2 + 4; \
18361836
} \
18371837
static void _denc_finish(size_t& p, \
1838-
__u8 *struct_v, \
1839-
__u8 *struct_compat, \
18401838
char **, uint32_t *) { } \
18411839
/* encode */ \
18421840
static void _denc_start(::ceph::buffer::list::contiguous_appender& p, \
@@ -1850,8 +1848,6 @@ struct StructVChecker
18501848
*start_oob_off = p.get_out_of_band_offset(); \
18511849
} \
18521850
static void _denc_finish(::ceph::buffer::list::contiguous_appender& p, \
1853-
__u8 *struct_v, \
1854-
__u8 *struct_compat, \
18551851
char **len_pos, \
18561852
uint32_t *start_oob_off) { \
18571853
*(ceph_le32*)*len_pos = p.get_pos() - *len_pos - sizeof(uint32_t) + \
@@ -1872,7 +1868,6 @@ struct StructVChecker
18721868
*start_pos = const_cast<char*>(p.get_pos()); \
18731869
} \
18741870
static void _denc_finish(::ceph::buffer::ptr::const_iterator& p, \
1875-
__u8 *struct_v, __u8 *struct_compat, \
18761871
char **start_pos, \
18771872
uint32_t *struct_len) { \
18781873
const char *pos = p.get_pos(); \
@@ -1902,6 +1897,16 @@ struct StructVChecker
19021897
_denc_start(p, &struct_v.v, &struct_compat, &_denc_pchar, &_denc_u32); \
19031898
do {
19041899

1900+
// the variant for seldom-used cases when we manually select encoding version
1901+
#define DENC_START_UNCHECKED(_v, compat, p) \
1902+
__u8 struct_v = _v; \
1903+
__u8 struct_compat = compat; \
1904+
char *_denc_pchar; \
1905+
uint32_t _denc_u32; \
1906+
static_assert(CEPH_RELEASE >= (CEPH_RELEASE_SQUID /*19*/ + 2) || compat == 1); \
1907+
_denc_start(p, &struct_v, &struct_compat, &_denc_pchar, &_denc_u32); \
1908+
do {
1909+
19051910
// For the only type that is with compat 2: unittest.
19061911
#define DENC_START_COMPAT_2(_v, compat, p) \
19071912
StructVChecker<_v> struct_v{_v}; \
@@ -1912,6 +1917,16 @@ struct StructVChecker
19121917
_denc_start(p, &struct_v.v, &struct_compat, &_denc_pchar, &_denc_u32); \
19131918
do {
19141919

1920+
// This variant is unsafe, because older versions will not even detect the incompatibility.
1921+
// The ability to decode must be verified by other means.
1922+
#define DENC_START_UNSAFE(v, compat, p) \
1923+
__u8 struct_v = v; \
1924+
__u8 struct_compat = compat; \
1925+
char *_denc_pchar; \
1926+
uint32_t _denc_u32; \
1927+
_denc_start(p, &struct_v, &struct_compat, &_denc_pchar, &_denc_u32); \
1928+
do {
1929+
19151930
// For osd_reqid_t which cannot be upgraded at all.
19161931
// We used it to communicate with clients and now we cannot safely upgrade.
19171932
#define DENC_START_OSD_REQID(_v, compat, p) \
@@ -1925,8 +1940,7 @@ struct StructVChecker
19251940

19261941
#define DENC_FINISH(p) \
19271942
} while (false); \
1928-
_denc_finish(p, &struct_v.v, &struct_compat, &_denc_pchar, &_denc_u32);
1929-
1943+
_denc_finish(p, &_denc_pchar, &_denc_u32);
19301944

19311945
// ----------------------------------------------------------------------
19321946

0 commit comments

Comments
 (0)