Skip to content

Commit 8bd63f8

Browse files
committed
Merge PR ceph#55421 into main
* refs/pull/55421/head:
  qa/cephfs: add test to verify backtrace update failure on deleted data pool
  mds: batch backtrace updates by pool-id when expiring a log segment
  mds: dump log segment in segment expiry callback
  mds: dump log segment end along with offset

Reviewed-by: Xiubo Li <[email protected]>
Reviewed-by: Patrick Donnelly <[email protected]>
2 parents b57eb17 + 9f27bde commit 8bd63f8

File tree

6 files changed

+82
-26
lines changed

6 files changed

+82
-26
lines changed

qa/tasks/cephfs/test_backtrace.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,29 @@ def test_backtrace(self):
100100
# we don't update the layout in all the old pools whenever it changes
101101
old_pool_layout = self.fs.read_layout(file_ino, pool=old_data_pool_name)
102102
self.assertEqual(old_pool_layout['object_size'], 4194304)
103+
104+
def test_backtrace_flush_on_deleted_data_pool(self):
105+
"""
106+
that the MDS does not go read-only when handling backtrace update errors
107+
when backtrace updates are batched and flushed to RADOS (during journal trim)
108+
and some of the pools have been removed.
109+
"""
110+
data_pool = self.fs.get_data_pool_name()
111+
extra_data_pool_name_1 = data_pool + '_extra1'
112+
self.fs.add_data_pool(extra_data_pool_name_1)
113+
114+
self.mount_a.run_shell(["mkdir", "dir_x"])
115+
self.mount_a.setfattr("dir_x", "ceph.dir.layout.pool", extra_data_pool_name_1)
116+
self.mount_a.run_shell(["touch", "dir_x/file_x"])
117+
self.fs.flush()
118+
119+
extra_data_pool_name_2 = data_pool + '_extra2'
120+
self.fs.add_data_pool(extra_data_pool_name_2)
121+
self.mount_a.setfattr("dir_x/file_x", "ceph.file.layout.pool", extra_data_pool_name_2)
122+
self.mount_a.run_shell(["setfattr", "-x", "ceph.dir.layout", "dir_x"])
123+
self.run_ceph_cmd("fs", "rm_data_pool", self.fs.name, extra_data_pool_name_1)
124+
self.fs.flush()
125+
126+
# quick test to check if the mds has handled backtrace update failure
127+
# on the deleted data pool without going read-only.
128+
self.mount_a.run_shell(["mkdir", "dir_y"])

src/mds/CInode.cc

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1386,7 +1386,7 @@ void CInode::_commit_ops(int r, C_GatherBuilder &gather_bld,
13861386
}
13871387

13881388
void CInode::_store_backtrace(std::vector<CInodeCommitOperation> &ops_vec,
1389-
inode_backtrace_t &bt, int op_prio)
1389+
inode_backtrace_t &bt, int op_prio, bool ignore_old_pools)
13901390
{
13911391
dout(10) << __func__ << " on " << *this << dendl;
13921392
ceph_assert(is_dirty_parent());
@@ -1407,8 +1407,8 @@ void CInode::_store_backtrace(std::vector<CInodeCommitOperation> &ops_vec,
14071407
ops_vec.emplace_back(op_prio, pool, get_inode()->layout,
14081408
mdcache->mds->mdsmap->get_up_features(), slink);
14091409

1410-
if (!state_test(STATE_DIRTYPOOL) || get_inode()->old_pools.empty()) {
1411-
dout(20) << __func__ << ": no dirtypool or no old pools" << dendl;
1410+
if (!state_test(STATE_DIRTYPOOL) || get_inode()->old_pools.empty() || ignore_old_pools) {
1411+
dout(20) << __func__ << ": no dirtypool or no old pools or ignore_old_pools" << dendl;
14121412
return;
14131413
}
14141414

@@ -1431,7 +1431,7 @@ void CInode::store_backtrace(MDSContext *fin, int op_prio)
14311431
inode_backtrace_t bt;
14321432
auto version = get_inode()->backtrace_version;
14331433

1434-
_store_backtrace(ops_vec, bt, op_prio);
1434+
_store_backtrace(ops_vec, bt, op_prio, false);
14351435

14361436
C_GatherBuilder gather(g_ceph_context,
14371437
new C_OnFinisher(
@@ -1442,12 +1442,14 @@ void CInode::store_backtrace(MDSContext *fin, int op_prio)
14421442
gather.activate();
14431443
}
14441444

1445-
void CInode::store_backtrace(CInodeCommitOperations &op, int op_prio)
1445+
void CInode::store_backtrace(CInodeCommitOperations &op, int op_prio,
1446+
bool ignore_old_pools)
14461447
{
14471448
op.version = get_inode()->backtrace_version;
14481449
op.in = this;
14491450

1450-
_store_backtrace(op.ops_vec, op.bt, op_prio);
1451+
// update backtraces in old pools
1452+
_store_backtrace(op.ops_vec, op.bt, op_prio, ignore_old_pools);
14511453
}
14521454

14531455
void CInode::_stored_backtrace(int r, version_t v, Context *fin)

src/mds/CInode.h

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -754,8 +754,9 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
754754
inode_backtrace_t &bt);
755755
void build_backtrace(int64_t pool, inode_backtrace_t& bt);
756756
void _store_backtrace(std::vector<CInodeCommitOperation> &ops_vec,
757-
inode_backtrace_t &bt, int op_prio);
758-
void store_backtrace(CInodeCommitOperations &op, int op_prio);
757+
inode_backtrace_t &bt, int op_prio, bool ignore_old_pools);
758+
void store_backtrace(CInodeCommitOperations &op, int op_prio,
759+
bool ignore_old_pools=false);
759760
void store_backtrace(MDSContext *fin, int op_prio=-1);
760761
void _stored_backtrace(int r, version_t v, Context *fin);
761762
void fetch_backtrace(Context *fin, ceph::buffer::list *backtrace);
@@ -1142,6 +1143,14 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
11421143
// client caps
11431144
client_t loner_cap = -1, want_loner_cap = -1;
11441145

1146+
/**
1147+
* Return the pool ID where we currently write backtraces for
1148+
* this inode (in addition to inode.old_pools)
1149+
*
1150+
* @returns a pool ID >=0
1151+
*/
1152+
int64_t get_backtrace_pool() const;
1153+
11451154
protected:
11461155
ceph_lock_state_t *get_fcntl_lock_state() {
11471156
if (!fcntl_locks)
@@ -1192,14 +1201,6 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
11921201
clear_flock_lock_state();
11931202
}
11941203

1195-
/**
1196-
* Return the pool ID where we currently write backtraces for
1197-
* this inode (in addition to inode.old_pools)
1198-
*
1199-
* @returns a pool ID >=0
1200-
*/
1201-
int64_t get_backtrace_pool() const;
1202-
12031204
// parent dentries in cache
12041205
CDentry *parent = nullptr; // primary link
12051206
mempool::mds_co::compact_set<CDentry*> remote_parents; // if hard linked

src/mds/LogSegment.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ class LogSegment {
108108

109109
static inline std::ostream& operator<<(std::ostream& out, const LogSegment& ls) {
110110
return out << "LogSegment(" << ls.seq << "/0x" << std::hex << ls.offset
111-
<< std::dec << " events=" << ls.num_events << ")";
111+
<< "~" << ls.end << std::dec << " events=" << ls.num_events << ")";
112112
}
113113

114114
#endif

src/mds/MDLog.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -749,6 +749,7 @@ class C_MaybeExpiredSegment : public MDSInternalContext {
749749
C_MaybeExpiredSegment(MDLog *mdl, LogSegment *s, int p) :
750750
MDSInternalContext(mdl->mds), mdlog(mdl), ls(s), op_prio(p) {}
751751
void finish(int res) override {
752+
dout(10) << __func__ << ": ls=" << *ls << ", r=" << res << dendl;
752753
if (res < 0)
753754
mdlog->mds->handle_write_error(res);
754755
mdlog->_maybe_expired(ls, op_prio);

src/mds/journal.cc

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -237,27 +237,53 @@ void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int o
237237

238238
ceph_assert(g_conf()->mds_kill_journal_expire_at != 3);
239239

240-
size_t count = 0;
241-
for (elist<CInode*>::iterator it = dirty_parent_inodes.begin(); !it.end(); ++it)
242-
count++;
243-
244-
std::vector<CInodeCommitOperations> ops_vec;
245-
ops_vec.reserve(count);
240+
std::map<int64_t, std::vector<CInodeCommitOperations>> ops_vec_map;
246241
// backtraces to be stored/updated
247242
for (elist<CInode*>::iterator p = dirty_parent_inodes.begin(); !p.end(); ++p) {
248243
CInode *in = *p;
249244
ceph_assert(in->is_auth());
250245
if (in->can_auth_pin()) {
251246
dout(15) << "try_to_expire waiting for storing backtrace on " << *in << dendl;
252-
ops_vec.resize(ops_vec.size() + 1);
253-
in->store_backtrace(ops_vec.back(), op_prio);
247+
auto pool_id = in->get_backtrace_pool();
248+
249+
// this is for the default data pool
250+
dout(20) << __func__ << ": updating pool=" << pool_id << dendl;
251+
ops_vec_map[pool_id].push_back(CInodeCommitOperations());
252+
in->store_backtrace(ops_vec_map[pool_id].back(), op_prio, true);
253+
254+
255+
if (!in->state_test(CInode::STATE_DIRTYPOOL)) {
256+
dout(20) << __func__ << ": no dirtypool" << dendl;
257+
continue;
258+
}
259+
260+
// dispatch separate ops for backtrace updates for old pools
261+
for (auto _pool_id : in->get_inode()->old_pools) {
262+
if (_pool_id == pool_id) {
263+
continue;
264+
}
265+
266+
in->auth_pin(in); // CInode::_stored_backtrace() does auth_unpin()
267+
dout(20) << __func__ << ": updating old_pool=" << _pool_id << dendl;
268+
269+
auto cco = CInodeCommitOperations();
270+
cco.in = in;
271+
// use backtrace from the main pool so as to pickup the main
272+
// pool-id for old pool updates.
273+
cco.bt = ops_vec_map[pool_id].back().bt;
274+
cco.ops_vec.emplace_back(op_prio, _pool_id);
275+
cco.version = in->get_inode()->backtrace_version;
276+
ops_vec_map[_pool_id].push_back(cco);
277+
}
254278
} else {
255279
dout(15) << "try_to_expire waiting for unfreeze on " << *in << dendl;
256280
in->add_waiter(CInode::WAIT_UNFREEZE, gather_bld.new_sub());
257281
}
258282
}
259-
if (!ops_vec.empty())
283+
284+
for (auto& [pool_id, ops_vec] : ops_vec_map) {
260285
mds->finisher->queue(new BatchCommitBacktrace(mds, gather_bld.new_sub(), std::move(ops_vec)));
286+
}
261287

262288
ceph_assert(g_conf()->mds_kill_journal_expire_at != 4);
263289

0 commit comments

Comments
 (0)