Skip to content

Commit d1d3a8c

Browse files
committed
mds: batch backtrace updates by pool-id when expiring a log segment
LogSegment::try_to_expire() batches backtrace updations for inodes in dirty_parent_inodes list. If a backtrace update operations fails for one inode due to missing (removed) data pool, which is specially handled by treating the operation as a success, however, the errno (-ENOENT) is stored by the gather context and passed on as the return value to subsequent operations (even for successful backtrace update operations in the same gather context). Fixes: http://tracker.ceph.com/issues/63259 Signed-off-by: Venky Shankar <[email protected]>
1 parent e5728c4 commit d1d3a8c

File tree

3 files changed

+54
-25
lines changed

3 files changed

+54
-25
lines changed

src/mds/CInode.cc

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1386,7 +1386,7 @@ void CInode::_commit_ops(int r, C_GatherBuilder &gather_bld,
13861386
}
13871387

13881388
void CInode::_store_backtrace(std::vector<CInodeCommitOperation> &ops_vec,
1389-
inode_backtrace_t &bt, int op_prio)
1389+
inode_backtrace_t &bt, int op_prio, bool ignore_old_pools)
13901390
{
13911391
dout(10) << __func__ << " on " << *this << dendl;
13921392
ceph_assert(is_dirty_parent());
@@ -1407,8 +1407,8 @@ void CInode::_store_backtrace(std::vector<CInodeCommitOperation> &ops_vec,
14071407
ops_vec.emplace_back(op_prio, pool, get_inode()->layout,
14081408
mdcache->mds->mdsmap->get_up_features(), slink);
14091409

1410-
if (!state_test(STATE_DIRTYPOOL) || get_inode()->old_pools.empty()) {
1411-
dout(20) << __func__ << ": no dirtypool or no old pools" << dendl;
1410+
if (!state_test(STATE_DIRTYPOOL) || get_inode()->old_pools.empty() || ignore_old_pools) {
1411+
dout(20) << __func__ << ": no dirtypool or no old pools or ignore_old_pools" << dendl;
14121412
return;
14131413
}
14141414

@@ -1431,7 +1431,7 @@ void CInode::store_backtrace(MDSContext *fin, int op_prio)
14311431
inode_backtrace_t bt;
14321432
auto version = get_inode()->backtrace_version;
14331433

1434-
_store_backtrace(ops_vec, bt, op_prio);
1434+
_store_backtrace(ops_vec, bt, op_prio, false);
14351435

14361436
C_GatherBuilder gather(g_ceph_context,
14371437
new C_OnFinisher(
@@ -1442,12 +1442,14 @@ void CInode::store_backtrace(MDSContext *fin, int op_prio)
14421442
gather.activate();
14431443
}
14441444

1445-
void CInode::store_backtrace(CInodeCommitOperations &op, int op_prio)
1445+
void CInode::store_backtrace(CInodeCommitOperations &op, int op_prio,
1446+
bool ignore_old_pools)
14461447
{
14471448
op.version = get_inode()->backtrace_version;
14481449
op.in = this;
14491450

1450-
_store_backtrace(op.ops_vec, op.bt, op_prio);
1451+
// update backtraces in old pools
1452+
_store_backtrace(op.ops_vec, op.bt, op_prio, ignore_old_pools);
14511453
}
14521454

14531455
void CInode::_stored_backtrace(int r, version_t v, Context *fin)

src/mds/CInode.h

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -750,8 +750,9 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
750750
inode_backtrace_t &bt);
751751
void build_backtrace(int64_t pool, inode_backtrace_t& bt);
752752
void _store_backtrace(std::vector<CInodeCommitOperation> &ops_vec,
753-
inode_backtrace_t &bt, int op_prio);
754-
void store_backtrace(CInodeCommitOperations &op, int op_prio);
753+
inode_backtrace_t &bt, int op_prio, bool ignore_old_pools);
754+
void store_backtrace(CInodeCommitOperations &op, int op_prio,
755+
bool ignore_old_pools=false);
755756
void store_backtrace(MDSContext *fin, int op_prio=-1);
756757
void _stored_backtrace(int r, version_t v, Context *fin);
757758
void fetch_backtrace(Context *fin, ceph::buffer::list *backtrace);
@@ -1129,6 +1130,14 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
11291130
// client caps
11301131
client_t loner_cap = -1, want_loner_cap = -1;
11311132

1133+
/**
1134+
* Return the pool ID where we currently write backtraces for
1135+
* this inode (in addition to inode.old_pools)
1136+
*
1137+
* @returns a pool ID >=0
1138+
*/
1139+
int64_t get_backtrace_pool() const;
1140+
11321141
protected:
11331142
ceph_lock_state_t *get_fcntl_lock_state() {
11341143
if (!fcntl_locks)
@@ -1179,14 +1188,6 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
11791188
clear_flock_lock_state();
11801189
}
11811190

1182-
/**
1183-
* Return the pool ID where we currently write backtraces for
1184-
* this inode (in addition to inode.old_pools)
1185-
*
1186-
* @returns a pool ID >=0
1187-
*/
1188-
int64_t get_backtrace_pool() const;
1189-
11901191
// parent dentries in cache
11911192
CDentry *parent = nullptr; // primary link
11921193
mempool::mds_co::compact_set<CDentry*> remote_parents; // if hard linked

src/mds/journal.cc

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -237,27 +237,53 @@ void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int o
237237

238238
ceph_assert(g_conf()->mds_kill_journal_expire_at != 3);
239239

240-
size_t count = 0;
241-
for (elist<CInode*>::iterator it = dirty_parent_inodes.begin(); !it.end(); ++it)
242-
count++;
243-
244-
std::vector<CInodeCommitOperations> ops_vec;
245-
ops_vec.reserve(count);
240+
std::map<int64_t, std::vector<CInodeCommitOperations>> ops_vec_map;
246241
// backtraces to be stored/updated
247242
for (elist<CInode*>::iterator p = dirty_parent_inodes.begin(); !p.end(); ++p) {
248243
CInode *in = *p;
249244
ceph_assert(in->is_auth());
250245
if (in->can_auth_pin()) {
251246
dout(15) << "try_to_expire waiting for storing backtrace on " << *in << dendl;
252-
ops_vec.resize(ops_vec.size() + 1);
253-
in->store_backtrace(ops_vec.back(), op_prio);
247+
auto pool_id = in->get_backtrace_pool();
248+
249+
// this is for the default data pool
250+
dout(20) << __func__ << ": updating pool=" << pool_id << dendl;
251+
ops_vec_map[pool_id].push_back(CInodeCommitOperations());
252+
in->store_backtrace(ops_vec_map[pool_id].back(), op_prio, true);
253+
254+
255+
if (!in->state_test(CInode::STATE_DIRTYPOOL)) {
256+
dout(20) << __func__ << ": no dirtypool" << dendl;
257+
continue;
258+
}
259+
260+
// dispatch separate ops for backtrace updates for old pools
261+
for (auto _pool_id : in->get_inode()->old_pools) {
262+
if (_pool_id == pool_id) {
263+
continue;
264+
}
265+
266+
in->auth_pin(in); // CInode::_stored_backtrace() does auth_unpin()
267+
dout(20) << __func__ << ": updating old_pool=" << _pool_id << dendl;
268+
269+
auto cco = CInodeCommitOperations();
270+
cco.in = in;
271+
// use backtrace from the main pool so as to pickup the main
272+
// pool-id for old pool updates.
273+
cco.bt = ops_vec_map[pool_id].back().bt;
274+
cco.ops_vec.emplace_back(op_prio, _pool_id);
275+
cco.version = in->get_inode()->backtrace_version;
276+
ops_vec_map[_pool_id].push_back(cco);
277+
}
254278
} else {
255279
dout(15) << "try_to_expire waiting for unfreeze on " << *in << dendl;
256280
in->add_waiter(CInode::WAIT_UNFREEZE, gather_bld.new_sub());
257281
}
258282
}
259-
if (!ops_vec.empty())
283+
284+
for (auto& [pool_id, ops_vec] : ops_vec_map) {
260285
mds->finisher->queue(new BatchCommitBacktrace(mds, gather_bld.new_sub(), std::move(ops_vec)));
286+
}
261287

262288
ceph_assert(g_conf()->mds_kill_journal_expire_at != 4);
263289

0 commit comments

Comments
 (0)