Skip to content

Commit 2422817

Browse files
committed
crimson/os/pg_map: allow multiple shards to create new pg mappings at the same time
Also: * Better detections in case of inconsistent racings, such as: * The new mapping is creating towards different cores. * Mapping creation is racing with its eracing. * Multiple shards are erasing the same mapping at the same time. * Add more logs to debug in case of unexpected issues. Fixes: https://tracker.ceph.com/issues/64934 Fixes: https://tracker.ceph.com/issues/64009 Signed-off-by: Yingxin Cheng <[email protected]>
1 parent de2e667 commit 2422817

File tree

1 file changed

+111
-48
lines changed

1 file changed

+111
-48
lines changed

src/crimson/osd/pg_map.cc

Lines changed: 111 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -19,75 +19,138 @@ seastar::future<core_id_t> PGShardMapping::get_or_create_pg_mapping(
1919
LOG_PREFIX(PGShardMapping::get_or_create_pg_mapping);
2020
auto find_iter = pg_to_core.find(pgid);
2121
if (find_iter != pg_to_core.end()) {
22-
ceph_assert_always(find_iter->second != NULL_CORE);
23-
if (core_expected != NULL_CORE) {
24-
ceph_assert_always(find_iter->second == core_expected);
22+
auto core_found = find_iter->second;
23+
assert(core_found != NULL_CORE);
24+
if (core_expected != NULL_CORE && core_expected != core_found) {
25+
ERROR("the mapping is inconsistent for pg {}: core {}, expected {}",
26+
pgid, core_found, core_expected);
27+
ceph_abort("The pg mapping is inconsistent!");
2528
}
26-
return seastar::make_ready_future<core_id_t>(find_iter->second);
29+
return seastar::make_ready_future<core_id_t>(core_found);
2730
} else {
31+
DEBUG("calling primary to add mapping for pg {} to the expected core {}",
32+
pgid, core_expected);
2833
return container().invoke_on(
2934
0, [pgid, core_expected, FNAME](auto &primary_mapping) {
30-
auto [insert_iter, inserted] =
31-
primary_mapping.pg_to_core.emplace(pgid, core_expected);
32-
ceph_assert_always(inserted);
33-
ceph_assert_always(primary_mapping.core_to_num_pgs.size() > 0);
34-
std::map<core_id_t, unsigned>::iterator core_iter;
35-
if (core_expected == NULL_CORE) {
36-
core_iter = std::min_element(
37-
primary_mapping.core_to_num_pgs.begin(),
38-
primary_mapping.core_to_num_pgs.end(),
39-
[](const auto &left, const auto &right) {
40-
return left.second < right.second;
35+
auto core_to_update = core_expected;
36+
auto find_iter = primary_mapping.pg_to_core.find(pgid);
37+
if (find_iter != primary_mapping.pg_to_core.end()) {
38+
// this pgid was already mapped within primary_mapping, assert that the
39+
// mapping is consistent and avoid emplacing once again.
40+
auto core_found = find_iter->second;
41+
assert(core_found != NULL_CORE);
42+
if (core_expected != NULL_CORE) {
43+
if (core_expected != core_found) {
44+
ERROR("the mapping is inconsistent for pg {} (primary): core {}, expected {}",
45+
pgid, core_found, core_expected);
46+
ceph_abort("The pg mapping is inconsistent!");
4147
}
42-
);
43-
core_expected = core_iter->first;
44-
} else {
45-
core_iter = primary_mapping.core_to_num_pgs.find(core_expected);
48+
// core_expected == core_found
49+
DEBUG("mapping pg {} to core {} (primary): already mapped and expected",
50+
pgid, core_to_update);
51+
} else { // core_expected == NULL_CORE
52+
core_to_update = core_found;
53+
DEBUG("mapping pg {} to core {} (primary): already mapped",
54+
pgid, core_to_update);
55+
}
56+
// proceed to broadcast core_to_update
57+
} else { // find_iter == primary_mapping.pg_to_core.end()
58+
// this pgid isn't mapped within primary_mapping,
59+
// add the mapping and ajust core_to_num_pgs
60+
ceph_assert_always(primary_mapping.core_to_num_pgs.size() > 0);
61+
std::map<core_id_t, unsigned>::iterator count_iter;
62+
if (core_expected == NULL_CORE) {
63+
count_iter = std::min_element(
64+
primary_mapping.core_to_num_pgs.begin(),
65+
primary_mapping.core_to_num_pgs.end(),
66+
[](const auto &left, const auto &right) {
67+
return left.second < right.second;
68+
}
69+
);
70+
core_to_update = count_iter->first;
71+
} else { // core_expected != NULL_CORE
72+
count_iter = primary_mapping.core_to_num_pgs.find(core_to_update);
73+
}
74+
ceph_assert_always(primary_mapping.core_to_num_pgs.end() != count_iter);
75+
++(count_iter->second);
76+
auto [insert_iter, inserted] =
77+
primary_mapping.pg_to_core.emplace(pgid, core_to_update);
78+
assert(inserted);
79+
DEBUG("mapping pg {} to core {} (primary): num_pgs {}",
80+
pgid, core_to_update, count_iter->second);
4681
}
47-
assert(core_expected != NULL_CORE);
48-
ceph_assert_always(primary_mapping.core_to_num_pgs.end() != core_iter);
49-
insert_iter->second = core_expected;
50-
core_iter->second++;
51-
DEBUG("mapping pg {} to core {} (primary) with num_pgs {}",
52-
pgid, core_expected, core_iter->second);
82+
assert(core_to_update != NULL_CORE);
5383
return primary_mapping.container().invoke_on_others(
54-
[pgid, core_expected, FNAME](auto &other_mapping) {
55-
auto [insert_iter, inserted] =
56-
other_mapping.pg_to_core.emplace(pgid, core_expected);
57-
ceph_assert_always(inserted);
58-
DEBUG("mapping pg {} to core {} (others)",
59-
pgid, core_expected);
84+
[pgid, core_to_update, FNAME](auto &other_mapping) {
85+
auto find_iter = other_mapping.pg_to_core.find(pgid);
86+
if (find_iter == other_mapping.pg_to_core.end()) {
87+
DEBUG("mapping pg {} to core {} (others)",
88+
pgid, core_to_update);
89+
auto [insert_iter, inserted] =
90+
other_mapping.pg_to_core.emplace(pgid, core_to_update);
91+
assert(inserted);
92+
} else {
93+
auto core_found = find_iter->second;
94+
if (core_found != core_to_update) {
95+
ERROR("the mapping is inconsistent for pg {} (others): core {}, expected {}",
96+
pgid, core_found, core_to_update);
97+
ceph_abort("The pg mapping is inconsistent!");
98+
}
99+
DEBUG("mapping pg {} to core {} (others): already mapped",
100+
pgid, core_to_update);
101+
}
60102
});
61-
}).then([this, pgid, FNAME] {
103+
}).then([this, pgid, core_expected, FNAME] {
62104
auto find_iter = pg_to_core.find(pgid);
63-
ceph_assert_always(find_iter != pg_to_core.end());
64-
DEBUG("returning pg {} mapping to core {}",
65-
pgid, find_iter->second);
66-
return seastar::make_ready_future<core_id_t>(find_iter->second);
105+
if (find_iter == pg_to_core.end()) {
106+
ERROR("the mapping is inconsistent for pg {}: core not found, expected {}",
107+
pgid, core_expected);
108+
ceph_abort("The pg mapping is inconsistent!");
109+
}
110+
auto core_found = find_iter->second;
111+
if (core_expected != NULL_CORE && core_found != core_expected) {
112+
ERROR("the mapping is inconsistent for pg {}: core {}, expected {}",
113+
pgid, core_found, core_expected);
114+
ceph_abort("The pg mapping is inconsistent!");
115+
}
116+
DEBUG("returning pg {} mapping to core {} after broadcasted",
117+
pgid, core_found);
118+
return seastar::make_ready_future<core_id_t>(core_found);
67119
});
68120
}
69121
}
70122

71123
seastar::future<> PGShardMapping::remove_pg_mapping(spg_t pgid) {
72124
LOG_PREFIX(PGShardMapping::remove_pg_mapping);
73-
DEBUG("{}", pgid);
125+
auto find_iter = pg_to_core.find(pgid);
126+
if (find_iter == pg_to_core.end()) {
127+
ERROR("trying to remove non-exist mapping for pg {}", pgid);
128+
ceph_abort("The pg mapping is inconsistent!");
129+
}
130+
DEBUG("calling primary to remove mapping for pg {}", pgid);
74131
return container().invoke_on(
75132
0, [pgid, FNAME](auto &primary_mapping) {
76-
auto iter = primary_mapping.pg_to_core.find(pgid);
77-
ceph_assert_always(iter != primary_mapping.pg_to_core.end());
78-
ceph_assert_always(iter->second != NULL_CORE);
79-
auto count_iter = primary_mapping.core_to_num_pgs.find(iter->second);
80-
ceph_assert_always(count_iter != primary_mapping.core_to_num_pgs.end());
81-
ceph_assert_always(count_iter->second > 0);
133+
auto find_iter = primary_mapping.pg_to_core.find(pgid);
134+
if (find_iter == primary_mapping.pg_to_core.end()) {
135+
ERROR("trying to remove non-exist mapping for pg {} (primary)", pgid);
136+
ceph_abort("The pg mapping is inconsistent!");
137+
}
138+
assert(find_iter->second != NULL_CORE);
139+
auto count_iter = primary_mapping.core_to_num_pgs.find(find_iter->second);
140+
assert(count_iter != primary_mapping.core_to_num_pgs.end());
141+
assert(count_iter->second > 0);
82142
--(count_iter->second);
83-
primary_mapping.pg_to_core.erase(iter);
143+
primary_mapping.pg_to_core.erase(find_iter);
84144
DEBUG("pg {} mapping erased (primary)", pgid);
85145
return primary_mapping.container().invoke_on_others(
86146
[pgid, FNAME](auto &other_mapping) {
87-
auto iter = other_mapping.pg_to_core.find(pgid);
88-
ceph_assert_always(iter != other_mapping.pg_to_core.end());
89-
ceph_assert_always(iter->second != NULL_CORE);
90-
other_mapping.pg_to_core.erase(iter);
147+
auto find_iter = other_mapping.pg_to_core.find(pgid);
148+
if (find_iter == other_mapping.pg_to_core.end()) {
149+
ERROR("trying to remove non-exist mapping for pg {} (others)", pgid);
150+
ceph_abort("The pg mapping is inconsistent!");
151+
}
152+
assert(find_iter->second != NULL_CORE);
153+
other_mapping.pg_to_core.erase(find_iter);
91154
DEBUG("pg {} mapping erased (others)", pgid);
92155
});
93156
});

0 commit comments

Comments
 (0)