Skip to content

Commit 9555086

Browse files
authored
Merge pull request ceph#56332 from cyx1231st/wip-fix-racing-get-or-create-pg-mapping
crimson/os/pg_map: allow multiple shards to create new pg mappings at the same time Reviewed-by: Samuel Just <[email protected]> Reviewed-by: Matan Breizman <[email protected]>
2 parents 742d5fa + 2422817 commit 9555086

File tree

3 files changed

+122
-51
lines changed

3 files changed

+122
-51
lines changed

src/crimson/osd/pg_map.cc

Lines changed: 118 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -14,75 +14,144 @@ namespace crimson::osd {
1414

1515
seastar::future<core_id_t> PGShardMapping::get_or_create_pg_mapping(
1616
spg_t pgid,
17-
core_id_t core)
17+
core_id_t core_expected)
1818
{
1919
LOG_PREFIX(PGShardMapping::get_or_create_pg_mapping);
2020
auto find_iter = pg_to_core.find(pgid);
2121
if (find_iter != pg_to_core.end()) {
22-
ceph_assert_always(find_iter->second != NULL_CORE);
23-
if (core != NULL_CORE) {
24-
ceph_assert_always(find_iter->second == core);
22+
auto core_found = find_iter->second;
23+
assert(core_found != NULL_CORE);
24+
if (core_expected != NULL_CORE && core_expected != core_found) {
25+
ERROR("the mapping is inconsistent for pg {}: core {}, expected {}",
26+
pgid, core_found, core_expected);
27+
ceph_abort("The pg mapping is inconsistent!");
2528
}
26-
return seastar::make_ready_future<core_id_t>(find_iter->second);
29+
return seastar::make_ready_future<core_id_t>(core_found);
2730
} else {
28-
return container().invoke_on(0,[pgid, core, FNAME]
29-
(auto &primary_mapping) {
30-
auto [insert_iter, inserted] = primary_mapping.pg_to_core.emplace(pgid, core);
31-
ceph_assert_always(inserted);
32-
ceph_assert_always(primary_mapping.core_to_num_pgs.size() > 0);
33-
std::map<core_id_t, unsigned>::iterator core_iter;
34-
if (core == NULL_CORE) {
35-
core_iter = std::min_element(
36-
primary_mapping.core_to_num_pgs.begin(),
37-
primary_mapping.core_to_num_pgs.end(),
31+
DEBUG("calling primary to add mapping for pg {} to the expected core {}",
32+
pgid, core_expected);
33+
return container().invoke_on(
34+
0, [pgid, core_expected, FNAME](auto &primary_mapping) {
35+
auto core_to_update = core_expected;
36+
auto find_iter = primary_mapping.pg_to_core.find(pgid);
37+
if (find_iter != primary_mapping.pg_to_core.end()) {
38+
// this pgid was already mapped within primary_mapping, assert that the
39+
// mapping is consistent and avoid emplacing once again.
40+
auto core_found = find_iter->second;
41+
assert(core_found != NULL_CORE);
42+
if (core_expected != NULL_CORE) {
43+
if (core_expected != core_found) {
44+
ERROR("the mapping is inconsistent for pg {} (primary): core {}, expected {}",
45+
pgid, core_found, core_expected);
46+
ceph_abort("The pg mapping is inconsistent!");
47+
}
48+
// core_expected == core_found
49+
DEBUG("mapping pg {} to core {} (primary): already mapped and expected",
50+
pgid, core_to_update);
51+
} else { // core_expected == NULL_CORE
52+
core_to_update = core_found;
53+
DEBUG("mapping pg {} to core {} (primary): already mapped",
54+
pgid, core_to_update);
55+
}
56+
// proceed to broadcast core_to_update
57+
} else { // find_iter == primary_mapping.pg_to_core.end()
58+
// this pgid isn't mapped within primary_mapping,
59+
// add the mapping and ajust core_to_num_pgs
60+
ceph_assert_always(primary_mapping.core_to_num_pgs.size() > 0);
61+
std::map<core_id_t, unsigned>::iterator count_iter;
62+
if (core_expected == NULL_CORE) {
63+
count_iter = std::min_element(
64+
primary_mapping.core_to_num_pgs.begin(),
65+
primary_mapping.core_to_num_pgs.end(),
3866
[](const auto &left, const auto &right) {
39-
return left.second < right.second;
40-
});
41-
} else {
42-
core_iter = primary_mapping.core_to_num_pgs.find(core);
67+
return left.second < right.second;
68+
}
69+
);
70+
core_to_update = count_iter->first;
71+
} else { // core_expected != NULL_CORE
72+
count_iter = primary_mapping.core_to_num_pgs.find(core_to_update);
73+
}
74+
ceph_assert_always(primary_mapping.core_to_num_pgs.end() != count_iter);
75+
++(count_iter->second);
76+
auto [insert_iter, inserted] =
77+
primary_mapping.pg_to_core.emplace(pgid, core_to_update);
78+
assert(inserted);
79+
DEBUG("mapping pg {} to core {} (primary): num_pgs {}",
80+
pgid, core_to_update, count_iter->second);
4381
}
44-
ceph_assert_always(primary_mapping.core_to_num_pgs.end() != core_iter);
45-
insert_iter->second = core_iter->first;
46-
core_iter->second++;
47-
DEBUG("mapping pg {} to core: {} with num_pgs of: {}",
48-
pgid, insert_iter->second, core_iter->second);
82+
assert(core_to_update != NULL_CORE);
4983
return primary_mapping.container().invoke_on_others(
50-
[pgid = insert_iter->first, core = insert_iter->second, FNAME]
51-
(auto &other_mapping) {
52-
ceph_assert_always(core != NULL_CORE);
53-
auto [insert_iter, inserted] = other_mapping.pg_to_core.emplace(pgid, core);
54-
ceph_assert_always(inserted);
55-
DEBUG("mapping pg {} to core: {}", pgid, core);
84+
[pgid, core_to_update, FNAME](auto &other_mapping) {
85+
auto find_iter = other_mapping.pg_to_core.find(pgid);
86+
if (find_iter == other_mapping.pg_to_core.end()) {
87+
DEBUG("mapping pg {} to core {} (others)",
88+
pgid, core_to_update);
89+
auto [insert_iter, inserted] =
90+
other_mapping.pg_to_core.emplace(pgid, core_to_update);
91+
assert(inserted);
92+
} else {
93+
auto core_found = find_iter->second;
94+
if (core_found != core_to_update) {
95+
ERROR("the mapping is inconsistent for pg {} (others): core {}, expected {}",
96+
pgid, core_found, core_to_update);
97+
ceph_abort("The pg mapping is inconsistent!");
98+
}
99+
DEBUG("mapping pg {} to core {} (others): already mapped",
100+
pgid, core_to_update);
101+
}
56102
});
57-
}).then([this, pgid, FNAME] {
103+
}).then([this, pgid, core_expected, FNAME] {
58104
auto find_iter = pg_to_core.find(pgid);
59-
ceph_assert_always(find_iter != pg_to_core.end());
60-
DEBUG("returning pg {} mapping to core {}", pgid, find_iter->second);
61-
return seastar::make_ready_future<core_id_t>(find_iter->second);
105+
if (find_iter == pg_to_core.end()) {
106+
ERROR("the mapping is inconsistent for pg {}: core not found, expected {}",
107+
pgid, core_expected);
108+
ceph_abort("The pg mapping is inconsistent!");
109+
}
110+
auto core_found = find_iter->second;
111+
if (core_expected != NULL_CORE && core_found != core_expected) {
112+
ERROR("the mapping is inconsistent for pg {}: core {}, expected {}",
113+
pgid, core_found, core_expected);
114+
ceph_abort("The pg mapping is inconsistent!");
115+
}
116+
DEBUG("returning pg {} mapping to core {} after broadcasted",
117+
pgid, core_found);
118+
return seastar::make_ready_future<core_id_t>(core_found);
62119
});
63120
}
64121
}
65122

66123
seastar::future<> PGShardMapping::remove_pg_mapping(spg_t pgid) {
67124
LOG_PREFIX(PGShardMapping::remove_pg_mapping);
68-
DEBUG("{}", pgid);
69-
return container().invoke_on(0, [pgid, FNAME](auto &primary_mapping) {
70-
auto iter = primary_mapping.pg_to_core.find(pgid);
71-
ceph_assert_always(iter != primary_mapping.pg_to_core.end());
72-
ceph_assert_always(iter->second != NULL_CORE);
73-
auto count_iter = primary_mapping.core_to_num_pgs.find(iter->second);
74-
ceph_assert_always(count_iter != primary_mapping.core_to_num_pgs.end());
75-
ceph_assert_always(count_iter->second > 0);
125+
auto find_iter = pg_to_core.find(pgid);
126+
if (find_iter == pg_to_core.end()) {
127+
ERROR("trying to remove non-exist mapping for pg {}", pgid);
128+
ceph_abort("The pg mapping is inconsistent!");
129+
}
130+
DEBUG("calling primary to remove mapping for pg {}", pgid);
131+
return container().invoke_on(
132+
0, [pgid, FNAME](auto &primary_mapping) {
133+
auto find_iter = primary_mapping.pg_to_core.find(pgid);
134+
if (find_iter == primary_mapping.pg_to_core.end()) {
135+
ERROR("trying to remove non-exist mapping for pg {} (primary)", pgid);
136+
ceph_abort("The pg mapping is inconsistent!");
137+
}
138+
assert(find_iter->second != NULL_CORE);
139+
auto count_iter = primary_mapping.core_to_num_pgs.find(find_iter->second);
140+
assert(count_iter != primary_mapping.core_to_num_pgs.end());
141+
assert(count_iter->second > 0);
76142
--(count_iter->second);
77-
primary_mapping.pg_to_core.erase(iter);
78-
DEBUG("pg {} mapping erased", pgid);
143+
primary_mapping.pg_to_core.erase(find_iter);
144+
DEBUG("pg {} mapping erased (primary)", pgid);
79145
return primary_mapping.container().invoke_on_others(
80146
[pgid, FNAME](auto &other_mapping) {
81-
auto iter = other_mapping.pg_to_core.find(pgid);
82-
ceph_assert_always(iter != other_mapping.pg_to_core.end());
83-
ceph_assert_always(iter->second != NULL_CORE);
84-
other_mapping.pg_to_core.erase(iter);
85-
DEBUG("pg {} mapping erased", pgid);
147+
auto find_iter = other_mapping.pg_to_core.find(pgid);
148+
if (find_iter == other_mapping.pg_to_core.end()) {
149+
ERROR("trying to remove non-exist mapping for pg {} (others)", pgid);
150+
ceph_abort("The pg mapping is inconsistent!");
151+
}
152+
assert(find_iter->second != NULL_CORE);
153+
other_mapping.pg_to_core.erase(find_iter);
154+
DEBUG("pg {} mapping erased (others)", pgid);
86155
});
87156
});
88157
}

src/crimson/osd/pg_map.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class PGShardMapping : public seastar::peering_sharded_service<PGShardMapping> {
3737
/// Returns mapping for pgid, creates new one if it doesn't already exist
3838
seastar::future<core_id_t> get_or_create_pg_mapping(
3939
spg_t pgid,
40-
core_id_t core = NULL_CORE);
40+
core_id_t core_expected = NULL_CORE);
4141

4242
/// Remove pgid mapping
4343
seastar::future<> remove_pg_mapping(spg_t pgid);
@@ -60,7 +60,9 @@ class PGShardMapping : public seastar::peering_sharded_service<PGShardMapping> {
6060
}
6161

6262
private:
63+
// only in shard 0
6364
std::map<core_id_t, unsigned> core_to_num_pgs;
65+
// per-shard, updated by shard 0
6466
std::map<spg_t, core_id_t> pg_to_core;
6567
};
6668

src/crimson/osd/shard_services.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ seastar::future<> OSDSingletonState::osdmap_subscribe(
183183
version_t epoch, bool force_request)
184184
{
185185
LOG_PREFIX(OSDSingletonState::osdmap_subscribe);
186-
INFO("epoch {}");
186+
INFO("epoch {}", epoch);
187187
if (monc.sub_want_increment("osdmap", epoch, CEPH_SUBSCRIBE_ONETIME) ||
188188
force_request) {
189189
return monc.renew_subs();

0 commit comments

Comments
 (0)