Skip to content

Commit 53d906b

Browse files
authored
Merge pull request ceph#56534 from athanatos/sjust/wip-64975-crimson-scrub-snapmapper-2
crimson: introduce INTERNAL_PG_LOCAL_NS, use for snapmapper

Reviewed-by: Yingxin Cheng <[email protected]>
2 parents 9d29fa7 + e69883a commit 53d906b

File tree

9 files changed

+154
-116
lines changed

9 files changed

+154
-116
lines changed

src/common/hobject.h

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,26 @@ struct hobject_t {
313313
return nspace;
314314
}
315315

316+
/**
317+
* PG_LOCAL_NS
318+
*
319+
* Used exclusively by crimson at this time.
320+
*
321+
* Namespace for objects maintained by the local pg instantiation and updated
322+
* independently of the pg log. librados IO to this namespace should fail.
323+
* Listing operations related to pg objects should exclude objects in this
324+
* namespace along with temp objects, ec rollback objects, and the pg
325+
* meta object. Such operations include:
326+
* - scrub
327+
* - backfill
328+
* - pgls
329+
* See crimson/osd/pg_backend PGBackend::list_objects
330+
*/
331+
static constexpr std::string_view INTERNAL_PG_LOCAL_NS = ".internal_pg_local";
332+
bool is_internal_pg_local() const {
333+
return nspace == INTERNAL_PG_LOCAL_NS;
334+
}
335+
316336
bool parse(const std::string& s);
317337

318338
void encode(ceph::buffer::list& bl) const;
@@ -378,10 +398,10 @@ struct formatter<hobject_t> {
378398
return ctx.out();
379399
}
380400

381-
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
401+
/**
 * Consumes nothing from the format specification: the iterator obtained
 * from ctx.begin() is returned unchanged.
 */
constexpr auto parse(format_parse_context& ctx) const {
  auto pos = ctx.begin();
  return pos;
}
382402

383403
template <typename FormatContext>
384-
auto format(const hobject_t& ho, FormatContext& ctx)
404+
auto format(const hobject_t& ho, FormatContext& ctx) const
385405
{
386406
if (ho == hobject_t{}) {
387407
return fmt::format_to(ctx.out(), "MIN");
@@ -482,6 +502,30 @@ struct ghobject_t {
482502
return hobj.pool >= 0 && hobj.oid.name.empty();
483503
}
484504

505+
bool is_internal_pg_local() const {
506+
return hobj.is_internal_pg_local();
507+
}
508+
509+
/**
510+
* SNAPMAPPER_OID, make_snapmapper, is_snapmapper
511+
*
512+
* Used exclusively by crimson at this time.
513+
*
514+
* Unlike classic, crimson uses a snap mapper object for each pg.
515+
* The snapmapper object provides an index for efficient trimming of clones as
516+
* snapshots are removed.
517+
*
518+
* As with the pgmeta object, we pin the hash to the pg hash.
519+
*/
520+
static constexpr std::string_view SNAPMAPPER_OID = "snapmapper";
521+
/**
 * Builds the ghobject_t identifying a snapmapper object.
 *
 * The inner hobject_t is constructed from SNAPMAPPER_OID as the object
 * name, an empty string, CEPH_NOSNAP, the caller's hash and pool, and a
 * copy of hobject_t::INTERNAL_PG_LOCAL_NS. It is then wrapped with NO_GEN
 * and the caller's shard.
 */
static ghobject_t make_snapmapper(
  int64_t pool, uint32_t hash, shard_id_t shard) {
  return ghobject_t(
    hobject_t(
      object_t(SNAPMAPPER_OID),
      std::string(),
      CEPH_NOSNAP,
      hash,
      pool,
      std::string(hobject_t::INTERNAL_PG_LOCAL_NS)),
    NO_GEN,
    shard);
}
528+
485529
bool match(uint32_t bits, uint32_t match) const {
486530
return hobj.match_hash(hobj.hash, bits, match);
487531
}

src/crimson/osd/ops_executer.cc

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,10 +1164,6 @@ static PG::interruptible_future<hobject_t> pgls_filter(
11641164
}
11651165
}
11661166

1167-
static inline bool is_snapmapper_oid(const hobject_t &obj) {
1168-
return obj.oid.name == SNAPMAPPER_OID;
1169-
}
1170-
11711167
static PG::interruptible_future<ceph::bufferlist> do_pgnls_common(
11721168
const hobject_t& pg_start,
11731169
const hobject_t& pg_end,
@@ -1188,13 +1184,6 @@ static PG::interruptible_future<ceph::bufferlist> do_pgnls_common(
11881184
[&backend, filter, nspace](auto&& ret)
11891185
-> PG::interruptible_future<std::tuple<std::vector<hobject_t>, hobject_t>> {
11901186
auto& [objects, next] = ret;
1191-
auto is_snapmapper = [](const hobject_t &obj) {
1192-
if (is_snapmapper_oid(obj)) {
1193-
return false;
1194-
} else {
1195-
return true;
1196-
}
1197-
};
11981187
auto in_my_namespace = [&nspace](const hobject_t& obj) {
11991188
using crimson::common::local_conf;
12001189
if (obj.get_namespace() == local_conf()->osd_hit_set_namespace) {
@@ -1222,8 +1211,7 @@ static PG::interruptible_future<ceph::bufferlist> do_pgnls_common(
12221211
}
12231212
};
12241213

1225-
auto range = objects | boost::adaptors::filtered(is_snapmapper)
1226-
| boost::adaptors::filtered(in_my_namespace)
1214+
auto range = objects | boost::adaptors::filtered(in_my_namespace)
12271215
| boost::adaptors::transformed(to_pglsed);
12281216
logger().debug("do_pgnls_common: finishing the 1st stage of pgls");
12291217
return seastar::when_all_succeed(std::begin(range),
@@ -1356,9 +1344,6 @@ static PG::interruptible_future<ceph::bufferlist> do_pgls_common(
13561344
PG::interruptor::map_reduce(std::move(objects),
13571345
[&backend, filter, nspace](const hobject_t& obj)
13581346
-> PG::interruptible_future<hobject_t>{
1359-
if (is_snapmapper_oid(obj)) {
1360-
return seastar::make_ready_future<hobject_t>();
1361-
}
13621347
if (obj.get_namespace() == nspace) {
13631348
if (filter) {
13641349
return pgls_filter(*filter, backend, obj);

src/crimson/osd/osd_operations/client_request.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,9 @@ ClientRequest::do_process(
324324
return reply_op_error(pg, -ENAMETOOLONG);
325325
} else if (m->get_hobj().oid.name.empty()) {
326326
return reply_op_error(pg, -EINVAL);
327+
} else if (m->get_hobj().is_internal_pg_local()) {
328+
// client ops addressed to hobject_t::INTERNAL_PG_LOCAL_NS are rejected
329+
return reply_op_error(pg, -EINVAL);
327330
} else if (pg->get_osdmap()->is_blocklisted(
328331
get_foreign_connection().get_peer_addr())) {
329332
DEBUGDPP("{}.{}: {} is blocklisted",

src/crimson/osd/osd_operations/scrub_events.cc

Lines changed: 38 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// vim: ts=8 sw=2 smarttab
33

44
#include "crimson/common/log.h"
5+
#include "crimson/common/coroutine.h"
56
#include "crimson/osd/pg.h"
67
#include "crimson/osd/osd_connection_priv.h"
78
#include "messages/MOSDRepScrubMap.h"
@@ -88,24 +89,17 @@ ScrubFindRange::ifut<> ScrubFindRange::run(PG &pg)
8889
{
8990
LOG_PREFIX(ScrubFindRange::run);
9091
using crimson::common::local_conf;
91-
return interruptor::make_interruptible(
92-
pg.shard_services.get_store().list_objects(
93-
pg.get_collection_ref(),
94-
ghobject_t(begin, ghobject_t::NO_GEN, pg.get_pgid().shard),
95-
ghobject_t::get_max(),
96-
local_conf().get_val<int64_t>("osd_scrub_chunk_max")
97-
)
98-
).then_interruptible([FNAME, this, &pg](auto ret) {
99-
auto &[_, next] = ret;
92+
auto [_, next] = co_await pg.backend->list_objects(
93+
begin,
94+
local_conf().get_val<int64_t>("osd_scrub_chunk_max"));
10095

101-
// We rely on seeing an entire set of snapshots in a single chunk
102-
auto end = next.hobj.get_max_object_boundary();
96+
// We rely on seeing an entire set of snapshots in a single chunk
97+
auto end = next.get_max_object_boundary();
10398

104-
DEBUGDPP("got next.hobj: {}, returning begin, end: {}, {}",
105-
pg, next.hobj, begin, end);
106-
pg.scrubber.machine.process_event(
107-
scrub::ScrubContext::request_range_complete_t{begin, end});
108-
});
99+
DEBUGDPP("got next: {}, returning begin, end: {}, {}",
100+
pg, next, begin, end);
101+
pg.scrubber.machine.process_event(
102+
scrub::ScrubContext::request_range_complete_t{begin, end});
109103
}
110104

111105
template class ScrubAsyncOpT<ScrubFindRange>;
@@ -151,50 +145,36 @@ ScrubScan::ifut<> ScrubScan::run(PG &pg)
151145
ret.valid_through = pg.get_info().last_update;
152146

153147
DEBUGDPP("begin: {}, end: {}", pg, begin, end);
154-
return interruptor::make_interruptible(
155-
pg.shard_services.get_store().list_objects(
156-
pg.get_collection_ref(),
157-
ghobject_t(begin, ghobject_t::NO_GEN, pg.get_pgid().shard),
158-
ghobject_t(end, ghobject_t::NO_GEN, pg.get_pgid().shard),
159-
std::numeric_limits<uint64_t>::max())
160-
).then_interruptible([FNAME, this, &pg](auto &&result) {
161-
DEBUGDPP("listed {} objects", pg, std::get<0>(result).size());
162-
return seastar::do_with(
163-
std::move(std::get<0>(result)),
164-
[this, &pg](auto &objects) {
165-
return interruptor::do_for_each(
166-
objects,
167-
[this, &pg](auto &obj) {
168-
if (obj.is_pgmeta() || obj.hobj.is_temp()) {
169-
return interruptor::now();
170-
} else {
171-
return scan_object(pg, obj);
172-
}
173-
});
174-
});
175-
}).then_interruptible([FNAME, this, &pg] {
176-
if (local) {
177-
DEBUGDPP("complete, submitting local event", pg);
178-
pg.scrubber.handle_event(
179-
scrub::ScrubContext::scan_range_complete_t(
180-
pg.get_pg_whoami(),
181-
std::move(ret)));
182-
return seastar::now();
183-
} else {
184-
DEBUGDPP("complete, sending response to primary", pg);
185-
auto m = crimson::make_message<MOSDRepScrubMap>(
186-
spg_t(pg.get_pgid().pgid, pg.get_primary().shard),
187-
pg.get_osdmap_epoch(),
188-
pg.get_pg_whoami());
189-
encode(ret, m->get_data());
190-
pg.scrubber.handle_event(
191-
scrub::ScrubContext::generate_and_submit_chunk_result_complete_t{});
192-
return pg.shard_services.send_to_osd(
148+
auto [objects, _] = co_await pg.backend->list_objects(begin, end);
149+
150+
DEBUGDPP("listed {} objects", pg, objects);
151+
for (const auto &object: objects) {
152+
co_await scan_object(
153+
pg,
154+
ghobject_t(object, ghobject_t::NO_GEN, pg.get_pgid().shard));
155+
}
156+
157+
if (local) {
158+
DEBUGDPP("complete, submitting local event", pg);
159+
pg.scrubber.handle_event(
160+
scrub::ScrubContext::scan_range_complete_t(
161+
pg.get_pg_whoami(),
162+
std::move(ret)));
163+
} else {
164+
DEBUGDPP("complete, sending response to primary", pg);
165+
auto m = crimson::make_message<MOSDRepScrubMap>(
166+
spg_t(pg.get_pgid().pgid, pg.get_primary().shard),
167+
pg.get_osdmap_epoch(),
168+
pg.get_pg_whoami());
169+
encode(ret, m->get_data());
170+
pg.scrubber.handle_event(
171+
scrub::ScrubContext::generate_and_submit_chunk_result_complete_t{});
172+
co_await interruptor::make_interruptible(
173+
pg.shard_services.send_to_osd(
193174
pg.get_primary().osd,
194175
std::move(m),
195-
pg.get_osdmap_epoch());
196-
}
197-
});
176+
pg.get_osdmap_epoch()));
177+
}
198178
}
199179

200180
ScrubScan::ifut<> ScrubScan::scan_object(

src/crimson/osd/pg.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ PG::PG(
136136
osdriver(
137137
&shard_services.get_store(),
138138
coll_ref,
139-
make_snapmapper_oid()),
139+
pgid.make_snapmapper_oid()),
140140
snap_mapper(
141141
this->shard_services.get_cct(),
142142
&osdriver,
@@ -618,10 +618,10 @@ seastar::future<> PG::init(
618618
new_acting_primary, history, pi, t);
619619
assert(coll_ref);
620620
return shard_services.get_store().exists(
621-
get_collection_ref(), make_snapmapper_oid()
621+
get_collection_ref(), pgid.make_snapmapper_oid()
622622
).safe_then([&t, this](bool existed) {
623623
if (!existed) {
624-
t.touch(coll_ref->get_cid(), make_snapmapper_oid());
624+
t.touch(coll_ref->get_cid(), pgid.make_snapmapper_oid());
625625
}
626626
},
627627
::crimson::ct_error::assert_all{"unexpected eio"}

src/crimson/osd/pg.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@
4141
#include "crimson/osd/object_context_loader.h"
4242
#include "crimson/osd/scrub/pg_scrubber.h"
4343

44-
#define SNAPMAPPER_OID "snapmapper"
45-
4644
class MQuery;
4745
class OSDMap;
4846
class PGBackend;
@@ -649,16 +647,6 @@ class PG : public boost::intrusive_ref_counter<
649647
private:
650648
OSDriver osdriver;
651649
SnapMapper snap_mapper;
652-
ghobject_t make_snapmapper_oid() const {
653-
return ghobject_t(hobject_t(
654-
sobject_t(
655-
object_t(SNAPMAPPER_OID),
656-
0),
657-
std::string(),
658-
pgid.ps(),
659-
pgid.pool(),
660-
std::string()));
661-
}
662650
public:
663651
// PeeringListener
664652
void publish_stats_to_osd() final;

src/crimson/osd/pg_backend.cc

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "common/Checksummer.h"
1818
#include "common/Clock.h"
1919

20+
#include "crimson/common/coroutine.h"
2021
#include "crimson/common/exception.h"
2122
#include "crimson/common/tmap_helpers.h"
2223
#include "crimson/os/futurized_collection.h"
@@ -1080,33 +1081,33 @@ PGBackend::remove(ObjectState& os, ceph::os::Transaction& txn,
10801081
}
10811082

10821083
/**
 * Lists objects in this backend's collection between start and end.
 *
 * start is mapped to a default-constructed ghobject_t when start.is_min(),
 * and end to ghobject_t::get_max() when end.is_max(); otherwise each bound
 * is wrapped as ghobject_t{bound, 0, shard}. The bounds and limit are handed
 * to store->list_objects() unchanged (NOTE(review): whether end is inclusive
 * is decided by the store -- confirm there).
 *
 * Entries for which is_pgmeta(), hobj.is_temp() or is_internal_pg_local()
 * holds are dropped, as is anything that is not is_no_gen(). The filter is
 * applied after the store call, so the returned vector may be shorter than
 * what the store produced for the given limit.
 *
 * @return tuple of (hobj of each surviving entry, hobj of the `next` value
 *         returned by the store)
 */
PGBackend::interruptible_future<std::tuple<std::vector<hobject_t>, hobject_t>>
PGBackend::list_objects(
  const hobject_t& start, const hobject_t &end, uint64_t limit) const
{
  auto gstart = start.is_min() ? ghobject_t{} : ghobject_t{start, 0, shard};
  auto gend = end.is_max() ? ghobject_t::get_max() : ghobject_t{end, 0, shard};
  auto [gobjects, next] = co_await interruptor::make_interruptible(
    store->list_objects(coll, gstart, gend, limit));

  std::vector<hobject_t> objects;
  boost::copy(
    gobjects |
    boost::adaptors::filtered([](const ghobject_t& o) {
      // Same checks, same order, as the previous if/else chain.
      return !o.is_pgmeta() &&
        !o.hobj.is_temp() &&
        !o.is_internal_pg_local() &&
        o.is_no_gen();
    }) |
    boost::adaptors::transformed([](const ghobject_t& o) {
      return o.hobj;
    }),
    std::back_inserter(objects));
  // Neither local is used again: move them into the result instead of
  // copying the whole vector and the cursor.
  co_return std::make_tuple(std::move(objects), std::move(next.hobj));
}
11111112

11121113
PGBackend::setxattr_ierrorator::future<> PGBackend::setxattr(

0 commit comments

Comments
 (0)