Skip to content

Commit d451898

Browse files
authored
Merge pull request ceph#49594 from Matan-B/wip-matanb-crimson-calc-subsets
crimson: Support Partial Object Recovery Reviewed-by: Samuel Just <[email protected]>
2 parents 88a0e91 + b5df21f commit d451898

File tree

11 files changed

+769
-59
lines changed

11 files changed

+769
-59
lines changed

src/crimson/osd/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ add_executable(crimson-osd
1515
pg_shard_manager.cc
1616
object_context.cc
1717
object_context_loader.cc
18+
object_metadata_helper.cc
1819
ops_executer.cc
1920
osd_operation.cc
2021
osd_operations/client_request.cc
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
#include "crimson/osd/object_metadata_helper.h"
2+
3+
namespace {
4+
seastar::logger& logger() {
5+
return crimson::get_logger(ceph_subsys_osd);
6+
}
7+
}
8+
9+
namespace crimson::osd {
10+
11+
/*
12+
* The clone object content may already overlap with the
13+
* next older and the next newest clone object.
14+
* Use the existing (next) clones object overlaps instead
15+
* of pushing the whole clone object to the replica.
16+
*/
17+
18+
/**
 * Split the recovery of clone `soid` into data that has to be pushed and
 * ranges that can be cloned from a neighbouring clone instead.
 *
 * @param snapset        snap set of the object; `clones`, `clone_size` and
 *                       `clone_overlap` are read from it. The lookups use
 *                       operator[], which is why the reference is non-const
 *                       (NOTE(review): presumably std::map members, so a
 *                       lookup of an absent key would insert a default
 *                       entry -- confirm).
 * @param soid           the clone being recovered; `soid.snap` must appear
 *                       in `snapset.clones` (asserted).
 * @param missing        missing set; a neighbouring clone is only used when
 *                       it is not missing according to this set.
 * @param last_backfill  a neighbouring clone is only used when it compares
 *                       less than this object.
 * @return `data_subset` holds the ranges of [0, clone size) that are not
 *         covered by a usable neighbour; `clone_subsets` maps each chosen
 *         neighbour (at most one older, at most one newer) to the ranges
 *         that overlap with `soid`.
 */
subsets_t calc_clone_subsets(
  SnapSet& snapset, const hobject_t& soid,
  const pg_missing_t& missing,
  const hobject_t &last_backfill)
{
  subsets_t subsets;
  logger().debug("{}: {} clone_overlap {} ",
                 __func__, soid, snapset.clone_overlap);

  // Start from "push the whole clone" and carve overlaps out of it below.
  uint64_t size = snapset.clone_size[soid.snap];
  if (size) {
    subsets.data_subset.insert(0, size);
  }

  // TODO: make sure CEPH_FEATURE_OSD_CACHEPOOL is not supported in Crimson
  // Skips clone subsets if caching was enabled (allow_incomplete_clones).

#ifndef UNIT_TESTS_BUILT
  if (!crimson::common::local_conf()->osd_recover_clone_overlap) {
    logger().debug("{} {} -- osd_recover_clone_overlap is disabled",
                   __func__, soid);
    return subsets;
  }
#endif

  if (snapset.clones.empty()) {
    logger().debug("{} {} -- no clones", __func__, soid);
    return subsets;
  }

  // Locate soid's position inside the ordered list of clones.
  auto soid_snap_iter = std::find(snapset.clones.begin(),
                                  snapset.clones.end(),
                                  soid.snap);
  assert(soid_snap_iter != snapset.clones.end());
  auto soid_snap_index = soid_snap_iter - snapset.clones.begin();

  // any overlap with next older clone?
  interval_set<uint64_t> cloning;
  interval_set<uint64_t> prev;
  if (size) {
    prev.insert(0, size);
  }
  for (int i = soid_snap_index - 1; i >= 0; i--) {
    hobject_t clone = soid;
    clone.snap = snapset.clones[i];
    // clone_overlap of i holds the overlap between i to i+1
    // `prev` accumulates the intersection, so after several steps it only
    // contains what is shared by every clone between `clone` and `soid`.
    prev.intersection_of(snapset.clone_overlap[snapset.clones[i]]);
    if (!missing.is_missing(clone) && clone < last_backfill) {
      logger().debug("{} {} has prev {} overlap {}",
                     __func__, soid, clone, prev);
      subsets.clone_subsets[clone] = prev;
      cloning.union_of(prev);
      break;
    }
    logger().debug("{} {} does not have prev {} overlap {}",
                   __func__, soid, clone, prev);
  }

  // overlap with next newest?
  interval_set<uint64_t> next;
  if (size) {
    next.insert(0, size);
  }
  for (unsigned i = soid_snap_index+1;
       i < snapset.clones.size(); i++) {
    hobject_t clone = soid;
    clone.snap = snapset.clones[i];
    // clone_overlap of i-1 holds the overlap between i-1 to i
    next.intersection_of(snapset.clone_overlap[snapset.clones[i - 1]]);
    if (!missing.is_missing(clone) && clone < last_backfill) {
      logger().debug("{} {} has next {} overlap {}",
                     __func__, soid, clone, next);
      subsets.clone_subsets[clone] = next;
      cloning.union_of(next);
      break;
    }
    logger().debug("{} {} does not have next {} overlap {}",
                   __func__, soid, clone, next);
  }

#ifndef UNIT_TESTS_BUILT
  // Too fragmented an overlap is not worth cloning: fall back to pushing
  // the whole clone.
  if (cloning.num_intervals() >
      crimson::common::local_conf().get_val<uint64_t>
      ("osd_recover_clone_overlap_limit")) {
    logger().debug("skipping clone, too many holes");
    subsets.clone_subsets.clear();
    cloning.clear();
  }
#endif

  // what's left for us to push?
  subsets.data_subset.subtract(cloning);
  logger().debug("{} {} data_subsets {} clone_subsets {}",
                 __func__, soid, subsets.data_subset, subsets.clone_subsets);
  return subsets;
}
115+
116+
/*
117+
* Instead of pushing the whole object to the replica,
118+
* make use of:
119+
* 1) ObjectCleanRegion - push modified content only.
120+
* - See: dev/osd_internals/partial_object_recovery
121+
* 2) The modified content may already overlap with the
122+
* next older clone object. Use the existing clone
123+
* object overlap as well.
124+
*/
125+
126+
/**
 * Split the recovery of the head object into data that has to be pushed
 * and ranges that can be cloned from the nearest usable clone.
 *
 * @param obj_size       size of the head object; the candidate range is
 *                       [0, obj_size).
 * @param snapset        snap set of the object; `clones` and
 *                       `clone_overlap` are read from it (operator[] lookup,
 *                       hence the non-const reference).
 * @param head           the head object being recovered; it must have an
 *                       entry in `missing` (asserted).
 * @param missing        missing set; supplies the dirty regions of `head`
 *                       and decides whether a clone can be used.
 * @param last_backfill  a clone is only used when it compares less than
 *                       this object.
 * @return `data_subset` holds the dirty ranges of the head that are not
 *         covered by the chosen clone; `clone_subsets` has a single entry
 *         for that clone, or is empty when no range is cloned.
 */
subsets_t calc_head_subsets(
  uint64_t obj_size,
  SnapSet& snapset,
  const hobject_t& head,
  const pg_missing_t& missing,
  const hobject_t &last_backfill)
{
  logger().debug("{}: {} clone_overlap {} ",
                 __func__, head, snapset.clone_overlap);

  subsets_t subsets;

  // 1) Calculate modified content only
  if (obj_size) {
    subsets.data_subset.insert(0, obj_size);
  }
  assert(missing.get_items().contains(head));
  const pg_missing_item missing_item = missing.get_items().at(head);
  // let data_subset store only the modified content of the object.
  subsets.data_subset.intersection_of(missing_item.clean_regions.get_dirty_regions());
  logger().debug("{} {} data_subset {}",
                 __func__, head, subsets.data_subset);

  // TODO: make sure CEPH_FEATURE_OSD_CACHEPOOL is not supported in Crimson
  // Skips clone subsets if caching was enabled (allow_incomplete_clones).

#ifndef UNIT_TESTS_BUILT
  if (!crimson::common::local_conf()->osd_recover_clone_overlap) {
    logger().debug("{} {} -- osd_recover_clone_overlap is disabled",
                   __func__, head);
    return subsets;
  }
#endif

  if (snapset.clones.empty()) {
    logger().debug("{} {} -- no clones", __func__, head);
    return subsets;
  }

  // 2) Find any overlap with next older clone
  interval_set<uint64_t> cloning;
  interval_set<uint64_t> prev;
  hobject_t clone = head;
  if (obj_size) {
    prev.insert(0, obj_size);
  }
  // Walk from the last entry of `clones` towards the first; `prev` keeps
  // shrinking to what is shared by every clone visited so far.
  for (int i = snapset.clones.size()-1; i >= 0; i--) {
    clone.snap = snapset.clones[i];
    // let prev store only the overlap with clone i
    prev.intersection_of(snapset.clone_overlap[snapset.clones[i]]);
    if (!missing.is_missing(clone) && clone < last_backfill) {
      logger().debug("{} {} has prev {} overlap {}",
                     __func__, head, clone, prev);
      cloning = prev;
      break;
    }
    logger().debug("{} {} does not have prev {} overlap {}",
                   __func__, head, clone, prev);
  }

  // let cloning store only the overlap with data_subset
  cloning.intersection_of(subsets.data_subset);
  if (cloning.empty()) {
    logger().debug("skipping clone, nothing needs to clone");
    return subsets;
  }

#ifndef UNIT_TESTS_BUILT
  // Too fragmented an overlap is not worth cloning: give up on the clone
  // and push all of data_subset. clone_subsets is still empty at this
  // point, so dropping `cloning` is all that is needed.
  if (cloning.num_intervals() >
      crimson::common::local_conf().get_val<uint64_t>
      ("osd_recover_clone_overlap_limit")) {
    logger().debug("skipping clone, too many holes");
    cloning.clear();
  }
#endif

  // what's left for us to push?
  // Only record the clone when something is actually taken from it, so a
  // skipped clone does not leave an empty entry behind in clone_subsets.
  if (!cloning.empty()) {
    subsets.clone_subsets[clone] = cloning;
    subsets.data_subset.subtract(cloning);
  }
  logger().debug("{} {} data_subsets {} clone_subsets {}",
                 __func__, head, subsets.data_subset, subsets.clone_subsets);

  return subsets;
}
212+
213+
void set_subsets(
214+
const subsets_t& subsets,
215+
ObjectRecoveryInfo& recovery_info)
216+
{
217+
recovery_info.copy_subset = subsets.data_subset;
218+
recovery_info.clone_subset = subsets.clone_subsets;
219+
}
220+
221+
222+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#pragma once
2+
3+
#include "osd/osd_types_fmt.h"
4+
5+
namespace crimson::osd {
6+
struct subsets_t {
7+
interval_set<uint64_t> data_subset;
8+
std::map<hobject_t, interval_set<uint64_t>> clone_subsets;
9+
};
10+
11+
subsets_t calc_clone_subsets(
12+
SnapSet& snapset, const hobject_t& soid,
13+
const pg_missing_t& missing,
14+
const hobject_t &last_backfill);
15+
subsets_t calc_head_subsets(
16+
uint64_t obj_size,
17+
SnapSet& snapset,
18+
const hobject_t& head,
19+
const pg_missing_t& missing,
20+
const hobject_t &last_backfill);
21+
void set_subsets(
22+
const subsets_t& subsets,
23+
ObjectRecoveryInfo& recovery_info);
24+
}

src/crimson/osd/ops_executer.cc

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -674,7 +674,16 @@ OpsExecuter::do_execute_op(OSDOp& osd_op)
674674
whiteout = true;
675675
}
676676
return do_write_op([this, whiteout](auto& backend, auto& os, auto& txn) {
677-
return backend.remove(os, txn, delta_stats, whiteout);
677+
int num_bytes = 0;
678+
// Calculate num_bytes to be removed
679+
if (obc->obs.oi.soid.is_snap()) {
680+
ceph_assert(obc->ssc->snapset.clone_overlap.count(obc->obs.oi.soid.snap));
681+
num_bytes = obc->ssc->snapset.get_clone_bytes(obc->obs.oi.soid.snap);
682+
} else {
683+
num_bytes = obc->obs.oi.size;
684+
}
685+
return backend.remove(os, txn, *osd_op_params,
686+
delta_stats, whiteout, num_bytes);
678687
});
679688
}
680689
case CEPH_OSD_OP_CALL:
@@ -961,7 +970,17 @@ std::unique_ptr<OpsExecuter::CloningContext> OpsExecuter::execute_clone(
961970
osd_op_params->at_version.version++;
962971
encode(cloned_snaps, cloning_ctx->log_entry.snaps);
963972

964-
// TODO: update most recent clone_overlap and usage stats
973+
// update most recent clone_overlap and usage stats
974+
assert(cloning_ctx->new_snapset.clones.size() > 0);
975+
// In classic, we check for evicted clones before
976+
// adjusting the clone_overlap.
977+
// This check is redundant here since `clone_obc`
978+
// was just created (See prepare_clone()).
979+
interval_set<uint64_t> &newest_overlap =
980+
cloning_ctx->new_snapset.clone_overlap.rbegin()->second;
981+
osd_op_params->modified_ranges.intersection_of(newest_overlap);
982+
delta_stats.num_bytes += osd_op_params->modified_ranges.size();
983+
newest_overlap.subtract(osd_op_params->modified_ranges);
965984
return cloning_ctx;
966985
}
967986

src/crimson/osd/osd_operations/osdop_params.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ struct osd_op_params_t {
1717
version_t user_at_version = 0;
1818
bool user_modify = false;
1919
ObjectCleanRegions clean_regions;
20-
20+
interval_set<uint64_t> modified_ranges;
21+
//TODO: Move delta_stats to osd_op_params_t
2122
osd_op_params_t() = default;
2223
};

0 commit comments

Comments
 (0)