|
| 1 | +#include "crimson/osd/object_metadata_helper.h" |
| 2 | + |
| 3 | +namespace { |
| 4 | + seastar::logger& logger() { |
| 5 | + return crimson::get_logger(ceph_subsys_osd); |
| 6 | + } |
| 7 | +} |
| 8 | + |
| 9 | +namespace crimson::osd { |
| 10 | + |
| 11 | +/* |
| 12 | + * The clone object content may already overlap with the |
| 13 | + * next older and the next newest clone obejct. |
| 14 | + * Use the existing (next) clones object overlaps instead |
| 15 | + * of pushing the whole clone object to the replica. |
| 16 | + */ |
| 17 | + |
| 18 | +subsets_t calc_clone_subsets( |
| 19 | + SnapSet& snapset, const hobject_t& soid, |
| 20 | + const pg_missing_t& missing, |
| 21 | + const hobject_t &last_backfill) |
| 22 | +{ |
| 23 | + subsets_t subsets; |
| 24 | + logger().debug("{}: {} clone_overlap {} ", |
| 25 | + __func__, soid, snapset.clone_overlap); |
| 26 | + |
| 27 | + uint64_t size = snapset.clone_size[soid.snap]; |
| 28 | + if (size) { |
| 29 | + subsets.data_subset.insert(0, size); |
| 30 | + } |
| 31 | + |
| 32 | + // TODO: make sure CEPH_FEATURE_OSD_CACHEPOOL is not supported in Crimson |
| 33 | + // Skips clone subsets if caching was enabled (allow_incomplete_clones). |
| 34 | + |
| 35 | +#ifndef UNIT_TESTS_BUILT |
| 36 | + if (!crimson::common::local_conf()->osd_recover_clone_overlap) { |
| 37 | + logger().debug("{} {} -- osd_recover_clone_overlap is disabled", |
| 38 | + __func__, soid); ; |
| 39 | + return subsets; |
| 40 | + } |
| 41 | +#endif |
| 42 | + |
| 43 | + if (snapset.clones.empty()) { |
| 44 | + logger().debug("{} {} -- no clones", __func__, soid); |
| 45 | + return subsets; |
| 46 | + } |
| 47 | + |
| 48 | + auto soid_snap_iter = find(snapset.clones.begin(), |
| 49 | + snapset.clones.end(), |
| 50 | + soid.snap); |
| 51 | + assert(soid_snap_iter != snapset.clones.end()); |
| 52 | + auto soid_snap_index = soid_snap_iter - snapset.clones.begin(); |
| 53 | + |
| 54 | + // any overlap with next older clone? |
| 55 | + interval_set<uint64_t> cloning; |
| 56 | + interval_set<uint64_t> prev; |
| 57 | + if (size) { |
| 58 | + prev.insert(0, size); |
| 59 | + } |
| 60 | + for (int i = soid_snap_index - 1; i >= 0; i--) { |
| 61 | + hobject_t clone = soid; |
| 62 | + clone.snap = snapset.clones[i]; |
| 63 | + // clone_overlap of i holds the overlap between i to i+1 |
| 64 | + prev.intersection_of(snapset.clone_overlap[snapset.clones[i]]); |
| 65 | + if (!missing.is_missing(clone) && clone < last_backfill) { |
| 66 | + logger().debug("{} {} has prev {} overlap {}", |
| 67 | + __func__, soid, clone, prev); |
| 68 | + subsets.clone_subsets[clone] = prev; |
| 69 | + cloning.union_of(prev); |
| 70 | + break; |
| 71 | + } |
| 72 | + logger().debug("{} {} does not have prev {} overlap {}", |
| 73 | + __func__, soid, clone, prev); |
| 74 | + } |
| 75 | + |
| 76 | + // overlap with next newest? |
| 77 | + interval_set<uint64_t> next; |
| 78 | + if (size) { |
| 79 | + next.insert(0, size); |
| 80 | + } |
| 81 | + for (unsigned i = soid_snap_index+1; |
| 82 | + i < snapset.clones.size(); i++) { |
| 83 | + hobject_t clone = soid; |
| 84 | + clone.snap = snapset.clones[i]; |
| 85 | + // clone_overlap of i-1 holds the overlap between i-1 to i |
| 86 | + next.intersection_of(snapset.clone_overlap[snapset.clones[i - 1]]); |
| 87 | + if (!missing.is_missing(clone) && clone < last_backfill) { |
| 88 | + logger().debug("{} {} has next {} overlap {}", |
| 89 | + __func__, soid, clone, next); |
| 90 | + subsets.clone_subsets[clone] = next; |
| 91 | + cloning.union_of(next); |
| 92 | + break; |
| 93 | + } |
| 94 | + logger().debug("{} {} does not have next {} overlap {}", |
| 95 | + __func__, soid, clone, next); |
| 96 | + } |
| 97 | + |
| 98 | +#ifndef UNIT_TESTS_BUILT |
| 99 | + if (cloning.num_intervals() > |
| 100 | + crimson::common::local_conf().get_val<uint64_t> |
| 101 | + ("osd_recover_clone_overlap_limit")) { |
| 102 | + logger().debug("skipping clone, too many holes"); |
| 103 | + subsets.clone_subsets.clear(); |
| 104 | + cloning.clear(); |
| 105 | + } |
| 106 | +#endif |
| 107 | + |
| 108 | + // what's left for us to push? |
| 109 | + subsets.data_subset.subtract(cloning); |
| 110 | + logger().debug("{} {} data_subsets {}" |
| 111 | + "clone_subsets {}", |
| 112 | + __func__, soid, subsets.data_subset, subsets.clone_subsets); |
| 113 | + return subsets; |
| 114 | +} |
| 115 | + |
| 116 | +/* |
| 117 | + * Instead of pushing the whole object to the replica, |
| 118 | + * make use of: |
| 119 | + * 1) ObjectCleanRegion - push modified content only. |
| 120 | + * - See: dev/osd_internals/partial_object_recovery |
| 121 | + * 2) The modified content may already overlap with the |
| 122 | + * next older clone obejct. Use the existing clone |
| 123 | + * object overlap as well. |
| 124 | + */ |
| 125 | + |
| 126 | +subsets_t calc_head_subsets( |
| 127 | + uint64_t obj_size, |
| 128 | + SnapSet& snapset, |
| 129 | + const hobject_t& head, |
| 130 | + const pg_missing_t& missing, |
| 131 | + const hobject_t &last_backfill) |
| 132 | +{ |
| 133 | + logger().debug("{}: {} clone_overlap {} ", |
| 134 | + __func__, head, snapset.clone_overlap); |
| 135 | + |
| 136 | + subsets_t subsets; |
| 137 | + |
| 138 | +// 1) Calculate modified content only |
| 139 | + if (obj_size) { |
| 140 | + subsets.data_subset.insert(0, obj_size); |
| 141 | + } |
| 142 | + assert(missing.get_items().contains(head)); |
| 143 | + const pg_missing_item missing_item = missing.get_items().at(head); |
| 144 | + // let data_subset store only the modified content of the object. |
| 145 | + subsets.data_subset.intersection_of(missing_item.clean_regions.get_dirty_regions()); |
| 146 | + logger().debug("{} {} data_subset {}", |
| 147 | + __func__, head, subsets.data_subset); |
| 148 | + |
| 149 | + // TODO: make sure CEPH_FEATURE_OSD_CACHEPOOL is not supported in Crimson |
| 150 | + // Skips clone subsets if caching was enabled (allow_incomplete_clones). |
| 151 | + |
| 152 | +#ifndef UNIT_TESTS_BUILT |
| 153 | + if (!crimson::common::local_conf()->osd_recover_clone_overlap) { |
| 154 | + logger().debug("{} {} -- osd_recover_clone_overlap is disabled", |
| 155 | + __func__, head); |
| 156 | + return subsets; |
| 157 | + } |
| 158 | +#endif |
| 159 | + |
| 160 | + if (snapset.clones.empty()) { |
| 161 | + logger().debug("{} {} -- no clones", __func__, head); |
| 162 | + return subsets; |
| 163 | + } |
| 164 | + |
| 165 | + // 2) Find any overlap with next older clone |
| 166 | + interval_set<uint64_t> cloning; |
| 167 | + interval_set<uint64_t> prev; |
| 168 | + hobject_t clone = head; |
| 169 | + if (obj_size) { |
| 170 | + prev.insert(0, obj_size); |
| 171 | + } |
| 172 | + for (int i = snapset.clones.size()-1; i >= 0; i--) { |
| 173 | + clone.snap = snapset.clones[i]; |
| 174 | + // let prev store only the overlap with clone i |
| 175 | + prev.intersection_of(snapset.clone_overlap[snapset.clones[i]]); |
| 176 | + if (!missing.is_missing(clone) && clone < last_backfill) { |
| 177 | + logger().debug("{} {} has prev {} overlap {}", |
| 178 | + __func__, head, clone, prev); |
| 179 | + cloning = prev; |
| 180 | + break; |
| 181 | + } |
| 182 | + logger().debug("{} {} does not have prev {} overlap {}", |
| 183 | + __func__, head, clone, prev); |
| 184 | + } |
| 185 | + |
| 186 | + // let cloning store only the overlap with data_subset |
| 187 | + cloning.intersection_of(subsets.data_subset); |
| 188 | + if (cloning.empty()) { |
| 189 | + logger().debug("skipping clone, nothing needs to clone"); |
| 190 | + return subsets; |
| 191 | + } |
| 192 | + |
| 193 | +#ifndef UNIT_TESTS_BUILT |
| 194 | + if (cloning.num_intervals() > |
| 195 | + crimson::common::local_conf().get_val<uint64_t> |
| 196 | + ("osd_recover_clone_overlap_limit")) { |
| 197 | + logger().debug("skipping clone, too many holes"); |
| 198 | + subsets.clone_subsets.clear(); |
| 199 | + cloning.clear(); |
| 200 | + } |
| 201 | +#endif |
| 202 | + |
| 203 | + // what's left for us to push? |
| 204 | + subsets.clone_subsets[clone] = cloning; |
| 205 | + subsets.data_subset.subtract(cloning); |
| 206 | + logger().debug("{} {} data_subsets {}" |
| 207 | + "clone_subsets {}", |
| 208 | + __func__, head, subsets.data_subset, subsets.clone_subsets); |
| 209 | + |
| 210 | + return subsets; |
| 211 | +} |
| 212 | + |
| 213 | +void set_subsets( |
| 214 | + const subsets_t& subsets, |
| 215 | + ObjectRecoveryInfo& recovery_info) |
| 216 | +{ |
| 217 | + recovery_info.copy_subset = subsets.data_subset; |
| 218 | + recovery_info.clone_subset = subsets.clone_subsets; |
| 219 | +} |
| 220 | + |
| 221 | + |
| 222 | +} |
0 commit comments