@@ -14470,3 +14470,202 @@ void MDCache::upkeep_main(void)
1447014470 upkeep_cvar.wait_for (lock, interval);
1447114471 }
1447214472}
14473+
14474+ struct C_ListSnapsAggregator : public MDSIOContext {
14475+ C_ListSnapsAggregator (MDSRank *mds, CInode *in1, CInode *in2, BlockDiff *block_diff,
14476+ Context *on_finish)
14477+ : MDSIOContext(mds),
14478+ in1 (in1),
14479+ in2(in2),
14480+ block_diff(block_diff),
14481+ on_finish(on_finish) {
14482+ }
14483+
14484+ void finish (int r) override {
14485+ mds->mdcache ->aggregate_snap_sets (snap_set_context, in1, in2,
14486+ block_diff, on_finish);
14487+ }
14488+
14489+ virtual void print (std::ostream& os) const {
14490+ os << " listsnaps" ;
14491+ }
14492+
14493+ void add_snap_set_context (std::unique_ptr<MDCache::SnapSetContext> ssc) {
14494+ snap_set_context.push_back (std::move (ssc));
14495+ }
14496+
14497+ CInode *in1;
14498+ CInode *in2;
14499+ BlockDiff *block_diff;
14500+ Context *on_finish;
14501+ std::vector<std::unique_ptr<MDCache::SnapSetContext>> snap_set_context;
14502+ };
14503+
14504+ void MDCache::file_blockdiff (CInode *in1, CInode *in2, BlockDiff *block_diff, uint64_t max_objects,
14505+ MDSContext *ctx) {
14506+ ceph_assert (in1->last <= in2->last );
14507+
14508+ // I think this is not required since the MDS disallows setting
14509+ // layout when truncate_seq > 1.
14510+ if (in1->get_inode ()->layout != in2->get_inode ()->layout ) {
14511+ dout (20 ) << __func__ << " : snaps have different layout: " << in1->get_inode ()->layout
14512+ << " vs " << in2->get_inode ()->layout << dendl;
14513+ block_diff->blocks .union_insert (0 , in2->get_inode ()->size );
14514+ ctx->complete (0 );
14515+ return ;
14516+ }
14517+
14518+ uint64_t scan_idx = block_diff->scan_idx ;
14519+ uint64_t num_objects1 = Striper::get_num_objects (in1->get_inode ()->layout ,
14520+ in1->get_inode ()->size );
14521+ uint64_t num_objects2 = Striper::get_num_objects (in2->get_inode ()->layout ,
14522+ in2->get_inode ()->size );
14523+ uint64_t num_objects_pending1 = num_objects1 - scan_idx;
14524+ uint64_t num_objects_pending2 = num_objects2 - scan_idx;
14525+
14526+ uint64_t scans = std::min (
14527+ std::min (num_objects_pending1, num_objects_pending2),
14528+ std::min ((uint64_t )(g_conf ().get_val <uint64_t >(" mds_file_blockdiff_max_concurrent_object_scans" )),
14529+ max_objects));
14530+
14531+ dout (20 ) << __func__ << " : scanning " << scans << " objects" << dendl;
14532+ if (scans == 0 ) {
14533+ // we ran out of objects to scan - figure which ones
14534+ if (num_objects_pending1 == 0 && num_objects_pending2 == 0 ) {
14535+ // easy - both snaps have same number of objects
14536+ dout (20 ) << __func__ << " : equal extent" << dendl;
14537+ ctx->complete (0 );
14538+ } else {
14539+ if (num_objects_pending1 == 0 ) {
14540+ // first snapshot has lesser number of objects - return
14541+ // an extent covering EOF.
14542+ dout (20 ) << __func__ << " : EOF extent" << dendl;
14543+ uint64_t offset = Striper::get_file_offset (g_ceph_context, &(in2->get_inode ()->layout ),
14544+ scan_idx, 0 );
14545+ block_diff->blocks .union_insert (offset, in2->get_inode ()->size - offset);
14546+ ctx->complete (0 );
14547+ } else {
14548+ // num_objects_pending2 == 0
14549+ dout (20 ) << __func__ << " : truncated extent" << dendl;
14550+ ctx->complete (0 );
14551+ }
14552+ }
14553+
14554+ return ;
14555+ }
14556+
14557+ C_ListSnapsAggregator *on_finish = new C_ListSnapsAggregator (mds, in1, in2, block_diff, ctx);
14558+ MDSGatherBuilder gather_ctx (g_ceph_context, on_finish);
14559+
14560+ while (scans > 0 ) {
14561+ ObjectOperation op;
14562+ std::unique_ptr<SnapSetContext> ssc (new SnapSetContext ());
14563+ op.list_snaps (&ssc->snaps , &ssc->r );
14564+ ssc->objectid = scan_idx;
14565+
14566+ mds->objecter ->read (file_object_t (in1->ino (), scan_idx),
14567+ OSDMap::file_to_object_locator (in2->get_inode ()->layout ),
14568+ op, LIBRADOS_SNAP_DIR, NULL , 0 , gather_ctx.new_sub ());
14569+ on_finish->add_snap_set_context (std::move (ssc));
14570+ ++scan_idx;
14571+ --scans;
14572+ }
14573+
14574+ gather_ctx.activate ();
14575+ }
14576+
14577+ void MDCache::aggregate_snap_sets (const std::vector<std::unique_ptr<SnapSetContext>> &snap_set_ctx,
14578+ CInode *in1, CInode *in2, BlockDiff *block_diff, Context *on_finish) {
14579+ dout (20 ) << __func__ << dendl;
14580+
14581+ // always signal to the client to request again since request
14582+ // completion is signalled in file_blockdiff().
14583+ int r = 1 ;
14584+ snapid_t snapid1 = in1->last ;
14585+ snapid_t snapid2 = in2->last ;
14586+ uint64_t scans = snap_set_ctx.size ();
14587+
14588+ interval_set<uint64_t > extents;
14589+ for (auto &snap_set : snap_set_ctx) {
14590+ dout (20 ) << __func__ << " : objectid=" << snap_set->objectid << " , r=" << snap_set->r
14591+ << dendl;
14592+ if (snap_set->r != 0 && snap_set->r != -ENOENT) {
14593+ derr << " : failed to get snap set for objectid=" << snap_set->objectid
14594+ << " , r=" << snap_set->r << dendl;
14595+ r = snap_set->r ;
14596+ break ;
14597+ }
14598+
14599+ if (snap_set->r == 0 ) {
14600+ auto &clones = snap_set->snaps .clones ;
14601+ auto it1 = std::find_if (clones.begin (), clones.end (),
14602+ [snapid1](const librados::clone_info_t &clone)
14603+ {
14604+ return snapid1 == clone.cloneid ||
14605+ (std::find (clone.snaps .begin (), clone.snaps .end (), snapid1) != clone.snaps .end ());
14606+ });
14607+ // point to "head" if not found
14608+ if (it1 == clones.end ()) {
14609+ it1 = std::prev (it1);
14610+ }
14611+ auto it2 = std::find_if (clones.begin (), clones.end (),
14612+ [snapid2](const librados::clone_info_t &clone)
14613+ {
14614+ return snapid2 == clone.cloneid ||
14615+ (std::find (clone.snaps .begin (), clone.snaps .end (), snapid2) != clone.snaps .end ());
14616+ });
14617+ // point to "head" if not found
14618+ if (it2 == clones.end ()) {
14619+ it2 = std::prev (it2);
14620+ }
14621+
14622+ if (it1 == it2) {
14623+ dout (10 ) << __func__ << " : both snaps in same clone" << dendl;
14624+ continue ;
14625+ }
14626+
14627+ interval_set<uint64_t > extent;
14628+ uint64_t offset = Striper::get_file_offset (g_ceph_context, &(in2->get_inode ()->layout ),
14629+ snap_set->objectid , 0 );
14630+
14631+ for (auto hops = std::distance (it1, it2); hops > 0 ; --hops) {
14632+ dout (20 ) << __func__ << " : [cloneid: " << it1->cloneid << " snaps: " << it1->snaps
14633+ << " overlap: " << it1->overlap << " ]" << dendl;
14634+ auto next_it = it1 + 1 ;
14635+ dout (20 ) << __func__ << " : [next cloneid: " << next_it->cloneid << " snaps: " << next_it->snaps
14636+ << " overlap: " << next_it->overlap << " ]" << dendl;
14637+ auto sz = next_it->size ;
14638+ if (sz == 0 ) {
14639+ // this object is a hole in the file.
14640+ // TODO: report holes in blockdiff strucuter. that way,
14641+ // caller can optimize and punch holes rather than writing
14642+ // zeros.
14643+ dout (10 ) << __func__ << " : hole: [" << offset << " ~" << it1->size << " ]" << dendl;
14644+ dout (10 ) << __func__ << " : adding whole extent - reader will read zeros" << dendl;
14645+ sz = it1->size ;
14646+ }
14647+
14648+ extent.clear ();
14649+ extent.union_insert (offset, sz);
14650+ for (auto &overlap_region : it1->overlap ) {
14651+ uint64_t overlap_offset = Striper::get_file_offset (g_ceph_context, &(in2->get_inode ()->layout ),
14652+ snap_set->objectid , overlap_region.first );
14653+ extent.erase (overlap_offset, overlap_region.second );
14654+ }
14655+
14656+ dout (20 ) << __func__ << " : (non overlapping) extent=" << extent << dendl;
14657+ extents.union_of (extent);
14658+ dout (20 ) << __func__ << " : (modified) extents=" << extents << dendl;
14659+ ++it1;
14660+ }
14661+ }
14662+ }
14663+
14664+ block_diff->rval = r;
14665+ if (r >= 0 ) {
14666+ r = 0 ;
14667+ block_diff->scan_idx += scans;
14668+ block_diff->blocks = extents;
14669+ }
14670+ on_finish->complete (r);
14671+ }
0 commit comments