@@ -12550,7 +12550,7 @@ void Server::handle_client_readdir_snapdiff(const MDRequestRef& mdr)
1255012550 offset_hash = (__u32)req->head .args .snapdiff .offset_hash ;
1255112551 }
1255212552
12553- dout (10 ) << " frag " << fg << " offset '" << offset_str << " '"
12553+ dout (10 ) << __func__ << " frag " << fg << " offset '" << offset_str << " '"
1255412554 << " offset_hash " << offset_hash << " flags " << req_flags << dendl;
1255512555
1255612556 // does the frag exist?
@@ -12707,8 +12707,18 @@ void Server::_readdir_diff(
1270712707 std::swap (snapid, snapid_prev);
1270812708 }
1270912709 bool from_the_beginning = !offset_hash && offset_str.empty ();
12710- // skip all dns < dentry_key_t(snapid, offset_str, offset_hash)
12711- dentry_key_t skip_key (snapid_prev, offset_str.c_str (), offset_hash);
12710+ // skip all dns <= dentry_key_t(*, offset_str, offset_hash)
12711+ dentry_key_t skip_key (CEPH_NOSNAP, offset_str.c_str (), offset_hash);
12712+
12713+ // We need to roll back all the entries with the same name
12714+ // when some entries with this name don't fit into the same fragment.
12715+ // This is caused by the limited ability to provide offsets between
12716+ // fragments - there is no way to identify the specific snapshot of the last entry.
12717+ // The following vars denote the potential rollback position for such a case.
12718+ // Fixes: https://tracker.ceph.com/issues/72518
12719+ string last_name;
12720+ size_t rollback_pos = 0 ;
12721+ size_t rollback_num = 0 ;
1271212722
1271312723 bool end = build_snap_diff (
1271412724 mdr,
@@ -12727,7 +12737,16 @@ void Server::_readdir_diff(
1272712737 effective_snapid = exists ? snapid : snapid_prev;
1272812738 name.append (dn_name);
1272912739 if ((int )(dnbl.length () + name.length () + sizeof (__u32) + sizeof (LeaseStat)) > bytes_left) {
12730- dout (10 ) << " ran out of room, stopping at " << dnbl.length () << " < " << bytes_left << dendl;
12740+ dout (10 ) << " ran out of room for name, stopping at " << dnbl.length () << " < " << bytes_left << dendl;
12741+ if (name == last_name) {
12742+ bufferlist keep;
12743+ keep.substr_of (dnbl, 0 , rollback_pos);
12744+ dnbl.swap (keep);
12745+ last_name.clear ();
12746+ rollback_pos = 0 ;
12747+ numfiles = rollback_num;
12748+ rollback_num = 0 ;
12749+ }
1273112750 return false ;
1273212751 }
1273312752
@@ -12736,6 +12755,7 @@ void Server::_readdir_diff(
1273612755 unsigned start_len = dnbl.length ();
1273712756 dout (10 ) << " inc dn " << *dn << " as " << name
1273812757 << std::hex << " hash 0x" << hash << std::dec
12758+ << " " << effective_snapid
1273912759 << dendl;
1274012760 encode (name, dnbl);
1274112761 mds->locker ->issue_client_lease (dn, in, mdr, now, dnbl);
@@ -12748,11 +12768,24 @@ void Server::_readdir_diff(
1274812768 dout (10 ) << " ran out of room, stopping at "
1274912769 << start_len << " < " << bytes_left << dendl;
1275012770 bufferlist keep;
12751- keep.substr_of (dnbl, 0 , start_len);
12771+
12772+ keep.substr_of (dnbl, 0 ,
12773+ name == last_name ? rollback_pos : start_len);
1275212774 dnbl.swap (keep);
12775+
12776+ last_name.clear ();
12777+ rollback_pos = 0 ;
12778+ numfiles = rollback_num;
12779+ rollback_num = 0 ;
1275312780 return false ;
1275412781 }
1275512782
12783+ // set rollback position
12784+ if (name != last_name) {
12785+ last_name = name;
12786+ rollback_pos = start_len;
12787+ rollback_num = numfiles;
12788+ }
1275612789 // touch dn
1275712790 mdcache->lru .lru_touch (dn);
1275812791 ++numfiles;
@@ -12800,7 +12833,7 @@ bool Server::build_snap_diff(
1280012833 return r;
1280112834 };
1280212835
12803- auto it = !skip_key ? dir->begin () : dir->lower_bound (*skip_key);
12836+ auto it = !skip_key ? dir->begin () : dir->upper_bound (*skip_key);
1280412837
1280512838 while (it != dir->end ()) {
1280612839 CDentry* dn = it->second ;
@@ -12821,11 +12854,6 @@ bool Server::build_snap_diff(
1282112854 dout (20 ) << __func__ << " not in range, skipping" << dendl;
1282212855 continue ;
1282312856 }
12824- if (skip_key) {
12825- skip_key->snapid = dn->last ;
12826- if (!(*skip_key < dn->key ()))
12827- continue ;
12828- }
1282912857
1283012858 CInode* in = dnl->get_inode ();
1283112859 if (in && in->ino () == CEPH_INO_CEPH)
@@ -12864,7 +12892,6 @@ bool Server::build_snap_diff(
1286412892 ceph_assert (in);
1286512893
1286612894 utime_t mtime = in->get_inode ()->mtime ;
12867-
1286812895 if (in->is_dir ()) {
1286912896
1287012897 // we need to maintain the order of entries (determined by their name hashes)
0 commit comments