Skip to content

Commit 4c4fd3c

Browse files
authored
Merge pull request ceph#64995 from ifed01/wip-ifed-fix-snapdiff-fragment
mds: fix snapdiff result fragmentation Reviewed-by: Venky Shankar <[email protected]>
2 parents fa39f1c + 23397d3 commit 4c4fd3c

File tree

3 files changed

+95
-12
lines changed

3 files changed

+95
-12
lines changed

src/mds/CDir.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ class CDir : public MDSCacheObject, public Counter<CDir> {
352352
dentry_key_map::iterator begin() { return items.begin(); }
353353
dentry_key_map::iterator end() { return items.end(); }
354354
dentry_key_map::iterator lower_bound(dentry_key_t key) { return items.lower_bound(key); }
355+
dentry_key_map::iterator upper_bound(dentry_key_t key) { return items.upper_bound(key); }
355356

356357
unsigned get_num_head_items() const { return num_head_items; }
357358
unsigned get_num_head_null() const { return num_head_null; }

src/mds/Server.cc

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12550,7 +12550,7 @@ void Server::handle_client_readdir_snapdiff(const MDRequestRef& mdr)
1255012550
offset_hash = (__u32)req->head.args.snapdiff.offset_hash;
1255112551
}
1255212552

12553-
dout(10) << " frag " << fg << " offset '" << offset_str << "'"
12553+
dout(10) << __func__ << " frag " << fg << " offset '" << offset_str << "'"
1255412554
<< " offset_hash " << offset_hash << " flags " << req_flags << dendl;
1255512555

1255612556
// does the frag exist?
@@ -12707,8 +12707,18 @@ void Server::_readdir_diff(
1270712707
std::swap(snapid, snapid_prev);
1270812708
}
1270912709
bool from_the_beginning = !offset_hash && offset_str.empty();
12710-
// skip all dns < dentry_key_t(snapid, offset_str, offset_hash)
12711-
dentry_key_t skip_key(snapid_prev, offset_str.c_str(), offset_hash);
12710+
// skip all dns <= dentry_key_t(*, offset_str, offset_hash)
12711+
dentry_key_t skip_key(CEPH_NOSNAP, offset_str.c_str(), offset_hash);
12712+
12713+
// We need to rollback all the entries with the same name
12714+
// when some entries with this name don't fit into the same fragment.
12715+
// This is caused by the limited ability for offset provisioning between
12716+
// fragments - there is no way to identify specific snapshot for the last entry.
12717+
// The following vars denote the potential rollback position for such a case.
12718+
// Fixes: https://tracker.ceph.com/issues/72518
12719+
string last_name;
12720+
size_t rollback_pos = 0;
12721+
size_t rollback_num = 0;
1271212722

1271312723
bool end = build_snap_diff(
1271412724
mdr,
@@ -12727,7 +12737,16 @@ void Server::_readdir_diff(
1272712737
effective_snapid = exists ? snapid : snapid_prev;
1272812738
name.append(dn_name);
1272912739
if ((int)(dnbl.length() + name.length() + sizeof(__u32) + sizeof(LeaseStat)) > bytes_left) {
12730-
dout(10) << " ran out of room, stopping at " << dnbl.length() << " < " << bytes_left << dendl;
12740+
dout(10) << " ran out of room for name, stopping at " << dnbl.length() << " < " << bytes_left << dendl;
12741+
if (name == last_name) {
12742+
bufferlist keep;
12743+
keep.substr_of(dnbl, 0, rollback_pos);
12744+
dnbl.swap(keep);
12745+
last_name.clear();
12746+
rollback_pos = 0;
12747+
numfiles = rollback_num;
12748+
rollback_num = 0;
12749+
}
1273112750
return false;
1273212751
}
1273312752

@@ -12736,6 +12755,7 @@ void Server::_readdir_diff(
1273612755
unsigned start_len = dnbl.length();
1273712756
dout(10) << "inc dn " << *dn << " as " << name
1273812757
<< std::hex << " hash 0x" << hash << std::dec
12758+
<< " " << effective_snapid
1273912759
<< dendl;
1274012760
encode(name, dnbl);
1274112761
mds->locker->issue_client_lease(dn, in, mdr, now, dnbl);
@@ -12748,11 +12768,24 @@ void Server::_readdir_diff(
1274812768
dout(10) << " ran out of room, stopping at "
1274912769
<< start_len << " < " << bytes_left << dendl;
1275012770
bufferlist keep;
12751-
keep.substr_of(dnbl, 0, start_len);
12771+
12772+
keep.substr_of(dnbl, 0,
12773+
name == last_name ? rollback_pos : start_len);
1275212774
dnbl.swap(keep);
12775+
12776+
last_name.clear();
12777+
rollback_pos = 0;
12778+
numfiles = rollback_num;
12779+
rollback_num = 0;
1275312780
return false;
1275412781
}
1275512782

12783+
// set rollback position
12784+
if (name != last_name) {
12785+
last_name = name;
12786+
rollback_pos = start_len;
12787+
rollback_num = numfiles;
12788+
}
1275612789
// touch dn
1275712790
mdcache->lru.lru_touch(dn);
1275812791
++numfiles;
@@ -12800,7 +12833,7 @@ bool Server::build_snap_diff(
1280012833
return r;
1280112834
};
1280212835

12803-
auto it = !skip_key ? dir->begin() : dir->lower_bound(*skip_key);
12836+
auto it = !skip_key ? dir->begin() : dir->upper_bound(*skip_key);
1280412837

1280512838
while(it != dir->end()) {
1280612839
CDentry* dn = it->second;
@@ -12821,11 +12854,6 @@ bool Server::build_snap_diff(
1282112854
dout(20) << __func__ << " not in range, skipping" << dendl;
1282212855
continue;
1282312856
}
12824-
if (skip_key) {
12825-
skip_key->snapid = dn->last;
12826-
if (!(*skip_key < dn->key()))
12827-
continue;
12828-
}
1282912857

1283012858
CInode* in = dnl->get_inode();
1283112859
if (in && in->ino() == CEPH_INO_CEPH)
@@ -12864,7 +12892,6 @@ bool Server::build_snap_diff(
1286412892
ceph_assert(in);
1286512893

1286612894
utime_t mtime = in->get_inode()->mtime;
12867-
1286812895
if (in->is_dir()) {
1286912896

1287012897
// we need to maintain the order of entries (determined by their name hashes)

src/test/libcephfs/snapdiff.cc

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include <dirent.h>
3535
#include <optional>
3636
#include <random>
37+
#include <string.h>
3738

3839
using namespace std;
3940
class TestMount {
@@ -2137,3 +2138,57 @@ TEST(LibCephFS, SnapDiffChangedBlockWithCustomObjectSize)
21372138
ASSERT_EQ(0, test_mount.rmsnap("snap1"));
21382139
ASSERT_EQ(0, test_mount.rmsnap("snap2"));
21392140
}
2141+
2142+
TEST(LibCephFS, SnapDiffDeletionRecreation) {
2143+
int bulk_count = 1 << 15;
2144+
TestMount test_mount("/SnapdiffDeletionRecreation");
2145+
2146+
ASSERT_EQ(0, test_mount.mkdir("bulk"));
2147+
ASSERT_EQ(0, test_mount.mkdir("test"));
2148+
2149+
int i;
2150+
char path[PATH_MAX];
2151+
for (i = 0; i < bulk_count; i++) {
2152+
snprintf(path, PATH_MAX - 1, "bulk/%d", i);
2153+
test_mount.write_full(path, path);
2154+
}
2155+
ASSERT_EQ(0, test_mount.mksnap("snap1"));
2156+
// creation of snap1 done
2157+
2158+
for (i = 0; i < bulk_count / 2; ++i) {
2159+
snprintf(path, PATH_MAX - 1, "bulk/%d", i);
2160+
ASSERT_EQ(0, test_mount.unlink(path));
2161+
if (i >= bulk_count / 4) {
2162+
ASSERT_EQ(0, test_mount.mkdir(path));
2163+
}
2164+
}
2165+
for (i = bulk_count; i < 2 * bulk_count; ++i) {
2166+
snprintf(path, PATH_MAX - 1, "bulk/%d", i);
2167+
test_mount.write_full(path, path);
2168+
}
2169+
ASSERT_EQ(0, test_mount.mksnap("snap2"));
2170+
2171+
uint64_t snapid1;
2172+
uint64_t snapid2;
2173+
2174+
// learn snapshot ids and do basic verification
2175+
ASSERT_EQ(0, test_mount.get_snapid("snap1", &snapid1));
2176+
ASSERT_EQ(0, test_mount.get_snapid("snap2", &snapid2));
2177+
2178+
vector <pair <string, uint64_t>> expected;
2179+
for (int i = 0; i < 2 * bulk_count; ++i) {
2180+
string dir = to_string(i);
2181+
if (i < bulk_count / 4) {
2182+
expected.push_back({dir, snapid1});
2183+
} else if (i >= bulk_count / 4 && i < bulk_count / 2) {
2184+
expected.push_back({dir, snapid1});
2185+
expected.push_back({dir, snapid2});
2186+
} else if (i >= bulk_count) {
2187+
expected.push_back({dir, snapid2});
2188+
}
2189+
}
2190+
test_mount.verify_snap_diff(expected, "bulk", "snap1", "snap2");
2191+
2192+
test_mount.rmsnap("snap1");
2193+
test_mount.rmsnap("snap2");
2194+
}

0 commit comments

Comments
 (0)