Skip to content

Commit cb5a62f

Browse files
authored
Merge pull request ceph#60909 from ifed01/wip-ifed-client-cache-trim-repro
libcephfs/client: pin inode/dentry for an opened directory Reviewed-by: Venky Shankar <[email protected]>
2 parents 655f681 + 4badc83 commit cb5a62f

File tree

4 files changed

+329
-6
lines changed

4 files changed

+329
-6
lines changed

src/client/Client.cc

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10444,8 +10444,17 @@ int Client::_open(Inode *in, int flags, mode_t mode, Fh **fhp,
1044410444

1044510445
// success?
1044610446
if (result >= 0) {
10447-
if (fhp)
10447+
if (fhp) {
1044810448
*fhp = _create_fh(in, flags, cmode, perms);
10449+
// ceph_flags_sys2wire/ceph_flags_to_mode() calls above transforms O_DIRECTORY flag
10450+
// into CEPH_FILE_MODE_PIN mode. Although this mode is used at server size
10451+
// we [ab]use it here to determine whether we should pin inode to prevent from
10452+
// undesired cache eviction.
10453+
if (cmode == CEPH_FILE_MODE_PIN) {
10454+
ldout(cct, 20) << " pinning ll_get() call for " << *in << dendl;
10455+
_ll_get(in);
10456+
}
10457+
}
1044910458
} else {
1045010459
in->put_open_ref(cmode);
1045110460
}
@@ -10502,6 +10511,10 @@ int Client::_close(int fd)
1050210511
Fh *fh = get_filehandle(fd);
1050310512
if (!fh)
1050410513
return -CEPHFS_EBADF;
10514+
if (fh->mode == CEPH_FILE_MODE_PIN) {
10515+
ldout(cct, 20) << " unpinning ll_put() call for " << *(fh->inode.get()) << dendl;
10516+
_ll_put(fh->inode.get(), 1);
10517+
}
1050510518
int err = _release_fh(fh);
1050610519
fd_map.erase(fd);
1050710520
put_fd(fd);

src/common/ceph_fs.cc

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,13 +77,8 @@ int ceph_flags_sys2wire(int flags)
7777
ceph_sys2wire(O_EXCL);
7878
ceph_sys2wire(O_TRUNC);
7979

80-
#ifndef _WIN32
8180
ceph_sys2wire(O_DIRECTORY);
8281
ceph_sys2wire(O_NOFOLLOW);
83-
// In some cases, FILE_FLAG_BACKUP_SEMANTICS may be used instead
84-
// of O_DIRECTORY. We may need some workarounds in order to handle
85-
// the fact that those flags are not available on Windows.
86-
#endif
8782

8883
#undef ceph_sys2wire
8984

src/test/libcephfs/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ if(WITH_LIBCEPHFS)
1010
main.cc
1111
deleg.cc
1212
monconfig.cc
13+
client_cache.cc
1314
)
1415
target_link_libraries(ceph_test_libcephfs
1516
ceph-common

src/test/libcephfs/client_cache.cc

Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2+
// vim: ts=8 sw=2 smarttab
3+
/*
4+
* Ceph - scalable distributed file system
5+
*
6+
*
7+
* This is free software; you can redistribute it and/or
8+
* modify it under the terms of the GNU Lesser General Public
9+
* License version 2.1, as published by the Free Software
10+
* Foundation. See file COPYING.
11+
*
12+
*/
13+
14+
#include "gtest/gtest.h"
15+
#include "include/cephfs/libcephfs.h"
16+
#include "include/ceph_assert.h"
17+
#include "include/object.h"
18+
#include "include/stringify.h"
19+
#include <errno.h>
20+
#include <sys/types.h>
21+
#include <sys/stat.h>
22+
#include <string>
23+
#include <dirent.h>
24+
25+
using namespace std;
26+
class TestMount {
27+
public:
28+
ceph_mount_info* cmount = nullptr;
29+
string dir_path;
30+
31+
public:
32+
TestMount(const char* root_dir_name = "") : dir_path(root_dir_name) {
33+
ceph_create(&cmount, NULL);
34+
ceph_conf_read_file(cmount, NULL);
35+
ceph_conf_parse_env(cmount, NULL);
36+
ceph_assert(0 == ceph_mount(cmount, NULL));
37+
}
38+
~TestMount()
39+
{
40+
ceph_shutdown(cmount);
41+
}
42+
43+
int conf_get(const char *option, char *buf, size_t len) {
44+
return ceph_conf_get(cmount, option, buf, len);
45+
}
46+
47+
int conf_set(const char *option, const char *val) {
48+
return ceph_conf_set(cmount, option, val);
49+
}
50+
51+
string make_file_path(const char* relpath) {
52+
string ret = dir_path;
53+
ret += '/';
54+
ret += relpath;
55+
return ret;
56+
}
57+
58+
int write_full(const char* relpath, const string& data)
59+
{
60+
auto file_path = make_file_path(relpath);
61+
int fd = ceph_open(cmount, file_path.c_str(), O_WRONLY | O_CREAT, 0666);
62+
if (fd < 0) {
63+
return -EACCES;
64+
}
65+
int r = ceph_write(cmount, fd, data.c_str(), data.size(), 0);
66+
if (r >= 0) {
67+
ceph_truncate(cmount, file_path.c_str(), data.size());
68+
ceph_fsync(cmount, fd, 0);
69+
}
70+
ceph_close(cmount, fd);
71+
return r;
72+
}
73+
string concat_path(string_view path, string_view name) {
74+
string s(path);
75+
if (s.empty() || s.back() != '/') {
76+
s += '/';
77+
}
78+
s += name;
79+
return s;
80+
}
81+
int unlink(const char* relpath)
82+
{
83+
auto file_path = make_file_path(relpath);
84+
return ceph_unlink(cmount, file_path.c_str());
85+
}
86+
87+
int get_snapid(const char* relpath, uint64_t* res)
88+
{
89+
ceph_assert(res);
90+
snap_info snap_info;
91+
92+
auto snap_path = make_file_path(relpath);
93+
int r = ceph_get_snap_info(cmount, snap_path.c_str(), &snap_info);
94+
if (r >= 0) {
95+
*res = snap_info.id;
96+
r = 0;
97+
}
98+
return r;
99+
}
100+
101+
int for_each_readdir(const char* relpath,
102+
std::function<bool(const dirent*, const struct ceph_statx*)> fn)
103+
{
104+
auto subdir_path = make_file_path(relpath);
105+
struct ceph_dir_result* ls_dir;
106+
int r = ceph_opendir(cmount, subdir_path.c_str(), &ls_dir);
107+
if (r != 0) {
108+
return r;
109+
}
110+
111+
while (1) {
112+
struct dirent result;
113+
struct ceph_statx stx;
114+
115+
r = ceph_readdirplus_r(
116+
cmount, ls_dir, &result, &stx, CEPH_STATX_BASIC_STATS,
117+
0,
118+
NULL);
119+
if (!r)
120+
break;
121+
if (r < 0) {
122+
std::cerr << "ceph_readdirplus_r failed, error: "
123+
<< r << std::endl;
124+
return r;
125+
}
126+
127+
if (strcmp(result.d_name, ".") == 0 ||
128+
strcmp(result.d_name, "..") == 0) {
129+
continue;
130+
}
131+
if (!fn(&result, &stx)) {
132+
r = -EINTR;
133+
break;
134+
}
135+
}
136+
ceph_assert(0 == ceph_closedir(cmount, ls_dir));
137+
return r;
138+
}
139+
140+
int mkdir(const char* relpath)
141+
{
142+
auto path = make_file_path(relpath);
143+
return ceph_mkdir(cmount, path.c_str(), 0777);
144+
}
145+
int rmdir(const char* relpath)
146+
{
147+
auto path = make_file_path(relpath);
148+
return ceph_rmdir(cmount, path.c_str());
149+
}
150+
int purge_dir(const char* relpath0)
151+
{
152+
int r =
153+
for_each_readdir(relpath0,
154+
[&](const dirent* dire, const struct ceph_statx* stx) {
155+
string relpath = concat_path(relpath0, dire->d_name);
156+
157+
if (S_ISDIR(stx->stx_mode)) {
158+
purge_dir(relpath.c_str());
159+
rmdir(relpath.c_str());
160+
} else {
161+
unlink(relpath.c_str());
162+
}
163+
return true;
164+
});
165+
if (r != 0) {
166+
return r;
167+
}
168+
if (*relpath0 != 0) {
169+
r = rmdir(relpath0);
170+
}
171+
return r;
172+
}
173+
174+
ceph_mount_info* get_cmount() {
175+
return cmount;
176+
}
177+
178+
int test_open(const char* relpath)
179+
{
180+
auto subdir_path = make_file_path(relpath);
181+
int r = ceph_open(cmount, subdir_path.c_str(), O_DIRECTORY | O_RDONLY, 0);
182+
if (r < 0) {
183+
std::cout << "test_open error: " << subdir_path.c_str() << ", " << r << std::endl;
184+
return r;
185+
}
186+
return r;
187+
}
188+
int test_close(int fd)
189+
{
190+
ceph_assert(0 == ceph_close(cmount, fd));
191+
return 0;
192+
}
193+
194+
int test_statxat(int fd, const char* entry)
195+
{
196+
int r;
197+
{
198+
struct ceph_statx stx;
199+
r = ceph_statxat(cmount, fd, entry, &stx, CEPH_STATX_MODE | CEPH_STATX_INO, AT_STATX_DONT_SYNC | AT_SYMLINK_NOFOLLOW);
200+
if (r < 0) {
201+
std::cout << "test_statxat " << entry << " returns " << r << std::endl;
202+
} else {
203+
// replace CEPH_NOSNAP with 0 as the former is negative
204+
// and hence might be confused with an error.
205+
r = (uint64_t)stx.stx_dev == CEPH_NOSNAP ? 0 : stx.stx_dev;
206+
std::cout << "stx=" << stx.stx_ino << "." << r << std::endl;
207+
}
208+
}
209+
return r;
210+
}
211+
int test_statx(const char* path)
212+
{
213+
int r;
214+
{
215+
struct ceph_statx stx;
216+
r = ceph_statx(cmount, path, &stx, CEPH_STATX_MODE | CEPH_STATX_INO, AT_STATX_DONT_SYNC | AT_SYMLINK_NOFOLLOW);
217+
if (r < 0) {
218+
std::cout << "test_statx " << path << " returns " << r << std::endl;
219+
} else {
220+
// replace CEPH_NOSNAP with 0 as the former is negative
221+
// and hence might be confused with an error.
222+
r = (uint64_t)stx.stx_dev == CEPH_NOSNAP ? 0 : stx.stx_dev;
223+
std::cout << "stx=" << stx.stx_ino << "." << r << std::endl;
224+
}
225+
}
226+
return r;
227+
}
228+
229+
};
230+
231+
void prepareTrimCacheTest(TestMount& tm, size_t max_bulk)
232+
{
233+
ceph_rmsnap(tm.cmount, "/BrokenStatxAfterTrimeCacheTest", "snap1");
234+
ceph_rmsnap(tm.cmount, "/BrokenStatxAfterTrimeCacheTest", "snap2");
235+
tm.purge_dir("/BrokenStatxAfterTrimeCacheTest");
236+
237+
ASSERT_EQ(0, tm.mkdir("/BrokenStatxAfterTrimeCacheTest"));
238+
ASSERT_EQ(0, tm.mkdir("/BrokenStatxAfterTrimeCacheTest/bulk"));
239+
ASSERT_EQ(0, tm.mkdir("/BrokenStatxAfterTrimeCacheTest/test"));
240+
char path[PATH_MAX];
241+
for (size_t i = 0; i < max_bulk; i++) {
242+
snprintf(path, PATH_MAX - 1, "/BrokenStatxAfterTrimeCacheTest/bulk/%lu", i);
243+
tm.write_full(path, path);
244+
}
245+
246+
tm.write_full("/BrokenStatxAfterTrimeCacheTest/test/file1", "abcdef");
247+
ASSERT_EQ(0, tm.mkdir("/BrokenStatxAfterTrimeCacheTest/.snap/snap1"));
248+
tm.write_full("/BrokenStatxAfterTrimeCacheTest/test/file1", "snap2>>>");
249+
tm.write_full("/BrokenStatxAfterTrimeCacheTest/test/file2", "snap2>>>abcdef");
250+
ASSERT_EQ(0, tm.mkdir("/BrokenStatxAfterTrimeCacheTest/.snap/snap2"));
251+
}
252+
253+
TEST(LibCephFS, BrokenStatxAfterTrimCache)
254+
{
255+
size_t bulk_count = 100;
256+
{
257+
TestMount tm;
258+
prepareTrimCacheTest(tm, bulk_count);
259+
}
260+
TestMount test_mount;
261+
ASSERT_EQ(0, test_mount.conf_set("client_cache_size", stringify(bulk_count/2).c_str()));
262+
263+
uint64_t snapid1;
264+
uint64_t snapid2;
265+
266+
// learn snapshot ids and do basic verification
267+
ASSERT_EQ(0, test_mount.get_snapid("/BrokenStatxAfterTrimeCacheTest/.snap/snap1", &snapid1));
268+
ASSERT_EQ(0, test_mount.get_snapid("/BrokenStatxAfterTrimeCacheTest/.snap/snap2", &snapid2));
269+
270+
int s1fd = test_mount.test_open("/BrokenStatxAfterTrimeCacheTest/.snap/snap1");
271+
int s2fd = test_mount.test_open("/BrokenStatxAfterTrimeCacheTest/.snap/snap2");
272+
273+
// check if file1's statxat points to snap1
274+
ASSERT_EQ(snapid1, test_mount.test_statxat(s1fd, "test/file1"));
275+
// check if file1's statxat points to snap2
276+
ASSERT_EQ(snapid2, test_mount.test_statxat(s2fd, "test/file1"));
277+
// check if file2's statxat returns -2
278+
ASSERT_EQ(-2, test_mount.test_statxat(s1fd, "test/file2"));
279+
// check if file2's statx returns -2
280+
ASSERT_EQ(-2, test_mount.test_statx("/BrokenStatxAfterTrimeCacheTest/.snap/snap1/test/file2"));
281+
282+
int cnt = 0;
283+
int r = test_mount.for_each_readdir("/BrokenStatxAfterTrimeCacheTest/bulk",
284+
[&](const dirent*, const struct ceph_statx*) {
285+
++cnt;
286+
return true;
287+
});
288+
ASSERT_EQ(0, r);
289+
ASSERT_EQ(bulk_count, cnt);
290+
291+
// open folder to trigger cache trimming
292+
int bulk_fd = test_mount.test_open("/BrokenStatxAfterTrimeCacheTest/bulk");
293+
294+
// checking if statxat returns the same values as above,
295+
// which isn't the case if cache trimming evicted dentries behind
296+
// inodes bound to s1fd/s2fd.
297+
EXPECT_EQ(snapid1, test_mount.test_statxat(s1fd, "test/file1"));
298+
EXPECT_EQ(snapid2, test_mount.test_statxat(s2fd, "test/file1"));
299+
// check if file2's statxat returns -2
300+
EXPECT_EQ(-2, test_mount.test_statxat(s1fd, "test/file2"));
301+
// check if file2's statx still returns -2, should be fine irrespective of cache state.
302+
// This will also update the cache and bring file2 inode back to good shape
303+
ASSERT_EQ(-2, test_mount.test_statx("/BrokenStatxAfterTrimeCacheTest/.snap/snap1/test/file2"));
304+
// check if file2's statxat returns -2
305+
ASSERT_EQ(-2, test_mount.test_statxat(s1fd, "test/file2"));
306+
test_mount.test_close(bulk_fd);
307+
308+
test_mount.test_close(s2fd);
309+
test_mount.test_close(s1fd);
310+
311+
ceph_rmsnap(test_mount.cmount, "/BrokenStatxAfterTrimeCacheTest", "snap1");
312+
ceph_rmsnap(test_mount.cmount, "/BrokenStatxAfterTrimeCacheTest", "snap2");
313+
test_mount.purge_dir("/BrokenStatxAfterTrimeCacheTest");
314+
}

0 commit comments

Comments
 (0)