Skip to content

Commit feb33d7

Browse files
committed
FST: Compute the IO performance measurements by directly using the block
device for random operations. Make the necessary dance to allow the FST process to access the block device. Fixes EOS-5939
1 parent fe41263 commit feb33d7

File tree

5 files changed

+243
-70
lines changed

5 files changed

+243
-70
lines changed

eos.spec.in

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -404,10 +404,9 @@ The EOS server installation
404404

405405
%post -n eos-server
406406
if [ ! -f /etc/sysconfig/eos-yum-noscripts ]; then
407-
echo "Starting conditional EOS services"
408-
sleep 1
409-
systemctl daemon-reload > /dev/null 2>&1 || :
410-
systemctl --no-legend list-units "eos@*" | grep -v "q.*db" | awk '{print $1}' | xargs --no-run-if-empty -n1 systemctl restart || :
407+
echo "Starting conditional EOS services"
408+
systemctl daemon-reload > /dev/null 2>&1 || :
409+
systemctl --no-legend list-units "eos@*" | grep -v "q.*db" | awk '{print $1}' | xargs --no-run-if-empty -n1 systemctl restart || :
411410
fi
412411

413412
%preun -n eos-server

fst/storage/FileSystem.cc

Lines changed: 71 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,66 @@
3333
#define O_DIRECT 0
3434
#endif
3535

36+
namespace
37+
{
38+
//----------------------------------------------------------------------------
39+
//! Struct SwitchToRootEuid takes care of switching the current effective UID
40+
//! of the **current thread** to root (uid=0) and then revert to the original
41+
//! setup once this object is destroyed. We need this to be able to access
42+
//! the block devices on the machine during start up to measure the IO
43+
//! performance.
44+
//----------------------------------------------------------------------------
45+
struct SwitchToRootEuid {
46+
//--------------------------------------------------------------------------
47+
//! Constructor
48+
//--------------------------------------------------------------------------
49+
SwitchToRootEuid()
50+
{
51+
if (syscall(SYS_getresuid, &mRuid, &mEuid, &mSuid) == -1) {
52+
eos_static_err("%s", "msg=\"failed to get uids\"");
53+
mFailed = true;
54+
return;
55+
}
56+
57+
eos_static_info("msg=\"initial user identity\" ruid=%i euid=%i suid=%i",
58+
mRuid, mEuid, mSuid);
59+
60+
// Switch to target user
61+
if (syscall(SYS_setresuid, sRootUid, sRootUid, sRootUid) == -1) {
62+
eos_static_err("msg=\"insuffcient priviledges to switch to root\" "
63+
"euid=%i", mEuid);
64+
mFailed = true;
65+
return;
66+
}
67+
}
68+
69+
//--------------------------------------------------------------------------
70+
//! Destructor
71+
//--------------------------------------------------------------------------
72+
~SwitchToRootEuid()
73+
{
74+
// Put back the original effective UID
75+
if (syscall(SYS_setresuid, mRuid, mEuid, mSuid) == -1) {
76+
eos_static_err("msg=\"insuffcient priviledges to switch to user\" "
77+
"euid=%i", mEuid);
78+
}
79+
80+
if (syscall(SYS_getresuid, &mRuid, &mEuid, &mSuid) == -1) {
81+
eos_static_err("%s", "msg=\"failed to get uids\"");
82+
}
83+
84+
eos_static_info("msg=\"reverted user identity\" ruid=%i euid=%i suid=%i",
85+
mRuid, mEuid, mSuid);
86+
}
87+
88+
static constexpr uid_t sRootUid = 0;
89+
//! Real, effective and saved UID
90+
uid_t mRuid, mEuid, mSuid;
91+
//! Flag to mark any failures
92+
bool mFailed {false};
93+
};
94+
}
95+
3696
EOSFSTNAMESPACE_BEGIN
3797

3898
// Set of key updates to be tracked at the file system level
@@ -65,7 +125,7 @@ FileSystem::FileSystem(const common::FileSystemLocator& locator,
65125
mFileIO.reset(FileIoPlugin::GetIoObject(mLocator.getStoragePath()));
66126
// Subscribe to the underlying SharedHash object to get updates
67127
mSubscription = mq::SharedHashWrapper(mRealm, mHashLocator).subscribe();
68-
128+
69129
if (mSubscription) {
70130
using namespace std::placeholders;
71131
mSubscription->attachCallback(std::bind(&FileSystem::ProcessUpdateCb,
@@ -266,35 +326,25 @@ FileSystem::IoPing()
266326
return;
267327
}
268328

269-
// Create temporary file (1GB) name on the mountpoint
270-
uint64_t fn_size = 1 << 30; // 1 GB
271-
const std::string fn_path = eos::fst::MakeTemporaryFile(GetPath());
329+
std::string device_path = eos::fst::GetDevicePath(GetPath());
272330

273-
if (fn_path.empty()) {
274-
eos_static_err("msg=\"failed to create tmp file\" base_path=%s",
331+
if (device_path.empty()) {
332+
eos_static_err("msg=\"failed to resolve block device\" path=%s",
275333
GetPath().c_str());
276334
return;
277335
}
278336

279-
// Open the file for direct access
280-
int fd = open(fn_path.c_str(), O_RDWR | O_TRUNC | O_DIRECT | O_SYNC);
281-
282-
if (fd == -1) {
283-
eos_static_err("msg=\"failed to open file\" path=%s", fn_path.c_str());
284-
return;
285-
}
286-
287-
// Unlink the file so that we don't leave any behind even in the case of
288-
// a crash of the FST. The file descritor will still be valid for use.
289-
(void) unlink(fn_path.c_str());
337+
// Switch to root euid will be reverted upon destruction
338+
SwitchToRootEuid root_euid;
290339

291-
// Fill the file up to the given size with random data
292-
if (!eos::fst::FillFileGivenSize(fd, fn_size)) {
293-
eos_static_err("msg=\"failed to fill file\" path=%s", fn_path.c_str());
294-
(void) close(fd);
340+
if (root_euid.mFailed) {
341+
eos_static_err("%s", "msg=\"failed to switch euid for IO perfmance "
342+
"measurements\"");
295343
return;
296344
}
297345

346+
// Open the file for direct access
347+
int fd = open(device_path.c_str(), O_RDONLY | O_DIRECT);
298348
using namespace std::chrono;
299349
auto start_iops = high_resolution_clock::now();
300350
IOPS = eos::fst::ComputeIops(fd);

fst/utils/DiskMeasurements.cc

Lines changed: 134 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,12 @@
2828
#include <chrono>
2929
#include <random>
3030
#include <sys/stat.h>
31+
#include <sys/sysmacros.h>
3132
#include <fcntl.h>
33+
#include <unistd.h>
34+
#include <cstring>
35+
#include <sys/ioctl.h>
36+
#include <linux/fs.h>
3237

3338
EOSFSTNAMESPACE_BEGIN
3439

@@ -99,6 +104,106 @@ std::string MakeTemporaryFile(std::string base_path)
99104
return tmp_path;
100105
}
101106

107+
//------------------------------------------------------------------------------
108+
// Get block device for a given path
109+
//------------------------------------------------------------------------------
110+
std::string GetDevicePath(const std::string& path)
{
  // The id of the device holding the filesystem of 'path' is the lookup key
  struct stat st;

  if (stat(path.c_str(), &st)) {
    return "";
  }

  const dev_t dev = st.st_dev;
  // Prefer mountinfo since every entry carries the major:minor of the mount,
  // fall back to the plain mounts table if it can not be opened.
  FILE* file = fopen("/proc/self/mountinfo", "r");

  if (!file) {
    file = fopen("/proc/mounts", "r");
  }

  if (!file) {
    return "";
  }

  char* line = nullptr;
  size_t len = 0;
  std::string device_path;

  while (getline(&line, &len, file) != -1) {
    // Not named major/minor to stay clear of the <sys/sysmacros.h> macros
    unsigned int dev_major = 0;
    unsigned int dev_minor = 0;

    // mountinfo format: "id parent major:minor ... - <fstype> <device> <opts>"
    if (sscanf(line, "%*d %*d %u:%u", &dev_major, &dev_minor) == 2) {
      if (makedev(dev_major, dev_minor) != dev) {
        continue;
      }

      // The fields after the " - " separator are: <fstype> <device> <options>
      char* sep = strstr(line, " - ");

      if (!sep) {
        continue;
      }

      // Use the reentrant strtok_r: plain strtok keeps hidden global state
      // which would be clobbered here and left pointing into 'line' after it
      // is freed. The newline is a delimiter so it never ends up in the token.
      char* save_ptr = nullptr;
      char* fstype = strtok_r(sep + 3, " \n", &save_ptr);
      char* dev_str = strtok_r(nullptr, " \n", &save_ptr);
      (void) fstype;

      if (dev_str) {
        device_path = dev_str;
        break;
      }
    } else {
      // Fallback for the /proc/mounts format: "<device> <mountpoint> ...".
      // A mountpoint living on the same device id as 'path' identifies the
      // entry we are looking for.
      char dev_str[1024];
      char mount_str[1024];

      if (sscanf(line, "%1023s %1023s", dev_str, mount_str) != 2) {
        continue;
      }

      struct stat mp_st;

      if ((stat(mount_str, &mp_st) == 0) && (mp_st.st_dev == dev)) {
        device_path = dev_str;
        break;
      }
    }
  }

  free(line);
  fclose(file);
  return device_path;
}
184+
185+
//------------------------------------------------------------------------------
186+
// Get file/device size
187+
//------------------------------------------------------------------------------
188+
uint64_t GetBlkSize(int fd)
{
  struct stat info;

  // Without stat information there is nothing to report
  if (fstat(fd, &info) != 0) {
    return 0;
  }

  uint64_t dev_bytes = 0;
  const bool is_blk_dev = S_ISBLK(info.st_mode);

  // For a block device ask the kernel for the device size in bytes
  if (is_blk_dev && (ioctl(fd, BLKGETSIZE64, &dev_bytes) == 0)) {
    return dev_bytes;
  }

  // Anything else, or a failed ioctl, reports the size from the stat info
  return info.st_size;
}
206+
102207
//------------------------------------------------------------------------------
103208
// Get IOPS measurement for the given path
104209
//------------------------------------------------------------------------------
@@ -107,16 +212,14 @@ int ComputeIops(int fd, uint64_t rd_buf_size, std::chrono::seconds timeout)
107212
using namespace eos::common;
108213
using namespace std::chrono;
109214
int IOPS = -1;
110-
// Get file size
111-
struct stat info;
215+
uint64_t fn_size = GetBlkSize(fd);
112216

113-
if (fstat(fd, &info)) {
114-
std::cerr << "err: failed to stat file fd=" << fd << std::endl;
115-
eos_static_err("msg=\"failed to stat file\" fd=%i", fd);
217+
if (fn_size == 0) {
218+
std::cerr << "err: failed to get file size fd=" << fd << std::endl;
219+
eos_static_err("msg=\"failed to get file size\" fd=%i", fd);
116220
return IOPS;
117221
}
118222

119-
uint64_t fn_size = info.st_size;
120223
auto buf = GetAlignedBuffer(rd_buf_size);
121224
// Get a uniform int distribution for offset generation
122225
std::random_device rd;
@@ -161,23 +264,39 @@ int ComputeBandwidth(int fd, uint64_t rd_buf_size, std::chrono::seconds timeout)
161264
using namespace eos::common;
162265
using namespace std::chrono;
163266
int bandwidth = -1;
164-
// Get file size
165-
struct stat info;
267+
uint64_t fn_size = GetBlkSize(fd);
166268

167-
if (fstat(fd, &info)) {
168-
std::cerr << "err: failed to stat file fd=" << fd << std::endl;
169-
eos_static_err("msg=\"failed to stat file\" fd=%i", fd);
269+
if (fn_size == 0) {
270+
std::cerr << "err: failed to get file size fd=" << fd << std::endl;
271+
eos_static_err("msg=\"failed to get file size\" fd=%i", fd);
170272
return bandwidth;
171273
}
172274

173-
uint64_t fn_size = info.st_size;
174275
auto buf = GetAlignedBuffer(rd_buf_size);
175-
uint64_t offset = 0ull;
176276
uint64_t max_read = 1 << 28; // 256 MB
277+
// Randomize start offset if file is large enough
278+
uint64_t offset = 0;
279+
280+
if (fn_size > max_read) {
281+
std::random_device rd;
282+
std::mt19937 gen(rd());
283+
// Align to rd_buf_size (4MB)
284+
uint64_t max_blocks = (fn_size - max_read) / rd_buf_size;
285+
std::uniform_int_distribution<uint64_t> distrib(0, max_blocks);
286+
offset = distrib(gen) * rd_buf_size;
287+
}
288+
289+
uint64_t start_offset = offset;
290+
uint64_t end_offset = offset + max_read;
291+
292+
if (end_offset > fn_size) {
293+
end_offset = fn_size;
294+
}
295+
177296
time_point<high_resolution_clock> start, end;
178297
start = high_resolution_clock::now();
179298

180-
while ((offset < fn_size) && (offset < max_read)) {
299+
while (offset < end_offset) {
181300
if (pread(fd, buf.get(), rd_buf_size, offset) == -1) {
182301
std::cerr << "error: failed to read at offset=" << offset << std::endl;
183302
eos_static_err("msg=\"failed read\" offset=%llu", offset);
@@ -195,7 +314,7 @@ int ComputeBandwidth(int fd, uint64_t rd_buf_size, std::chrono::seconds timeout)
195314

196315
end = high_resolution_clock::now();
197316
auto duration = duration_cast<microseconds> (end - start).count();
198-
bandwidth = ((offset >> 20) * 1000000.0) / duration;
317+
bandwidth = (((offset - start_offset) >> 20) * 1000000.0) / duration;
199318
return bandwidth;
200319
}
201320

fst/utils/DiskMeasurements.hh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,24 @@ bool FillFileGivenSize(int fd, uint64_t length);
5555
//------------------------------------------------------------------------------
5656
std::string MakeTemporaryFile(std::string base_path);
5757

58+
//------------------------------------------------------------------------------
59+
//! Get block device for a given path
60+
//!
61+
//! @param path input path
62+
//!
63+
//! @return device path if found, otherwise empty string
64+
//------------------------------------------------------------------------------
65+
std::string GetDevicePath(const std::string& path);
66+
67+
//------------------------------------------------------------------------------
68+
//! Get file/device size
69+
//!
70+
//! @param fd file descriptor
71+
//!
72+
//! @return size of the file or device
73+
//------------------------------------------------------------------------------
74+
uint64_t GetBlkSize(int fd);
75+
5876
//------------------------------------------------------------------------------
5977
//! Get IOPS measurement using the given file descriptor
6078
//!

0 commit comments

Comments
 (0)