Skip to content

Commit e1a2dd3

Browse files
committed
configurable shm/memory checks
1 parent 92fd0f0 commit e1a2dd3

File tree

4 files changed

+75
-30
lines changed

4 files changed

+75
-30
lines changed

doc/releaseNotes.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,3 +358,4 @@ This file describes the main feature changes for each readout.exe released versi
358358
## Next version
359359
- Updated configuration parameters:
360360
- added readout.disableTimeframes: when set, all timeframe-related features are disabled (STF slicing, TF rate limits, etc). All data are tagged with TF id = 0. To be used for some calibration runs not using a central trigger clock.
361+
- added consumer-FMQchannel.checkResources: controls which resources are checked for fitting unmanaged region. This is a comma-separated list of items to be checked. By default, no checks are done. Recommended value: /dev/shm, MemAvailable.

src/ConsumerFMQchannel.cxx

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -188,34 +188,44 @@ class ConsumerFMQchannel : public Consumer
188188

189189
// check system resources first, as region creation does not check available memory, so bad crash could occur later
190190
theLog.log(LogInfoDevel_(3002), "Configuring memory buffer %lld MB", (long long)(mMemorySize/1048576LL));
191-
192-
// free system memory
193-
unsigned long long freeBytes;
194-
if (getStatsFreeMemory(freeBytes)) {
195-
theLog.log(LogWarningSupport_(3230), "Can not get stats about system free memory available");
196-
} else {
197-
theLog.log(LogInfoSupport_(3230), "Stats free memory available: %lld MB", (long long)(freeBytes/1048576LL));
198-
if ((long long)freeBytes < mMemorySize) {
199-
theLog.log(LogErrorSupport_(3230), "Not enough system memory available - check /proc/meminfo");
200-
throw "ConsumerFMQ: can not allocate shared memory region";
201-
} else {
202191

203-
}
192+
// configuration parameter: | consumer-FairMQChannel-* | checkResources | string | | Check beforehand if unmanaged region would fit in given list of resources. Comma-separated list of items to be checked: eg /dev/shm, MemFree, MemAvailable. (any filesystem path, and any /proc/meminfo entry).|
193+
std::string cfgCheckResources;
194+
cfg.getOptionalValue<std::string>(cfgEntryPoint + ".checkResources", cfgCheckResources);
195+
bool isResourceError = 0;
196+
std::vector<std::string> resources;
197+
198+
if (getListFromString(cfgCheckResources, resources)) {
199+
throw("Can not parse configuration item checkResources");
204200
}
205-
206-
// free SHM memory
207-
// check only if transport is of type shmem
208-
if (cfgTransportType == "shmem") {
209-
if (getStatsFreeSHM(freeBytes)) {
210-
theLog.log(LogWarningSupport_(3230), "Can not get stats about shared memory available");
201+
202+
for(auto r : resources) {
203+
unsigned long long freeBytes;
204+
205+
int getStatsErr = 0;
206+
if (r.find_first_of("/")!=std::string::npos) {
207+
// this is a path
208+
getStatsErr = getStatsFilesystem(freeBytes, r);
211209
} else {
212-
theLog.log(LogInfoSupport_(3230), "Stats shared memory available: %lld MB", (long long)(freeBytes/1048576LL));
210+
// look in /proc/meminfo
211+
getStatsErr = getStatsMemory(freeBytes, r);
212+
r = "/proc/meminfo " + r;
213+
}
214+
215+
if (getStatsErr) {
216+
theLog.log(LogWarningSupport_(3230), "Can not get stats for %s", r.c_str());
217+
} else {
218+
theLog.log(LogInfoSupport_(3230), "Stats for %s : %lld MB available", r.c_str(), (long long)(freeBytes/1048576LL));
213219
if ((long long)freeBytes < mMemorySize) {
214-
theLog.log(LogErrorSupport_(3230), "Not enough shared memory available - check /dev/shm");
215-
throw "ConsumerFMQ: can not allocate shared memory region";
220+
theLog.log(LogErrorSupport_(3230), "Not enough space on %s", r.c_str());
221+
isResourceError = 1;
216222
}
217223
}
218224
}
225+
226+
if (isResourceError) {
227+
throw "ConsumerFMQ: can not allocate shared memory region, system resources check failed";
228+
}
219229

220230
theLog.log(LogInfoDevel_(3008), "Creating FMQ unmanaged memory region");
221231
memoryBuffer = sendingChannel->Transport()->CreateUnmanagedRegion(mMemorySize, [](void* /*data*/, size_t /*size*/, void* hint) { // cleanup callback

src/ReadoutUtils.cxx

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -181,18 +181,48 @@ int getIntegerListFromString(const std::string& input, std::vector<int>& output)
181181
return 0;
182182
}
183183

184-
int getStatsFreeMemory(unsigned long long &freeBytes) {
184+
185+
// Check whether a string is made only of "simple" characters.
// Arbitrary selection: letters, digits, and the characters ( ) _
// Anything else (e.g. '%', '/', ':' or blanks) makes the string rejected, which allows
// getStatsMemory() to safely embed a validated keyword in a scanf-style format string.
// An empty string is accepted (there is no offending character in it).
// returns true if all characters are accepted, false otherwise
bool isSimpleString(const std::string &str) {
  return std::all_of(str.begin(), str.end(), [](char c) {
    // the <cctype> classifiers have undefined behavior for negative values (other than EOF):
    // convert to unsigned char first, so that non-ASCII bytes are handled safely
    const unsigned char uc = static_cast<unsigned char>(c);
    return (isalnum(uc) != 0) || (c == '(') || (c == ')') || (c == '_');
  });
}
191+
192+
193+
int getListFromString(const std::string& input, std::vector<std::string>& output)
{
  // Parse a comma-separated list of strings.
  // Each item is stripped of its leading and trailing blanks.
  // Items left empty after trimming (e.g. "a,,b", "a, " or a blank-only input) are skipped,
  // so that callers never have to deal with an empty entry.
  // Parsed items are appended to output: its existing content is kept.
  // returns 0 on success (there is currently no failure case)
  const char* const blanks = "\t\n\v\f\r ";
  std::istringstream f(input);
  std::string s;
  while (std::getline(f, s, ',')) {
    const std::string::size_type first = s.find_first_not_of(blanks);
    if (first == std::string::npos) {
      // empty or blank-only item: nothing to store
      continue;
    }
    // there is at least one non-blank character, so 'last' is valid and >= first
    const std::string::size_type last = s.find_last_not_of(blanks);
    output.push_back(s.substr(first, last - first + 1));
  }
  return 0;
}
207+
208+
int getStatsMemory(unsigned long long &freeBytes, const std::string &keyword) {
185209
FILE *fp;
186210
const int lineBufSz = 128;
187211
char lineBuf[lineBufSz];
188212
long long value;
189213
int success = 0;
190214
freeBytes = 0;
191215

216+
// check if keyword looks suspicious
217+
if (!isSimpleString(keyword)) {
218+
return -1;
219+
}
220+
192221
if ((fp = fopen("/proc/meminfo", "r")) != NULL) {
193222

223+
std::string entryLine = keyword + ": %lld kB";
194224
while (fgets(lineBuf, lineBufSz, fp) != NULL) {
195-
if ( sscanf(lineBuf, "MemFree: %lld kB", &value) == 1) {
225+
if ( sscanf(lineBuf, entryLine.c_str(), &value) == 1) {
196226
freeBytes = ((unsigned long long)value) * 1024;
197227
success = 1;
198228
break;
@@ -208,12 +238,12 @@ int getStatsFreeMemory(unsigned long long &freeBytes) {
208238
return 0;
209239
}
210240

211-
int getStatsFreeSHM(unsigned long long &freeBytes) {
241+
int getStatsFilesystem(unsigned long long &freeBytes, const std::string &path) {
212242
int success = 0;
213243
freeBytes = 0;
214244

215245
try {
216-
freeBytes = (unsigned long long) (std::filesystem::space("/dev/shm")).free;
246+
freeBytes = (unsigned long long) (std::filesystem::space(path)).free;
217247
success = 1;
218248
}
219249
catch (...) {

src/ReadoutUtils.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ int getKeyValuePairsFromString(const std::string& input, std::map<std::string, s
4545
// returns 0 on success, -1 on error
4646
int getIntegerListFromString(const std::string& input, std::vector<int>& output);
4747

48+
// parse a string of comma-separated strings into a vector
49+
// returns 0 on success, -1 on error
50+
int getListFromString(const std::string& input, std::vector<std::string>& output);
51+
4852
// function to convert a value in bytes to a prefixed number 3+3 digits
4953
// suffix is the "base unit" to add after calculated prefix, e.g. Byte-> kBytes
5054
std::string NumberOfBytesToString(double value, const char* suffix, int base = 1024);
@@ -56,15 +60,15 @@ int getProcessStats(double& uTime, double& sTime);
5660
typedef uint32_t tRunNumber;
5761
typedef uint32_t tTimeframeId;
5862

59-
// function to retrieve amount of free memory on the system
63+
// function to retrieve some memory statistics on the system
6064
// Works only when /proc/meminfo available
65+
// Look for entry corresponding to provided keyword (eg: MemFree, MemAvailable)
6166
// returns 0 on success, -1 on error
62-
int getStatsFreeMemory(unsigned long long &freeBytes);
67+
int getStatsMemory(unsigned long long &freeBytes, const std::string& keyword);
6368

64-
// function to retrieve amount of free memory on the system
65-
// Works only when /dev/shm available
69+
// function to retrieve amount of free area for given path on the filesystem
6670
// returns 0 on success, -1 on error
67-
int getStatsFreeSHM(unsigned long long &freeBytes);
71+
int getStatsFilesystem(unsigned long long &freeBytes, const std::string& path);
6872

6973
// end of _READOUTUTILS_H
7074
#endif

0 commit comments

Comments
 (0)