Skip to content

Commit 85f6f7d

Browse files
committed
FMQ memory checks added
1 parent 23d1049 commit 85f6f7d

File tree

2 files changed

+36
-1
lines changed

2 files changed

+36
-1
lines changed

doc/releaseNotes.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,3 +352,5 @@ This file describes the main feature changes for each readout.exe released versi
352352
- FMQ stats are not printed when consoleUpdate=1 unless there is a running consumerFMQchannel with disableSending=0.
353353
- tfRateLimit is handled directly in the equipment, which avoids potential issues with timeframe slicing at very slow rates.
354354
- equipment-cruemulator: TF id extracted from trigger counters (single timer source for improved coherency).
355+
- Memory allocation policy updated: all readout memory is locked (RAM only, cannot be swapped). A warning is reported if the memory is not locked.
356+
- consumer-FMQchannel: checks are done before the FMQ shared memory region is created, to avoid ending up in a state with over-committed memory (the FMQ library does not check the validity of the region it creates, which can cause a severe crash when the region is accessed). Both /proc/meminfo (MemFree) and /dev/shm (if using the shmem transport type) should report enough available memory before proceeding. The memory is also immediately locked and zeroed to avoid later crashes.

src/ConsumerFMQchannel.cxx

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,14 +185,47 @@ class ConsumerFMQchannel : public Consumer
185185
cfg.getOptionalValue<std::string>(cfgEntryPoint + ".unmanagedMemorySize", cfgUnmanagedMemorySize);
186186
long long mMemorySize = ReadoutUtils::getNumberOfBytesFromString(cfgUnmanagedMemorySize.c_str());
187187
if (mMemorySize > 0) {
188+
189+
// check system resources first, as region creation does not check available memory, so bad crash could occur later
190+
theLog.log(LogInfoDevel_(3002), "Configuring memory buffer %lld MB", (long long)(mMemorySize/1048576LL));
191+
192+
// free system memory
193+
unsigned long long freeBytes;
194+
if (getStatsFreeMemory(freeBytes)) {
195+
theLog.log(LogWarningSupport_(3230), "Can not get stats about system free memory available");
196+
} else {
197+
theLog.log(LogInfoSupport_(3230), "Stats free memory available: %lld MB", (long long)(freeBytes/1048576LL));
198+
if ((long long)freeBytes < mMemorySize) {
199+
theLog.log(LogErrorSupport_(3230), "Not enough system memory available - check /proc/meminfo");
200+
throw "ConsumerFMQ: can not allocate shared memory region";
201+
} else {
202+
203+
}
204+
}
205+
206+
// free SHM memory
207+
// check only if transport is of type shmem
208+
if (cfgTransportType == "shmem") {
209+
if (getStatsFreeSHM(freeBytes)) {
210+
theLog.log(LogWarningSupport_(3230), "Can not get stats about shared memory available");
211+
} else {
212+
theLog.log(LogInfoSupport_(3230), "Stats shared memory available: %lld MB", (long long)(freeBytes/1048576LL));
213+
if ((long long)freeBytes < mMemorySize) {
214+
theLog.log(LogErrorSupport_(3230), "Not enough shared memory available - check /dev/shm");
215+
throw "ConsumerFMQ: can not allocate shared memory region";
216+
}
217+
}
218+
}
219+
220+
theLog.log(LogInfoDevel_(3008), "Creating FMQ unmanaged memory region");
188221
memoryBuffer = sendingChannel->Transport()->CreateUnmanagedRegion(mMemorySize, [](void* /*data*/, size_t /*size*/, void* hint) { // cleanup callback
189222
if (hint != nullptr) {
190223
DataBlockContainerReference* blockRef = (DataBlockContainerReference*)hint;
191224
//printf("ack hint=%p page %p\n",hint,(*blockRef)->getData());
192225
decDataBlockStats((*blockRef)->getData());
193226
delete blockRef;
194227
}
195-
});
228+
},"",0,fair::mq::RegionConfig{true, true}); // lock / zero
196229

197230
theLog.log(LogInfoDevel_(3008), "Got FMQ unmanaged memory buffer size %lu @ %p", memoryBuffer->GetSize(), memoryBuffer->GetData());
198231
}

0 commit comments

Comments
 (0)