Skip to content

Commit 1efd3f0

Browse files
committed
added o2-readout-monitor-memory
1 parent 6d221c5 commit 1efd3f0

File tree

5 files changed

+122
-14
lines changed

5 files changed

+122
-14
lines changed

CMakeLists.txt

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ find_package(BookkeepingApi)
7575
find_package(ZMQ)
7676
find_package(MySQL)
7777
#find_package(gperftools)
78+
find_package(SDL)
7879

7980
# extract include directories from targets
8081
get_target_property(InfoLogger_INCLUDE_DIRS AliceO2::InfoLogger INTERFACE_INCLUDE_DIRECTORIES)
@@ -147,9 +148,18 @@ if(${gperftools_FOUND})
147148
message(STATUS "gperftools enabled")
148149
endif()
149150

151+
# check SDL
152+
if(SDL_FOUND)
153+
message(
154+
STATUS
155+
"SDL found: ${SDL_INCLUDE_DIRS} ${SDL_LIBRARIES}")
156+
else ()
157+
message(STATUS "SDL not found, corresponding features will be disabled.")
158+
endif()
159+
150160

151161
# add flags to enable optional features in Readout, based on available dependencies
152-
add_compile_definitions($<$<BOOL:${Numa_FOUND}>:WITH_NUMA> $<$<BOOL:${RDMA_FOUND}>:WITH_RDMA> $<$<BOOL:${Configuration_FOUND}>:WITH_CONFIG> $<$<BOOL:${FairMQ_FOUND}>:WITH_FAIRMQ> $<$<BOOL:${Occ_FOUND}>:WITH_OCC> $<$<BOOL:${BookkeepingApi_FOUND}>:WITH_LOGBOOK> $<$<BOOL:${ZMQ_FOUND}>:WITH_ZMQ> $<$<BOOL:${MYSQL_FOUND}>:WITH_DB> $<$<BOOL:${ReadoutCard_FOUND}>:WITH_READOUTCARD> $<$<BOOL:${gperftools_FOUND}>:WITH_GPERFTOOLS>)
162+
add_compile_definitions($<$<BOOL:${Numa_FOUND}>:WITH_NUMA> $<$<BOOL:${RDMA_FOUND}>:WITH_RDMA> $<$<BOOL:${Configuration_FOUND}>:WITH_CONFIG> $<$<BOOL:${FairMQ_FOUND}>:WITH_FAIRMQ> $<$<BOOL:${Occ_FOUND}>:WITH_OCC> $<$<BOOL:${BookkeepingApi_FOUND}>:WITH_LOGBOOK> $<$<BOOL:${ZMQ_FOUND}>:WITH_ZMQ> $<$<BOOL:${MYSQL_FOUND}>:WITH_DB> $<$<BOOL:${ReadoutCard_FOUND}>:WITH_READOUTCARD> $<$<BOOL:${gperftools_FOUND}>:WITH_GPERFTOOLS> $<$<BOOL:${SDL_FOUND}>:WITH_SDL>)
153163

154164
# define include directories
155165
set(READOUT_INCLUDE_DIRS
@@ -332,6 +342,9 @@ endif()
332342
# executables
333343
###################################################
334344

345+
# list of executables to build (extended below depending on which dependencies are found)
346+
set(executables o2-readout-exe o2-readout-receiver o2-readout-test-fmq-tx o2-readout-test-fmq-rx o2-readout-test-fmq-perf-tx o2-readout-test-fmq-perf-rx o2-readout-test-memorybanks o2-readout-rawreader o2-readout-test-lib-monitoring)
347+
335348
# o2-readout-exe : main executable
336349
add_executable(
337350
o2-readout-exe
@@ -428,7 +441,6 @@ add_executable(
428441
o2-readout-eventdump
429442
${SOURCE_DIR}/eventDump.cxx
430443
$<TARGET_OBJECTS:objReadoutUtils>
431-
432444
)
433445

434446
# a process to monitor stats of remote readout processes
@@ -441,6 +453,17 @@ add_executable(
441453
target_include_directories(o2-readout-monitor PRIVATE ${READOUT_INCLUDE_DIRS})
442454
target_link_libraries(o2-readout-monitor PRIVATE AliceO2::InfoLogger AliceO2::Common ${ZMQ_LIBRARIES})
443455

456+
if(SDL_FOUND AND ZMQ_FOUND)
457+
# a process to monitor memory of remote readout processes
458+
add_executable(
459+
o2-readout-monitor-memory
460+
${SOURCE_DIR}/readoutMemoryMonitor.cxx
461+
)
462+
target_include_directories(o2-readout-monitor-memory PRIVATE ${READOUT_INCLUDE_DIRS})
463+
# SDL_FOUND is already guaranteed by the enclosing if(), so the SDL library list is passed directly:
# an unquoted generator expression wrapping a ;-separated list gets split into several arguments.
# NOTE(review): SDL2 is still linked by bare name while find_package(SDL) is what sets SDL_LIBRARIES - confirm the intended SDL version.
target_link_libraries(o2-readout-monitor-memory PRIVATE AliceO2::InfoLogger AliceO2::Common ${ZMQ_LIBRARIES} ${SDL_LIBRARIES} SDL2)
464+
list (APPEND executables o2-readout-monitor-memory)
465+
endif()
466+
444467
# a process to admin readout database
445468
if (MYSQL_FOUND)
446469
add_executable(
@@ -449,6 +472,7 @@ add_executable(
449472
$<TARGET_OBJECTS:objReadoutDatabase>
450473
)
451474
target_include_directories(o2-readout-admin-db PRIVATE ${READOUT_INCLUDE_DIRS})
475+
list (APPEND executables o2-readout-admin-db)
452476
endif()
453477

454478
endif()
@@ -481,7 +505,6 @@ if (NOT FairMQ_FOUND)
481505
endif ()
482506

483507
# set include and libraries for all
484-
set(executables o2-readout-exe o2-readout-receiver o2-readout-test-fmq-tx o2-readout-test-fmq-rx o2-readout-test-fmq-perf-tx o2-readout-test-fmq-perf-rx o2-readout-test-memorybanks o2-readout-rawreader o2-readout-test-lib-monitoring)
485508
if (ReadoutCard_FOUND)
486509
list (APPEND executables o2-readout-test-roc o2-readout-test-roc-find)
487510
endif()
@@ -491,9 +514,6 @@ endif()
491514
if (BookkeepingApi_FOUND)
492515
list (APPEND executables o2-readout-test-lib-bookkeeping)
493516
endif()
494-
if (MYSQL_FOUND)
495-
list (APPEND executables o2-readout-admin-db)
496-
endif()
497517
foreach (exe ${executables})
498518
target_include_directories(${exe} PRIVATE ${READOUT_INCLUDE_DIRS})
499519
target_link_libraries(${exe} PRIVATE ${READOUT_LINK_LIBRARIES})

doc/memory.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,10 @@ The grand-total for the shared memory block is therefore typically: `((128 * (nu
6161
Which makes, for a FLP with 3 CRUs, 12 links per end-point, 5GB/s data throughput, 0.5s aggregator STF timeout, .25M page size and 30% safety margin:
6262
( (128 * 3 * 2 * 12 * 0.25 + 0.5 * 5000 ) * 1.2 + 42000 ) * 1.3 = 62 GB
6363

64+
65+
## Checking runtime memory status
66+
67+
The current usage of the data pages can be checked at runtime with a command such as:
68+
`o2-readout-monitor-memory tcp://127.0.0.1:50002`
69+
70+
provided that the parameter `membanksMonitorRate` is defined in `readout-defaults.cfg`.

doc/releaseNotes.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,3 +567,4 @@ This file describes the main feature changes for each readout.exe released versi
567567
## next version
568568
- Added a separate thread to call bookkeeping API functions, so that data flow is not affected in case they are blocking.
569569
- o2-readout-test-lib-bookkeeping: more options.
570+
- Added o2-readout-monitor-memory: to view the state of data pages in real time.

src/MemoryBankManager.cxx

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
#include <sys/types.h>
1919
#include <sys/stat.h>
2020

21+
#ifdef WITH_ZMQ
22+
#include <zmq.h>
23+
#endif
2124

2225
#include "readoutInfoLogger.h"
2326

@@ -258,6 +261,55 @@ void MemoryBankManager::reset()
258261
void MemoryBankManager::monitorThLoop() {
259262
AliceO2::Common::Timer t;
260263
t.reset(1000000.0 / monitorUpdateRate);
264+
MemoryPagesPool::Stats mps;
265+
266+
#ifdef WITH_ZMQ
267+
void* zmqContext = nullptr;
268+
void* zmqHandle = nullptr;
269+
// publishing stays disabled until the ZMQ context, socket, options and bind below have all succeeded
bool zmqEnabled = false;
270+
void zmqCleanup();
271+
int zmqError = 0;
272+
std::string zmqPort = "tcp://127.0.0.1:50002";
273+
try {
274+
zmqContext = zmq_ctx_new();
275+
if (zmqContext == nullptr) {
276+
zmqError = zmq_errno();
277+
throw __LINE__;
278+
}
279+
280+
zmqHandle = zmq_socket(zmqContext, ZMQ_PUB);
281+
if (zmqHandle == nullptr) {
282+
zmqError = zmq_errno();
283+
throw __LINE__;
284+
}
285+
286+
const int cfgZmqLinger = 1000;
287+
zmqError = zmq_setsockopt(zmqHandle, ZMQ_LINGER, (void*)&cfgZmqLinger, sizeof(cfgZmqLinger)); // close timeout
288+
if (zmqError) {
289+
throw __LINE__;
290+
}
291+
292+
zmqError = zmq_bind(zmqHandle, zmqPort.c_str());
293+
if (zmqError) {
294+
throw __LINE__;
295+
}
296+
297+
zmqEnabled = 1;
298+
299+
} catch (int lineErr) {
300+
if (zmqError) {
301+
theLog.log(LogErrorDevel, "ZeroMQ error @%d : (%d) %s", lineErr, zmqError, zmq_strerror(zmqError));
302+
} else {
303+
theLog.log(LogErrorDevel, "Error @%d", lineErr);
304+
}
305+
// a ZMQ setup failure is not fatal: it is only logged here and the monitoring loop still starts
306+
theLog.log(LogErrorDevel, "Memory banks manager: ZMQ stats publishing disabled");
307+
}
308+
if (zmqEnabled) {
309+
theLog.log(LogInfoDevel, "Memory banks manager: ZMQ stats publishing enabled on %s", zmqPort.c_str());
310+
}
311+
#endif
312+
261313
for(;!monitorThShutdown.load();) {
262314
if (t.isTimeout()) {
263315
std::unique_lock<std::mutex> lock(bankMutex);
@@ -271,6 +323,34 @@ void MemoryBankManager::monitorThLoop() {
271323
fclose(fp);
272324
}
273325
}
326+
#ifdef WITH_ZMQ
327+
if (zmqEnabled) {
328+
int msgSize = 0;
329+
uint32_t numberOfPools = pools.size();
330+
zmq_send(zmqHandle, &numberOfPools, sizeof(numberOfPools), ZMQ_SNDMORE);
331+
for (auto& it : pools) {
332+
it->getDetailedStats(mps);
333+
zmq_send(zmqHandle, &mps, sizeof(mps), ZMQ_SNDMORE);
334+
// send the per-page states as one raw frame; data() avoids indexing element 0 of a possibly empty container
zmq_send(zmqHandle, mps.states.data(), sizeof(mps.states[0]) * mps.states.size(), ZMQ_SNDMORE);
335+
msgSize += (int)(sizeof(mps) + sizeof(mps.states[0]) * mps.states.size());
336+
}
337+
uint32_t trailer = 0xF00F;
338+
zmq_send(zmqHandle, &trailer, sizeof(trailer), ZMQ_DONTWAIT);
339+
msgSize += (int)(sizeof(numberOfPools) + sizeof(trailer));
340+
//theLog.log(LogDebugDevel, "mem monitor: published %d bytes", msgSize);
341+
}
342+
#endif
343+
/*
344+
for (auto& it : pools) {
345+
it->getDetailedStats(mps);
346+
printf("pool %d : %f - %f = %f\n", mps.id, mps.t1, mps.t0, mps.t1-mps.t0);
347+
for (unsigned int i=0; i<10; i++) {
348+
if (i>=mps.states.size()) break;
349+
printf("%d %s %.6f\n",i,MemoryPage::getPageStateString(mps.states[i].state), mps.states[i].timeInCurrentState);
350+
}
351+
}
352+
*/
353+
274354
t.increment();
275355
} else {
276356
std::this_thread::sleep_for(std::chrono::microseconds(10000));

src/readoutMemoryMonitor.cxx

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -199,13 +199,13 @@ int main(int argc, char** argv)
199199
}
200200
auto stats = ((MemoryPagesPool::Stats *)msgBuffer[1+p*2]);
201201
unsigned int npages = msgSize[2+p*2] / sizeof(MemoryPagesPool::PageStat);
202-
printf("%d %d: %f - %f - %u\n",p, stats->id, stats->t0,stats->t1,npages);
202+
//printf("%d %d: %f - %f - %u\n",p, stats->id, stats->t0,stats->t1,npages);
203203
auto ps = (MemoryPagesPool::PageStat*)msgBuffer[2+p*2];
204204
int c = 0;
205205
for (unsigned int k = 0; k<npages; k++) {
206206
if (ps[k].state != MemoryPage::PageState::Idle) c++;
207207
}
208-
printf("busy pages = %d / %u\n",c,npages);
208+
//printf("busy pages = %d / %u\n",c,npages);
209209
}
210210

211211
if (isOk) {
@@ -216,17 +216,17 @@ int main(int argc, char** argv)
216216
SDL_SetRenderDrawColor(hRenderer, 0, 0, 0, 0);
217217
SDL_RenderClear(hRenderer);
218218

219-
printf("%d,%d\n", szx,szy);
219+
//printf("%d,%d\n", szx,szy);
220220

221221
int border=10;
222222
// one column per pool
223223
int cw=(szx-(nPools+1)*border)/nPools;
224224
int cy=szy-2*border;
225-
printf("cw,cy= %d, %d\n",cw, cy);
225+
//printf("cw,cy= %d, %d\n",cw, cy);
226226
for (unsigned int p=0; p<nPools; p++) {
227227
int ox=border+(border+cw)*p;
228228
int oy=border;
229-
printf("ox,oy= %d, %d\n",ox, oy);
229+
//printf("ox,oy= %d, %d\n",ox, oy);
230230
SDL_SetRenderDrawColor(hRenderer, 0, 0, 255, 255);
231231
SDL_Rect r = {ox,oy,cw,cy};
232232
SDL_RenderDrawRect(hRenderer, &r);
@@ -239,12 +239,12 @@ int main(int argc, char** argv)
239239
unsigned int sq=(cw-2*bb)*(cy-2*bb);
240240
unsigned int pxk = (int)sqrt(sq/npages);
241241
pxk=6;
242-
printf("pxk=%d\n",pxk);
242+
//printf("pxk=%d\n",pxk);
243243
unsigned int npl = (cw-bb) / pxk;
244244
for (unsigned int k = 0; k<npages; k++) {
245245
switch (ps[k].state) {
246246
case MemoryPage::PageState::Idle:
247-
SDL_SetRenderDrawColor(hRenderer, 32, 32, 32, 255);
247+
SDL_SetRenderDrawColor(hRenderer, 48, 48, 48, 255);
248248
break;
249249
case MemoryPage::PageState::InROC:
250250
SDL_SetRenderDrawColor(hRenderer, 0, 255, 255, 255);
@@ -292,7 +292,7 @@ int main(int argc, char** argv)
292292

293293

294294
if (SDL_PollEvent(&event)) {
295-
printf("event type=%d\n",(int)event.type);
295+
//printf("event type=%d\n",(int)event.type);
296296
shutdown=1;
297297
switch (event.type) {
298298
case SDL_QUIT:

0 commit comments

Comments
 (0)