Skip to content

Commit f7670c8

Browse files
authored
Merge pull request #256 from sy-c/master
readout-monitor-memory
2 parents 0d6c839 + 1efd3f0 commit f7670c8

File tree

7 files changed

+481
-8
lines changed

7 files changed

+481
-8
lines changed

CMakeLists.txt

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ find_package(BookkeepingApi)
7575
find_package(ZMQ)
7676
find_package(MySQL)
7777
#find_package(gperftools)
78+
find_package(SDL)
7879

7980
# extract include directories from targets
8081
get_target_property(InfoLogger_INCLUDE_DIRS AliceO2::InfoLogger INTERFACE_INCLUDE_DIRECTORIES)
@@ -147,9 +148,18 @@ if(${gperftools_FOUND})
147148
message(STATUS "gperftools enabled")
148149
endif()
149150

151+
# check SDL
152+
if(SDL_FOUND)
153+
message(
154+
STATUS
155+
"SDL found: ${SDL_INCLUDE_DIRS} ${SDL_LIBRARIES}")
156+
else ()
157+
message(STATUS "SDL not found, corresponding features will be disabled.")
158+
endif()
159+
150160

151161
# add flags to enable optional features in Readout, based on available dependencies
152-
add_compile_definitions($<$<BOOL:${Numa_FOUND}>:WITH_NUMA> $<$<BOOL:${RDMA_FOUND}>:WITH_RDMA> $<$<BOOL:${Configuration_FOUND}>:WITH_CONFIG> $<$<BOOL:${FairMQ_FOUND}>:WITH_FAIRMQ> $<$<BOOL:${Occ_FOUND}>:WITH_OCC> $<$<BOOL:${BookkeepingApi_FOUND}>:WITH_LOGBOOK> $<$<BOOL:${ZMQ_FOUND}>:WITH_ZMQ> $<$<BOOL:${MYSQL_FOUND}>:WITH_DB> $<$<BOOL:${ReadoutCard_FOUND}>:WITH_READOUTCARD> $<$<BOOL:${gperftools_FOUND}>:WITH_GPERFTOOLS>)
162+
add_compile_definitions($<$<BOOL:${Numa_FOUND}>:WITH_NUMA> $<$<BOOL:${RDMA_FOUND}>:WITH_RDMA> $<$<BOOL:${Configuration_FOUND}>:WITH_CONFIG> $<$<BOOL:${FairMQ_FOUND}>:WITH_FAIRMQ> $<$<BOOL:${Occ_FOUND}>:WITH_OCC> $<$<BOOL:${BookkeepingApi_FOUND}>:WITH_LOGBOOK> $<$<BOOL:${ZMQ_FOUND}>:WITH_ZMQ> $<$<BOOL:${MYSQL_FOUND}>:WITH_DB> $<$<BOOL:${ReadoutCard_FOUND}>:WITH_READOUTCARD> $<$<BOOL:${gperftools_FOUND}>:WITH_GPERFTOOLS> $<$<BOOL:${SDL_FOUND}>:WITH_SDL>)
153163

154164
# define include directories
155165
set(READOUT_INCLUDE_DIRS
@@ -332,6 +342,9 @@ endif()
332342
# executables
333343
###################################################
334344

345+
# list of executables build (to be completed depending on dependencies found)
346+
set(executables o2-readout-exe o2-readout-receiver o2-readout-test-fmq-tx o2-readout-test-fmq-rx o2-readout-test-fmq-perf-tx o2-readout-test-fmq-perf-rx o2-readout-test-memorybanks o2-readout-rawreader o2-readout-test-lib-monitoring)
347+
335348
# o2-readout-exe : main executable
336349
add_executable(
337350
o2-readout-exe
@@ -428,7 +441,6 @@ add_executable(
428441
o2-readout-eventdump
429442
${SOURCE_DIR}/eventDump.cxx
430443
$<TARGET_OBJECTS:objReadoutUtils>
431-
432444
)
433445

434446
# a process to monitor stats of remote readout processes
@@ -441,6 +453,17 @@ add_executable(
441453
target_include_directories(o2-readout-monitor PRIVATE ${READOUT_INCLUDE_DIRS})
442454
target_link_libraries(o2-readout-monitor PRIVATE AliceO2::InfoLogger AliceO2::Common ${ZMQ_LIBRARIES})
443455

456+
if(SDL_FOUND AND ZMQ_FOUND)
457+
# a process to monitor memory of remote readout processes
458+
add_executable(
459+
o2-readout-monitor-memory
460+
${SOURCE_DIR}/readoutMemoryMonitor.cxx
461+
)
462+
target_include_directories(o2-readout-monitor-memory PRIVATE ${READOUT_INCLUDE_DIRS})
463+
target_link_libraries(o2-readout-monitor-memory PRIVATE AliceO2::InfoLogger AliceO2::Common ${ZMQ_LIBRARIES} $<$<BOOL:${SDL_FOUND}>:${SDL_LIBRARIES}> SDL2)
464+
list (APPEND executables o2-readout-monitor-memory)
465+
endif()
466+
444467
# a process to admin readout database
445468
if (MYSQL_FOUND)
446469
add_executable(
@@ -449,6 +472,7 @@ add_executable(
449472
$<TARGET_OBJECTS:objReadoutDatabase>
450473
)
451474
target_include_directories(o2-readout-admin-db PRIVATE ${READOUT_INCLUDE_DIRS})
475+
list (APPEND executables o2-readout-admin-db)
452476
endif()
453477

454478
endif()
@@ -481,7 +505,6 @@ if (NOT FairMQ_FOUND)
481505
endif ()
482506

483507
# set include and libraries for all
484-
set(executables o2-readout-exe o2-readout-receiver o2-readout-test-fmq-tx o2-readout-test-fmq-rx o2-readout-test-fmq-perf-tx o2-readout-test-fmq-perf-rx o2-readout-test-memorybanks o2-readout-rawreader o2-readout-test-lib-monitoring)
485508
if (ReadoutCard_FOUND)
486509
list (APPEND executables o2-readout-test-roc o2-readout-test-roc-find)
487510
endif()
@@ -491,9 +514,6 @@ endif()
491514
if (BookkeepingApi_FOUND)
492515
list (APPEND executables o2-readout-test-lib-bookkeeping)
493516
endif()
494-
if (MYSQL_FOUND)
495-
list (APPEND executables o2-readout-admin-db)
496-
endif()
497517
foreach (exe ${executables})
498518
target_include_directories(${exe} PRIVATE ${READOUT_INCLUDE_DIRS})
499519
target_link_libraries(${exe} PRIVATE ${READOUT_LINK_LIBRARIES})

doc/configurationParameters.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,36 @@
11
# Readout configuration parameters reference
22

3+
This document describes all the configuration parameters that can be set in readout.
4+
5+
Some parameters control the behavior of readout as soon as it starts, and are valid for the lifetime of the process. (1)
6+
7+
Other parameters (2) are set whenever the control state-machine receives the CONFIGURE command (or shortly after startup when running o2-readout-exe outside of the ECS workflow, typically from the command line) and remain in effect until the RESET command.
8+
The corresponding parameters usually come from a local file or from a central repository, as provided by the O2 system.
9+
They can be reloaded several times during the lifetime of the process.
10+
11+
12+
# Readout defaults (1)
13+
14+
These are defined in the local file `/etc/o2.d/readout-default.cfg`, which is loaded on `o2-readout-exe` startup (if the file exists). They are valid for the lifetime of the o2-readout-exe process. There is usually no need for users to edit these settings. The file format is '.ini' style, with `[sections]` and `key=value` pairs.
15+
16+
The following table describes the parameters of the `[readout]` section.
17+
18+
| Parameter name | Type | Default value | Description |
19+
|--|--|--|--|
20+
21+
| memLock | int | 0 | Defines the memory swapping policy. If set, all readout memory is locked in RAM with mlockall() |
22+
| readoutExe | string | | If set, overwrites the current running executable with the one at given path. Used for tests. |
23+
| readoutConfig | string | | If set with readoutExe, launch the new exe with this parameter as argument. Used for tests. |
24+
| statsPublishAddress | string | | If set, address where to publish readout stats, eg "tcp://127.0.0.1:6008". Connect to this service with o2-readout-monitor. |
25+
| statsPublishInterval | string | | If set, interval for readout stats publish, in seconds. |
26+
| db | string | | If set, defines connection parameters to a MySQL database in the form 'user:password@host/dbname'. Runtime statistics are stored in a table. Structure can be created with o2-readout-admin-db. |
27+
| customCommandsEnabled | int | 0 | If set, custom commands are enabled, i.e. launching external scripts at some specific state transitions. See customCommands keyword below. |
28+
| membanksMonitorPath | string | | Path to memory banks monitor output (a local file FIFO, to be connected with o2-readout-monitor). |
29+
| membanksMonitorRate | double | 0 | Rate (in Hertz) at which to publish memory status. |
30+
31+
32+
# Readout configuration file (2)
33+
334
## Sections
435
The readout configuration is split in different sections. Some sections have a unique instance (single name), some may have multiple instances (prefix-[instance name], shown as prefix-\* in this document).
536

doc/memory.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,10 @@ The grand-total for the shared memory block is therefore typically: `((128 * (nu
6161
Which makes, for a FLP with 3 CRUs, 12 links per end-point, 5GB/s data throughput, 0.5s aggregator STF timeout, .25M page size and 30% safety margin:
6262
( (128 * 3 * 2 * 12 * 0.25 + 0.5 * 5000 ) * 1.2 + 42000 ) * 1.3 = 62 GB
6363

64+
65+
## Checking runtime memory status
66+
67+
The current usage of the data pages can be checked at runtime with a command such as:
68+
`o2-readout-monitor-memory tcp://127.0.0.1:50002`
69+
70+
provided that the parameter `membanksMonitorRate` is defined in `readout-defaults.cfg`.

doc/releaseNotes.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,3 +567,4 @@ This file describes the main feature changes for each readout.exe released versi
567567
## next version
568568
- Added a separate thread to call bookkeeping API functions, so that data flow is not affected in case they are blocking.
569569
- o2-readout-test-lib-bookkeeping: more options.
570+
- Added o2-readout-monitor-memory: to view the state of data pages in real time.

src/MemoryBankManager.cxx

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
#include <sys/types.h>
1919
#include <sys/stat.h>
2020

21+
#ifdef WITH_ZMQ
22+
#include <zmq.h>
23+
#endif
2124

2225
#include "readoutInfoLogger.h"
2326

@@ -258,6 +261,55 @@ void MemoryBankManager::reset()
258261
void MemoryBankManager::monitorThLoop() {
259262
AliceO2::Common::Timer t;
260263
t.reset(1000000.0 / monitorUpdateRate);
264+
MemoryPagesPool::Stats mps;
265+
266+
#ifdef WITH_ZMQ
267+
void* zmqContext = nullptr;
268+
void* zmqHandle = nullptr;
269+
bool zmqEnabled = 1;
270+
void zmqCleanup();
271+
int zmqError = 0;
272+
std::string zmqPort = "tcp://127.0.0.1:50002";
273+
try {
274+
zmqContext = zmq_ctx_new();
275+
if (zmqContext == nullptr) {
276+
zmqError = zmq_errno();
277+
throw __LINE__;
278+
}
279+
280+
zmqHandle = zmq_socket(zmqContext, ZMQ_PUB);
281+
if (zmqHandle == nullptr) {
282+
zmqError = zmq_errno();
283+
throw __LINE__;
284+
}
285+
286+
const int cfgZmqLinger = 1000;
287+
zmqError = zmq_setsockopt(zmqHandle, ZMQ_LINGER, (void*)&cfgZmqLinger, sizeof(cfgZmqLinger)); // close timeout
288+
if (zmqError) {
289+
throw __LINE__;
290+
}
291+
292+
zmqError = zmq_bind(zmqHandle, zmqPort.c_str());
293+
if (zmqError) {
294+
throw __LINE__;
295+
}
296+
297+
zmqEnabled = 1;
298+
299+
} catch (int lineErr) {
300+
if (zmqError) {
301+
theLog.log(LogErrorDevel, "ZeroMQ error @%d : (%d) %s", lineErr, zmqError, zmq_strerror(zmqError));
302+
} else {
303+
theLog.log(LogErrorDevel, "Error @%d", lineErr);
304+
}
305+
// ZMQ unavailable does not cause consumer to fail starting
306+
theLog.log(LogErrorDevel, "Memory banks manager: ZMQ stats publishing disabled");
307+
}
308+
if (zmqEnabled) {
309+
theLog.log(LogInfoDevel, "Memory banks manager: ZMQ stats publishing enabled on %s", zmqPort.c_str());
310+
}
311+
#endif
312+
261313
for(;!monitorThShutdown.load();) {
262314
if (t.isTimeout()) {
263315
std::unique_lock<std::mutex> lock(bankMutex);
@@ -271,6 +323,34 @@ void MemoryBankManager::monitorThLoop() {
271323
fclose(fp);
272324
}
273325
}
326+
#ifdef WITH_ZMQ
327+
if (zmqEnabled) {
328+
int msgSize = 0;
329+
uint32_t numberOfPools = pools.size();
330+
zmq_send(zmqHandle, &numberOfPools, sizeof(numberOfPools), ZMQ_SNDMORE);
331+
for (auto& it : pools) {
332+
it->getDetailedStats(mps);
333+
zmq_send(zmqHandle, &mps, sizeof(mps), ZMQ_SNDMORE);
334+
zmq_send(zmqHandle, &mps.states[0], sizeof(mps.states[0]) * mps.states.size(), ZMQ_SNDMORE);
335+
msgSize += (int)(sizeof(mps) + sizeof(mps.states[0]) * mps.states.size());
336+
}
337+
uint32_t trailer = 0xF00F;
338+
zmq_send(zmqHandle, &trailer, sizeof(trailer), ZMQ_DONTWAIT);
339+
msgSize += (int)(sizeof(numberOfPools) + sizeof(trailer));
340+
//theLog.log(LogDebugDevel, "mem monitor: published %d bytes", msgSize);
341+
}
342+
#endif
343+
/*
344+
for (auto& it : pools) {
345+
it->getDetailedStats(mps);
346+
printf("pool %d : %f - %f = %f\n", mps.id, mps.t1, mps.t0, mps.t1-mps.t0);
347+
for (unsigned int i=0; i<10; i++) {
348+
if (i>=mps.states.size()) break;
349+
printf("%d %s %.6f\n",i,MemoryPage::getPageStateString(mps.states[i].state), mps.states[i].timeInCurrentState);
350+
}
351+
}
352+
*/
353+
274354
t.increment();
275355
} else {
276356
std::this_thread::sleep_for(std::chrono::microseconds(10000));

src/ReadoutStats.cxx

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ ReadoutStats::~ReadoutStats() {
4444
publishThread->join();
4545
publishThread = nullptr;
4646
}
47+
#ifdef WITH_ZMQ
4748
zmqCleanup();
49+
#endif
4850
}
4951

5052
void ReadoutStats::reset(bool lightReset)
@@ -93,9 +95,8 @@ uint64_t stringToUint64(const char* in)
9395

9496
int ReadoutStats::startPublish(const std::string &cfgZmqPublishAddress, double cfgZmqPublishInterval) {
9597

96-
if (zmqEnabled) return __LINE__;
97-
9898
#ifdef WITH_ZMQ
99+
if (zmqEnabled) return __LINE__;
99100

100101
// default ZMQ settings for data monitoring
101102
int cfg_ZMQ_CONFLATE = 0; // buffer last message only
@@ -157,6 +158,9 @@ int ReadoutStats::startPublish(const std::string &cfgZmqPublishAddress, double c
157158
} else {
158159
return -1; //disabled
159160
}
161+
#else
162+
(void)cfgZmqPublishAddress;
163+
(void)cfgZmqPublishInterval;
160164
#endif
161165

162166
publishNow();
@@ -182,7 +186,9 @@ void ReadoutStats::zmqCleanup() {
182186

183187
int ReadoutStats::stopPublish() {
184188
publishNow();
189+
#ifdef WITH_ZMQ
185190
zmqCleanup();
191+
#endif
186192
return 0;
187193
}
188194

0 commit comments

Comments
 (0)