Skip to content

Commit 0d6c839

Browse files
authored
Merge pull request #255 from sy-c/master
logbook
2 parents 8719af3 + 4e7add9 commit 0d6c839

File tree

3 files changed

+139
-46
lines changed

3 files changed

+139
-46
lines changed

doc/releaseNotes.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,3 +563,7 @@ This file describes the main feature changes for each readout.exe released versi
563563
- o2-readout-test-fmq-memory: more options.
564564
- Added protection against unhandled exceptions (e.g. from ReadoutCard).
565565
- Added protection against unexpected state machine transitions (e.g. ECS sending RESET while STARTING).
566+
567+
## next version
568+
- Added a separate thread to call bookkeeping API functions, so that data flow is not affected in case they are blocking.
569+
- o2-readout-test-lib-bookkeeping: more options.

src/mainReadout.cxx

Lines changed: 73 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,68 @@ class TheLogRedirection {
193193
}
194194
};
195195

196+
// an object running a thread to publish data to logbook
197+
// this allows isolating blocking calls to the bookkeeping api
198+
#ifdef WITH_LOGBOOK
199+
class LogbookThread {
200+
public:
201+
LogbookThread(std::unique_ptr<o2::bkp::api::BkpClient> h) {
202+
this->logbookHandle = std::move(h);
203+
shutdownRequest = 0;
204+
publishRequest = 0;
205+
std::function<void(void)> f = std::bind(&LogbookThread::run, this);
206+
th = std::make_unique<std::thread>(f);
207+
};
208+
~LogbookThread() {
209+
shutdownRequest = 1;
210+
if (th != nullptr) {
211+
th->join();
212+
th = nullptr;
213+
}
214+
};
215+
int publishStats() {
216+
if (logbookHandle == nullptr) return __LINE__; // fail if no connection
217+
if (publishRequest.load()) return __LINE__; // fail if request already pending
218+
publishRequest = 1;
219+
return 0;
220+
};
221+
private:
222+
std::unique_ptr<o2::bkp::api::BkpClient> logbookHandle; // handle to logbook
223+
std::unique_ptr<std::thread> th; // a thread reading from fd and injecting to theLog
224+
std::atomic<int> shutdownRequest; // flag to terminate thread
225+
std::atomic<int> publishRequest; // flag to ask thread to publish current values
226+
void run() {
227+
setThreadName("logbook");
228+
// thread loop, 10Hz
229+
while (!shutdownRequest && (logbookHandle != nullptr)) {
230+
if (publishRequest.load() == 1) {
231+
bool isOk = false;
232+
try {
233+
// interface: https://github.com/AliceO2Group/Bookkeeping/tree/main/cxx-client/include/BookkeepingApi
234+
logbookHandle->flp()->updateReadoutCountersByFlpNameAndRunNumber(
235+
occRole, occRunNumber,
236+
(int64_t)gReadoutStats.counters.numberOfSubtimeframes, (int64_t)gReadoutStats.counters.bytesReadout, (int64_t)gReadoutStats.counters.bytesRecorded, (int64_t)gReadoutStats.counters.bytesFairMQ
237+
);
238+
isOk = true;
239+
} catch (const std::exception& ex) {
240+
theLog.log(LogErrorDevel_(3210), "Failed to update logbook: %s", ex.what());
241+
} catch (...) {
242+
theLog.log(LogErrorDevel_(3210), "Failed to update logbook: unknown exception");
243+
}
244+
if (!isOk) {
245+
// closing logbook immediately
246+
logbookHandle = nullptr;
247+
theLog.log(LogErrorSupport_(3210), "Logbook now disabled");
248+
break;
249+
}
250+
publishRequest = 0;
251+
}
252+
usleep(100000);
253+
}
254+
}
255+
};
256+
#endif
257+
196258
class Readout
197259
{
198260

@@ -331,8 +393,9 @@ class Readout
331393
bool logFirstError = 0; // flag set to 1 after 1 error reported from iterateCheck/iterateRunning procedures
332394

333395
#ifdef WITH_LOGBOOK
334-
std::unique_ptr<o2::bkp::api::BkpClient> logbookHandle; // handle to logbook
396+
std::unique_ptr<LogbookThread> theLogbookThread; // handle to logbook
335397
#endif
398+
336399
#ifdef WITH_DB
337400
std::unique_ptr<ReadoutDatabase> dbHandle; // handle to readout database
338401
#endif
@@ -351,37 +414,12 @@ bool testLogbook = false; // flag for logbook test mode
351414

352415
void Readout::publishLogbookStats()
353416
{
417+
// gReadoutStats.print();
418+
354419
#ifdef WITH_LOGBOOK
355-
if (logbookHandle != nullptr) {
356-
bool isOk = false;
357-
try {
358-
// interface: https://github.com/AliceO2Group/Bookkeeping/tree/main/cxx-client/include/BookkeepingApi
359-
if (testLogbook) {
360-
// in test mode, create a dummy run entry in logbook
361-
if (occRole.length() == 0) { occRole = "flp-test"; }
362-
if (occRunNumber == 0) { occRunNumber = 999999999; }
363-
theLog.log(LogInfoDevel_(3210), "Logbook in test mode: create run number/flp %d / %s", (int)occRunNumber, occRole.c_str());
364-
/*
365-
std::time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
366-
logbookHandle->runStart(occRunNumber, now, now, "readout", RunType::TECHNICAL, 0, 0, 0, false, false, false, "normal");
367-
logbookHandle->flpAdd(occRole, "localhost", occRunNumber);
368-
*/
369-
testLogbook=0;
370-
}
371-
logbookHandle->flp()->updateReadoutCountersByFlpNameAndRunNumber(occRole, occRunNumber, (int64_t)gReadoutStats.counters.numberOfSubtimeframes, (int64_t)gReadoutStats.counters.bytesReadout, (int64_t)gReadoutStats.counters.bytesRecorded, (int64_t)gReadoutStats.counters.bytesFairMQ);
372-
isOk = true;
373-
} catch (const std::exception& ex) {
374-
theLog.log(LogErrorDevel_(3210), "Failed to update logbook: %s", ex.what());
375-
} catch (...) {
376-
theLog.log(LogErrorDevel_(3210), "Failed to update logbook: unknown exception");
377-
}
378-
if (!isOk) {
379-
// closing logbook immediately
380-
logbookHandle = nullptr;
381-
theLog.log(LogErrorSupport_(3210), "Logbook now disabled");
382-
}
420+
if (theLogbookThread != nullptr) {
421+
theLogbookThread->publishStats();
383422
}
384-
// gReadoutStats.print();
385423
#endif
386424

387425
#ifdef WITH_DB
@@ -924,9 +962,11 @@ int Readout::_configure(const boost::property_tree::ptree& properties)
924962
cfg.getOptionalValue<std::string>("readout.logbookUrl", cfgLogbookUrl);
925963

926964
theLog.log(LogInfoDevel, "Logbook enabled, %ds update interval, using URL = %s", cfgLogbookUpdateInterval, cfgLogbookUrl.c_str());
927-
logbookHandle = o2::bkp::api::BkpClientFactory::create(cfgLogbookUrl);
965+
auto logbookHandle = o2::bkp::api::BkpClientFactory::create(cfgLogbookUrl);
928966
if (logbookHandle == nullptr) {
929967
theLog.log(LogErrorSupport_(3210), "Failed to create handle to logbook");
968+
} else {
969+
theLogbookThread = std::make_unique<LogbookThread>(std::move(logbookHandle));
930970
}
931971
#endif
932972
}
@@ -1731,7 +1771,7 @@ int Readout::_reset()
17311771

17321772
#ifdef WITH_LOGBOOK
17331773
// closing logbook
1734-
logbookHandle = nullptr;
1774+
theLogbookThread = nullptr;
17351775
#endif
17361776

17371777
#ifdef WITH_ZMQ
@@ -2112,7 +2152,6 @@ int main(int argc, char* argv[])
21122152
} else if (theState == States::Configured) {
21132153
if (theCommand == Commands::Start) {
21142154
occRunNumber++;
2115-
printf("run number = %d\n", occRunNumber);
21162155
err = theReadout->start();
21172156
if (err) {
21182157
newState = States::Error;
@@ -2223,6 +2262,7 @@ int main(int argc, char* argv[])
22232262

22242263
// loop for testing, single iteration in normal conditions
22252264
for (int i = 0; i < nloop; i++) {
2265+
occRunNumber++;
22262266
err = theReadout->start();
22272267
if (err) {
22282268
return err;

src/testBookkeeping.cxx

Lines changed: 62 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#endif
44

55
#include <stdio.h>
6+
#include <stdarg.h>
67
#include <InfoLogger/InfoLogger.hxx>
78
using namespace AliceO2::InfoLogger;
89

@@ -15,23 +16,71 @@ int main(int argc, char **argv) {
1516
setenv("O2_INFOLOGGER_MODE", "stdout", 1);
1617
InfoLogger theLog;
1718

18-
const char *cfgLogbookUrl="localhost:4001";
19-
20-
if (argc>1) {
21-
cfgLogbookUrl=argv[1];
19+
// get options from command line
20+
std::string cfgLogbookUrl="localhost:4001"; // bookkeeping API server end-point
21+
int syncTime = 0; // startup synchronization
22+
std::string occRole = "flp-test";
23+
unsigned int occRunNumber = 1;
24+
int sleepTime = 1000; // sleep time between iterations (milliseconds)
25+
int nPerRun = 10; // iterations per run
26+
int nPerHost = 0; // iterations per host
27+
28+
for (int i = 1; i < argc; i++) {
29+
char *k = argv[i];
30+
char *v = strchr(k, '=');
31+
if (v != nullptr) {
32+
*v = 0;
33+
v++;
34+
}
35+
if (strcmp(k, "cfgLogbookUrl") == 0) {
36+
cfgLogbookUrl = v;
37+
} else if (strcmp(k, "syncTime") == 0) {
38+
syncTime = atoi(v);
39+
} else if (strcmp(k, "occRole") == 0) {
40+
occRole = v;
41+
} else if (strcmp(k, "occRunNumber") == 0) {
42+
occRunNumber = atoi(v);
43+
} else if (strcmp(k, "sleepTime") == 0) {
44+
sleepTime = atoi(v);
45+
} else if (strcmp(k, "nPerRun") == 0) {
46+
nPerRun = atoi(v);
47+
} else if (strcmp(k, "nPerHost") == 0) {
48+
nPerHost = atoi(v);
49+
} else {
50+
printf("unknown option %s\n", k);
51+
return -1;
52+
}
53+
}
54+
// wait until scheduled startup time (given modulo round number of seconds)
55+
if (syncTime>0) {
56+
time_t t = time(NULL);
57+
time_t waitT = syncTime - (t % syncTime);
58+
theLog.log("Waiting sync time (%ds)", (int)waitT);
59+
t = t + waitT;
60+
while (time(NULL) != t) {
61+
usleep(10000);
62+
}
2263
}
2364

2465
try {
25-
theLog.log("Create handle");
66+
theLog.log("Create handle to %s", cfgLogbookUrl.c_str());
2667
auto logbookHandle = o2::bkp::api::BkpClientFactory::create(cfgLogbookUrl);
27-
28-
std::string occRole = "flp-test";
29-
unsigned int occRunNumber = 1;
30-
31-
theLog.log("Updating");
32-
for (int i=0; i<10; i++) {
33-
logbookHandle->flp()->updateReadoutCountersByFlpNameAndRunNumber(occRole, occRunNumber, i,i,i,i);
34-
sleep(1);
68+
69+
theLog.log("Updating %s:%d (%d loops for %d hosts, %d ms sleep between each)", occRole.c_str(), (int)occRunNumber, nPerRun, nPerHost, sleepTime);
70+
for (int i=0; i<nPerRun; i++) {
71+
for (int k=1; k <= (nPerHost ? nPerHost : 1); k++) {
72+
char host[256];
73+
if (nPerHost>0) {
74+
snprintf(host,256,"%s-%03d", occRole.c_str(), k+1);
75+
} else {
76+
snprintf(host,256,"%s", occRole.c_str());
77+
}
78+
printf("%s : %d\n", host, i);
79+
logbookHandle->flp()->updateReadoutCountersByFlpNameAndRunNumber(host, occRunNumber, i,i,i,i);
80+
if (sleepTime>0) {
81+
usleep(sleepTime * 1000);
82+
}
83+
}
3584
}
3685
theLog.log("Done updating");
3786
}

0 commit comments

Comments
 (0)