Skip to content

Commit 826f88d

Browse files
authored
Merge pull request #100 from sy-c/master
v2.9.0
2 parents bbdcaf7 + 6fcfd39 commit 826f88d

14 files changed

+1731
-6
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ add_executable(
254254
src/infoLoggerServer.cxx
255255
src/InfoLoggerDispatch.cxx
256256
src/InfoLoggerDispatchBrowser.cxx
257+
src/InfoLoggerDispatchStats.cxx
257258
src/ConfigInfoLoggerServer.cxx
258259
src/infoLoggerMessageDecode.c
259260
src/InfoLoggerMessageHelper.cxx

doc/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ The following executables, presented with the _nicknames_ used below, are part o
1111
- o2-infologger-admindb or _infoLoggerAdminDB_: to maintain the logging database, i.e. create, archive, clean or destroy the database content.
1212
- o2-infologger-newdb : helper script for the initial set-up of the logging database, in particular for the definition of access credentials.
1313
- o2-infologger-tester : a tool to check the logging chain, from injection to DB storage and online subscription.
14+
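- o2-infologger-alert : a daemon generating alerts from the stream of online log messages, based on a set of defined logic rules.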
1415

1516
The following libraries are also provided, to inject logs into the system:
1617

@@ -314,3 +315,13 @@ achieved on CentOS 7 with e.g. (as root):
314315
- in the infoLoggerD configuration section, set: `msgQueueReset=1` (this is permanent, done on each startup of infoLoggerD, which might not be what you want)
315316
- when starting infoLoggerD process from the command line (not with the systemctl service), add option: `-o msgQueueReset=1`
316317
- create a file named [msgQueuePath].reset (by default, msgQueuePath=/tmp/infoLoggerD/infoLoggerD.queue), e.g. `touch /tmp/infoLoggerD/infoLoggerD.queue.reset`. This will reset the queue on next startup (by hand or with e.g. service infoLoggerD restart), and the reset file will also be deleted (which ensures cleanup is done once only).
318+
319+
320+
* o2-infologger-alert
321+
322+
This is a daemon that connects to the stream of online messages and generates alerts (each consisting of a log message and a telegraf metric), based on a set of logic rules defined to detect known bad situations that may require intervention. It is intended to make the shift crew aware of online operational issues and to trigger a response.
323+
324+
* o2-infologger-server statistics
325+
326+
Messages are indexed by the server, and the resulting statistics are published as a TCL list on a socket (e.g. port 6103). This allows categorizing messages and presenting a high-level view of the current logging activity.
327+
See the server configuration parameters `statsResetInterval` (window size), `statsPublishInterval` (publish interval) and `statsHistory` (amount of history kept), all expressed in seconds.

doc/releaseNotes.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,11 @@ This file describes the main feature changes for each InfoLogger released versio
174174

175175
# v2.8.3 - 11/06/2025
176176
- Compilation / security fix. (string handling)
177+
178+
# v2.9.0 - 11/11/2025
179+
- o2-infologger-server:
180+
- improved handling of SQL insert errors: a message is now dropped if its insertion still fails after retry.
181+
- added indexing of messages to publish stats
182+
- o2-infologger-alert service
183+
- o2-infologger-browser:
184+
- added extra startup options to preconfigure filters

o2-infologger-alert.service

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[Unit]
2+
Description=O2 infoLogger alert daemon, generating alerts from log messages.
3+
After=network.target
4+
5+
[Service]
6+
Type=simple
7+
TimeoutSec=5
8+
ExecStart=/opt/o2-InfoLogger/bin/o2-infologger-alert -z /etc/o2.d/infologger/infoLogger.cfg
9+
ExecStop=/usr/bin/kill -s SIGTERM $MAINPID
10+
GuessMainPID=1
11+
Restart=always
12+
13+
[Install]
14+
WantedBy=multi-user.target

src/ConfigInfoLoggerServer.cxx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,5 +38,13 @@ void ConfigInfoLoggerServer::readFromConfigFile(ConfigFile& config)
3838

3939
config.getOptionalValue<int>(INFOLOGGER_CONFIG_SECTION_NAME_SERVER ".serverPortTx", serverPortTx);
4040
config.getOptionalValue<int>(INFOLOGGER_CONFIG_SECTION_NAME_SERVER ".maxClientsTx", maxClientsTx);
41+
42+
config.getOptionalValue<int>(INFOLOGGER_CONFIG_SECTION_NAME_SERVER ".statsEnabled", statsEnabled);
43+
config.getOptionalValue<int>(INFOLOGGER_CONFIG_SECTION_NAME_SERVER ".statsPort", statsPort);
44+
config.getOptionalValue<int>(INFOLOGGER_CONFIG_SECTION_NAME_SERVER ".statsMaxClients", statsMaxClients);
45+
config.getOptionalValue<int>(INFOLOGGER_CONFIG_SECTION_NAME_SERVER ".statsPublishInterval", statsPublishInterval);
46+
config.getOptionalValue<int>(INFOLOGGER_CONFIG_SECTION_NAME_SERVER ".statsResetInterval", statsResetInterval);
47+
config.getOptionalValue<int>(INFOLOGGER_CONFIG_SECTION_NAME_SERVER ".statsHistory", statsHistory);
48+
4149
}
4250

src/ConfigInfoLoggerServer.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,14 @@ class ConfigInfoLoggerServer
5050
// settings for infoBrowser clients
5151
int serverPortTx = INFOLOGGER_DEFAULT_SERVER_TX_PORT;
5252
int maxClientsTx = 100;
53+
54+
// settings for infoLoggerStats clients
55+
int statsEnabled = 1; // flag to enable/disable feature
56+
int statsPort = INFOLOGGER_DEFAULT_SERVER_STATS_PORT; // TCP/IP port number
57+
int statsMaxClients = 5; // max number of clients connections allowed
58+
int statsPublishInterval = 5 ; // publish interval time (seconds)
59+
int statsResetInterval = 60; // size of the stats window (seconds)
60+
int statsHistory = 600; // backlog of stats kept and published (seconds)
5361
};
5462

5563
#endif // SRC_CONFIGINFOLOGGERSERVER_H_

src/InfoLoggerDispatch.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,5 +96,18 @@ class InfoLoggerDispatchSQL : public InfoLoggerDispatch
9696
std::unique_ptr<InfoLoggerDispatchSQLImpl> dPtr;
9797
};
9898

99+
// a class to dispatch stats about online messages
100+
class InfoLoggerDispatchStatsImpl;
101+
class InfoLoggerDispatchStats : public InfoLoggerDispatch
102+
{
103+
public:
104+
InfoLoggerDispatchStats(ConfigInfoLoggerServer* theConfig, SimpleLog* theLog);
105+
~InfoLoggerDispatchStats();
106+
int customMessageProcess(std::shared_ptr<InfoLoggerMessageList> msg);
107+
int customLoop();
108+
109+
private:
110+
std::unique_ptr<InfoLoggerDispatchStatsImpl> dPtr;
111+
};
99112
#endif
100113

src/InfoLoggerDispatchBrowser.cxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ InfoLoggerDispatchOnlineBrowser::InfoLoggerDispatchOnlineBrowser(ConfigInfoLogge
8282
throw __LINE__;
8383
}
8484
//theLog.info("%s() success\n",__FUNCTION__);
85-
85+
theLog->info("Publishing online messages on port %d", theConfig->serverPortTx);
86+
8687
// enable customloop callback
8788
isReady = true;
8889
}

src/InfoLoggerDispatchSQL.cxx

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "InfoLoggerDispatch.h"
1313
#include <mysql.h>
1414
#include <mysqld_error.h>
15+
#include <errmsg.h>
1516
#include "utility.h"
1617
#include "infoLoggerMessage.h"
1718
#include <unistd.h>
@@ -61,6 +62,9 @@ class InfoLoggerDispatchSQLImpl
6162
int commitTimeout = 1000000; // time between commits
6263
Timer commitTimer; // timer for transaction
6364
int commitNumberOfMsg; // number of messages since last commit
65+
66+
int numberOfSuccessiveFailures = 0; // count consecutive insert failures
67+
int maxNumberOfRetries = 1; // number of retries allowed
6468
};
6569

6670
void InfoLoggerDispatchSQLImpl::start()
@@ -384,13 +388,31 @@ int InfoLoggerDispatchSQLImpl::customMessageProcess(std::shared_ptr<InfoLoggerMe
384388
// Do the insertion
385389
if (mysql_stmt_execute(stmt)) {
386390
parent->logError("mysql_stmt_exec() failed: (%d) %s", mysql_errno(db), mysql_error(db));
391+
unsigned int err = mysql_errno(db);
387392
// column too long
388-
if (mysql_errno(db) == ER_DATA_TOO_LONG) {
393+
if ( err == ER_DATA_TOO_LONG) {
394+
return returnDroppedMessage(msg, m);
395+
}
396+
// column with wrong value
397+
if ( err == ER_TRUNCATED_WRONG_VALUE_FOR_FIELD) {
389398
return returnDroppedMessage(msg, m);
390399
}
391-
// retry with new connection - usually it means server was down
392-
disconnectDB();
393-
return returnDelayedMessage();
400+
// server gone - retry with new connection
401+
if (( err == CR_SERVER_LOST ) || ( err == CR_SERVER_GONE_ERROR )) {
402+
disconnectDB();
403+
return returnDelayedMessage();
404+
}
405+
406+
numberOfSuccessiveFailures++;
407+
if (numberOfSuccessiveFailures <= maxNumberOfRetries) {
408+
disconnectDB();
409+
return returnDelayedMessage();
410+
}
411+
numberOfSuccessiveFailures = 0;
412+
413+
// by default: drop message
414+
parent->logError("Unhandled error code %d after %d attempts", mysql_errno(db), maxNumberOfRetries);
415+
return returnDroppedMessage(msg, m);
394416
}
395417

396418
insertCount++;

0 commit comments

Comments
 (0)