Skip to content

Commit 35a3f83

Browse files
authored
[OMON-669] Detect orbitId mismatch (#320)
1 parent c37d937 commit 35a3f83

File tree

2 files changed

+112
-1
lines changed

2 files changed

+112
-1
lines changed

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ endif()
3131

3232
# Define project
3333
project(Monitoring
34-
VERSION 3.15.5
34+
VERSION 3.16.0
3535
DESCRIPTION "O2 Monitoring library"
3636
LANGUAGES CXX
3737
)
@@ -276,6 +276,7 @@ if(RdKafka_FOUND AND Protobuf_FOUND AND CURL_FOUND)
276276

277277
set(PROTO_EXAMPLES
278278
examples/12-KafkaToInfluxDb.cxx
279+
examples/14-OrbitId.cxx
279280
examples/8-KafkaToHttpServer.cxx
280281
)
281282
foreach (example ${PROTO_EXAMPLES})

examples/14-OrbitId.cxx

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
///
2+
/// \file 14-OrbitId.cxx
3+
/// \author Adam Wegrzynek <[email protected]>
4+
///
5+
6+
#include "../src/Transports/KafkaConsumer.h"
7+
#include "../src/Transports/Unix.h"
8+
9+
#include <iostream>
10+
#include <memory>
11+
#include <thread>
12+
#include <boost/program_options.hpp>
13+
#include <boost/algorithm/string/join.hpp>
14+
#include <boost/algorithm/string.hpp>
15+
16+
#include "envs.pb.h"
17+
18+
using namespace o2::monitoring;
19+
20+
std::map<std::string, unsigned int> detectorRunMap;
21+
22+
std::map<unsigned int, std::string> referenceOrbitIdMap;
23+
24+
std::string getValueFromMetric(const std::string& key, const std::string& metric) {
25+
auto indexStart = metric.find(key + "=");
26+
if (indexStart == std::string::npos) {
27+
return {};
28+
}
29+
auto indexEnd = std::find_if(metric.begin() + indexStart, metric.end(), [](const char& s) { return s == ' ' or s == ','; });
30+
return std::string(metric.begin() + indexStart + key.size() + 1, indexEnd);
31+
}
32+
33+
34+
int main(int argc, char* argv[])
35+
{
36+
boost::program_options::options_description desc("Program options");
37+
desc.add_options()
38+
("kafka-host", boost::program_options::value<std::string>()->required(), "Kafka broker hostname");
39+
boost::program_options::variables_map vm;
40+
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
41+
boost::program_options::notify(vm);
42+
43+
std::vector<std::string> topics = {"aliecs.env_list.RUNNING", "cru.link_status"};
44+
auto kafkaConsumer = std::make_unique<transports::KafkaConsumer>(vm["kafka-host"].as<std::string>() + ":9092", topics, "orbitid");
45+
auto unixSocket = std::make_unique<transports::Unix>("/tmp/telegraf.sock");
46+
for (;;) {
47+
auto messages = kafkaConsumer->pull();
48+
if (!messages.empty()) {
49+
for (auto& message : messages) {
50+
// handle active runs messages
51+
if (message.first == "aliecs.env_list.RUNNING") {
52+
aliceo2::envs::ActiveRunsList activeRuns;
53+
activeRuns.ParseFromString(message.second);
54+
detectorRunMap.clear();
55+
for (int i = 0; i < activeRuns.activeruns_size(); i++) {
56+
auto run = activeRuns.activeruns(i).runnumber();
57+
for (int j = 0; j < activeRuns.activeruns(i).detectors_size(); j++) {
58+
auto detector = activeRuns.activeruns(i).detectors(j);
59+
for (auto& c : detector) c = std::tolower(c);
60+
detectorRunMap.insert({detector, run});
61+
}
62+
}
63+
if (detectorRunMap.empty()) {
64+
std::cout << "No ongoing runs" << std::endl;
65+
referenceOrbitIdMap.clear();
66+
}
67+
for (const auto &p : detectorRunMap) {
68+
std::cout << p.first << " belongs to run " << p.second << std::endl;
69+
}
70+
// if SOR
71+
// handle link status messages
72+
} else if (message.first == "cru.link_status") {
73+
auto detector = getValueFromMetric("detector", message.second);
74+
auto orbitId = getValueFromMetric("orbitSor", message.second);
75+
auto status = getValueFromMetric("status", message.second);
76+
if (detector.empty() or orbitId.empty()) {
77+
continue;
78+
}
79+
// if detector is not running
80+
auto detectorInRun = detectorRunMap.find(detector);
81+
if (detectorInRun == detectorRunMap.end()) {
82+
continue;
83+
}
84+
// if link is excluded
85+
if (status != "1i") {
86+
continue;
87+
}
88+
89+
std::string outputMetric = "orbitIdMismatch" + message.second.substr(message.second.find(","), message.second.find(" ") - message.second.find(",")) + ",run=" + std::to_string(detectorRunMap.at(detector));
90+
auto referenceOrbit = referenceOrbitIdMap.find(detectorRunMap.at(detector));
91+
if (referenceOrbit == referenceOrbitIdMap.end()) {
92+
/// wait for trigger
93+
if (orbitId == "0i") {
94+
continue;
95+
}
96+
referenceOrbitIdMap.insert({detectorRunMap.at(detector), orbitId});
97+
std::cout << "Set reference orbitId for run " << detectorRunMap.at(detector) << ": " << orbitId << std::endl;
98+
unixSocket->send(outputMetric + " reference=" + orbitId);
99+
}
100+
auto referenceOrbitId = referenceOrbitIdMap.at(detectorRunMap.at(detector));
101+
if (orbitId != referenceOrbitId) {
102+
std::cout << "Abnormal condition for " << detector << "; expected orbitID: " << referenceOrbitId << " but got: " << orbitId << std::endl;
103+
unixSocket->send(outputMetric + " mismatched=" + orbitId);
104+
}
105+
}
106+
}
107+
}
108+
std::this_thread::sleep_for(std::chrono::milliseconds(1));
109+
}
110+
}

0 commit comments

Comments
 (0)