|
| 1 | +/// |
| 2 | +/// \file 14-OrbitId.cxx |
| 3 | +/// \author Adam Wegrzynek <[email protected]> |
| 4 | +/// |
| 5 | + |
| 6 | +#include "../src/Transports/KafkaConsumer.h" |
| 7 | +#include "../src/Transports/Unix.h" |
| 8 | + |
| 9 | +#include <iostream> |
| 10 | +#include <memory> |
| 11 | +#include <thread> |
| 12 | +#include <boost/program_options.hpp> |
| 13 | +#include <boost/algorithm/string/join.hpp> |
| 14 | +#include <boost/algorithm/string.hpp> |
| 15 | + |
| 16 | +#include "envs.pb.h" |
| 17 | + |
| 18 | +using namespace o2::monitoring; |
| 19 | + |
| 20 | +std::map<std::string, unsigned int> detectorRunMap; |
| 21 | + |
| 22 | +std::map<unsigned int, std::string> referenceOrbitIdMap; |
| 23 | + |
| 24 | +std::string getValueFromMetric(const std::string& key, const std::string& metric) { |
| 25 | + auto indexStart = metric.find(key + "="); |
| 26 | + if (indexStart == std::string::npos) { |
| 27 | + return {}; |
| 28 | + } |
| 29 | + auto indexEnd = std::find_if(metric.begin() + indexStart, metric.end(), [](const char& s) { return s == ' ' or s == ','; }); |
| 30 | + return std::string(metric.begin() + indexStart + key.size() + 1, indexEnd); |
| 31 | +} |
| 32 | + |
| 33 | + |
| 34 | +int main(int argc, char* argv[]) |
| 35 | +{ |
| 36 | + boost::program_options::options_description desc("Program options"); |
| 37 | + desc.add_options() |
| 38 | + ("kafka-host", boost::program_options::value<std::string>()->required(), "Kafka broker hostname"); |
| 39 | + boost::program_options::variables_map vm; |
| 40 | + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm); |
| 41 | + boost::program_options::notify(vm); |
| 42 | + |
| 43 | + std::vector<std::string> topics = {"aliecs.env_list.RUNNING", "cru.link_status"}; |
| 44 | + auto kafkaConsumer = std::make_unique<transports::KafkaConsumer>(vm["kafka-host"].as<std::string>() + ":9092", topics, "orbitid"); |
| 45 | + auto unixSocket = std::make_unique<transports::Unix>("/tmp/telegraf.sock"); |
| 46 | + for (;;) { |
| 47 | + auto messages = kafkaConsumer->pull(); |
| 48 | + if (!messages.empty()) { |
| 49 | + for (auto& message : messages) { |
| 50 | + // handle active runs messages |
| 51 | + if (message.first == "aliecs.env_list.RUNNING") { |
| 52 | + aliceo2::envs::ActiveRunsList activeRuns; |
| 53 | + activeRuns.ParseFromString(message.second); |
| 54 | + detectorRunMap.clear(); |
| 55 | + for (int i = 0; i < activeRuns.activeruns_size(); i++) { |
| 56 | + auto run = activeRuns.activeruns(i).runnumber(); |
| 57 | + for (int j = 0; j < activeRuns.activeruns(i).detectors_size(); j++) { |
| 58 | + auto detector = activeRuns.activeruns(i).detectors(j); |
| 59 | + for (auto& c : detector) c = std::tolower(c); |
| 60 | + detectorRunMap.insert({detector, run}); |
| 61 | + } |
| 62 | + } |
| 63 | + if (detectorRunMap.empty()) { |
| 64 | + std::cout << "No ongoing runs" << std::endl; |
| 65 | + referenceOrbitIdMap.clear(); |
| 66 | + } |
| 67 | + for (const auto &p : detectorRunMap) { |
| 68 | + std::cout << p.first << " belongs to run " << p.second << std::endl; |
| 69 | + } |
| 70 | + // if SOR |
| 71 | + // handle link status messages |
| 72 | + } else if (message.first == "cru.link_status") { |
| 73 | + auto detector = getValueFromMetric("detector", message.second); |
| 74 | + auto orbitId = getValueFromMetric("orbitSor", message.second); |
| 75 | + auto status = getValueFromMetric("status", message.second); |
| 76 | + if (detector.empty() or orbitId.empty()) { |
| 77 | + continue; |
| 78 | + } |
| 79 | + // if detector is not running |
| 80 | + auto detectorInRun = detectorRunMap.find(detector); |
| 81 | + if (detectorInRun == detectorRunMap.end()) { |
| 82 | + continue; |
| 83 | + } |
| 84 | + // if link is excluded |
| 85 | + if (status != "1i") { |
| 86 | + continue; |
| 87 | + } |
| 88 | + |
| 89 | + std::string outputMetric = "orbitIdMismatch" + message.second.substr(message.second.find(","), message.second.find(" ") - message.second.find(",")) + ",run=" + std::to_string(detectorRunMap.at(detector)); |
| 90 | + auto referenceOrbit = referenceOrbitIdMap.find(detectorRunMap.at(detector)); |
| 91 | + if (referenceOrbit == referenceOrbitIdMap.end()) { |
| 92 | + /// wait for trigger |
| 93 | + if (orbitId == "0i") { |
| 94 | + continue; |
| 95 | + } |
| 96 | + referenceOrbitIdMap.insert({detectorRunMap.at(detector), orbitId}); |
| 97 | + std::cout << "Set reference orbitId for run " << detectorRunMap.at(detector) << ": " << orbitId << std::endl; |
| 98 | + unixSocket->send(outputMetric + " reference=" + orbitId); |
| 99 | + } |
| 100 | + auto referenceOrbitId = referenceOrbitIdMap.at(detectorRunMap.at(detector)); |
| 101 | + if (orbitId != referenceOrbitId) { |
| 102 | + std::cout << "Abnormal condition for " << detector << "; expected orbitID: " << referenceOrbitId << " but got: " << orbitId << std::endl; |
| 103 | + unixSocket->send(outputMetric + " mismatched=" + orbitId); |
| 104 | + } |
| 105 | + } |
| 106 | + } |
| 107 | + } |
| 108 | + std::this_thread::sleep_for(std::chrono::milliseconds(1)); |
| 109 | + } |
| 110 | +} |
0 commit comments