Skip to content

Commit c8e21a8

Browse files
authored
Merge pull request #233 from sy-c/master
check first orbit: also links + fatal error
2 parents 8a7c8d0 + 0d0a153 commit c8e21a8

File tree

4 files changed

+47
-7
lines changed

4 files changed

+47
-7
lines changed

doc/releaseNotes.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,3 +493,6 @@ This file describes the main feature changes for each readout.exe released versi
493493

494494
## v2.14.3 - 12/10/2022
495495
- Fix for DataDistribution multithreaded message formatting: TF ordering could get wrong out of the thread pool after having memory buffer empty.
496+
497+
## next release
498+
- equipment-*.rdhCheckFirstOrbit: when set, readout also checks the consistency of the first orbit between all the links of an equipment. If the first orbit is not the same for all links and equipments, readout will interrupt the run.

src/ReadoutEquipment.cxx

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ ReadoutEquipment::ReadoutEquipment(ConfigFile& cfg, std::string cfgEntryPoint, b
139139
cfg.getOptionalValue<int>(cfgEntryPoint + ".rdhUseFirstInPageEnabled", cfgRdhUseFirstInPageEnabled);
140140
// configuration parameter: | equipment-* | rdhDumpFirstInPageEnabled | int | 0 | If set, the first RDH in each data page is logged. Setting a negative number will print it only for the first N pages. |
141141
cfg.getOptionalValue<int>(cfgEntryPoint + ".rdhDumpFirstInPageEnabled", cfgRdhDumpFirstInPageEnabled);
142-
// configuration parameter: | equipment-* | rdhCheckFirstOrbit | int | 1 | If set, it is checked that the first orbit of all equipments is the same. |
142+
// configuration parameter: | equipment-* | rdhCheckFirstOrbit | int | 1 | If set, it is checked that the first orbit of all equipments and links is the same. |
143143
cfg.getOptionalValue<int>(cfgEntryPoint + ".rdhCheckFirstOrbit", cfgRdhCheckFirstOrbit);
144144
// configuration parameter: | equipment-* | rdhCheckDetectorField | int | 0 | If set, the detector field is checked and changes reported. |
145145
cfg.getOptionalValue<int>(cfgEntryPoint + ".rdhCheckDetectorField", cfgRdhCheckDetectorField);
@@ -255,8 +255,11 @@ void ReadoutEquipment::start()
255255
}
256256
equipmentLinksUsed.reset();
257257
equipmentLinksData.resize(RdhMaxLinkId + 1);
258-
equipmentLinksData.clear();
258+
equipmentLinksStats statsInit;
259+
std::fill(equipmentLinksData.begin(), equipmentLinksData.end(), statsInit);
260+
firstLinkId = undefinedLinkId;
259261
isError = 0;
262+
isFatalError = 0;
260263
currentBlockId = 0;
261264
isDataOn = false;
262265
saveErrorPagesCount = 0;
@@ -314,7 +317,7 @@ void ReadoutEquipment::stop()
314317
std::string perLinkStats;
315318
for (unsigned int i = 0; i<= RdhMaxLinkId; i++) {
316319
if (equipmentLinksUsed[i]) {
317-
perLinkStats += "[" + std::to_string(i) + "]=" + NumberOfBytesToString(equipmentLinksData[i], "B", 1024) + " ";
320+
perLinkStats += "[" + std::to_string(i) + "]=" + NumberOfBytesToString(equipmentLinksData[i].bytesRx, "B", 1024) + " ";
318321
}
319322
}
320323
theLog.log(LogInfoDevel_(3003), "Links data received: %s", perLinkStats.c_str());
@@ -635,7 +638,8 @@ uint64_t ReadoutEquipment::getTimeframeFromOrbit(uint32_t hbOrbit)
635638
theLog.log(LogInfoDevel_(3011), "Equipment %s : first HB orbit = %X", name.c_str(), (unsigned int)firstTimeframeHbOrbitBegin);
636639
if (!isOk) {
637640
if (cfgRdhCheckFirstOrbit) {
638-
theLog.log(LogErrorDevel_(3241), "Equipment %s : first HB orbit is different from other equipments", name.c_str());
641+
theLog.log(LogErrorSupport_(3241), "Equipment %s : first HB orbit is different from other equipments", name.c_str());
642+
isFatalError++;
639643
}
640644
}
641645
}
@@ -810,9 +814,29 @@ int ReadoutEquipment::processRdh(DataBlockContainerReference& block)
810814
// update links statistics
811815
if (h.getLinkId() <= RdhMaxLinkId) {
812816
equipmentLinksUsed[h.getLinkId()] = 1;
813-
equipmentLinksData[h.getLinkId()] += blockHeader.dataSize;
817+
equipmentLinksData[h.getLinkId()].bytesRx += blockHeader.dataSize;
818+
819+
// check link first orbit
820+
// at this stage gReadoutStats.counters.firstOrbit is defined (done in getTimeframeFromOrbit())
821+
uint32_t orbitId = h.getHbOrbit();
822+
uint8_t linkId = h.getLinkId();
823+
if (firstLinkId == undefinedLinkId) {
824+
firstLinkId = linkId;
825+
firstLinkOrbit = orbitId;
826+
}
827+
if (!equipmentLinksData[linkId].firstOrbitIsDefined) {
828+
if (orbitId != firstLinkOrbit) {
829+
if (cfgRdhCheckFirstOrbit) {
830+
theLog.log(LogErrorSupport_(3241), "Equipment %s : first HB orbit of link %d is different from first link(%d): 0x%X != 0x%X", name.c_str(), linkId, firstLinkId, orbitId, firstLinkOrbit);
831+
isFatalError++;
832+
}
833+
} else {
834+
//theLog.log(LogInfoDevel_(3241), "Equipment %s : first HB orbit of link %d is same as first link(%d): 0x%X = 0x%X", name.c_str(), linkId, firstLinkId, orbitId, firstLinkOrbit);
835+
}
836+
equipmentLinksData[linkId].firstOrbit = orbitId;
837+
equipmentLinksData[linkId].firstOrbitIsDefined = 1;
838+
}
814839
}
815-
816840
}
817841

818842
// Dump RDH if configured to do so

src/ReadoutEquipment.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ class ReadoutEquipment
5151

5252
bool stopOnError = false; // if set, readout will stop when this equipment reports an error (isError flag)
5353
int isError = 0; // flag which might be used to count number of errors occurring in the equipment
54+
int isFatalError = 0; // flag which might be used to count number of fatal errors occurring in the equipment
5455

5556
// protected:
5657
// todo: give direct access to output FIFO?
@@ -114,7 +115,16 @@ class ReadoutEquipment
114115
std::vector<CounterStats> equipmentStats;
115116
std::vector<CounterValue> equipmentStatsLast;
116117
std::bitset<RdhMaxLinkId + 1> equipmentLinksUsed;
117-
std::vector<uint64_t> equipmentLinksData;
118+
119+
struct equipmentLinksStats {
120+
uint64_t bytesRx = 0; // number of bytes received
121+
uint32_t firstOrbit = undefinedOrbit; // first orbit received from this link
122+
bool firstOrbitIsDefined = 0; // when 0, no value has been set for first orbit yet. need this flag because undefinedOrbit=0.
123+
};
124+
uint8_t firstLinkId = undefinedLinkId; // id of first link which sent data
125+
uint32_t firstLinkOrbit = undefinedOrbit; // 1st orbit received from this equipment
126+
127+
std::vector<equipmentLinksStats> equipmentLinksData;
118128

119129
double cfgConsoleStatsUpdateTime = 0; // number of seconds between regular printing of statistics on console (if zero, only on stop)
120130
AliceO2::Common::Timer consoleStatsTimer; // timer to keep track of elapsed time between console statistics updates

src/mainReadout.cxx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1302,6 +1302,9 @@ int Readout::iterateCheck()
13021302
if ((readoutDevice->isError) && (readoutDevice->stopOnError)) {
13031303
isError = 1;
13041304
}
1305+
if (readoutDevice->isFatalError) {
1306+
isError = 1;
1307+
}
13051308
}
13061309
if (isError) {
13071310
return -1;

0 commit comments

Comments
 (0)