Skip to content

Commit 999dd4d

Browse files
committed
Implement DDG pattern error-checking
1 parent 99c49da commit 999dd4d

File tree

2 files changed

+66
-62
lines changed

2 files changed

+66
-62
lines changed

src/CommandLineUtilities/ProgramDmaBench.cxx

Lines changed: 60 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,17 @@ namespace po = boost::program_options;
5151

5252
namespace {
5353
/// Initial value for link counters
54-
constexpr auto LINK_COUNTER_INITIAL_VALUE = std::numeric_limits<uint64_t>::max();
54+
constexpr auto LINK_COUNTER_INITIAL_VALUE = std::numeric_limits<uint32_t>::max();
5555
/// Maximum supported links
5656
constexpr auto MAX_LINKS = 32;
5757
/// Interval for low priority thread (display updates, etc)
5858
constexpr auto LOW_PRIORITY_INTERVAL = 10ms;
5959
/// Buffer value to reset to
6060
constexpr uint32_t BUFFER_DEFAULT_VALUE = 0xCcccCccc;
6161
/// Fields: Time(hour:minute:second), Pages pushed, Pages read, Errors, RDH Errors, °C
62-
const std::string PROGRESS_FORMAT_HEADER(" %-8s %-12s %-12s %-12s %-5.1f");
62+
const std::string PROGRESS_FORMAT_HEADER(" %-8s %-12s %-12s %-12s %-12s %-5.1f");
6363
/// Fields: Time(hour:minute:second), Pages pushed, Pages read, Errors, RDH Errors, °C
64-
const std::string PROGRESS_FORMAT(" %02s:%02s:%02s %-12s %-12s %-12s %-5.1f");
64+
const std::string PROGRESS_FORMAT(" %02s:%02s:%02s %-12s %-12s %-12s %-12s %-5.1f");
6565
/// Path for error log
6666
auto READOUT_ERRORS_PATH = "readout_errors.txt";
6767
/// Max amount of errors that are recorded into the error stream
@@ -522,11 +522,8 @@ class ProgramDmaBench: public Program
522522
int popped = 0;
523523
while ((std::chrono::steady_clock::now() - start) < timeout) {
524524
auto size = mChannel->getReadyQueueSize();
525-
for (int i = 0; i < size; ++i) {
526-
auto superpage = mChannel->getSuperpage();
527-
readoutPage(mBufferBaseAddress + superpage.getOffset(), superpage.getSize(), fetchAddReadoutCount());
525+
for (int i = 0; i < size; ++i)
528526
mChannel->popSuperpage();
529-
}
530527
popped += size;
531528
}
532529
return popped;
@@ -546,10 +543,9 @@ class ProgramDmaBench: public Program
546543
case CardType::Crorc:
547544
return get32bitFromPage(pageAddress, 0);
548545
case CardType::Cru: {
549-
//return get32bitFromPage(pageAddress, Cru::DataFormat::getHeaderSize() / sizeof(uint32_t) + 2) - 1;
550-
// First payload word is empty, second is half empty. So we use the third and count one back.
546+
// Grab the first payload word as the counter's beginning
551547
auto payload = reinterpret_cast<const volatile uint32_t *>(pageAddress + Cru::DataFormat::getHeaderSize());
552-
return payload[2] - 1;
548+
return payload[0];
553549
}
554550
default: throw std::runtime_error("Error checking unsupported for this card type");
555551
}
@@ -574,13 +570,6 @@ class ProgramDmaBench: public Program
574570
}
575571
}
576572

577-
// First received page initializes the counter
578-
if (mLinkCounters[linkId] == LINK_COUNTER_INITIAL_VALUE) {
579-
auto counter = getDataGeneratorCounterFromPage(pageAddress);
580-
mErrorStream << b::format("resync counter for e:%d l:%d cnt:%d\n") % readoutCount % linkId % counter;
581-
mLinkCounters[linkId] = counter;
582-
}
583-
584573
// Check for errors
585574
bool hasError = true;
586575
switch (mCardType) {
@@ -608,63 +597,69 @@ class ProgramDmaBench: public Program
608597

609598
bool checkErrorsCru(uintptr_t pageAddress, size_t pageSize, int64_t eventNumber, int linkId)
610599
{
611-
const uint64_t counter = mLinkCounters[linkId];
612-
// Get stuff from the header
613-
const auto words256 = Cru::DataFormat::getEventSize(reinterpret_cast<const char*>(pageAddress)); // Amount of 256 bit words in DMA page
614-
const auto wordsBytes = words256 * (256 / 8);
615-
616-
if (words256 < 2 || wordsBytes > pageSize) {
617-
// Report error
618-
mErrorCount++;
619-
if (mErrorCount < MAX_RECORDED_ERRORS) {
620-
mErrorStream << b::format("event:%1% l:%2% cnt:%3% words:%4% size:%5% words out of range\n") % eventNumber
621-
% linkId % counter % words256 % pageSize;
600+
// Get memsize from the header
601+
const auto memBytes = Cru::DataFormat::getEventSize(reinterpret_cast<const char*>(pageAddress)); // Memory size [RDH, Payload]
602+
603+
if (memBytes < 40 || memBytes > pageSize) {
604+
// Report RDH error
605+
mRdhErrorCount++;
606+
if (mRdhErrorCount < MAX_RECORDED_ERRORS) {
607+
mErrorStream << b::format("[RDHERR]\tevent:%1% l:%2% payloadBytes:%3% size:%4% words out of range\n") % eventNumber
608+
% linkId % memBytes % pageSize;
622609
}
623610
return false;
624611
}
625612

626-
constexpr size_t HEADER_WORDS_256 = Cru::DataFormat::getHeaderSizeWords(); // We skip the header
627-
const auto payload = reinterpret_cast<const volatile uint32_t*>(pageAddress + Cru::DataFormat::getHeaderSize());
628-
const auto payloadWords256 = words256 - HEADER_WORDS_256;
629-
630-
mLinkCounters[linkId] += payloadWords256 * 6;
613+
// Get counter value only if page is valid...
614+
if (mLinkCounters[linkId] == LINK_COUNTER_INITIAL_VALUE) {
615+
auto counter = getDataGeneratorCounterFromPage(pageAddress);
616+
mErrorStream << b::format("resync counter for e:%d l:%d cnt:%x\n") % eventNumber % linkId % counter;
617+
mLinkCounters[linkId] = counter;
618+
}
619+
const uint32_t counter = mLinkCounters[linkId];
631620

632-
// Every 256 bit words is built as follows:
633-
// 32 bits (0x0) + 16 bits (0x0) + 16 bit lower counter + 32 bit counter+1 + 32 bit counter+2 + ...
621+
//skip the header -> address + 0x40
622+
const auto payload = reinterpret_cast<const volatile uint32_t*>(pageAddress + Cru::DataFormat::getHeaderSize());
623+
const auto payloadBytes = memBytes - Cru::DataFormat::getHeaderSize();
624+
625+
// Every 256-bit word is built as follows:
626+
// 32-bit counter + 32-bit counter + 32-bit word holding the 16 LSBs of the counter + 32-bit 0
627+
// 32-bit (counter + 1) + 32-bit (counter + 1) + 32-bit word holding the 16 LSBs of (counter + 1) + 32-bit 0
634628

635629
bool foundError = false;
636630
auto checkValue = [&](uint32_t i, uint32_t expectedValue, uint32_t actualValue) {
637631
if (expectedValue != actualValue) {
638632
foundError = true;
639-
addError(eventNumber, linkId, i, counter, expectedValue, actualValue);
633+
addError(eventNumber, linkId, i, counter, expectedValue, actualValue, payloadBytes);
640634
}
641635
};
642636

643-
// Check per 256 bit word
644-
for (uint32_t i = 0; i < payloadWords256; ++i) {
645-
uint32_t offset = counter + i * 6; // 6 32-bit data words per 256-bit word
646-
uint32_t word32 = i * 8; // 8 32-bit data words per 256-bit word
647-
648-
checkValue(word32 + 0, 0x0, payload[word32 + 0]); // First 32 bits are 0
649-
checkValue(word32 + 1, (offset + 0) & 0xffff, payload[word32 + 1]); // Upper 16 bits 0, lower 16 bits truncated counter
650-
checkValue(word32 + 2, offset + 1, payload[word32 + 2]);
651-
checkValue(word32 + 3, offset + 2, payload[word32 + 3]);
652-
checkValue(word32 + 4, 0x0, payload[word32 + 4]); // Another 0-padded word
653-
checkValue(word32 + 5, (offset + 3) & 0xffff, payload[word32 + 5]); // Upper 16 bits 0, lower 16 bits truncated counter
654-
checkValue(word32 + 6, offset + 4, payload[word32 + 6]);
655-
checkValue(word32 + 7, offset + 5, payload[word32 + 7]);
637+
// Check the payload one 128-bit group (four 32-bit words) at a time; the expected counter increases by 1 per group
638+
uint32_t offset = (counter % (0x100000000)); // mod 0xffffffff + 1
639+
uint32_t i = 0;
640+
while ((i*4) < payloadBytes) {
641+
uint32_t word32 = i; // 32-bit word pointer
642+
643+
checkValue(word32 + 0, offset , payload[word32 + 0]); //32-bit counter
644+
checkValue(word32 + 1, offset , payload[word32 + 1]); //32-bit counter
645+
checkValue(word32 + 2, offset & 0xffff, payload[word32 + 2]); //16-lsb truncated counter
646+
checkValue(word32 + 3, 0x0 , payload[word32 + 3]); //32-bit 0-padding word
647+
648+
offset = (offset+1) % (0x100000000); //Increase counter by 1 - mod 0xffffffff + 1
649+
i += 4; // advance by four 32-bit words (one 128-bit counter group)
656650
}
657651

652+
mLinkCounters[linkId] = offset;
658653
return foundError;
659654
}
660655

661656
void addError(int64_t eventNumber, int linkId, int index, uint32_t generatorCounter, uint32_t expectedValue,
662-
uint32_t actualValue)
657+
uint32_t actualValue, uint32_t payloadBytes)
663658
{
664659
mErrorCount++;
665660
if (mErrorCount < MAX_RECORDED_ERRORS) {
666-
mErrorStream << b::format("event:%d link:%d cnt:%d i:%d exp:%d val:%d\n")
667-
% eventNumber % linkId % generatorCounter % index % expectedValue % actualValue;
661+
mErrorStream << b::format("[ERROR]\tevent:%d link:%d cnt:%x payloadBytes:%d i:%d exp:%x val:%x\n")
662+
% eventNumber % linkId % generatorCounter % payloadBytes % index % expectedValue % actualValue;
668663
}
669664
}
670665

@@ -678,15 +673,15 @@ class ProgramDmaBench: public Program
678673
auto pageSize32 = pageSize / sizeof(int32_t);
679674

680675
if (page[0] != counter) {
681-
addError(eventNumber, linkId, 0, counter, counter, page[0]);
676+
addError(eventNumber, linkId, 0, counter, counter, page[0], 0);
682677
}
683678

684679
// We skip the SDH
685680
for (uint32_t i = 8; i < pageSize32; ++i) {
686681
uint32_t expectedValue = patternFunction(i);
687682
uint32_t actualValue = page[i];
688683
if (actualValue != expectedValue) {
689-
addError(eventNumber, linkId, i, counter, expectedValue, actualValue);
684+
addError(eventNumber, linkId, i, counter, expectedValue, actualValue, 0);
690685
return true;
691686
}
692687
}
@@ -736,6 +731,7 @@ class ProgramDmaBench: public Program
736731
format % mReadoutCount.load(std::memory_order_relaxed);
737732

738733
mOptions.noErrorCheck ? format % "n/a" : format % mErrorCount; // Errors
734+
mOptions.noErrorCheck ? format % "n/a" : format % mRdhErrorCount; // Errors
739735

740736
if (mOptions.noTemperature) {
741737
format % "n/a";
@@ -765,8 +761,8 @@ class ProgramDmaBench: public Program
765761

766762
void printStatusHeader()
767763
{
768-
auto line1 = b::format(PROGRESS_FORMAT_HEADER) % "Time" % "Pushed" % "Read" % "Errors" % "°C";
769-
auto line2 = b::format(PROGRESS_FORMAT) % "00" % "00" % "00" % '-' % '-' % '-' % '-';
764+
auto line1 = b::format(PROGRESS_FORMAT_HEADER) % "Time" % "Pushed" % "Read" % "Errors" % "RDH Errors" % "°C";
765+
auto line2 = b::format(PROGRESS_FORMAT) % "00" % "00" % "00" % '-' % '-' % '-' % '-' % '-';
770766
cout << '\n' << line1;
771767
cout << '\n' << line2;
772768
}
@@ -796,6 +792,7 @@ class ProgramDmaBench: public Program
796792
put("Errors", "n/a");
797793
} else {
798794
put("Errors", mErrorCount);
795+
put("RDH Errors", mRdhErrorCount);
799796
}
800797
}
801798

@@ -944,17 +941,22 @@ class ProgramDmaBench: public Program
944941
CardType::type mCardType;
945942

946943
/// Page counters per link. Indexed by link ID.
947-
std::array<std::atomic<uint64_t>, MAX_LINKS> mLinkCounters;
944+
std::array<std::atomic<uint32_t>, MAX_LINKS> mLinkCounters;
948945

949946
/// Amount of superpages pushed
950947
std::atomic<uint64_t> mPushCount { 0 };
951948

952-
/// Amount of superpages read out
949+
//Kostas TODO: I'm under the impression this counter
950+
// actually refers to dma pages...
951+
/// Amount of superpages read out
953952
std::atomic<uint64_t> mReadoutCount { 0 };
954953

955954
/// Total amount of errors encountered
956955
int64_t mErrorCount = 0;
957956

957+
/// Total amount of RDH errors encountered
958+
int64_t mRdhErrorCount = 0;
959+
958960
/// Keep on pushing until we're explicitly stopped
959961
bool mInfinitePages = false;
960962

src/Cru/DataFormat.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,21 @@ namespace
3131

3232
uint32_t getLinkId(const char* data)
3333
{
34-
return Utilities::getBits(getWord(data, 2), 8, 15);
34+
return Utilities::getBits(getWord(data, 3), 0, 7); //bits #[96-103] from RDH
3535
}
3636

3737
uint32_t getEventSize(const char* data)
3838
{
39-
return Utilities::getBits(getWord(data, 3), 8, 23);
39+
return Utilities::getBits(getWord(data, 2), 16, 31); //bits #[80-95] from RDH
4040
}
4141

42+
//TODO: Add getOffset (pointer to the beginning of the next dma page)
43+
4244
/// Get header size in bytes
4345
constexpr size_t getHeaderSize()
4446
{
4547
// Two 256-bit words = 64 bytes
46-
return 64;
48+
return 0x40;
4749
}
4850

4951
/// Get header size in 256-bit words
@@ -57,4 +59,4 @@ constexpr size_t getHeaderSizeWords()
5759
} // namespace roc
5860
} // namespace AliceO2
5961

60-
#endif // ALICEO2_READOUTCARD_CRU_DATAFORMAT_H_
62+
#endif // ALICEO2_READOUTCARD_CRU_DATAFORMAT_H_

0 commit comments

Comments
 (0)