Skip to content

Commit b5b3563

Browse files
committed
Implement internal PCIe error-checking
1 parent 7a54732 commit b5b3563

File tree

2 files changed

+79
-22
lines changed

2 files changed

+79
-22
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ build
22
.idea
33
.project
44
.cproject
5-
cmake-build-*
5+
cmake-build-*
6+
*.swp

src/CommandLineUtilities/ProgramDmaBench.cxx

Lines changed: 77 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ constexpr auto LOW_PRIORITY_INTERVAL = 10ms;
5959
/// Buffer value to reset to
6060
constexpr uint32_t BUFFER_DEFAULT_VALUE = 0xCcccCccc;
6161
/// Fields: Time(hour:minute:second), Pages pushed, Pages read, Errors, °C
62-
const std::string PROGRESS_FORMAT_HEADER(" %-8s %-12s %-12s %-12s %-12s %-5.1f");
62+
const std::string PROGRESS_FORMAT_HEADER(" %-8s %-12s %-12s %-12s %-5.1f");
6363
/// Fields: Time(hour:minute:second), Pages pushed, Pages read, Errors, °C
64-
const std::string PROGRESS_FORMAT(" %02s:%02s:%02s %-12s %-12s %-12s %-12s %-5.1f");
64+
const std::string PROGRESS_FORMAT(" %02s:%02s:%02s %-12s %-12s %-12s %-5.1f");
6565
/// Path for error log
6666
auto READOUT_ERRORS_PATH = "readout_errors.txt";
6767
/// Max amount of errors that are recorded into the error stream
@@ -537,14 +537,14 @@ class ProgramDmaBench: public Program
537537
return value;
538538
}
539539

540-
uint32_t getDataGeneratorCounterFromPage(uintptr_t pageAddress)
540+
uint32_t getDataGeneratorCounterFromPage(uintptr_t pageAddress, size_t headerSize)
541541
{
542542
switch (mCardType) {
543543
case CardType::Crorc:
544544
return get32bitFromPage(pageAddress, 0);
545545
case CardType::Cru: {
546546
// Grab the first payload word as the counter's beginning
547-
auto payload = reinterpret_cast<const volatile uint32_t *>(pageAddress + Cru::DataFormat::getHeaderSize());
547+
auto payload = reinterpret_cast<const volatile uint32_t *>(pageAddress + headerSize);
548548
return payload[0];
549549
}
550550
default: throw std::runtime_error("Error checking unsupported for this card type");
@@ -577,7 +577,7 @@ class ProgramDmaBench: public Program
577577
hasError = checkErrorsCrorc(pageAddress, pageSize, readoutCount, linkId);
578578
break;
579579
case CardType::Cru:
580-
hasError = checkErrorsCru(pageAddress, pageSize, readoutCount, linkId);
580+
hasError = checkErrorsCru(pageAddress, pageSize, readoutCount, linkId, mOptions.loopbackModeString);
581581
break;
582582
default:
583583
throw std::runtime_error("Error checking unsupported for this card type");
@@ -595,15 +595,75 @@ class ProgramDmaBench: public Program
595595
}
596596
}
597597

598-
bool checkErrorsCru(uintptr_t pageAddress, size_t pageSize, int64_t eventNumber, int linkId)
598+
/// Checks one CRU page for data errors, choosing the checker from the loopback mode string.
/// "NONE" forwards to checkErrorsCruDdg, "INTERNAL" forwards to checkErrorsCruInternal;
/// the remaining arguments are passed through unchanged.
/// \param pageAddress Address of the page to check
/// \param pageSize Size of the page, forwarded to the selected checker
/// \param eventNumber Event number, forwarded to the selected checker
/// \param linkId Link ID, forwarded to the selected checker
/// \param loopbackMode Loopback mode name; compared verbatim against "NONE" and "INTERNAL"
/// \return The result of the selected checker
/// \throws std::runtime_error if loopbackMode is neither "NONE" nor "INTERNAL"
bool checkErrorsCru(uintptr_t pageAddress, size_t pageSize, int64_t eventNumber, int linkId, std::string loopbackMode)
599+
{
600+
if (loopbackMode == "NONE")
601+
return checkErrorsCruDdg(pageAddress, pageSize, eventNumber, linkId);
602+
else if (loopbackMode == "INTERNAL")
603+
return checkErrorsCruInternal(pageAddress, pageSize, eventNumber, linkId);
604+
else
605+
throw std::runtime_error("Loopback Mode not supported");
606+
}
607+
608+
/// Checks one page against the PCIe internal loopback pattern.
/// The whole page is treated as payload (no header is skipped). Every 32-bit word is compared
/// against a per-link running counter; each mismatch is reported through addError().
/// The per-link counter in mLinkCounters is updated so the next page continues the sequence.
/// \param pageAddress Address of the page to check
/// \param pageSize Size of the page in bytes
/// \param eventNumber Event number, used only for error reporting
/// \param linkId Link ID, used to index mLinkCounters and for error reporting
/// \return true if at least one word did not match the expected value
bool checkErrorsCruInternal(uintptr_t pageAddress, size_t pageSize, int64_t eventNumber, int linkId)
609+
{
610+
// PCIe internal pattern
611+
// Every 256-bit word consists of eight 32-bit words that all carry the same counter value:
612+
// 32 bits counter + 32 bits counter + 32 bits counter + 32 bits counter
613+
// 32 bits counter + 32 bits counter + 32 bits counter + 32 bits counter
// The counter is incremented by one from each 256-bit word to the next.
614+
615+
// Resync: while the link's counter still holds its initial value, adopt the counter found in this page
616+
if (mLinkCounters[linkId] == LINK_COUNTER_INITIAL_VALUE) {
617+
auto counter = getDataGeneratorCounterFromPage(pageAddress, 0x0); // header size 0: this pattern has no header
618+
mErrorStream << b::format("resync counter for e:%d l:%d cnt:%x\n") % eventNumber % linkId % counter;
619+
mLinkCounters[linkId] = counter;
620+
}
621+
622+
// Counter value expected at the start of this page; also passed to addError() for reporting
const uint32_t counter = mLinkCounters[linkId];
623+
624+
bool foundError = false;
625+
// Compares one 32-bit word against its expected value and records a mismatch
auto checkValue = [&](uint32_t i, uint32_t expectedValue, uint32_t actualValue) {
626+
if (expectedValue != actualValue) {
627+
foundError = true;
628+
addError(eventNumber, linkId, i, counter, expectedValue, actualValue, pageSize);
629+
}
630+
};
631+
632+
// View the page as an array of 32-bit words, starting at the very beginning of the page
const auto payload = reinterpret_cast<const volatile uint32_t*> (pageAddress);
633+
634+
// Check iterating through 256-bit words
// NOTE(review): counter/offset are uint32_t, so the modulo by 0x100000000 leaves the value unchanged
635+
uint32_t offset = (counter % (0x100000000)); // mod 0xffffffff + 1
636+
uint32_t i = 0;
637+
638+
// NOTE(review): eight words are read per iteration while only i*4 < pageSize is tested;
// this presumably relies on pageSize being a multiple of 32 bytes - confirm
while ((i*4) < pageSize) { // i counts 32-bit words, so i*4 is the byte offset into the page
639+
uint32_t word32 = i; // index of the first 32-bit word of the current 256-bit word
640+
641+
checkValue(word32 + 0, offset, payload[word32 + 0]); //32-bit counter
642+
checkValue(word32 + 1, offset, payload[word32 + 1]); //32-bit counter
643+
checkValue(word32 + 2, offset, payload[word32 + 2]); //32-bit counter
644+
checkValue(word32 + 3, offset, payload[word32 + 3]); //32-bit counter
645+
checkValue(word32 + 4, offset, payload[word32 + 4]); //32-bit counter
646+
checkValue(word32 + 5, offset, payload[word32 + 5]); //32-bit counter
647+
checkValue(word32 + 6, offset, payload[word32 + 6]); //32-bit counter
648+
checkValue(word32 + 7, offset, payload[word32 + 7]); //32-bit counter
649+
650+
offset = (offset+1) % (0x100000000); //Increase counter by 1 - mod 0xffffffff + 1
651+
i += 8; // advance by eight 32-bit words, i.e. one 256-bit word
652+
}
653+
654+
// Remember where the sequence ended so the next page of this link continues from here
mLinkCounters[linkId] = offset;
655+
return foundError;
656+
}
657+
658+
bool checkErrorsCruDdg(uintptr_t pageAddress, size_t pageSize, int64_t eventNumber, int linkId)
599659
{
600660
// Get memsize from the header
601661
const auto memBytes = Cru::DataFormat::getEventSize(reinterpret_cast<const char*>(pageAddress)); // Memory size [RDH, Payload]
602662

603663
if (memBytes < 40 || memBytes > pageSize) {
604664
// Report RDH error
605-
mRdhErrorCount++;
606-
if (mRdhErrorCount < MAX_RECORDED_ERRORS) {
665+
mErrorCount++;
666+
if (mErrorCount < MAX_RECORDED_ERRORS) {
607667
mErrorStream << b::format("[RDHERR]\tevent:%1% l:%2% payloadBytes:%3% size:%4% words out of range\n") % eventNumber
608668
% linkId % memBytes % pageSize;
609669
}
@@ -612,7 +672,7 @@ class ProgramDmaBench: public Program
612672

613673
// Get counter value only if page is valid...
614674
if (mLinkCounters[linkId] == LINK_COUNTER_INITIAL_VALUE) {
615-
auto counter = getDataGeneratorCounterFromPage(pageAddress);
675+
auto counter = getDataGeneratorCounterFromPage(pageAddress, Cru::DataFormat::getHeaderSize());
616676
mErrorStream << b::format("resync counter for e:%d l:%d cnt:%x\n") % eventNumber % linkId % counter;
617677
mLinkCounters[linkId] = counter;
618678
}
@@ -622,10 +682,6 @@ class ProgramDmaBench: public Program
622682
const auto payload = reinterpret_cast<const volatile uint32_t*>(pageAddress + Cru::DataFormat::getHeaderSize());
623683
const auto payloadBytes = memBytes - Cru::DataFormat::getHeaderSize();
624684

625-
// Every 256-bit word is built as follows:
626-
// 32 bits counter + 32 bits counter + 16 lsb counter + 32 bit 0
627-
// 32 bits (counter + 1) + 32 bits (counter + 1) + 16 lsb (counter + 1) + 32 bit 0
628-
629685
bool foundError = false;
630686
auto checkValue = [&](uint32_t i, uint32_t expectedValue, uint32_t actualValue) {
631687
if (expectedValue != actualValue) {
@@ -637,6 +693,12 @@ class ProgramDmaBench: public Program
637693
// Check iterating through 256-bit words
638694
uint32_t offset = (counter % (0x100000000)); // mod 0xffffffff + 1
639695
uint32_t i = 0;
696+
697+
// ddg pattern
698+
// Every 256-bit word is built as follows:
699+
// 32 bits counter + 32 bits counter + 16 lsb counter + 32 bit 0
700+
// 32 bits (counter + 1) + 32 bits (counter + 1) + 16 lsb (counter + 1) + 32 bit 0
701+
640702
while ((i*4) < payloadBytes) {
641703
uint32_t word32 = i; // 32-bit word pointer
642704

@@ -648,7 +710,6 @@ class ProgramDmaBench: public Program
648710
offset = (offset+1) % (0x100000000); //Increase counter by 1 - mod 0xffffffff + 1
649711
i += 4; // 4 = expected sizeof(uint32_t);
650712
}
651-
652713
mLinkCounters[linkId] = offset;
653714
return foundError;
654715
}
@@ -731,7 +792,6 @@ class ProgramDmaBench: public Program
731792
format % mReadoutCount.load(std::memory_order_relaxed);
732793

733794
mOptions.noErrorCheck ? format % "n/a" : format % mErrorCount; // Errors
734-
mOptions.noErrorCheck ? format % "n/a" : format % mRdhErrorCount; // Errors
735795

736796
if (mOptions.noTemperature) {
737797
format % "n/a";
@@ -761,8 +821,8 @@ class ProgramDmaBench: public Program
761821

762822
void printStatusHeader()
763823
{
764-
auto line1 = b::format(PROGRESS_FORMAT_HEADER) % "Time" % "Pushed" % "Read" % "Errors" % "RDH Errors" % "°C";
765-
auto line2 = b::format(PROGRESS_FORMAT) % "00" % "00" % "00" % '-' % '-' % '-' % '-' % '-';
824+
auto line1 = b::format(PROGRESS_FORMAT_HEADER) % "Time" % "Pushed" % "Read" % "Errors" % "°C";
825+
auto line2 = b::format(PROGRESS_FORMAT) % "00" % "00" % "00" % '-' % '-' % '-' % '-';
766826
cout << '\n' << line1;
767827
cout << '\n' << line2;
768828
}
@@ -792,7 +852,6 @@ class ProgramDmaBench: public Program
792852
put("Errors", "n/a");
793853
} else {
794854
put("Errors", mErrorCount);
795-
put("RDH Errors", mRdhErrorCount);
796855
}
797856
}
798857

@@ -954,9 +1013,6 @@ class ProgramDmaBench: public Program
9541013
/// Total amount of errors encountered
9551014
int64_t mErrorCount = 0;
9561015

957-
/// Total amount of RDH errors encountered
958-
int64_t mRdhErrorCount = 0;
959-
9601016
/// Keep on pushing until we're explicitly stopped
9611017
bool mInfinitePages = false;
9621018

0 commit comments

Comments
 (0)