Skip to content

Commit c3f4945

Browse files
committed
[bench-dma] Use superpage as the reference for transfer speed and size
1 parent 13dede8 commit c3f4945

File tree

1 file changed

+91
-100
lines changed

1 file changed

+91
-100
lines changed

src/CommandLineUtilities/ProgramDmaBench.cxx

Lines changed: 91 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -256,19 +256,18 @@ class ProgramDmaBench : public Program
256256
getLogger() << "IOMMU " << (AliceO2::Common::Iommu::isEnabled() ? "enabled" : "not enabled") << endm;
257257

258258
// Create channel buffer
259-
{
260-
if (mBufferSize < mSuperpageSize) {
261-
BOOST_THROW_EXCEPTION(ParameterException() << ErrorInfo::Message("Buffer size smaller than superpage size"));
262-
}
263259

264-
std::string bufferName = (b::format("roc-bench-dma_id=%s_chan=%s_pages") % map["id"].as<std::string>() % mOptions.dmaChannel).str();
260+
if (mBufferSize < mSuperpageSize) {
261+
BOOST_THROW_EXCEPTION(ParameterException() << ErrorInfo::Message("Buffer size smaller than superpage size"));
262+
}
265263

266-
Utilities::HugepageType hugepageType;
267-
mMemoryMappedFile = Utilities::tryMapFile(mBufferSize, bufferName, !mOptions.noRemovePagesFile, &hugepageType);
264+
std::string bufferName = (b::format("roc-bench-dma_id=%s_chan=%s_pages") % map["id"].as<std::string>() % mOptions.dmaChannel).str();
268265

269-
mBufferBaseAddress = reinterpret_cast<uintptr_t>(mMemoryMappedFile->getAddress());
270-
getLogger() << "Using buffer file path: " << mMemoryMappedFile->getFileName() << endm;
271-
}
266+
Utilities::HugepageType hugepageType;
267+
mMemoryMappedFile = Utilities::tryMapFile(mBufferSize, bufferName, !mOptions.noRemovePagesFile, &hugepageType);
268+
269+
mBufferBaseAddress = reinterpret_cast<uintptr_t>(mMemoryMappedFile->getAddress());
270+
getLogger() << "Using buffer file path: " << mMemoryMappedFile->getFileName() << endm;
272271

273272
// Set up channel parameters
274273
mPageSize = params.getDmaPageSize().get();
@@ -279,20 +278,18 @@ class ProgramDmaBench : public Program
279278
params.setLinkMask(Parameters::linkMaskFromString(mOptions.links));
280279

281280
mInfinitePages = (mOptions.maxBytes <= 0);
282-
mMaxPages = mOptions.maxBytes / mPageSize;
281+
mSuperpageLimit = mOptions.maxBytes / mSuperpageSize;
283282

284283
if (!Utilities::isMultiple(mSuperpageSize, mPageSize)) {
285284
throw ParameterException() << ErrorInfo::Message("Superpage size not a multiple of page size");
286285
}
287286

288-
mMaxSuperpages = mBufferSize / mSuperpageSize;
289-
mPagesPerSuperpage = mSuperpageSize / mPageSize;
287+
mSuperpagesInBuffer = mBufferSize / mSuperpageSize;
290288
getLogger() << "Buffer size: " << mBufferSize << endm;
291289
getLogger() << "Superpage size: " << mSuperpageSize << endm;
292-
getLogger() << "Superpages in buffer: " << mMaxSuperpages << endm;
293-
getLogger() << "Page size: " << mPageSize << endm;
294-
getLogger() << "Page limit: " << mMaxPages << endm;
295-
getLogger() << "Pages per superpage: " << mPagesPerSuperpage << endm;
290+
getLogger() << "Superpages in buffer: " << mSuperpagesInBuffer << endm;
291+
getLogger() << "Superpage limit: " << mSuperpageLimit << endm;
292+
getLogger() << "DMA page size: " << mPageSize << endm;
296293
if (mOptions.bufferFullCheck) {
297294
getLogger() << "Buffer-Full Check enabled" << endm;
298295
mBufferFullCheck = true;
@@ -358,8 +355,8 @@ class ProgramDmaBench : public Program
358355

359356
std::cout << "\n\n";
360357
mChannel->stopDma();
361-
int popped = freeExcessPages(10ms);
362-
getLogger() << "Popped " << popped << " remaining superpages" << endm;
358+
int numPopped = freeExcessPages(10ms);
359+
getLogger() << "Popped " << numPopped << " remaining superpages" << endm;
363360

364361
outputErrors();
365362
outputStats();
@@ -369,18 +366,18 @@ class ProgramDmaBench : public Program
369366
private:
370367
void dmaLoop()
371368
{
372-
if (mMaxSuperpages < 1) {
369+
if (mSuperpagesInBuffer < 1) {
373370
throw std::runtime_error("Buffer too small");
374371
}
375372

376373
// Lock-free queues. Usable size is (size-1), so we add 1
377374
/// Queue for passing filled superpages from the push thread to the readout thread
378-
folly::ProducerConsumerQueue<SuperpageInfo> readoutQueue{ static_cast<uint32_t>(mMaxSuperpages) + 1 };
375+
folly::ProducerConsumerQueue<SuperpageInfo> readoutQueue{ static_cast<uint32_t>(mSuperpagesInBuffer) + 1 };
379376
/// Queue for free superpages. This starts out as full, then the readout thread consumes them. When superpages
380377
/// arrive, they are passed via the readoutQueue to the readout thread. When the readout thread is done with it,
381378
/// it is put back in the freeQueue.
382-
folly::ProducerConsumerQueue<size_t> freeQueue{ static_cast<uint32_t>(mMaxSuperpages) + 1 };
383-
for (size_t i = 0; i < mMaxSuperpages; ++i) {
379+
folly::ProducerConsumerQueue<size_t> freeQueue{ static_cast<uint32_t>(mSuperpagesInBuffer) + 1 };
380+
for (size_t i = 0; i < mSuperpagesInBuffer; ++i) {
384381
size_t offset = i * mSuperpageSize;
385382
if (!freeQueue.write(offset)) {
386383
BOOST_THROW_EXCEPTION(Exception() << ErrorInfo::Message("Something went horribly wrong"));
@@ -404,14 +401,13 @@ class ProgramDmaBench : public Program
404401
}
405402

406403
// If there's a time limit, check it
407-
if (auto limit = mTimeLimitOptional) {
408-
if (std::chrono::steady_clock::now() >= limit) {
409-
mDmaLoopBreak = true;
410-
return;
411-
}
404+
auto limit = mTimeLimitOptional;
405+
if (limit && std::chrono::steady_clock::now() >= limit) {
406+
mDmaLoopBreak = true;
407+
return;
412408
}
413409

414-
if (mPushCount.load(std::memory_order_relaxed) != 0) {
410+
if (mSuperpagesPushed.load(std::memory_order_relaxed) != 0) {
415411

416412
// Start our run timer when DMA starts
417413
if (!mRunTimeStarted) {
@@ -439,11 +435,10 @@ class ProgramDmaBench : public Program
439435
auto pushFuture = std::async(std::launch::async, [&] {
440436
try {
441437
RandomPauses pauses;
442-
int currentPagesCounted = 0;
443438

444439
while (!isStopDma()) {
445-
// Check if we need to stop in the case of a page limit
446-
if (!mInfinitePages && mPushCount.load(std::memory_order_relaxed) >= mMaxPages && (currentPagesCounted == 0)) {
440+
// Check if we need to stop in the case of a superpage limit
441+
if (!mInfinitePages && mSuperpagesPushed.load(std::memory_order_relaxed) >= mSuperpageLimit) {
447442
break;
448443
}
449444
if (mOptions.randomPause) {
@@ -453,50 +448,38 @@ class ProgramDmaBench : public Program
453448
// Keep the driver's queue filled
454449
mChannel->fillSuperpages();
455450

456-
auto shouldRest = false;
457-
458-
// Give free superpages to the driver
459-
if (mChannel->getTransferQueueAvailable() != 0) {
460-
while (mChannel->getTransferQueueAvailable() != 0) {
461-
Superpage superpage;
462-
size_t offsetRead;
463-
if (freeQueue.read(offsetRead)) {
464-
superpage.setSize(mSuperpageSize);
465-
superpage.setOffset(offsetRead);
466-
mChannel->pushSuperpage(superpage);
467-
} else {
468-
// No free pages available, so take a little break
469-
shouldRest = true;
470-
break;
471-
}
451+
bool shouldRest = true;
452+
453+
while (mChannel->getTransferQueueAvailable() != 0) {
454+
Superpage superpage;
455+
size_t offsetRead;
456+
457+
if (freeQueue.read(offsetRead)) {
458+
superpage.setSize(mSuperpageSize);
459+
superpage.setOffset(offsetRead);
460+
mChannel->pushSuperpage(superpage);
461+
} else {
462+
// freeQueue is empty (no free superpages available), so we should rest
463+
shouldRest = true;
464+
break;
472465
}
473-
} else {
474-
// No transfer queue slots available on the card
475-
shouldRest = true;
476466
}
477467

478468
// Check for filled superpages
479469
while (mChannel->getReadyQueueSize() != 0) {
480470
auto superpage = mChannel->getSuperpage();
481-
// We do partial updates of the mPushCount because we can have very large superpages, which would otherwise
482-
// cause hiccups in the display
483-
int pages = superpage.getReceived() / mPageSize;
484-
int pagesToCount = pages - currentPagesCounted;
485-
mPushCount.fetch_add(pagesToCount, std::memory_order_relaxed);
471+
fetchAddSuperpagesPushed();
486472

487-
if (mBufferFullCheck && (mPushCount.load(std::memory_order_relaxed) == mMaxSuperpages * mPagesPerSuperpage)) {
473+
if (mBufferFullCheck && (mSuperpagesPushed.load(std::memory_order_relaxed) == mSuperpageLimit)) {
488474
mBufferFullTimeFinish = std::chrono::high_resolution_clock::now();
489475
mDmaLoopBreak = true;
490476
}
491477

492-
currentPagesCounted += pagesToCount;
493-
478+
// Move full superpage to readout queue
494479
if (superpage.isReady() && readoutQueue.write(SuperpageInfo{ superpage.getOffset(), superpage.getReceived() })) {
495-
// Move full superpage to readout queue
496-
currentPagesCounted = 0;
497480
mChannel->popSuperpage();
498481
} else {
499-
// Readout is backed up, so rest a while
482+
// Superpage is not ready yet or the write to readoutQueue failed, so rest a while
500483
shouldRest = true;
501484
break;
502485
}
@@ -517,10 +500,11 @@ class ProgramDmaBench : public Program
517500
RandomPauses pauses;
518501

519502
while (!isStopDma()) {
520-
if (!mInfinitePages && mReadoutCount.load(std::memory_order_relaxed) >= mMaxPages) {
503+
if (!mInfinitePages && mSuperpagesReadOut.load(std::memory_order_relaxed) >= mSuperpageLimit) {
521504
mDmaLoopBreak = true;
522505
break;
523506
}
507+
524508
if (mOptions.randomPause) {
525509
pauses.pauseIfNeeded();
526510
}
@@ -532,11 +516,11 @@ class ProgramDmaBench : public Program
532516
size_t readoutBytes = 0;
533517
auto superpageAddress = mBufferBaseAddress + superpageInfo.bufferOffset;
534518

535-
//std::cout << superpageInfo.effectiveSize << std::endl;
519+
fetchAddSuperpagesReadOut();
536520

537521
while ((readoutBytes < superpageInfo.effectiveSize) && !isStopDma()) {
538522
auto pageAddress = superpageAddress + readoutBytes;
539-
auto readoutCount = fetchAddReadoutCount();
523+
auto readoutCount = fetchAddDmaPagesReadOut();
540524
size_t pageSize = readoutPage(pageAddress, readoutCount);
541525

542526
if (mOptions.byteCountEnabled && !(mOptions.loopbackModeString == "INTERNAL")) {
@@ -570,13 +554,6 @@ class ProgramDmaBench : public Program
570554
lowPriorityFuture.get();
571555
}
572556

573-
/// Atomically fetch and increment the readout count. We do this because it is accessed by multiple threads.
574-
/// Although there is currently only one writer at a time and a regular increment probably would be OK.
575-
uint64_t fetchAddReadoutCount()
576-
{
577-
return mReadoutCount.fetch_add(1, std::memory_order_relaxed);
578-
}
579-
580557
/// Free the pages that remain after stopping DMA (these may not be filled)
581558
int freeExcessPages(std::chrono::milliseconds timeout)
582559
{
@@ -586,12 +563,13 @@ class ProgramDmaBench : public Program
586563
auto size = mChannel->getReadyQueueSize();
587564
for (int i = 0; i < size; ++i) {
588565
auto superpage = mChannel->popSuperpage();
566+
fetchAddSuperpagesReadOut();
589567
if ((mLoopback == LoopbackMode::None) || (mLoopback == LoopbackMode::Ddg)) {
590568
auto superpageAddress = mBufferBaseAddress + superpage.getOffset();
591569
size_t readoutBytes = 0;
592570
while ((readoutBytes < superpage.getReceived()) && !isSigInt()) { // At least one more dma page fits in the superpage
593571
auto pageAddress = superpageAddress + readoutBytes;
594-
auto readoutCount = fetchAddReadoutCount();
572+
auto readoutCount = fetchAddDmaPagesReadOut();
595573
size_t pageSize = readoutPage(pageAddress, readoutCount);
596574
readoutBytes += pageSize;
597575
}
@@ -866,15 +844,6 @@ class ProgramDmaBench : public Program
866844
return foundError;
867845
}
868846

869-
void resetPage(uintptr_t pageAddress, size_t pageSize) //TODO: Is this still relevant?
870-
{
871-
auto page = reinterpret_cast<volatile uint32_t*>(pageAddress);
872-
auto pageSize32 = pageSize / sizeof(uint32_t);
873-
for (size_t i = 0; i < pageSize32; i++) {
874-
page[i] = BUFFER_DEFAULT_VALUE;
875-
}
876-
}
877-
878847
void updateStatusDisplay()
879848
{
880849
if (!mHeaderPrinted) {
@@ -890,11 +859,11 @@ class ProgramDmaBench : public Program
890859

891860
auto format = b::format(PROGRESS_FORMAT);
892861
format % hour % minute % second; // Time
893-
format % (mPushCount.load(std::memory_order_relaxed) / mPagesPerSuperpage);
894-
format % (mReadoutCount.load(std::memory_order_relaxed) / mPagesPerSuperpage);
862+
format % mSuperpagesPushed.load(std::memory_order_relaxed);
863+
format % mSuperpagesReadOut.load(std::memory_order_relaxed);
895864

896865
double runTime = std::chrono::duration<double>(steady_clock::now() - mRunTime.start).count();
897-
double bytes = mOptions.byteCountEnabled ? double(mByteCount.load(std::memory_order_relaxed)) : double(mReadoutCount.load(std::memory_order_relaxed)) * mPageSize;
866+
double bytes = mOptions.byteCountEnabled ? double(mByteCount.load(std::memory_order_relaxed)) : double(mSuperpagesReadOut.load(std::memory_order_relaxed)) * mSuperpageSize;
898867
double Gb = bytes * 8 / (1000 * 1000 * 1000);
899868
double Gbps = Gb / runTime;
900869
format % Gbps;
@@ -941,7 +910,7 @@ class ProgramDmaBench : public Program
941910
{
942911
// Calculating throughput
943912
double runTime = std::chrono::duration<double>(mRunTime.end - mRunTime.start).count();
944-
double bytes = mOptions.byteCountEnabled ? double(mByteCount.load()) : double(mReadoutCount.load()) * mPageSize;
913+
double bytes = mOptions.byteCountEnabled ? double(mByteCount.load()) : double(mSuperpagesReadOut.load() * mSuperpageSize);
945914
double GB = bytes / (1000 * 1000 * 1000);
946915
double GBs = GB / runTime;
947916
double GiB = bytes / (1024 * 1024 * 1024);
@@ -951,10 +920,10 @@ class ProgramDmaBench : public Program
951920
auto put = [&](auto label, auto value) { cout << b::format(" %-24s %-10s\n") % label % value; };
952921
cout << '\n';
953922
put("Seconds", runTime);
954-
put("Superpages", mReadoutCount.load() / mPagesPerSuperpage);
955-
put("Superpage Latency(s)", runTime / (mReadoutCount.load() / mPagesPerSuperpage));
956-
put("DMA Pages", mReadoutCount.load());
957-
put("DMA Page Latency(s)", runTime / mReadoutCount.load());
923+
put("Superpages", mSuperpagesReadOut.load());
924+
put("Superpage Latency(s)", runTime / mSuperpagesReadOut.load());
925+
put("DMA Pages", mDmaPagesReadOut.load());
926+
put("DMA Page Latency(s)", runTime / mDmaPagesReadOut.load());
958927
if (bytes > 0.00001) {
959928
put("Bytes", bytes);
960929
put("GB", GB);
@@ -1056,6 +1025,24 @@ class ProgramDmaBench : public Program
10561025
return limit;
10571026
}
10581027

1028+
/// Atomically fetch and increment the Superpage and DMA page read out and pushed counts.
1029+
/// We do this because they are accessed by multiple threads.
1030+
/// There is currently only one writer at a time, so a regular increment would probably be OK as well.
1031+
uint64_t fetchAddDmaPagesReadOut()
1032+
{
1033+
return mDmaPagesReadOut.fetch_add(1, std::memory_order_relaxed);
1034+
}
1035+
1036+
uint64_t fetchAddSuperpagesReadOut()
1037+
{
1038+
return mSuperpagesReadOut.fetch_add(1, std::memory_order_relaxed);
1039+
}
1040+
1041+
uint64_t fetchAddSuperpagesPushed()
1042+
{
1043+
return mSuperpagesPushed.fetch_add(1, std::memory_order_relaxed);
1044+
}
1045+
10591046
struct RandomPauses {
10601047
static constexpr int NEXT_PAUSE_MIN = 10; ///< Minimum random pause interval in milliseconds
10611048
static constexpr int NEXT_PAUSE_MAX = 2000; ///< Maximum random pause interval in milliseconds
@@ -1123,12 +1110,19 @@ class ProgramDmaBench : public Program
11231110
// Event counter per link
11241111
std::array<std::atomic<uint32_t>, MAX_LINKS> mEventCounters;
11251112

1126-
// Keep these as DMA page counters for better granularity
1113+
// Superpage counters
1114+
/// Amount of Superpages pushed
1115+
std::atomic<uint64_t> mSuperpagesPushed{ 0 };
1116+
1117+
/// Amount of Superpages read out
1118+
std::atomic<uint64_t> mSuperpagesReadOut{ 0 };
1119+
1120+
// DMA page counters for better granularity
11271121
// Amount of DMA pages pushed (counter currently commented out)
1128-
std::atomic<uint64_t> mPushCount{ 0 };
1122+
//std::atomic<uint64_t> mDmaPagesPushed{ 0 };
11291123

11301124
// Amount of DMA pages read out
1131-
std::atomic<uint64_t> mReadoutCount{ 0 };
1125+
std::atomic<uint64_t> mDmaPagesReadOut{ 0 };
11321126

11331127
// Amount of bytes read out (as reported in the RDH)
11341128
std::atomic<uint64_t> mByteCount{ 0 };
@@ -1142,18 +1136,15 @@ class ProgramDmaBench : public Program
11421136
/// Size of superpages
11431137
size_t mSuperpageSize = 0;
11441138

1145-
/// Maximum amount of superpages
1146-
size_t mMaxSuperpages = 0;
1139+
/// Maximum amount of superpages to exchange
1140+
size_t mSuperpageLimit = 0;
11471141

1148-
/// Amount of DMA pages per superpage
1149-
size_t mPagesPerSuperpage = 0;
1142+
/// Maximum amount of superpages in buffer
1143+
size_t mSuperpagesInBuffer = 0;
11501144

11511145
/// Maximum size of pages
11521146
size_t mPageSize;
11531147

1154-
/// Maximum amount of pages to transfer
1155-
size_t mMaxPages;
1156-
11571148
/// The size of the channel DMA buffer
11581149
size_t mBufferSize;
11591150

0 commit comments

Comments
 (0)