@@ -256,19 +256,18 @@ class ProgramDmaBench : public Program
256256 getLogger () << " IOMMU " << (AliceO2::Common::Iommu::isEnabled () ? " enabled" : " not enabled" ) << endm;
257257
258258 // Create channel buffer
259- {
260- if (mBufferSize < mSuperpageSize ) {
261- BOOST_THROW_EXCEPTION (ParameterException () << ErrorInfo::Message (" Buffer size smaller than superpage size" ));
262- }
263259
264- std::string bufferName = (b::format (" roc-bench-dma_id=%s_chan=%s_pages" ) % map[" id" ].as <std::string>() % mOptions .dmaChannel ).str ();
260+ if (mBufferSize < mSuperpageSize ) {
261+ BOOST_THROW_EXCEPTION (ParameterException () << ErrorInfo::Message (" Buffer size smaller than superpage size" ));
262+ }
265263
266- Utilities::HugepageType hugepageType;
267- mMemoryMappedFile = Utilities::tryMapFile (mBufferSize , bufferName, !mOptions .noRemovePagesFile , &hugepageType);
264+ std::string bufferName = (b::format (" roc-bench-dma_id=%s_chan=%s_pages" ) % map[" id" ].as <std::string>() % mOptions .dmaChannel ).str ();
268265
269- mBufferBaseAddress = reinterpret_cast <uintptr_t >(mMemoryMappedFile ->getAddress ());
270- getLogger () << " Using buffer file path: " << mMemoryMappedFile ->getFileName () << endm;
271- }
266+ Utilities::HugepageType hugepageType;
267+ mMemoryMappedFile = Utilities::tryMapFile (mBufferSize , bufferName, !mOptions .noRemovePagesFile , &hugepageType);
268+
269+ mBufferBaseAddress = reinterpret_cast <uintptr_t >(mMemoryMappedFile ->getAddress ());
270+ getLogger () << " Using buffer file path: " << mMemoryMappedFile ->getFileName () << endm;
272271
273272 // Set up channel parameters
274273 mPageSize = params.getDmaPageSize ().get ();
@@ -279,20 +278,18 @@ class ProgramDmaBench : public Program
279278 params.setLinkMask (Parameters::linkMaskFromString (mOptions .links ));
280279
281280 mInfinitePages = (mOptions .maxBytes <= 0 );
282- mMaxPages = mOptions .maxBytes / mPageSize ;
281+ mSuperpageLimit = mOptions .maxBytes / mSuperpageSize ;
283282
284283 if (!Utilities::isMultiple (mSuperpageSize , mPageSize )) {
285284 throw ParameterException () << ErrorInfo::Message (" Superpage size not a multiple of page size" );
286285 }
287286
288- mMaxSuperpages = mBufferSize / mSuperpageSize ;
289- mPagesPerSuperpage = mSuperpageSize / mPageSize ;
287+ mSuperpagesInBuffer = mBufferSize / mSuperpageSize ;
290288 getLogger () << " Buffer size: " << mBufferSize << endm;
291289 getLogger () << " Superpage size: " << mSuperpageSize << endm;
292- getLogger () << " Superpages in buffer: " << mMaxSuperpages << endm;
293- getLogger () << " Page size: " << mPageSize << endm;
294- getLogger () << " Page limit: " << mMaxPages << endm;
295- getLogger () << " Pages per superpage: " << mPagesPerSuperpage << endm;
290+ getLogger () << " Superpages in buffer: " << mSuperpagesInBuffer << endm;
291+ getLogger () << " Superpage limit: " << mSuperpageLimit << endm;
292+ getLogger () << " DMA page size: " << mPageSize << endm;
296293 if (mOptions .bufferFullCheck ) {
297294 getLogger () << " Buffer-Full Check enabled" << endm;
298295 mBufferFullCheck = true ;
@@ -358,8 +355,8 @@ class ProgramDmaBench : public Program
358355
359356 std::cout << " \n\n " ;
360357 mChannel ->stopDma ();
361- int popped = freeExcessPages (10ms);
362- getLogger () << " Popped " << popped << " remaining superpages" << endm;
358+ int numPopped = freeExcessPages (10ms);
359+ getLogger () << " Popped " << numPopped << " remaining superpages" << endm;
363360
364361 outputErrors ();
365362 outputStats ();
@@ -369,18 +366,18 @@ class ProgramDmaBench : public Program
369366 private:
370367 void dmaLoop ()
371368 {
372- if (mMaxSuperpages < 1 ) {
369+ if (mSuperpagesInBuffer < 1 ) {
373370 throw std::runtime_error (" Buffer too small" );
374371 }
375372
376373 // Lock-free queues. Usable size is (size-1), so we add 1
377374 // / Queue for passing filled superpages from the push thread to the readout thread
378- folly::ProducerConsumerQueue<SuperpageInfo> readoutQueue{ static_cast <uint32_t >(mMaxSuperpages ) + 1 };
375+ folly::ProducerConsumerQueue<SuperpageInfo> readoutQueue{ static_cast <uint32_t >(mSuperpagesInBuffer ) + 1 };
379376 // / Queue for free superpages. This starts out as full, then the readout thread consumes them. When superpages
380377 // / arrive, they are passed via the readoutQueue to the readout thread. When the readout thread is done with it,
381378 // / it is put back in the freeQueue.
382- folly::ProducerConsumerQueue<size_t > freeQueue{ static_cast <uint32_t >(mMaxSuperpages ) + 1 };
383- for (size_t i = 0 ; i < mMaxSuperpages ; ++i) {
379+ folly::ProducerConsumerQueue<size_t > freeQueue{ static_cast <uint32_t >(mSuperpagesInBuffer ) + 1 };
380+ for (size_t i = 0 ; i < mSuperpagesInBuffer ; ++i) {
384381 size_t offset = i * mSuperpageSize ;
385382 if (!freeQueue.write (offset)) {
386383 BOOST_THROW_EXCEPTION (Exception () << ErrorInfo::Message (" Something went horribly wrong" ));
@@ -404,14 +401,13 @@ class ProgramDmaBench : public Program
404401 }
405402
406403 // If there's a time limit, check it
407- if (auto limit = mTimeLimitOptional ) {
408- if (std::chrono::steady_clock::now () >= limit) {
409- mDmaLoopBreak = true ;
410- return ;
411- }
404+ auto limit = mTimeLimitOptional ;
405+ if (limit && std::chrono::steady_clock::now () >= limit) {
406+ mDmaLoopBreak = true ;
407+ return ;
412408 }
413409
414- if (mPushCount .load (std::memory_order_relaxed) != 0 ) {
410+ if (mSuperpagesPushed .load (std::memory_order_relaxed) != 0 ) {
415411
416412 // Start our run timer when DMA starts
417413 if (!mRunTimeStarted ) {
@@ -439,11 +435,10 @@ class ProgramDmaBench : public Program
439435 auto pushFuture = std::async (std::launch::async, [&] {
440436 try {
441437 RandomPauses pauses;
442- int currentPagesCounted = 0 ;
443438
444439 while (!isStopDma ()) {
445- // Check if we need to stop in the case of a page limit
446- if (!mInfinitePages && mPushCount .load (std::memory_order_relaxed) >= mMaxPages && (currentPagesCounted == 0 ) ) {
440+ // Check if we need to stop in the case of a superpage limit
441+ if (!mInfinitePages && mSuperpagesPushed .load (std::memory_order_relaxed) >= mSuperpageLimit ) {
447442 break ;
448443 }
449444 if (mOptions .randomPause ) {
@@ -453,50 +448,38 @@ class ProgramDmaBench : public Program
453448 // Keep the driver's queue filled
454449 mChannel ->fillSuperpages ();
455450
456- auto shouldRest = false ;
457-
458- // Give free superpages to the driver
459- if (mChannel ->getTransferQueueAvailable () != 0 ) {
460- while (mChannel ->getTransferQueueAvailable () != 0 ) {
461- Superpage superpage;
462- size_t offsetRead;
463- if (freeQueue.read (offsetRead)) {
464- superpage.setSize (mSuperpageSize );
465- superpage.setOffset (offsetRead);
466- mChannel ->pushSuperpage (superpage);
467- } else {
468- // No free pages available, so take a little break
469- shouldRest = true ;
470- break ;
471- }
451+ bool shouldRest = true ;
452+
453+ while (mChannel ->getTransferQueueAvailable () != 0 ) {
454+ Superpage superpage;
455+ size_t offsetRead;
456+
457+ if (freeQueue.read (offsetRead)) {
458+ superpage.setSize (mSuperpageSize );
459+ superpage.setOffset (offsetRead);
460+ mChannel ->pushSuperpage (superpage);
461+ } else {
462+ // freeQueue is backed up and we should rest
463+ shouldRest = true ;
464+ break ;
472465 }
473- } else {
474- // No transfer queue slots available on the card
475- shouldRest = true ;
476466 }
477467
478468 // Check for filled superpages
479469 while (mChannel ->getReadyQueueSize () != 0 ) {
480470 auto superpage = mChannel ->getSuperpage ();
481- // We do partial updates of the mPushCount because we can have very large superpages, which would otherwise
482- // cause hiccups in the display
483- int pages = superpage.getReceived () / mPageSize ;
484- int pagesToCount = pages - currentPagesCounted;
485- mPushCount .fetch_add (pagesToCount, std::memory_order_relaxed);
471+ fetchAddSuperpagesPushed ();
486472
487- if (mBufferFullCheck && (mPushCount .load (std::memory_order_relaxed) == mMaxSuperpages * mPagesPerSuperpage )) {
473+ if (mBufferFullCheck && (mSuperpagesPushed .load (std::memory_order_relaxed) == mSuperpageLimit )) {
488474 mBufferFullTimeFinish = std::chrono::high_resolution_clock::now ();
489475 mDmaLoopBreak = true ;
490476 }
491477
492- currentPagesCounted += pagesToCount;
493-
478+ // Move full superpage to readout queue
494479 if (superpage.isReady () && readoutQueue.write (SuperpageInfo{ superpage.getOffset (), superpage.getReceived () })) {
495- // Move full superpage to readout queue
496- currentPagesCounted = 0 ;
497480 mChannel ->popSuperpage ();
498481 } else {
499- // Readout is backed up, so rest a while
482+ // readyQueue(=readout) is backed up, so rest a while
500483 shouldRest = true ;
501484 break ;
502485 }
@@ -517,10 +500,11 @@ class ProgramDmaBench : public Program
517500 RandomPauses pauses;
518501
519502 while (!isStopDma ()) {
520- if (!mInfinitePages && mReadoutCount .load (std::memory_order_relaxed) >= mMaxPages ) {
503+ if (!mInfinitePages && mSuperpagesReadOut .load (std::memory_order_relaxed) >= mSuperpageLimit ) {
521504 mDmaLoopBreak = true ;
522505 break ;
523506 }
507+
524508 if (mOptions .randomPause ) {
525509 pauses.pauseIfNeeded ();
526510 }
@@ -532,11 +516,11 @@ class ProgramDmaBench : public Program
532516 size_t readoutBytes = 0 ;
533517 auto superpageAddress = mBufferBaseAddress + superpageInfo.bufferOffset ;
534518
535- // std::cout << superpageInfo.effectiveSize << std::endl ;
519+ fetchAddSuperpagesReadOut () ;
536520
537521 while ((readoutBytes < superpageInfo.effectiveSize ) && !isStopDma ()) {
538522 auto pageAddress = superpageAddress + readoutBytes;
539- auto readoutCount = fetchAddReadoutCount ();
523+ auto readoutCount = fetchAddDmaPagesReadOut ();
540524 size_t pageSize = readoutPage (pageAddress, readoutCount);
541525
542526 if (mOptions .byteCountEnabled && !(mOptions .loopbackModeString == " INTERNAL" )) {
@@ -570,13 +554,6 @@ class ProgramDmaBench : public Program
570554 lowPriorityFuture.get ();
571555 }
572556
573- // / Atomically fetch and increment the readout count. We do this because it is accessed by multiple threads.
574- // / Although there is currently only one writer at a time and a regular increment probably would be OK.
575- uint64_t fetchAddReadoutCount ()
576- {
577- return mReadoutCount .fetch_add (1 , std::memory_order_relaxed);
578- }
579-
580557 // / Free the pages that remain after stopping DMA (these may not be filled)
581558 int freeExcessPages (std::chrono::milliseconds timeout)
582559 {
@@ -586,12 +563,13 @@ class ProgramDmaBench : public Program
586563 auto size = mChannel ->getReadyQueueSize ();
587564 for (int i = 0 ; i < size; ++i) {
588565 auto superpage = mChannel ->popSuperpage ();
566+ fetchAddSuperpagesReadOut ();
589567 if ((mLoopback == LoopbackMode::None) || (mLoopback == LoopbackMode::Ddg)) {
590568 auto superpageAddress = mBufferBaseAddress + superpage.getOffset ();
591569 size_t readoutBytes = 0 ;
592570 while ((readoutBytes < superpage.getReceived ()) && !isSigInt ()) { // At least one more dma page fits in the superpage
593571 auto pageAddress = superpageAddress + readoutBytes;
594- auto readoutCount = fetchAddReadoutCount ();
572+ auto readoutCount = fetchAddDmaPagesReadOut ();
595573 size_t pageSize = readoutPage (pageAddress, readoutCount);
596574 readoutBytes += pageSize;
597575 }
@@ -866,15 +844,6 @@ class ProgramDmaBench : public Program
866844 return foundError;
867845 }
868846
869- void resetPage (uintptr_t pageAddress, size_t pageSize) // TODO: Is this still relevant?
870- {
871- auto page = reinterpret_cast <volatile uint32_t *>(pageAddress);
872- auto pageSize32 = pageSize / sizeof (uint32_t );
873- for (size_t i = 0 ; i < pageSize32; i++) {
874- page[i] = BUFFER_DEFAULT_VALUE;
875- }
876- }
877-
878847 void updateStatusDisplay ()
879848 {
880849 if (!mHeaderPrinted ) {
@@ -890,11 +859,11 @@ class ProgramDmaBench : public Program
890859
891860 auto format = b::format (PROGRESS_FORMAT);
892861 format % hour % minute % second; // Time
893- format % ( mPushCount .load (std::memory_order_relaxed) / mPagesPerSuperpage );
894- format % ( mReadoutCount .load (std::memory_order_relaxed) / mPagesPerSuperpage );
862+ format % mSuperpagesPushed .load (std::memory_order_relaxed);
863+ format % mSuperpagesReadOut .load (std::memory_order_relaxed);
895864
896865 double runTime = std::chrono::duration<double >(steady_clock::now () - mRunTime .start ).count ();
897- double bytes = mOptions .byteCountEnabled ? double (mByteCount .load (std::memory_order_relaxed)) : double (mReadoutCount .load (std::memory_order_relaxed)) * mPageSize ;
866+ double bytes = mOptions .byteCountEnabled ? double (mByteCount .load (std::memory_order_relaxed)) : double (mSuperpagesReadOut .load (std::memory_order_relaxed)) * mSuperpageSize ;
898867 double Gb = bytes * 8 / (1000 * 1000 * 1000 );
899868 double Gbps = Gb / runTime;
900869 format % Gbps;
@@ -941,7 +910,7 @@ class ProgramDmaBench : public Program
941910 {
942911 // Calculating throughput
943912 double runTime = std::chrono::duration<double >(mRunTime .end - mRunTime .start ).count ();
944- double bytes = mOptions .byteCountEnabled ? double (mByteCount .load ()) : double (mReadoutCount .load ()) * mPageSize ;
913+ double bytes = mOptions .byteCountEnabled ? double (mByteCount .load ()) : double (mSuperpagesReadOut .load () * mSuperpageSize ) ;
945914 double GB = bytes / (1000 * 1000 * 1000 );
946915 double GBs = GB / runTime;
947916 double GiB = bytes / (1024 * 1024 * 1024 );
@@ -951,10 +920,10 @@ class ProgramDmaBench : public Program
951920 auto put = [&](auto label, auto value) { cout << b::format (" %-24s %-10s\n " ) % label % value; };
952921 cout << ' \n ' ;
953922 put (" Seconds" , runTime);
954- put (" Superpages" , mReadoutCount .load () / mPagesPerSuperpage );
955- put (" Superpage Latency(s)" , runTime / ( mReadoutCount .load () / mPagesPerSuperpage ));
956- put (" DMA Pages" , mReadoutCount .load ());
957- put (" DMA Page Latency(s)" , runTime / mReadoutCount .load ());
923+ put (" Superpages" , mSuperpagesReadOut .load ());
924+ put (" Superpage Latency(s)" , runTime / mSuperpagesReadOut .load ());
925+ put (" DMA Pages" , mDmaPagesReadOut .load ());
926+ put (" DMA Page Latency(s)" , runTime / mDmaPagesReadOut .load ());
958927 if (bytes > 0.00001 ) {
959928 put (" Bytes" , bytes);
960929 put (" GB" , GB);
@@ -1056,6 +1025,24 @@ class ProgramDmaBench : public Program
10561025 return limit;
10571026 }
10581027
1028+ // / Atomically fetch and increment the Superpage and DMA page read out and pushed counts.
1029+ // / We do this because they are accessed by multiple threads.
1030+ // / Although there is currently only one writer at a time and a regular increment probably would be OK.
1031+ uint64_t fetchAddDmaPagesReadOut ()
1032+ {
1033+ return mDmaPagesReadOut .fetch_add (1 , std::memory_order_relaxed);
1034+ }
1035+
1036+ uint64_t fetchAddSuperpagesReadOut ()
1037+ {
1038+ return mSuperpagesReadOut .fetch_add (1 , std::memory_order_relaxed);
1039+ }
1040+
1041+ uint64_t fetchAddSuperpagesPushed ()
1042+ {
1043+ return mSuperpagesPushed .fetch_add (1 , std::memory_order_relaxed);
1044+ }
1045+
10591046 struct RandomPauses {
10601047 static constexpr int NEXT_PAUSE_MIN = 10 ; // /< Minimum random pause interval in milliseconds
10611048 static constexpr int NEXT_PAUSE_MAX = 2000 ; // /< Maximum random pause interval in milliseconds
@@ -1123,12 +1110,19 @@ class ProgramDmaBench : public Program
11231110 // Event counter per link
11241111 std::array<std::atomic<uint32_t >, MAX_LINKS> mEventCounters ;
11251112
1126- // Keep these as DMA page counters for better granularity
1113+ // Superpage counters
1114+ // / Amount of Superpages pushed
1115+ std::atomic<uint64_t > mSuperpagesPushed { 0 };
1116+
1117+ // Amount of Superpages read out
1118+ std::atomic<uint64_t > mSuperpagesReadOut { 0 };
1119+
1120+ // DMA page counters for better granularity
11271121 // / Amount of DMA pages pushed
1128- std::atomic<uint64_t > mPushCount { 0 };
1122+ // std::atomic<uint64_t> mDmaPagesPushed { 0 };
11291123
11301124 // Amount of DMA pages read out
1131- std::atomic<uint64_t > mReadoutCount { 0 };
1125+ std::atomic<uint64_t > mDmaPagesReadOut { 0 };
11321126
11331127 // Amount of bytes read out (as reported in the RDH)
11341128 std::atomic<uint64_t > mByteCount { 0 };
@@ -1142,18 +1136,15 @@ class ProgramDmaBench : public Program
11421136 // / Size of superpages
11431137 size_t mSuperpageSize = 0 ;
11441138
1145- // / Maximum amount of superpages
1146- size_t mMaxSuperpages = 0 ;
1139+ // / Maximum amount of superpages to exchange
1140+ size_t mSuperpageLimit = 0 ;
11471141
1148- // / Amount of DMA pages per superpage
1149- size_t mPagesPerSuperpage = 0 ;
1142+ // / Maximum amount of superpages in buffer
1143+ size_t mSuperpagesInBuffer = 0 ;
11501144
11511145 // / Maximum size of pages
11521146 size_t mPageSize ;
11531147
1154- // / Maximum amount of pages to transfer
1155- size_t mMaxPages ;
1156-
11571148 // / The size of the channel DMA buffer
11581149 size_t mBufferSize ;
11591150
0 commit comments