@@ -344,7 +344,11 @@ class ProgramDmaBench: public Program
344344 }
345345
346346 // Lock-free queues. Usable size is (size-1), so we add 1
347+ // / Queue for passing filled superpages from the push thread to the readout thread
347348 folly::ProducerConsumerQueue<size_t > readoutQueue {static_cast <uint32_t >(mMaxSuperpages ) + 1 };
349+ // / Queue for free superpages. This starts out as full, then the readout thread consumes them. When superpages
350+ // / arrive, they are passed via the readoutQueue to the readout thread. When the readout thread is done with it,
351+ // / it is put back in the freeQueue.
348352 folly::ProducerConsumerQueue<size_t > freeQueue {static_cast <uint32_t >(mMaxSuperpages ) + 1 };
349353 for (size_t i = 0 ; i < mMaxSuperpages ; ++i) {
350354 size_t offset = i * mSuperpageSize ;
@@ -481,7 +485,7 @@ class ProgramDmaBench: public Program
481485 // Read out pages
482486 int pages = mSuperpageSize / mPageSize ;
483487 for (int i = 0 ; i < pages; ++i) {
484- auto readoutCount = mReadoutCount . fetch_add ( 1 , std::memory_order_relaxed );
488+ auto readoutCount = fetchAddReadoutCount ( );
485489 readoutPage (mBufferBaseAddress + offset + i * mPageSize , mPageSize , readoutCount);
486490 }
487491
@@ -501,22 +505,32 @@ class ProgramDmaBench: public Program
501505 lowPriorityFuture.get ();
502506 }
503507
508+ // / Atomically fetch and increment the readout count. We do this because it is accessed by multiple threads.
509+ // / Although there is currently only one writer at a time and a regular increment probably would be OK.
510+ uint64_t fetchAddReadoutCount ()
511+ {
512+ return mReadoutCount .fetch_add (1 , std::memory_order_relaxed);
513+ }
514+
504515 // / Free the pages that were pushed in excess
505516 void freeExcessPages (std::chrono::milliseconds timeout)
506517 {
518+ // First deal with the remaining filled superpages
507519 auto start = std::chrono::steady_clock::now ();
508520 int popped = 0 ;
509521 while ((std::chrono::steady_clock::now () - start) < timeout) {
510522 if (mChannel ->getReadyQueueSize () > 0 ) {
511523 auto superpage = mChannel ->getSuperpage ();
512524 if (superpage.isFilled ()) {
525+ readoutPage (mBufferBaseAddress + superpage.getOffset (), superpage.getSize (), fetchAddReadoutCount ());
513526 mChannel ->popSuperpage ();
514527 popped += superpage.getReceived () / mPageSize ;
515528 }
516529 }
517530 }
531+
518532 std::cout << " \n\n " ;
519- getLogger () << " Popped " << popped << " excess pages" << endm;
533+ getLogger () << " Popped " << popped << " excess filled pages" << endm;
520534 }
521535
522536 uint32_t get32bitFromPage (uintptr_t pageAddress, size_t index)
@@ -545,9 +559,7 @@ class ProgramDmaBench: public Program
545559 void readoutPage (uintptr_t pageAddress, size_t pageSize, int64_t readoutCount)
546560 {
547561 // Read out to file
548- if (mOptions .fileOutputAscii || mOptions .fileOutputBin ) {
549- printToFile (pageAddress, pageSize, readoutCount);
550- }
562+ printToFile (pageAddress, pageSize, readoutCount);
551563
552564 // Data error checking
553565 if (!mOptions .noErrorCheck ) {
@@ -815,6 +827,7 @@ class ProgramDmaBench: public Program
815827 }
816828 }
817829
830+ // / Prints the page to a file in ASCII or binary format if such output is enabled
818831 void printToFile (uintptr_t pageAddress, size_t pageSize, int64_t pageNumber)
819832 {
820833 auto page = reinterpret_cast <const volatile uint32_t *>(pageAddress);
0 commit comments