@@ -2302,6 +2302,7 @@ int freadMain(freadMainArgs _args)
23022302 //*********************************************************************************************
23032303 bool stopTeam = false, firstTime = true, restartTeam = false; // bool for MT-safety (cannot ever read half written bool value badly)
23042304 int nTypeBump = 0 , nTypeBumpCols = 0 ;
2305+ double thRead = 0 , thPush = 0 ; // reductions of timings within the parallel region
23052306 int max_col = 0 ;
23062307 char * typeBumpMsg = NULL ; size_t typeBumpMsgSize = 0 ;
23072308 int typeCounts [NUMTYPE ]; // used for verbose output; needs populating after first read and before reread (if any) -- see later comment
@@ -2397,9 +2398,7 @@ int freadMain(freadMainArgs _args)
23972398 }
23982399 prepareThreadContext (& ctx );
23992400
2400- double th_read_openmp = timestamps .th_read ;//pragma workaround
2401- double th_push_openmp = timestamps .th_push ;
2402- #pragma omp for ordered schedule(dynamic) reduction(+:th_read_openmp,th_push_openmp) reduction(max:max_col)
2401+ #pragma omp for ordered schedule(dynamic) reduction(+:thRead,thPush) reduction(max:max_col)
24032402 for (int jump = jump0 ; jump < nJumps ; jump ++ ) {
24042403 if (stopTeam ) continue ; // must continue and not break. We desire not to depend on (relatively new) omp cancel directive, yet
24052404 double tLast = 0.0 ; // thread local wallclock time at last measuring point for verbose mode only.
@@ -2420,7 +2419,7 @@ int freadMain(freadMainArgs _args)
24202419 myNrow = 0 ;
24212420 if (verbose || myShowProgress ) {
24222421 double now = wallclock ();
2423- timestamps . th_push += now - tLast ;
2422+ thPush += now - tLast ;
24242423 tLast = now ;
24252424 if (myShowProgress && /*wait for all threads to process 2 jumps*/ jump >= nth * 2 ) {
24262425 // Important for thread safety inside progress() that this is called not just from critical but that
@@ -2623,7 +2622,7 @@ int freadMain(freadMainArgs _args)
26232622 if (tch != eof ) tch ++ ;
26242623 myNrow ++ ;
26252624 }
2626- if (verbose ) { double now = wallclock (); timestamps . th_read += now - tLast ; tLast = now ; }
2625+ if (verbose ) { double now = wallclock (); thRead += now - tLast ; tLast = now ; }
26272626 ctx .anchor = thisJumpStart ;
26282627 ctx .nRows = myNrow ;
26292628 postprocessBuffer (& ctx );
@@ -2687,17 +2686,14 @@ int freadMain(freadMainArgs _args)
26872686 // Ordered has to be last in some OpenMP implementations currently. Logically though, pushBuffer happens now.
26882687 }
26892688
2690- timestamps .th_read = th_read_openmp ;//pragma workaround
2691- timestamps .th_push = th_push_openmp ;
2692-
26932689 // End for loop over all jump points
26942690
26952691 // Push out all buffers one last time (only needed because of gomp ordered workaround above with push first in the loop)
26962692 // If stopped early, this will happen once for thread at headPos (the only one left with myNrow>0)
26972693 if (myNrow ) {
26982694 double now = verbose ? wallclock () : 0 ;
26992695 pushBuffer (& ctx );
2700- if (verbose ) timestamps . th_push += wallclock () - now ;
2696+ if (verbose ) thPush += wallclock () - now ;
27012697 }
27022698 // Each thread to free their own buffer.
27032699 free (ctx .buff8 ); ctx .buff8 = NULL ;
@@ -2857,12 +2853,12 @@ int freadMain(freadMainArgs _args)
28572853 timestamps .coltype - timestamps .layout , 100.0 * (timestamps .coltype - timestamps .layout ) / timestamps .tot , sampleLines );
28582854 DTPRINT (_ ("%8.3fs (%3.0f%%) Allocation of %" PRId64 " rows x %d cols (%.3fGiB) of which %" PRId64 " (%3.0f%%) rows used\n" ),
28592855 timestamps .alloc - timestamps .coltype , 100.0 * (timestamps .alloc - timestamps .coltype ) / timestamps .tot , allocnrow , ncol , DTbytes / (1024.0 * 1024 * 1024 ), DTi , 100.0 * DTi / allocnrow );
2860- timestamps . th_read /= nth ; timestamps . th_push /= nth ;
2861- double thWaiting = timestamps .reread - timestamps .alloc - timestamps . th_read - timestamps . th_push ;
2856+ thRead /= nth ; thPush /= nth ;
2857+ double thWaiting = timestamps .reread - timestamps .alloc - thRead - thPush ;
28622858 DTPRINT (_ ("%8.3fs (%3.0f%%) Reading %d chunks (%d swept) of %.3fMiB (each chunk %" PRId64 " rows) using %d threads\n" ),
28632859 timestamps .reread - timestamps .alloc , 100.0 * (timestamps .reread - timestamps .alloc ) / timestamps .tot , nJumps , nSwept , (double )chunkBytes / (1024 * 1024 ), DTi / nJumps , nth );
2864- DTPRINT (_ (" + %8.3fs (%3.0f%%) Parse to row-major thread buffers (grown %d times)\n" ), timestamps . th_read , 100.0 * timestamps . th_read / timestamps .tot , buffGrown );
2865- DTPRINT (_ (" + %8.3fs (%3.0f%%) Transpose\n" ), timestamps . th_push , 100.0 * timestamps . th_push / timestamps .tot );
2860+ DTPRINT (_ (" + %8.3fs (%3.0f%%) Parse to row-major thread buffers (grown %d times)\n" ), thRead , 100.0 * thRead / timestamps .tot , buffGrown );
2861+ DTPRINT (_ (" + %8.3fs (%3.0f%%) Transpose\n" ), thPush , 100.0 * thPush / timestamps .tot );
28662862 DTPRINT (_ (" + %8.3fs (%3.0f%%) Waiting\n" ), thWaiting , 100.0 * thWaiting / timestamps .tot );
28672863 DTPRINT (_ ("%8.3fs (%3.0f%%) Rereading %d columns due to out-of-sample type exceptions\n" ),
28682864 timestamps .reread - timestamps .read , 100.0 * (timestamps .reread - timestamps .read ) / timestamps .tot , nTypeBumpCols );
0 commit comments