Skip to content

Commit b6aac63

Browse files
committed
Better messages
1 parent 6865e92 commit b6aac63

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

Code/matmul/memory.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#define xxpermdi(XT, XA, XB, IM) asm("xxpermdi " #XT "," #XA "," #XB "," #IM)
2020
#define xxmfacc(XT) asm("xxmfacc " #XT)
2121
#define xvadddp(XT, XA, XB) asm("xvadddp " #XT "," #XA "," #XB)
22+
#define nop() asm("nop ")
2223

2324
void memory_load_1KiB
2425
(
@@ -27,6 +28,7 @@ void memory_load_1KiB
2728
)
2829
{
2930
asm("mtctr 4");
31+
asm("nop");
3032
asm("LOOP17:");
3133

3234
lxvp( 0+ 0, 3, 0+ 0); lxvp( 0+ 2, 3, 32+ 0);
@@ -83,8 +85,7 @@ double run_kernel
8385
#pragma omp parallel for
8486
for (int j=0; j<nthreads; j++)
8587
{
86-
uint32_t reps = count;
87-
for(; reps; reps--)
88+
for(uint32_t reps = count; reps; reps--)
8889
{
8990
kernel(A[j], r);
9091
}
@@ -112,7 +113,7 @@ void run_kernel_and_report
112113
volatile double GB = (1.0e-09)*N*sizeof(double)*count*omp_get_max_threads();
113114
elapsed = run_kernel(kernel, count, N, n);
114115
std::cout << std::setprecision(6);
115-
std::cout << "Time to run " << std::setw(30) << name << " (" << std::setw(9) << N << ") " << std::setw(6) << count << " times = " << std::setw(10) << std::fixed << elapsed << " seconds (" << std::setw(10) << std::scientific << GB/elapsed << " GB/s)" << std::endl;
116+
std::cout << "Time to run " << std::setw(30) << name << " (" << std::setw(9) << N << " doubles) " << std::setw(9) << count << " times = " << std::setw(10) << std::fixed << elapsed << " seconds (" << std::setw(10) << std::scientific << GB/elapsed << " GB/s)" << std::endl;
116117
}
117118

118119
#define RUN_KERNEL(kernel, count, N, n) run_kernel_and_report(kernel, count, #kernel, N, n)
@@ -127,8 +128,9 @@ int main
127128

128129
volatile double elapsed;
129130

131+
int nthreads = omp_get_max_threads();
130132
std::cout << "=========================================================================================================================" << std::endl;
131-
std::cout << "Running on " << omp_get_max_threads() << " threads" << std::endl;
133+
std::cout << "Running on " << nthreads << ((nthreads > 1) ? " threads" : " thread") << std::endl;
132134

133135
for (uint32_t i=1; i<1000000; i *= 2)
134136
RUN_KERNEL(memory_load_1KiB, memory_load_count/i, i*1024, 128);

0 commit comments

Comments
 (0)