Skip to content

Commit 12eb053

Browse files
committed
Added debug control and better printing
1 parent 3b98527 commit 12eb053

File tree

1 file changed

+36
-71
lines changed

1 file changed

+36
-71
lines changed

Code/OptionG/matmul.cc

Lines changed: 36 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@ typedef int64_t s64;
1111

1212
const u32 COUNT = 1;
1313

14+
const u32 debug = 0;
15+
1416
class RV_t
1517
{
1618
public:
@@ -26,8 +28,7 @@ class RV_t
2628
virtual u32 VLEN() const = 0;
2729
virtual u32 lambda() const = 0;
2830
virtual u32 VLENE() const = 0;
29-
virtual u32 mu() const = 0;
30-
virtual u32 nu() const = 0;
31+
virtual u32 sigma() const = 0;
3132
};
3233

3334
template<u32 VLEN_, u32 lambda_>
@@ -52,18 +53,27 @@ class RVIME_t : public RV_t
5253
u32 L = VLEN_/SEW_;
5354
u32 m = L/lambda_;
5455
u32 n = m/lambda_;
55-
std::cout << "Executing double-precision vfmmacc : " << "vd = " << vd << ", vs1 = " << vs1 << ", vs2 = " << vs2 << ", L = " << L << ", m = " << m << ", n = " << n << std::endl;
56+
if (debug > 0)
57+
{
58+
std::cout << "Executing double-precision vfmmacc : " << "vd = " << vd << ", vs1 = " << vs1 << ", vs2 = " << vs2 << ", L = " << L << ", m = " << m << ", n = " << n << std::endl;
59+
}
5660

5761
for (u32 k=0; k<lambda_; k++)
5862
{
5963
for (u32 i=0; i<m; i++)
6064
for (u32 j=0; j<m; j++)
6165
{
62-
std::cout << "Computing VR[" << vd + j/lambda_ << "].f64[" << i + m*(j%lambda_) << "] += VR[" << vs1 << "].f64[" << i + m*k << "] * VR[" << vs2 << "].f64[" << j + m*k << "]" << std::endl;
66+
if (debug > 0)
67+
{
68+
std::cout << "Computing VR[" << vd + j/lambda_ << "].f64[" << i + m*(j%lambda_) << "] += VR[" << vs1 << "].f64[" << i + m*k << "] * VR[" << vs2 << "].f64[" << j + m*k << "]" << std::endl;
69+
}
6370
VR[vd + j/lambda_].f64[i + m*(j%lambda_)] += VR[vs1].f64[i + m*k] * VR[vs2].f64[j + m*k];
64-
std::cout << "VR[" << vs1 << "].f64[" << i + m*k << "] = " << VR[vs1].f64[i + m*k] << std::endl;
65-
std::cout << "VR[" << vs2 << "].f64[" << j + m*k << "] = " << VR[vs2].f64[j + m*k] << std::endl;
66-
std::cout << "VR[" << vd + j/lambda_ << "].f64[" << i + m*(j%lambda_) << "] = " << VR[vd + j/lambda_].f64[i + m*(j%lambda_)] << std::endl;
71+
if (debug > 0)
72+
{
73+
std::cout << "VR[" << vs1 << "].f64[" << i + m*k << "] = " << VR[vs1].f64[i + m*k] << std::endl;
74+
std::cout << "VR[" << vs2 << "].f64[" << j + m*k << "] = " << VR[vs2].f64[j + m*k] << std::endl;
75+
std::cout << "VR[" << vd + j/lambda_ << "].f64[" << i + m*(j%lambda_) << "] = " << VR[vd + j/lambda_].f64[i + m*(j%lambda_)] << std::endl;
76+
}
6777
}
6878
}
6979
}
@@ -105,13 +115,7 @@ class RVIME_t : public RV_t
105115
return lambda_;
106116
}
107117

108-
u32 mu() const
109-
{
110-
u32 L = VLEN_/SEW_;
111-
return L/lambda_;
112-
}
113-
114-
u32 nu() const
118+
u32 sigma() const
115119
{
116120
u32 L = VLEN_/SEW_;
117121
return L/lambda_;
@@ -129,7 +133,10 @@ class RVIME_t : public RV_t
129133
switch(SEW_)
130134
{
131135
case 64:
132-
std::cout << "Each basic vfmmacc produces " << B << " vector registers of output" << std::endl;
136+
if (debug > 0)
137+
{
138+
std::cout << "Each basic vfmmacc produces " << B << " vector registers of output" << std::endl;
139+
}
133140
for (u32 i=0; i<RMUL(); i++) for (u32 j=0; j<CMUL(); j++)
134141
vfmmacc_fp64(vd + B*i + B*j*RMUL(), vs1 + i, vs2 + j);
135142
break;
@@ -152,14 +159,14 @@ class RVIME_t : public RV_t
152159

153160
void vle64(u32 vd, double *A)
154161
{
155-
std::cout << "Loading VR[" << vd << "]" << std::endl;
162+
if (debug > 0) { std::cout << "Loading VR[" << vd << "]" << std::endl; }
156163
u32 L = VLEN_/SEW_;
157164
for (u32 i=0; i<L; i++) VR[vd].f64[i] = A[i];
158165
}
159166

160167
void vse64(u32 vs, double *A)
161168
{
162-
std::cout << "Storing VR[" << vs << "]" << std::endl;
169+
if (debug > 0) { std::cout << "Storing VR[" << vs << "]" << std::endl; }
163170
u32 L = VLEN_/SEW_;
164171
for (u32 i=0; i<L; i++) A[i] = VR[vs].f64[i];
165172
}
@@ -305,12 +312,15 @@ bool run_microgemm
305312
if (cmul > rmul) rmul = rmul*2;
306313
else cmul = cmul * 2;
307314
}
308-
std::cout << "RMUL = " << rmul << ", CMUL = " << cmul << std::endl;
315+
std::cout << "L = " << std::setw(2) << L << ", lambda = " << std::setw(2) << RV->lambda() << ", sigma = " << std::setw(2) << RV->sigma() << ", RMUL = " << rmul << ", CMUL = " << cmul;
316+
317+
u32 mu = rmul*RV->sigma();
318+
u32 nu = cmul*RV->sigma();
309319

310-
u32 M = rmul*RV->mu();
311-
u32 N = cmul*RV->nu();
320+
std::cout << ", microgemm geometry : " << std::setw(2) << mu << " x " << std::setw(2) << nu << std::endl;
312321

313-
std::cout << "Microgemm geometry : " << M << " x " << N << std::endl;
322+
u32 M = mu;
323+
u32 N = nu;
314324

315325
// Allocate A, B, and C panels
316326
double *A = new double[M*K]; for (u32 i=0; i<M*K; i++) A[i] = drand48() - 0.5;
@@ -327,8 +337,11 @@ bool run_microgemm
327337
double S = 0;
328338
for (u32 k=0; k<K; k++)
329339
{
330-
if ((2 == i) && (0 == j))
331-
std::cout << "A[" << i << ", " << k << "] = " << A[i+k*M] << std::endl;
340+
if (debug > 1)
341+
{
342+
if ((2 == i) && (0 == j))
343+
std::cout << "A[" << i << ", " << k << "] = " << A[i+k*M] << std::endl;
344+
}
332345
S += A[i+k*M]*B[j+k*N];
333346
}
334347
if (S != C[i+j*M])
@@ -347,54 +360,6 @@ bool run_microgemm
347360
return true;
348361
}
349362

350-
double run_kernel
351-
(
352-
void (kernel)(double*, double*, double*, double*),
353-
uint32_t count
354-
)
355-
{
356-
const uint32_t N = 1024*1024;
357-
double *A = (double*)aligned_alloc(4096, sizeof(double) * N); for (uint32_t i=0; i<N; i++) A[i] = drand48() - 0.5;
358-
double *B = (double*)aligned_alloc(4096, sizeof(double) * N); for (uint32_t i=0; i<N; i++) B[i] = drand48() - 0.5;
359-
double *C = (double*)aligned_alloc(4096, sizeof(double) * N); for (uint32_t i=0; i<N; i++) C[i] = drand48() - 0.5;
360-
double *D = (double*)aligned_alloc(4096, sizeof(double) * N); for (uint32_t i=0; i<N; i++) D[i] = drand48() - 0.5;
361-
362-
volatile double start, finish;
363-
364-
start = now();
365-
for(; count; count -= COUNT)
366-
{
367-
kernel(A,B,C,D);
368-
}
369-
finish = now();
370-
371-
free(D);
372-
free(C);
373-
free(B);
374-
free(A);
375-
376-
return (finish - start);
377-
}
378-
379-
void run_kernel_and_report
380-
(
381-
void (kernel)(double*, double*, double*, double*),
382-
uint32_t count,
383-
const char* name,
384-
uint32_t M,
385-
uint32_t N,
386-
uint32_t K
387-
)
388-
{
389-
volatile double elapsed;
390-
volatile double flops = 2.0*count*M*N*K;
391-
elapsed = run_kernel(kernel, count);
392-
std::cout << std::setprecision(6);
393-
std::cout << "Time to run " << std::setw(51) << name << " " << count << " times = " << std::setw(10) << std::fixed << elapsed << " seconds (" << std::setw(10) << std::scientific << flops/elapsed << " flops)" << std::endl;
394-
}
395-
396-
#define RUN_KERNEL(kernel, count, M, N, K) run_kernel_and_report(kernel, count, #kernel, M, N, K)
397-
398363
int main
399364
(
400365
int argc,

0 commit comments

Comments
 (0)