Skip to content

Commit a89d671

Browse files
author
Tim Moon
committed
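Increasing flexibility of the GEMM benchmark.
m, n, and k can each be fixed to an arbitrary constant through the OPENBLAS_PARAM_M, OPENBLAS_PARAM_N, and OPENBLAS_PARAM_K environment variables; any dimension left unset is swept over the from/to/step range. The A and B matrices can be transposed independently through OPENBLAS_TRANSA and OPENBLAS_TRANSB (OPENBLAS_TRANS still sets both at once).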
1 parent: 0e6b11b · commit: a89d671

File tree

1 file changed

+78
-54
lines changed

1 file changed

+78
-54
lines changed

benchmark/gemm.c

Lines changed: 78 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -121,100 +121,124 @@ static void *huge_malloc(BLASLONG size){
121121
int main(int argc, char *argv[]){
122122

123123
FLOAT *a, *b, *c;
124-
FLOAT alpha[] = {1.0, 1.0};
124+
FLOAT alpha[] = {1.0, 0.0};
125125
FLOAT beta [] = {0.0, 0.0};
126-
char trans='N';
127-
blasint m, n, i, j;
126+
char transa = 'N';
127+
char transb = 'N';
128+
blasint m, n, k, i, j, lda, ldb, ldc;
128129
int loops = 1;
129-
int has_param_n=0;
130-
int l;
130+
int has_param_m = 0;
131+
int has_param_n = 0;
132+
int has_param_k = 0;
131133
char *p;
132134

133135
int from = 1;
134136
int to = 200;
135137
int step = 1;
136138

137139
struct timeval start, stop;
138-
double time1,timeg;
140+
double time1, timeg;
139141

140142
argc--;argv++;
141143

142-
if (argc > 0) { from = atol(*argv); argc--; argv++;}
143-
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
144-
if (argc > 0) { step = atol(*argv); argc--; argv++;}
144+
if (argc > 0) { from = atol(*argv); argc--; argv++; }
145+
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; }
146+
if (argc > 0) { step = atol(*argv); argc--; argv++; }
145147

146-
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
148+
if ((p = getenv("OPENBLAS_TRANS"))) {
149+
transa=*p;
150+
transb=*p;
151+
}
152+
if ((p = getenv("OPENBLAS_TRANSA"))) {
153+
transa=*p;
154+
}
155+
if ((p = getenv("OPENBLAS_TRANSB"))) {
156+
transb=*p;
157+
}
158+
TOUPPER(transa);
159+
TOUPPER(transb);
147160

148-
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans);
161+
fprintf(stderr, "From : %3d To : %3d Step=%d : Transa=%c : Transb=%c\n", from, to, step, transa, transb);
149162

150-
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
151-
fprintf(stderr,"Out of Memory!!\n");exit(1);
163+
p = getenv("OPENBLAS_LOOPS");
164+
if ( p != NULL ) {
165+
loops = atoi(p);
152166
}
153167

154-
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
155-
fprintf(stderr,"Out of Memory!!\n");exit(1);
168+
if ((p = getenv("OPENBLAS_PARAM_M"))) {
169+
m = atoi(p);
170+
has_param_m=1;
171+
} else {
172+
m = to;
173+
}
174+
if ((p = getenv("OPENBLAS_PARAM_N"))) {
175+
n = atoi(p);
176+
has_param_n=1;
177+
} else {
178+
n = to;
179+
}
180+
if ((p = getenv("OPENBLAS_PARAM_K"))) {
181+
k = atoi(p);
182+
has_param_k=1;
183+
} else {
184+
k = to;
156185
}
157186

158-
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
187+
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * m * k * COMPSIZE)) == NULL) {
159188
fprintf(stderr,"Out of Memory!!\n");exit(1);
160189
}
161-
162-
p = getenv("OPENBLAS_LOOPS");
163-
if ( p != NULL )
164-
loops = atoi(p);
165-
166-
if ((p = getenv("OPENBLAS_PARAM_N"))) {
167-
n = atoi(p);
168-
has_param_n=1;
190+
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * k * n * COMPSIZE)) == NULL) {
191+
fprintf(stderr,"Out of Memory!!\n");exit(1);
192+
}
193+
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) {
194+
fprintf(stderr,"Out of Memory!!\n");exit(1);
169195
}
170196

171197
#ifdef linux
172198
srandom(getpid());
173199
#endif
174-
175-
for(j = 0; j < to; j++){
176-
for(i = 0; i < to * COMPSIZE; i++){
177-
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
178-
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
179-
c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
180-
}
181-
}
182-
183-
184-
185-
fprintf(stderr, " SIZE Flops Time\n");
186200

187-
for(m = from; m <= to; m += step)
188-
{
201+
for (i = 0; i < m * k * COMPSIZE; i++) {
202+
a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
203+
}
204+
for (i = 0; i < k * n * COMPSIZE; i++) {
205+
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
206+
}
207+
for (i = 0; i < m * n * COMPSIZE; i++) {
208+
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
209+
}
210+
211+
fprintf(stderr, " SIZE Flops Time\n");
189212

213+
for (i = from; i <= to; i += step) {
214+
190215
timeg=0;
191216

192-
if ( has_param_n == 1 && n <= m )
193-
n=n;
194-
else
195-
n=m;
217+
if (!has_param_m) { m = i; }
218+
if (!has_param_n) { n = i; }
219+
if (!has_param_k) { k = i; }
196220

221+
if (transa == 'N') { lda = m; }
222+
else { lda = k; }
223+
if (transb == 'N') { ldb = k; }
224+
else { ldb = n; }
225+
ldc = m;
197226

198-
199-
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
227+
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
200228
gettimeofday( &start, (struct timezone *)0);
201229

202-
for (l=0; l<loops; l++)
203-
{
204-
205-
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
206-
207-
208-
230+
for (j=0; j<loops; j++) {
231+
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
209232
}
210-
gettimeofday( &stop, (struct timezone *)0);
211-
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
233+
234+
gettimeofday( &stop, (struct timezone *)0);
235+
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
212236

213237
timeg = time1/loops;
214238
fprintf(stderr,
215239
" %10.2f MFlops %10.6f sec\n",
216240
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
217-
241+
218242
}
219243

220244
return 0;

0 commit comments

Comments
 (0)