@@ -121,100 +121,124 @@ static void *huge_malloc(BLASLONG size){
121
121
int main (int argc , char * argv []){
122
122
123
123
FLOAT * a , * b , * c ;
124
- FLOAT alpha [] = {1.0 , 1 .0 };
124
+ FLOAT alpha [] = {1.0 , 0 .0 };
125
125
FLOAT beta [] = {0.0 , 0.0 };
126
- char trans = 'N' ;
127
- blasint m , n , i , j ;
126
+ char transa = 'N' ;
127
+ char transb = 'N' ;
128
+ blasint m , n , k , i , j , lda , ldb , ldc ;
128
129
int loops = 1 ;
129
- int has_param_n = 0 ;
130
- int l ;
130
+ int has_param_m = 0 ;
131
+ int has_param_n = 0 ;
132
+ int has_param_k = 0 ;
131
133
char * p ;
132
134
133
135
int from = 1 ;
134
136
int to = 200 ;
135
137
int step = 1 ;
136
138
137
139
struct timeval start , stop ;
138
- double time1 ,timeg ;
140
+ double time1 , timeg ;
139
141
140
142
argc -- ;argv ++ ;
141
143
142
- if (argc > 0 ) { from = atol (* argv ); argc -- ; argv ++ ;}
143
- if (argc > 0 ) { to = MAX (atol (* argv ), from ); argc -- ; argv ++ ;}
144
- if (argc > 0 ) { step = atol (* argv ); argc -- ; argv ++ ;}
144
+ if (argc > 0 ) { from = atol (* argv ); argc -- ; argv ++ ; }
145
+ if (argc > 0 ) { to = MAX (atol (* argv ), from ); argc -- ; argv ++ ; }
146
+ if (argc > 0 ) { step = atol (* argv ); argc -- ; argv ++ ; }
145
147
146
- if ((p = getenv ("OPENBLAS_TRANS" ))) trans = * p ;
148
+ if ((p = getenv ("OPENBLAS_TRANS" ))) {
149
+ transa = * p ;
150
+ transb = * p ;
151
+ }
152
+ if ((p = getenv ("OPENBLAS_TRANSA" ))) {
153
+ transa = * p ;
154
+ }
155
+ if ((p = getenv ("OPENBLAS_TRANSB" ))) {
156
+ transb = * p ;
157
+ }
158
+ TOUPPER (transa );
159
+ TOUPPER (transb );
147
160
148
- fprintf (stderr , "From : %3d To : %3d Step=%d : Trans =%c\n" , from , to , step , trans );
161
+ fprintf (stderr , "From : %3d To : %3d Step=%d : Transa =%c : Transb=%c \n" , from , to , step , transa , transb );
149
162
150
- if (( a = (FLOAT * )malloc (sizeof (FLOAT ) * to * to * COMPSIZE )) == NULL ){
151
- fprintf (stderr ,"Out of Memory!!\n" );exit (1 );
163
+ p = getenv ("OPENBLAS_LOOPS" );
164
+ if ( p != NULL ) {
165
+ loops = atoi (p );
152
166
}
153
167
154
- if (( b = (FLOAT * )malloc (sizeof (FLOAT ) * to * to * COMPSIZE )) == NULL ){
155
- fprintf (stderr ,"Out of Memory!!\n" );exit (1 );
168
+ if ((p = getenv ("OPENBLAS_PARAM_M" ))) {
169
+ m = atoi (p );
170
+ has_param_m = 1 ;
171
+ } else {
172
+ m = to ;
173
+ }
174
+ if ((p = getenv ("OPENBLAS_PARAM_N" ))) {
175
+ n = atoi (p );
176
+ has_param_n = 1 ;
177
+ } else {
178
+ n = to ;
179
+ }
180
+ if ((p = getenv ("OPENBLAS_PARAM_K" ))) {
181
+ k = atoi (p );
182
+ has_param_k = 1 ;
183
+ } else {
184
+ k = to ;
156
185
}
157
186
158
- if (( c = (FLOAT * )malloc (sizeof (FLOAT ) * to * to * COMPSIZE )) == NULL ){
187
+ if (( a = (FLOAT * )malloc (sizeof (FLOAT ) * m * k * COMPSIZE )) == NULL ) {
159
188
fprintf (stderr ,"Out of Memory!!\n" );exit (1 );
160
189
}
161
-
162
- p = getenv ("OPENBLAS_LOOPS" );
163
- if ( p != NULL )
164
- loops = atoi (p );
165
-
166
- if ((p = getenv ("OPENBLAS_PARAM_N" ))) {
167
- n = atoi (p );
168
- has_param_n = 1 ;
190
+ if (( b = (FLOAT * )malloc (sizeof (FLOAT ) * k * n * COMPSIZE )) == NULL ) {
191
+ fprintf (stderr ,"Out of Memory!!\n" );exit (1 );
192
+ }
193
+ if (( c = (FLOAT * )malloc (sizeof (FLOAT ) * m * n * COMPSIZE )) == NULL ) {
194
+ fprintf (stderr ,"Out of Memory!!\n" );exit (1 );
169
195
}
170
196
171
197
#ifdef linux
172
198
srandom (getpid ());
173
199
#endif
174
-
175
- for (j = 0 ; j < to ; j ++ ){
176
- for (i = 0 ; i < to * COMPSIZE ; i ++ ){
177
- a [i + j * to * COMPSIZE ] = ((FLOAT ) rand () / (FLOAT ) RAND_MAX ) - 0.5 ;
178
- b [i + j * to * COMPSIZE ] = ((FLOAT ) rand () / (FLOAT ) RAND_MAX ) - 0.5 ;
179
- c [i + j * to * COMPSIZE ] = ((FLOAT ) rand () / (FLOAT ) RAND_MAX ) - 0.5 ;
180
- }
181
- }
182
-
183
-
184
-
185
- fprintf (stderr , " SIZE Flops Time\n" );
186
200
187
- for (m = from ; m <= to ; m += step )
188
- {
201
+ for (i = 0 ; i < m * k * COMPSIZE ; i ++ ) {
202
+ a [i ] = ((FLOAT ) rand () / (FLOAT ) RAND_MAX ) - 0.5 ;
203
+ }
204
+ for (i = 0 ; i < k * n * COMPSIZE ; i ++ ) {
205
+ b [i ] = ((FLOAT ) rand () / (FLOAT ) RAND_MAX ) - 0.5 ;
206
+ }
207
+ for (i = 0 ; i < m * n * COMPSIZE ; i ++ ) {
208
+ c [i ] = ((FLOAT ) rand () / (FLOAT ) RAND_MAX ) - 0.5 ;
209
+ }
210
+
211
+ fprintf (stderr , " SIZE Flops Time\n" );
189
212
213
+ for (i = from ; i <= to ; i += step ) {
214
+
190
215
timeg = 0 ;
191
216
192
- if ( has_param_n == 1 && n <= m )
193
- n = n ;
194
- else
195
- n = m ;
217
+ if (!has_param_m ) { m = i ; }
218
+ if (!has_param_n ) { n = i ; }
219
+ if (!has_param_k ) { k = i ; }
196
220
221
+ if (transa == 'N' ) { lda = m ; }
222
+ else { lda = k ; }
223
+ if (transb == 'N' ) { ldb = k ; }
224
+ else { ldb = n ; }
225
+ ldc = m ;
197
226
198
-
199
- fprintf (stderr , " %6dx%d : " , (int )m , (int )n );
227
+ fprintf (stderr , " M=%4d, N=%4d, K=%4d : " , (int )m , (int )n , (int )k );
200
228
gettimeofday ( & start , (struct timezone * )0 );
201
229
202
- for (l = 0 ; l < loops ; l ++ )
203
- {
204
-
205
- GEMM (& trans , & trans , & m , & n , & m , alpha , a , & m , b , & m , beta , c , & m );
206
-
207
-
208
-
230
+ for (j = 0 ; j < loops ; j ++ ) {
231
+ GEMM (& transa , & transb , & m , & n , & k , alpha , a , & lda , b , & ldb , beta , c , & ldc );
209
232
}
210
- gettimeofday ( & stop , (struct timezone * )0 );
211
- time1 = (double )(stop .tv_sec - start .tv_sec ) + (double )((stop .tv_usec - start .tv_usec )) * 1.e-6 ;
233
+
234
+ gettimeofday ( & stop , (struct timezone * )0 );
235
+ time1 = (double )(stop .tv_sec - start .tv_sec ) + (double )((stop .tv_usec - start .tv_usec )) * 1.e-6 ;
212
236
213
237
timeg = time1 /loops ;
214
238
fprintf (stderr ,
215
239
" %10.2f MFlops %10.6f sec\n" ,
216
240
COMPSIZE * COMPSIZE * 2. * (double )m * (double )m * (double )n / timeg * 1.e-6 , time1 );
217
-
241
+
218
242
}
219
243
220
244
return 0 ;
0 commit comments