@@ -122,6 +122,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
122
122
if (min_l > GEMM_Q ) min_l = GEMM_Q ;
123
123
min_i = min_l ;
124
124
if (min_i > GEMM_P ) min_i = GEMM_P ;
125
+ if ( min_i > GEMM_UNROLL_M ){
126
+ min_i = (min_i / GEMM_UNROLL_M ) * GEMM_UNROLL_M ;
127
+ }
125
128
126
129
START_RPCC ();
127
130
@@ -161,9 +164,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
161
164
}
162
165
163
166
164
- for (is = min_i ; is < min_l ; is += GEMM_P ){
167
+ for (is = min_i ; is < min_l ; is += min_i ){
165
168
min_i = min_l - is ;
166
169
if (min_i > GEMM_P ) min_i = GEMM_P ;
170
+ if ( min_i > GEMM_UNROLL_M ){
171
+ min_i = (min_i / GEMM_UNROLL_M ) * GEMM_UNROLL_M ;
172
+ }
167
173
168
174
START_RPCC ();
169
175
@@ -192,6 +198,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
192
198
if (min_l > GEMM_Q ) min_l = GEMM_Q ;
193
199
min_i = ls ;
194
200
if (min_i > GEMM_P ) min_i = GEMM_P ;
201
+ if ( min_i > GEMM_UNROLL_M ){
202
+ min_i = (min_i / GEMM_UNROLL_M ) * GEMM_UNROLL_M ;
203
+ }
204
+
195
205
196
206
START_RPCC ();
197
207
@@ -231,9 +241,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
231
241
STOP_RPCC (gemmcost );
232
242
}
233
243
234
- for (is = min_i ; is < ls ; is += GEMM_P ){
244
+ for (is = min_i ; is < ls ; is += min_i ){
235
245
min_i = ls - is ;
236
246
if (min_i > GEMM_P ) min_i = GEMM_P ;
247
+ if ( min_i > GEMM_UNROLL_M ){
248
+ min_i = (min_i / GEMM_UNROLL_M ) * GEMM_UNROLL_M ;
249
+ }
237
250
238
251
START_RPCC ();
239
252
@@ -256,9 +269,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
256
269
STOP_RPCC (gemmcost );
257
270
}
258
271
259
- for (is = ls ; is < ls + min_l ; is += GEMM_P ){
272
+ for (is = ls ; is < ls + min_l ; is += min_i ){
260
273
min_i = ls + min_l - is ;
261
274
if (min_i > GEMM_P ) min_i = GEMM_P ;
275
+ if ( min_i > GEMM_UNROLL_M ){
276
+ min_i = (min_i / GEMM_UNROLL_M ) * GEMM_UNROLL_M ;
277
+ }
262
278
263
279
START_RPCC ();
264
280
@@ -287,6 +303,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
287
303
if (min_l > GEMM_Q ) min_l = GEMM_Q ;
288
304
min_i = min_l ;
289
305
if (min_i > GEMM_P ) min_i = GEMM_P ;
306
+ if (min_i > GEMM_UNROLL_M ){
307
+ min_i = (min_i / GEMM_UNROLL_M ) * GEMM_UNROLL_M ;
308
+ }
309
+
290
310
291
311
START_RPCC ();
292
312
@@ -327,9 +347,14 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
327
347
STOP_RPCC (trmmcost );
328
348
}
329
349
330
- for (is = m - min_l + min_i ; is < m ; is += GEMM_P ){
350
+ for (is = m - min_l + min_i ; is < m ; is += min_i ){
331
351
min_i = m - is ;
332
352
if (min_i > GEMM_P ) min_i = GEMM_P ;
353
+ if (min_i > GEMM_UNROLL_M ){
354
+ min_i = (min_i / GEMM_UNROLL_M ) * GEMM_UNROLL_M ;
355
+ }
356
+
357
+
333
358
334
359
START_RPCC ();
335
360
@@ -357,6 +382,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
357
382
if (min_l > GEMM_Q ) min_l = GEMM_Q ;
358
383
min_i = min_l ;
359
384
if (min_i > GEMM_P ) min_i = GEMM_P ;
385
+ if (min_i > GEMM_UNROLL_M ){
386
+ min_i = (min_i / GEMM_UNROLL_M ) * GEMM_UNROLL_M ;
387
+ }
388
+
360
389
361
390
START_RPCC ();
362
391
@@ -397,9 +426,13 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
397
426
STOP_RPCC (trmmcost );
398
427
}
399
428
400
- for (is = ls - min_l + min_i ; is < ls ; is += GEMM_P ){
429
+ for (is = ls - min_l + min_i ; is < ls ; is += min_i ){
401
430
min_i = ls - is ;
402
431
if (min_i > GEMM_P ) min_i = GEMM_P ;
432
+ if (min_i > GEMM_UNROLL_M ){
433
+ min_i = (min_i / GEMM_UNROLL_M ) * GEMM_UNROLL_M ;
434
+ }
435
+
403
436
404
437
START_RPCC ();
405
438
@@ -423,9 +456,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
423
456
}
424
457
425
458
426
- for (is = ls ; is < m ; is += GEMM_P ){
459
+ for (is = ls ; is < m ; is += min_i ){
427
460
min_i = m - is ;
428
461
if (min_i > GEMM_P ) min_i = GEMM_P ;
462
+ if (min_i > GEMM_UNROLL_M ){
463
+ min_i = (min_i / GEMM_UNROLL_M ) * GEMM_UNROLL_M ;
464
+ }
429
465
430
466
START_RPCC ();
431
467
0 commit comments