Skip to content

Commit d8ca587

Browse files
author
Timmy
committed
Fix hemm and symm hanging when a tuning KDB file is used, and fix ssyr2k crashing on Kaveri when a tuning KDB file is used
1 parent 7e239b6 commit d8ca587

File tree

2 files changed

+22
-8
lines changed

2 files changed

+22
-8
lines changed

src/library/blas/generic/solution_seq_make.c

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1435,9 +1435,12 @@ getStepGranulation(SolutionStep *step)
14351435
}
14361436
}
14371437

1438-
status = getGranularityInfo(&step->device, mempat->name,
1439-
step->args.dtype, step->extraFlags,
1440-
(int)MNK, dims, &step->pgran, &time);
1438+
if( step->funcID != CLBLAS_GEMM2 )
1439+
{
1440+
status = getGranularityInfo(&step->device, mempat->name,
1441+
step->args.dtype, step->extraFlags,
1442+
(int)MNK, dims, &step->pgran, &time);
1443+
}
14411444
/*
14421445
* Disable blocking for implementations dealing with cache reads
14431446
* from the global memory

src/library/blas/gens/syrxk.c

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121

2222
#include <string.h>
2323
#include <stdio.h>
24+
#include <stdlib.h>
2425
#include <assert.h>
2526

2627
#include <clBLAS.h>
@@ -1219,10 +1220,11 @@ genUpdateGenericDiagTile(
12191220
// type of the vectorized coordinates
12201221
Kstring vctype;
12211222
Kstring constOffs, constShifts, constMasks;
1222-
unsigned int i, j, nops;
1223+
unsigned int i, j, nops,size;
12231224
unsigned int maxFetches = 0;
12241225
const char *yname, *xname;
12251226
const char *ldcName;
1227+
char hexadec[1];
12261228

12271229
batch = createStmtBatch();
12281230
if (batch == NULL) {
@@ -1253,6 +1255,14 @@ genUpdateGenericDiagTile(
12531255
tifl = (isUpper) ? TILE_ITER_BACKWARD_ROWS :
12541256
TILE_ITER_BACKWARD_COLS;
12551257
iterInit(&iter, &tileTempC, 1, tifl);
1258+
nops = 0;
1259+
while (!iterIsEnd(&iter)) {
1260+
nops++;
1261+
size = nops / nrCols;
1262+
iterIterate(&iter);
1263+
}
1264+
1265+
iterInit(&iter, &tileTempC, 1, tifl);
12561266

12571267
initTmpResTile(&tileTempC, gset, true);
12581268

@@ -1316,7 +1326,7 @@ genUpdateGenericDiagTile(
13161326
maxFetches = umin(maxFetches, i);
13171327

13181328
// declare vectorized coordinates
1319-
declareDiagUpresIndexedVars(ctx, vctype.buf, "cc", tempRows);
1329+
declareDiagUpresIndexedVars(ctx, vctype.buf, "cc", size);
13201330

13211331
/*
13221332
* real y coordinate, offset mask and
@@ -1326,8 +1336,8 @@ genUpdateGenericDiagTile(
13261336
"unsigned int mask;\n"
13271337
"int hit;\n");
13281338
if (withBeta) {
1329-
declareDiagUpresIndexedVars(ctx, typeName, "alphaNew", tempRows);
1330-
declareDiagUpresIndexedVars(ctx, typeName, "betaNew", tempRows);
1339+
declareDiagUpresIndexedVars(ctx, typeName, "alphaNew", size);
1340+
declareDiagUpresIndexedVars(ctx, typeName, "betaNew", size);
13311341
}
13321342

13331343
// declare tile
@@ -1443,7 +1453,8 @@ genUpdateGenericDiagTile(
14431453
ksprintf(&kstr, "cc%u", i);
14441454
}
14451455
else {
1446-
ksprintf(&kstr, "cc%u.s%u", i, iter.col);
1456+
itoa(iter.col, hexadec, 16);
1457+
ksprintf(&kstr, "cc%u.s%s", i, hexadec);
14471458
}
14481459

14491460
// prepare multipliers and fetch

0 commit comments

Comments
 (0)