Skip to content

Commit 5ae0977

Browse files
author
Kent Knox
committed
Merge pull request #47 from TimmyLiu/develop
enhancement of tune tool for better sgemm performance
2 parents 5106457 + c390b5c commit 5ae0977

File tree

3 files changed

+13
-4
lines changed

3 files changed

+13
-4
lines changed

src/include/granulation.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ typedef struct PGranularity {
4646
unsigned int wfSize;
4747
/** Record number of work-groups spawned */
4848
unsigned int numWGSpawned[2];
49+
/** Maximum work-group size */
50+
unsigned int maxWorkGroupSize;
4951
} PGranularity;
5052

5153
/**

src/library/blas/gens/gemm.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1104,6 +1104,8 @@ blockCheckCalcDecomp(
11041104
int check)
11051105
{
11061106
bool ret = true;
1107+
bool ret_multiple = false;
1108+
int i;
11071109

11081110
DUMMY_ARG_USAGE(subdimsNum);
11091111

@@ -1114,7 +1116,12 @@ blockCheckCalcDecomp(
11141116
minSize = (dtype == TYPE_COMPLEX_DOUBLE) ? 1 : 2;
11151117
ret = decompSanityCheck(subdims, minSize, maxSize, 24, dtype, true);
11161118
ret = ret && (subdims[0].bwidth == subdims[1].bwidth);
1117-
ret = ret && (pgran->wgSize[0] * pgran->wgSize[1] == 64);
1119+
for(i = 0; i < ( (pgran->maxWorkGroupSize) / (pgran->wfSize) ); i++)
1120+
{
1121+
// ret_multiple becomes true when wgSize[0] * wgSize[1] is a nonzero multiple of wfSize (previously hard-coded as 64) that does not exceed maxWorkGroupSize
1122+
ret_multiple = ret_multiple || ( pgran->wgSize[0] * pgran->wgSize[1] == pgran->wfSize * (i + 1) );
1123+
}
1124+
ret = ret && ret_multiple;
11181125
}
11191126
else {
11201127
calcPgranDedicated(pgran, subdims, 1, 3);

src/library/tools/tune/subdim.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,8 @@ initVector(SubDimInfo* sd)
213213
setVariable(sd, V_L1_BW, 6, &dim[0]);
214214
}
215215
else {
216-
setVariable(sd, V_L0_X, 4, &dim[4]);
217-
setVariable(sd, V_L0_Y, 4, &dim[4]);
216+
setVariable(sd, V_L0_X, 4, &dim[5]);
217+
setVariable(sd, V_L0_Y, 4, &dim[5]);
218218
setVariable(sd, V_L0_BW, 6, &dim[0]);
219219
setVariable(sd, V_L1_X, 6, &dim[0]);
220220
setVariable(sd, V_L1_Y, 6, &dim[0]);
@@ -258,7 +258,7 @@ calcPGranularity (SubDimInfo* sd)
258258

259259
pgran->wgDim = 2;
260260
pgran->wfSize = 64;
261-
261+
pgran->maxWorkGroupSize = sd->workGroupSizes;
262262

263263
// if pattern provides granularity calculation
264264
// call the pattern function

0 commit comments

Comments
 (0)