Skip to content

Commit 56370c0

Browse files
author
unknown
committed
Enable the tune tool to test every work-group size that is a multiple of the wavefront size and no larger than the maximum work-group size, instead of testing only a work-group size equal to the wavefront size
1 parent 42284c0 commit 56370c0

File tree

3 files changed

+11
-2
lines changed

3 files changed

+11
-2
lines changed

src/include/granulation.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ typedef struct PGranularity {
4646
unsigned int wfSize;
4747
/** Record number of work-groups spawned */
4848
unsigned int numWGSpawned[2];
49+
/** Maximum work-group size */
50+
unsigned int maxWorkGroupSize;
4951
} PGranularity;
5052

5153
/**

src/library/blas/gens/gemm.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1104,6 +1104,8 @@ blockCheckCalcDecomp(
11041104
int check)
11051105
{
11061106
bool ret = true;
1107+
bool ret_multiple = false;
1108+
int i;
11071109

11081110
DUMMY_ARG_USAGE(subdimsNum);
11091111

@@ -1114,7 +1116,12 @@ blockCheckCalcDecomp(
11141116
minSize = (dtype == TYPE_COMPLEX_DOUBLE) ? 1 : 2;
11151117
ret = decompSanityCheck(subdims, minSize, maxSize, 24, dtype, true);
11161118
ret = ret && (subdims[0].bwidth == subdims[1].bwidth);
1117-
ret = ret && (pgran->wgSize[0] * pgran->wgSize[1] == 64);
1119+
for(i = 0; i < ( (pgran->maxWorkGroupSize) / (pgran->wfSize) ); i++)
1120+
{
1121+
// ret_multiple becomes true if wgSize[0] * wgSize[1] equals wfSize * (i + 1) for some i in the loop range, i.e. it is a multiple of wfSize no bigger than maxWorkGroupSize
1122+
ret_multiple = ret_multiple || ( pgran->wgSize[0] * pgran->wgSize[1] == pgran->wfSize * (i + 1) );
1123+
}
1124+
ret = ret && ret_multiple;
11181125
}
11191126
else {
11201127
calcPgranDedicated(pgran, subdims, 1, 3);

src/library/tools/tune/subdim.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ calcPGranularity (SubDimInfo* sd)
258258

259259
pgran->wgDim = 2;
260260
pgran->wfSize = 64;
261-
261+
pgran->maxWorkGroupSize = sd->workGroupSizes;
262262

263263
// if pattern provides granularity calculation
264264
// call the pattern function

0 commit comments

Comments
 (0)