Skip to content

Commit 484dc16

Browse files
committed
Merge pull request #175 from hughperkins/fix-teardown-userkernels-rebase-develop
Fix teardown userkernels rebase develop
2 parents 75b0f92 + 2ea8c8a commit 484dc16

33 files changed

+146
-36
lines changed

src/library/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ set(AUTOGEMM_HEADERS
4343
)
4444

4545
set(AUTOGEMM_SRC
46+
${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.cc
4647
${CMAKE_BINARY_DIR}/include/AutoGemmIncludes/AutoGemmClKernels.cpp
4748
${CMAKE_BINARY_DIR}/include/AutoGemmIncludes/AutoGemmKernelBuildOptionsBinary.cpp
4849
${CMAKE_BINARY_DIR}/include/AutoGemmIncludes/AutoGemmKernelBinaries.cpp
@@ -851,6 +852,7 @@ set(CLBLAS_ALL_SOURCES
851852
#${USERGEMM_SRC}
852853
#${USERGEMM_HEADERS}
853854
)
855+
add_definitions(-DOPENCL_VERSION="${OPENCL_VERSION}")
854856
add_library(clBLAS ${CLBLAS_ALL_SOURCES})
855857
add_dependencies(clBLAS GENERATE_CLT)
856858

src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.cc

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// GENERATED using create_user_gemm_cl_kernels.py
2+
3+
#if defined( __APPLE__ ) || defined( __MACOSX )
4+
#include <OpenCL/cl.h>
5+
#else
6+
#include <CL/cl.h>
7+
#endif
8+
9+
cl_kernel sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel = NULL;
10+
cl_kernel sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_clKernel = NULL;
11+
cl_kernel sgemm_Col_NT_B1_MX064_NX032_KX16_COLUMN_clKernel = NULL;
12+
cl_kernel sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_clKernel = NULL;
13+
cl_kernel sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL;
14+
cl_kernel sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL;
15+
cl_kernel sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL;
16+
17+
#ifdef __cplusplus
18+
extern "C" {
19+
#endif
20+
void initUserGemmClKernels(void);
21+
#ifdef __cplusplus
22+
}
23+
#endif
24+
25+
void initUserGemmClKernels(void) {
26+
if(sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel != NULL) {
27+
clReleaseKernel(sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel);
28+
sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel = NULL;
29+
}
30+
if(sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_clKernel != NULL) {
31+
clReleaseKernel(sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_clKernel);
32+
sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_clKernel = NULL;
33+
}
34+
if(sgemm_Col_NT_B1_MX064_NX032_KX16_COLUMN_clKernel != NULL) {
35+
clReleaseKernel(sgemm_Col_NT_B1_MX064_NX032_KX16_COLUMN_clKernel);
36+
sgemm_Col_NT_B1_MX064_NX032_KX16_COLUMN_clKernel = NULL;
37+
}
38+
if(sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_clKernel != NULL) {
39+
clReleaseKernel(sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_clKernel);
40+
sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_clKernel = NULL;
41+
}
42+
if(sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_clKernel != NULL) {
43+
clReleaseKernel(sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_clKernel);
44+
sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL;
45+
}
46+
if(sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_clKernel != NULL) {
47+
clReleaseKernel(sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_clKernel);
48+
sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL;
49+
}
50+
if(sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel != NULL) {
51+
clReleaseKernel(sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel);
52+
sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL;
53+
}
54+
}

src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.h

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,24 @@
88
#include <CL/cl.h>
99
#endif
1010

11-
static cl_kernel sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel = NULL;
11+
extern cl_kernel sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel;
1212

13-
static cl_kernel sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_clKernel = NULL;
14-
static cl_kernel sgemm_Col_NT_B1_MX064_NX032_KX16_COLUMN_clKernel = NULL;
15-
static cl_kernel sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_clKernel = NULL;
13+
extern cl_kernel sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_clKernel;
14+
extern cl_kernel sgemm_Col_NT_B1_MX064_NX032_KX16_COLUMN_clKernel;
15+
extern cl_kernel sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_clKernel;
1616

17-
static cl_kernel sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL;
18-
static cl_kernel sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL;
19-
static cl_kernel sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL;
17+
extern cl_kernel sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_clKernel;
18+
extern cl_kernel sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_clKernel;
19+
extern cl_kernel sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel;
2020

2121
static const int user_kernel_count = 7;
2222

23+
#ifdef __cplusplus
24+
extern "C" {
25+
#endif
26+
void initUserGemmClKernels(void);
27+
#ifdef __cplusplus
28+
}
29+
#endif
30+
2331
#endif

src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmKernelSourceIncludes.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99

1010
//**** compiler flags
1111
//**** online compilation flags
12-
const char * const User_srcBuildOptions = "-cl-std=CL2.0";
13-
const char * const User_binBuildOptions = "-cl-std=CL2.0";
12+
const char * const User_srcBuildOptions = "-cl-std=CL" OPENCL_VERSION;
13+
const char * const User_binBuildOptions = "-cl-std=CL" OPENCL_VERSION;
1414

1515

1616
extern const unsigned int sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_workGroupNumRows;
src/library/blas/AutoGemm/UserGemmKernelSources/create_user_gemm_cl_kernels.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""
2+
Run from the same directory that this file is in
3+
Generates UserGemmClKernels.cc
4+
5+
Would be nice to use Jinja2 for this, but using plain file writes for now, for consistency
6+
"""
7+
8+
kernelNames = []
9+
# let's just read the kernel names from UserGemmClKernels.h:
10+
ifile = open('UserGemmClKernels.h', 'r')
11+
contents = ifile.read()
12+
for line in contents.split('\n'):
13+
if line.find('cl_kernel') < 0:
14+
continue
15+
kernelName = line.split()[2].split(';')[0] # fragile (relies on the name being the third whitespace-separated token), but works for now
16+
kernelNames.append(kernelName)
17+
ifile.close()
18+
19+
ofile = open('UserGemmClKernels.cc', 'w')
20+
21+
ofile.write('// GENERATED using create_user_gemm_cl_kernels.py\n')
22+
ofile.write('\n')
23+
24+
ofile.write('#if defined( __APPLE__ ) || defined( __MACOSX )\n')
25+
ofile.write('#include <OpenCL/cl.h>\n')
26+
ofile.write('#else\n')
27+
ofile.write('#include <CL/cl.h>\n')
28+
ofile.write('#endif\n')
29+
ofile.write('\n')
30+
31+
for kernelName in kernelNames:
32+
ofile.write('cl_kernel %s = NULL;\n' % kernelName)
33+
ofile.write('\n')
34+
35+
ofile.write('void initUserGemmClKernels(void) {\n')
36+
37+
for kernelName in kernelNames:
38+
ofile.write(' if(%s != NULL) {\n' % kernelName)
39+
ofile.write(' clReleaseKernel(%s);\n' % kernelName)
40+
ofile.write(' %s = NULL;\n' % kernelName)
41+
ofile.write(' }\n')
42+
43+
ofile.write('}\n')
44+
ofile.close()

src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NN_B0_MX048_NX048_KX08_src.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ __kernel void dgemm_Col_NN_B0_MX048_NX048_KX08 (
9696
C += offsetC;
9797

9898

99-
double rC[6][6] = {(double)0};
99+
double rC[6][6] = { {(double)0} };
100100
double rA[6];
101101
double rB[6];
102102
__local double lA[392];

src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NN_B1_MX048_NX048_KX08_src.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ __kernel void dgemm_Col_NN_B1_MX048_NX048_KX08 (
9494
C += offsetC;
9595

9696

97-
double rC[6][6] = {(double)0};
97+
double rC[6][6] = { {(double)0} };
9898
double rA[6];
9999
double rB[6];
100100

src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NT_B0_MX048_NX048_KX08_src.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ const char * const dgemm_Col_NT_B0_MX048_NX048_KX08_src = STRINGIFY(
108108
\n C += offsetC;
109109
\n
110110
\n
111-
\n double rC[6][6] = {(double)0};
111+
\n double rC[6][6] = { {(double)0} };
112112
\n double rA[6];
113113
\n double rB[6];
114114
\n

src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NT_B1_MX048_NX048_KX08_src.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ const char * const dgemm_Col_NT_B1_MX048_NX048_KX08_src = STRINGIFY(
107107
\n C += offsetC;
108108
\n
109109
\n
110-
\n double rC[6][6] = {(double)0};
110+
\n double rC[6][6] = { {(double)0} };
111111
\n double rA[6];
112112
\n double rB[6];
113113
\n

src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_TN_B0_MX048_NX048_KX08_src.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ __kernel void dgemm_Col_TN_B0_MX048_NX048_KX08_src (
3636
uint const offsetB,
3737
uint const offsetC )
3838
{
39-
double rC[6][6] = {(double)0};
39+
double rC[6][6] = { {(double)0} };
4040
double rA[1][6];
4141
double rB[1][6];
4242

0 commit comments

Comments
 (0)