Skip to content

Commit f9a2250

Browse files
author
Timmy
committed
Merge pull request #111 from guacamoleo/develop
Enables apiCallCount for zgemm within the client and fixes a zgemm bug
2 parents 6f476b8 + 03ae187 commit f9a2250

File tree

3 files changed

+23
-12
lines changed

3 files changed

+23
-12
lines changed

src/client/clfunc_xgemm.hpp

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1013,12 +1013,15 @@ void
10131013
xGemm<cl_double>::
10141014
xGemm_Function(bool flush, cl_uint apiCallCount )
10151015
{
1016-
clblasDgemm(order_, buffer_.trans_a_, buffer_.trans_b_,
1016+
for (int i = 0; i < apiCallCount; i++)
1017+
{
1018+
clblasDgemm(order_, buffer_.trans_a_, buffer_.trans_b_,
10171019
buffer_.m_, buffer_.n_, buffer_.k_, buffer_.alpha_,
10181020
buffer_.buf_a_, buffer_.offA_, buffer_.lda_,
10191021
buffer_.buf_b_, buffer_.offB_, buffer_.ldb_,
10201022
buffer_.beta_, buffer_.buf_c_, buffer_.offC_,
10211023
buffer_.ldc_, 1, &queue_, 0, NULL, &event_);
1024+
}
10221025
//flush==true if only the kernel time (library call) is timed
10231026
//flush==false if memory time is also timed
10241027
if (flush==true)
@@ -1032,12 +1035,15 @@ void
10321035
xGemm<cl_float2>::
10331036
xGemm_Function(bool flush, cl_uint apiCallCount )
10341037
{
1035-
clblasCgemm(order_, buffer_.trans_a_, buffer_.trans_b_,
1038+
for (int i = 0; i < apiCallCount; i++)
1039+
{
1040+
clblasCgemm(order_, buffer_.trans_a_, buffer_.trans_b_,
10361041
buffer_.m_, buffer_.n_, buffer_.k_, buffer_.alpha_,
10371042
buffer_.buf_a_, buffer_.offA_, buffer_.lda_,
10381043
buffer_.buf_b_, buffer_.offB_, buffer_.ldb_,
10391044
buffer_.beta_, buffer_.buf_c_, buffer_.offC_,
10401045
buffer_.ldc_, 1, &queue_, 0, NULL, &event_);
1046+
}
10411047
//flush==true if only the kernel time (library call) is timed
10421048
//flush==false if memory time is also timed
10431049
if (flush==true)
@@ -1051,12 +1057,15 @@ void
10511057
xGemm<cl_double2>::
10521058
xGemm_Function(bool flush, cl_uint apiCallCount )
10531059
{
1054-
clblasZgemm(order_, buffer_.trans_a_, buffer_.trans_b_,
1060+
for (int i = 0; i < apiCallCount; i++)
1061+
{
1062+
clblasZgemm(order_, buffer_.trans_a_, buffer_.trans_b_,
10551063
buffer_.m_, buffer_.n_, buffer_.k_, buffer_.alpha_,
10561064
buffer_.buf_a_, buffer_.offA_, buffer_.lda_,
10571065
buffer_.buf_b_, buffer_.offB_, buffer_.ldb_,
10581066
buffer_.beta_, buffer_.buf_c_, buffer_.offC_,
10591067
buffer_.ldc_, 1, &queue_, 0, NULL, &event_);
1068+
}
10601069
//flush==true if only the kernel time (library call) is timed
10611070
//flush==false if memory time is also timed
10621071
if (flush==true)
@@ -1070,15 +1079,15 @@ double
10701079
xGemm<cl_float2>::
10711080
gflops()
10721081
{
1073-
return (8.0*buffer_.m_*buffer_.n_*buffer_.k_)/time_in_ns();
1082+
return (8.0*buffer_.m_*buffer_.n_*buffer_.k_)/(time_in_ns() / buffer_.apiCallCount);
10741083
}
10751084

10761085
template<>
10771086
double
10781087
xGemm<cl_double2>::
10791088
gflops()
10801089
{
1081-
return (8.0*buffer_.m_*buffer_.n_*buffer_.k_)/time_in_ns();
1090+
return (8.0*buffer_.m_*buffer_.n_*buffer_.k_)/(time_in_ns() / buffer_.apiCallCount);
10821091
}
10831092

10841093
template<>

src/library/blas/functor/hawaii.cc

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -168,16 +168,13 @@ clblasSgemmFunctor * FunctorSelectorHawaii::select_sgemm_specific(clblasSgemmFun
168168
clblasZgemmFunctor * FunctorSelectorHawaii::select_zgemm_specific(clblasZgemmFunctor::Args & args)
169169
{
170170

171-
//TODO: the logic below is complicated; Needs cleanup;
172-
clblasZgemmFunctor * functor;
173-
174171
if ( args.M%32==0
175172
&& args.N%64==0
176173
&& args.K%8==0
177174
&& args.transA==clblasNoTrans
178175
&& args.transB==clblasTrans
179176
&& args.order==clblasColumnMajor) {
180-
functor = clblasZgemmFunctorGCN::provide(args, "Hawaii");
177+
return clblasZgemmFunctorGCN::provide(args, "Hawaii");
181178
} else {
182179
return this->clblasFunctorSelector::select_zgemm_specific(args);
183180
}

src/library/blas/gens/clTemplates/zgemm_gcn.cl

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -200,10 +200,15 @@ __kernel void KERNEL_NAME(DATA_TYPE_CHAR,TRANSPOSE_A,TRANSPOSE_B,MACRO_TILE_NUM_
200200
uint const lda,
201201
uint const ldb,
202202
uint const ldc,
203-
uint const offA,
204-
uint const offB,
205-
uint const offC )
203+
uint const offsetA,
204+
uint const offsetB,
205+
uint const offsetC )
206206
{
207+
// apply offsets
208+
A += offsetA;
209+
B += offsetB;
210+
C += offsetC;
211+
207212
// registers
208213
DATA_TYPE_STR rC[MICRO_TILE_NUM_ROWS][MICRO_TILE_NUM_COLS] = {0};
209214
DATA_TYPE_STR rA[MICRO_TILE_NUM_ROWS];

0 commit comments

Comments
 (0)