Skip to content

Commit c4e7964

Browse files
author
Timmy
committed
bug fix
1 parent d6e6a78 commit c4e7964

File tree

1 file changed

+18
-12
lines changed

1 file changed

+18
-12
lines changed

src/library/blas/xtrsm.cc

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,6 @@ static void makeKernel(
165165
CL_CHECK(err)
166166
}
167167
else {
168-
//std::cout << kernelSource << std::endl;
169168
clProgram = clCreateProgramWithSource(
170169
clContext,
171170
1, &kernelSource,
@@ -749,7 +748,6 @@ cl_int diag_dtrtri128(
749748
int outer_block_size,
750749
cl_event *event)
751750
{
752-
std::cout << "enter diag_dtrtri128 " << std::endl;
753751
const char *diag_dtrtri_kernel_upper_KernelSource = NULL;
754752
cl_kernel *diag_dtrtri_kernel_upper_ClKernel = NULL;
755753
size_t diag_dtrtri_kernel_upper_KernelBinarySize = 0;
@@ -871,6 +869,9 @@ cl_int diag_dtrtri128(
871869
err = clEnqueueNDRangeKernel(queue, *diag_dtrtri_kernel_upper_ClKernel, 1, NULL,
872870
globalThreads, globalLocal,
873871
0, NULL, NULL);
872+
CL_CHECK(err);
873+
//err = clFinish(queue);
874+
//CL_CHECK(err);
874875

875876
if (err != CL_SUCCESS) {
876877
//printf( "kernel -diag_dtrtri_kernel_upper- failed with %d\n", err );
@@ -899,7 +900,8 @@ cl_int diag_dtrtri128(
899900
queue,
900901
A, offA, d_dinvA, i, lda, M, event);
901902
CL_CHECK(err);
902-
903+
//err = clFinish(queue);
904+
//CL_CHECK(err);
903905
break;
904906

905907
case 32:
@@ -915,6 +917,8 @@ cl_int diag_dtrtri128(
915917
queue,
916918
A, offA, d_dinvA, i, lda, M, event);
917919
CL_CHECK(err);
920+
//err = clFinish(queue);
921+
//CL_CHECK(err);
918922
err = call_kernel_triple_update128(&triple_dgemm_update_128_32_PART2_R_clKernel,
919923
triple_dgemm_update_128_32_PART2_R_src,
920924
TrtriBuildOptions,
@@ -924,6 +928,8 @@ cl_int diag_dtrtri128(
924928
queue,
925929
A, offA, d_dinvA, i, lda, M, event);
926930
CL_CHECK(err);
931+
//err = clFinish(queue);
932+
//CL_CHECK(err);
927933

928934
break;
929935

@@ -940,6 +946,8 @@ cl_int diag_dtrtri128(
940946
queue,
941947
A, offA, d_dinvA, i, lda, M, event);
942948
CL_CHECK(err);
949+
//err = clFinish(queue);
950+
//CL_CHECK(err);
943951

944952
err = call_kernel_triple_update128(&triple_dgemm_update_128_64_PART2_R_clKernel,
945953
triple_dgemm_update_128_64_PART2_R_src,
@@ -950,6 +958,8 @@ cl_int diag_dtrtri128(
950958
queue,
951959
A, offA, d_dinvA, i, lda, M, event);
952960
CL_CHECK(err);
961+
//err = clFinish(queue);
962+
//CL_CHECK(err);
953963

954964
break;
955965

@@ -1020,8 +1030,6 @@ static clblasStatus gpu_dtrsm128(
10201030
{
10211031
if (order != clblasColumnMajor)
10221032
return clblasNotImplemented;
1023-
if (M < 16 || N < 16)
1024-
return clblasNotImplemented;
10251033

10261034
//for now
10271035
if (side == clblasRight)
@@ -1076,7 +1084,7 @@ static clblasStatus gpu_dtrsm128(
10761084
err = clearBuffer(commandQueues[0], InvA, size_InvA);
10771085
CL_CHECK(err);
10781086

1079-
err = diag_dtrtri128(commandQueues[0], N, uplo, diag, A, offA, InvA, ldA, inner_block_size, outer_block_size, events);
1087+
err = diag_dtrtri128(commandQueues[0], M, uplo, diag, A, offA, InvA, ldA, inner_block_size, outer_block_size, events);
10801088
CL_CHECK(err);
10811089

10821090
//
@@ -1134,23 +1142,22 @@ static clblasStatus gpu_dtrsm128(
11341142
{
11351143
/* the upper case */
11361144
/* handle the first block separately with alpha */
1137-
std::cout << "dtrtri trsm " << std::endl;
11381145
int mm = (M % outer_block_size == 0) ? outer_block_size : (M % outer_block_size);
11391146
i = M - mm;
1140-
//DGEMM_LEFT(mm, N, mm, alpha, _(InvA, 0, i), _(B, i, 0), zero, _(X, i, 0));
1147+
DGEMM_LEFT(mm, N, mm, alpha, _(InvA, 0, i), _(B, i, 0), zero, _(X, i, 0));
11411148

11421149
if (i - outer_block_size >= 0)
11431150
{
1144-
//DGEMM_LEFT(i, N, mm, neg_one, _(A, 0, i), _(X, i, 0), alpha, _(B, 0, 0));
1151+
DGEMM_LEFT(i, N, mm, neg_one, _(A, 0, i), _(X, i, 0), alpha, _(B, 0, 0));
11451152

11461153
/* the rest blocks */
11471154
for (i = M - mm - outer_block_size; i >= 0; i -= outer_block_size) {
1148-
//DGEMM_LEFT(outer_block_size, N, outer_block_size, one, _(InvA, 0, i), _(B, i, 0), zero, _(X, i, 0));
1155+
DGEMM_LEFT(outer_block_size, N, outer_block_size, one, _(InvA, 0, i), _(B, i, 0), zero, _(X, i, 0));
11491156

11501157
if (i - outer_block_size < 0)
11511158
break;
11521159

1153-
//DGEMM_LEFT(i, N, outer_block_size, neg_one, _(A, 0, i), _(X, i, 0), one, _(B, 0, 0));
1160+
DGEMM_LEFT(i, N, outer_block_size, neg_one, _(A, 0, i), _(X, i, 0), one, _(B, 0, 0));
11541161
}
11551162
}
11561163
}
@@ -1191,7 +1198,6 @@ static clblasStatus gpu_dtrsm128(
11911198
{
11921199
/* the upper case */
11931200
/* handle the first block separately with alpha */
1194-
std::cout << "dtrtri trsm " << std::endl;
11951201
int mm = min(outer_block_size, (int)M);
11961202
DGEMM_LEFT(mm, N, mm, alpha, _(InvA, 0, 0), _(B, 0, 0), zero, _(X, 0, 0));
11971203

0 commit comments

Comments
 (0)