Skip to content

Commit 87bab9e

Browse files
committed
Merge pull request #214 from TimmyLiu/develop
fix some exception handlers. now test-functional all pass
2 parents aa637b6 + 6041a3a commit 87bab9e

File tree

3 files changed

+124
-14
lines changed

3 files changed

+124
-14
lines changed

src/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ cmake_minimum_required(VERSION 2.8)
1818

1919
#User toggle-able options that can be changed on the command line with -D
2020
option( BUILD_RUNTIME "Build the BLAS runtime library" ON )
21-
option( BUILD_TEST "Build the library testing suite (dependency on google test, Boost, and ACML)" ON )
21+
option( BUILD_TEST "Build the library testing suite (dependency on google test, Boost, and ACML/NETLIB BLAS)" ON )
2222
option( BUILD_PERFORMANCE "Copy the performance scripts that can measure and graph performance" OFF )
2323
option( BUILD_SAMPLE "Build the sample programs" OFF )
2424
option( BUILD_CLIENT "Build a command line clBLAS client program with a variety of configurable parameters (dependency on Boost)" OFF )

src/library/blas/xgemm.cc

Lines changed: 121 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ static void force_gemm_column_major(
6161
printf("OpenCL error %i on line %u of %s\n", RET, __LINE__, __FILE__); \
6262
assert(false); \
6363
}
64+
#define returnIfErr(err) \
65+
if (err != CL_SUCCESS)\
66+
return static_cast<clblasStatus>(err);
6467

6568
const static unsigned int numGemmKernelArgs = 14;
6669
void *gemmKernelArgs[numGemmKernelArgs];
@@ -258,7 +261,7 @@ void makeGemmKernel(
258261
/******************************************************************************
259262
* Enqueue Gemm Kernel
260263
*****************************************************************************/
261-
void enqueueGemmKernel(
264+
cl_int enqueueGemmKernel(
262265
cl_command_queue clQueue,
263266
cl_kernel clKernel,
264267
void **kernelArgs,
@@ -271,14 +274,20 @@ void makeGemmKernel(
271274
cl_event *clEvent)
272275
{
273276
for (unsigned int i = 0; i < numKernelArgs; i++) {
274-
CL_CHECK( clSetKernelArg( clKernel, i, kernelArgSizes[i], kernelArgs[i]) )
277+
cl_int err = clSetKernelArg(clKernel, i, kernelArgSizes[i], kernelArgs[i]);
278+
if (err != CL_SUCCESS)
279+
return err;
275280
}
276281
/*printf("global={%llu, %llu} local={%llu, %llu}\n",
277282
globalWorkSize[0], globalWorkSize[1],
278283
localWorkSize[0], localWorkSize[1] );*/
279-
CL_CHECK( clEnqueueNDRangeKernel( clQueue, clKernel,
280-
2, NULL, globalWorkSize, localWorkSize,
281-
numEventsInWaitList, eventWaitList, clEvent ) )
284+
cl_uint err = clEnqueueNDRangeKernel(clQueue, clKernel,
285+
2, NULL, globalWorkSize, localWorkSize,
286+
numEventsInWaitList, eventWaitList, clEvent);
287+
if (err != CL_SUCCESS)
288+
return err;
289+
290+
return CL_SUCCESS;
282291
}
283292

284293

@@ -325,6 +334,8 @@ clblasGemm(
325334
const cl_event *eventWaitList,
326335
cl_event *events)
327336
{
337+
338+
328339
// cast types to opencl types
329340
cl_mem A = iA;
330341
cl_mem B = iB;
@@ -389,10 +400,13 @@ clblasGemm(
389400
cl_int err;
390401
cl_device_id clDevice;
391402
err = clGetCommandQueueInfo( commandQueues[0], CL_QUEUE_DEVICE, sizeof(clDevice), &clDevice, NULL);
392-
CL_CHECK(err)
403+
//CL_CHECK(err)
404+
returnIfErr(err);
405+
393406
cl_uint clDeviceNumCUs;
394407
err = clGetDeviceInfo( clDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(clDeviceNumCUs), &clDeviceNumCUs, NULL);
395-
CL_CHECK(err)
408+
//CL_CHECK(err)
409+
returnIfErr(err);
396410
unsigned int deviceIdealNumThreads = (8 /*waves per CU*/)*(64 /*threads per wave*/)*clDeviceNumCUs;
397411
float optimalNumElementsPerThread = ((float)M*N) / deviceIdealNumThreads;
398412
//optimalNumElementsPerThread = 32;
@@ -562,11 +576,12 @@ clblasGemm(
562576
if (needTileKernel) {
563577
//printf("enqueueing tile kernel\n");
564578
size_t globalWorkSize[2] = {(M/macroTileNumRows)*workGroupNumRows, (N/macroTileNumCols)*workGroupNumCols };
565-
enqueueGemmKernel( commandQueues[numKernelsEnqueued%numCommandQueues], *tileClKernel,
579+
err = enqueueGemmKernel( commandQueues[numKernelsEnqueued%numCommandQueues], *tileClKernel,
566580
gemmKernelArgs, gemmKernelArgSizes, numGemmKernelArgs,
567581
globalWorkSize, localWorkSize,
568582
numEventsInWaitList, eventWaitList,
569583
&events[numKernelsEnqueued%numCommandQueues] );
584+
returnIfErr(err);
570585
numKernelsEnqueued++;
571586
}
572587

@@ -576,11 +591,12 @@ clblasGemm(
576591
if (needRowKernel) {
577592
//printf("enqueueing row kernel\n");
578593
size_t globalWorkSize[2] = {1*workGroupNumRows, (N/macroTileNumCols)*workGroupNumCols };
579-
enqueueGemmKernel( commandQueues[numKernelsEnqueued%numCommandQueues], *rowClKernel,
594+
err = enqueueGemmKernel( commandQueues[numKernelsEnqueued%numCommandQueues], *rowClKernel,
580595
gemmKernelArgs, gemmKernelArgSizes, numGemmKernelArgs,
581596
globalWorkSize, localWorkSize,
582597
numEventsInWaitList, eventWaitList,
583598
&events[numKernelsEnqueued%numCommandQueues] );
599+
returnIfErr(err);
584600
numKernelsEnqueued++;
585601
}
586602

@@ -590,11 +606,12 @@ clblasGemm(
590606
if (needColKernel) {
591607
//printf("enqueueing col kernel\n");
592608
size_t globalWorkSize[2] = { (M/macroTileNumRows)*workGroupNumRows, 1*workGroupNumCols };
593-
enqueueGemmKernel( commandQueues[numKernelsEnqueued%numCommandQueues], *colClKernel,
609+
err = enqueueGemmKernel( commandQueues[numKernelsEnqueued%numCommandQueues], *colClKernel,
594610
gemmKernelArgs, gemmKernelArgSizes, numGemmKernelArgs,
595611
globalWorkSize, localWorkSize,
596612
numEventsInWaitList, eventWaitList,
597613
&events[numKernelsEnqueued%numCommandQueues] );
614+
returnIfErr(err);
598615
numKernelsEnqueued++;
599616
}
600617

@@ -604,11 +621,12 @@ clblasGemm(
604621
if (needCornerKernel) {
605622
//printf("enqueueing corner kernel\n");
606623
size_t globalWorkSize[2] = { 1*workGroupNumRows, 1*workGroupNumCols };
607-
enqueueGemmKernel( commandQueues[numKernelsEnqueued%numCommandQueues], *cornerClKernel,
624+
err = enqueueGemmKernel( commandQueues[numKernelsEnqueued%numCommandQueues], *cornerClKernel,
608625
gemmKernelArgs, gemmKernelArgSizes, numGemmKernelArgs,
609626
globalWorkSize, localWorkSize,
610627
numEventsInWaitList, eventWaitList,
611628
&events[numKernelsEnqueued%numCommandQueues] );
629+
returnIfErr(err);
612630
numKernelsEnqueued++;
613631
}
614632

@@ -637,6 +655,29 @@ clblasSgemm(
637655
const cl_event *eventWaitList,
638656
cl_event *events)
639657
{
658+
// check if memory objects are valid
659+
clblasStatus clblasErr = clblasSuccess;
660+
clblasErr = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET);
661+
if (clblasErr != clblasSuccess)
662+
return clblasErr;
663+
664+
if (K != 0)
665+
{
666+
//check matrix A
667+
clblasErr = checkMatrixSizes(TYPE_FLOAT, order, transA, M, K, A, offA, lda, A_MAT_ERRSET);
668+
if (clblasErr != clblasSuccess)
669+
return clblasErr;
670+
671+
//check matrix B
672+
clblasErr = checkMatrixSizes(TYPE_FLOAT, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET);
673+
if (clblasErr != clblasSuccess)
674+
return clblasErr;
675+
}
676+
//check matrix C
677+
clblasErr = checkMatrixSizes(TYPE_FLOAT, order, clblasNoTrans, M, N, C, offC, ldc, C_MAT_ERRSET);
678+
if (clblasErr != clblasSuccess)
679+
return clblasErr;
680+
640681
return clblasGemm(
641682
order,
642683
transA,
@@ -674,6 +715,29 @@ clblasDgemm( clblasOrder order,
674715
const cl_event *eventWaitList,
675716
cl_event *events)
676717
{
718+
// check if memory objects are valid
719+
clblasStatus clblasErr = clblasSuccess;
720+
clblasErr = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET);
721+
if (clblasErr != clblasSuccess)
722+
return clblasErr;
723+
724+
if (K != 0)
725+
{
726+
//check matrix A
727+
clblasErr = checkMatrixSizes(TYPE_DOUBLE, order, transA, M, K, A, offA, lda, A_MAT_ERRSET);
728+
if (clblasErr != clblasSuccess)
729+
return clblasErr;
730+
731+
//check matrix B
732+
clblasErr = checkMatrixSizes(TYPE_DOUBLE, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET);
733+
if (clblasErr != clblasSuccess)
734+
return clblasErr;
735+
}
736+
//check matrix C
737+
clblasErr = checkMatrixSizes(TYPE_DOUBLE, order, clblasNoTrans, M, N, C, offC, ldc, C_MAT_ERRSET);
738+
if (clblasErr != clblasSuccess)
739+
return clblasErr;
740+
677741
return clblasGemm(
678742
order,
679743
transA,
@@ -712,6 +776,29 @@ clblasCgemm(
712776
const cl_event *eventWaitList,
713777
cl_event *events)
714778
{
779+
// check if memory objects are valid
780+
clblasStatus clblasErr = clblasSuccess;
781+
clblasErr = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET);
782+
if (clblasErr != clblasSuccess)
783+
return clblasErr;
784+
785+
if (K != 0)
786+
{
787+
//check matrix A
788+
clblasErr = checkMatrixSizes(TYPE_COMPLEX_FLOAT, order, transA, M, K, A, offA, lda, A_MAT_ERRSET);
789+
if (clblasErr != clblasSuccess)
790+
return clblasErr;
791+
792+
//check matrix B
793+
clblasErr = checkMatrixSizes(TYPE_COMPLEX_FLOAT, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET);
794+
if (clblasErr != clblasSuccess)
795+
return clblasErr;
796+
}
797+
//check matrix C
798+
clblasErr = checkMatrixSizes(TYPE_COMPLEX_FLOAT, order, clblasNoTrans, M, N, C, offC, ldc, C_MAT_ERRSET);
799+
if (clblasErr != clblasSuccess)
800+
return clblasErr;
801+
715802
return clblasGemm(
716803
order,
717804
transA,
@@ -750,6 +837,29 @@ clblasZgemm(
750837
const cl_event *eventWaitList,
751838
cl_event *events)
752839
{
840+
// check if memory objects are valid
841+
clblasStatus clblasErr = clblasSuccess;
842+
clblasErr = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET);
843+
if (clblasErr != clblasSuccess)
844+
return clblasErr;
845+
846+
if (K != 0)
847+
{
848+
//check matrix A
849+
clblasErr = checkMatrixSizes(TYPE_COMPLEX_DOUBLE, order, transA, M, K, A, offA, lda, A_MAT_ERRSET);
850+
if (clblasErr != clblasSuccess)
851+
return clblasErr;
852+
853+
//check matrix B
854+
clblasErr = checkMatrixSizes(TYPE_COMPLEX_DOUBLE, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET);
855+
if (clblasErr != clblasSuccess)
856+
return clblasErr;
857+
}
858+
//check matrix C
859+
clblasErr = checkMatrixSizes(TYPE_COMPLEX_DOUBLE, order, clblasNoTrans, M, N, C, offC, ldc, C_MAT_ERRSET);
860+
if (clblasErr != clblasSuccess)
861+
return clblasErr;
862+
753863
return clblasGemm(
754864
order,
755865
transA,

src/tests/functional/func-error.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ TEST(ERROR, InvalidMemObjectsymm) {
272272
}
273273
TEST(ERROR, InvalidValuesymm) {
274274
ErrorClass<SymmMetod<FloatComplex> > ec;
275-
ec.error(clblasInsufficientMemMatB);
275+
ec.error(clblasInsufficientMemMatA);
276276
}
277277

278278
TEST(ERROR, InvalidDevicesymm) {
@@ -512,7 +512,7 @@ TEST(ERROR, InvalidMemObjecthemm) {
512512
TEST(ERROR, InvalidValuehemm) {
513513

514514
ErrorClass<HemmMetod<DoubleComplex> > ec;
515-
ec.error(clblasInsufficientMemMatB);
515+
ec.error(clblasInsufficientMemMatA);
516516
}
517517

518518
TEST(ERROR, InvalidDevicehemm) {

0 commit comments

Comments
 (0)