Skip to content

Commit 00a29c6

Browse files
tingxingdong authored and Kent Knox committed
allow users to easily verify the gemm/trmm GPU results with the netlib cblas through client (#274)
* (1) Update the README: Netlib is now the preferred CPU BLAS. (2) The correctness of gemm and trmm can now be verified through the client.
* Give more details on how to get CBLAS on Windows.
* Find the Netlib library directory and library in the CMake files.
* Add a file that was forgotten in an earlier commit.
* Disable the validation on Windows for now: there is no easy way to build/link Netlib CBLAS on Windows.
1 parent d20977e commit 00a29c6

File tree

8 files changed

+1130
-822
lines changed

8 files changed

+1130
-822
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,12 @@ The simple example below shows how to use clBLAS to compute an OpenCL accelerate
197197
198198
### Test infrastructure
199199
* Googletest v1.6
200-
* ACML on windows/linux; Accelerate on Mac OSX
201200
* Latest Boost
201+
* CPU BLAS
202+
- Netlib CBLAS (recommended)
203+
Ubuntu: install with "apt-get install libblas-dev"
204+
Windows: download and install lapack-3.6.0, which comes with CBLAS
205+
- or ACML on Windows/Linux; Accelerate on Mac OS X
202206
203207
### Performance infrastructure
204208
* Python

src/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,14 @@ if( BUILD_TEST )
265265
endif( )
266266
endif( )
267267

268+
# The client links against the Netlib CBLAS library, so warn at configure
# time when the client is requested but Netlib has not been found.
if( BUILD_CLIENT AND NOT NETLIB_FOUND )
    message( WARNING "Could not find Netlib; BUILD_CLIENT needs the Netlib CBLAS library" )
endif( )
274+
275+
268276
# This will define OPENCL_FOUND
269277
find_package( OpenCL ${OPENCL_VERSION} )
270278

src/FindNetlib.cmake

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,25 @@ if( NOT contains_BLAS EQUAL -1 )
100100
FIND_PACKAGE_HANDLE_STANDARD_ARGS( NETLIB DEFAULT_MSG Netlib_BLAS_LIBRARY )
101101
endif( )
102102

103+
104+
# Look for the Netlib CBLAS header (cblas.h).
# Hints are collected into one list so a single find_path call serves every
# platform:
#   - a user-supplied Netlib_ROOT is honoured everywhere (previously it was
#     ignored on UNIX, so a custom install could not be located);
#   - /usr/include is kept as a hint for UNIX installs.
# Netlib_ROOT is only used when it is set, so an empty value no longer
# produces a bogus hint such as "/CBLAS/include/".
set( _netlib_cblas_hints )
if( Netlib_ROOT )
    list( APPEND _netlib_cblas_hints
        ${Netlib_ROOT}/CBLAS/include
        ${Netlib_ROOT}/include
    )
endif( )
if( UNIX )
    list( APPEND _netlib_cblas_hints /usr/include )
endif( )

find_path( Netlib_INCLUDE_DIRS cblas.h
    HINTS
        ${_netlib_cblas_hints}
)
unset( _netlib_cblas_hints )
116+
117+
# Warn when the cblas.h search above did not yield an include directory.
if( NOT Netlib_INCLUDE_DIRS )
    message(WARNING "Cannot find cblas.h")
endif( )
121+
103122
if( NETLIB_FOUND )
104123
list( APPEND Netlib_LIBRARIES ${Netlib_BLAS_LIBRARY} )
105124
else( )

src/client/CMakeLists.txt

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# ########################################################################
22
# Copyright 2013 Advanced Micro Devices, Inc.
3-
#
3+
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
66
# You may obtain a copy of the License at
7-
#
7+
#
88
# http://www.apache.org/licenses/LICENSE-2.0
9-
#
9+
#
1010
# Unless required by applicable law or agreed to in writing, software
1111
# distributed under the License is distributed on an "AS IS" BASIS,
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -48,10 +48,11 @@ include_directories(
4848
${clBLAS_SOURCE_DIR}
4949
${clBLAS_SOURCE_DIR}/include
5050
${clBLAS_SOURCE_DIR}/tests/include
51+
${Netlib_INCLUDE_DIRS}
5152
.)
5253

5354
add_executable(client ${CLIENT_SRC} ${CLIENT_HEADER})
54-
target_link_libraries(client ${Boost_LIBRARIES} ${OPENCL_LIBRARIES} clBLAS)
55+
target_link_libraries(client ${Netlib_LIBRARIES} ${Boost_LIBRARIES} ${OPENCL_LIBRARIES} clBLAS)
5556
set_target_properties( client PROPERTIES
5657
RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging"
5758
OUTPUT_NAME clBLAS-client )

src/client/clfunc_common.hpp

Lines changed: 78 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@
2727
#include "test-limits.h"
2828
#include "dis_warning.h"
2929

30+
#if defined ( _WIN32 ) || defined ( _WIN64 )
31+
#else
32+
#include "cblas.h"
33+
#endif
34+
3035
#include "clBLAS.h"
3136
#if defined(__APPLE__) || defined(__MACOSX)
3237
#include <OpenCL/cl_ext.h>
@@ -77,6 +82,57 @@ randomScale()
7782
return t;
7883
}
7984

85+
#if defined ( _WIN32 ) || defined ( _WIN64 )
86+
#else
87+
88+
/// Translates a clBLAS storage-order enumerator into the corresponding CBLAS
/// enumerator.
///
/// @param value  clBLAS order to translate.
/// @return       CblasRowMajor for clblasRowMajor, CblasColMajor for
///               clblasColumnMajor.
///
/// A value matching none of the cases is reported on std::cout and the process
/// exits with status 1; previously control flowed off the end of this non-void
/// function, which is undefined behaviour.
/// Declared inline because the definition lives in a header and would otherwise
/// be emitted once per including translation unit.
inline CBLAS_ORDER
clblasToCblas_order(clblasOrder value)
{
    // No default label: keeps compiler warnings about unhandled enumerators.
    switch (value) {
    case clblasRowMajor:    return CblasRowMajor;
    case clblasColumnMajor: return CblasColMajor;
    }

    std::cout << "clblasToCblas_order: unsupported clblasOrder value: "
              << static_cast<int>(value) << std::endl;
    exit(1);
}
96+
97+
/// Translates a clBLAS transpose enumerator into the corresponding CBLAS
/// enumerator.
///
/// @param value  clBLAS transpose setting to translate.
/// @return       CblasNoTrans, CblasTrans or CblasConjTrans for clblasNoTrans,
///               clblasTrans or clblasConjTrans respectively.
///
/// A value matching none of the cases is reported on std::cout and the process
/// exits with status 1; previously control flowed off the end of this non-void
/// function, which is undefined behaviour.
/// Declared inline because the definition lives in a header and would otherwise
/// be emitted once per including translation unit.
inline CBLAS_TRANSPOSE
clblasToCblas_operation(clblasTranspose value)
{
    // No default label: keeps compiler warnings about unhandled enumerators.
    switch (value) {
    case clblasNoTrans:   return CblasNoTrans;
    case clblasTrans:     return CblasTrans;
    case clblasConjTrans: return CblasConjTrans;
    }

    std::cout << "clblasToCblas_operation: unsupported clblasTranspose value: "
              << static_cast<int>(value) << std::endl;
    exit(1);
}
106+
107+
/// Translates a clBLAS upper/lower triangle enumerator into the corresponding
/// CBLAS enumerator.
///
/// @param value  clBLAS triangle selector to translate.
/// @return       CblasUpper for clblasUpper, CblasLower for clblasLower.
///
/// A value matching none of the cases is reported on std::cout and the process
/// exits with status 1; previously control flowed off the end of this non-void
/// function, which is undefined behaviour.
/// Declared inline because the definition lives in a header and would otherwise
/// be emitted once per including translation unit.
inline CBLAS_UPLO
clblasToCblas_fill(clblasUplo value)
{
    // No default label: keeps compiler warnings about unhandled enumerators.
    switch (value) {
    case clblasUpper: return CblasUpper;
    case clblasLower: return CblasLower;
    }

    std::cout << "clblasToCblas_fill: unsupported clblasUplo value: "
              << static_cast<int>(value) << std::endl;
    exit(1);
}
115+
116+
/// Translates a clBLAS side enumerator into the corresponding CBLAS enumerator.
///
/// @param value  clBLAS side selector to translate.
/// @return       CblasLeft for clblasLeft, CblasRight for clblasRight.
///
/// A value matching none of the cases is reported on std::cout and the process
/// exits with status 1; previously control flowed off the end of this non-void
/// function, which is undefined behaviour.
/// Declared inline because the definition lives in a header and would otherwise
/// be emitted once per including translation unit.
inline CBLAS_SIDE
clblasToCblas_side(clblasSide value)
{
    // No default label: keeps compiler warnings about unhandled enumerators.
    switch (value) {
    case clblasLeft:  return CblasLeft;
    case clblasRight: return CblasRight;
    }

    std::cout << "clblasToCblas_side: unsupported clblasSide value: "
              << static_cast<int>(value) << std::endl;
    exit(1);
}
124+
125+
/// Translates a clBLAS diagonal enumerator into the corresponding CBLAS
/// enumerator.
///
/// @param value  clBLAS diagonal selector to translate.
/// @return       CblasNonUnit for clblasNonUnit, CblasUnit for clblasUnit.
///
/// A value matching none of the cases is reported on std::cout and the process
/// exits with status 1; previously control flowed off the end of this non-void
/// function, which is undefined behaviour.
/// Declared inline because the definition lives in a header and would otherwise
/// be emitted once per including translation unit.
inline CBLAS_DIAG
clblasToCblas_diag(clblasDiag value)
{
    // No default label: keeps compiler warnings about unhandled enumerators.
    switch (value) {
    case clblasNonUnit: return CblasNonUnit;
    case clblasUnit:    return CblasUnit;
    }

    std::cout << "clblasToCblas_diag: unsupported clblasDiag value: "
              << static_cast<int>(value) << std::endl;
    exit(1);
}
133+
134+
#endif
135+
80136
std::string
81137
prettyPrintClStatus( const cl_int& status )
82138
{
@@ -269,7 +325,7 @@ class clblasFunc
269325
virtual ~clblasFunc()
270326
{
271327
clblasTeardown();
272-
328+
273329
for (unsigned int i = 0; i < numQueues; i++) {
274330
OPENCL_V_THROW( clReleaseCommandQueue(queues_[i]), "releasing command queue" );
275331
}
@@ -278,36 +334,38 @@ class clblasFunc
278334

279335
void wait_and_check()
280336
{
281-
cl_int err;
337+
cl_int err;
282338
cl_int wait_status = clWaitForEvents(1, &event_);
283339

284340
if( wait_status != CL_SUCCESS )
285341
{
286-
if( wait_status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST )
287-
{
288-
clGetEventInfo( event_, CL_EVENT_COMMAND_EXECUTION_STATUS,
342+
if( wait_status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST )
343+
{
344+
clGetEventInfo( event_, CL_EVENT_COMMAND_EXECUTION_STATUS,
289345
sizeof(cl_int), &err, NULL );
290-
std::cout << "blas function execution status error: " << err << std::endl;
346+
std::cout << "blas function execution status error: " << err << std::endl;
291347
exit(1);
292-
}
348+
}
293349
else
294350
{
295-
std::cout << "blas function wait status error: " << wait_status << std::endl;
351+
std::cout << "blas function wait status error: " << wait_status << std::endl;
296352
exit(1);
297353
}
298354
}
299355
}
300356

301357
double time_in_ns()
302358
{
303-
StatisticalTimer& timer = StatisticalTimer::getInstance( );
359+
StatisticalTimer& timer = StatisticalTimer::getInstance( );
304360
return timer.getAverageTime( timer_id ) * 1e9;
305361
}
306362

363+
// Overridable validation hook; this base implementation does nothing.
// NOTE(review): presumably derived classes compare their GPU result against
// CBLAS here and v selects whether validation runs; confirm against callers.
virtual void validate_with_cblas(int v) {}
364+
307365
virtual void call_func() = 0;
308366
virtual double gflops() = 0;
309367
virtual std::string gflops_formula() = 0;
310-
virtual void setup_apiCallCount(cl_uint apiCallCount){}
368+
virtual void setup_apiCallCount(cl_uint apiCallCount){}
311369
virtual void setup_buffer(int order_option, int side_option,
312370
int uplo_option, int diag_option, int
313371
transA_option, int transB_option,
@@ -317,20 +375,20 @@ class clblasFunc
317375
virtual void initialize_cpu_buffer() = 0;
318376
virtual void initialize_gpu_buffer() = 0;
319377
virtual void reset_gpu_write_buffer() = 0;
320-
virtual void read_gpu_buffer() = 0;
321-
virtual void roundtrip_func() = 0;
322-
virtual void roundtrip_func_rect() {}
323-
virtual void allochostptr_roundtrip_func() {}
324-
virtual void usehostptr_roundtrip_func() {}
325-
virtual void copyhostptr_roundtrip_func() {}
326-
virtual void usepersismem_roundtrip_func() {}
327-
virtual void roundtrip_setup_buffer(int order_option, int side_option,
378+
virtual void read_gpu_buffer() = 0;
379+
virtual void roundtrip_func() = 0;
380+
virtual void roundtrip_func_rect() {}
381+
virtual void allochostptr_roundtrip_func() {}
382+
virtual void usehostptr_roundtrip_func() {}
383+
virtual void copyhostptr_roundtrip_func() {}
384+
virtual void usepersismem_roundtrip_func() {}
385+
virtual void roundtrip_setup_buffer(int order_option, int side_option,
328386
int uplo_option, int diag_option, int
329387
transA_option, int transB_option,
330388
size_t M, size_t N, size_t K, size_t lda,
331389
size_t ldb, size_t ldc, size_t offA, size_t offBX,
332390
size_t offCY, double alpha, double beta) = 0;
333-
virtual void releaseGPUBuffer_deleteCPUBuffer()=0;
391+
virtual void releaseGPUBuffer_deleteCPUBuffer()=0;
334392
StatisticalTimer& timer;
335393
StatisticalTimer::sTimerID timer_id;
336394

@@ -347,7 +405,7 @@ class clblasFunc
347405
clblasOrder order_;
348406
cl_event event_;
349407
size_t maxMemAllocSize;
408+
int validate_;
350409
}; // class clblasFunc
351410

352411
#endif // ifndef CLBLAS_BENCHMARK_COMMON_HXX__
353-

0 commit comments

Comments
 (0)