Skip to content

Commit d16f7b3

Browse files
committed
Merge pull request #210 from TimmyLiu/master
merge develop branch into master branch. bump the version number to 2.10
2 parents 0fc3d3f + fffd478 commit d16f7b3

File tree

100 files changed

+1540
-411
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

100 files changed

+1540
-411
lines changed

.travis.yml

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ language: cpp
1515
# sudo: false instructs travis to build our project in a docker VM (faster)
1616
# Cannot yet install fglrx packages with 'sudo: false'
1717
sudo: required # false
18+
dist: trusty
1819

1920
# os: expands the build matrix to include multiple os's
2021
# disable linux, as we get sporadic failures on building boost, needs investigation
@@ -43,6 +44,7 @@ addons:
4344
# boost-latest contains boost v1.55
4445
- boost-latest
4546
packages:
47+
- gfortran
4648
# g++-4.8 is the minimum version, considered to be the first good C++11 GNU compiler
4749
# - g++-4.8
4850
# - clang-3.6
@@ -66,6 +68,8 @@ addons:
6668
env:
6769
global:
6870
- CLBLAS_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release
71+
- OPENCL_REGISTRY=https://www.khronos.org/registry/cl
72+
- OPENCL_ROOT=${TRAVIS_BUILD_DIR}/bin/opencl
6973

7074
# The following filters our build matrix; we are interested in linux-gcc & osx-clang
7175
matrix:
@@ -77,13 +81,13 @@ matrix:
7781

7882
before_install:
7983
# Remove the following linux clause when fglrx can be installed with sudo: false
80-
- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
81-
sudo apt-get update -qq &&
82-
sudo apt-get install -qq fglrx=2:13.350.1-0ubuntu0.0.1;
83-
fi
84-
- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
85-
export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers";
86-
fi
84+
#- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
85+
# sudo apt-get update -qq &&
86+
# sudo apt-get install -qq fglrx=2:13.350.1-0ubuntu0.0.1;
87+
# fi
88+
#- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
89+
# export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers";
90+
# fi
8791
- if [ ${TRAVIS_OS_NAME} == "osx" ]; then
8892
brew update;
8993
brew outdated boost || brew upgrade boost;
@@ -97,10 +101,33 @@ before_install:
97101
install:
98102
# 'Precise' only distributes v1.1 opencl headers; download 1.2 headers from khronos website
99103
# Remove when the travis VM upgrades to 'trusty' or beyond
104+
#- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
105+
# mkdir -p ${OPENCL_ROOT}/include/CL;
106+
# pushd ${OPENCL_ROOT}/include/CL;
107+
# wget -w 1 -r -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/;
108+
# popd;
109+
# fi
110+
# The following linux logic is necessary because of Travis's move to the GCE platform, which does not
111+
# currently contain packages for fglrx: https://github.com/travis-ci/travis-ci/issues/5221
112+
# We build our own linkable .so file
100113
- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
101-
mkdir -p ${OPENCL_ROOT}/include/CL;
102-
pushd ${OPENCL_ROOT}/include/CL;
103-
wget -w 1 -r -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/;
114+
mkdir -p ${OPENCL_ROOT};
115+
pushd ${OPENCL_ROOT};
116+
wget ${OPENCL_REGISTRY}/specs/opencl-icd-1.2.11.0.tgz;
117+
tar -xf opencl-icd-1.2.11.0.tgz;
118+
mv ./icd/* .;
119+
mkdir -p inc/CL;
120+
pushd inc/CL;
121+
wget -r -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/;
122+
wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/2.1/cl.hpp;
123+
popd;
124+
mkdir -p lib;
125+
pushd lib;
126+
cmake -G "Unix Makefiles" ..;
127+
make;
128+
cp ../bin/libOpenCL.so .;
129+
popd;
130+
mv inc/ include/;
104131
popd;
105132
fi
106133
# osx image does not contain cl.hpp file; download from Khronos
@@ -114,7 +141,7 @@ install:
114141
before_script:
115142
- mkdir -p ${CLBLAS_ROOT}
116143
- pushd ${CLBLAS_ROOT}
117-
- cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOCL_VERSION=2.0 -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src
144+
- cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=OFF -DOCL_VERSION=2.0 -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src
118145

119146
# use script: to execute build steps
120147
script:

doc/performance/clBLAS_2.9.0/FIJINANO/clblas290_fijinano_cgemm_col_nt_1520.csv

Lines changed: 180 additions & 0 deletions
Large diffs are not rendered by default.

doc/performance/clBLAS_2.9.0/FIJINANO/clblas290_fijinano_dgemm_col_nt_1520.csv

Lines changed: 180 additions & 0 deletions
Large diffs are not rendered by default.

doc/performance/clBLAS_2.9.0/FIJINANO/clblas290_fijinano_sgemm_col_nt_1520.csv

Lines changed: 181 additions & 0 deletions
Large diffs are not rendered by default.

doc/performance/clBLAS_2.9.0/FIJINANO/clblas290_fijinano_zgemm_col_nt_1520.csv

Lines changed: 158 additions & 0 deletions
Large diffs are not rendered by default.

src/CMakeLists.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@ message( STATUS "You have confirmed OpenCL ${OPENCL_VERSION} is supported in you
8282
if ( APPLE )
8383
set(CORR_TEST_WITH_ACML OFF CACHE BOOL "Use ACML library in correctness tests")
8484
else ( )
85-
message(STATUS "CORR_TEST_WITH_ACML set to ON")
86-
set(CORR_TEST_WITH_ACML ON CACHE BOOL "Use ACML library in correctness tests")
85+
message(STATUS "CORR_TEST_WITH_ACML set to OFF. Try link with libblas.so")
86+
set(CORR_TEST_WITH_ACML OFF CACHE BOOL "Use ACML library in correctness tests")
8787
endif( )
8888

8989
if( CMAKE_GENERATOR MATCHES "NMake" )
@@ -95,8 +95,8 @@ if( CMAKE_GENERATOR MATCHES "NMake" )
9595
endif( )
9696
endif( )
9797

98-
# If we are on linux, and we wish to link with the netlib BLAS implementation, we need to have a valid fortran compiler
99-
if( NOT CORR_TEST_WITH_ACML AND NOT WIN32 AND NOT APPLE )
98+
# If we are on Linux and wish to link with the Netlib BLAS implementation when BUILD_TEST is ON, we need a valid Fortran compiler
99+
if(BUILD_TEST AND NOT CORR_TEST_WITH_ACML AND NOT WIN32 AND NOT APPLE)
100100
project(clBLAS Fortran C CXX )
101101
else( )
102102
project(clBLAS C CXX)
@@ -108,7 +108,7 @@ if( NOT DEFINED clBLAS_VERSION_MAJOR )
108108
endif( )
109109

110110
if( NOT DEFINED clBLAS_VERSION_MINOR )
111-
set( clBLAS_VERSION_MINOR 8 )
111+
set( clBLAS_VERSION_MINOR 10 )
112112
endif( )
113113

114114
if( NOT DEFINED clBLAS_VERSION_PATCH )

src/FindNetlib.cmake

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,10 @@ endif( )
7474

7575
# The library name available from Netlib has different names for 64bit and 32bit libs
7676
if( LIB64 )
77-
set( Netlib_BLAS_LIBNAME libblas )
77+
set( Netlib_BLAS_LIBNAME blas )
7878
# set( Netlib_BLAS_LIBNAME BLAS ) Even though the download is named BLAS, the linker expects the .dll to be called libblas.dll
7979
else( )
80-
set( Netlib_BLAS_LIBNAME libblas )
80+
set( Netlib_BLAS_LIBNAME blas )
8181
endif( )
8282

8383
list( FIND Netlib_FIND_COMPONENTS BLAS contains_BLAS )
@@ -91,6 +91,7 @@ if( NOT contains_BLAS EQUAL -1 )
9191
PATHS
9292
/usr/lib
9393
/usr/local/lib
94+
/usr/lib/libblas
9495
DOC "Netlib dynamic library path"
9596
PATH_SUFFIXES lib
9697
)

src/library/CMakeLists.txt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# limitations under the License.
1515
# ########################################################################
1616

17+
find_package(PythonInterp REQUIRED)
1718

1819

1920

@@ -43,6 +44,7 @@ set(AUTOGEMM_HEADERS
4344
)
4445

4546
set(AUTOGEMM_SRC
47+
${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.cc
4648
${CMAKE_BINARY_DIR}/include/AutoGemmIncludes/AutoGemmClKernels.cpp
4749
${CMAKE_BINARY_DIR}/include/AutoGemmIncludes/AutoGemmKernelBuildOptionsBinary.cpp
4850
${CMAKE_BINARY_DIR}/include/AutoGemmIncludes/AutoGemmKernelBinaries.cpp
@@ -90,6 +92,10 @@ option( PRECOMPILE_GEMM_TRANS_CN "AutoGemm: pre-compile CN transpose cases" OFF)
9092
option( PRECOMPILE_GEMM_TRANS_CT "AutoGemm: pre-compile CT transpose cases" OFF)
9193
option( PRECOMPILE_GEMM_TRANS_CC "AutoGemm: pre-compile CC transpose cases" OFF)
9294

95+
set( AUTOGEMM_ARCHITECTURE "Hawaii" CACHE STRING "AutoGemm: device for kernel selection logic" )
96+
set_property( CACHE AUTOGEMM_ARCHITECTURE PROPERTY STRINGS "Hawaii" "Fiji" )
97+
98+
9399

94100
# opencl compiler version
95101
#set( PRECOMPILE_GEMM_OPENCL_VERSION "2.0" CACHE STRING "OpenCL compiler version supported by device driver." )
@@ -197,7 +203,7 @@ set( AUTOGEMM_PRECOMPILE_HEADER_SRC ${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/Ke
197203
set( AUTOGEMM_PRECOMPILE_HEADER_OUT ${CMAKE_BINARY_DIR}/include/AutoGemmIncludes/AutoGemmKernelsToPreCompile.h )
198204
add_custom_command(
199205
OUTPUT ${AUTOGEMM_PRECOMPILE_HEADER_OUT}
200-
COMMAND python ${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/KernelsToPreCompile.py ${AGPC_ARGS}
206+
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/KernelsToPreCompile.py ${AGPC_ARGS}
201207
DEPENDS ${AUTOGEMM_PRECOMPILE_HEADER_SRC}
202208
)
203209

@@ -238,7 +244,7 @@ endif()#endif precompile active
238244
################################################################################
239245
add_custom_command(
240246
OUTPUT ${AUTOGEMM_HEADERS} ${AUTOGEMM_SRC}
241-
COMMAND python ${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/AutoGemm.py --output-path ${CMAKE_BINARY_DIR}/include --opencl-compiler-version ${OPENCL_VERSION}
247+
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/AutoGemm.py --output-path ${CMAKE_BINARY_DIR}/include --opencl-compiler-version ${OPENCL_VERSION} --architecture ${AUTOGEMM_ARCHITECTURE}
242248
DEPENDS ${AUTOGEMM_SCRIPTS}
243249
)
244250

@@ -851,6 +857,7 @@ set(CLBLAS_ALL_SOURCES
851857
#${USERGEMM_SRC}
852858
#${USERGEMM_HEADERS}
853859
)
860+
add_definitions(-DOPENCL_VERSION="${OPENCL_VERSION}")
854861
add_library(clBLAS ${CLBLAS_ALL_SOURCES})
855862
add_dependencies(clBLAS GENERATE_CLT)
856863

src/library/blas/AutoGemm/AutoGemm.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import Common
2222
import Includes
2323
import KernelSelection
24+
import AutoGemmParameters
2425
import KernelOpenCL
2526

2627

@@ -32,16 +33,17 @@
3233
ap = argparse.ArgumentParser(description="AutoGemm")
3334
ap.add_argument("--output-path", dest="output" )
3435
ap.add_argument("--opencl-compiler-version", dest="clCompilerVersion", action="store", choices=["1.1", "1.2", "2.0" ])
36+
ap.add_argument("--architecture", dest="architecture", action="store", choices=["Hawaii", "Fiji" ])
3537
args = ap.parse_args()
3638
if args.output:
3739
Common.setOutputPath(args.output)
3840
else:
39-
print "AutoGemm.py: Warning: No output path specified; default is working directory."
41+
print("AutoGemm.py: Warning: No output path specified; default is working directory.")
4042

41-
print "AutoGemm.py: using OpenCL " + args.clCompilerVersion + " compiler"
43+
print("AutoGemm.py: using OpenCL " + args.clCompilerVersion + " compiler")
4244
Common.setClCompilerVersion(args.clCompilerVersion)
45+
AutoGemmParameters.setArchitecture(args.architecture)
4346

4447
KernelOpenCL.writeOpenCLKernels()
4548
KernelSelection.writeKernelSelection()
4649
Includes.writeIncludes()
47-

src/library/blas/AutoGemm/AutoGemmParameters.py

Lines changed: 54 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# Tile Parameters for Kernel Selection Data
66
################################################################################
77

8-
kernelSelectionData = {
8+
kernelSelectionDataHawaii = {
99
# [ size, fallback tile, [ valid tiles ] ],
1010
"s":[
1111
[ 4000, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6] ] ],
@@ -54,17 +54,55 @@
5454
],
5555
}
5656

57-
"""
58-
for testing all micro-tile sizes
59-
[ 128, [ 16, 16, 8, 8], [ [ 16, 16, 8, 8] ] ],
60-
[ 112, [ 16, 16, 7, 7], [ [ 16, 16, 7, 7] ] ],
61-
[ 96, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6] ] ],
62-
[ 80, [ 16, 16, 5, 5], [ [ 16, 16, 5, 5] ] ],
63-
[ 64, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4] ] ],
64-
[ 48, [ 16, 16, 3, 3], [ [ 16, 16, 3, 3] ] ],
65-
[ 32, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2] ] ],
66-
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
67-
"""
57+
kernelSelectionDataFiji = {
58+
"s":[
59+
[ 3072, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6], [ 16, 16, 5, 5], [ 16, 16, 4, 4] ] ],
60+
[ 2240, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6], [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 3, 3] ] ],
61+
[ 1760, [ 16, 16, 4, 4], [ [ 16, 16, 6, 6], [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 3, 3] ] ],
62+
[ 1600, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], [ 16, 16, 5, 5], [ 16, 16, 3, 3] ] ],
63+
[ 1056, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], [ 16, 16, 5, 5], [ 16, 16, 3, 3], [ 16, 16, 2, 2] ] ],
64+
[ 960, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 3, 3], [ 16, 16, 2, 2] ] ],
65+
[ 736, [ 16, 16, 3, 3], [ [ 16, 16, 4, 4], [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 2, 2] ] ],
66+
[ 528, [ 16, 16, 3, 3], [ [ 16, 16, 4, 4], [ 16, 16, 3, 3], [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
67+
[ 432, [ 16, 16, 2, 2], [ [ 16, 16, 3, 3], [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
68+
[ 320, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
69+
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
70+
],
71+
"d":[
72+
[ 3200, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 2, 2], [ 8, 8, 6, 6 ] ] ],
73+
[ 1632, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 5, 5], [ 8, 8, 6, 6 ] ] ],
74+
[ 1280, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 5, 5], [ 8, 8, 6, 6 ], [ 16, 16, 1, 1] ] ],
75+
[ 1056, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
76+
[ 672, [ 16, 16, 2, 2], [ [ 16, 16, 1, 1] ] ],
77+
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
78+
],
79+
"c":[
80+
[ 2240, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], ] ],
81+
[ 1440, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], [ 16, 16, 5, 5], [ 16, 16, 2, 2] ] ],
82+
[ 1088, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 3, 3], [ 16, 16, 2, 2], [ 16, 16, 5, 5] ] ],
83+
[ 704, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 3, 3], [ 16, 16, 5, 5] ] ],
84+
[ 528, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 3, 3], [ 16, 16, 1, 1] ] ],
85+
[ 336, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
86+
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
87+
],
88+
"z":[
89+
[ 2528, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 3, 3] ] ],
90+
[ 1872, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 3, 3], [ 16, 16, 1, 1] ] ],
91+
[ 1040, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
92+
[ 768, [ 16, 16, 1, 1], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
93+
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
94+
]
95+
}
96+
97+
kernelSelectionData = kernelSelectionDataHawaii
98+
def setArchitecture(architecture):
99+
global kernelSelectionData, kernelSelectionDataHawaii, kernelSelectionDataFiji
100+
101+
if architecture == "Fiji":
102+
kernelSelectionData = kernelSelectionDataFiji
103+
else:
104+
kernelSelectionData = kernelSelectionDataHawaii
105+
68106

69107
################################################################################
70108
# Non-Tile Parameters
@@ -89,20 +127,20 @@ def getTilesForPrecision(precision):
89127
validTiles = sizeData[2]
90128
# add valid tiles
91129
for tileParams in validTiles:
92-
#print tileParams
130+
#print(tileParams)
93131
tile.workGroupNumRows = tileParams[0]
94132
tile.workGroupNumCols = tileParams[1]
95133
tile.microTileNumRows = tileParams[2]
96134
tile.microTileNumCols = tileParams[3]
97135
tile.macroTileNumRows = tile.workGroupNumRows*tile.microTileNumRows
98136
tile.macroTileNumCols = tile.workGroupNumCols*tile.microTileNumCols
99-
#print tile.getName()
137+
#print(tile.getName())
100138
for unroll in unrolls[precision]:
101139
tile.unroll = unroll
102140
if tile.isValid():
103141
tiles.append( copy.copy(tile) )
104142
else:
105-
print tile.getName() + " - SKIPPING - "
143+
print(tile.getName() + " - SKIPPING - ")
106144

107145
# add fallback tile
108146
tile.workGroupNumRows = fallbackTile[0]
@@ -116,7 +154,7 @@ def getTilesForPrecision(precision):
116154
if tile.isValid():
117155
tiles.append( copy.copy(tile) )
118156
else:
119-
print tile.getName() + " - SKIPPING - "
157+
print(tile.getName() + " - SKIPPING - ")
120158

121159
setTiles = set(tiles)
122160
tiles = list( setTiles )

0 commit comments

Comments
 (0)