Skip to content

Commit 0482e1c

Browse files
committed
merged develop to master; bumped version to 2.8.0
2 parents 9731ea2 + feadbbb commit 0482e1c

File tree

183 files changed

+32985
-1192
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

183 files changed

+32985
-1192
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,6 @@
2020

2121
# flags.txt file
2222
*flags.txt
23+
24+
# vim temp files
25+
.*.swp

.travis.yml

Lines changed: 124 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,134 @@
1+
# Ubuntu name decoder ring; https://en.wikipedia.org/wiki/List_of_Ubuntu_releases
2+
# Ubuntu 12.04 LTS (Precise Pangolin) <== Travis CI VM image
3+
# Ubuntu 12.10 (Quantal Quetzal)
4+
# Ubuntu 13.04 (Raring Ringtail)
5+
# Ubuntu 13.10 (Saucy Salamander)
6+
# Ubuntu 14.04 LTS (Trusty Tahr)
7+
# Ubuntu 14.10 (Utopic Unicorn)
8+
# Ubuntu 15.04 (Vivid Vervet)
9+
# Ubuntu 15.10 (Wily Werewolf)
10+
# Ubuntu 16.04 LTS (Xenial Xerus)
11+
12+
# language: instructs travis what compilers && environment to set up in build matrix
113
language: cpp
214

15+
# sudo: false instructs travis to build our project in a docker VM (faster)
16+
# Can not yet install fglrx packages with 'false'
17+
sudo: required # false
18+
19+
# os: expands the build matrix to include multiple os's
20+
# disable linux, as we get sporadic failures on building boost, needs investigation
21+
os:
22+
- linux
23+
- osx
24+
25+
# compiler: expands the build matrix to include multiple compilers (per os)
326
compiler:
427
- gcc
28+
- clang
29+
30+
addons:
31+
# apt: is disabled on osx builds
32+
# apt: needed by docker framework to install project dependencies without
33+
# sudo. Apt uses published Ubuntu PPAs from https://launchpad.net/
34+
# https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json
35+
apt:
36+
sources:
37+
# ubuntu-toolchain-r-test contains newer versions of gcc to install
38+
# - ubuntu-toolchain-r-test
39+
# llvm-toolchain-precise-3.6 contains newer versions of clang to install
40+
# - llvm-toolchain-precise-3.6
41+
# kubuntu-backports contains newer versions of cmake to install
42+
- kubuntu-backports
43+
# boost-latest contains boost v1.55
44+
- boost-latest
45+
packages:
46+
# g++-4.8 is minimum version considered to be the first good c++11 gnu compiler
47+
# - g++-4.8
48+
# - clang-3.6
49+
# We require v2.8.12 minimum
50+
- cmake
51+
# I'm finding problems between pre-compiled versions of boost ublas, with gtest
52+
# stl_algobase.h: error: no matching function for call to swap()
53+
- libboost-program-options1.55-dev
54+
# - libboost-serialization1.55-dev
55+
# - libboost-filesystem1.55-dev
56+
# - libboost-system1.55-dev
57+
# - libboost-regex1.55-dev
58+
# The package opencl-headers on 'precise' only installs v1.1 cl headers; uncomment for 'trusty' or greater
59+
# - opencl-headers
60+
# Uncomment one of the following when fglrx modules are added to the apt whitelist
61+
# - fglrx
62+
# - fglrx=2:8.960-0ubuntu1
63+
# - fglrx=2:13.350.1-0ubuntu0.0.1
64+
65+
# env: specifies additional global variables to define per row in build matrix
66+
env:
67+
global:
68+
- CLBLAS_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release
69+
70+
# The following filters our build matrix; we are interested in linux-gcc & osx-clang
71+
matrix:
72+
exclude:
73+
- os: linux
74+
compiler: clang
75+
- os: osx
76+
compiler: gcc
577

678
before_install:
7-
- sudo apt-get update -qq
8-
- sudo apt-get install -qq fglrx libboost-program-options-dev
9-
# Uncomment below to help verify the installs above work
10-
# - ls -la /usr/lib/libboost*
11-
# - ls -la /usr/include/boost
79+
# Remove the following linux clause when fglrx can be installed with sudo: false
80+
- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
81+
sudo apt-get update -qq &&
82+
sudo apt-get install -qq fglrx=2:13.350.1-0ubuntu0.0.1;
83+
fi
84+
- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
85+
export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers";
86+
fi
87+
- if [ ${TRAVIS_OS_NAME} == "osx" ]; then
88+
brew update;
89+
brew outdated boost || brew upgrade boost;
90+
brew outdated cmake || brew upgrade cmake;
91+
fi
92+
# - if [ ${CXX} = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi
93+
- cmake --version;
94+
- ${CC} --version;
95+
- ${CXX} --version;
1296

97+
install:
98+
# 'Precise' only distributes v1.1 opencl headers; download 1.2 headers from khronos website
99+
# Remove when the travis VM upgrades to 'trusty' or beyond
100+
- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
101+
mkdir -p ${OPENCL_ROOT}/include/CL;
102+
pushd ${OPENCL_ROOT}/include/CL;
103+
wget -w 1 -r -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/;
104+
popd;
105+
fi
106+
# osx image does not contain cl.hpp file; download from Khronos
107+
# - if [ ${TRAVIS_OS_NAME} == "osx" ]; then
108+
# pushd /System/Library/Frameworks/OpenCL.framework/Versions/A/Headers/;
109+
# sudo wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/cl.hpp;
110+
# popd;
111+
# fi
112+
113+
# Use before_script: to run configure steps
13114
before_script:
14-
- cd ${TRAVIS_BUILD_DIR}
15-
# download OpenCL 1.2 header files since Travis CI only provides 1.1
16-
- mkdir -p OpenCLInclude/CL
17-
- cd OpenCLInclude/CL
18-
#- wget -r --no-parent -nH --cut-dirs=4 --reject="index.html*" https://www.khronos.org/registry/cl/api/1.2/
19-
- wget https://www.khronos.org/registry/cl/api/1.2/cl.h
20-
- wget https://www.khronos.org/registry/cl/api/1.2/cl.hpp
21-
- wget https://www.khronos.org/registry/cl/api/1.2/cl_d3d10.h
22-
- wget https://www.khronos.org/registry/cl/api/1.2/cl_d3d11.h
23-
- wget https://www.khronos.org/registry/cl/api/1.2/cl_dx9_media_sharing.h
24-
- wget https://www.khronos.org/registry/cl/api/1.2/cl_egl.h
25-
- wget https://www.khronos.org/registry/cl/api/1.2/cl_ext.h
26-
- wget https://www.khronos.org/registry/cl/api/1.2/cl_gl.h
27-
- wget https://www.khronos.org/registry/cl/api/1.2/cl_gl_ext.h
28-
- wget https://www.khronos.org/registry/cl/api/1.2/cl_platform.h
29-
- wget https://www.khronos.org/registry/cl/api/1.2/opencl.h
30-
- ls
31-
- pwd
32-
- cd ../..
33-
- mkdir -p bin/clBLAS
34-
- cd bin/clBLAS
35-
- cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOPENCL_INCLUDE_DIRS:PATH=$PWD/../../OpenCLInclude -DCMAKE_INSTALL_PREFIX:PATH=$PWD/package ../../src
36-
37-
script:
38-
- make install
39-
# - ls -Rla package
40-
# Run a simple test to validate that the build works; CPU device in a VM
41-
- cd package/bin
42-
- export LD_LIBRARY_PATH=${TRAVIS_BUILD_DIR}/bin/clBLAS/package/lib64:${LD_LIBRARY_PATH}
43-
- ./clBLAS-client --cpu
44-
45-
after_success:
46-
- cd ${TRAVIS_BUILD_DIR}/bin/clBLAS
115+
- mkdir -p ${CLBLAS_ROOT}
116+
- pushd ${CLBLAS_ROOT}
117+
- cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOCL_VERSION=2.0 -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src
118+
119+
# use script: to execute build steps
120+
script:
47121
- make package
48122

49-
notifications:
50-
email:
51-
52-
on_success: change
53-
on_failure: always
54-
123+
deploy:
124+
provider: releases
125+
prerelease: true
126+
draft: true
127+
skip_cleanup: true
128+
api_key:
129+
secure: MBkxtcfSk+4UvGRO+WRhmS86vIVzAs0LIF2sAtr/S+Ed+OdUAuhZypUsDXGWtK3mL55v9c8BZXefFfHfJqElcNmyHKwCptbCR/JiM8YBtjoy2/RW1NcJUZp+QuRlk23xPADj7QkPjv7dfrQUMitkLUXAD+uTmMe2l8gmlbhMrQqPBKhb+31FNv6Lmo6oa6GjbiGi7qjsrJc7uQjhppLam+M7BZbBALGbIqMIrb2BMDMMhBoDbb4zSKrSg3+krd3kKiCClJlK7xjIlyFXZ527ETQ+PMtIeQb0eJ3aQwa4caBRCm5BDzt8GnJ48S88EkynbQioCEE87ebcyOM7M+wfslW/Fm1Y86X5odIljkOmTNKoDvgLxc9vUCBtMyVHNIgZcToPdsrMsGxcHV+JtU3yVQVm6dnA5P/zG5bA+aBjsd7p7BdOE4fdhvZV5XRAk/wmiyWalF7hKJxHIiWAKknL+tpPDDUF+fHmDDsdf7yRDJBegNcKfw4+m19MIvLn9fbiNVCtwCAL1T4yWkIEpi4MRMDPtftmkZPbi6UwluOJUTeCeHe4en99Yu2haemNPqXs6rR0LlXGk31GQwzlrNfb+94F5tT2a4Ka4PsruA2NMW/IYCYEE5Gu7PihVDR031Fn9cdCU9kefUgyB07rJD6q/W+ljsU0osyg7VxyfMg8rkw=
130+
file: ${CLBLAS_ROOT}/clBLAS-build/*.tar.gz
131+
file_glob: true
132+
on:
133+
all_branches: true
134+
tags: true

README.md

Lines changed: 25 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1+
## Build Status
2+
| Build branch | master | develop |
3+
|-----|-----|-----|
4+
| GCC/Clang x64 | [![Build Status](https://travis-ci.org/clMathLibraries/clBLAS.svg?branch=master)](https://travis-ci.org/clMathLibraries/clBLAS/branches) | [![Build Status](https://travis-ci.org/clMathLibraries/clBLAS.svg?branch=develop)](https://travis-ci.org/clMathLibraries/clBLAS/branches) |
5+
| Visual Studio x64 | [![Build status](https://ci.appveyor.com/api/projects/status/v384bi6e8xv8nxjm/branch/master?svg=true)](https://ci.appveyor.com/project/kknox/clblas-5ph9i/branch/master)|[![Build status](https://ci.appveyor.com/api/projects/status/v384bi6e8xv8nxjm/branch/develop?svg=true)](https://ci.appveyor.com/project/kknox/clblas-5ph9i/branch/develop) |
6+
17
clBLAS
28
=====
3-
[![Build Status](https://travis-ci.org/clMathLibraries/clBLAS.png)](https://travis-ci.org/clMathLibraries/clBLAS)
4-
5-
69
This repository houses the code for the OpenCL™ BLAS portion of clMath.
710
The complete set of BLAS level 1, 2 & 3 routines is implemented. Please
811
see Netlib BLAS for the list of supported routines. In addition to GPU
@@ -20,30 +23,20 @@ library does generate and enqueue optimized OpenCL kernels, relieving
2023
the user from the task of writing, optimizing and maintaining kernel
2124
code themselves.
2225

23-
## clBLAS update notes 04/2015
24-
- A subset of GEMM and TRSM can be off-line compiled for Hawaii, Bonaire and Tahiti device at compile-time. This feature
25-
eliminates the overhead of calling clBuildProgram() at run-time.
26-
- Off-line compilation can be done with OpenCL 1.1, OpenCL 1.2 and OpenCl 2.0 runtime. However, for better
27-
performance OpenCL 2.0 is recommended. Library user can select "OCL_VERSION" from CMake to ensure the library with
28-
OpenCL version. It is library user's responsibility to ensure compatible hardware and driver.
29-
- Added flags_public.txt file that contains OpenCL compiler flags used by off-line compilation. The flags_public.txt
30-
will only be loaded when OCL_VERSION is 2.0.
31-
- User can off-line compile one or more supported device by selecting
32-
OCL_OFFLINE_BUILD_BONAIRE_KERNEL
33-
OCL_OFFLINE_BUILD_HAWII_KERNEL
34-
OCL_OFFLINE_BUILD_TAHITI_KERNEL.
35-
However, compile for more than one device at a time might result in running out of heap memory. Thus, compile for
36-
one device at a time is recommended.
37-
- User may also supply specific OpenCL compiler path with OCL_COMPILER_DIR or the library will load default OpenCL compiler.
38-
- The minimum driver requirement for off-line compilation is 14.502.
39-
26+
## clBLAS update notes 09/2015
27+
28+
- Introducing [AutoGemm](http://github.com/clMathLibraries/clBLAS/wiki/AutoGemm)
29+
- clBLAS's Gemm implementation has been comprehensively overhauled to use AutoGemm. AutoGemm is a suite of python scripts which generate optimized kernels and kernel selection logic, for all precisions, transposes, tile sizes and so on.
30+
- CMake is configured to use AutoGemm for clBLAS so the build and usage experience of Gemm remains unchanged (only performance and maintainability have been improved). Kernel sources are generated at build time (not runtime) and can be configured within CMake to be pre-compiled at build time.
31+
- clBLAS users with unique Gemm requirements can customize AutoGemm to their needs (such as non-default tile sizes for very small or very skinny matrices); see [AutoGemm](http://github.com/clMathLibraries/clBLAS/wiki/AutoGemm) documentation for details.
32+
4033

4134
## clBLAS library user documentation
4235

4336
[Library and API documentation][] for developers is available online as
4437
a GitHub Pages website
4538

46-
### Google Groups
39+
## Google Groups
4740

4841
Two mailing lists have been created for the clMath projects:
4942

@@ -108,10 +101,10 @@ The simple example below shows how to use clBLAS to compute an OpenCL accelerate
108101
static const cl_float beta = 20;
109102

110103
static cl_float C[M*N] = {
111-
11, 12, 13,
112-
21, 22, 23,
113-
31, 32, 33,
114-
41, 42, 43,
104+
11, 12, 13,
105+
21, 22, 23,
106+
31, 32, 33,
107+
41, 42, 43,
115108
};
116109
static const size_t ldc = N; /* i.e. ldc = N */
117110

@@ -155,13 +148,13 @@ The simple example below shows how to use clBLAS to compute an OpenCL accelerate
155148
err = clEnqueueWriteBuffer( queue, bufC, CL_TRUE, 0,
156149
M * N * sizeof( *C ), C, 0, NULL, NULL );
157150

158-
/* Call clBLAS extended function. Perform gemm for the lower right sub-matrices */
159-
err = clblasSgemm( clblasRowMajor, clblasNoTrans, clblasNoTrans,
160-
M, N, K,
161-
alpha, bufA, 0, lda,
162-
bufB, 0, ldb, beta,
163-
bufC, 0, ldc,
164-
1, &queue, 0, NULL, &event );
151+
/* Call clBLAS extended function. Perform gemm for the lower right sub-matrices */
152+
err = clblasSgemm( clblasRowMajor, clblasNoTrans, clblasNoTrans,
153+
M, N, K,
154+
alpha, bufA, 0, lda,
155+
bufB, 0, ldb, beta,
156+
bufC, 0, ldc,
157+
1, &queue, 0, NULL, &event );
165158

166159
/* Wait for calculations to be finished. */
167160
err = clWaitForEvents( 1, &event );

appveyor.yml

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Appveyor OS list
2+
# Windows Server 2012 R2 (x64) <== Appveyor default image
3+
# Visual Studio 2015
4+
5+
# os: expands the build matrix to include multiple os's
6+
os:
7+
- Windows Server 2012
8+
9+
# platform: expands the build matrix to include multiple target platforms (per os)
10+
platform:
11+
- x64
12+
13+
configuration:
14+
- Release
15+
16+
# Only clone the top level commit; don't bother with history
17+
shallow_clone: true
18+
19+
# environment: specifies additional global variables to define per row in build matrix
20+
environment:
21+
global:
22+
CLBLAS_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\nmake\\release"
23+
OPENCL_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\opencl"
24+
# BOOST_ROOT: "C:/Libraries/boost" # boost 1.56, 32-bit only
25+
BOOST_ROOT: "C:\\Libraries\\boost_1_58_0"
26+
OPENCL_REGISTRY: "https://www.khronos.org/registry/cl"
27+
28+
init:
29+
- echo init step
30+
- cmake --version
31+
- C:\"Program Files (x86)"\"Microsoft Visual Studio 12.0"\VC\vcvarsall.bat %PLATFORM%
32+
# Uncomment the following to display Remote Desktop connection details
33+
# - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
34+
35+
# We need to create an opencl import library that clblas can link against
36+
# Vendor based OpenCL packages are hard to use because of download size, registration requirements
37+
# and unattended installs not well supported
38+
install:
39+
- echo install step
40+
- ps: mkdir $env:OPENCL_ROOT
41+
- ps: pushd $env:OPENCL_ROOT
42+
- ps: $opencl_registry = $env:OPENCL_REGISTRY
43+
# This downloads the source to the example/demo icd library
44+
- ps: wget $opencl_registry/specs/opencl-icd-1.2.11.0.tgz -OutFile opencl-icd-1.2.11.0.tgz
45+
- ps: 7z x opencl-icd-1.2.11.0.tgz
46+
- ps: 7z x opencl-icd-1.2.11.0.tar
47+
- ps: mv .\icd\* .
48+
# This downloads all the opencl header files
49+
# The cmake build files expect a directory called inc
50+
- ps: mkdir inc/CL
51+
- ps: wget $opencl_registry/api/1.2/ | select -ExpandProperty links | where {$_.href -like "*.h*"} | select -ExpandProperty outerText | foreach{ wget $opencl_registry/api/1.2/$_ -OutFile inc/CL/$_ }
52+
# - ps: dir; if( $lastexitcode -eq 0 ){ dir include/CL } else { Write-Output boom }
53+
# Create the static import lib in a directory called lib, so findopencl() will find it
54+
- ps: mkdir lib
55+
- ps: pushd lib
56+
- cmake -G "NMake Makefiles" ..
57+
- nmake
58+
- ps: popd
59+
# Rename the inc directory to include, so FindOpencl() will find it
60+
- ps: ren inc include
61+
- ps: popd
62+
- ps: popd
63+
64+
# before_build is used to run configure steps
65+
before_build:
66+
- echo before_build step
67+
# Boost 1.58 is not installed in typical fashion, help FindBoost() find binary libs with BOOST_LIBRARYDIR
68+
- ps: $env:BOOST_LIBRARYDIR = "$env:BOOST_ROOT/lib64-msvc-12.0"
69+
- ps: mkdir $env:CLBLAS_ROOT
70+
- ps: pushd $env:CLBLAS_ROOT
71+
- cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=%CONFIGURATION% -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOCL_VERSION=2.0 -DOPENCL_ROOT=%OPENCL_ROOT% %APPVEYOR_BUILD_FOLDER%/src
72+
73+
# build_script invokes the compiler
74+
build_script:
75+
- echo build_script step
76+
- nmake package
77+
78+
after_build:
79+
- echo after_build step
80+
- ps: ls $env:CLBLAS_ROOT
81+
- ps: mv $env:CLBLAS_ROOT\*.zip $env:APPVEYOR_BUILD_FOLDER
82+
83+
# Appveyor will save a copy of the package in its personal storage
84+
artifacts:
85+
- path: '*.zip'
86+
name: binary_zip
87+
type: zip
88+
89+
# on_finish always executes regardless of passed or failed builds
90+
on_finish:
91+
- echo on_finish step
92+
93+
# Appveyor will push the artifacts it has saved to GitHub 'releases' tab
94+
deploy:
95+
provider: GitHub
96+
auth_token:
97+
secure: dRXIWJKpU7h2RsHX7RqmyYCtCw+Q9O3X5MArloY6p34GZC1w7bp+jQYTZqbdO7bw
98+
artifact: binary_zip
99+
draft: true
100+
prerelease: true
101+
on:
102+
appveyor_repo_tag: true
103+
104+
# Uncomment the following to pause the VM and wait for RDP connection to debug
105+
# - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))

0 commit comments

Comments
 (0)