clMathLibraries
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.travis.yml‎
Lines changed: 124 additions & 44 deletions b/‎.travis.yml‎
Lines changed: 124 additions & 44 deletions
diff --git a/‎README.md‎
Lines changed: 25 additions & 32 deletions b/‎README.md‎
Lines changed: 25 additions & 32 deletions
diff --git a/‎appveyor.yml‎
Lines changed: 105 additions & 0 deletions b/‎appveyor.yml‎
Lines changed: 105 additions & 0 deletions
@@ -20,3 +20,6 @@
 
 # flags.txt file
 *flags.txt
+
+# vim temp files
+.*.swp
@@ -1,54 +1,134 @@
+# Ubuntu name decoder ring; https://en.wikipedia.org/wiki/List_of_Ubuntu_releases
+# Ubuntu 12.04 LTS (Precise Pangolin) <== Travis CI VM image
+# Ubuntu 12.10 (Quantal Quetzal)
+# Ubuntu 13.04 (Raring Ringtail)
+# Ubuntu 13.10 (Saucy Salamander)
+# Ubuntu 14.04 LTS (Trusty Tahr)
+# Ubuntu 14.10 (Utopic Unicorn)
+# Ubuntu 15.04 (Vivid Vervet)
+# Ubuntu 15.10 (Wily Werewolf)
+# Ubuntu 16.04 LTS (Xenial Xerus)
+
+# language: instructs travis what compilers && environment to set up in build matrix
 language: cpp
 
+# sudo: false instructs travis to build our project in a docker VM (faster)
+# Can not yet install fglrx packages with 'false'
+sudo: required # false
+
+# os: expands the build matrix to include multiple os's
+# NOTE: linux builds have shown sporadic failures when building boost; needs investigation
+os:
+  - linux
+  - osx
+
+# compiler: expands the build matrix to include multiple compilers (per os)
 compiler:
   - gcc
+  - clang
+
+addons:
+  # apt: is disabled on osx builds
+  # apt: needed by docker framework to install project dependencies without
+  # sudo.  Apt uses published Ubuntu PPAs from https://launchpad.net/
+  # https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json
+  apt:
+    sources:
+      # ubuntu-toolchain-r-test contains newer versions of gcc to install
+      # - ubuntu-toolchain-r-test
+      # llvm-toolchain-precise-3.6 contains newer versions of clang to install
+      # - llvm-toolchain-precise-3.6
+      # kubuntu-backports contains newer versions of cmake to install
+      - kubuntu-backports
+      # boost-latest contains boost v1.55
+      - boost-latest
+    packages:
+      # g++-4.8 is minimum version considered to be the first good c++11 gnu compiler
+      # - g++-4.8
+      # - clang-3.6
+      # We require v2.8.12 minimum
+      - cmake
+      # I'm finding problems between pre-compiled versions of boost ublas, with gtest
+      # stl_algobase.h: error: no matching function for call to swap()
+      - libboost-program-options1.55-dev
+      # - libboost-serialization1.55-dev
+      # - libboost-filesystem1.55-dev
+      # - libboost-system1.55-dev
+      # - libboost-regex1.55-dev
+      # The package opencl-headers on 'precise' only installs v1.1 cl headers; uncomment for 'trusty' or greater
+#      - opencl-headers
+      # Uncomment one of the following when fglrx modules are added to the apt whitelist
+#      - fglrx
+#      - fglrx=2:8.960-0ubuntu1
+#      - fglrx=2:13.350.1-0ubuntu0.0.1
+
+# env: specifies additional global variables to define per row in build matrix
+env:
+  global:
+    - CLBLAS_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release
+
+# The following filters our build matrix; we are interested in linux-gcc & osx-clang
+matrix:
+  exclude:
+    - os: linux
+      compiler: clang
+    - os: osx
+      compiler: gcc
 
 before_install:
-  - sudo apt-get update -qq
-  - sudo apt-get install -qq fglrx libboost-program-options-dev
-# Uncomment below to help verify the installs above work
-#  - ls -la /usr/lib/libboost*
-#  - ls -la /usr/include/boost
+  # Remove the following linux clause when fglrx can be installed with sudo: false
+  - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
+      sudo apt-get update -qq &&
+      sudo apt-get install -qq fglrx=2:13.350.1-0ubuntu0.0.1;
+    fi
+  - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
+      export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers";
+    fi
+  - if [ ${TRAVIS_OS_NAME} == "osx" ]; then
+      brew update;
+      brew outdated boost || brew upgrade boost;
+      brew outdated cmake || brew upgrade cmake;
+    fi
+  # - if [ ${CXX} = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi
+  - cmake --version;
+  - ${CC} --version;
+  - ${CXX} --version;
 
+install:
+  # 'Precise' only distributes v1.1 opencl headers; download 1.2 headers from khronos website
+  # Remove when the travis VM upgrades to 'trusty' or beyond
+  - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
+      mkdir -p ${OPENCL_ROOT}/include/CL;
+      pushd ${OPENCL_ROOT}/include/CL;
+      wget -w 1 -r -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/;
+      popd;
+    fi
+  # osx image does not contain cl.hpp file; download from Khronos
+  # - if [ ${TRAVIS_OS_NAME} == "osx" ]; then
+  #     pushd /System/Library/Frameworks/OpenCL.framework/Versions/A/Headers/;
+  #     sudo wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/cl.hpp;
+  #     popd;
+  #   fi
+
+# Use before_script: to run configure steps
 before_script:
-  - cd ${TRAVIS_BUILD_DIR}
-# download OpenCL 1.2 header files since Travis CI only provides 1.1
-  - mkdir -p OpenCLInclude/CL
-  - cd OpenCLInclude/CL
-  #- wget -r --no-parent -nH --cut-dirs=4 --reject="index.html*" https://www.khronos.org/registry/cl/api/1.2/
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl.hpp
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_d3d10.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_d3d11.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_dx9_media_sharing.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_egl.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_ext.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_gl.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_gl_ext.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_platform.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/opencl.h
-  - ls
-  - pwd
-  - cd ../..
-  - mkdir -p bin/clBLAS
-  - cd bin/clBLAS
-  - cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOPENCL_INCLUDE_DIRS:PATH=$PWD/../../OpenCLInclude -DCMAKE_INSTALL_PREFIX:PATH=$PWD/package ../../src
-
-script: 
-  - make install
-#  - ls -Rla package
-# Run a simple test to validate that the build works; CPU device in a VM
-  - cd package/bin
-  - export LD_LIBRARY_PATH=${TRAVIS_BUILD_DIR}/bin/clBLAS/package/lib64:${LD_LIBRARY_PATH}
-  - ./clBLAS-client --cpu
-
-after_success:
-  - cd ${TRAVIS_BUILD_DIR}/bin/clBLAS
+  - mkdir -p ${CLBLAS_ROOT}
+  - pushd ${CLBLAS_ROOT}
+  - cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOCL_VERSION=2.0 -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src
+
+# use script: to execute build steps
+script:
   - make package
 
-notifications:
-   email:
-     - [email protected]
-   on_success: change
-   on_failure: always
-   
+deploy:
+  provider: releases
+  prerelease: true
+  draft: true
+  skip_cleanup: true
+  api_key:
+    secure: MBkxtcfSk+4UvGRO+WRhmS86vIVzAs0LIF2sAtr/S+Ed+OdUAuhZypUsDXGWtK3mL55v9c8BZXefFfHfJqElcNmyHKwCptbCR/JiM8YBtjoy2/RW1NcJUZp+QuRlk23xPADj7QkPjv7dfrQUMitkLUXAD+uTmMe2l8gmlbhMrQqPBKhb+31FNv6Lmo6oa6GjbiGi7qjsrJc7uQjhppLam+M7BZbBALGbIqMIrb2BMDMMhBoDbb4zSKrSg3+krd3kKiCClJlK7xjIlyFXZ527ETQ+PMtIeQb0eJ3aQwa4caBRCm5BDzt8GnJ48S88EkynbQioCEE87ebcyOM7M+wfslW/Fm1Y86X5odIljkOmTNKoDvgLxc9vUCBtMyVHNIgZcToPdsrMsGxcHV+JtU3yVQVm6dnA5P/zG5bA+aBjsd7p7BdOE4fdhvZV5XRAk/wmiyWalF7hKJxHIiWAKknL+tpPDDUF+fHmDDsdf7yRDJBegNcKfw4+m19MIvLn9fbiNVCtwCAL1T4yWkIEpi4MRMDPtftmkZPbi6UwluOJUTeCeHe4en99Yu2haemNPqXs6rR0LlXGk31GQwzlrNfb+94F5tT2a4Ka4PsruA2NMW/IYCYEE5Gu7PihVDR031Fn9cdCU9kefUgyB07rJD6q/W+ljsU0osyg7VxyfMg8rkw=
+  file: ${CLBLAS_ROOT}/clBLAS-build/*.tar.gz
+  file_glob: true
+  on:
+    all_branches: true
+    tags: true
@@ -1,8 +1,11 @@
+## Build Status
+| Build branch | master | develop |
+|-----|-----|-----|
+| GCC/Clang x64 | [![Build Status](https://travis-ci.org/clMathLibraries/clBLAS.svg?branch=master)](https://travis-ci.org/clMathLibraries/clBLAS/branches) | [![Build Status](https://travis-ci.org/clMathLibraries/clBLAS.svg?branch=develop)](https://travis-ci.org/clMathLibraries/clBLAS/branches) |
+| Visual Studio x64 | [![Build status](https://ci.appveyor.com/api/projects/status/v384bi6e8xv8nxjm/branch/master?svg=true)](https://ci.appveyor.com/project/kknox/clblas-5ph9i/branch/master)|[![Build status](https://ci.appveyor.com/api/projects/status/v384bi6e8xv8nxjm/branch/develop?svg=true)](https://ci.appveyor.com/project/kknox/clblas-5ph9i/branch/develop) |
+
 clBLAS
 =====
-[![Build Status](https://travis-ci.org/clMathLibraries/clBLAS.png)](https://travis-ci.org/clMathLibraries/clBLAS)
-
-
 This repository houses the code for the OpenCL™ BLAS portion of clMath.
 The complete set of BLAS level 1, 2 & 3 routines is implemented. Please
 see Netlib BLAS for the list of supported routines. In addition to GPU
@@ -20,30 +23,20 @@ library does generate and enqueue optimized OpenCL kernels, relieving
 the user from the task of writing, optimizing and maintaining kernel
 code themselves.
 
-## clBLAS update notes 04/2015
--   A subset of GEMM and TRSM can be off-line compiled for Hawaii, Bonaire and Tahiti device at compile-time. This feature
-    eliminates the overhead of calling clBuildProgram() at run-time.
--   Off-line compilation can be done with OpenCL 1.1, OpenCL 1.2 and OpenCl 2.0 runtime. However, for better
-    performance OpenCL 2.0 is recommended. Library user can select "OCL_VERSION" from CMake to ensure the library with
-    OpenCL version. It is library user's responsibility to ensure compatible hardware and driver.
--   Added flags_public.txt file that contains OpenCL compiler flags used by off-line compilation. The flags_public.txt
-    will only be loaded when OCL_VERSION is 2.0.
--   User can off-line compile one or more supported device by selecting 
-    OCL_OFFLINE_BUILD_BONAIRE_KERNEL
-    OCL_OFFLINE_BUILD_HAWII_KERNEL
-    OCL_OFFLINE_BUILD_TAHITI_KERNEL.
-    However, compile for more than one device at a time might result in running out of heap memory. Thus, compile for
-    one device at a time is recommended.
--   User may also supply specific OpenCL compiler path with OCL_COMPILER_DIR or the library will load default OpenCL compiler.
--   The minimum driver requirement for off-line compilation is 14.502.
-    
+## clBLAS update notes 09/2015
+
+- Introducing [AutoGemm](http://github.com/clMathLibraries/clBLAS/wiki/AutoGemm)
+  - clBLAS's Gemm implementation has been comprehensively overhauled to use AutoGemm. AutoGemm is a suite of python scripts which generate optimized kernels and kernel selection logic, for all precisions, transposes, tile sizes and so on.
+  - CMake is configured to use AutoGemm for clBLAS so the build and usage experience of Gemm remains unchanged (only performance and maintainability have been improved). Kernel sources are generated at build time (not runtime) and can be configured within CMake to be pre-compiled at build time.
+  - clBLAS users with unique Gemm requirements can customize AutoGemm to their needs (such as non-default tile sizes for very small or very skinny matrices); see [AutoGemm](http://github.com/clMathLibraries/clBLAS/wiki/AutoGemm) documentation for details.
+
 
 ## clBLAS library user documentation
 
 [Library and API documentation][] for developers is available online as
 a GitHub Pages website
 
-### Google Groups
+## Google Groups
 
 Two mailing lists have been created for the clMath projects:
 
@@ -108,10 +101,10 @@ The simple example below shows how to use clBLAS to compute an OpenCL accelerate
     static const cl_float beta = 20;
 
     static cl_float C[M*N] = {
-    11, 12, 13,
-    21, 22, 23,
-    31, 32, 33,
-    41, 42, 43, 
+        11, 12, 13,
+        21, 22, 23,
+        31, 32, 33,
+        41, 42, 43,
     };
     static const size_t ldc = N;        /* i.e. ldc = N */
 
@@ -155,13 +148,13 @@ The simple example below shows how to use clBLAS to compute an OpenCL accelerate
     err = clEnqueueWriteBuffer( queue, bufC, CL_TRUE, 0,
         M * N * sizeof( *C ), C, 0, NULL, NULL );
 
-    /* Call clBLAS extended function. Perform gemm for the lower right sub-matrices */
-    err = clblasSgemm( clblasRowMajor, clblasNoTrans, clblasNoTrans, 
-							M, N, K,
-							alpha, bufA, 0, lda,
-							bufB, 0, ldb, beta,
-							bufC, 0, ldc,
-							1, &queue, 0, NULL, &event );
+        /* Call clBLAS extended function. Perform gemm for the lower right sub-matrices */
+        err = clblasSgemm( clblasRowMajor, clblasNoTrans, clblasNoTrans,
+                                M, N, K,
+                                alpha, bufA, 0, lda,
+                                bufB, 0, ldb, beta,
+                                bufC, 0, ldc,
+                                1, &queue, 0, NULL, &event );
 
     /* Wait for calculations to be finished. */
     err = clWaitForEvents( 1, &event );
 
@@ -0,0 +1,105 @@
+# Appveyor OS list
+# Windows Server 2012 R2 (x64) <== Appveyor default image
+# Visual Studio 2015
+
+# os: expands the build matrix to include multiple os's
+os:
+  - Windows Server 2012
+
+# platform: expands the build matrix to include multiple target platforms (per os)
+platform:
+  - x64
+
+configuration:
+  - Release
+
+# Only clone the top level commit; don't bother with history
+shallow_clone: true
+
+# environment: specifies additional global variables to define per row in build matrix
+environment:
+  global:
+    CLBLAS_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\nmake\\release"
+    OPENCL_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\opencl"
+    # BOOST_ROOT: "C:/Libraries/boost"   # boost 1.56, 32-bit only
+    BOOST_ROOT: "C:\\Libraries\\boost_1_58_0"
+    OPENCL_REGISTRY: "https://www.khronos.org/registry/cl"
+
+init:
+  - echo init step
+  - cmake --version
+  - C:\"Program Files (x86)"\"Microsoft Visual Studio 12.0"\VC\vcvarsall.bat %PLATFORM%
+  # Uncomment the following to display Remote Desktop connection details
+  # - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
+
+# We need to create an opencl import library that clblas can link against
+# Vendor-based OpenCL packages are hard to use because of download size, registration requirements,
+# and poorly supported unattended installs
+install:
+  - echo install step
+  - ps: mkdir $env:OPENCL_ROOT
+  - ps: pushd $env:OPENCL_ROOT
+  - ps: $opencl_registry = $env:OPENCL_REGISTRY
+  # This downloads the source to the example/demo icd library
+  - ps: wget $opencl_registry/specs/opencl-icd-1.2.11.0.tgz -OutFile opencl-icd-1.2.11.0.tgz
+  - ps: 7z x opencl-icd-1.2.11.0.tgz
+  - ps: 7z x opencl-icd-1.2.11.0.tar
+  - ps: mv .\icd\* .
+  # This downloads all the opencl header files
+  # The cmake build files expect a directory called inc
+  - ps: mkdir inc/CL
+  - ps: wget $opencl_registry/api/1.2/ | select -ExpandProperty links | where {$_.href -like "*.h*"} | select -ExpandProperty outerText | foreach{ wget $opencl_registry/api/1.2/$_ -OutFile inc/CL/$_ }
+  # - ps: dir; if( $lastexitcode -eq 0 ){ dir include/CL } else { Write-Output boom }
+  # Create the static import lib in a directory called lib, so findopencl() will find it
+  - ps: mkdir lib
+  - ps: pushd lib
+  - cmake -G "NMake Makefiles" ..
+  - nmake
+  - ps: popd
+  # Rename the inc directory to include, so FindOpencl() will find it
+  - ps: ren inc include
+  - ps: popd
+  - ps: popd
+
+# before_build is used to run configure steps
+before_build:
+  - echo before_build step
+  # Boost 1.58 is not installed in typical fashion, help FindBoost() find binary libs with BOOST_LIBRARYDIR
+  - ps: $env:BOOST_LIBRARYDIR = "$env:BOOST_ROOT/lib64-msvc-12.0"
+  - ps: mkdir $env:CLBLAS_ROOT
+  - ps: pushd $env:CLBLAS_ROOT
+  - cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=%CONFIGURATION% -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOCL_VERSION=2.0 -DOPENCL_ROOT=%OPENCL_ROOT% %APPVEYOR_BUILD_FOLDER%/src
+
+# build_script invokes the compiler
+build_script:
+  - echo build_script step
+  - nmake package
+
+after_build:
+  - echo after_build step
+  - ps: ls $env:CLBLAS_ROOT
+  - ps: mv $env:CLBLAS_ROOT\*.zip $env:APPVEYOR_BUILD_FOLDER
+
+# Appveyor will save a copy of the package in its personal storage
+artifacts:
+  - path: '*.zip'
+    name: binary_zip
+    type: zip
+
+# on_finish always executes regardless of passed or failed builds
+on_finish:
+  - echo on_finish step
+
+# Appveyor will push the artifacts it has saved to GitHub 'releases' tab
+deploy:
+  provider: GitHub
+  auth_token:
+    secure: dRXIWJKpU7h2RsHX7RqmyYCtCw+Q9O3X5MArloY6p34GZC1w7bp+jQYTZqbdO7bw
+  artifact: binary_zip
+  draft: true
+  prerelease: true
+  on:
+    appveyor_repo_tag: true
+
+  # Uncomment the following to pause the VM and wait for RDP connection to debug
+  # - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))