Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 34 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,36 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
# project name
PROJECT(FourierConvolutionCUDALib CXX C)
if(NOT(${CMAKE_VERSION} VERSION_LESS "3.0.0"))
cmake_policy(SET CMP0042 NEW)
cmake_policy(SET CMP0042 NEW)
endif()

if(${CMAKE_VERSION} VERSION_GREATER "3.1")
cmake_policy(SET CMP0054 NEW)
cmake_policy(SET CMP0054 NEW)
endif()

# Build the host code as C++98/03. CMake's CXX_STANDARD property only accepts
# 98, 11, 14, ... -- there is no separate "03" value -- so 98 is the spelling
# that selects the compiler's C++98/03 mode.
set(CMAKE_CXX_STANDARD 98)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# User-facing build switches (pass -D<NAME>=ON|OFF on the cmake command line).
option(ENABLE_TESTING "enable the test suite (requires boost to be installed)" ON)
# NOTE(review): the benchmark target in tests/CMakeLists.txt links the Boost
# timer and program_options libraries -- confirm whether google/benchmark is
# really a requirement.
option(ENABLE_BENCHMARKS "enable the benchmark suite (requires google/benchmark to be installed)" OFF)
# option(ENABLE_CXX11_ABI "enable _GLIBCXX_USE_CXX11_ABI in GCC 5.0+" ON)
# if(${WITH_CXX11_ABI})
# set(CXX11_ABI_VALUE 1)
# else()
# set(CXX11_ABI_VALUE 0)
# endif()


# IF(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
# if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0")
# set(WITH_CXX11_ABI ON)
# set(CXX11_ABI_VALUE 1)
# endif()

# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=${CXX11_ABI_VALUE})
# message(">> [${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}] adding -D_GLIBCXX_USE_CXX11_ABI=${CXX11_ABI_VALUE}")
# endif()

# version number
SET (FOURIERCONVOLUTIONCUDALIB_NAME "CUDA FOURIER CONVOLUTION LIBRARY")
SET (FOURIERCONVOLUTIONCUDALIB_CODENAME "${PROJECT_NAME}")
Expand Down Expand Up @@ -75,7 +97,7 @@ IF(INCLUDE_CUDA)
FIND_PACKAGE(CUDA)
IF(CUDA_FOUND)
SET(CUDA_VERBOSE_BUILD ON)
set(CUDA_ARCHS 10;20;30;35;37;50;52;60;61;70)
#set(CUDA_ARCHS 10;20;30;35;37;50;52;60;61;70)

SET(CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
IF(APPLE)
Expand All @@ -92,6 +114,7 @@ IF(INCLUDE_CUDA)
ENDIF()
ENDIF(APPLE)

if(NOT DEFINED SMS)
set(CUDA_ARCHS 10;20;21)

IF("${CUDA_VERSION}" VERSION_GREATER "4.5")
Expand All @@ -118,7 +141,9 @@ IF(INCLUDE_CUDA)
IF("${CUDA_VERSION}" VERSION_GREATER "8.0")
list(APPEND CUDA_ARCHS 70)#8.0+
ENDIF()

else()
set(CUDA_ARCHS ${SMS})
endif()

list(SORT CUDA_ARCHS)

Expand All @@ -133,7 +158,11 @@ IF(INCLUDE_CUDA)

list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_${CUDA_HIGHEST_SM},code=compute_${CUDA_HIGHEST_SM}")

MESSAGE(">> CUDA version ${CUDA_VERSION} detected, compiling for Compute Capability/ies ${CUDA_ARCHS} (highest SM: ${CUDA_HIGHEST_SM})")
MESSAGE(">> CUDA version ${CUDA_VERSION} detected, compiling for Compute Capability/ies ${CUDA_ARCHS} (highest SM: ${CUDA_HIGHEST_SM})")

# if(WITH_CXX11_ABI)
# list(APPEND CUDA_NVCC_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=${CXX11_ABI_VALUE}")
# endif()

set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE};-O2;--use_fast_math)
set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG};-g;-G)
Expand Down
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,20 @@ $ cmake -DCMAKE_INSTALL_PREFIX=/directory/of/your/choice -DBOOST_ROOT=/path/to/b

Here, ```/path/to/boost/root``` should contain the boost libraries and the boost headers.

Benchmarks
----------

The repository contains a small benchmark utility (still in alpha stage). To build and run it, do:

``` bash
$ cd repo
$ mkdir build
$ cd build
$ cmake -DENABLE_BENCHMARKS=ON ..
$ make
$ ./tests/bench_gpu_convolve
[gpu 0] inplace, 10x, (image 128**3, kernel 3**3) 1.326021s wall, 1.020000s user + 0.300000s system = 1.320000s CPU (99.5%)
```

How to get Help
===============
Expand Down
19 changes: 13 additions & 6 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
INCLUDE_DIRECTORIES(.)

FIND_PACKAGE (Boost 1.42 QUIET COMPONENTS # system filesystem
unit_test_framework REQUIRED)
FIND_PACKAGE (Boost 1.42 QUIET COMPONENTS system filesystem timer unit_test_framework program_options
REQUIRED)
IF(Boost_FOUND)
INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS})
LINK_DIRECTORIES(${Boost_LIBRARY_DIRS})
ENDIF()

FIND_PACKAGE(CUDA)


IF(CUDA_FOUND)
INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src)
LINK_DIRECTORIES(${PROJECT_BINARY_DIR}/src)
Expand All @@ -17,21 +18,27 @@ CUDA_ADD_EXECUTABLE(test_gpu_convolve test_gpu_convolve.cpp image_stack_utils.cp
CUDA_ADD_EXECUTABLE(test_gpu_numerical_stability test_gpu_numerical_stability.cpp image_stack_utils.cpp)
CUDA_ADD_EXECUTABLE(test_how_cufft_works test_how_cufft_works.cu image_stack_utils.cpp)


IF(Boost_FOUND)

MESSAGE(">> Boost UTF: ${Boost_LIBRARIES} ")
target_link_libraries(test_gpu_convolve ${Boost_LIBRARIES} ${PROJECT_NAME})
MESSAGE(">> Boost UTF: ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ")
target_link_libraries(test_gpu_convolve ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ${PROJECT_NAME})
set_target_properties(test_gpu_convolve PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK")

target_link_libraries(test_gpu_numerical_stability ${Boost_LIBRARIES} ${PROJECT_NAME})
target_link_libraries(test_gpu_numerical_stability ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ${PROJECT_NAME})
set_target_properties(test_gpu_numerical_stability PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK")

target_link_libraries(test_how_cufft_works ${Boost_LIBRARIES} )
target_link_libraries(test_how_cufft_works ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} )
CUDA_ADD_CUFFT_TO_TARGET( test_how_cufft_works )

if(ENABLE_BENCHMARKS)
CUDA_ADD_EXECUTABLE(bench_gpu_convolve bench_gpu_convolve.cu)
target_link_libraries(bench_gpu_convolve ${Boost_TIMER_LIBRARY} ${Boost_PROGRAM_OPTIONS_LIBRARY} ${PROJECT_NAME})
endif()

ENDIF(Boost_FOUND)


ELSE(CUDA_FOUND)
MESSAGE(WARNING "Skipping GPU based tests, CUDA not found\!")
ENDIF(CUDA_FOUND)
Expand Down
59 changes: 59 additions & 0 deletions tests/bench_gpu_convolve.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#include <boost/timer/timer.hpp>
#include <boost/program_options.hpp>

#include "padd_utils.h"

#include "convolution3Dfft.h"
#include "test_utils.hpp"
#include "image_stack_utils.h"
#include "traits.hpp"

#include <vector>
#include <iostream>

using namespace boost::timer;
namespace po = boost::program_options;

int main(int ac, char** av) {

// Declare the supported options.
po::options_description desc("Allowed options");
desc.add_options()
("help", "produce help message")
("image_size", po::value<int>()->default_value(128), "set the 3D image size, so the image will extent sizexsizexsize")
("kernel_size", po::value<int>()->default_value(3), "set the kernel size, so the kernel will extent sizexsizexsize")
("gpu", po::value<int>()->default_value(-1), "gpu device to use, if value=-1, the highest device with highest compute capability is used")
;

po::variables_map vm;
po::store(po::parse_command_line(ac, av, desc), vm);
po::notify(vm);

if (vm.count("help")) {
std::cout << desc << "\n";
return 1;
}

int device_id = vm["gpu"].as<int>();
if(device_id < 0)
device_id = selectDeviceWithHighestComputeCapability();

std::vector<int> image_dims(3,vm["image_size"].as<int>());
std::size_t image_len = std::pow(vm["image_size"].as<int>(),3);
std::vector<float> image(image_len,0.);

std::vector<int> kernel_dims(3,vm["kernel_size"].as<int>());
std::size_t kernel_len = std::pow(vm["kernel_size"].as<int>(),3);
std::vector<float> kernel(kernel_len,0);

cpu_timer timer;
for (int i = 0;i<10;++i){

convolution3DfftCUDAInPlace(&image[0], &image_dims[0] ,
&kernel[0], &kernel_dims[0] ,
device_id);
}
std::cout << "[gpu "<< device_id << "] inplace, 10x, (image "<< image_dims.front() <<"**3, kernel "<< kernel_dims.front() <<"**3)" << timer.format() << '\n';


}