Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 32 additions & 11 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ foreach(p LIB INCLUDE SHARE)
endif()
endforeach()


#########################################################################################################
## CUDA related
# project options
Expand All @@ -75,7 +76,6 @@ IF(INCLUDE_CUDA)
FIND_PACKAGE(CUDA)
IF(CUDA_FOUND)
SET(CUDA_VERBOSE_BUILD ON)
set(CUDA_ARCHS 10;20;30;35;37;50;52;60;61;70)

SET(CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
IF(APPLE)
Expand All @@ -92,6 +92,8 @@ IF(INCLUDE_CUDA)
ENDIF()
ENDIF(APPLE)

# Baseline list of compute capabilities, derived from the default value of the
# SMS variable in <path-to-cuda-sdk>/samples/1_Utilities/deviceQuery/Makefile
set(CUDA_ARCHS 10;20;21)

IF("${CUDA_VERSION}" VERSION_GREATER "4.5")
Expand All @@ -112,11 +114,12 @@ IF(INCLUDE_CUDA)
ENDIF()

# Toolkits newer than 7.5 (i.e. CUDA 8.0 and later) can additionally target
# compute capabilities 6.0 and 6.1. The entries are appended exactly once so
# that CUDA_ARCHS does not end up with duplicate architectures.
IF("${CUDA_VERSION}" VERSION_GREATER "7.5")
  list(APPEND CUDA_ARCHS 60 61) # 8.0 and 8.0+
ENDIF()

# Toolkits newer than 8.0 (i.e. CUDA 9.0 and later) can additionally target
# compute capability 7.0. The entry is appended exactly once so that
# CUDA_ARCHS does not end up with duplicate architectures.
IF("${CUDA_VERSION}" VERSION_GREATER "8.0")
  list(APPEND CUDA_ARCHS 70) # 9.0 and 9.0+
ENDIF()


Expand All @@ -133,7 +136,7 @@ IF(INCLUDE_CUDA)

list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_${CUDA_HIGHEST_SM},code=compute_${CUDA_HIGHEST_SM}")

MESSAGE(">> CUDA version ${CUDA_VERSION} detected, compiling for Compute Capability/ies ${CUDA_ARCHS} (highest SM: ${CUDA_HIGHEST_SM})")
MESSAGE(">> CUDA ${CUDA_VERSION} detected, compiling for Compute Capability/ies ${CUDA_ARCHS} (highest SM: ${CUDA_HIGHEST_SM})")

set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE};-O2;--use_fast_math)
set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG};-g;-G)
Expand All @@ -159,15 +162,33 @@ ENDIF(INCLUDE_CUDA)
# add subdirectories
ADD_SUBDIRECTORY(src)

FIND_PACKAGE (Boost 1.42 COMPONENTS system filesystem unit_test_framework thread QUIET)
IF(Boost_FOUND AND ENABLE_TESTING)
unset(Boost_FOUND)
ADD_SUBDIRECTORY(tests)
enable_testing()
include("CTestLists.txt")

#########################################################################################################
## Finding Boost
# Boost is only searched for when the detected CUDA toolkit is older than 9.0;
# for CUDA 9+ the search is skipped and a warning is printed instead.
IF("${CUDA_VERSION}" VERSION_LESS "9.0")
  #https://gitlab.kitware.com/cmake/cmake/commit/6297d6c7fd6f80dafe17c0aefeeb0439432aa4b0
  FIND_PACKAGE (Boost 1.42 COMPONENTS system filesystem unit_test_framework thread QUIET)

  # Disable the tests for Boost 1.x releases older than 1.66.
  # VERSION_LESS is the CMake comparison operator; VERSION_SMALLER does not
  # exist and makes if() fail with "Unknown arguments specified".
  # NOTE(review): older FindBoost modules publish these components as
  # Boost_MAJOR_VERSION / Boost_MINOR_VERSION instead -- confirm which names
  # the minimum supported CMake release provides.
  if("${Boost_VERSION_MINOR}" VERSION_LESS "66" AND "${Boost_VERSION_MAJOR}" VERSION_EQUAL "1")
    #https://svn.boost.org/trac10/ticket/13152
    #tests with 1.66.0 showed that the tests can be compiled
    set(ENABLE_TESTING OFF)
  endif()
ELSE()
  message(WARNING "CUDA 9+ detected: boost contains a 'feature' due to nvcc dropping support for the __CUDACC_VER__ macro when going to version 9; switching off tests completely (boost 1.66.0 is the first boost release that works with CUDA 9, but wasn't supported by cmake up until at least 3.10.2)")
ENDIF()


# Wire up the test suite only when Boost was found AND testing is enabled.
# In every other case report why the tests are skipped, so that a configure
# run with Boost present but ENABLE_TESTING switched off is not silent.
IF(Boost_FOUND AND ENABLE_TESTING)
  # NOTE(review): Boost_FOUND is cleared before entering tests/, presumably so
  # that the subdirectory performs its own Boost lookup -- confirm.
  unset(Boost_FOUND)
  ADD_SUBDIRECTORY(tests)
  enable_testing()
  include("CTestLists.txt")
ELSE()
  if(NOT Boost_FOUND)
    MESSAGE(">> Boost libraries not found or deliberately ignored")
  endif()
  MESSAGE(">> skipping test suite")
ENDIF()
Expand Down
9 changes: 3 additions & 6 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# include directories
# src directory
INCLUDE_DIRECTORIES(.)
# # include directories
# # src directory
# INCLUDE_DIRECTORIES(.)

IF(APPLE)
SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -stdlib=libc++")
Expand All @@ -27,9 +27,6 @@ INCLUDE_DIRECTORIES(${PROJECT_BINARY_DIR}/src)
ENDIF()
ENDIF()

TARGET_LINK_LIBRARIES(${PROJECT_NAME}_static ${CUDA_CUDA_LIBRARY})
TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${CUDA_CUDA_LIBRARY})

SET_TARGET_PROPERTIES(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER "${PROJECT_SOURCE_DIR}/src/convolution3Dfft.h;${PROJECT_NAME}_Export.h")
SET_TARGET_PROPERTIES(${PROJECT_NAME}_static PROPERTIES PUBLIC_HEADER "${PROJECT_SOURCE_DIR}/src/convolution3Dfft.h;${PROJECT_NAME}_Export.h")

Expand Down
21 changes: 10 additions & 11 deletions src/standardCUDAfunctions.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
* Author: preibisch
*/
#include "book.h"
#include "cuda.h"

#include "convolution3Dfft.h"

Expand All @@ -17,12 +16,11 @@
int computeCapability = 0;
int meta = 0;
int value = -1;
int major = 0;
int minor = 0;

cudaDeviceProp dp;
for (short devIdx = 0; devIdx < numDevices; ++devIdx) {
cuDeviceComputeCapability(&major, &minor, devIdx);
meta = 10 * major + minor;
cudaGetDeviceProperties(&dp,devIdx);
meta = 10 * dp.major + dp.minor;
if (meta > computeCapability) {
computeCapability = meta;
value = devIdx;
Expand All @@ -34,18 +32,19 @@

int getCUDAcomputeCapabilityMajorVersion(int devCUDA)
{
int major = 0, minor = 0;
cuDeviceComputeCapability(&major, &minor,devCUDA);
cudaDeviceProp dp;
cudaGetDeviceProperties(&dp, devCUDA);

return major;
return dp.major;
}

int getCUDAcomputeCapabilityMinorVersion(int devCUDA)
{
int major = 0, minor = 0;
cuDeviceComputeCapability(&major, &minor,devCUDA);

return minor;
cudaDeviceProp dp;
cudaGetDeviceProperties(&dp, devCUDA);

return dp.minor;
}

int getNumDevicesCUDA()
Expand Down
14 changes: 4 additions & 10 deletions tests/test_how_cufft_works.cu
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
#define BOOST_TEST_DYN_LINK
#define BOOST_TEST_MODULE TEST_HOW_CUFFT_WORKS

#ifdef __CUDACC_VER_MAJOR__
#if __CUDACC_VER_MAJOR__ >= 9
#define __CUDACC_VER__ 90000
#endif
#endif

#include "boost/test/unit_test.hpp"
#include <numeric>
#include <vector>
Expand All @@ -15,16 +9,16 @@
#define FC_TRACE false
#endif


#include "cufft.h"
#include "cufft_test.cuh"

#include "test_utils.hpp"
#include "image_stack_utils.h"
#include "traits.hpp"
#include "book.h"


#include "cufft.h"
#include "cufft_test.cuh"


namespace fourierconvolution {

typedef boost::multi_array<cufftComplex,3> frequ_stack;
Expand Down
58 changes: 29 additions & 29 deletions tests/test_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ namespace fourierconvolution {
float operator()(){

return float(value);

}

};

template<typename in_type, typename out_type = in_type>
Expand All @@ -24,9 +24,9 @@ namespace fourierconvolution {

out_type value = _first - _second;
return (value*value);

}

};


Expand All @@ -37,50 +37,50 @@ namespace fourierconvolution {

ramp():
value(0){};

float operator()(){

return value++;

}

};


/**
 * Test fixture holding an image stack and a convolution kernel.
 *
 * stack_type must be constructible from a shape vector and provide data()
 * and num_elements(). value_policy is a default-constructible generator
 * whose operator() yields the successive stack values (ramp by default).
 */
template <typename stack_type, typename value_policy = ramp>
struct stack_fixture {

    stack_type stack;
    stack_type kernel;

    /**
     * Allocate both containers with the given shapes, zero the kernel and
     * fill the stack element by element from a fresh value_policy instance.
     *
     * @param _stack_shape   extents used to construct stack
     * @param _kernel_shape  extents used to construct kernel
     */
    template <typename T>
    stack_fixture(const std::vector<T>& _stack_shape,
                  const std::vector<T>& _kernel_shape):
        stack(_stack_shape),
        kernel(_kernel_shape){

        value_policy operation;
        std::fill(kernel.data(),kernel.data()+kernel.num_elements(),0);
        std::generate(stack.data(),stack.data()+stack.num_elements(),operation);

    }

};



};

template <typename stack_type>
double l2norm(const stack_type& _reference, const stack_type& _data){
double l2norm = std::inner_product(_data.data(),
_data.data() + _data.num_elements(),
_reference.data(),
0.,
std::plus<double>(),
fourierconvolution::diff_squared<float,double>()
);
_data.data() + _data.num_elements(),
_reference.data(),
0.,
std::plus<double>(),
fourierconvolution::diff_squared<float,double>()
);

double value = std::sqrt(l2norm)/_data.num_elements();

Expand All @@ -90,18 +90,18 @@ double l2norm(const stack_type& _reference, const stack_type& _data){
template <typename stack_type>
double l2norm_by_nvidia(const stack_type& _reference, const stack_type& _data){
double l2norm = std::inner_product(_data.data(),
_data.data() + _data.num_elements(),
_reference.data(),
0.,
std::plus<double>(),
fourierconvolution::diff_squared<float,double>()
);
_data.data() + _data.num_elements(),
_reference.data(),
0.,
std::plus<double>(),
fourierconvolution::diff_squared<float,double>()
);

double reference = std::inner_product(_data.data(),
_data.data() + _data.num_elements(),
_data.data(),
0.);
_data.data() + _data.num_elements(),
_data.data(),
0.);

double value = std::sqrt(l2norm)/std::sqrt(reference);

return value;
Expand Down