Skip to content

Commit 5aa4efe

Browse files
committed
- initial commit
1 parent 88814e1 commit 5aa4efe

28 files changed

+2306
-0
lines changed

CMakeLists.txt

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
cmake_minimum_required(VERSION 3.12)
2+
#Requires cmake 3.12 for first class cuda support with visual studio
3+
4+
project(cuNSearch LANGUAGES CXX CUDA)
5+
# Visual studio solution directories.
6+
set_property(GLOBAL PROPERTY USE_FOLDERS on)
7+
8+
9+
option(CUNSEARCH_USE_DOUBLE_PRECISION "Use double precision." ON)
10+
11+
# Report the selected floating point type and, for double precision, pass the
# matching preprocessor definition to every target declared after this point.
if(CUNSEARCH_USE_DOUBLE_PRECISION)
	message(STATUS "cuNSearch::Real = double")
	add_compile_options(-DCUNSEARCH_USE_DOUBLE_PRECISION)
else()
	message(STATUS "cuNSearch::Real = float")
endif()
20+
21+
set(CMAKE_CXX_STANDARD 11)
22+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
23+
SET(CMAKE_DEBUG_POSTFIX "_d")
24+
25+
find_package(CUDA 9.0 REQUIRED)
26+
27+
set (INCLUDE_HEADERS
28+
include/PointSet.h
29+
include/ActivationTable.h
30+
include/Common.h
31+
include/cuNSearch.h
32+
)
33+
34+
set (HEADER_FILES
35+
src/Types.h
36+
src/cuNSearchDeviceData.h
37+
src/GridInfo.h
38+
src/NotImplementedException.h
39+
src/PointSetImplementation.h
40+
src/cuNSearchKernels.cuh
41+
src/helper_linearIndex.h
42+
src/helper_mortonCode.h
43+
Utils/cuda_helper.h
44+
Utils/Timing.h
45+
Utils/IDFactory.h
46+
)
47+
48+
set (SOURCE_FILES
49+
src/PointSet.cpp
50+
src/PointSetImplementation.cu
51+
src/cuNSearch.cu
52+
src/cuNSearchDeviceData.cu
53+
src/cuNSearchKernels.cu
54+
Utils/cuda_helper.cpp
55+
Utils/Timing.cpp
56+
Utils/IDFactory.cpp
57+
)
58+
59+
include_directories(
60+
"include"
61+
"Utils"
62+
${CUDA_INCLUDE_DIRS}
63+
${CUDA_CUT_INCLUDE_DIR}
64+
)
65+
66+
67+
add_library(cuNSearch STATIC ${INCLUDE_HEADERS} ${HEADER_FILES} ${SOURCE_FILES})
68+
69+
target_link_libraries(cuNSearch ${CUDA_LIBRARIES})
70+
target_compile_definitions(cuNSearch PUBLIC $<$<CONFIG:DEBUG>:DEBUG>)
71+
72+
install(FILES ${INCLUDE_HEADERS}
73+
DESTINATION include/)
74+
75+
install(TARGETS cuNSearch
76+
RUNTIME DESTINATION bin
77+
LIBRARY DESTINATION lib
78+
ARCHIVE DESTINATION lib
79+
)
80+
81+
option(BUILD_DEMO "Build example of how to use this library."
82+
ON)
83+
if(BUILD_DEMO)
84+
add_subdirectory(demo)
85+
endif(BUILD_DEMO)
86+
87+
88+
# NOTE(review): no cache entry named USE_DOUBLE_PRECISION is created in this
# script (the option above is CUNSEARCH_USE_DOUBLE_PRECISION), so this call has
# no effect on that option. Confirm whether the prefixed name was intended.
unset(USE_DOUBLE_PRECISION CACHE)

README.md

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# cuNSearch
2+
A C++/CUDA library to efficiently compute neighborhood information on the GPU for 3D point clouds within a fixed radius. Suitable for many applications, e.g. neighborhood search for SPH fluid simulations.
3+
4+
The library interface is similar to CompactNSearch (https://github.com/InteractiveComputerGraphics/CompactNSearch)
5+
## Libraries using cuNSearch
6+
7+
* [SPlisHSPlasH](https://github.com/InteractiveComputerGraphics/SPlisHSPlasH) - A C++ library for the physically-based simulation of fluids using Smoothed Particle Hydrodynamics (see screenshot)
8+
9+
![](images/screenshot.jpg)
10+
11+
## Build Instructions
12+
13+
This project is based on [CMake](https://cmake.org/). Simply generate project, Makefiles, etc. using [CMake](https://cmake.org/) and compile the project with the compiler of your choice.
14+
15+
Requirements:
16+
- CMake 3.12
17+
- CUDA SDK 9.0 or newer
18+
- C++ 11
19+
20+
The code was tested with the following configurations:
21+
- Windows 10 64-bit, CMake 3.12.3, Visual Studio 2017, CUDA SDK 10.1
22+
- Debian 9 64-bit, CMake 3.12.3, GCC 6.3.0, CUDA SDK 9.2
23+
24+
25+
## Usage
26+
A data structure to perform a neighborhood search can be created by calling the constructor given a fixed search radius ```r```.
27+
```c++
28+
cuNSearch::NeighborhoodSearch nsearch(r);
29+
```
30+
An arbitrary number of point clouds can then be added to the data structure using the method ```add_point_set```. The library expects the point positions to be contiguously stored in an array-like structure. The method will return a unique id associated with the initialized point set.
31+
```c++
32+
std::vector<std::array<Real, 3>> positions;
33+
// ... Fill array with 3 * n real numbers representing three-dimensional point positions.
34+
unsigned int point_set_id = nsearch.add_point_set(positions.front().data(), positions.size());
35+
nsearch.find_neighbors();
36+
```
37+
In order to generate the neighborhood information simply execute the following command
38+
```c++
39+
nsearch.find_neighbors();
40+
```
41+
Finally, the neighborhood information can be accessed as follows
42+
```c++
43+
PointSet const& ps = nsearch.point_set(point_set_id);
44+
for (int i = 0; i < ps.n_points(); ++i)
45+
{
46+
for (int j = 0; j < ps.n_neighbors(i); ++j)
47+
{
48+
// Return PointID of the jth neighbor of the ith particle in the 0th point set.
49+
PointID const& pid = ps.neighbor(0, i, j);
50+
// ...
51+
// Do whatever you want with the point id. The id contains two indices.
52+
// The first field pid.point_set_id represents the unique point set id returned by add_point_set.
53+
// The second field pid.point_id stands for the index of the neighboring particle within
54+
// the containing point set.
55+
// ...
56+
}
57+
}
58+
```
59+
60+
Besides the basic functionality the library offers to compute a rule for reordering the points according to a space-filling Z curve. The reordering will improve the performance of future neighborhood queries and accesses. The rule can be computed via
61+
```c++
62+
nsearch.z_sort();
63+
```
64+
Please note that the actual reordering must be invoked by the user by
65+
```c++
66+
ps.sort_field(positions.data());
67+
```
68+
Assuming that there is additional information stored per-point (e.g. velocity, color, mass etc.) the information **must** also be reordered using the same method to maintain consistency. Subsequently, the ```find_neighbors``` function has to be invoked again to update the neighborhood information.
69+
70+
Another self-explaining (benchmark) [demo](demo/main.cu) is contained in the project.
71+
72+
## Activation Table
73+
74+
When maintaining multiple point sets, it is sometimes desired that only certain point sets can find points from other point sets. Therefore, an activation table is implemented where the user can specify whether a point set i searches points in another point set j. When nothing else is specified, all point sets will search points in all other point sets. The activation table can be modified with e.g.
75+
```c++
76+
nsearch.set_active(i, j, false);
77+
```
78+
79+
## Common mistakes and issues
80+
81+
Visual Studio may not detect changes in ".cu" files.
82+
83+
Use of thrust library in cpp files: Some thrust classes can only be used when the file is compiled by the nvidia compiler nvcc.
84+
This is usually solved by changing the file ending to .cu to mark the file for the nvcc compiler.
85+
86+
## References
87+
88+
* R. Hoetzlein, 2014. "Fast Fixed-Radius Nearest Neighbors: Interactive Million-Particle Fluids", GPU Technology Conference (GTC), Santa Clara, CA.

Utils/IDFactory.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#include "IDFactory.h"
2+
3+
using namespace cuNSearch;
4+
5+
// Storage for the id counter; the first call to IDFactory::getId() returns 0.
int IDFactory::id = 0;
6+

Utils/IDFactory.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#pragma once
2+
3+
namespace cuNSearch
4+
{
5+
/** Factory for unique ids.
6+
*/
7+
class IDFactory
{
public:
	/** Returns the current counter value and then advances the counter by one. */
	static int getId()
	{
		const int issued = id;
		++id;
		return issued;
	}

private:
	/** Value that the next call to getId() will return. */
	static int id;
};
16+
}
17+

Utils/Timing.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#include "Timing.h"
2+
3+
using namespace cuNSearch;
4+
5+
// Accumulated time and call count per timer id.
std::unordered_map<int, AverageTime> Timing::m_averageTimes;
6+
// Timers that have been started but not yet stopped.
std::stack<TimingHelper> Timing::m_timingStack;
7+
// Printing from stopTiming(bool, int&) is enabled by default.
bool Timing::m_dontPrintTimes = false;
8+
// Start/stop call counters; the print functions compare them to report unbalanced calls.
unsigned int Timing::m_startCounter = 0;
9+
unsigned int Timing::m_stopCounter = 0;

Utils/Timing.h

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
#ifndef __TIMING_H__
2+
#define __TIMING_H__
3+
4+
#if defined(WIN32) || defined(_WIN32) || defined(WIN64)
5+
#define FORCE_INLINE __forceinline
6+
#else
7+
#define FORCE_INLINE __attribute__((always_inline))
8+
#endif
9+
10+
#include <iostream>
11+
#include <stack>
#include <string>
12+
#include <unordered_map>
13+
//#include "Common/Common.h"
14+
15+
#include <chrono>
16+
#include "IDFactory.h"
17+
18+
namespace cuNSearch
19+
{
20+
/** One running timer as stored on Timing::m_timingStack. */
struct TimingHelper
21+
{
22+
/** Time stamp taken by Timing::startTiming() when the timer was started. */
std::chrono::time_point<std::chrono::high_resolution_clock> start;
23+
/** Label passed to Timing::startTiming(); printed when the timer is stopped. */
std::string name;
24+
};
25+
26+
/** Accumulated measurements of one timer id (see Timing::m_averageTimes).
 *
 *  The numeric members carry default initializers so that a default-constructed
 *  object starts from a well-defined "nothing measured yet" state instead of
 *  holding indeterminate values.
 */
struct AverageTime
{
	/** Sum of all measured durations, in milliseconds as produced by Timing::stopTiming(). */
	double totalTime = 0.0;
	/** Number of measurements that were added to totalTime. */
	unsigned int counter = 0;
	/** Label of the timer, taken from the first measurement recorded for the id. */
	std::string name;
};
32+
33+
class Timing
34+
{
35+
public:
36+
static bool m_dontPrintTimes;
37+
static unsigned int m_startCounter;
38+
static unsigned int m_stopCounter;
39+
static std::stack<TimingHelper> m_timingStack;
40+
static std::unordered_map<int, AverageTime> m_averageTimes;
41+
42+
static void reset()
43+
{
44+
while (!m_timingStack.empty())
45+
m_timingStack.pop();
46+
m_averageTimes.clear();
47+
m_startCounter = 0;
48+
m_stopCounter = 0;
49+
}
50+
51+
FORCE_INLINE static void startTiming(const std::string& name = std::string(""))
52+
{
53+
TimingHelper h;
54+
h.start = std::chrono::high_resolution_clock::now();
55+
h.name = name;
56+
Timing::m_timingStack.push(h);
57+
Timing::m_startCounter++;
58+
}
59+
60+
FORCE_INLINE static double stopTiming(bool print = true)
61+
{
62+
if (!Timing::m_timingStack.empty())
63+
{
64+
Timing::m_stopCounter++;
65+
std::chrono::time_point<std::chrono::high_resolution_clock> stop = std::chrono::high_resolution_clock::now();
66+
TimingHelper h = Timing::m_timingStack.top();
67+
Timing::m_timingStack.pop();
68+
std::chrono::duration<double> elapsed_seconds = stop - h.start;
69+
double t = elapsed_seconds.count() * 1000.0;
70+
71+
if (print)
72+
std::cout << "time " << h.name.c_str() << ": " << t << " ms\n" << std::flush;
73+
return t;
74+
}
75+
return 0;
76+
}
77+
78+
FORCE_INLINE static double stopTiming(bool print, int &id)
79+
{
80+
if (id == -1)
81+
id = IDFactory::getId();
82+
if (!Timing::m_timingStack.empty())
83+
{
84+
Timing::m_stopCounter++;
85+
std::chrono::time_point<std::chrono::high_resolution_clock> stop = std::chrono::high_resolution_clock::now();
86+
TimingHelper h = Timing::m_timingStack.top();
87+
Timing::m_timingStack.pop();
88+
89+
std::chrono::duration<double> elapsed_seconds = stop - h.start;
90+
double t = elapsed_seconds.count() * 1000.0;
91+
92+
if (print && !Timing::m_dontPrintTimes)
93+
std::cout << "time " << h.name.c_str() << ": " << t << " ms\n" << std::flush;
94+
95+
if (id >= 0)
96+
{
97+
std::unordered_map<int, AverageTime>::iterator iter;
98+
iter = Timing::m_averageTimes.find(id);
99+
if (iter != Timing::m_averageTimes.end())
100+
{
101+
Timing::m_averageTimes[id].totalTime += t;
102+
Timing::m_averageTimes[id].counter++;
103+
}
104+
else
105+
{
106+
AverageTime at;
107+
at.counter = 1;
108+
at.totalTime = t;
109+
at.name = h.name;
110+
Timing::m_averageTimes[id] = at;
111+
}
112+
}
113+
return t;
114+
}
115+
return 0;
116+
}
117+
118+
FORCE_INLINE static void printAverageTimes()
119+
{
120+
std::unordered_map<int, AverageTime>::iterator iter;
121+
for (iter = Timing::m_averageTimes.begin(); iter != Timing::m_averageTimes.end(); iter++)
122+
{
123+
AverageTime &at = iter->second;
124+
const double avgTime = at.totalTime / at.counter;
125+
std::cout << "Average time " << at.name.c_str() << ": " << avgTime << " ms\n" << std::flush;
126+
}
127+
if (Timing::m_startCounter != Timing::m_stopCounter)
128+
std::cout << "Problem: " << Timing::m_startCounter << " calls of startTiming and " << Timing::m_stopCounter << " calls of stopTiming.\n " << std::flush;
129+
std::cout << "---------------------------------------------------------------------------\n\n";
130+
}
131+
132+
FORCE_INLINE static void printTimeSums()
133+
{
134+
std::unordered_map<int, AverageTime>::iterator iter;
135+
for (iter = Timing::m_averageTimes.begin(); iter != Timing::m_averageTimes.end(); iter++)
136+
{
137+
AverageTime &at = iter->second;
138+
const double timeSum = at.totalTime;
139+
std::cout << "Time sum " << at.name.c_str() << ": " << timeSum << " ms\n" << std::flush;
140+
}
141+
if (Timing::m_startCounter != Timing::m_stopCounter)
142+
std::cout << "Problem: " << Timing::m_startCounter << " calls of startTiming and " << Timing::m_stopCounter << " calls of stopTiming.\n " << std::flush;
143+
std::cout << "---------------------------------------------------------------------------\n\n";
144+
}
145+
};
146+
147+
}
148+
149+
#endif

0 commit comments

Comments
 (0)