Skip to content

Commit 4b1f201

Browse files
committed
Add TBB
1 parent 1d2fe4d commit 4b1f201

File tree

2 files changed

+112
-73
lines changed

2 files changed

+112
-73
lines changed

CMakeLists.txt

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,21 @@ if(UGU_USE_OPENMP)
266266
endif()
267267
endif()
268268

269+
# Optional TBB support, requested by default.
option(UGU_USE_TBB "Use TBB" ON)
message("UGU_USE_TBB: ${UGU_USE_TBB}")
if(UGU_USE_TBB)
  # Config-mode lookup: relies on the package configuration file shipped with
  # the TBB installation rather than on a Find module.
  find_package(TBB CONFIG)
  message("TBB_FOUND: ${TBB_FOUND}")
  if(NOT TBB_FOUND)
    message(WARNING "TBB not found. Disable UGU_USE_TBB")
    # A normal variable takes precedence over the cache entry created by
    # option(), so later if(UGU_USE_TBB) checks in this scope see OFF.
    set(UGU_USE_TBB OFF)
  else()
    # Expose the compile definition, include paths and imported targets
    # through the Ugu_* aggregate variables.
    set(Ugu_DEFINES ${Ugu_DEFINES} UGU_USE_TBB)
    # NOTE(review): TBB_INCLUDE_DIRS may be empty when the package config only
    # exports imported targets -- confirm against the TBB version in use.
    set(Ugu_INCLUDE_DIRS ${Ugu_INCLUDE_DIRS} ${TBB_INCLUDE_DIRS})
    set(Ugu_LIBS ${Ugu_LIBS} TBB::tbb TBB::tbbmalloc)
  endif()
endif()
283+
269284
option(UGU_USE_CUDA "Use CUDA" OFF)
270285
message("UGU_USE_CUDA: ${UGU_USE_CUDA}")
271286
if(UGU_USE_CUDA)
@@ -503,7 +518,15 @@ if(UGU_USE_CUDA)
503518
CUDA_SEPARABLE_COMPILATION ON
504519
CUDA_RESOLVE_DEVICE_SYMBOLS ON
505520
)
506-
target_link_libraries(${Ugu_LIB} my_cuda_flags)
521+
set(LIBS_LINED_TO_Ugu ${LIBS_LINED_TO_Ugu} my_cuda_flags)
522+
endif()
523+
524+
# Queue TBB's imported targets after any libraries collected earlier.
if(UGU_USE_TBB)
  list(APPEND LIBS_LINED_TO_Ugu TBB::tbb TBB::tbbmalloc)
endif()

# Link everything that was queued with a single PUBLIC call.
if(UGU_USE_CUDA OR UGU_USE_TBB)
  target_link_libraries(${Ugu_LIB} PUBLIC ${LIBS_LINED_TO_Ugu})
endif()
508531

509532
set_as_cache(Ugu_LIBS "${Ugu_LIBS}" "Ugu_LIBS")

src/util/geom_util.cc

Lines changed: 88 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -290,12 +290,98 @@ struct EdgeHash {
290290
}
291291
};
292292

293-
static inline uint64_t MakeEdgeKey(int a, int b) {
293+
// Packs the undirected edge (a, b) into one 64-bit key: the smaller vertex
// index goes into the upper 32 bits and the larger one into the lower 32 bits,
// so (a, b) and (b, a) produce the same key.
inline uint64_t MakeEdgeKey(int a, int b) {
  const bool swapped = b < a;
  const int lo = swapped ? b : a;
  const int hi = swapped ? a : b;
  const uint64_t upper = static_cast<uint64_t>(static_cast<uint32_t>(lo)) << 32;
  const uint64_t lower = static_cast<uint32_t>(hi);
  return upper | lower;
}
298298

299+
void BuildFaceAdjacencyCSRParallel_TwoPass(
300+
const std::vector<Eigen::Vector3i>& faces, std::vector<int>& offsets,
301+
std::vector<int>& neighbors) {
302+
int num_faces = static_cast<int>(faces.size());
303+
if (num_faces == 0) {
304+
offsets.assign(1, 0);
305+
neighbors.clear();
306+
return;
307+
}
308+
int E = num_faces * 3;
309+
310+
// 1. Make edge list
311+
std::vector<uint64_t> edgeKeys(E);
312+
std::vector<int> faceIds(E);
313+
for (int fid = 0; fid < num_faces; ++fid) {
314+
const auto& f = faces[fid];
315+
int idx = fid * 3;
316+
edgeKeys[idx + 0] = MakeEdgeKey(f[0], f[1]);
317+
faceIds[idx + 0] = fid;
318+
edgeKeys[idx + 1] = MakeEdgeKey(f[1], f[2]);
319+
faceIds[idx + 1] = fid;
320+
edgeKeys[idx + 2] = MakeEdgeKey(f[2], f[0]);
321+
faceIds[idx + 2] = fid;
322+
}
323+
324+
// 2. Sort
325+
std::vector<int> idx(E);
326+
std::iota(idx.begin(), idx.end(), 0);
327+
#if !defined(_WIN32) && !defined(UGU_USE_TBB)
328+
// This case, std::execution::par_unseq may depend on TBB but not linked
329+
std::sort(idx.begin(), idx.end(),
330+
[&](int a, int b) { return edgeKeys[a] < edgeKeys[b]; });
331+
#else
332+
std::sort(std::execution::par_unseq, idx.begin(), idx.end(),
333+
[&](int a, int b) { return edgeKeys[a] < edgeKeys[b]; });
334+
#endif
335+
336+
// 3. Pass1: Count neighbors per face
337+
offsets.assign(num_faces + 1, 0);
338+
for (int p = 0; p < E;) {
339+
int q = p + 1;
340+
uint64_t key = edgeKeys[idx[p]];
341+
while (q < E && edgeKeys[idx[q]] == key) ++q;
342+
343+
// The number of faces sharing this edge is k = q - p
344+
// So, each face in this group is adjacent to (k - 1) others
345+
if (q - p > 1) {
346+
for (int i = p; i < q; ++i) {
347+
offsets[faceIds[idx[i]] + 1] += (q - p - 1);
348+
}
349+
}
350+
p = q;
351+
}
352+
353+
// Compute cumulative sum
354+
for (int i = 1; i <= num_faces; ++i) {
355+
offsets[i] += offsets[i - 1];
356+
}
357+
358+
// 4. Pass2: Fill neighbors array
359+
neighbors.resize(offsets[num_faces]);
360+
std::vector<int> current_pos = offsets; // Copy start positions for each face
361+
362+
for (int p = 0; p < E;) {
363+
int q = p + 1;
364+
uint64_t key = edgeKeys[idx[p]];
365+
while (q < E && edgeKeys[idx[q]] == key) ++q;
366+
367+
if (q - p > 1) {
368+
for (int i = p; i < q; ++i) {
369+
for (int j = p; j < q; ++j) {
370+
if (i == j) continue;
371+
372+
int face_from = faceIds[idx[i]];
373+
int face_to = faceIds[idx[j]];
374+
375+
// Write the adjacent face to the corresponding position and advance
376+
// the pointer
377+
neighbors[current_pos[face_from]++] = face_to;
378+
}
379+
}
380+
}
381+
p = q;
382+
}
383+
}
384+
299385
} // namespace
300386

301387
namespace ugu {
@@ -1874,77 +1960,7 @@ void BuildFaceAdjacencyCSR(const std::vector<Eigen::Vector3i>& faces,
18741960
void BuildFaceAdjacencyCSRParallel(const std::vector<Eigen::Vector3i>& faces,
18751961
std::vector<int>& offsets,
18761962
std::vector<int>& neighbors) {
1877-
int num_faces = (int)faces.size();
1878-
int E = num_faces * 3;
1879-
1880-
// 1) Make Edge list
1881-
std::vector<uint64_t> edgeKeys(E);
1882-
std::vector<int> faceIds(E);
1883-
for (int fid = 0; fid < num_faces; ++fid) {
1884-
const auto& f = faces[fid];
1885-
int idx = fid * 3;
1886-
edgeKeys[idx + 0] = MakeEdgeKey(f[0], f[1]);
1887-
faceIds[idx + 0] = fid;
1888-
edgeKeys[idx + 1] = MakeEdgeKey(f[1], f[2]);
1889-
faceIds[idx + 1] = fid;
1890-
edgeKeys[idx + 2] = MakeEdgeKey(f[2], f[0]);
1891-
faceIds[idx + 2] = fid;
1892-
}
1893-
1894-
// 2) Apply parallel sort for index array
1895-
// https://qiita.com/Nabetani/items/2dc2264764e2c68e7bcf
1896-
std::vector<int> idx(E);
1897-
std::iota(idx.begin(), idx.end(), 0);
1898-
1899-
// FIXME!:
1900-
// Depending on the environment, linux may fail to link TBB if
1901-
// std::execution::par_unseq was set...
1902-
#ifdef _WIN32
1903-
std::sort(std::execution::par_unseq, idx.begin(), idx.end(),
1904-
[&](int a, int b) { return edgeKeys[a] < edgeKeys[b]; });
1905-
#else
1906-
std::sort(idx.begin(), idx.end(),
1907-
[&](int a, int b) { return edgeKeys[a] < edgeKeys[b]; });
1908-
#endif
1909-
1910-
// 3) Grouping and collect adjacent pairs
1911-
std::vector<std::pair<int, int>> adjPairs;
1912-
adjPairs.reserve(E);
1913-
for (int p = 0; p < E;) {
1914-
int q = p + 1;
1915-
uint64_t key = edgeKeys[idx[p]];
1916-
while (q < E && edgeKeys[idx[q]] == key) ++q;
1917-
// Enumrate all combinations in the group
1918-
for (int i = p; i < q; ++i) {
1919-
for (int j = p; j < q; ++j) {
1920-
if (i != j) adjPairs.emplace_back(faceIds[idx[i]], faceIds[idx[j]]);
1921-
}
1922-
}
1923-
p = q;
1924-
}
1925-
1926-
// 4) Remove duplication
1927-
1928-
// FIXME!:
1929-
// Depending on the environment, linux may fail to link TBB if
1930-
// std::execution::par_unseq was set...
1931-
#ifdef _WIN32
1932-
std::sort(std::execution::par_unseq, adjPairs.begin(), adjPairs.end());
1933-
#else
1934-
std::sort(adjPairs.begin(), adjPairs.end());
1935-
#endif
1936-
adjPairs.erase(std::unique(adjPairs.begin(), adjPairs.end()), adjPairs.end());
1937-
1938-
// 5) Convert to CSR format
1939-
offsets.assign(num_faces + 1, 0);
1940-
for (auto& pr : adjPairs) offsets[pr.first + 1]++;
1941-
for (int i = 1; i <= num_faces; ++i) offsets[i] += offsets[i - 1];
1942-
neighbors.resize(adjPairs.size());
1943-
std::vector<int> ptr = offsets;
1944-
for (auto& pr : adjPairs) {
1945-
int f = pr.first;
1946-
neighbors[ptr[f]++] = pr.second;
1947-
}
1963+
BuildFaceAdjacencyCSRParallel_TwoPass(faces, offsets, neighbors);
19481964
}
19491965

19501966
} // namespace ugu

0 commit comments

Comments
 (0)