@@ -2418,11 +2418,10 @@ __device__ const int edgeCorners[12][2] = {
24182418};
24192419
24202420__global__ void BuildVerticesKernel (const VoxelCudaNaive* voxels, float3 bb_min,
2421- float3 resolution,
2422- int3 vn, // voxel_num
2423- float iso_level, int * d_edgeVertexIds,
2424- int * d_vtxCounter, float3 * d_vertices,
2425- float weight, int max_vertices_num) {
2421+ float3 resolution, int3 vn, float iso_level,
2422+ int * d_edgeVertexIds, int * d_vtxCounter,
2423+ float3 * d_vertices, float weight,
2424+ int max_vertices_num, int * d_overflow) {
24262425 int ix = blockIdx .x * blockDim .x + threadIdx .x ;
24272426 int iy = blockIdx .y * blockDim .y + threadIdx .y ;
24282427 int iz = blockIdx .z * blockDim .z + threadIdx .z ;
@@ -2434,51 +2433,57 @@ __global__ void BuildVerticesKernel(const VoxelCudaNaive* voxels, float3 bb_min,
24342433 int edges = d_edgeTable[cubeIndex];
24352434 if (edges == 0 ) return ;
24362435
2437- // ベースとなる flat インデックス
2438- // int baseFlat = iz * vn.y * vn.x + iy * vn.x + ix;
2439-
24402436 for (int e = 0 ; e < 12 ; e++) {
24412437 if (!(edges & (1 << e))) continue ;
24422438
2443- // 1) エッジキーを計算して頂点IDテーブルを予約
24442439 int key = computeEdgeKey (ix, iy, iz, e, vn.x , vn.y , vn.z );
2440+
2441+ // Acquire ownership of this edge.
24452442 int old = atomicCAS (&d_edgeVertexIds[key], -1 , 0 );
2446- if (old != -1 ) continue ; // 既に誰かが生成済み
2443+ if (old != -1 ) continue ; // someone else already owns or created it
24472444
2448- // 2) 新しい頂点ID を確保
24492445 int vid = atomicAdd (d_vtxCounter, 1 );
24502446
2451- if (max_vertices_num < vid) {
2447+ // IMPORTANT: bounds check must be vid >= max_vertices_num (0-based).
2448+ if (vid >= max_vertices_num) {
2449+ // Roll back the lock so future retries won't see a stale 0.
2450+ atomicExch (&d_edgeVertexIds[key], -1 );
2451+ atomicExch (d_overflow, 1 );
24522452 return ;
24532453 }
24542454
2455+ // Publish the vertex id.
24552456 atomicExch (&d_edgeVertexIds[key], vid);
24562457
2457- // 3) エッジに対応する 2 つのコーナー番号
24582458 int c0_id = edgeCorners[e][0 ];
24592459 int c1_id = edgeCorners[e][1 ];
24602460
2461- // 4) それぞれのコーナーのグリッド座標 (gx,gy,gz) を計算
24622461 int gx0 = ix + cornerOffset[c0_id][0 ];
24632462 int gy0 = iy + cornerOffset[c0_id][1 ];
24642463 int gz0 = iz + cornerOffset[c0_id][2 ];
24652464 int gx1 = ix + cornerOffset[c1_id][0 ];
24662465 int gy1 = iy + cornerOffset[c1_id][1 ];
24672466 int gz1 = iz + cornerOffset[c1_id][2 ];
24682467
2469- // 5) flat index に戻す(必要なら使わずに直接 sdf 参照も可)
24702468 int flat0 = gz0 * vn.y * vn.x + gy0 * vn.x + gx0;
24712469 int flat1 = gz1 * vn.y * vn.x + gy1 * vn.x + gx1;
24722470
2473- // 6) 実ワールド座標を計算
2471+ // Optional safety: avoid division by zero if update_num can be 0.
2472+ int u0 = voxels[flat0].update_num ;
2473+ int u1 = voxels[flat1].update_num ;
2474+ if (u0 <= 0 || u1 <= 0 || weight == 0 .0f ) {
2475+ // Invalidate this edge entry and signal failure (safe fallback).
2476+ atomicExch (&d_edgeVertexIds[key], -1 );
2477+ atomicExch (d_overflow, 1 );
2478+ return ;
2479+ }
2480+
24742481 float3 p1 = voxel_idx2pos (make_int3 (gx0, gy0, gz0), bb_min, resolution);
24752482 float3 p2 = voxel_idx2pos (make_int3 (gx1, gy1, gz1), bb_min, resolution);
24762483
2477- // 7) SDF 値を取得
2478- float v1 = voxels[flat0].sdf_sum / float (voxels[flat0].update_num * weight);
2479- float v2 = voxels[flat1].sdf_sum / float (voxels[flat1].update_num * weight);
2484+ float v1 = voxels[flat0].sdf_sum / float (u0 * weight);
2485+ float v2 = voxels[flat1].sdf_sum / float (u1 * weight);
24802486
2481- // 8) 線形補間で頂点位置を求めて書き込み
24822487 d_vertices[vid] = VertexInterp (p1, p2, v1, v2, iso_level);
24832488 }
24842489}
@@ -2487,36 +2492,53 @@ __global__ void BuildVerticesKernel(const VoxelCudaNaive* voxels, float3 bb_min,
24872492__global__ void BuildFacesKernel (const VoxelCudaNaive* voxels, float3 bb_min,
24882493 float3 resolution, int3 vn, float iso_level,
24892494 int * d_edgeVertexIds, int * d_idxCounter,
2490- int * d_faces, int max_faces) {
2495+ int * d_faces, int max_faces, int * d_overflow ) {
24912496 int ix = blockIdx .x * blockDim .x + threadIdx .x ;
24922497 int iy = blockIdx .y * blockDim .y + threadIdx .y ;
24932498 int iz = blockIdx .z * blockDim .z + threadIdx .z ;
24942499 if (ix >= vn.x - 1 || iy >= vn.y - 1 || iz >= vn.z - 1 ) return ;
2500+
24952501 int cubeIndex =
24962502 calcCubeIndex (voxels, ix, iy, iz, vn, bb_min, resolution, iso_level);
24972503 if (cubeIndex < 0 ) return ;
2504+
24982505 int * tri = (int *)(&d_triTable[cubeIndex][0 ]);
24992506 for (int i = 0 ; tri[i] != -1 ; i += 3 ) {
25002507 int e0 = tri[i], e1 = tri[i + 1 ], e2 = tri[i + 2 ];
2508+
25012509 int k0 = computeEdgeKey (ix, iy, iz, e0 , vn.x , vn.y , vn.z );
25022510 int k1 = computeEdgeKey (ix, iy, iz, e1 , vn.x , vn.y , vn.z );
25032511 int k2 = computeEdgeKey (ix, iy, iz, e2 , vn.x , vn.y , vn.z );
2512+
25042513 int v0 = d_edgeVertexIds[k0];
25052514 int v1 = d_edgeVertexIds[k1];
25062515 int v2 = d_edgeVertexIds[k2];
2516+
2517+ // Optional safety: if any edge is missing/locked, fail fast.
2518+ if (v0 < 0 || v1 < 0 || v2 < 0 ) {
2519+ atomicExch (d_overflow, 1 );
2520+ return ;
2521+ }
2522+
25072523 int idx = atomicAdd (d_idxCounter, 3 );
2508- if (max_faces < idx) {
2524+
2525+ // IMPORTANT: idx writes idx, idx+1, idx+2 (0-based).
2526+ if (idx + 2 >= max_faces) {
2527+ atomicExch (d_overflow, 1 );
25092528 return ;
25102529 }
2530+
25112531 d_faces[idx + 0 ] = v2;
25122532 d_faces[idx + 1 ] = v1;
25132533 d_faces[idx + 2 ] = v0;
25142534 }
25152535}
2536+
25162537__global__ void BuildFacesKernelWithNormal (
25172538 const VoxelCudaNaive* voxels, float3 bb_min, float3 resolution, int3 vn,
25182539 float iso_level, int * d_edgeVertexIds, int * d_idxCounter, int * d_faces,
2519- float3 * d_vertices, float3 * d_face_normals, int max_faces) {
2540+ float3 * d_vertices, float3 * d_face_normals, int max_faces,
2541+ int * d_overflow) {
25202542 int ix = blockIdx .x * blockDim .x + threadIdx .x ;
25212543 int iy = blockIdx .y * blockDim .y + threadIdx .y ;
25222544 int iz = blockIdx .z * blockDim .z + threadIdx .z ;
@@ -2534,7 +2556,8 @@ __global__ void BuildFacesKernelWithNormal(
25342556 int v1 = d_edgeVertexIds[k1];
25352557 int v2 = d_edgeVertexIds[k2];
25362558 int idx = atomicAdd (d_idxCounter, 3 );
2537- if (max_faces < idx) {
2559+ if (idx + 2 >= max_faces) {
2560+ atomicExch (d_overflow, 1 );
25382561 return ;
25392562 }
25402563 d_faces[idx + 0 ] = v2;
@@ -2959,6 +2982,9 @@ class VoxelGridCudaNaive::Impl {
29592982 cudaMemcpyToSymbol (c_voxel_num, &voxel_num_, sizeof (int3 ));
29602983 cudaMemcpyToSymbol (c_trunc, &option_.truncation_band , sizeof (float ));
29612984
2985+ cudaMalloc (&d_overflow_, sizeof (int ));
2986+ cudaMemset (d_overflow_, 0 , sizeof (int ));
2987+
29622988 return true ;
29632989 }
29642990
@@ -3331,18 +3357,28 @@ class VoxelGridCudaNaive::Impl {
33313357 constexpr float iso_level = 0 .f ;
33323358 constexpr int tri_ratio = 2 ;
33333359 while (true ) {
3360+ cudaMemset (d_overflow_, 0 , sizeof (int ));
3361+
33343362 BuildVerticesKernel<<<grid, block>>> (
33353363 d_voxels_, bb_min_, resolution_, voxel_num_, iso_level,
3336- d_edgeVertexIds, d_vtxCounter, d_vertices, option_.weight , max_tris_);
3364+ d_edgeVertexIds, d_vtxCounter, d_vertices, option_.weight , max_tris_,
3365+ d_overflow_);
3366+
33373367 checkCudaErrors (cudaGetLastError ());
33383368 checkCudaErrors (cudaDeviceSynchronize ());
3369+
3370+ int h_overflow = 0 ;
3371+ cudaMemcpy (&h_overflow, d_overflow_, sizeof (int ), cudaMemcpyDeviceToHost);
3372+
33393373 int h_vcount = 0 ;
33403374 cudaMemcpy (&h_vcount, d_vtxCounter, sizeof (int ), cudaMemcpyDeviceToHost);
3341- if (h_vcount < max_tris_) {
3375+
3376+ if (h_overflow == 0 && h_vcount <= max_tris_) {
33423377 num_vertices_ = h_vcount;
33433378 break ;
33443379 }
3345- // If memory is not enough, reallocate
3380+
3381+ // Retry (buffer insufficient or kernel signaled overflow)
33463382 cudaMemset (d_vtxCounter, 0 , sizeof (int ));
33473383 cudaMemset (d_edgeVertexIds, -1 , sizeof (int ) * numEdges);
33483384 EnsureTriangleVertexMemory (h_vcount * tri_ratio);
@@ -3355,22 +3391,28 @@ class VoxelGridCudaNaive::Impl {
33553391 }
33563392
33573393 while (true ) {
3394+
3395+ cudaMemset (d_overflow_, 0 , sizeof (int ));
3396+
33583397 if (with_face_normals) {
33593398 BuildFacesKernelWithNormal<<<grid, block>>> (
33603399 d_voxels_, bb_min_, resolution_, voxel_num_, iso_level,
33613400 d_edgeVertexIds, d_idxCounter, d_faces, d_vertices, d_face_normals,
3362- max_faces_);
3401+ max_faces_, d_overflow_ );
33633402 } else {
33643403 BuildFacesKernel<<<grid, block>>> (
33653404 d_voxels_, bb_min_, resolution_, voxel_num_, iso_level,
3366- d_edgeVertexIds, d_idxCounter, d_faces, max_faces_);
3405+ d_edgeVertexIds, d_idxCounter, d_faces, max_faces_, d_overflow_ );
33673406 }
33683407 checkCudaErrors (cudaGetLastError ());
33693408 checkCudaErrors (cudaDeviceSynchronize ());
33703409
3410+ int h_overflow = 0 ;
3411+ cudaMemcpy (&h_overflow, d_overflow_, sizeof (int ), cudaMemcpyDeviceToHost);
3412+
33713413 int h_icount = 0 ;
33723414 cudaMemcpy (&h_icount, d_idxCounter, sizeof (int ), cudaMemcpyDeviceToHost);
3373- if (h_icount < max_faces_) {
3415+ if (h_overflow == 0 && h_icount < max_faces_) {
33743416 num_faces_ = h_icount / 3 ;
33753417 break ;
33763418 }
@@ -3746,6 +3788,11 @@ class VoxelGridCudaNaive::Impl {
37463788 max_tris_ = 0 ;
37473789
37483790 d_mesh_process_buf_.Free ();
3791+
3792+ if (d_overflow_) {
3793+ cudaFree (d_overflow_);
3794+ d_overflow_ = nullptr ;
3795+ }
37493796 }
37503797
37513798 void EnsureTriangleVertexMemory (int tris_num) {
@@ -3816,6 +3863,8 @@ class VoxelGridCudaNaive::Impl {
38163863 int num_faces_{0 };
38173864 int num_vertices_{0 };
38183865
3866+ int * d_overflow_ = nullptr ; // device flag: 0 ok, 1 overflow/invalid
3867+
38193868 ugu::RemoveSmallConnectedComponentsBuf d_mesh_process_buf_;
38203869};
38213870
0 commit comments