Skip to content

Commit 3021524

Browse files
author
Bernhard Kerbl
committed
Initial commit
0 parents  commit 3021524

File tree

16 files changed

+4143
-0
lines changed

16 files changed

+4143
-0
lines changed

CMakeLists.txt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
cmake_minimum_required(VERSION 3.20)
2+
3+
project(DiffRast LANGUAGES CUDA CXX)
4+
5+
set(CMAKE_CXX_STANDARD 17)
6+
set(CMAKE_CXX_EXTENSIONS OFF)
7+
set(CMAKE_CUDA_STANDARD 17)
8+
9+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
10+
11+
add_library(CudaRasterizer
12+
cuda_rasterizer/backward.h
13+
cuda_rasterizer/backward.cu
14+
cuda_rasterizer/forward.h
15+
cuda_rasterizer/forward.cu
16+
cuda_rasterizer/auxiliary.h
17+
cuda_rasterizer/rasterizer_impl.cu
18+
cuda_rasterizer/rasterizer_impl.h
19+
cuda_rasterizer/rasterizer.h
20+
)
21+
22+
set_target_properties(CudaRasterizer PROPERTIES CUDA_ARCHITECTURES "75;86")
23+
24+
target_include_directories(CudaRasterizer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/cuda_rasterizer)
25+
target_include_directories(CudaRasterizer PRIVATE third_party/glm ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

cuda_rasterizer/auxiliary.h

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
#ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
2+
#define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
3+
4+
#include "config.h"
5+
6+
#define BLOCK_SIZE (BLOCK_X * BLOCK_Y)
7+
#define NUM_WARPS (BLOCK_SIZE/32)
8+
9+
// Spherical harmonics coefficients
10+
__device__ constexpr float SH_C0 = 0.28209479177387814f;
11+
__device__ constexpr float SH_C1 = 0.4886025119029199f;
12+
__device__ constexpr float SH_C2[] = {
13+
1.0925484305920792f,
14+
-1.0925484305920792f,
15+
0.31539156525252005f,
16+
-1.0925484305920792f,
17+
0.5462742152960396f
18+
};
19+
__device__ constexpr float SH_C3[] = {
20+
-0.5900435899266435f,
21+
2.890611442640554f,
22+
-0.4570457994644658f,
23+
0.3731763325901154f,
24+
-0.4570457994644658f,
25+
1.445305721320277f,
26+
-0.5900435899266435f
27+
};
28+
29+
__forceinline __host__ __device__ float ndc2Pix(float v, int S)
30+
{
31+
return ((v + 1.0) * S - 1.0) * 0.5;
32+
}
33+
34+
__forceinline __device__ void getRect(const float2 p, int max_radius, uint2& rect_min, uint2& rect_max, dim3 grid)
35+
{
36+
rect_min = {
37+
min(grid.x, max((int)0, (int)((p.x - max_radius) / BLOCK_X))),
38+
min(grid.y, max((int)0, (int)((p.y - max_radius) / BLOCK_Y)))
39+
};
40+
rect_max = {
41+
min(grid.x, max((int)0, (int)((p.x + max_radius + BLOCK_X - 1) / BLOCK_X))),
42+
min(grid.y, max((int)0, (int)((p.y + max_radius + BLOCK_Y - 1) / BLOCK_Y)))
43+
};
44+
}
45+
46+
__forceinline __device__ float3 transformPoint4x3(const float3& p, const float* matrix)
47+
{
48+
float3 transformed = {
49+
matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12],
50+
matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13],
51+
matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14],
52+
};
53+
return transformed;
54+
}
55+
56+
__forceinline __device__ float4 transformPoint4x4(const float3& p, const float* matrix)
57+
{
58+
float4 transformed = {
59+
matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12],
60+
matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13],
61+
matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14],
62+
matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15]
63+
};
64+
return transformed;
65+
}
66+
67+
__forceinline __device__ float3 transformVec4x3(const float3& p, const float* matrix)
68+
{
69+
float3 transformed = {
70+
matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z,
71+
matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z,
72+
matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z,
73+
};
74+
return transformed;
75+
}
76+
77+
__forceinline __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix)
78+
{
79+
float3 transformed = {
80+
matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z,
81+
matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z,
82+
matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z,
83+
};
84+
return transformed;
85+
}
86+
87+
__forceinline __device__ float dnormvdz(float3 v, float3 dv)
88+
{
89+
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z;
90+
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
91+
float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32;
92+
return dnormvdz;
93+
}
94+
95+
__forceinline __device__ float3 dnormvdv(float3 v, float3 dv)
96+
{
97+
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z;
98+
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
99+
100+
float3 dnormvdv;
101+
dnormvdv.x = ((+sum2 - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * invsum32;
102+
dnormvdv.y = (-v.x * v.y * dv.x + (sum2 - v.y * v.y) * dv.y - v.z * v.y * dv.z) * invsum32;
103+
dnormvdv.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32;
104+
return dnormvdv;
105+
}
106+
107+
__forceinline __device__ float4 dnormvdv(float4 v, float4 dv)
108+
{
109+
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w;
110+
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
111+
112+
float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w };
113+
float vdv_sum = vdv.x + vdv.y + vdv.z + vdv.w;
114+
float4 dnormvdv;
115+
dnormvdv.x = ((sum2 - v.x * v.x) * dv.x - v.x * (vdv_sum - vdv.x)) * invsum32;
116+
dnormvdv.y = ((sum2 - v.y * v.y) * dv.y - v.y * (vdv_sum - vdv.y)) * invsum32;
117+
dnormvdv.z = ((sum2 - v.z * v.z) * dv.z - v.z * (vdv_sum - vdv.z)) * invsum32;
118+
dnormvdv.w = ((sum2 - v.w * v.w) * dv.w - v.w * (vdv_sum - vdv.w)) * invsum32;
119+
return dnormvdv;
120+
}
121+
122+
__forceinline __device__ float sigmoid(float x)
123+
{
124+
return 1.0f / (1.0f + expf(-x));
125+
}
126+
127+
__forceinline __device__ bool in_frustum(int idx,
128+
const float* orig_points,
129+
const float* viewmatrix,
130+
const float* projmatrix,
131+
bool prefiltered,
132+
float3& p_view)
133+
{
134+
float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] };
135+
136+
// Bring points to screen space
137+
float4 p_hom = transformPoint4x4(p_orig, projmatrix);
138+
float p_w = 1.0f / (p_hom.w + 0.0000001f);
139+
float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w };
140+
p_view = transformPoint4x3(p_orig, viewmatrix);
141+
142+
if (p_view.z <= 0.2f || ((p_proj.x < -1.3 || p_proj.x > 1.3 || p_proj.y < -1.3 || p_proj.y > 1.3)))
143+
{
144+
if (prefiltered)
145+
{
146+
printf("Point is filtered although prefiltered is set. This shouldn't happen!");
147+
__trap();
148+
}
149+
return false;
150+
}
151+
return true;
152+
}
153+
154+
#endif

0 commit comments

Comments
 (0)