1+ #ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
2+ #define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
3+
4+ #include "config.h"
5+
6+ #define BLOCK_SIZE (BLOCK_X * BLOCK_Y)
7+ #define NUM_WARPS (BLOCK_SIZE/32)
8+
9+ // Spherical harmonics coefficients
10+ __device__ constexpr float SH_C0 = 0.28209479177387814f ;
11+ __device__ constexpr float SH_C1 = 0.4886025119029199f ;
12+ __device__ constexpr float SH_C2 [] = {
13+ 1.0925484305920792f ,
14+ -1.0925484305920792f ,
15+ 0.31539156525252005f ,
16+ -1.0925484305920792f ,
17+ 0.5462742152960396f
18+ };
19+ __device__ constexpr float SH_C3 [] = {
20+ -0.5900435899266435f ,
21+ 2.890611442640554f ,
22+ -0.4570457994644658f ,
23+ 0.3731763325901154f ,
24+ -0.4570457994644658f ,
25+ 1.445305721320277f ,
26+ -0.5900435899266435f
27+ };
28+
29+ __forceinline __host__ __device__ float ndc2Pix (float v , int S )
30+ {
31+ return ((v + 1.0 ) * S - 1.0 ) * 0.5 ;
32+ }
33+
34+ __forceinline __device__ void getRect (const float2 p , int max_radius , uint2 & rect_min , uint2 & rect_max , dim3 grid )
35+ {
36+ rect_min = {
37+ min (grid .x , max ((int )0 , (int )((p .x - max_radius ) / BLOCK_X ))),
38+ min (grid .y , max ((int )0 , (int )((p .y - max_radius ) / BLOCK_Y )))
39+ };
40+ rect_max = {
41+ min (grid .x , max ((int )0 , (int )((p .x + max_radius + BLOCK_X - 1 ) / BLOCK_X ))),
42+ min (grid .y , max ((int )0 , (int )((p .y + max_radius + BLOCK_Y - 1 ) / BLOCK_Y )))
43+ };
44+ }
45+
46+ __forceinline __device__ float3 transformPoint4x3 (const float3 & p , const float * matrix )
47+ {
48+ float3 transformed = {
49+ matrix [0 ] * p .x + matrix [4 ] * p .y + matrix [8 ] * p .z + matrix [12 ],
50+ matrix [1 ] * p .x + matrix [5 ] * p .y + matrix [9 ] * p .z + matrix [13 ],
51+ matrix [2 ] * p .x + matrix [6 ] * p .y + matrix [10 ] * p .z + matrix [14 ],
52+ };
53+ return transformed ;
54+ }
55+
56+ __forceinline __device__ float4 transformPoint4x4 (const float3 & p , const float * matrix )
57+ {
58+ float4 transformed = {
59+ matrix [0 ] * p .x + matrix [4 ] * p .y + matrix [8 ] * p .z + matrix [12 ],
60+ matrix [1 ] * p .x + matrix [5 ] * p .y + matrix [9 ] * p .z + matrix [13 ],
61+ matrix [2 ] * p .x + matrix [6 ] * p .y + matrix [10 ] * p .z + matrix [14 ],
62+ matrix [3 ] * p .x + matrix [7 ] * p .y + matrix [11 ] * p .z + matrix [15 ]
63+ };
64+ return transformed ;
65+ }
66+
67+ __forceinline __device__ float3 transformVec4x3 (const float3 & p , const float * matrix )
68+ {
69+ float3 transformed = {
70+ matrix [0 ] * p .x + matrix [4 ] * p .y + matrix [8 ] * p .z ,
71+ matrix [1 ] * p .x + matrix [5 ] * p .y + matrix [9 ] * p .z ,
72+ matrix [2 ] * p .x + matrix [6 ] * p .y + matrix [10 ] * p .z ,
73+ };
74+ return transformed ;
75+ }
76+
77+ __forceinline __device__ float3 transformVec4x3Transpose (const float3 & p , const float * matrix )
78+ {
79+ float3 transformed = {
80+ matrix [0 ] * p .x + matrix [1 ] * p .y + matrix [2 ] * p .z ,
81+ matrix [4 ] * p .x + matrix [5 ] * p .y + matrix [6 ] * p .z ,
82+ matrix [8 ] * p .x + matrix [9 ] * p .y + matrix [10 ] * p .z ,
83+ };
84+ return transformed ;
85+ }
86+
87+ __forceinline __device__ float dnormvdz (float3 v , float3 dv )
88+ {
89+ float sum2 = v .x * v .x + v .y * v .y + v .z * v .z ;
90+ float invsum32 = 1.0f / sqrt (sum2 * sum2 * sum2 );
91+ float dnormvdz = (- v .x * v .z * dv .x - v .y * v .z * dv .y + (sum2 - v .z * v .z ) * dv .z ) * invsum32 ;
92+ return dnormvdz ;
93+ }
94+
95+ __forceinline __device__ float3 dnormvdv (float3 v , float3 dv )
96+ {
97+ float sum2 = v .x * v .x + v .y * v .y + v .z * v .z ;
98+ float invsum32 = 1.0f / sqrt (sum2 * sum2 * sum2 );
99+
100+ float3 dnormvdv ;
101+ dnormvdv .x = ((+ sum2 - v .x * v .x ) * dv .x - v .y * v .x * dv .y - v .z * v .x * dv .z ) * invsum32 ;
102+ dnormvdv .y = (- v .x * v .y * dv .x + (sum2 - v .y * v .y ) * dv .y - v .z * v .y * dv .z ) * invsum32 ;
103+ dnormvdv .z = (- v .x * v .z * dv .x - v .y * v .z * dv .y + (sum2 - v .z * v .z ) * dv .z ) * invsum32 ;
104+ return dnormvdv ;
105+ }
106+
107+ __forceinline __device__ float4 dnormvdv (float4 v , float4 dv )
108+ {
109+ float sum2 = v .x * v .x + v .y * v .y + v .z * v .z + v .w * v .w ;
110+ float invsum32 = 1.0f / sqrt (sum2 * sum2 * sum2 );
111+
112+ float4 vdv = { v .x * dv .x , v .y * dv .y , v .z * dv .z , v .w * dv .w };
113+ float vdv_sum = vdv .x + vdv .y + vdv .z + vdv .w ;
114+ float4 dnormvdv ;
115+ dnormvdv .x = ((sum2 - v .x * v .x ) * dv .x - v .x * (vdv_sum - vdv .x )) * invsum32 ;
116+ dnormvdv .y = ((sum2 - v .y * v .y ) * dv .y - v .y * (vdv_sum - vdv .y )) * invsum32 ;
117+ dnormvdv .z = ((sum2 - v .z * v .z ) * dv .z - v .z * (vdv_sum - vdv .z )) * invsum32 ;
118+ dnormvdv .w = ((sum2 - v .w * v .w ) * dv .w - v .w * (vdv_sum - vdv .w )) * invsum32 ;
119+ return dnormvdv ;
120+ }
121+
122+ __forceinline __device__ float sigmoid (float x )
123+ {
124+ return 1.0f / (1.0f + expf (- x ));
125+ }
126+
127+ __forceinline __device__ bool in_frustum (int idx ,
128+ const float * orig_points ,
129+ const float * viewmatrix ,
130+ const float * projmatrix ,
131+ bool prefiltered ,
132+ float3 & p_view )
133+ {
134+ float3 p_orig = { orig_points [3 * idx ], orig_points [3 * idx + 1 ], orig_points [3 * idx + 2 ] };
135+
136+ // Bring points to screen space
137+ float4 p_hom = transformPoint4x4 (p_orig , projmatrix );
138+ float p_w = 1.0f / (p_hom .w + 0.0000001f );
139+ float3 p_proj = { p_hom .x * p_w , p_hom .y * p_w , p_hom .z * p_w };
140+ p_view = transformPoint4x3 (p_orig , viewmatrix );
141+
142+ if (p_view .z <= 0.2f || ((p_proj .x < -1.3 || p_proj .x > 1.3 || p_proj .y < -1.3 || p_proj .y > 1.3 )))
143+ {
144+ if (prefiltered )
145+ {
146+ printf ("Point is filtered although prefiltered is set. This shouldn't happen!" );
147+ __trap ();
148+ }
149+ return false;
150+ }
151+ return true;
152+ }
153+
154+ #endif
0 commit comments