-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcudax
More file actions
149 lines (126 loc) · 4.11 KB
/
cudax
File metadata and controls
149 lines (126 loc) · 4.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#pragma once
#include <cuda.h>
#ifdef __CUDACC__
#include <cuda_runtime.h>
#include <math_functions.h>
#else
#include <host_defines.h>
#include <builtin_types.h>
#endif
#ifndef MATHX_API
#define MATHX_API __host__ __device__
#endif
#include "mathx"
namespace cudax
{
// Half-precision floating-point bit representation vectors
typedef glm::vec<unsigned short, 2> hbvec2;
typedef glm::vec<unsigned short, 3> hbvec3;
typedef glm::vec<unsigned short, 4> hbvec4;
template <class T>
inline __host__ __device__ T* asRuntimePtr(CUdeviceptr ptr)
{
return reinterpret_cast<T*>( static_cast<size_t>(ptr) );
}
template <class T>
inline __host__ __device__ CUdeviceptr asDriverPtr(T* ptr)
{
return static_cast<CUdeviceptr>( reinterpret_cast<size_t>(ptr) );
}
template <class T, class X>
inline __host__ __device__ T as_(X const& x)
{
static_assert(sizeof(T) == sizeof(X), "size mismatch");
union U { X x; T t; char c[sizeof(T)]; } u;
u.x = x;
return u.t;
// return reinterpret_cast<U const&>(x).t;
}
template <class T, size_t S>
inline __device__ __host__ char (&arraylen_helper(T const volatile (&a)[S]))[S];
#ifndef arraylen
#define arraylen(x) sizeof(::cudax::arraylen_helper(x))
#endif
#ifdef __CUDACC__
template <class Type, class SurfaceType>
__device__ inline Type surf2Dread(SurfaceType surf, int2 coord, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap)
{
Type x;
::surf2Dread(&x, surf, coord.x * (int) sizeof(Type), coord.y, boundaryMode);
return x;
}
template <class Type, class SurfaceType>
__device__ inline void surf2Dwrite(Type const& x, SurfaceType surf, int2 coord, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap)
{
using glm::to_cuda;
::surf2Dwrite(to_cuda(x), surf, coord.x * (int) sizeof(Type), coord.y, boundaryMode);
}
template <class Type, class SurfaceType>
__device__ inline Type surf3Dread(SurfaceType surf, int3 coord, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap)
{
Type x;
::surf3Dread(&x, surf, coord.x * (int) sizeof(Type), coord.y, coord.z, boundaryMode);
return x;
}
template <class Type, class SurfaceType>
__device__ inline void surf3Dwrite(Type const& x, SurfaceType surf, int3 coord, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap)
{
using glm::to_cuda;
::surf3Dwrite(to_cuda(x), surf, coord.x * (int) sizeof(Type), coord.y, coord.z, boundaryMode);
}
template <class Type, class Vec>
__device__ inline Type tex2D(cudaTextureObject_t surf, Vec coord)
{
return ::tex2D<Type>(surf, (float) coord.x, (float) coord.y);
}
template <class Type, class SurfaceType, class Vec>
__device__ inline Type tex2D(SurfaceType surf, Vec coord)
{
return ::tex2D(surf, (float) coord.x, (float) coord.y);
}
template <class Type, class Vec>
__device__ inline Type tex3D(cudaTextureObject_t surf, Vec coord)
{
return ::tex3D<Type>(surf, (float) coord.x, (float) coord.y, (float) coord.z);
}
template <class Type, class SurfaceType, class Vec>
__device__ inline Type tex3D(SurfaceType surf, Vec coord)
{
return ::tex3D(surf, (float) coord.x, (float) coord.y, (float) coord.z);
}
__device__ inline hbvec3 toHalf(glm::vec3 const& v)
{
return hbvec3( __float2half_rn(v.x), __float2half_rn(v.y), __float2half_rn(v.z) );
}
__device__ inline hbvec4 toHalf(glm::vec4 const& v)
{
return hbvec4( __float2half_rn(v.x), __float2half_rn(v.y), __float2half_rn(v.z), __float2half_rn(v.w) );
}
__device__ inline glm::vec3 fromHalf(hbvec3 const& v)
{
return glm::vec3( __half2float(v.x), __half2float(v.y), __half2float(v.z) );
}
__device__ inline glm::vec4 fromHalf(hbvec4 const& v)
{
return glm::vec4( __half2float(v.x), __half2float(v.y), __half2float(v.z), __half2float(v.w) );
}
__device__ inline float fast_sqrt(float x)
{
float r;
asm("sqrt.approx.f32 %0, %1;" : "=f"(r) : "f"(x));
return r;
}
__device__ inline float fast_rsqrt(float x)
{
float r;
asm("rsqrt.approx.f32 %0, %1;" : "=f"(r) : "f"(x));
return r;
}
__device__ inline float fast_rcp(float x)
{
float r;
asm("rcp.approx.f32 %0, %1;" : "=f"(r) : "f"(x));
return r;
}
#endif
} // namespace