
Commit a1e904f

[NVIDIA] Bunch of new activations (#1000)
* Uncommented HSwish in single_layer_tests/activation.cpp, since it has long been implemented; added a bunch of new implementations for activation functions
* Updated docs/cuda_opset.md with the new supported operations
* Small performance improvements for the SoftSign kernel

Co-authored-by: Denis Kotov <[email protected]>
1 parent 205b012 commit a1e904f


17 files changed: +399 −8 lines


modules/nvidia_plugin/docs/cuda_opset.md

Lines changed: 3 additions & 2 deletions
@@ -90,7 +90,7 @@ The semantics match corresponding nGraph operation classes declared in `namespac
 | [Mod](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/arithmetic/Mod_1.md) | Supported |
 | [MVN](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/normalization/MVN_6.md) | Supported |
 | [Multiply](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/arithmetic/Multiply_1.md) | Supported* |
-| [Negative](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/arithmetic/Negative_1.md) | Not Supported |
+| [Negative](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/arithmetic/Negative_1.md) | Supported |
 | [NonMaxSuppression](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/sort/NonMaxSuppression_5.md) | Not Supported |
 | [NonZero](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/condition/NonZero_3.md) | Not Supported |
 | [NormalizeL2](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/normalization/NormalizeL2_1.md) | Not Supported |

@@ -134,11 +134,12 @@ The semantics match corresponding nGraph operation classes declared in `namespac
 | [ShapeOf](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/shape/ShapeOf_3.md) | Not Supported |
 | [ShuffleChannels](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/movement/ShuffleChannels_1.md) | Not Supported |
 | [Sigmoid](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/activation/Sigmoid_1.md) | Supported |
-| [Sign](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/arithmetic/Sign_1.md) | Not Supported |
+| [Sign](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/arithmetic/Sign_1.md) | Supported |
 | [Sin](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/arithmetic/Sin_1.md) | Supported |
 | [Sinh](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/arithmetic/Sinh_1.md) | Supported |
 | [SoftMax](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/activation/SoftMax_1.md) | Supported |
 | [SoftPlus](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/activation/SoftPlus_4.md) | Not Supported |
+| [SoftSign](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/activation/SoftSign_1.md) | Supported |
 | [SpaceToBatch](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/movement/SpaceToBatch_2.md) | Not Supported |
 | [SpaceToDepth](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/movement/SpaceToDepth_1.md) | Not Supported |
 | [Split](https://github.com/openvinotoolkit/openvino/blob/master/docs/ops/movement/Split_1.md) | Supported* |

modules/nvidia_plugin/src/cuda/math.cuh

Lines changed: 40 additions & 0 deletions
@@ -166,6 +166,46 @@ inline __device__ T log(T a) {
     return static_cast<T>(::logf(static_cast<float>(a)));
 }
 
+template <typename T>
+inline __device__ T sign_float(T x) {
+    static_assert(std::is_floating_point<T>::value, "T should be a floating-point type");
+    if (x < 0.0f) return -1.0f;
+    if (x > 0.0f) return 1.0f;
+    return 0.0f;
+}
+
+template <>
+inline __device__ __half sign_float<__half>(__half x) {
+    const __half zero = __float2half(0.0f);
+    if (x < zero) return __half(-1.0f);
+    if (x > zero) return __half(1.0f);
+    return zero;
+}
+
+#ifdef CUDA_HAS_BF16_TYPE
+template <>
+inline __device__ __nv_bfloat16 sign_float<__nv_bfloat16>(__nv_bfloat16 x) {
+    const __nv_bfloat16 zero = __float2bfloat16(0.0f);
+    if (x < zero) return __nv_bfloat16(-1.0f);
+    if (x > zero) return __nv_bfloat16(1.0f);
+    return zero;
+}
+#endif
+
+template <typename T>
+inline __device__ T sign_int(T x) {
+    static_assert(std::is_integral<T>::value && !std::is_unsigned<T>::value,
+                  "T should be a signed integer type");
+    return static_cast<T>((x > 0) - (x < 0));
+}
+
+template <typename T>
+inline __device__ T sign_uint(T x) {
+    static_assert(std::is_integral<T>::value && std::is_unsigned<T>::value,
+                  "T should be an unsigned integer type");
+    return static_cast<T>(x > 0);
+}
+
 #ifdef __CUDACC__
 /* ==================== __half ===================== */
 template <>
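
For reference, a host-side sketch of the semantics these helpers implement (plain C++, illustrative, not part of the commit): sign maps negative values to -1, zero to 0, and positive values to +1, while the unsigned variant can only ever produce 0 or 1.

#include <cassert>
#include <type_traits>

// Illustrative host-side mirror of sign_float / sign_int / sign_uint.
template <typename T>
T sign_ref(T x) {
    if (std::is_unsigned<T>::value) {
        return static_cast<T>(x > 0);                  // 0 or 1
    }
    return static_cast<T>((x > T{0}) - (x < T{0}));    // -1, 0 or +1
}

int main() {
    assert(sign_ref(-3.5f) == -1.0f);  // float   -> sign_float semantics
    assert(sign_ref(0) == 0);          // signed  -> sign_int semantics
    assert(sign_ref(7u) == 1u);        // unsigned -> sign_uint semantics
    return 0;
}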

modules/nvidia_plugin/src/kernels/details/elementwise_binary.cuh

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ __global__ void elementwise_binary_broadcasting(const T* in0,
 
 #endif // __CUDACC__
 
-template <typename ElementTypes, template <typename> typename OP>
+template <typename ElementTypes, template <typename... TArgs> typename OP>
 class ElementwiseBinary {
 public:
     ElementwiseBinary(Type_t element_type, size_t out_num_elements, size_t max_threads_per_block)

modules/nvidia_plugin/src/kernels/details/elementwise_unary.cuh

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ __global__ void elementwise_unary(const T* in, size_t num_elements, T* out, Args
 
 #endif // __CUDACC__
 
-template <typename ElementTypes, template <typename> typename OP>
+template <typename ElementTypes, template <typename... TArgs> typename OP>
 class ElementwiseUnary {
 public:
     ElementwiseUnary(Type_t element_type, size_t max_threads_per_block, size_t num_elements)
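
The one-line change above (mirrored in elementwise_binary.cuh) widens the template template parameter so that op functors with more than one template parameter can be plugged in; the new SignOpImpl carries an extra SFINAE `Enable` slot. A minimal sketch of the difference, with illustrative names not taken from the commit:

// With the strict form `template <typename> typename OP`, a functor declared
// as `template <typename T, typename Enable = void>` may be rejected
// (pre-C++17 matching rules); the variadic form accepts both shapes.
template <typename T>
struct OneParamOp {};                     // e.g. NegativeOpImpl

template <typename T, typename Enable = void>
struct TwoParamOp {};                     // e.g. SignOpImpl

template <typename ElementTypes, template <typename... TArgs> typename OP>
struct Holder {
    OP<float> op;  // extra template parameters fall back to their defaults
};

struct DummyTypes {};
Holder<DummyTypes, OneParamOp> a;  // OK
Holder<DummyTypes, TwoParamOp> b;  // OK only thanks to the variadic form
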
modules/nvidia_plugin/src/kernels/negative.cu (new file)

Lines changed: 29 additions & 0 deletions

// Copyright (C) 2021-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "negative.hpp"

namespace ov {
namespace nvidia_gpu {
namespace kernel {

namespace cumath = CUDA::math;

template <typename T>
struct NegativeOpImpl {
    __device__ static inline T op(T x) {
        return -x;
    }
};

Negative::Negative(Type_t element_type, size_t max_threads_per_block, size_t num_elements)
    : impl_{element_type, max_threads_per_block, num_elements} {}

void Negative::operator()(cudaStream_t stream, const void* in0, void* out) const {
    impl_(stream, in0, out);
}

}  // namespace kernel
}  // namespace nvidia_gpu
}  // namespace ov
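
A hypothetical host-side usage sketch for the wrapper above (the buffer names, the block-size value, and the exact Type_t enumerator are illustrative assumptions, not taken from the commit):

#include <cuda_runtime.h>

#include "negative.hpp"

// Illustrative only: negate n f32 elements that already live in device memory.
void negate_f32(cudaStream_t stream, const float* d_in, float* d_out, size_t n) {
    using namespace ov::nvidia_gpu::kernel;
    Negative negative{Type_t::f32, 1024 /* max threads per block */, n};
    negative(stream, d_in, d_out);  // enqueues the elementwise kernel on the stream
}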
modules/nvidia_plugin/src/kernels/negative.hpp (new file)

Lines changed: 31 additions & 0 deletions

// Copyright (C) 2021-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "details/cuda_type_traits.hpp"
#include "details/elementwise_unary.cuh"

namespace ov {
namespace nvidia_gpu {
namespace kernel {

template <typename T>
struct NegativeOpImpl;

/**
 * Elementwise Negative operation
 */
class Negative {
public:
    Negative(Type_t element_type, size_t max_threads_per_block, size_t num_elements);

    void operator()(cudaStream_t stream, const void* in0, void* out) const;

private:
    ElementwiseUnary<AllElementTypesSwitch, NegativeOpImpl> impl_;
};

}  // namespace kernel
}  // namespace nvidia_gpu
}  // namespace ov
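
Note the structure shared by all of the new headers: the op functor (NegativeOpImpl here) is only forward-declared in the .hpp and defined in the .cu, so its __device__ body never has to be visible to host-only translation units; the header only needs the template's name to spell the ElementwiseUnary member, and the element_type passed to the constructor presumably selects the concrete instantiation at run time via AllElementTypesSwitch.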
modules/nvidia_plugin/src/kernels/sign.cu (new file)

Lines changed: 71 additions & 0 deletions

// Copyright (C) 2021-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "sign.hpp"

namespace ov {
namespace nvidia_gpu {
namespace kernel {

namespace cumath = CUDA::math;

template <typename T, typename Enable = void>
struct SignOpImpl {
    __device__ static inline T op(T x);
};

template <>
struct SignOpImpl<char> {
    __device__ static inline char op(char x) {
        return cumath::sign_int(x);
    }
};

template <typename T>
struct SignOpImpl<T, typename std::enable_if<std::is_integral<T>::value &&
                                             !std::is_unsigned<T>::value>::type> {
    __device__ static inline T op(T x) {
        return cumath::sign_int(x);
    }
};

template <typename T>
struct SignOpImpl<T, typename std::enable_if<std::is_integral<T>::value &&
                                             std::is_unsigned<T>::value>::type> {
    __device__ static inline T op(T x) {
        return cumath::sign_uint(x);
    }
};

template <typename T>
struct SignOpImpl<T, typename std::enable_if<std::is_floating_point<T>::value>::type> {
    __device__ static inline T op(T x) {
        return cumath::sign_float(x);
    }
};

template <>
struct SignOpImpl<__nv_bfloat16> {
    __device__ static inline __nv_bfloat16 op(__nv_bfloat16 x) {
        return cumath::sign_float(x);
    }
};

template <>
struct SignOpImpl<__half> {
    __device__ static inline __half op(__half x) {
        return cumath::sign_float(x);
    }
};

Sign::Sign(Type_t element_type, size_t max_threads_per_block, size_t num_elements)
    : impl_{element_type, max_threads_per_block, num_elements} {}

void Sign::operator()(cudaStream_t stream, const void* in0, void* out) const {
    impl_(stream, in0, out);
}

}  // namespace kernel
}  // namespace nvidia_gpu
}  // namespace ov
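
A small device-side sanity sketch of the dispatch above (illustrative, not part of the commit; it would have to live in a translation unit that can see these specializations): signed integers and floating-point types can yield -1, 0, or +1, while unsigned types can only yield 0 or 1.

namespace ov {
namespace nvidia_gpu {
namespace kernel {

// Illustrative only — exercises the SignOpImpl specializations directly.
__global__ void sign_examples(int* i_out, float* f_out, unsigned int* u_out) {
    *i_out = SignOpImpl<int>::op(-7);            // -1,   via cumath::sign_int
    *f_out = SignOpImpl<float>::op(0.0f);        //  0.0f, via cumath::sign_float
    *u_out = SignOpImpl<unsigned int>::op(5u);   //  1,   via cumath::sign_uint
}

}  // namespace kernel
}  // namespace nvidia_gpu
}  // namespace ov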
modules/nvidia_plugin/src/kernels/sign.hpp (new file)

Lines changed: 31 additions & 0 deletions

// Copyright (C) 2021-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "details/cuda_type_traits.hpp"
#include "details/elementwise_unary.cuh"

namespace ov {
namespace nvidia_gpu {
namespace kernel {

template <typename T, typename>
struct SignOpImpl;

/**
 * Elementwise Sign operation
 */
class Sign {
public:
    Sign(Type_t element_type, size_t max_threads_per_block, size_t num_elements);

    void operator()(cudaStream_t stream, const void* in0, void* out) const;

private:
    ElementwiseUnary<AllElementTypesSwitch, SignOpImpl> impl_;
};

}  // namespace kernel
}  // namespace nvidia_gpu
}  // namespace ov
modules/nvidia_plugin/src/kernels/soft_sign.cu (new file)

Lines changed: 46 additions & 0 deletions

// Copyright (C) 2021-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "soft_sign.hpp"

namespace ov {
namespace nvidia_gpu {
namespace kernel {

namespace cumath = CUDA::math;

template <typename T>
__device__ constexpr T one = static_cast<T>(1);

template <typename T>
struct SoftSignOpImpl {
    __device__ static inline T op(T x) {
        return x / (one<T> + cumath::abs(x));
    }
};

template <>
struct SoftSignOpImpl<__nv_bfloat16> {
    __device__ static inline __nv_bfloat16 op(__nv_bfloat16 x) {
        return x / (__nv_bfloat16(1.0f) + cumath::abs(x));
    }
};

template <>
struct SoftSignOpImpl<__half> {
    __device__ static inline __half op(__half x) {
        return x / (__half(1.0f) + cumath::abs(x));
    }
};

SoftSign::SoftSign(Type_t element_type, size_t max_threads_per_block, size_t num_elements)
    : impl_{element_type, max_threads_per_block, num_elements} {}

void SoftSign::operator()(cudaStream_t stream, const void* in0, void* out) const {
    impl_(stream, in0, out);
}

}  // namespace kernel
}  // namespace nvidia_gpu
}  // namespace ov
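
For reference, the kernel computes SoftSign(x) = x / (1 + |x|), a smooth, bounded activation with outputs in (-1, 1); for example, SoftSign(3) = 0.75 and SoftSign(-1) = -0.5. A one-line host-side reference (illustrative, not part of the commit):

#include <cmath>

// Host-side reference for the kernel above: SoftSign(x) = x / (1 + |x|).
float softsign_ref(float x) { return x / (1.0f + std::fabs(x)); }

The `one<T>` device variable template in the generic path keeps the constant in the operand's own type rather than converting it per call, which is presumably the "small performance improvement" for the SoftSign kernel mentioned in the commit message.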
modules/nvidia_plugin/src/kernels/soft_sign.hpp (new file)

Lines changed: 31 additions & 0 deletions

// Copyright (C) 2021-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "details/cuda_type_traits.hpp"
#include "details/elementwise_unary.cuh"

namespace ov {
namespace nvidia_gpu {
namespace kernel {

template <typename T>
struct SoftSignOpImpl;

/**
 * Elementwise SoftSign operation
 */
class SoftSign {
public:
    SoftSign(Type_t element_type, size_t max_threads_per_block, size_t num_elements);

    void operator()(cudaStream_t stream, const void* in0, void* out) const;

private:
    ElementwiseUnary<AllElementTypesSwitch, SoftSignOpImpl> impl_;
};

}  // namespace kernel
}  // namespace nvidia_gpu
}  // namespace ov
