|
26 | 26 | // RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX72_SM86 %s |
27 | 27 |
|
28 | 28 | // RUN: %clang_cc1 -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \ |
29 | | -// RUN: sm_53 -target-feature +ptx42 -fcuda-is-device -fnative-half-type \ |
| 29 | +// RUN: sm_53 -target-feature +ptx65 -fcuda-is-device -fnative-half-type \ |
30 | 30 | // RUN: -emit-llvm -o - -x cuda %s \ |
31 | | -// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX42_SM53 %s |
| 31 | +// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX65_SM53 %s |
32 | 32 |
|
33 | 33 | // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown \ |
34 | | -// RUN: -target-cpu sm_53 -target-feature +ptx42 -fcuda-is-device \ |
| 34 | +// RUN: -target-cpu sm_53 -target-feature +ptx65 -fcuda-is-device \ |
35 | 35 | // RUN: -fnative-half-type -emit-llvm -o - -x cuda %s \ |
36 | | -// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX42_SM53 %s |
| 36 | +// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX65_SM53 %s |
37 | 37 |
|
38 | 38 | #define __device__ __attribute__((device)) |
39 | 39 |
|
@@ -108,25 +108,25 @@ __device__ void nvvm_fma_f16_f16x2_sm80() { |
108 | 108 | // CHECK-LABEL: nvvm_fma_f16_f16x2_sm53 |
109 | 109 | __device__ void nvvm_fma_f16_f16x2_sm53() { |
110 | 110 | #if __CUDA_ARCH__ >= 530 |
111 | | - // CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.f16 |
| 111 | + // CHECK_PTX65_SM53: call half @llvm.nvvm.fma.rn.f16 |
112 | 112 | __nvvm_fma_rn_f16(0.1f16, 0.1f16, 0.1f16); |
113 | | - // CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.ftz.f16 |
| 113 | + // CHECK_PTX65_SM53: call half @llvm.nvvm.fma.rn.ftz.f16 |
114 | 114 | __nvvm_fma_rn_ftz_f16(0.1f16, 0.1f16, 0.1f16); |
115 | | - // CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.sat.f16 |
| 115 | + // CHECK_PTX65_SM53: call half @llvm.nvvm.fma.rn.sat.f16 |
116 | 116 | __nvvm_fma_rn_sat_f16(0.1f16, 0.1f16, 0.1f16); |
117 | | - // CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.ftz.sat.f16 |
| 117 | + // CHECK_PTX65_SM53: call half @llvm.nvvm.fma.rn.ftz.sat.f16 |
118 | 118 | __nvvm_fma_rn_ftz_sat_f16(0.1f16, 0.1f16, 0.1f16); |
119 | 119 |
|
120 | | - // CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.f16x2 |
| 120 | + // CHECK_PTX65_SM53: call <2 x half> @llvm.nvvm.fma.rn.f16x2 |
121 | 121 | __nvvm_fma_rn_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16}, |
122 | 122 | {0.1f16, 0.7f16}); |
123 | | - // CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.f16x2 |
| 123 | + // CHECK_PTX65_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.f16x2 |
124 | 124 | __nvvm_fma_rn_ftz_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16}, |
125 | 125 | {0.1f16, 0.7f16}); |
126 | | - // CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.sat.f16x2 |
| 126 | + // CHECK_PTX65_SM53: call <2 x half> @llvm.nvvm.fma.rn.sat.f16x2 |
127 | 127 | __nvvm_fma_rn_sat_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16}, |
128 | 128 | {0.1f16, 0.7f16}); |
129 | | - // CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.sat.f16x2 |
| 129 | + // CHECK_PTX65_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.sat.f16x2 |
130 | 130 | __nvvm_fma_rn_ftz_sat_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16}, |
131 | 131 | {0.1f16, 0.7f16}); |
132 | 132 | #endif |
@@ -173,6 +173,23 @@ __device__ void nvvm_min_max_sm86() { |
173 | 173 | // CHECK: ret void |
174 | 174 | } |
175 | 175 |
|
| 176 | +// CHECK-LABEL: nvvm_fabs_f16 |
| 177 | +__device__ void nvvm_fabs_f16() { |
| 178 | +#if __CUDA_ARCH__ >= 530 |
| 179 | + // CHECK: call half @llvm.nvvm.fabs.f16 |
| 180 | + __nvvm_fabs_f16(0.1f16); |
| 181 | + // CHECK: call half @llvm.nvvm.fabs.ftz.f16 |
| 182 | + __nvvm_fabs_ftz_f16(0.1f16); |
| 183 | + // CHECK: call <2 x half> @llvm.nvvm.fabs.v2f16 |
| 184 | + __nvvm_fabs_f16x2({0.1f16, 0.7f16}); |
| 185 | + // CHECK: call <2 x half> @llvm.nvvm.fabs.ftz.v2f16 |
| 186 | + __nvvm_fabs_ftz_f16x2({0.1f16, 0.7f16}); |
| 187 | +#endif |
| 188 | + // CHECK: ret void |
| 189 | +} |
| 190 | + |
| 191 | + |
| 192 | + |
176 | 193 | typedef __fp16 __fp16v2 __attribute__((ext_vector_type(2))); |
177 | 194 |
|
178 | 195 | // CHECK-LABEL: nvvm_ldg_native_half_types |
|
0 commit comments