|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2 |
| -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s |
3 |
| -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s |
| 2 | +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG %s |
| 3 | +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG %s |
4 | 4 |
|
5 |
| -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s |
6 |
| -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s |
| 5 | +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL %s |
| 6 | +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL %s |
7 | 7 |
|
8 | 8 | define double @v_sqrt_f64(double %x) {
|
9 | 9 | ; GCN-LABEL: v_sqrt_f64:
|
@@ -115,9 +115,219 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
|
115 | 115 | ret <2 x i32> %insert.1
|
116 | 116 | }
|
117 | 117 |
|
| 118 | +define amdgpu_ps <2 x i32> @s_sqrt_f64_afn(double inreg %x) { |
| 119 | +; GCN-LABEL: s_sqrt_f64_afn: |
| 120 | +; GCN: ; %bb.0: |
| 121 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1] |
| 122 | +; GCN-NEXT: v_readfirstlane_b32 s0, v0 |
| 123 | +; GCN-NEXT: v_readfirstlane_b32 s1, v1 |
| 124 | +; GCN-NEXT: ; return to shader part epilog |
| 125 | + %result = call afn double @llvm.sqrt.f64(double %x) |
| 126 | + %cast = bitcast double %result to <2 x i32> |
| 127 | + %cast.0 = extractelement <2 x i32> %cast, i32 0 |
| 128 | + %cast.1 = extractelement <2 x i32> %cast, i32 1 |
| 129 | + %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0) |
| 130 | + %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1) |
| 131 | + %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0 |
| 132 | + %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1 |
| 133 | + ret <2 x i32> %insert.1 |
| 134 | +} |
| 135 | + |
| 136 | +define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) { |
| 137 | +; GCN-LABEL: s_sqrt_f64_afn_nnan_ninf: |
| 138 | +; GCN: ; %bb.0: |
| 139 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1] |
| 140 | +; GCN-NEXT: v_readfirstlane_b32 s0, v0 |
| 141 | +; GCN-NEXT: v_readfirstlane_b32 s1, v1 |
| 142 | +; GCN-NEXT: ; return to shader part epilog |
| 143 | + %result = call afn nnan ninf double @llvm.sqrt.f64(double %x) |
| 144 | + %cast = bitcast double %result to <2 x i32> |
| 145 | + %cast.0 = extractelement <2 x i32> %cast, i32 0 |
| 146 | + %cast.1 = extractelement <2 x i32> %cast, i32 1 |
| 147 | + %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0) |
| 148 | + %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1) |
| 149 | + %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0 |
| 150 | + %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1 |
| 151 | + ret <2 x i32> %insert.1 |
| 152 | +} |
| 153 | + |
| 154 | +define double @v_sqrt_f64_nsz(double %x) { |
| 155 | +; GCN-LABEL: v_sqrt_f64_nsz: |
| 156 | +; GCN: ; %bb.0: |
| 157 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 158 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 159 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 160 | + %result = call nsz double @llvm.sqrt.f64(double %x) |
| 161 | + ret double %result |
| 162 | +} |
| 163 | + |
| 164 | +define double @v_sqrt_f64_nnan_ninf(double %x) { |
| 165 | +; GCN-LABEL: v_sqrt_f64_nnan_ninf: |
| 166 | +; GCN: ; %bb.0: |
| 167 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 168 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 169 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 170 | + %result = call nnan ninf double @llvm.sqrt.f64(double %x) |
| 171 | + ret double %result |
| 172 | +} |
| 173 | + |
| 174 | +define double @v_sqrt_f64_nnan_ninf_nsz(double %x) { |
| 175 | +; GCN-LABEL: v_sqrt_f64_nnan_ninf_nsz: |
| 176 | +; GCN: ; %bb.0: |
| 177 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 178 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 179 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 180 | + %result = call nnan ninf nsz double @llvm.sqrt.f64(double %x) |
| 181 | + ret double %result |
| 182 | +} |
| 183 | + |
| 184 | +define double @v_sqrt_f64_afn(double %x) { |
| 185 | +; GCN-LABEL: v_sqrt_f64_afn: |
| 186 | +; GCN: ; %bb.0: |
| 187 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 188 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 189 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 190 | + %result = call afn double @llvm.sqrt.f64(double %x) |
| 191 | + ret double %result |
| 192 | +} |
| 193 | + |
| 194 | +define double @v_sqrt_f64_afn_nsz(double %x) { |
| 195 | +; GCN-LABEL: v_sqrt_f64_afn_nsz: |
| 196 | +; GCN: ; %bb.0: |
| 197 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 198 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 199 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 200 | + %result = call afn nsz double @llvm.sqrt.f64(double %x) |
| 201 | + ret double %result |
| 202 | +} |
| 203 | + |
| 204 | +define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) { |
| 205 | +; GCN-LABEL: v_sqrt_v2f64_afn: |
| 206 | +; GCN: ; %bb.0: |
| 207 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 208 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 209 | +; GCN-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3] |
| 210 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 211 | + %result = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) |
| 212 | + ret <2 x double> %result |
| 213 | +} |
| 214 | + |
| 215 | +define double @v_sqrt_f64_afn_nnan(double %x) { |
| 216 | +; GCN-LABEL: v_sqrt_f64_afn_nnan: |
| 217 | +; GCN: ; %bb.0: |
| 218 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 219 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 220 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 221 | + %result = call afn nnan double @llvm.sqrt.f64(double %x) |
| 222 | + ret double %result |
| 223 | +} |
| 224 | + |
| 225 | +define double @v_sqrt_f64_fabs_afn_ninf(double %x) { |
| 226 | +; GCN-LABEL: v_sqrt_f64_fabs_afn_ninf: |
| 227 | +; GCN: ; %bb.0: |
| 228 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 229 | +; GCN-NEXT: v_sqrt_f64_e64 v[0:1], |v[0:1]| |
| 230 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 231 | + %fabs = call double @llvm.fabs.f64(double %x) |
| 232 | + %result = call afn ninf double @llvm.sqrt.f64(double %fabs) |
| 233 | + ret double %result |
| 234 | +} |
| 235 | + |
| 236 | +define double @v_sqrt_f64_afn_nnan_ninf(double %x) { |
| 237 | +; GCN-LABEL: v_sqrt_f64_afn_nnan_ninf: |
| 238 | +; GCN: ; %bb.0: |
| 239 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 240 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 241 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 242 | + %result = call afn nnan ninf double @llvm.sqrt.f64(double %x) |
| 243 | + ret double %result |
| 244 | +} |
| 245 | + |
| 246 | +define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) { |
| 247 | +; GCN-LABEL: v_sqrt_v2f64_afn_nnan_ninf: |
| 248 | +; GCN: ; %bb.0: |
| 249 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 250 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 251 | +; GCN-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3] |
| 252 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 253 | + %result = call afn nnan ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) |
| 254 | + ret <2 x double> %result |
| 255 | +} |
| 256 | + |
| 257 | +define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) { |
| 258 | +; GCN-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz: |
| 259 | +; GCN: ; %bb.0: |
| 260 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 261 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 262 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 263 | + %result = call afn nnan ninf nsz double @llvm.sqrt.f64(double %x) |
| 264 | + ret double %result |
| 265 | +} |
| 266 | + |
| 267 | +define double @v_sqrt_f64__approx_func_fp_math(double %x) #2 { |
| 268 | +; GCN-LABEL: v_sqrt_f64__approx_func_fp_math: |
| 269 | +; GCN: ; %bb.0: |
| 270 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 271 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 272 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 273 | + %result = call nsz double @llvm.sqrt.f64(double %x) |
| 274 | + ret double %result |
| 275 | +} |
| 276 | + |
| 277 | +define double @v_sqrt_f64__enough_unsafe_attrs(double %x) #3 { |
| 278 | +; GCN-LABEL: v_sqrt_f64__enough_unsafe_attrs: |
| 279 | +; GCN: ; %bb.0: |
| 280 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 281 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 282 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 283 | + %result = call nsz double @llvm.sqrt.f64(double %x) |
| 284 | + ret double %result |
| 285 | +} |
| 286 | + |
| 287 | +define double @v_sqrt_f64__unsafe_attr(double %x) #4 { |
| 288 | +; GCN-LABEL: v_sqrt_f64__unsafe_attr: |
| 289 | +; GCN: ; %bb.0: |
| 290 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 291 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 292 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 293 | + %result = call nsz double @llvm.sqrt.f64(double %x) |
| 294 | + ret double %result |
| 295 | +} |
| 296 | + |
| 297 | +define <2 x double> @v_sqrt_v2f64(<2 x double> %x) { |
| 298 | +; GCN-LABEL: v_sqrt_v2f64: |
| 299 | +; GCN: ; %bb.0: |
| 300 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 301 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 302 | +; GCN-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3] |
| 303 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 304 | + %result = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) |
| 305 | + ret <2 x double> %result |
| 306 | +} |
| 307 | + |
| 308 | +define <3 x double> @v_sqrt_v3f64(<3 x double> %x) { |
| 309 | +; GCN-LABEL: v_sqrt_v3f64: |
| 310 | +; GCN: ; %bb.0: |
| 311 | +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 312 | +; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1] |
| 313 | +; GCN-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3] |
| 314 | +; GCN-NEXT: v_sqrt_f64_e32 v[4:5], v[4:5] |
| 315 | +; GCN-NEXT: s_setpc_b64 s[30:31] |
| 316 | + %result = call <3 x double> @llvm.sqrt.v3f64(<3 x double> %x) |
| 317 | + ret <3 x double> %result |
| 318 | +} |
| 319 | + |
118 | 320 | declare double @llvm.fabs.f64(double) #0
|
119 | 321 | declare double @llvm.sqrt.f64(double) #0
|
| 322 | +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0 |
| 323 | +declare <3 x double> @llvm.sqrt.v3f64(<3 x double>) #0 |
120 | 324 | declare i32 @llvm.amdgcn.readfirstlane(i32) #1
|
121 | 325 |
|
122 | 326 | attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
123 | 327 | attributes #1 = { convergent nounwind willreturn memory(none) }
|
| 328 | +attributes #2 = { "approx-func-fp-math"="true" } |
| 329 | +attributes #3 = { "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" } |
| 330 | +attributes #4 = { "unsafe-fp-math"="true" } |
| 331 | +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| 332 | +; GISEL: {{.*}} |
| 333 | +; SDAG: {{.*}} |
0 commit comments