|
13 | 13 | ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL,GFX1-GISEL-TRUE16 %s |
14 | 14 | ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL,GFX1-GISEL-FAKE16 %s |
15 | 15 |
|
| 16 | +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s |
| 17 | +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s |
16 | 18 |
|
17 | 19 | ; FIXME: Promotion is not handled for targets without f16 instructions. |
18 | 20 |
|
@@ -52,6 +54,26 @@ define half @v_constained_fmul_f16_fpexcept_strict(half %x, half %y) #0 { |
52 | 54 | ; GFX1-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
53 | 55 | ; GFX1-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1 |
54 | 56 | ; GFX1-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| 57 | +; |
| 58 | +; GFX12-SDAG-LABEL: v_constained_fmul_f16_fpexcept_strict: |
| 59 | +; GFX12-SDAG: ; %bb.0: |
| 60 | +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 61 | +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| 62 | +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| 63 | +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| 64 | +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| 65 | +; GFX12-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 |
| 66 | +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| 67 | +; |
| 68 | +; GFX12-GISEL-LABEL: v_constained_fmul_f16_fpexcept_strict: |
| 69 | +; GFX12-GISEL: ; %bb.0: |
| 70 | +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 71 | +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| 72 | +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| 73 | +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| 74 | +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| 75 | +; GFX12-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 |
| 76 | +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
55 | 77 | %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") |
56 | 78 | ret half %val |
57 | 79 | } |
@@ -92,6 +114,26 @@ define half @v_constained_fmul_f16_fpexcept_ignore(half %x, half %y) #0 { |
92 | 114 | ; GFX1-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
93 | 115 | ; GFX1-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1 |
94 | 116 | ; GFX1-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| 117 | +; |
| 118 | +; GFX12-SDAG-LABEL: v_constained_fmul_f16_fpexcept_ignore: |
| 119 | +; GFX12-SDAG: ; %bb.0: |
| 120 | +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 121 | +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| 122 | +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| 123 | +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| 124 | +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| 125 | +; GFX12-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 |
| 126 | +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| 127 | +; |
| 128 | +; GFX12-GISEL-LABEL: v_constained_fmul_f16_fpexcept_ignore: |
| 129 | +; GFX12-GISEL: ; %bb.0: |
| 130 | +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 131 | +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| 132 | +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| 133 | +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| 134 | +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| 135 | +; GFX12-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 |
| 136 | +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
95 | 137 | %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") |
96 | 138 | ret half %val |
97 | 139 | } |
@@ -132,6 +174,26 @@ define half @v_constained_fmul_f16_fpexcept_maytrap(half %x, half %y) #0 { |
132 | 174 | ; GFX1-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
133 | 175 | ; GFX1-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1 |
134 | 176 | ; GFX1-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| 177 | +; |
| 178 | +; GFX12-SDAG-LABEL: v_constained_fmul_f16_fpexcept_maytrap: |
| 179 | +; GFX12-SDAG: ; %bb.0: |
| 180 | +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 181 | +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| 182 | +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| 183 | +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| 184 | +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| 185 | +; GFX12-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 |
| 186 | +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| 187 | +; |
| 188 | +; GFX12-GISEL-LABEL: v_constained_fmul_f16_fpexcept_maytrap: |
| 189 | +; GFX12-GISEL: ; %bb.0: |
| 190 | +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 191 | +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| 192 | +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| 193 | +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| 194 | +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| 195 | +; GFX12-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 |
| 196 | +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
135 | 197 | %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") |
136 | 198 | ret half %val |
137 | 199 | } |
@@ -164,6 +226,26 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_strict(<2 x half> %x, <2 x h |
164 | 226 | ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
165 | 227 | ; GFX10PLUS-NEXT: v_pk_mul_f16 v0, v0, v1 |
166 | 228 | ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] |
| 229 | +; |
| 230 | +; GFX12-SDAG-LABEL: v_constained_fmul_v2f16_fpexcept_strict: |
| 231 | +; GFX12-SDAG: ; %bb.0: |
| 232 | +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 233 | +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| 234 | +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| 235 | +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| 236 | +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| 237 | +; GFX12-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 |
| 238 | +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| 239 | +; |
| 240 | +; GFX12-GISEL-LABEL: v_constained_fmul_v2f16_fpexcept_strict: |
| 241 | +; GFX12-GISEL: ; %bb.0: |
| 242 | +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 243 | +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| 244 | +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| 245 | +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| 246 | +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| 247 | +; GFX12-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1 |
| 248 | +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
167 | 249 | %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") |
168 | 250 | ret <2 x half> %val |
169 | 251 | } |
@@ -196,6 +278,26 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h |
196 | 278 | ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
197 | 279 | ; GFX10PLUS-NEXT: v_pk_mul_f16 v0, v0, v1 |
198 | 280 | ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] |
| 281 | +; |
| 282 | +; GFX12-SDAG-LABEL: v_constained_fmul_v2f16_fpexcept_ignore: |
| 283 | +; GFX12-SDAG: ; %bb.0: |
| 284 | +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 285 | +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| 286 | +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| 287 | +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| 288 | +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| 289 | +; GFX12-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 |
| 290 | +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| 291 | +; |
| 292 | +; GFX12-GISEL-LABEL: v_constained_fmul_v2f16_fpexcept_ignore: |
| 293 | +; GFX12-GISEL: ; %bb.0: |
| 294 | +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 295 | +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| 296 | +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| 297 | +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| 298 | +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| 299 | +; GFX12-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1 |
| 300 | +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
199 | 301 | %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") |
200 | 302 | ret <2 x half> %val |
201 | 303 | } |
@@ -228,6 +330,26 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x |
228 | 330 | ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
229 | 331 | ; GFX10PLUS-NEXT: v_pk_mul_f16 v0, v0, v1 |
230 | 332 | ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] |
| 333 | +; |
| 334 | +; GFX12-SDAG-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap: |
| 335 | +; GFX12-SDAG: ; %bb.0: |
| 336 | +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 337 | +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| 338 | +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| 339 | +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| 340 | +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| 341 | +; GFX12-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 |
| 342 | +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| 343 | +; |
| 344 | +; GFX12-GISEL-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap: |
| 345 | +; GFX12-GISEL: ; %bb.0: |
| 346 | +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 347 | +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| 348 | +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| 349 | +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| 350 | +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| 351 | +; GFX12-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1 |
| 352 | +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
231 | 353 | %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") |
232 | 354 | ret <2 x half> %val |
233 | 355 | } |
@@ -299,6 +421,28 @@ define <3 x half> @v_constained_fmul_v3f16_fpexcept_strict(<3 x half> %x, <3 x h |
299 | 421 | ; GFX1-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2 |
300 | 422 | ; GFX1-GISEL-NEXT: v_pk_mul_f16 v1, v1, v3 |
301 | 423 | ; GFX1-GISEL-NEXT: s_setpc_b64 s[30:31] |
| 424 | +; |
| 425 | +; GFX12-SDAG-LABEL: v_constained_fmul_v3f16_fpexcept_strict: |
| 426 | +; GFX12-SDAG: ; %bb.0: |
| 427 | +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 428 | +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| 429 | +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| 430 | +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| 431 | +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| 432 | +; GFX12-SDAG-NEXT: v_pk_mul_f16 v0, v0, v2 |
| 433 | +; GFX12-SDAG-NEXT: v_mul_f16_e32 v1, v1, v3 |
| 434 | +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| 435 | +; |
| 436 | +; GFX12-GISEL-LABEL: v_constained_fmul_v3f16_fpexcept_strict: |
| 437 | +; GFX12-GISEL: ; %bb.0: |
| 438 | +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 439 | +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| 440 | +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| 441 | +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| 442 | +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| 443 | +; GFX12-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2 |
| 444 | +; GFX12-GISEL-NEXT: v_pk_mul_f16 v1, v1, v3 |
| 445 | +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
302 | 446 | %val = call <3 x half> @llvm.experimental.constrained.fmul.v3f16(<3 x half> %x, <3 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") |
303 | 447 | ret <3 x half> %val |
304 | 448 | } |
@@ -394,6 +538,38 @@ define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x h |
394 | 538 | ; GFX1-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2 |
395 | 539 | ; GFX1-GISEL-NEXT: v_pk_mul_f16 v1, v1, v3 |
396 | 540 | ; GFX1-GISEL-NEXT: s_setpc_b64 s[30:31] |
| 541 | +; |
| 542 | +; GFX12-SDAG-LABEL: v_constained_fmul_v4f16_fpexcept_strict: |
| 543 | +; GFX12-SDAG: ; %bb.0: |
| 544 | +; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 545 | +; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| 546 | +; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| 547 | +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| 548 | +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| 549 | +; GFX12-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v3 |
| 550 | +; GFX12-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v2 |
| 551 | +; GFX12-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v0 |
| 552 | +; GFX12-SDAG-NEXT: v_lshrrev_b32_e32 v7, 16, v1 |
| 553 | +; GFX12-SDAG-NEXT: v_mul_f16_e32 v1, v1, v3 |
| 554 | +; GFX12-SDAG-NEXT: v_mul_f16_e32 v0, v0, v2 |
| 555 | +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) |
| 556 | +; GFX12-SDAG-NEXT: v_mul_f16_e32 v2, v6, v5 |
| 557 | +; GFX12-SDAG-NEXT: v_mul_f16_e32 v3, v7, v4 |
| 558 | +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| 559 | +; GFX12-SDAG-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 |
| 560 | +; GFX12-SDAG-NEXT: v_perm_b32 v1, v3, v1, 0x5040100 |
| 561 | +; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| 562 | +; |
| 563 | +; GFX12-GISEL-LABEL: v_constained_fmul_v4f16_fpexcept_strict: |
| 564 | +; GFX12-GISEL: ; %bb.0: |
| 565 | +; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 566 | +; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| 567 | +; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| 568 | +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| 569 | +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| 570 | +; GFX12-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2 |
| 571 | +; GFX12-GISEL-NEXT: v_pk_mul_f16 v1, v1, v3 |
| 572 | +; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
397 | 573 | %val = call <4 x half> @llvm.experimental.constrained.fmul.v4f16(<4 x half> %x, <4 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") |
398 | 574 | ret <4 x half> %val |
399 | 575 | } |
@@ -429,6 +605,20 @@ define amdgpu_ps half @s_constained_fmul_f16_fpexcept_strict(half inreg %x, half |
429 | 605 | ; GFX1-GISEL-FAKE16: ; %bb.0: |
430 | 606 | ; GFX1-GISEL-FAKE16-NEXT: v_mul_f16_e64 v0, s2, s3 |
431 | 607 | ; GFX1-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| 608 | +; |
| 609 | +; GFX12-SDAG-LABEL: s_constained_fmul_f16_fpexcept_strict: |
| 610 | +; GFX12-SDAG: ; %bb.0: |
| 611 | +; GFX12-SDAG-NEXT: s_mul_f16 s0, s2, s3 |
| 612 | +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| 613 | +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 |
| 614 | +; GFX12-SDAG-NEXT: ; return to shader part epilog |
| 615 | +; |
| 616 | +; GFX12-GISEL-LABEL: s_constained_fmul_f16_fpexcept_strict: |
| 617 | +; GFX12-GISEL: ; %bb.0: |
| 618 | +; GFX12-GISEL-NEXT: s_mul_f16 s0, s2, s3 |
| 619 | +; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| 620 | +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| 621 | +; GFX12-GISEL-NEXT: ; return to shader part epilog |
432 | 622 | %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") |
433 | 623 | ret half %val |
434 | 624 | } |
@@ -468,6 +658,16 @@ define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half> |
468 | 658 | ; GFX10PLUS: ; %bb.0: |
469 | 659 | ; GFX10PLUS-NEXT: v_pk_mul_f16 v0, s2, s3 |
470 | 660 | ; GFX10PLUS-NEXT: ; return to shader part epilog |
| 661 | +; |
| 662 | +; GFX12-SDAG-LABEL: s_constained_fmul_v2f16_fpexcept_strict: |
| 663 | +; GFX12-SDAG: ; %bb.0: |
| 664 | +; GFX12-SDAG-NEXT: v_pk_mul_f16 v0, s2, s3 |
| 665 | +; GFX12-SDAG-NEXT: ; return to shader part epilog |
| 666 | +; |
| 667 | +; GFX12-GISEL-LABEL: s_constained_fmul_v2f16_fpexcept_strict: |
| 668 | +; GFX12-GISEL: ; %bb.0: |
| 669 | +; GFX12-GISEL-NEXT: v_pk_mul_f16 v0, s2, s3 |
| 670 | +; GFX12-GISEL-NEXT: ; return to shader part epilog |
471 | 671 | %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") |
472 | 672 | ret <2 x half> %val |
473 | 673 | } |
|
0 commit comments