|
2 | 2 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9 |
3 | 3 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck %s -check-prefix=GFX10 |
4 | 4 |
|
; bf16_oeq_v_i: ordered-equal compare of %arg against the bfloat constant
; 0xR4242, followed by a select that returns %arg when the compare is true and
; %arg1 otherwise. The checks expect the input in v0 to be shifted left by 16
; and compared as f32 against 0x42420000, with v_cndmask picking v0 or v1.
; Every row of this function carries the '-' diff marker.
5 | | -define bfloat @bf16_oeq_v_i(bfloat %arg, bfloat %arg1) { |
6 | | -; GFX9-LABEL: bf16_oeq_v_i: |
7 | | -; GFX9: ; %bb.0: ; %bb |
8 | | -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
9 | | -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0 |
10 | | -; GFX9-NEXT: s_mov_b32 s4, 0x42420000 |
11 | | -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2 |
12 | | -; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc |
13 | | -; GFX9-NEXT: s_setpc_b64 s[30:31] |
14 | | -; |
15 | | -; GFX10-LABEL: bf16_oeq_v_i: |
16 | | -; GFX10: ; %bb.0: ; %bb |
17 | | -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
18 | | -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v0 |
19 | | -; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42420000, v2 |
20 | | -; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo |
21 | | -; GFX10-NEXT: s_setpc_b64 s[30:31] |
22 | | -bb: |
23 | | - %fcmp = fcmp oeq bfloat %arg, 0xR4242 |
24 | | - %select = select i1 %fcmp, bfloat %arg, bfloat %arg1 |
25 | | - ret bfloat %select |
26 | | -} |
27 | | - |
28 | 5 | define float @f32_oeq_v_i(float %arg, float %arg1) { |
29 | 6 | ; GFX9-LABEL: f32_oeq_v_i: |
30 | 7 | ; GFX9: ; %bb.0: ; %bb |
@@ -440,3 +417,181 @@ bb: |
440 | 417 | %select = select i1 %fcmp, half %arg, half %arg1 |
441 | 418 | ret half %select |
442 | 419 | } |
| 420 | + |
; f32_oeq_negz_i: fcmp oeq of %arg against -0.0 where the select's true
; operand is the constant -0.0 and its false operand is %arg1.
; Both prefixes expect the inverted compare (v_cmp_neq) with %arg1's register
; v1 chosen when it holds. The other v_cndmask source is expected to be the
; constant itself: v2 written by v_bfrev_b32 on GFX9, the literal 0x80000000
; on GFX10. It is not the compared register v0.
| 421 | +define float @f32_oeq_negz_i(float %arg, float %arg1) { |
| 422 | +; GFX9-LABEL: f32_oeq_negz_i: |
| 423 | +; GFX9: ; %bb.0: ; %bb |
| 424 | +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 425 | +; GFX9-NEXT: s_brev_b32 s4, 1 |
| 426 | +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 |
| 427 | +; GFX9-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0 |
| 428 | +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc |
| 429 | +; GFX9-NEXT: s_setpc_b64 s[30:31] |
| 430 | +; |
| 431 | +; GFX10-LABEL: f32_oeq_negz_i: |
| 432 | +; GFX10: ; %bb.0: ; %bb |
| 433 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 434 | +; GFX10-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x80000000, v0 |
| 435 | +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x80000000, v1, vcc_lo |
| 436 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 437 | +bb: |
| 438 | + %fcmp = fcmp oeq float %arg, -0.000000e+00 |
| 439 | + %select = select i1 %fcmp, float -0.000000e+00, float %arg1 |
| 440 | + ret float %select |
| 441 | +} |
| 442 | + |
; f32_oeq_negz_z: fcmp oeq of %arg against -0.0 where the select returns
; %arg itself on the true side and %arg1 otherwise.
; The checks expect a direct v_cmp_eq against the -0.0 bit pattern (s4 set
; by s_brev_b32 on GFX9, the literal 0x80000000 on GFX10) and a single
; v_cndmask choosing between v1 and v0.
| 443 | +define float @f32_oeq_negz_z(float %arg, float %arg1) { |
| 444 | +; GFX9-LABEL: f32_oeq_negz_z: |
| 445 | +; GFX9: ; %bb.0: ; %bb |
| 446 | +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 447 | +; GFX9-NEXT: s_brev_b32 s4, 1 |
| 448 | +; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0 |
| 449 | +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc |
| 450 | +; GFX9-NEXT: s_setpc_b64 s[30:31] |
| 451 | +; |
| 452 | +; GFX10-LABEL: f32_oeq_negz_z: |
| 453 | +; GFX10: ; %bb.0: ; %bb |
| 454 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 455 | +; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x80000000, v0 |
| 456 | +; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo |
| 457 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 458 | +bb: |
| 459 | + %fcmp = fcmp oeq float %arg, -0.000000e+00 |
| 460 | + %select = select i1 %fcmp, float %arg, float %arg1 |
| 461 | + ret float %select |
| 462 | +} |
| 463 | + |
; f16_oeq_negz_i: fcmp oeq of a half %arg against -0.0 where the select's
; true operand is the constant -0.0 and its false operand is %arg1.
; Both prefixes expect the inverted compare (v_cmp_neq_f16) against 0x8000.
; The GFX10 checks expect the literal 0x8000 as the other v_cndmask source.
; NOTE(review): the GFX9 v_cndmask row reads v0 (the compared register) in
; the position where GFX10 uses the 0x8000 literal, and no GFX9 row writes
; 0x8000 into a VGPR. fcmp oeq is also true for +0.0 compared with -0.0, so
; confirm this expectation against real llc output before relying on it.
| 464 | +define half @f16_oeq_negz_i(half %arg, half %arg1) { |
| 465 | +; GFX9-LABEL: f16_oeq_negz_i: |
| 466 | +; GFX9: ; %bb.0: ; %bb |
| 467 | +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 468 | +; GFX9-NEXT: s_mov_b32 s4, 0x8000 |
| 469 | +; GFX9-NEXT: v_cmp_neq_f16_e32 vcc, s4, v0 |
| 470 | +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc |
| 471 | +; GFX9-NEXT: s_setpc_b64 s[30:31] |
| 472 | +; |
| 473 | +; GFX10-LABEL: f16_oeq_negz_i: |
| 474 | +; GFX10: ; %bb.0: ; %bb |
| 475 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 476 | +; GFX10-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0x8000, v0 |
| 477 | +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x8000, v1, vcc_lo |
| 478 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 479 | +bb: |
| 480 | + %fcmp = fcmp oeq half %arg, -0.000000e+00 |
| 481 | + %select = select i1 %fcmp, half -0.000000e+00, half %arg1 |
| 482 | + ret half %select |
| 483 | +} |
| 484 | + |
; f16_oeq_negz_z: fcmp oeq of a half %arg against -0.0 where the select
; returns %arg itself on the true side and %arg1 otherwise.
; The checks expect a direct v_cmp_eq_f16 against 0x8000 (held in s4 on GFX9,
; a literal on GFX10) and a single v_cndmask choosing between v1 and v0.
| 485 | +define half @f16_oeq_negz_z(half %arg, half %arg1) { |
| 486 | +; GFX9-LABEL: f16_oeq_negz_z: |
| 487 | +; GFX9: ; %bb.0: ; %bb |
| 488 | +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 489 | +; GFX9-NEXT: s_mov_b32 s4, 0x8000 |
| 490 | +; GFX9-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0 |
| 491 | +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc |
| 492 | +; GFX9-NEXT: s_setpc_b64 s[30:31] |
| 493 | +; |
| 494 | +; GFX10-LABEL: f16_oeq_negz_z: |
| 495 | +; GFX10: ; %bb.0: ; %bb |
| 496 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 497 | +; GFX10-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x8000, v0 |
| 498 | +; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo |
| 499 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 500 | +bb: |
| 501 | + %fcmp = fcmp oeq half %arg, -0.000000e+00 |
| 502 | + %select = select i1 %fcmp, half %arg, half %arg1 |
| 503 | + ret half %select |
| 504 | +} |
| 505 | + |
; f64_oeq_z_i: fcmp oeq of a double %arg against +0.0 where the select's
; true operand is the constant 0.0 and its false operand is %arg1.
; Both prefixes expect the inverted compare (v_cmp_neq_f64) with an inline 0
; operand, then one v_cndmask per 32-bit half (v0 and v1), each choosing
; between the constant 0 and the matching half of v[2:3].
| 506 | +define double @f64_oeq_z_i(double %arg, double %arg1) { |
| 507 | +; GFX9-LABEL: f64_oeq_z_i: |
| 508 | +; GFX9: ; %bb.0: ; %bb |
| 509 | +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 510 | +; GFX9-NEXT: v_cmp_neq_f64_e32 vcc, 0, v[0:1] |
| 511 | +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc |
| 512 | +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc |
| 513 | +; GFX9-NEXT: s_setpc_b64 s[30:31] |
| 514 | +; |
| 515 | +; GFX10-LABEL: f64_oeq_z_i: |
| 516 | +; GFX10: ; %bb.0: ; %bb |
| 517 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 518 | +; GFX10-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0, v[0:1] |
| 519 | +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo |
| 520 | +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc_lo |
| 521 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 522 | +bb: |
| 523 | + %fcmp = fcmp oeq double %arg, 0.000000e+00 |
| 524 | + %select = select i1 %fcmp, double 0.000000e+00, double %arg1 |
| 525 | + ret double %select |
| 526 | +} |
| 527 | + |
; f64_oeq_z_z: fcmp oeq of a double %arg against +0.0 where the select
; returns %arg itself on the true side and %arg1 otherwise.
; The checks expect a direct v_cmp_eq_f64 with an inline 0 operand, then one
; v_cndmask per 32-bit half choosing between v[2:3] and v[0:1].
| 528 | +define double @f64_oeq_z_z(double %arg, double %arg1) { |
| 529 | +; GFX9-LABEL: f64_oeq_z_z: |
| 530 | +; GFX9: ; %bb.0: ; %bb |
| 531 | +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 532 | +; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1] |
| 533 | +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc |
| 534 | +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc |
| 535 | +; GFX9-NEXT: s_setpc_b64 s[30:31] |
| 536 | +; |
| 537 | +; GFX10-LABEL: f64_oeq_z_z: |
| 538 | +; GFX10: ; %bb.0: ; %bb |
| 539 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 540 | +; GFX10-NEXT: v_cmp_eq_f64_e32 vcc_lo, 0, v[0:1] |
| 541 | +; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo |
| 542 | +; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo |
| 543 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 544 | +bb: |
| 545 | + %fcmp = fcmp oeq double %arg, 0.000000e+00 |
| 546 | + %select = select i1 %fcmp, double %arg, double %arg1 |
| 547 | + ret double %select |
| 548 | +} |
| 549 | + |
; f64_oeq_negz_i: fcmp oeq of a double %arg against -0.0 where the select's
; true operand is the constant -0.0 and its false operand is %arg1.
; Both prefixes expect the inverted compare (v_cmp_neq_f64).
; GFX9: the compare operand is the pair s[4:5] (s4 = 0, s5 from s_brev_b32).
; v1 is overwritten by v_bfrev_b32 only after the compare has read v[0:1],
; and then serves as the constant source for the high-half v_cndmask. The
; low half selects between 0 and v2.
; GFX10: the literal 0x80000000 appears directly in the compare and in the
; high-half v_cndmask.
| 550 | +define double @f64_oeq_negz_i(double %arg, double %arg1) { |
| 551 | +; GFX9-LABEL: f64_oeq_negz_i: |
| 552 | +; GFX9: ; %bb.0: ; %bb |
| 553 | +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 554 | +; GFX9-NEXT: s_mov_b32 s4, 0 |
| 555 | +; GFX9-NEXT: s_brev_b32 s5, 1 |
| 556 | +; GFX9-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] |
| 557 | +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 |
| 558 | +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc |
| 559 | +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc |
| 560 | +; GFX9-NEXT: s_setpc_b64 s[30:31] |
| 561 | +; |
| 562 | +; GFX10-LABEL: f64_oeq_negz_i: |
| 563 | +; GFX10: ; %bb.0: ; %bb |
| 564 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 565 | +; GFX10-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x80000000, v[0:1] |
| 566 | +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo |
| 567 | +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x80000000, v3, vcc_lo |
| 568 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 569 | +bb: |
| 570 | + %fcmp = fcmp oeq double %arg, -0.000000e+00 |
| 571 | + %select = select i1 %fcmp, double -0.000000e+00, double %arg1 |
| 572 | + ret double %select |
| 573 | +} |
| 574 | + |
; f64_oeq_negz_z: fcmp oeq of a double %arg against -0.0 where the select
; returns %arg itself on the true side and %arg1 otherwise.
; The checks expect a direct v_cmp_eq_f64 (operand s[4:5] built from
; s_mov_b32/s_brev_b32 on GFX9, the literal 0x80000000 on GFX10), then one
; v_cndmask per 32-bit half choosing between v[2:3] and v[0:1].
| 575 | +define double @f64_oeq_negz_z(double %arg, double %arg1) { |
| 576 | +; GFX9-LABEL: f64_oeq_negz_z: |
| 577 | +; GFX9: ; %bb.0: ; %bb |
| 578 | +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 579 | +; GFX9-NEXT: s_mov_b32 s4, 0 |
| 580 | +; GFX9-NEXT: s_brev_b32 s5, 1 |
| 581 | +; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1] |
| 582 | +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc |
| 583 | +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc |
| 584 | +; GFX9-NEXT: s_setpc_b64 s[30:31] |
| 585 | +; |
| 586 | +; GFX10-LABEL: f64_oeq_negz_z: |
| 587 | +; GFX10: ; %bb.0: ; %bb |
| 588 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 589 | +; GFX10-NEXT: v_cmp_eq_f64_e32 vcc_lo, 0x80000000, v[0:1] |
| 590 | +; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo |
| 591 | +; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo |
| 592 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 593 | +bb: |
| 594 | + %fcmp = fcmp oeq double %arg, -0.000000e+00 |
| 595 | + %select = select i1 %fcmp, double %arg, double %arg1 |
| 596 | + ret double %select |
| 597 | +} |
0 commit comments