Skip to content

Commit a9a5fab

Browse files
Salinas, David authored
[SelectionDAG] Utilizing target hook convertSelectOfConstantsToMath f… (llvm#1239)
2 parents 150dcea + 8f68f3e commit a9a5fab

29 files changed: +2809 -3182 lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28265,7 +28265,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
 if ((Fold || Swap) &&
 TLI.getBooleanContents(CmpOpVT) ==
 TargetLowering::ZeroOrOneBooleanContent &&
-(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
+(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&
+TLI.convertSelectOfConstantsToMath(VT)) {
 
 if (Swap) {
 CC = ISD::getSetCCInverse(CC, CmpOpVT);

llvm/test/CodeGen/AArch64/bfis-in-loop.ll

Lines changed: 40 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -13,25 +13,26 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
1313
define i64 @bfis_in_loop_zero() {
1414
; CHECK-LABEL: bfis_in_loop_zero:
1515
; CHECK: // %bb.0: // %entry
16-
; CHECK-NEXT: adrp x8, :got:global
17-
; CHECK-NEXT: mov x0, xzr
18-
; CHECK-NEXT: mov w9, wzr
19-
; CHECK-NEXT: ldr x8, [x8, :got_lo12:global]
20-
; CHECK-NEXT: ldr x8, [x8]
21-
; CHECK-NEXT: .LBB0_1: // %midblock
22-
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
23-
; CHECK-NEXT: ldrh w10, [x8, #72]
24-
; CHECK-NEXT: ldr x13, [x8, #8]
25-
; CHECK-NEXT: lsr w11, w10, #8
26-
; CHECK-NEXT: cmp w10, #0
27-
; CHECK-NEXT: ldr x8, [x13, #16]
28-
; CHECK-NEXT: cset w12, ne
29-
; CHECK-NEXT: csel w9, w9, w11, eq
30-
; CHECK-NEXT: and x11, x0, #0xffffffff00000000
31-
; CHECK-NEXT: bfi w10, w9, #8, #24
32-
; CHECK-NEXT: orr x11, x11, x12, lsl #16
33-
; CHECK-NEXT: orr x0, x11, x10
34-
; CHECK-NEXT: cbnz x13, .LBB0_1
16+
; CHECK-NEXT: adrp x9, :got:global
17+
; CHECK-NEXT: mov x0, xzr
18+
; CHECK-NEXT: mov w8, wzr
19+
; CHECK-NEXT: ldr x9, [x9, :got_lo12:global]
20+
; CHECK-NEXT: mov w10, #65536 // =0x10000
21+
; CHECK-NEXT: ldr x9, [x9]
22+
; CHECK-NEXT: .LBB0_1: // %midblock
23+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
24+
; CHECK-NEXT: ldrh w11, [x9, #72]
25+
; CHECK-NEXT: and x13, x0, #0xffffffff00000000
26+
; CHECK-NEXT: lsr w12, w11, #8
27+
; CHECK-NEXT: cmp w11, #0
28+
; CHECK-NEXT: csel w8, w8, w12, eq
29+
; CHECK-NEXT: ldr x12, [x9, #8]
30+
; CHECK-NEXT: csel x9, xzr, x10, eq
31+
; CHECK-NEXT: bfi w11, w8, #8, #24
32+
; CHECK-NEXT: orr x13, x9, x13
33+
; CHECK-NEXT: ldr x9, [x12, #16]
34+
; CHECK-NEXT: orr x0, x13, x11
35+
; CHECK-NEXT: cbnz x12, .LBB0_1
3536
; CHECK-NEXT: // %bb.2: // %exit
3637
; CHECK-NEXT: ret
3738
entry:
@@ -80,25 +81,26 @@ exit:
8081
define i64 @bfis_in_loop_undef() {
8182
; CHECK-LABEL: bfis_in_loop_undef:
8283
; CHECK: // %bb.0: // %entry
83-
; CHECK-NEXT: adrp x9, :got:global
84-
; CHECK-NEXT: mov w8, wzr
85-
; CHECK-NEXT: // implicit-def: $x0
86-
; CHECK-NEXT: ldr x9, [x9, :got_lo12:global]
87-
; CHECK-NEXT: ldr x9, [x9]
88-
; CHECK-NEXT: .LBB1_1: // %midblock
89-
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
90-
; CHECK-NEXT: ldrh w10, [x9, #72]
91-
; CHECK-NEXT: ldr x13, [x9, #8]
92-
; CHECK-NEXT: lsr w11, w10, #8
93-
; CHECK-NEXT: cmp w10, #0
94-
; CHECK-NEXT: ldr x9, [x13, #16]
95-
; CHECK-NEXT: cset w12, ne
96-
; CHECK-NEXT: csel w8, w8, w11, eq
97-
; CHECK-NEXT: and x11, x0, #0xffffffff00000000
98-
; CHECK-NEXT: bfi w10, w8, #8, #24
99-
; CHECK-NEXT: orr x11, x11, x12, lsl #16
100-
; CHECK-NEXT: orr x0, x11, x10
101-
; CHECK-NEXT: cbnz x13, .LBB1_1
84+
; CHECK-NEXT: adrp x9, :got:global
85+
; CHECK-NEXT: mov w8, wzr
86+
; CHECK-NEXT: // implicit-def: $x0
87+
; CHECK-NEXT: ldr x9, [x9, :got_lo12:global]
88+
; CHECK-NEXT: ldr x10, [x9]
89+
; CHECK-NEXT: mov w9, #65536 // =0x10000
90+
; CHECK-NEXT: .LBB1_1: // %midblock
91+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
92+
; CHECK-NEXT: ldrh w11, [x10, #72]
93+
; CHECK-NEXT: and x13, x0, #0xffffffff00000000
94+
; CHECK-NEXT: lsr w12, w11, #8
95+
; CHECK-NEXT: cmp w11, #0
96+
; CHECK-NEXT: csel w8, w8, w12, eq
97+
; CHECK-NEXT: ldr x12, [x10, #8]
98+
; CHECK-NEXT: csel x10, xzr, x9, eq
99+
; CHECK-NEXT: bfi w11, w8, #8, #24
100+
; CHECK-NEXT: orr x13, x10, x13
101+
; CHECK-NEXT: ldr x10, [x12, #16]
102+
; CHECK-NEXT: orr x0, x13, x11
103+
; CHECK-NEXT: cbnz x12, .LBB1_1
102104
; CHECK-NEXT: // %bb.2: // %exit
103105
; CHECK-NEXT: ret
104106
entry:

llvm/test/CodeGen/AArch64/select_cc.ll

Lines changed: 52 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,51 +2,79 @@
22
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
33

44
define i64 @select_ogt_float(float %a, float %b) {
5-
; CHECK-LABEL: select_ogt_float:
6-
; CHECK: // %bb.0: // %entry
7-
; CHECK-NEXT: fcmp s0, s1
8-
; CHECK-NEXT: cset w8, gt
9-
; CHECK-NEXT: ubfiz x0, x8, #2, #32
10-
; CHECK-NEXT: ret
5+
; CHECK-SD-LABEL: select_ogt_float:
6+
; CHECK-SD: // %bb.0: // %entry
7+
; CHECK-SD-NEXT: fcmp s0, s1
8+
; CHECK-SD-NEXT: mov w8, #4 // =0x4
9+
; CHECK-SD-NEXT: csel x0, x8, xzr, gt
10+
; CHECK-SD-NEXT: ret
11+
;
12+
; CHECK-GI-LABEL: select_ogt_float:
13+
; CHECK-GI: // %bb.0: // %entry
14+
; CHECK-GI-NEXT: fcmp s0, s1
15+
; CHECK-GI-NEXT: cset w8, gt
16+
; CHECK-GI-NEXT: lsl x0, x8, #2
17+
; CHECK-GI-NEXT: ret
1118
entry:
1219
%cc = fcmp ogt float %a, %b
1320
%sel = select i1 %cc, i64 4, i64 0
1421
ret i64 %sel
1522
}
1623

1724
define i64 @select_ule_float_inverse(float %a, float %b) {
18-
; CHECK-LABEL: select_ule_float_inverse:
19-
; CHECK: // %bb.0: // %entry
20-
; CHECK-NEXT: fcmp s0, s1
21-
; CHECK-NEXT: cset w8, gt
22-
; CHECK-NEXT: ubfiz x0, x8, #2, #32
23-
; CHECK-NEXT: ret
25+
; CHECK-SD-LABEL: select_ule_float_inverse:
26+
; CHECK-SD: // %bb.0: // %entry
27+
; CHECK-SD-NEXT: fcmp s0, s1
28+
; CHECK-SD-NEXT: mov w8, #4 // =0x4
29+
; CHECK-SD-NEXT: csel x0, xzr, x8, le
30+
; CHECK-SD-NEXT: ret
31+
;
32+
; CHECK-GI-LABEL: select_ule_float_inverse:
33+
; CHECK-GI: // %bb.0: // %entry
34+
; CHECK-GI-NEXT: fcmp s0, s1
35+
; CHECK-GI-NEXT: cset w8, gt
36+
; CHECK-GI-NEXT: lsl x0, x8, #2
37+
; CHECK-GI-NEXT: ret
2438
entry:
2539
%cc = fcmp ule float %a, %b
2640
%sel = select i1 %cc, i64 0, i64 4
2741
ret i64 %sel
2842
}
2943

3044
define i64 @select_eq_i32(i32 %a, i32 %b) {
31-
; CHECK-LABEL: select_eq_i32:
32-
; CHECK: // %bb.0: // %entry
33-
; CHECK-NEXT: cmp w0, w1
34-
; CHECK-NEXT: cset w8, eq
35-
; CHECK-NEXT: ubfiz x0, x8, #2, #32
36-
; CHECK-NEXT: ret
45+
; CHECK-SD-LABEL: select_eq_i32:
46+
; CHECK-SD: // %bb.0: // %entry
47+
; CHECK-SD-NEXT: mov w8, #4 // =0x4
48+
; CHECK-SD-NEXT: cmp w0, w1
49+
; CHECK-SD-NEXT: csel x0, x8, xzr, eq
50+
; CHECK-SD-NEXT: ret
51+
;
52+
; CHECK-GI-LABEL: select_eq_i32:
53+
; CHECK-GI: // %bb.0: // %entry
54+
; CHECK-GI-NEXT: cmp w0, w1
55+
; CHECK-GI-NEXT: cset w8, eq
56+
; CHECK-GI-NEXT: lsl x0, x8, #2
57+
; CHECK-GI-NEXT: ret
3758
entry:
3859
%cc = icmp eq i32 %a, %b
3960
%sel = select i1 %cc, i64 4, i64 0
4061
ret i64 %sel
4162
}
4263

4364
define i64 @select_ne_i32_inverse(i32 %a, i32 %b) {
44-
; CHECK-LABEL: select_ne_i32_inverse:
45-
; CHECK: // %bb.0: // %entry
46-
; CHECK-NEXT: cmp w0, w1
47-
; CHECK-NEXT: cset w8, eq
48-
; CHECK-NEXT: ubfiz x0, x8, #2, #32
49-
; CHECK-NEXT: ret
65+
; CHECK-SD-LABEL: select_ne_i32_inverse:
66+
; CHECK-SD: // %bb.0: // %entry
67+
; CHECK-SD-NEXT: mov w8, #4 // =0x4
68+
; CHECK-SD-NEXT: cmp w0, w1
69+
; CHECK-SD-NEXT: csel x0, xzr, x8, ne
70+
; CHECK-SD-NEXT: ret
71+
;
72+
; CHECK-GI-LABEL: select_ne_i32_inverse:
73+
; CHECK-GI: // %bb.0: // %entry
74+
; CHECK-GI-NEXT: cmp w0, w1
75+
; CHECK-GI-NEXT: cset w8, eq
76+
; CHECK-GI-NEXT: lsl x0, x8, #2
77+
; CHECK-GI-NEXT: ret
5078
entry:
5179
%cc = icmp ne i32 %a, %b
5280
%sel = select i1 %cc, i64 0, i64 4

llvm/test/CodeGen/AArch64/selectopt-const.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,24 @@ define i32 @test_const(ptr %in1, ptr %in2, ptr %out, i32 %n, ptr %tbl) {
 ; CHECK-NEXT: mov w8, w3
 ; CHECK-NEXT: movk w9, #16309, lsl #16
 ; CHECK-NEXT: fmov s0, w9
+; CHECK-NEXT: mov w9, #16 // =0x10
 ; CHECK-NEXT: .p2align 5, , 16
 ; CHECK-NEXT: .LBB0_2: // %for.body
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: ldr s4, [x1], #4
-; CHECK-NEXT: ldr w9, [x0], #4
-; CHECK-NEXT: add w9, w9, #10
-; CHECK-NEXT: scvtf d3, w9
+; CHECK-NEXT: ldr w10, [x0], #4
+; CHECK-NEXT: add w10, w10, #10
+; CHECK-NEXT: scvtf d3, w10
 ; CHECK-NEXT: fmadd s4, s4, s0, s1
 ; CHECK-NEXT: fabs s4, s4
 ; CHECK-NEXT: fcvt d4, s4
 ; CHECK-NEXT: fdiv d3, d3, d4
 ; CHECK-NEXT: fcmp d3, d2
-; CHECK-NEXT: cset w9, lt
+; CHECK-NEXT: csel x10, x9, xzr, lt
 ; CHECK-NEXT: subs x8, x8, #1
-; CHECK-NEXT: ubfiz x9, x9, #4, #32
-; CHECK-NEXT: ldr s3, [x4, x9]
-; CHECK-NEXT: fcvtzs w9, s3
-; CHECK-NEXT: str w9, [x2], #4
+; CHECK-NEXT: ldr s3, [x4, x10]
+; CHECK-NEXT: fcvtzs w10, s3
+; CHECK-NEXT: str w10, [x2], #4
 ; CHECK-NEXT: b.ne .LBB0_2
 ; CHECK-NEXT: .LBB0_3: // %for.cond.cleanup
 ; CHECK-NEXT: mov w0, wzr

llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 ; GCN-ALLOCA: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v{{[0-9]+}}, v0
 
 ; GCN-PROMOTE: s_cmp_eq_u32 [[IN]], 1
-; GCN-PROMOTE-NEXT: s_cselect_b64 vcc, -1, 0
-; GCN-PROMOTE-NEXT: v_addc_u32_e32 [[RESULT:v[0-9]+]], vcc, 0, v0, vcc
+; GCN-PROMOTE-NEXT: s_cselect_b32 [[SCC:s[0-9]+]], 1, 0
+; GCN-PROMOTE-NEXT: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, [[SCC]], v0
 
 ; GCN: buffer_store_dword [[RESULT]]
 define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) {

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -82,17 +82,16 @@ define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {
8282
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
8383
; CHECK-NEXT: s_mov_b32 s4, 0x800000
8484
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
85-
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
85+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
8686
; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1
87-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
8887
; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
8988
; CHECK-NEXT: v_log_f32_e32 v3, v3
90-
; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
9189
; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
90+
; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
9291
; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
9392
; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2
94-
; CHECK-NEXT: v_mul_f32_e32 v3, v2, v4
9593
; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
94+
; CHECK-NEXT: v_mul_f32_e32 v3, v2, v4
9695
; CHECK-NEXT: v_mov_b32_e32 v5, 0x42800000
9796
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
9897
; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
@@ -228,8 +227,7 @@ define float @test_powr_fast_f32(float %x, float %y) {
228227
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229228
; CHECK-NEXT: s_mov_b32 s4, 0x800000
230229
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
231-
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
232-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
230+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
233231
; CHECK-NEXT: v_ldexp_f32 v0, v0, v3
234232
; CHECK-NEXT: v_log_f32_e32 v0, v0
235233
; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
@@ -368,8 +366,7 @@ define float @test_pown_fast_f32(float %x, i32 %y) {
368366
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369367
; CHECK-NEXT: s_mov_b32 s4, 0x800000
370368
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
371-
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
372-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
369+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
373370
; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
374371
; CHECK-NEXT: v_log_f32_e32 v3, v3
375372
; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
@@ -511,8 +508,7 @@ define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) {
511508
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512509
; CHECK-NEXT: s_mov_b32 s4, 0x800000
513510
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
514-
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
515-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
511+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
516512
; CHECK-NEXT: v_ldexp_f32 v0, |v0|, v3
517513
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 1, v1
518514
; CHECK-NEXT: v_log_f32_e32 v0, v0
@@ -651,8 +647,7 @@ define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) {
651647
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652648
; CHECK-NEXT: s_mov_b32 s4, 0x800000
653649
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
654-
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
655-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
650+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
656651
; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
657652
; CHECK-NEXT: v_or_b32_e32 v1, 1, v1
658653
; CHECK-NEXT: v_log_f32_e32 v3, v3

0 commit comments

Comments
 (0)