Skip to content

Commit cc5ddae

Browse files
[NVPTX] Fix ISel patterns for i1 sint_to_fp (#110866)
NVPTX has ZeroOrNegativeOneBooleanContent; therefore, the instruction selection patterns for i1 sint_to_fp operations must use -1 (rather than 1) as the selected constant.
1 parent 5589096 commit cc5ddae

File tree

2 files changed

+35
-16
lines changed

2 files changed

+35
-16
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3126,11 +3126,12 @@ foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
31263126

31273127
// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
31283128
// we cannot specify floating-point literals in isel patterns. Therefore, we
3129-
// use an integer selp to select either 1 or 0 and then cvt to floating-point.
3129+
// use an integer selp to select either 1 (or -1 in case of signed) or 0
3130+
// and then cvt to floating-point.
31303131

31313132
// sint -> f16
31323133
def : Pat<(f16 (sint_to_fp Int1Regs:$a)),
3133-
(CVT_f16_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
3134+
(CVT_f16_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
31343135
def : Pat<(f16 (sint_to_fp Int16Regs:$a)),
31353136
(CVT_f16_s16 Int16Regs:$a, CvtRN)>;
31363137
def : Pat<(f16 (sint_to_fp Int32Regs:$a)),
@@ -3170,7 +3171,7 @@ def : Pat<(bf16 (uint_to_fp Int64Regs:$a)),
31703171

31713172
// sint -> f32
31723173
def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
3173-
(CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
3174+
(CVT_f32_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
31743175
def : Pat<(f32 (sint_to_fp Int16Regs:$a)),
31753176
(CVT_f32_s16 Int16Regs:$a, CvtRN)>;
31763177
def : Pat<(f32 (sint_to_fp Int32Regs:$a)),
@@ -3190,7 +3191,7 @@ def : Pat<(f32 (uint_to_fp Int64Regs:$a)),
31903191

31913192
// sint -> f64
31923193
def : Pat<(f64 (sint_to_fp Int1Regs:$a)),
3193-
(CVT_f64_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
3194+
(CVT_f64_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
31943195
def : Pat<(f64 (sint_to_fp Int16Regs:$a)),
31953196
(CVT_f64_s16 Int16Regs:$a, CvtRN)>;
31963197
def : Pat<(f64 (sint_to_fp Int32Regs:$a)),

llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,37 +2,55 @@
22
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
33

44
; CHECK-LABEL: foo
5-
; CHECK: setp
6-
; CHECK: selp
7-
; CHECK: cvt.rn.f32.u32
5+
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
6+
; CHECK: selp.u32 %[[R:r[0-9]+]], 1, 0, %[[P]];
7+
; CHECK: cvt.rn.f32.u32 %f{{.*}}, %[[R]]
88
define float @foo(i1 %a) {
99
%ret = uitofp i1 %a to float
1010
ret float %ret
1111
}
1212

1313
; CHECK-LABEL: foo2
14-
; CHECK: setp
15-
; CHECK: selp
16-
; CHECK: cvt.rn.f32.s32
14+
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
15+
; CHECK: selp.s32 %[[R:r[0-9]+]], -1, 0, %[[P]];
16+
; CHECK: cvt.rn.f32.s32 %f{{.*}}, %[[R]]
1717
define float @foo2(i1 %a) {
1818
%ret = sitofp i1 %a to float
1919
ret float %ret
2020
}
2121

2222
; CHECK-LABEL: foo3
23-
; CHECK: setp
24-
; CHECK: selp
25-
; CHECK: cvt.rn.f64.u32
23+
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
24+
; CHECK: selp.u32 %[[R:r[0-9]+]], 1, 0, %[[P]];
25+
; CHECK: cvt.rn.f64.u32 %fd{{.*}}, %[[R]]
2626
define double @foo3(i1 %a) {
2727
%ret = uitofp i1 %a to double
2828
ret double %ret
2929
}
3030

3131
; CHECK-LABEL: foo4
32-
; CHECK: setp
33-
; CHECK: selp
34-
; CHECK: cvt.rn.f64.s32
32+
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
33+
; CHECK: selp.s32 %[[R:r[0-9]+]], -1, 0, %[[P]];
34+
; CHECK: cvt.rn.f64.s32 %fd{{.*}}, %[[R]]
3535
define double @foo4(i1 %a) {
3636
%ret = sitofp i1 %a to double
3737
ret double %ret
3838
}
39+
40+
; CHECK-LABEL: foo5
41+
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
42+
; CHECK: selp.u32 %[[R:r[0-9]+]], 1, 0, %[[P]];
43+
; CHECK: cvt.rn.f16.u32 %{{.*}}, %[[R]]
44+
define half @foo5(i1 %a) {
45+
%ret = uitofp i1 %a to half
46+
ret half %ret
47+
}
48+
49+
; CHECK-LABEL: foo6
50+
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
51+
; CHECK: selp.s32 %[[R:r[0-9]+]], -1, 0, %[[P]];
52+
; CHECK: cvt.rn.f16.s32 %{{.*}}, %[[R]]
53+
define half @foo6(i1 %a) {
54+
%ret = sitofp i1 %a to half
55+
ret half %ret
56+
}

0 commit comments

Comments
 (0)