Skip to content

Commit 8e5571c

Browse files
committed
[NVPTX] designate fabs and fneg as free
1 parent e46e4bf commit 8e5571c

File tree

3 files changed

+15
-16
lines changed

3 files changed

+15
-16
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,9 @@ class NVPTXTargetLowering : public TargetLowering {
261261
return true;
262262
}
263263

264+
// Base-class hook override: report fabs as free for every value type.
// The argument is ignored, so the answer never depends on VT.
// See llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll, where fabs of a bitcast
// integer is expected as `abs.f32` rather than an integer `and.b32` mask.
bool isFAbsFree(EVT VT) const override {
  return true;
}
265+
// Base-class hook override: report fneg as free for every value type.
// The argument is ignored, so the answer never depends on VT.
// See llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll, where fneg of a bitcast
// integer is expected as `neg.f32` rather than an integer `xor.b32` of the
// sign bit.
bool isFNegFree(EVT VT) const override {
  return true;
}
266+
264267
private:
265268
const NVPTXSubtarget &STI; // cache the subtarget here
266269
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;

llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -182,8 +182,8 @@ define <2 x bfloat> @test_fneg(<2 x bfloat> %a) #0 {
182182
; CHECK-NEXT: .reg .b32 %r<3>;
183183
; CHECK-EMPTY:
184184
; CHECK-NEXT: // %bb.0:
185-
; CHECK-NEXT: ld.param.u32 %r1, [test_fneg_param_0];
186-
; CHECK-NEXT: xor.b32 %r2, %r1, -2147450880;
185+
; CHECK-NEXT: ld.param.b32 %r1, [test_fneg_param_0];
186+
; CHECK-NEXT: neg.bf16x2 %r2, %r1;
187187
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
188188
; CHECK-NEXT: ret;
189189
%r = fneg <2 x bfloat> %a
@@ -532,8 +532,8 @@ define <2 x bfloat> @test_fabs(<2 x bfloat> %a) #0 {
532532
; CHECK-NEXT: .reg .b32 %r<3>;
533533
; CHECK-EMPTY:
534534
; CHECK-NEXT: // %bb.0:
535-
; CHECK-NEXT: ld.param.u32 %r1, [test_fabs_param_0];
536-
; CHECK-NEXT: and.b32 %r2, %r1, 2147450879;
535+
; CHECK-NEXT: ld.param.b32 %r1, [test_fabs_param_0];
536+
; CHECK-NEXT: abs.bf16x2 %r2, %r1;
537537
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
538538
; CHECK-NEXT: ret;
539539
%r = call <2 x bfloat> @llvm.fabs.f16(<2 x bfloat> %a)

llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,12 @@ target triple = "nvptx64-nvidia-cuda"
66
define float @fabs_free(i32 %in) {
77
; CHECK-LABEL: fabs_free(
88
; CHECK: {
9-
; CHECK-NEXT: .reg .b32 %r<3>;
10-
; CHECK-NEXT: .reg .f32 %f<2>;
9+
; CHECK-NEXT: .reg .f32 %f<3>;
1110
; CHECK-EMPTY:
1211
; CHECK-NEXT: // %bb.0:
13-
; CHECK-NEXT: ld.param.u32 %r1, [fabs_free_param_0];
14-
; CHECK-NEXT: and.b32 %r2, %r1, 2147483647;
15-
; CHECK-NEXT: mov.b32 %f1, %r2;
16-
; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
12+
; CHECK-NEXT: ld.param.f32 %f1, [fabs_free_param_0];
13+
; CHECK-NEXT: abs.f32 %f2, %f1;
14+
; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
1715
; CHECK-NEXT: ret;
1816
%b = bitcast i32 %in to float
1917
%f = call float @llvm.fabs.f32(float %b)
@@ -23,14 +21,12 @@ define float @fabs_free(i32 %in) {
2321
define float @fneg_free(i32 %in) {
2422
; CHECK-LABEL: fneg_free(
2523
; CHECK: {
26-
; CHECK-NEXT: .reg .b32 %r<3>;
27-
; CHECK-NEXT: .reg .f32 %f<2>;
24+
; CHECK-NEXT: .reg .f32 %f<3>;
2825
; CHECK-EMPTY:
2926
; CHECK-NEXT: // %bb.0:
30-
; CHECK-NEXT: ld.param.u32 %r1, [fneg_free_param_0];
31-
; CHECK-NEXT: xor.b32 %r2, %r1, -2147483648;
32-
; CHECK-NEXT: mov.b32 %f1, %r2;
33-
; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
27+
; CHECK-NEXT: ld.param.f32 %f1, [fneg_free_param_0];
28+
; CHECK-NEXT: neg.f32 %f2, %f1;
29+
; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
3430
; CHECK-NEXT: ret;
3531
%b = bitcast i32 %in to float
3632
%f = fneg float %b

0 commit comments

Comments
 (0)