Skip to content

Commit 965b68e

Browse files
authored
[NVPTX] Prevent fptrunc of v2f32 from being folded into store (#149571)
1 parent b02787d commit 965b68e

File tree

4 files changed

+48
-5
lines changed

4 files changed

+48
-5
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -731,6 +731,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
731731
setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
732732
setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
733733
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
734+
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
735+
setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Expand);
734736

735737
// PTX does not support load / store predicate registers
736738
setOperationAction(ISD::LOAD, MVT::i1, Custom);

llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -359,11 +359,12 @@ define <2 x bfloat> @test_select_cc_bf16_f32(<2 x bfloat> %a, <2 x bfloat> %b,
359359
define <2 x bfloat> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
360360
; CHECK-LABEL: test_fptrunc_2xfloat(
361361
; CHECK: {
362-
; CHECK-NEXT: .reg .b64 %rd<2>;
362+
; CHECK-NEXT: .reg .b32 %r<4>;
363363
; CHECK-EMPTY:
364364
; CHECK-NEXT: // %bb.0:
365-
; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0];
366-
; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
365+
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0];
366+
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1;
367+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
367368
; CHECK-NEXT: ret;
368369
%r = fptrunc <2 x float> %a to <2 x bfloat>
369370
ret <2 x bfloat> %r

llvm/test/CodeGen/NVPTX/f16x2-instructions.ll

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1499,11 +1499,16 @@ define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
14991499
define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
15001500
; CHECK-LABEL: test_fptrunc_2xfloat(
15011501
; CHECK: {
1502+
; CHECK-NEXT: .reg .b16 %rs<3>;
1503+
; CHECK-NEXT: .reg .b32 %r<4>;
15021504
; CHECK-NEXT: .reg .b64 %rd<2>;
15031505
; CHECK-EMPTY:
15041506
; CHECK-NEXT: // %bb.0:
1505-
; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0];
1506-
; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
1507+
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0];
1508+
; CHECK-NEXT: cvt.rn.f16.f32 %rs1, %r2;
1509+
; CHECK-NEXT: cvt.rn.f16.f32 %rs2, %r1;
1510+
; CHECK-NEXT: mov.b32 %r3, {%rs2, %rs1};
1511+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
15071512
; CHECK-NEXT: ret;
15081513
%r = fptrunc <2 x float> %a to <2 x half>
15091514
ret <2 x half> %r

llvm/test/CodeGen/NVPTX/f32x2-instructions.ll

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2108,6 +2108,41 @@ define <2 x float> @test_uitofp_2xi32_to_2xfloat(<2 x i32> %a) #0 {
21082108
ret <2 x float> %r
21092109
}
21102110

2111+
define void @test_trunc_to_v2bf16(<2 x float> %a, ptr %p) {
2112+
; CHECK-LABEL: test_trunc_to_v2bf16(
2113+
; CHECK: {
2114+
; CHECK-NEXT: .reg .b32 %r<4>;
2115+
; CHECK-NEXT: .reg .b64 %rd<3>;
2116+
; CHECK-EMPTY:
2117+
; CHECK-NEXT: // %bb.0:
2118+
; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2bf16_param_1];
2119+
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_to_v2bf16_param_0];
2120+
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1;
2121+
; CHECK-NEXT: st.b32 [%rd2], %r3;
2122+
; CHECK-NEXT: ret;
2123+
%trunc = fptrunc <2 x float> %a to <2 x bfloat>
2124+
store <2 x bfloat> %trunc, ptr %p
2125+
ret void
2126+
}
2127+
2128+
define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) {
2129+
; CHECK-LABEL: test_trunc_to_v2f16(
2130+
; CHECK: {
2131+
; CHECK-NEXT: .reg .b32 %r<4>;
2132+
; CHECK-NEXT: .reg .b64 %rd<3>;
2133+
; CHECK-EMPTY:
2134+
; CHECK-NEXT: // %bb.0:
2135+
; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2f16_param_1];
2136+
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_to_v2f16_param_0];
2137+
; CHECK-NEXT: cvt.rn.f16x2.f32 %r3, %r2, %r1;
2138+
; CHECK-NEXT: st.b32 [%rd2], %r3;
2139+
; CHECK-NEXT: ret;
2140+
%trunc = fptrunc <2 x float> %a to <2 x half>
2141+
store <2 x half> %trunc, ptr %p
2142+
ret void
2143+
}
2144+
2145+
21112146
attributes #0 = { nounwind }
21122147
attributes #1 = { "unsafe-fp-math" = "true" }
21132148
attributes #2 = { "denormal-fp-math"="preserve-sign" }

0 commit comments

Comments
 (0)