Skip to content

Commit 946872a

Browse files
committed
Revert "[NVPTX] Remove Float register classes (llvm#140487)"
This reverts commit 76c9bfe.
1 parent 11224c9 commit 946872a

File tree

96 files changed

+7314
-9137
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

96 files changed

+7314
-9137
lines changed

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -222,6 +222,10 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
222222
Ret = (3 << 28);
223223
} else if (RC == &NVPTX::Int64RegsRegClass) {
224224
Ret = (4 << 28);
225+
} else if (RC == &NVPTX::Float32RegsRegClass) {
226+
Ret = (5 << 28);
227+
} else if (RC == &NVPTX::Float64RegsRegClass) {
228+
Ret = (6 << 28);
225229
} else if (RC == &NVPTX::Int128RegsRegClass) {
226230
Ret = (7 << 28);
227231
} else {

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -586,8 +586,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
586586
addRegisterClass(MVT::v4i8, &NVPTX::Int32RegsRegClass);
587587
addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
588588
addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
589-
addRegisterClass(MVT::f32, &NVPTX::Int32RegsRegClass);
590-
addRegisterClass(MVT::f64, &NVPTX::Int64RegsRegClass);
589+
addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
590+
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
591591
addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass);
592592
addRegisterClass(MVT::v2f16, &NVPTX::Int32RegsRegClass);
593593
addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass);
@@ -4858,21 +4858,24 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
48584858
case 'b':
48594859
return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
48604860
case 'c':
4861+
return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
48614862
case 'h':
48624863
return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
48634864
case 'r':
4864-
case 'f':
48654865
return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
48664866
case 'l':
48674867
case 'N':
4868-
case 'd':
48694868
return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
48704869
case 'q': {
48714870
if (STI.getSmVersion() < 70)
48724871
report_fatal_error("Inline asm with 128 bit operands is only "
48734872
"supported for sm_70 and higher!");
48744873
return std::make_pair(0U, &NVPTX::Int128RegsRegClass);
48754874
}
4875+
case 'f':
4876+
return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
4877+
case 'd':
4878+
return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
48764879
}
48774880
}
48784881
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,11 +44,19 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4444
} else if (DestRC == &NVPTX::Int16RegsRegClass) {
4545
Op = NVPTX::MOV16r;
4646
} else if (DestRC == &NVPTX::Int32RegsRegClass) {
47-
Op = NVPTX::IMOV32r;
47+
Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32r
48+
: NVPTX::BITCONVERT_32_F2I);
4849
} else if (DestRC == &NVPTX::Int64RegsRegClass) {
49-
Op = NVPTX::IMOV64r;
50+
Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64r
51+
: NVPTX::BITCONVERT_64_F2I);
5052
} else if (DestRC == &NVPTX::Int128RegsRegClass) {
5153
Op = NVPTX::IMOV128r;
54+
} else if (DestRC == &NVPTX::Float32RegsRegClass) {
55+
Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32r
56+
: NVPTX::BITCONVERT_32_I2F);
57+
} else if (DestRC == &NVPTX::Float64RegsRegClass) {
58+
Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64r
59+
: NVPTX::BITCONVERT_64_I2F);
5260
} else {
5361
llvm_unreachable("Bad register copy");
5462
}

llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,10 @@ using namespace llvm;
2525

2626
namespace llvm {
2727
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC) {
28+
if (RC == &NVPTX::Float32RegsRegClass)
29+
return ".b32";
30+
if (RC == &NVPTX::Float64RegsRegClass)
31+
return ".b64";
2832
if (RC == &NVPTX::Int128RegsRegClass)
2933
return ".b128";
3034
if (RC == &NVPTX::Int64RegsRegClass)
@@ -59,6 +63,10 @@ StringRef getNVPTXRegClassName(TargetRegisterClass const *RC) {
5963
}
6064

6165
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC) {
66+
if (RC == &NVPTX::Float32RegsRegClass)
67+
return "%f";
68+
if (RC == &NVPTX::Float64RegsRegClass)
69+
return "%fd";
6270
if (RC == &NVPTX::Int128RegsRegClass)
6371
return "%rq";
6472
if (RC == &NVPTX::Int64RegsRegClass)

llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,8 @@ foreach i = 0...4 in {
4040
def RQ#i : NVPTXReg<"%rq"#i>; // 128-bit
4141
def H#i : NVPTXReg<"%h"#i>; // 16-bit float
4242
def HH#i : NVPTXReg<"%hh"#i>; // 2x16-bit float
43+
def F#i : NVPTXReg<"%f"#i>; // 32-bit float
44+
def FL#i : NVPTXReg<"%fd"#i>; // 64-bit float
4345

4446
// Arguments
4547
def ia#i : NVPTXReg<"%ia"#i>;
@@ -57,13 +59,14 @@ foreach i = 0...31 in {
5759
//===----------------------------------------------------------------------===//
5860
def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>;
5961
def Int16Regs : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>;
60-
def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16, v4i8, f32], 32,
62+
def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16, v4i8], 32,
6163
(add (sequence "R%u", 0, 4),
6264
VRFrame32, VRFrameLocal32)>;
63-
def Int64Regs : NVPTXRegClass<[i64, f64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>;
65+
def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>;
6466
// 128-bit regs are not defined as general regs in NVPTX. They are used for inlineASM only.
6567
def Int128Regs : NVPTXRegClass<[i128], 128, (add (sequence "RQ%u", 0, 4))>;
66-
68+
def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
69+
def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 4))>;
6770
def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 4))>;
6871
def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%u", 0, 4))>;
6972
def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>;
@@ -72,6 +75,3 @@ def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>;
7275
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
7376
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame32, VRFrameLocal32, VRDepot,
7477
(sequence "ENVREG%u", 0, 31))>;
75-
76-
defvar Float32Regs = Int32Regs;
77-
defvar Float64Regs = Int64Regs;

llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,8 @@
1212
---
1313
name: test
1414
registers:
15-
- { id: 0, class: int32regs }
16-
- { id: 1, class: int32regs }
15+
- { id: 0, class: float32regs }
16+
- { id: 1, class: float32regs }
1717
body: |
1818
bb.0.entry:
1919
%0 = LD_f32 0, 4, 1, 2, 32, &test_param_0, 0

llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -30,24 +30,24 @@
3030
---
3131
name: test
3232
registers:
33-
- { id: 0, class: int32regs }
34-
- { id: 1, class: int64regs }
33+
- { id: 0, class: float32regs }
34+
- { id: 1, class: float64regs }
3535
- { id: 2, class: int32regs }
36-
- { id: 3, class: int64regs }
37-
- { id: 4, class: int32regs }
38-
- { id: 5, class: int32regs }
39-
- { id: 6, class: int32regs }
40-
- { id: 7, class: int32regs }
36+
- { id: 3, class: float64regs }
37+
- { id: 4, class: float32regs }
38+
- { id: 5, class: float32regs }
39+
- { id: 6, class: float32regs }
40+
- { id: 7, class: float32regs }
4141
body: |
4242
bb.0.entry:
4343
%0 = LD_f32 0, 0, 4, 2, 32, &test_param_0, 0
4444
%1 = CVT_f64_f32 %0, 0
4545
%2 = LD_i32 0, 0, 4, 0, 32, &test_param_1, 0
46-
; CHECK: %3:int64regs = FADD_rnf64ri %1, double 3.250000e+00
46+
; CHECK: %3:float64regs = FADD_rnf64ri %1, double 3.250000e+00
4747
%3 = FADD_rnf64ri %1, double 3.250000e+00
4848
%4 = CVT_f32_f64 %3, 5
4949
%5 = CVT_f32_s32 %2, 5
50-
; CHECK: %6:int32regs = FADD_rnf32ri %5, float 6.250000e+00
50+
; CHECK: %6:float32regs = FADD_rnf32ri %5, float 6.250000e+00
5151
%6 = FADD_rnf32ri %5, float 6.250000e+00
5252
%7 = FMUL_rnf32rr %6, %4
5353
StoreRetvalF32 %7, 0
@@ -56,24 +56,24 @@ body: |
5656
---
5757
name: test2
5858
registers:
59-
- { id: 0, class: int32regs }
60-
- { id: 1, class: int64regs }
59+
- { id: 0, class: float32regs }
60+
- { id: 1, class: float64regs }
6161
- { id: 2, class: int32regs }
62-
- { id: 3, class: int64regs }
63-
- { id: 4, class: int32regs }
64-
- { id: 5, class: int32regs }
65-
- { id: 6, class: int32regs }
66-
- { id: 7, class: int32regs }
62+
- { id: 3, class: float64regs }
63+
- { id: 4, class: float32regs }
64+
- { id: 5, class: float32regs }
65+
- { id: 6, class: float32regs }
66+
- { id: 7, class: float32regs }
6767
body: |
6868
bb.0.entry:
6969
%0 = LD_f32 0, 0, 4, 2, 32, &test2_param_0, 0
7070
%1 = CVT_f64_f32 %0, 0
7171
%2 = LD_i32 0, 0, 4, 0, 32, &test2_param_1, 0
72-
; CHECK: %3:int64regs = FADD_rnf64ri %1, double 0x7FF8000000000000
72+
; CHECK: %3:float64regs = FADD_rnf64ri %1, double 0x7FF8000000000000
7373
%3 = FADD_rnf64ri %1, double 0x7FF8000000000000
7474
%4 = CVT_f32_f64 %3, 5
7575
%5 = CVT_f32_s32 %2, 5
76-
; CHECK: %6:int32regs = FADD_rnf32ri %5, float 0x7FF8000000000000
76+
; CHECK: %6:float32regs = FADD_rnf32ri %5, float 0x7FF8000000000000
7777
%6 = FADD_rnf32ri %5, float 0x7FF8000000000000
7878
%7 = FMUL_rnf32rr %6, %4
7979
StoreRetvalF32 %7, 0

llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,8 @@
1212
---
1313
name: test
1414
registers:
15-
- { id: 0, class: int32regs }
16-
- { id: 1, class: int32regs }
15+
- { id: 0, class: float32regs }
16+
- { id: 1, class: float32regs }
1717
body: |
1818
bb.0.entry:
1919
%0 = LD_f32 0, 4, 1, 2, 32, &test_param_0, 0

llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll

Lines changed: 53 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -45,36 +45,36 @@ define half @fh(ptr %p) {
4545
; ENABLED-LABEL: fh(
4646
; ENABLED: {
4747
; ENABLED-NEXT: .reg .b16 %rs<10>;
48-
; ENABLED-NEXT: .reg .b32 %r<13>;
48+
; ENABLED-NEXT: .reg .b32 %f<13>;
4949
; ENABLED-NEXT: .reg .b64 %rd<2>;
5050
; ENABLED-EMPTY:
5151
; ENABLED-NEXT: // %bb.0:
5252
; ENABLED-NEXT: ld.param.b64 %rd1, [fh_param_0];
5353
; ENABLED-NEXT: ld.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
5454
; ENABLED-NEXT: ld.b16 %rs5, [%rd1+8];
55-
; ENABLED-NEXT: cvt.f32.f16 %r1, %rs2;
56-
; ENABLED-NEXT: cvt.f32.f16 %r2, %rs1;
57-
; ENABLED-NEXT: add.rn.f32 %r3, %r2, %r1;
58-
; ENABLED-NEXT: cvt.rn.f16.f32 %rs6, %r3;
59-
; ENABLED-NEXT: cvt.f32.f16 %r4, %rs4;
60-
; ENABLED-NEXT: cvt.f32.f16 %r5, %rs3;
61-
; ENABLED-NEXT: add.rn.f32 %r6, %r5, %r4;
62-
; ENABLED-NEXT: cvt.rn.f16.f32 %rs7, %r6;
63-
; ENABLED-NEXT: cvt.f32.f16 %r7, %rs7;
64-
; ENABLED-NEXT: cvt.f32.f16 %r8, %rs6;
65-
; ENABLED-NEXT: add.rn.f32 %r9, %r8, %r7;
66-
; ENABLED-NEXT: cvt.rn.f16.f32 %rs8, %r9;
67-
; ENABLED-NEXT: cvt.f32.f16 %r10, %rs8;
68-
; ENABLED-NEXT: cvt.f32.f16 %r11, %rs5;
69-
; ENABLED-NEXT: add.rn.f32 %r12, %r10, %r11;
70-
; ENABLED-NEXT: cvt.rn.f16.f32 %rs9, %r12;
55+
; ENABLED-NEXT: cvt.f32.f16 %f1, %rs2;
56+
; ENABLED-NEXT: cvt.f32.f16 %f2, %rs1;
57+
; ENABLED-NEXT: add.rn.f32 %f3, %f2, %f1;
58+
; ENABLED-NEXT: cvt.rn.f16.f32 %rs6, %f3;
59+
; ENABLED-NEXT: cvt.f32.f16 %f4, %rs4;
60+
; ENABLED-NEXT: cvt.f32.f16 %f5, %rs3;
61+
; ENABLED-NEXT: add.rn.f32 %f6, %f5, %f4;
62+
; ENABLED-NEXT: cvt.rn.f16.f32 %rs7, %f6;
63+
; ENABLED-NEXT: cvt.f32.f16 %f7, %rs7;
64+
; ENABLED-NEXT: cvt.f32.f16 %f8, %rs6;
65+
; ENABLED-NEXT: add.rn.f32 %f9, %f8, %f7;
66+
; ENABLED-NEXT: cvt.rn.f16.f32 %rs8, %f9;
67+
; ENABLED-NEXT: cvt.f32.f16 %f10, %rs8;
68+
; ENABLED-NEXT: cvt.f32.f16 %f11, %rs5;
69+
; ENABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
70+
; ENABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
7171
; ENABLED-NEXT: st.param.b16 [func_retval0], %rs9;
7272
; ENABLED-NEXT: ret;
7373
;
7474
; DISABLED-LABEL: fh(
7575
; DISABLED: {
7676
; DISABLED-NEXT: .reg .b16 %rs<10>;
77-
; DISABLED-NEXT: .reg .b32 %r<13>;
77+
; DISABLED-NEXT: .reg .b32 %f<13>;
7878
; DISABLED-NEXT: .reg .b64 %rd<2>;
7979
; DISABLED-EMPTY:
8080
; DISABLED-NEXT: // %bb.0:
@@ -84,22 +84,22 @@ define half @fh(ptr %p) {
8484
; DISABLED-NEXT: ld.b16 %rs3, [%rd1+4];
8585
; DISABLED-NEXT: ld.b16 %rs4, [%rd1+6];
8686
; DISABLED-NEXT: ld.b16 %rs5, [%rd1+8];
87-
; DISABLED-NEXT: cvt.f32.f16 %r1, %rs2;
88-
; DISABLED-NEXT: cvt.f32.f16 %r2, %rs1;
89-
; DISABLED-NEXT: add.rn.f32 %r3, %r2, %r1;
90-
; DISABLED-NEXT: cvt.rn.f16.f32 %rs6, %r3;
91-
; DISABLED-NEXT: cvt.f32.f16 %r4, %rs4;
92-
; DISABLED-NEXT: cvt.f32.f16 %r5, %rs3;
93-
; DISABLED-NEXT: add.rn.f32 %r6, %r5, %r4;
94-
; DISABLED-NEXT: cvt.rn.f16.f32 %rs7, %r6;
95-
; DISABLED-NEXT: cvt.f32.f16 %r7, %rs7;
96-
; DISABLED-NEXT: cvt.f32.f16 %r8, %rs6;
97-
; DISABLED-NEXT: add.rn.f32 %r9, %r8, %r7;
98-
; DISABLED-NEXT: cvt.rn.f16.f32 %rs8, %r9;
99-
; DISABLED-NEXT: cvt.f32.f16 %r10, %rs8;
100-
; DISABLED-NEXT: cvt.f32.f16 %r11, %rs5;
101-
; DISABLED-NEXT: add.rn.f32 %r12, %r10, %r11;
102-
; DISABLED-NEXT: cvt.rn.f16.f32 %rs9, %r12;
87+
; DISABLED-NEXT: cvt.f32.f16 %f1, %rs2;
88+
; DISABLED-NEXT: cvt.f32.f16 %f2, %rs1;
89+
; DISABLED-NEXT: add.rn.f32 %f3, %f2, %f1;
90+
; DISABLED-NEXT: cvt.rn.f16.f32 %rs6, %f3;
91+
; DISABLED-NEXT: cvt.f32.f16 %f4, %rs4;
92+
; DISABLED-NEXT: cvt.f32.f16 %f5, %rs3;
93+
; DISABLED-NEXT: add.rn.f32 %f6, %f5, %f4;
94+
; DISABLED-NEXT: cvt.rn.f16.f32 %rs7, %f6;
95+
; DISABLED-NEXT: cvt.f32.f16 %f7, %rs7;
96+
; DISABLED-NEXT: cvt.f32.f16 %f8, %rs6;
97+
; DISABLED-NEXT: add.rn.f32 %f9, %f8, %f7;
98+
; DISABLED-NEXT: cvt.rn.f16.f32 %rs8, %f9;
99+
; DISABLED-NEXT: cvt.f32.f16 %f10, %rs8;
100+
; DISABLED-NEXT: cvt.f32.f16 %f11, %rs5;
101+
; DISABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
102+
; DISABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
103103
; DISABLED-NEXT: st.param.b16 [func_retval0], %rs9;
104104
; DISABLED-NEXT: ret;
105105
%p.1 = getelementptr half, ptr %p, i32 1
@@ -121,37 +121,37 @@ define half @fh(ptr %p) {
121121
define float @ff(ptr %p) {
122122
; ENABLED-LABEL: ff(
123123
; ENABLED: {
124-
; ENABLED-NEXT: .reg .b32 %r<10>;
124+
; ENABLED-NEXT: .reg .b32 %f<10>;
125125
; ENABLED-NEXT: .reg .b64 %rd<2>;
126126
; ENABLED-EMPTY:
127127
; ENABLED-NEXT: // %bb.0:
128128
; ENABLED-NEXT: ld.param.b64 %rd1, [ff_param_0];
129-
; ENABLED-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
130-
; ENABLED-NEXT: ld.b32 %r5, [%rd1+16];
131-
; ENABLED-NEXT: add.rn.f32 %r6, %r1, %r2;
132-
; ENABLED-NEXT: add.rn.f32 %r7, %r3, %r4;
133-
; ENABLED-NEXT: add.rn.f32 %r8, %r6, %r7;
134-
; ENABLED-NEXT: add.rn.f32 %r9, %r8, %r5;
135-
; ENABLED-NEXT: st.param.b32 [func_retval0], %r9;
129+
; ENABLED-NEXT: ld.v4.b32 {%f1, %f2, %f3, %f4}, [%rd1];
130+
; ENABLED-NEXT: ld.b32 %f5, [%rd1+16];
131+
; ENABLED-NEXT: add.rn.f32 %f6, %f1, %f2;
132+
; ENABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
133+
; ENABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
134+
; ENABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
135+
; ENABLED-NEXT: st.param.b32 [func_retval0], %f9;
136136
; ENABLED-NEXT: ret;
137137
;
138138
; DISABLED-LABEL: ff(
139139
; DISABLED: {
140-
; DISABLED-NEXT: .reg .b32 %r<10>;
140+
; DISABLED-NEXT: .reg .b32 %f<10>;
141141
; DISABLED-NEXT: .reg .b64 %rd<2>;
142142
; DISABLED-EMPTY:
143143
; DISABLED-NEXT: // %bb.0:
144144
; DISABLED-NEXT: ld.param.b64 %rd1, [ff_param_0];
145-
; DISABLED-NEXT: ld.b32 %r1, [%rd1];
146-
; DISABLED-NEXT: ld.b32 %r2, [%rd1+4];
147-
; DISABLED-NEXT: ld.b32 %r3, [%rd1+8];
148-
; DISABLED-NEXT: ld.b32 %r4, [%rd1+12];
149-
; DISABLED-NEXT: ld.b32 %r5, [%rd1+16];
150-
; DISABLED-NEXT: add.rn.f32 %r6, %r1, %r2;
151-
; DISABLED-NEXT: add.rn.f32 %r7, %r3, %r4;
152-
; DISABLED-NEXT: add.rn.f32 %r8, %r6, %r7;
153-
; DISABLED-NEXT: add.rn.f32 %r9, %r8, %r5;
154-
; DISABLED-NEXT: st.param.b32 [func_retval0], %r9;
145+
; DISABLED-NEXT: ld.b32 %f1, [%rd1];
146+
; DISABLED-NEXT: ld.b32 %f2, [%rd1+4];
147+
; DISABLED-NEXT: ld.b32 %f3, [%rd1+8];
148+
; DISABLED-NEXT: ld.b32 %f4, [%rd1+12];
149+
; DISABLED-NEXT: ld.b32 %f5, [%rd1+16];
150+
; DISABLED-NEXT: add.rn.f32 %f6, %f1, %f2;
151+
; DISABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
152+
; DISABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
153+
; DISABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
154+
; DISABLED-NEXT: st.param.b32 [func_retval0], %f9;
155155
; DISABLED-NEXT: ret;
156156
%p.1 = getelementptr float, ptr %p, i32 1
157157
%p.2 = getelementptr float, ptr %p, i32 2

0 commit comments

Comments
 (0)