diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 9cb53fb27a2d2..84b962b2a8607 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -768,7 +768,7 @@ def BGE : BranchCC_rri<0b101, "bge">; def BLTU : BranchCC_rri<0b110, "bltu">; def BGEU : BranchCC_rri<0b111, "bgeu">; -let IsSignExtendingOpW = 1, canFoldAsLoad = 1 in { +let IsSignExtendingOpW = 1, canFoldAsLoad = 1, isReMaterializable = 1 in { def LB : Load_ri<0b000, "lb">, Sched<[WriteLDB, ReadMemBase]>; def LH : Load_ri<0b001, "lh">, Sched<[WriteLDH, ReadMemBase]>; def LW : Load_ri<0b010, "lw">, Sched<[WriteLDW, ReadMemBase]>; @@ -889,7 +889,7 @@ def CSRRCI : CSR_ii<0b111, "csrrci">; /// RV64I instructions let Predicates = [IsRV64] in { -let canFoldAsLoad = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def LWU : Load_ri<0b110, "lwu">, Sched<[WriteLDW, ReadMemBase]>; def LD : Load_ri<0b011, "ld">, Sched<[WriteLDD, ReadMemBase]>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 4ffe3e62ac501..deacd41e6469a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -71,7 +71,7 @@ defvar DExtsRV64 = [DExt, ZdinxExt]; //===----------------------------------------------------------------------===// let Predicates = [HasStdExtD] in { -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def FLD : FPLoad_r<0b011, "fld", FPR64, WriteFLD64>; // Operands for stores are in the order srcreg, base, offset rather than diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index b30f8ec820c15..bd191001b75ec 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -330,7 +330,7 @@ class PseudoFROUND //===----------------------------------------------------------------------===// let Predicates = [HasStdExtF] in { -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>; // Operands for stores are in the order srcreg, base, offset rather than diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 1c6a5afcda49b..c172d1739ba61 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -90,7 +90,7 @@ defvar ZfhminDExts = [ZfhminDExt, ZhinxminZdinxExt, ZhinxminZdinx32Ext]; //===----------------------------------------------------------------------===// let Predicates = [HasHalfFPLoadStoreMove] in { -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>; // Operands for stores are in the order srcreg, base, offset rather than diff --git a/llvm/test/CodeGen/RISCV/remat.ll b/llvm/test/CodeGen/RISCV/remat.ll index 8490dd0877d30..532f833ed70c8 100644 --- a/llvm/test/CodeGen/RISCV/remat.ll +++ b/llvm/test/CodeGen/RISCV/remat.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O1 -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O1 -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin -verify-machineinstrs < %s | FileCheck %s @a = common global i32 0, align 4 @l = common global i32 0, align 4 @@ -200,3 +200,168 @@ for.end: ; preds = %for.inc, %entry } declare i32 @foo(i32, i32, i32, i32, i32, i32) + +define void @remat_load(i32 %0, i32 %1, i32 
%2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, double %8, double %9, double %10, double %11, double %12, double %13, double %14, double %15, i8 %stackarg0, i16 %stackarg1, i32 %stackarg2, i64 %stackarg3, half %stackarg4, bfloat %stackarg5, float %stackarg6, double %stackarg7, ptr %p) nounwind { +; CHECK-LABEL: remat_load: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -208 +; CHECK-NEXT: sd ra, 200(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 192(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 184(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s2, 176(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s3, 168(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s4, 160(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s5, 152(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s6, 144(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s7, 136(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s8, 128(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s9, 120(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s10, 112(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s11, 104(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 96(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs1, 88(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs2, 80(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs3, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs4, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs5, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs6, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs7, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs8, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs9, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs10, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs11, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: fld fa5, 264(sp) +; CHECK-NEXT: flw fa4, 256(sp) +; CHECK-NEXT: flh fa3, 248(sp) +; CHECK-NEXT: flh fa2, 240(sp) +; CHECK-NEXT: ld a0, 272(sp) +; CHECK-NEXT: lbu a4, 208(sp) +; CHECK-NEXT: lh a3, 216(sp) +; CHECK-NEXT: lw a2, 224(sp) +; CHECK-NEXT: ld a1, 232(sp) +; CHECK-NEXT: sb a4, 0(a0) +; CHECK-NEXT: sh a3, 0(a0) +; CHECK-NEXT: sw a2, 0(a0) +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: fsh fa2, 0(a0) +; CHECK-NEXT: fsh fa3, 0(a0) +; CHECK-NEXT: fsw fa4, 0(a0) +; CHECK-NEXT: fsd fa5, 0(a0) +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ld a0, 272(sp) +; CHECK-NEXT: lbu a1, 208(sp) +; CHECK-NEXT: sb a1, 0(a0) +; CHECK-NEXT: lh a1, 216(sp) +; CHECK-NEXT: sh a1, 0(a0) +; CHECK-NEXT: lw a1, 224(sp) +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ld a1, 232(sp) +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: flh fa5, 240(sp) +; CHECK-NEXT: fsh fa5, 0(a0) +; CHECK-NEXT: flh fa5, 248(sp) +; CHECK-NEXT: fsh fa5, 0(a0) +; CHECK-NEXT: flw fa5, 256(sp) +; CHECK-NEXT: fsw fa5, 0(a0) +; CHECK-NEXT: fld fa5, 264(sp) +; CHECK-NEXT: fsd fa5, 0(a0) +; CHECK-NEXT: ld ra, 200(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 192(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 184(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s2, 176(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s3, 168(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s4, 160(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s5, 152(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s6, 144(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s7, 136(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s8, 128(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s9, 120(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s10, 112(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s11, 104(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 96(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs1, 88(sp) # 8-byte Folded 
Reload
+; CHECK-NEXT: fld fs2, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs3, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs4, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs5, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs6, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs7, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs8, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs9, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs10, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs11, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 208
+; CHECK-NEXT: ret
+entry:
+ ; Force loading the stack arguments to create their live intervals
+ store volatile i8 %stackarg0, ptr %p
+ store volatile i16 %stackarg1, ptr %p
+ store volatile i32 %stackarg2, ptr %p
+ store volatile i64 %stackarg3, ptr %p
+ store volatile half %stackarg4, ptr %p
+ store volatile bfloat %stackarg5, ptr %p
+ store volatile float %stackarg6, ptr %p
+ store volatile double %stackarg7, ptr %p
+ tail call void asm sideeffect "", "~{x1},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31},~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
+ ; Now use them after spilling everything to force rematerialization
+ store volatile i8 %stackarg0, ptr %p
+ store volatile i16 %stackarg1, ptr %p
+ store volatile i32 %stackarg2, ptr %p
+ store volatile i64 %stackarg3, ptr %p
+ store volatile half %stackarg4, ptr %p
+ store volatile bfloat %stackarg5, ptr %p
+ store volatile float %stackarg6, ptr %p
+ store volatile double %stackarg7, ptr %p
+ ret void
+}
+
+; We could remat the load of the constant global if we extended the live
+; interval of the high bits of the address.
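+;
+; A sketch of why, based on the generated code checked below: the address is
+; materialized as `lui a0, %hi(const)` feeding `lw a0, %lo(const)(a0)`, and
+; the inline asm clobbers every GPR, ending the live interval of the lui
+; result. Without the high bits available, the lw cannot be trivially
+; rematerialized, so it is spilled and reloaded around the asm instead.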
+ +@const = constant i32 42 +define i32 @constglobal_load() nounwind { +; CHECK-LABEL: constglobal_load: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -112 +; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: lui a0, %hi(const) +; CHECK-NEXT: lw a0, %lo(const)(a0) +; CHECK-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: addiw a0, a0, 1 +; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 112 +; CHECK-NEXT: ret +entry: + %global = load i32, ptr @const + tail call void asm sideeffect "", "~{x1},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() + %a = add i32 %global, 1 + ret i32 %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll index ab9849631663c..01d66b344ec2e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll @@ -40,8 +40,6 @@ define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5,