Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ def BGE : BranchCC_rri<0b101, "bge">;
def BLTU : BranchCC_rri<0b110, "bltu">;
def BGEU : BranchCC_rri<0b111, "bgeu">;

let IsSignExtendingOpW = 1, canFoldAsLoad = 1 in {
let IsSignExtendingOpW = 1, canFoldAsLoad = 1, isReMaterializable = 1 in {
def LB : Load_ri<0b000, "lb">, Sched<[WriteLDB, ReadMemBase]>;
def LH : Load_ri<0b001, "lh">, Sched<[WriteLDH, ReadMemBase]>;
def LW : Load_ri<0b010, "lw">, Sched<[WriteLDW, ReadMemBase]>;
Expand Down Expand Up @@ -889,7 +889,7 @@ def CSRRCI : CSR_ii<0b111, "csrrci">;
/// RV64I instructions

let Predicates = [IsRV64] in {
let canFoldAsLoad = 1 in {
let canFoldAsLoad = 1, isReMaterializable = 1 in {
def LWU : Load_ri<0b110, "lwu">, Sched<[WriteLDW, ReadMemBase]>;
def LD : Load_ri<0b011, "ld">, Sched<[WriteLDD, ReadMemBase]>;
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfoD.td
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ defvar DExtsRV64 = [DExt, ZdinxExt];
//===----------------------------------------------------------------------===//

let Predicates = [HasStdExtD] in {
let canFoldAsLoad = 1 in
let canFoldAsLoad = 1, isReMaterializable = 1 in
def FLD : FPLoad_r<0b011, "fld", FPR64, WriteFLD64>;

// Operands for stores are in the order srcreg, base, offset rather than
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfoF.td
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ class PseudoFROUND<DAGOperand Ty, ValueType vt, ValueType intvt = XLenVT>
//===----------------------------------------------------------------------===//

let Predicates = [HasStdExtF] in {
let canFoldAsLoad = 1 in
let canFoldAsLoad = 1, isReMaterializable = 1 in
def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>;

// Operands for stores are in the order srcreg, base, offset rather than
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ defvar ZfhminDExts = [ZfhminDExt, ZhinxminZdinxExt, ZhinxminZdinx32Ext];
//===----------------------------------------------------------------------===//

let Predicates = [HasHalfFPLoadStoreMove] in {
let canFoldAsLoad = 1 in
let canFoldAsLoad = 1, isReMaterializable = 1 in
def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;

// Operands for stores are in the order srcreg, base, offset rather than
Expand Down
167 changes: 166 additions & 1 deletion llvm/test/CodeGen/RISCV/remat.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O1 -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -O1 -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin -verify-machineinstrs < %s | FileCheck %s

@a = common global i32 0, align 4
@l = common global i32 0, align 4
Expand Down Expand Up @@ -200,3 +200,168 @@ for.end: ; preds = %for.inc, %entry
}

declare i32 @foo(i32, i32, i32, i32, i32, i32)

define void @remat_load(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, double %8, double %9, double %10, double %11, double %12, double %13, double %14, double %15, i8 %stackarg0, i16 %stackarg1, i32 %stackarg2, i64 %stackarg3, half %stackarg4, bfloat %stackarg5, float %stackarg6, double %stackarg7, ptr %p) nounwind {
; CHECK-LABEL: remat_load:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -208
; CHECK-NEXT: sd ra, 200(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 192(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s1, 184(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s2, 176(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s3, 168(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s4, 160(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s5, 152(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s6, 144(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s7, 136(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s8, 128(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s9, 120(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s10, 112(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s11, 104(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs0, 96(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs1, 88(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs2, 80(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs3, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs4, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs5, 56(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs6, 48(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs7, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs8, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs9, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs10, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs11, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: fld fa5, 264(sp)
; CHECK-NEXT: flw fa4, 256(sp)
; CHECK-NEXT: flh fa3, 248(sp)
; CHECK-NEXT: flh fa2, 240(sp)
; CHECK-NEXT: ld a0, 272(sp)
; CHECK-NEXT: lbu a4, 208(sp)
; CHECK-NEXT: lh a3, 216(sp)
; CHECK-NEXT: lw a2, 224(sp)
; CHECK-NEXT: ld a1, 232(sp)
; CHECK-NEXT: sb a4, 0(a0)
; CHECK-NEXT: sh a3, 0(a0)
; CHECK-NEXT: sw a2, 0(a0)
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: fsh fa2, 0(a0)
; CHECK-NEXT: fsh fa3, 0(a0)
; CHECK-NEXT: fsw fa4, 0(a0)
; CHECK-NEXT: fsd fa5, 0(a0)
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ld a0, 272(sp)
; CHECK-NEXT: lbu a1, 208(sp)
; CHECK-NEXT: sb a1, 0(a0)
; CHECK-NEXT: lh a1, 216(sp)
; CHECK-NEXT: sh a1, 0(a0)
; CHECK-NEXT: lw a1, 224(sp)
; CHECK-NEXT: sw a1, 0(a0)
; CHECK-NEXT: ld a1, 232(sp)
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: flh fa5, 240(sp)
; CHECK-NEXT: fsh fa5, 0(a0)
; CHECK-NEXT: flh fa5, 248(sp)
; CHECK-NEXT: fsh fa5, 0(a0)
; CHECK-NEXT: flw fa5, 256(sp)
; CHECK-NEXT: fsw fa5, 0(a0)
; CHECK-NEXT: fld fa5, 264(sp)
; CHECK-NEXT: fsd fa5, 0(a0)
; CHECK-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s2, 176(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s3, 168(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s4, 160(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s5, 152(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s6, 144(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s7, 136(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s8, 128(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s9, 120(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s10, 112(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s11, 104(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs0, 96(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs1, 88(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs2, 80(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs3, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs4, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs5, 56(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs6, 48(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs7, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs8, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs9, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs10, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs11, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 208
; CHECK-NEXT: ret
entry:
; Use every stack argument once before the inline asm so that each one is
; loaded from its incoming stack slot into a register ahead of the clobber.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain this comment? This doesn't make sense to me.

store volatile i8 %stackarg0, ptr %p
store volatile i16 %stackarg1, ptr %p
store volatile i32 %stackarg2, ptr %p
store volatile i64 %stackarg3, ptr %p
store volatile half %stackarg4, ptr %p
store volatile bfloat %stackarg5, ptr %p
store volatile float %stackarg6, ptr %p
store volatile double %stackarg7, ptr %p
tail call void asm sideeffect "", "~{x1},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31},~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
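; Use them again after the asm has clobbered the registers that held them: each
; value should be rematerialized by reloading it from its incoming stack slot
; rather than being spilled to a new slot and reloaded.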
store volatile i8 %stackarg0, ptr %p
store volatile i16 %stackarg1, ptr %p
store volatile i32 %stackarg2, ptr %p
store volatile i64 %stackarg3, ptr %p
store volatile half %stackarg4, ptr %p
store volatile bfloat %stackarg5, ptr %p
store volatile float %stackarg6, ptr %p
store volatile double %stackarg7, ptr %p
ret void
}

; Negative test: the load of @const is currently not rematerialized. As the
; assertions below show, the loaded value is spilled to 0(sp) before the inline
; asm and reloaded from that slot afterwards.
;
; We could remat the load of the constant global if we extended the live
; interval of the high bits of the address.

@const = constant i32 42
define i32 @constglobal_load() nounwind {
; CHECK-LABEL: constglobal_load:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -112
; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: lui a0, %hi(const)
; CHECK-NEXT: lw a0, %lo(const)(a0)
; CHECK-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 112
; CHECK-NEXT: ret
entry:
%global = load i32, ptr @const
; Clobber x1 and x3-x31 so that %global cannot simply stay in a register
; across the asm.
tail call void asm sideeffect "", "~{x1},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"()
; Use the loaded value after the clobber so it must survive the asm.
%a = add i32 %global, 1
ret i32 %a
}
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/RISCV/rvv/pr95865.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscal
; CHECK-NEXT: li t0, 12
; CHECK-NEXT: li s0, 4
; CHECK-NEXT: li t1, 20
; CHECK-NEXT: ld a1, 112(sp)
; CHECK-NEXT: sd a1, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: andi t3, a4, 1
Expand Down Expand Up @@ -142,7 +140,7 @@ define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscal
; CHECK-NEXT: j .LBB0_11
; CHECK-NEXT: .LBB0_12: # %for.body7.us.19
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld a0, 112(sp)
; CHECK-NEXT: vmv.s.x v16, a0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
Expand Down