[RISCV] Add tune info for postra scheduling direction #115864
Conversation
@llvm/pr-subscribers-tablegen @llvm/pr-subscribers-llvm-globalisel

Author: Pengcheng Wang (wangpc-pp)

Changes: This helps improve the scheduling result (more bubbles can be filled). There are two commits in this PR: […]

Patch is 15.68 MiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/115864.diff

1029 Files Affected:
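Background for readers outside the scheduler: the post-RA machine scheduler consults the subtarget's `overridePostRASchedPolicy` hook for each scheduling region, and the direction is encoded by two booleans on `llvm::MachineSchedPolicy`. Below is a minimal sketch of that mapping; `OnlyTopDown`/`OnlyBottomUp` are the real policy fields, while the enum and helper are illustrative and not part of this patch.

```cpp
// Sketch: how MachineSchedPolicy encodes the scheduling direction.
// OnlyTopDown/OnlyBottomUp are real fields of llvm::MachineSchedPolicy;
// the enum and helper are illustrative only.
#include "llvm/CodeGen/MachineScheduler.h"

enum class SchedDirection { TopDown, BottomUp, Bidirectional };

static void applyDirection(llvm::MachineSchedPolicy &Policy,
                           SchedDirection Dir) {
  switch (Dir) {
  case SchedDirection::TopDown:       // issue from the top of the region
    Policy.OnlyTopDown = true;
    Policy.OnlyBottomUp = false;
    break;
  case SchedDirection::BottomUp:      // issue from the bottom of the region
    Policy.OnlyTopDown = false;
    Policy.OnlyBottomUp = true;
    break;
  case SchedDirection::Bidirectional: // consider both boundaries each step
    Policy.OnlyTopDown = false;
    Policy.OnlyBottomUp = false;
    break;
  }
}
```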
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index e7db1ededf383b..3fb756b0fab170 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -16,6 +16,7 @@
#include "RISCV.h"
#include "RISCVFrameLowering.h"
#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/MC/TargetRegistry.h"
@@ -199,3 +200,11 @@ unsigned RISCVSubtarget::getMinimumJumpTableEntries() const {
? RISCVMinimumJumpTableEntries
: TuneInfo->MinimumJumpTableEntries;
}
+
+void RISCVSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy,
+ unsigned NumRegionInstrs) const {
+ // Do bidirectional scheduling since it provides more balanced scheduling,
+ // leading to better performance. This will increase compile time.
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index f59a3737ae76f9..5d1d64f5694243 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -124,7 +124,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
}
bool enableMachineScheduler() const override { return true; }
- bool enablePostRAScheduler() const override { return UsePostRAScheduler; }
+ bool enablePostRAScheduler() const override {
+ // FIXME: Just for tests, will revert this change when landing.
+ return true;
+ }
Align getPrefFunctionAlignment() const {
return Align(TuneInfo->PrefFunctionAlignment);
@@ -327,6 +330,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
unsigned getTailDupAggressiveThreshold() const {
return TuneInfo->TailDupAggressiveThreshold;
}
+
+ void overridePostRASchedPolicy(MachineSchedPolicy &Policy,
+ unsigned NumRegionInstrs) const override;
};
} // End llvm namespace
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
index 330f8b16065f13..45eb3478eef739 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
@@ -25,18 +25,18 @@ define i32 @add_i8_signext_i32(i8 %a, i8 %b) {
; RV32IM-LABEL: add_i8_signext_i32:
; RV32IM: # %bb.0: # %entry
; RV32IM-NEXT: slli a0, a0, 24
-; RV32IM-NEXT: srai a0, a0, 24
; RV32IM-NEXT: slli a1, a1, 24
; RV32IM-NEXT: srai a1, a1, 24
+; RV32IM-NEXT: srai a0, a0, 24
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: add_i8_signext_i32:
; RV64IM: # %bb.0: # %entry
; RV64IM-NEXT: slli a0, a0, 56
-; RV64IM-NEXT: srai a0, a0, 56
; RV64IM-NEXT: slli a1, a1, 56
; RV64IM-NEXT: srai a1, a1, 56
+; RV64IM-NEXT: srai a0, a0, 56
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: ret
entry:
@@ -49,15 +49,15 @@ entry:
define i32 @add_i8_zeroext_i32(i8 %a, i8 %b) {
; RV32IM-LABEL: add_i8_zeroext_i32:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: andi a0, a0, 255
; RV32IM-NEXT: andi a1, a1, 255
+; RV32IM-NEXT: andi a0, a0, 255
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: add_i8_zeroext_i32:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: andi a0, a0, 255
; RV64IM-NEXT: andi a1, a1, 255
+; RV64IM-NEXT: andi a0, a0, 255
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: ret
entry:
@@ -404,8 +404,8 @@ define i64 @add_i64(i64 %a, i64 %b) {
; RV32IM-LABEL: add_i64:
; RV32IM: # %bb.0: # %entry
; RV32IM-NEXT: add a0, a0, a2
-; RV32IM-NEXT: sltu a2, a0, a2
; RV32IM-NEXT: add a1, a1, a3
+; RV32IM-NEXT: sltu a2, a0, a2
; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: ret
;
@@ -439,8 +439,8 @@ define i64 @sub_i64(i64 %a, i64 %b) {
; RV32IM-LABEL: sub_i64:
; RV32IM: # %bb.0: # %entry
; RV32IM-NEXT: sub a4, a0, a2
-; RV32IM-NEXT: sltu a0, a0, a2
; RV32IM-NEXT: sub a1, a1, a3
+; RV32IM-NEXT: sltu a0, a0, a2
; RV32IM-NEXT: sub a1, a1, a0
; RV32IM-NEXT: mv a0, a4
; RV32IM-NEXT: ret
@@ -460,8 +460,8 @@ define i64 @subi_i64(i64 %a) {
; RV32IM-NEXT: lui a2, 1048275
; RV32IM-NEXT: addi a2, a2, -1548
; RV32IM-NEXT: add a0, a0, a2
-; RV32IM-NEXT: sltu a2, a0, a2
; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: sltu a2, a0, a2
; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: ret
;
@@ -480,8 +480,8 @@ define i64 @neg_i64(i64 %a) {
; RV32IM-LABEL: neg_i64:
; RV32IM: # %bb.0: # %entry
; RV32IM-NEXT: neg a2, a0
-; RV32IM-NEXT: snez a0, a0
; RV32IM-NEXT: neg a1, a1
+; RV32IM-NEXT: snez a0, a0
; RV32IM-NEXT: sub a1, a1, a0
; RV32IM-NEXT: mv a0, a2
; RV32IM-NEXT: ret
@@ -500,8 +500,8 @@ entry:
define i64 @and_i64(i64 %a, i64 %b) {
; RV32IM-LABEL: and_i64:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: and a0, a0, a2
; RV32IM-NEXT: and a1, a1, a3
+; RV32IM-NEXT: and a0, a0, a2
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: and_i64:
@@ -516,8 +516,8 @@ entry:
define i64 @andi_i64(i64 %a) {
; RV32IM-LABEL: andi_i64:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: andi a0, a0, 1234
; RV32IM-NEXT: li a1, 0
+; RV32IM-NEXT: andi a0, a0, 1234
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: andi_i64:
@@ -532,8 +532,8 @@ entry:
define i64 @or_i64(i64 %a, i64 %b) {
; RV32IM-LABEL: or_i64:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: or a0, a0, a2
; RV32IM-NEXT: or a1, a1, a3
+; RV32IM-NEXT: or a0, a0, a2
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: or_i64:
@@ -563,8 +563,8 @@ entry:
define i64 @xor_i64(i64 %a, i64 %b) {
; RV32IM-LABEL: xor_i64:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: xor a0, a0, a2
; RV32IM-NEXT: xor a1, a1, a3
+; RV32IM-NEXT: xor a0, a0, a2
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: xor_i64:
@@ -597,8 +597,8 @@ define i64 @mul_i64(i64 %a, i64 %b) {
; RV32IM-NEXT: mul a4, a0, a2
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: mul a3, a0, a3
-; RV32IM-NEXT: mulhu a0, a0, a2
; RV32IM-NEXT: add a1, a1, a3
+; RV32IM-NEXT: mulhu a0, a0, a2
; RV32IM-NEXT: add a1, a1, a0
; RV32IM-NEXT: mv a0, a4
; RV32IM-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
index f33ba1d7a302ef..acd32cff21cad3 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
@@ -6,18 +6,18 @@ define i2 @bitreverse_i2(i2 %x) {
; RV32-LABEL: bitreverse_i2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: andi a1, a1, 2
; RV32-NEXT: andi a0, a0, 3
; RV32-NEXT: srli a0, a0, 1
+; RV32-NEXT: andi a1, a1, 2
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: bitreverse_i2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a0, 1
-; RV64-NEXT: andi a1, a1, 2
; RV64-NEXT: andi a0, a0, 3
; RV64-NEXT: srli a0, a0, 1
+; RV64-NEXT: andi a1, a1, 2
; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: ret
%rev = call i2 @llvm.bitreverse.i2(i2 %x)
@@ -31,8 +31,8 @@ define i3 @bitreverse_i3(i3 %x) {
; RV32-NEXT: andi a1, a1, 4
; RV32-NEXT: andi a0, a0, 7
; RV32-NEXT: andi a2, a0, 2
-; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: ret
;
@@ -42,8 +42,8 @@ define i3 @bitreverse_i3(i3 %x) {
; RV64-NEXT: andi a1, a1, 4
; RV64-NEXT: andi a0, a0, 7
; RV64-NEXT: andi a2, a0, 2
-; RV64-NEXT: or a1, a1, a2
; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: or a1, a1, a2
; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: ret
%rev = call i3 @llvm.bitreverse.i3(i3 %x)
@@ -61,8 +61,8 @@ define i4 @bitreverse_i4(i4 %x) {
; RV32-NEXT: andi a0, a0, 15
; RV32-NEXT: srli a2, a0, 1
; RV32-NEXT: andi a2, a2, 2
-; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: srli a0, a0, 3
+; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: ret
;
@@ -76,8 +76,8 @@ define i4 @bitreverse_i4(i4 %x) {
; RV64-NEXT: andi a0, a0, 15
; RV64-NEXT: srli a2, a0, 1
; RV64-NEXT: andi a2, a2, 2
-; RV64-NEXT: or a1, a1, a2
; RV64-NEXT: srli a0, a0, 3
+; RV64-NEXT: or a1, a1, a2
; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: ret
%rev = call i4 @llvm.bitreverse.i4(i4 %x)
@@ -103,8 +103,8 @@ define i7 @bitreverse_i7(i7 %x) {
; RV32-NEXT: srli a3, a0, 4
; RV32-NEXT: andi a3, a3, 2
; RV32-NEXT: or a2, a2, a3
-; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: srli a0, a0, 6
+; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: ret
;
@@ -126,8 +126,8 @@ define i7 @bitreverse_i7(i7 %x) {
; RV64-NEXT: srli a3, a0, 4
; RV64-NEXT: andi a3, a3, 2
; RV64-NEXT: or a2, a2, a3
-; RV64-NEXT: or a1, a1, a2
; RV64-NEXT: srli a0, a0, 6
+; RV64-NEXT: or a1, a1, a2
; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: ret
%rev = call i7 @llvm.bitreverse.i7(i7 %x)
@@ -163,9 +163,9 @@ define i24 @bitreverse_i24(i24 %x) {
; RV32-NEXT: addi a1, a1, -1366
; RV32-NEXT: and a2, a1, a2
; RV32-NEXT: and a2, a0, a2
-; RV32-NEXT: srli a2, a2, 1
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: srli a2, a2, 1
; RV32-NEXT: or a0, a2, a0
; RV32-NEXT: ret
;
@@ -197,9 +197,9 @@ define i24 @bitreverse_i24(i24 %x) {
; RV64-NEXT: addiw a1, a1, -1366
; RV64-NEXT: and a2, a1, a2
; RV64-NEXT: and a2, a0, a2
-; RV64-NEXT: srli a2, a2, 1
; RV64-NEXT: slli a0, a0, 1
; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: srli a2, a2, 1
; RV64-NEXT: or a0, a2, a0
; RV64-NEXT: ret
%rev = call i24 @llvm.bitreverse.i24(i24 %x)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll b/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll
index 70d1b25309c844..9bea20efb3eccd 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll
@@ -46,10 +46,10 @@ define void @constant_fold_barrier_i128(ptr %p) {
; RV32-NEXT: or a1, a4, a1
; RV32-NEXT: add a5, a5, zero
; RV32-NEXT: add a1, a5, a1
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a6, 4(a0)
-; RV32-NEXT: sw a3, 8(a0)
; RV32-NEXT: sw a1, 12(a0)
+; RV32-NEXT: sw a3, 8(a0)
+; RV32-NEXT: sw a6, 4(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: ret
entry:
%x = load i128, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv64.ll b/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv64.ll
index 51e8b6da39d099..be4ade025b413f 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv64.ll
@@ -25,8 +25,8 @@ define i128 @constant_fold_barrier_i128(i128 %x) {
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: and a1, a1, zero
; RV64-NEXT: add a0, a0, a2
-; RV64-NEXT: sltu a2, a0, a2
; RV64-NEXT: add a1, a1, zero
+; RV64-NEXT: sltu a2, a0, a2
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll
index a4f92640697bc7..33ac5cc5a07443 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll
@@ -43,8 +43,8 @@ define i32 @fcvt_wu_d(double %a) nounwind {
define i32 @fcvt_wu_d_multiple_use(double %x, ptr %y) nounwind {
; RV32IFD-LABEL: fcvt_wu_d_multiple_use:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: fcvt.wu.d a1, fa0, rtz
; RV32IFD-NEXT: li a0, 1
+; RV32IFD-NEXT: fcvt.wu.d a1, fa0, rtz
; RV32IFD-NEXT: beqz a1, .LBB4_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a0, a1
@@ -156,8 +156,8 @@ define i64 @fmv_x_d(double %a, double %b) nounwind {
; RV32IFD-NEXT: addi sp, sp, -16
; RV32IFD-NEXT: fadd.d fa5, fa0, fa1
; RV32IFD-NEXT: fsd fa5, 8(sp)
-; RV32IFD-NEXT: lw a0, 8(sp)
; RV32IFD-NEXT: lw a1, 12(sp)
+; RV32IFD-NEXT: lw a0, 8(sp)
; RV32IFD-NEXT: addi sp, sp, 16
; RV32IFD-NEXT: ret
;
@@ -214,8 +214,8 @@ define double @fmv_d_x(i64 %a, i64 %b) nounwind {
; RV32IFD-NEXT: sw a0, 8(sp)
; RV32IFD-NEXT: sw a1, 12(sp)
; RV32IFD-NEXT: fld fa5, 8(sp)
-; RV32IFD-NEXT: sw a2, 8(sp)
; RV32IFD-NEXT: sw a3, 12(sp)
+; RV32IFD-NEXT: sw a2, 8(sp)
; RV32IFD-NEXT: fld fa4, 8(sp)
; RV32IFD-NEXT: fadd.d fa0, fa5, fa4
; RV32IFD-NEXT: addi sp, sp, 16
@@ -223,8 +223,8 @@ define double @fmv_d_x(i64 %a, i64 %b) nounwind {
;
; RV64IFD-LABEL: fmv_d_x:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: fmv.d.x fa5, a0
; RV64IFD-NEXT: fmv.d.x fa4, a1
+; RV64IFD-NEXT: fmv.d.x fa5, a0
; RV64IFD-NEXT: fadd.d fa0, fa5, fa4
; RV64IFD-NEXT: ret
%1 = bitcast i64 %a to double
@@ -330,17 +330,17 @@ define signext i16 @fcvt_w_s_i16(double %a) nounwind {
define zeroext i16 @fcvt_wu_s_i16(double %a) nounwind {
; RV32IFD-LABEL: fcvt_wu_s_i16:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
; RV32IFD-NEXT: lui a1, 16
; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
; RV32IFD-NEXT: and a0, a0, a1
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: fcvt_wu_s_i16:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz
; RV64IFD-NEXT: lui a1, 16
; RV64IFD-NEXT: addiw a1, a1, -1
+; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz
; RV64IFD-NEXT: and a0, a0, a1
; RV64IFD-NEXT: ret
%1 = fptoui double %a to i16
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll
index 7e96d529af36ff..6ccef58d488108 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll
@@ -27,8 +27,8 @@ define i32 @fcvt_wu_s(float %a) nounwind {
define i32 @fcvt_wu_s_multiple_use(float %x, ptr %y) nounwind {
; RV32IF-LABEL: fcvt_wu_s_multiple_use:
; RV32IF: # %bb.0:
-; RV32IF-NEXT: fcvt.wu.s a1, fa0, rtz
; RV32IF-NEXT: li a0, 1
+; RV32IF-NEXT: fcvt.wu.s a1, fa0, rtz
; RV32IF-NEXT: beqz a1, .LBB2_2
; RV32IF-NEXT: # %bb.1:
; RV32IF-NEXT: mv a0, a1
@@ -120,8 +120,8 @@ define float @fcvt_s_wu_load(ptr %p) nounwind {
define float @fmv_w_x(i32 %a, i32 %b) nounwind {
; CHECKIF-LABEL: fmv_w_x:
; CHECKIF: # %bb.0:
-; CHECKIF-NEXT: fmv.w.x fa5, a0
; CHECKIF-NEXT: fmv.w.x fa4, a1
+; CHECKIF-NEXT: fmv.w.x fa5, a0
; CHECKIF-NEXT: fadd.s fa0, fa5, fa4
; CHECKIF-NEXT: ret
; Ensure fmv.w.x is generated even for a soft float calling convention
@@ -302,17 +302,17 @@ define signext i16 @fcvt_w_s_i16(float %a) nounwind {
define zeroext i16 @fcvt_wu_s_i16(float %a) nounwind {
; RV32IF-LABEL: fcvt_wu_s_i16:
; RV32IF: # %bb.0:
-; RV32IF-NEXT: fcvt.wu.s a0, fa0, rtz
; RV32IF-NEXT: lui a1, 16
; RV32IF-NEXT: addi a1, a1, -1
+; RV32IF-NEXT: fcvt.wu.s a0, fa0, rtz
; RV32IF-NEXT: and a0, a0, a1
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: fcvt_wu_s_i16:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz
; RV64IF-NEXT: lui a1, 16
; RV64IF-NEXT: addiw a1, a1, -1
+; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz
; RV64IF-NEXT: and a0, a0, a1
; RV64IF-NEXT: ret
%1 = fptoui float %a to i16
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv32.ll b/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv32.ll
index 1757e5550f81ae..250e8edafa836f 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv32.ll
@@ -9,8 +9,8 @@
define float @fadd(float %x, float %y) {
; RV32I-LABEL: fadd:
; RV32I: # %bb.0:
-; RV32I-NEXT: fmv.w.x fa5, a0
; RV32I-NEXT: fmv.w.x fa4, a1
+; RV32I-NEXT: fmv.w.x fa5, a0
; RV32I-NEXT: fadd.s fa5, fa5, fa4
; RV32I-NEXT: fmv.x.w a0, fa5
; RV32I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll b/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll
index 287bbbad6d52d7..717ecac7300b1b 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll
@@ -9,8 +9,8 @@
define double @fadd_f64(double %x, double %y) {
; RV64I-LABEL: fadd_f64:
; RV64I: # %bb.0:
-; RV64I-NEXT: fmv.d.x fa5, a0
; RV64I-NEXT: fmv.d.x fa4, a1
+; RV64I-NEXT: fmv.d.x fa5, a0
; RV64I-NEXT: fadd.d fa5, fa5, fa4
; RV64I-NEXT: fmv.x.d a0, fa5
; RV64I-NEXT: ret
@@ -30,6 +30,13 @@ define float @fadd_f32(float %x, float %y) {
; RV32I-NEXT: fadd.d fa5, fa5, fa4
; RV32I-NEXT: fmv.x.d a0, fa5
; RV32I-NEXT: ret
+; RV64I-LABEL: fadd_f32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: fmv.w.x fa4, a1
+; RV64I-NEXT: fmv.w.x fa5, a0
+; RV64I-NEXT: fadd.s fa5, fa5, fa4
+; RV64I-NEXT: fmv.x.w a0, fa5
+; RV64I-NEXT: ret
%a = fadd float %x, %y
ret float %a
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/iabs.ll b/llvm/test/CodeGen/RISCV/GlobalISel/iabs.ll
index 05989c310541b8..82540a3976f357 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/iabs.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/iabs.ll
@@ -120,8 +120,8 @@ define i64 @abs64(i64 %x) {
; RV32I-NEXT: sltu a3, a0, a2
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: add a1, a1, a3
-; RV32I-NEXT: xor a0, a0, a2
; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: xor a0, a0, a2
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: abs64:
@@ -131,8 +131,8 @@ define i64 @abs64(i64 %x) {
; RV32ZBB-NEXT: sltu a3, a0, a2
; RV32ZBB-NEXT: add a1, a1, a2
; RV32ZBB-NEXT: add a1, a1, a3
-; RV32ZBB-NEXT: xor a0, a0, a2
; RV32ZBB-NEXT: xor a1, a1, a2
+; RV32ZBB-NEXT: xor a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: abs64:
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/jumptable.ll b/llvm/test/CodeGen/RISCV/GlobalISel/jumptable.ll
index 9dda1a241e042b..018c135cc8626c 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/jumptable.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/jumptable.ll
@@ -15,13 +15,13 @@
define void @above_threshold(i32 signext %in, ptr %out) nounwind {
; RV32I-SMALL-LABEL: above_threshold:
; RV32I-SMALL: # %bb.0: # %entry
-; RV32I-SMALL-NEXT: li a2, 5
; RV32I-SMALL-NEXT: addi a0, a0, -1
+; RV32I-SMALL-NEXT: li a2, 5
; RV32I-SMALL-NEXT: bltu a2, a0, .LBB0_9
; RV32I-SMALL-NEXT: # %bb.1: # %entry
; RV32I-SMALL-NEXT: lui a2, %hi(.LJTI0_0)
-; RV32I-SMALL-NEXT: addi a2, a2, %lo(.LJTI0_0)
; RV32I-SMALL-NEXT: slli a0, a0, 2
+; RV32I-SMALL-NEXT: addi a2, a2, %lo(.LJTI0_0)
; RV32I-SMALL-NEXT: add a0, a2, a0
; RV32I-SMALL-NEXT: lw a0, 0(a0)
; RV32I-SMALL-NEXT: jr a0
@@ -49,14 +49,14 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind {
;
; RV32I-MEDIUM-LABEL: above_threshold:
; RV32I-MEDIUM: # %bb.0: # %entry
-; RV32I-MEDIUM-NEXT: li a2, 5
; RV32I-MEDIUM-NEXT: addi a0, a0, -1
+; RV32I-MEDIUM-NEXT: li a2, 5
; RV32I-MEDIUM-NEXT: bltu a2, a0, .LBB0_9
; RV32I-MEDIUM-NEXT: # %bb.1: # %entry
; RV32I-MEDIUM-NEXT: .Lpcrel_hi0:
; RV32I-MEDIUM-NEXT: auipc a2, %pcrel_hi(.LJTI0_0)
-; RV32I-MEDIUM-NEXT: addi a2, a2, %pcrel_lo(.Lpcrel_hi0)
; RV32I-MEDIUM-NEXT: slli a0, a0, 2
+; RV32I-MEDIUM-NEXT: addi a2, a2, %pcrel_lo(.Lpcrel_hi0)
; RV32I-MEDIUM-NEXT: add a0, a2, a0
; RV32I-MEDIUM-NEXT: lw a0, 0(a0)
; RV32I-MEDIUM-NEXT: jr a0
@@ -84,14 +84,14 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind {
;
; RV32I-PIC-LABEL: above_threshold:
; RV32I-PIC: # %bb.0: # %entry
-; RV32I-PIC-NEXT: li a2, 5
; RV32I-PIC-NEXT: addi a0, a0, -1
+; RV32I-PIC-NEXT: li a2, 5
; RV32I-PIC-NEXT: bltu a2, a0, .LBB0_9
; RV32I-PIC-NEXT: # %bb.1: # %entry
; RV32I-PIC-NEXT: .Lpcrel_hi0:
; RV32I-PIC-NEXT: auipc a2, %pcrel_hi(.LJTI0_0)
-; RV32I-PIC-NEXT: addi a2, a2, %pcrel_lo(.Lpcrel_hi0)
; RV32I-PIC-NEXT: slli a0, a0, 2
+; RV32I-PIC-NEXT: addi a2, a2, %pcrel_lo(.Lpcrel_hi0)
; RV32I-PIC-NEXT: add a0, a2, a0
; RV32I-PIC-NEXT: lw a0, 0(a0)
; RV32I-PIC-NEXT: add a0, a0, a2
@@ -120,13 +120,13 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind {
;
; RV64I-SMALL-LABEL: above_threshold:
; RV64...
[truncated]
preames left a comment:
At least for me, the diff here is too large to meaningfully review. I think we need an alternate form of justification that this is profitable. Can you present either some performance numbers or at minimum some kind of statistic that demonstrates value?
For context, I'm not particularly skeptical of the patch - it seems to make sense - the diffs are just way too big to be meaningfully skimmed.
How are we evaluating this change? On spills? On impact to dynamic IC? On impact to runtime on real hardware?
IIUC, PostRA scheduling basically won't impact spills and dynamic instruction count. I will show some cycles/IPC numbers from GEM5.
I got some results on different platforms.
The results differ across platforms, so it is really hard to determine a default value. I will make it a target feature and leave the default value to be […]
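One plausible shape for such a tuning hook, sketched under assumptions: the struct, field, and accessor names below are hypothetical illustrations; the patch's real plumbing goes through the TableGen-generated tune info (compare the `TuneInfo->` lookups in the diff above). Top-down is used as the fallback purely for illustration.

```cpp
// Hypothetical sketch: extend the per-CPU tune info with a direction field.
// All names here are illustrative, not the patch's actual identifiers.
enum class SchedDirection { TopDown, BottomUp, Bidirectional };

struct RISCVTuneInfoSketch {
  // ... existing fields such as PrefFunctionAlignment ...
  SchedDirection PostRASchedDirection = SchedDirection::TopDown;
};

// Mirrors accessors like getMinimumJumpTableEntries() in RISCVSubtarget.
SchedDirection getPostRASchedDirection(const RISCVTuneInfoSketch *TuneInfo) {
  // Fall back to top-down when no CPU-specific tune info is available.
  return TuneInfo ? TuneInfo->PostRASchedDirection : SchedDirection::TopDown;
}
```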
I have no objection to making it a tuning feature.
force-pushed from f618de6 to 4237baf
force-pushed from 4237baf to ae40c24
Let me make sure I understand. BottomUp and bidirectional RA scheduling were added last year by @michaelmaitland in 9106b58. I don't think we ended up enabling it inside SiFive. Have any other targets adopted it yet?
Not yet. And I just found some issues when enabling postra bidirectional scheduling: #116592 and #116584.
force-pushed from ae40c24 to c3841ef
Let's go with top-down by default since that's what other targets use.
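Combining the two sketches above, the tune-driven override would then reduce to a simple dispatch with top-down as the default. Again, everything here except `MachineSchedPolicy` and its two fields is hypothetical naming, not the patch's final code.

```cpp
// Hypothetical final shape: route the per-CPU tune value into the policy.
// applyDirection and getPostRASchedDirection are the sketches shown earlier.
void RISCVSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy,
                                               unsigned NumRegionInstrs) const {
  // Returns TopDown unless a CPU's tune info overrides it.
  applyDirection(Policy, getPostRASchedDirection());
}
```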
The results differ across platforms, so it is really hard to determine a common default value. Tune info for the post-RA scheduling direction is added, and CPUs can set their own preferred direction.
force-pushed from c3841ef to 4989eb0
Done.
michaelmaitland left a comment:
LGTM
topperc left a comment:
LGTM