-
Notifications
You must be signed in to change notification settings - Fork 5.1k
[RISC-V] Improve clamped subtract & increment #118530
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch |
For future PRs: these constructs are a good candidate to augment the if-conversion pass (#116581) |
Diffs are based on 172,775 contexts (51,182 MinOpts, 121,593 FullOpts). Overall (-256 bytes)
MinOpts (-8 bytes)
FullOpts (-248 bytes)
Example diffs
linux.riscv64.Checked.2.mch
-12 (-5.36%) : 38740.dasm - StackallocTests:Test20000_SkipLocalsInit(byref):System.Guid (FullOpts)
@@ -41,17 +41,14 @@ G_M62621_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0200 {s1}, byre
jalr a0 // <unknown method>
lui a0, 0xD1FFAB1E
addiw a0, a0, 0xD1FFAB1E
- sltu a1, sp, a0
- sub a0, sp, a0
- beqz a1, pc+8 (2 instructions)
- mv a0, zero
+ maxu a1, sp, a0
+ sub a0, a1, a0
lui a2, 0xD1FFAB1E
mv a1, sp
lw zero, 0xD1FFAB1E(a1)
sub a1, a1, a2
bgeu a1, a0, pc-8 (-2 instructions)
mv sp, a0
- mv a0, sp
lui t6, 0xD1FFAB1E
addiw t6, t6, 0xD1FFAB1E
slli t6, t6, 16
@@ -70,7 +67,7 @@ G_M62621_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0200 {s1}, byre
auipc ra, 0xD1FFAB1E
jalr ra // CORINFO_HELP_FAIL_FAST
; byrRegs -[s1]
- ;; size=140 bbWeight=1 PerfScore 62.50
+ ;; size=128 bbWeight=1 PerfScore 58.00
G_M62621_IG03: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, epilog, nogc
addi sp, fp, -24
ld s1, 40(sp)
@@ -81,7 +78,7 @@ G_M62621_IG03: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
RWD00 dq 9ABCDEF012345678h
-; Total bytes of code 224, prolog size 56, PerfScore 98.00, instruction count 42, allocated bytes for code 224 (MethodHash=43680b62) for method StackallocTests:Test20000_SkipLocalsInit(byref):System.Guid (FullOpts)
+; Total bytes of code 212, prolog size 56, PerfScore 93.50, instruction count 39, allocated bytes for code 212 (MethodHash=43680b62) for method StackallocTests:Test20000_SkipLocalsInit(byref):System.Guid (FullOpts)
; ============================================================
Unwind Info:
@@ -92,7 +89,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 56 (0x00038) Actual length = 224 (0x0000e0)
+ Function Length : 53 (0x00035) Actual length = 212 (0x0000d4)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-8 (-2.35%) : 124769.dasm - T:dirtyStack() (MinOpts)
@@ -61,18 +61,16 @@ G_M4777_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
mv a1, a0
addi a1, a1, 0xD1FFAB1E
andi a1, a1, -16
- sltu a2, sp, a1
- sub a1, sp, a1
- beqz a2, pc+8 (2 instructions)
- mv a1, zero
+ maxu a2, sp, a1
+ sub a1, a2, a1
lui a3, 0xD1FFAB1E
mv a2, sp
lw zero, 0xD1FFAB1E(a2)
sub a2, a2, a3
bgeu a2, a1, pc-8 (-2 instructions)
mv sp, a1
- mv a0, sp
- ;; size=88 bbWeight=1 PerfScore 25.50
+ mv a0, a1
+ ;; size=80 bbWeight=1 PerfScore 21.50
G_M4777_IG05: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
sd a0, -8(fp)
sw zero, -12(fp)
@@ -119,7 +117,7 @@ G_M4777_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
RWD00 dq 9ABCDEF012345678h
-; Total bytes of code 340, prolog size 52, PerfScore 123.00, instruction count 60, allocated bytes for code 340 (MethodHash=759ced56) for method T:dirtyStack() (MinOpts)
+; Total bytes of code 332, prolog size 52, PerfScore 119.00, instruction count 58, allocated bytes for code 332 (MethodHash=759ced56) for method T:dirtyStack() (MinOpts)
; ============================================================
Unwind Info:
@@ -130,7 +128,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 85 (0x00055) Actual length = 340 (0x000154)
+ Function Length : 83 (0x00053) Actual length = 332 (0x00014c)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-24 (-1.46%) : 599.dasm - (dynamicClass):IL_STUB_PInvoke(int,System.String,System.String,System.String):int (FullOpts)
@@ -139,18 +139,16 @@ G_M26182_IG05: ; bbWeight=0.50, gcrefRegs=C0200 {s1 s2 s3}, byrefRegs=000
mv a0, a2
addi a0, a0, 0xD1FFAB1E
andi a0, a0, -16
- sltu a1, sp, a0
- sub a0, sp, a0
- beqz a1, pc+8 (2 instructions)
- mv a0, zero
+ maxu a1, sp, a0
+ sub a0, a1, a0
lui a3, 0xD1FFAB1E
mv a1, sp
lw zero, 0xD1FFAB1E(a1)
sub a1, a1, a3
bgeu a1, a0, pc-8 (-2 instructions)
mv sp, a0
- mv a2, sp
- ;; size=116 bbWeight=0.50 PerfScore 19.00
+ mv a2, a0
+ ;; size=108 bbWeight=0.50 PerfScore 17.00
G_M26182_IG06: ; bbWeight=0.50, gcrefRegs=C0200 {s1 s2 s3}, byrefRegs=0000 {}, byref
sd a2, -24(fp)
;; size=4 bbWeight=0.50 PerfScore 2.00
@@ -208,18 +206,16 @@ G_M26182_IG09: ; bbWeight=0.50, gcrefRegs=C0000 {s2 s3}, byrefRegs=0000 {
mv a0, a2
addi a0, a0, 0xD1FFAB1E
andi a0, a0, -16
- sltu a1, sp, a0
- sub a0, sp, a0
- beqz a1, pc+8 (2 instructions)
- mv a0, zero
+ maxu a1, sp, a0
+ sub a0, a1, a0
lui a3, 0xD1FFAB1E
mv a1, sp
lw zero, 0xD1FFAB1E(a1)
sub a1, a1, a3
bgeu a1, a0, pc-8 (-2 instructions)
mv sp, a0
- mv a2, sp
- ;; size=116 bbWeight=0.50 PerfScore 19.00
+ mv a2, a0
+ ;; size=108 bbWeight=0.50 PerfScore 17.00
G_M26182_IG10: ; bbWeight=0.50, gcrefRegs=C0000 {s2 s3}, byrefRegs=0000 {}, byref
sd a2, -40(fp)
;; size=4 bbWeight=0.50 PerfScore 2.00
@@ -277,18 +273,16 @@ G_M26182_IG13: ; bbWeight=0.50, gcrefRegs=80000 {s3}, byrefRegs=0000 {},
mv a0, a2
addi a0, a0, 0xD1FFAB1E
andi a0, a0, -16
- sltu a1, sp, a0
- sub a0, sp, a0
- beqz a1, pc+8 (2 instructions)
- mv a0, zero
+ maxu a1, sp, a0
+ sub a0, a1, a0
lui a3, 0xD1FFAB1E
mv a1, sp
lw zero, 0xD1FFAB1E(a1)
sub a1, a1, a3
bgeu a1, a0, pc-8 (-2 instructions)
mv sp, a0
- mv a2, sp
- ;; size=116 bbWeight=0.50 PerfScore 19.00
+ mv a2, a0
+ ;; size=108 bbWeight=0.50 PerfScore 17.00
G_M26182_IG14: ; bbWeight=0.50, gcrefRegs=80000 {s3}, byrefRegs=0000 {}, byref
sd a2, -56(fp)
;; size=4 bbWeight=0.50 PerfScore 2.00
@@ -556,7 +550,7 @@ G_M26182_IG34: ; bbWeight=0, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
RWD00 dq 9ABCDEF012345678h
-; Total bytes of code 1640, prolog size 116, PerfScore 396.75, instruction count 311, allocated bytes for code 1640 (MethodHash=57cb99b9) for method (dynamicClass):IL_STUB_PInvoke(int,System.String,System.String,System.String):int (FullOpts)
+; Total bytes of code 1616, prolog size 116, PerfScore 390.75, instruction count 305, allocated bytes for code 1616 (MethodHash=57cb99b9) for method (dynamicClass):IL_STUB_PInvoke(int,System.String,System.String,System.String):int (FullOpts)
; ============================================================
Unwind Info:
@@ -567,7 +561,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 336 (0x00150) Actual length = 1344 (0x000540)
+ Function Length : 330 (0x0014a) Actual length = 1320 (0x000528)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
+0 (0.00%) : 172672.dasm - Microsoft.Diagnostics.Tracing.ZippedETLWriter:GetNGenPdbs(System.String,Microsoft.Diagnostics.Symbols.SymbolReader,System.IO.TextWriter):System.Collections.Generic.List`1[System.String] (FullOpts)
No diffs found?
+0 (0.00%) : 171872.dasm - Microsoft.Diagnostics.Symbols.SymbolReader:BypassSystem32FileRedirection(System.String):System.String (FullOpts)
No diffs found?
+0 (0.00%) : 171504.dasm - JIT.HardwareIntrinsics.X86._Avx512DQ.UnaryOpTest__ConvertToVector512Int64DoubletoInt64ToPositiveInfinity+TestStruct:Create():JIT.HardwareIntrinsics.X86._Avx512DQ.UnaryOpTest__ConvertToVector512Int64DoubletoInt64ToPositiveInfinity+TestStruct (MinOpts)
No diffs found?
Details
Size improvements/regressions per collection
PerfScore improvements/regressions per collection
Context information
jit-analyze output |
@risc-vv /run |
RISC-V pull_request-CLR-QEMU: 9092 / 9131 (99.57%)
report.xml, report.md, failures.xml, testclr_details.tar.zst
RISC-V pull_request-CLR-VF2: 9091 / 9130 (99.57%)
report.xml, report.md, failures.xml, testclr_details.tar.zst
RISC-V pull_request-FX-QEMU: 0 / 1 (0.00%)
report.xml, report.md, failures.xml, testclr_details.tar.zst
RISC-V pull_request-FX-VF2: 0 / 62 (0.00%)
report.xml, report.md, failures.xml, testclr_details.tar.zst
Build information and commands
GIT: |
Improve codegen for the hardcoded subtract clamped to 0 (in localloc) and the increment clamped to the maximum (in inc_saturate).
Part of #84834, cc @dotnet/samsung