From c1f7df12dfb77c3af70505647ce005602d2bc03b Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Mon, 30 Sep 2024 14:42:13 +0100 Subject: [PATCH] [RISCV] Add pattern for PACK/PACKH in common misaligned load case PACKH is currently only selected for assembling the first two bytes of a misaligned load. A fairly complex RV32-only pattern is added for producing PACKH+PACKH+PACK to assemble the result of a misaligned 32-bit load. Another pattern was added that just covers PACKH for shifted offsets 16 and 24, producing a packh and shift to replace two shifts and an 'or'. This slightly improves RV64IZBKB for a 64-bit load, but fails to match for the misaligned 32-bit load because the load of the upper byte is anyext in the SelectionDAG. --- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 13 +++- .../CodeGen/RISCV/unaligned-load-store.ll | 75 ++++++++++--------- 2 files changed, 53 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 8e6f281027695..3eb1fc6869403 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -599,15 +599,26 @@ def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 8)), def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)), (zexti8 (XLenVT GPR:$rs1))), 0xFFFF), (PACKH GPR:$rs1, GPR:$rs2)>; +def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 24)), + (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))), + (SLLI (PACKH GPR:$rs1, GPR:$rs2), (XLenVT 16))>; def : Pat<(binop_allhusers (shl GPR:$rs2, (XLenVT 8)), (zexti8 (XLenVT GPR:$rs1))), (PACKH GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbkb] -let Predicates = [HasStdExtZbkb, IsRV32] in +let Predicates = [HasStdExtZbkb, IsRV32] in { def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))), (PACK GPR:$rs1, GPR:$rs2)>; +def : Pat<(or (or + (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)), + (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), + (or + (shl (zexti8 
(XLenVT GPR:$op0rs2)), (XLenVT 8)), + (zexti8 (XLenVT GPR:$op0rs1)))), + (PACK (PACKH GPR:$op0rs1, GPR:$op0rs2), (PACKH GPR:$op1rs1, GPR:$op1rs2))>; +} let Predicates = [HasStdExtZbkb, IsRV64] in { def : Pat<(i64 (or (zexti32 (i64 GPR:$rs1)), (shl GPR:$rs2, (i64 32)))), diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll index 74d34b2b64d41..0ca9fc065ff2d 100644 --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -97,18 +97,29 @@ define i32 @load_i32(ptr %p) { ; SLOWBASE-NEXT: or a0, a0, a1 ; SLOWBASE-NEXT: ret ; -; SLOWZBKB-LABEL: load_i32: -; SLOWZBKB: # %bb.0: -; SLOWZBKB-NEXT: lbu a1, 0(a0) -; SLOWZBKB-NEXT: lbu a2, 1(a0) -; SLOWZBKB-NEXT: lbu a3, 2(a0) -; SLOWZBKB-NEXT: lbu a0, 3(a0) -; SLOWZBKB-NEXT: packh a1, a1, a2 -; SLOWZBKB-NEXT: slli a3, a3, 16 -; SLOWZBKB-NEXT: slli a0, a0, 24 -; SLOWZBKB-NEXT: or a0, a0, a3 -; SLOWZBKB-NEXT: or a0, a0, a1 -; SLOWZBKB-NEXT: ret +; RV32IZBKB-LABEL: load_i32: +; RV32IZBKB: # %bb.0: +; RV32IZBKB-NEXT: lbu a1, 1(a0) +; RV32IZBKB-NEXT: lbu a2, 2(a0) +; RV32IZBKB-NEXT: lbu a3, 3(a0) +; RV32IZBKB-NEXT: lbu a0, 0(a0) +; RV32IZBKB-NEXT: packh a2, a2, a3 +; RV32IZBKB-NEXT: packh a0, a0, a1 +; RV32IZBKB-NEXT: pack a0, a0, a2 +; RV32IZBKB-NEXT: ret +; +; RV64IZBKB-LABEL: load_i32: +; RV64IZBKB: # %bb.0: +; RV64IZBKB-NEXT: lbu a1, 0(a0) +; RV64IZBKB-NEXT: lbu a2, 1(a0) +; RV64IZBKB-NEXT: lbu a3, 2(a0) +; RV64IZBKB-NEXT: lbu a0, 3(a0) +; RV64IZBKB-NEXT: packh a1, a1, a2 +; RV64IZBKB-NEXT: slli a3, a3, 16 +; RV64IZBKB-NEXT: slli a0, a0, 24 +; RV64IZBKB-NEXT: or a0, a0, a3 +; RV64IZBKB-NEXT: or a0, a0, a1 +; RV64IZBKB-NEXT: ret ; ; FAST-LABEL: load_i32: ; FAST: # %bb.0: @@ -176,20 +187,16 @@ define i64 @load_i64(ptr %p) { ; RV32IZBKB-NEXT: lbu a2, 1(a0) ; RV32IZBKB-NEXT: lbu a3, 2(a0) ; RV32IZBKB-NEXT: lbu a4, 3(a0) +; RV32IZBKB-NEXT: lbu a5, 4(a0) +; RV32IZBKB-NEXT: lbu a6, 5(a0) +; RV32IZBKB-NEXT: lbu a7, 6(a0) +; 
RV32IZBKB-NEXT: lbu t0, 7(a0) +; RV32IZBKB-NEXT: packh a0, a3, a4 ; RV32IZBKB-NEXT: packh a1, a1, a2 -; RV32IZBKB-NEXT: slli a3, a3, 16 -; RV32IZBKB-NEXT: slli a4, a4, 24 -; RV32IZBKB-NEXT: or a3, a4, a3 -; RV32IZBKB-NEXT: lbu a2, 4(a0) -; RV32IZBKB-NEXT: lbu a4, 5(a0) -; RV32IZBKB-NEXT: lbu a5, 6(a0) -; RV32IZBKB-NEXT: lbu a6, 7(a0) -; RV32IZBKB-NEXT: or a0, a3, a1 -; RV32IZBKB-NEXT: packh a1, a2, a4 -; RV32IZBKB-NEXT: slli a5, a5, 16 -; RV32IZBKB-NEXT: slli a6, a6, 24 -; RV32IZBKB-NEXT: or a2, a6, a5 -; RV32IZBKB-NEXT: or a1, a2, a1 +; RV32IZBKB-NEXT: pack a0, a1, a0 +; RV32IZBKB-NEXT: packh a1, a7, t0 +; RV32IZBKB-NEXT: packh a2, a5, a6 +; RV32IZBKB-NEXT: pack a1, a2, a1 ; RV32IZBKB-NEXT: ret ; ; RV64IZBKB-LABEL: load_i64: @@ -199,18 +206,16 @@ define i64 @load_i64(ptr %p) { ; RV64IZBKB-NEXT: lbu a3, 6(a0) ; RV64IZBKB-NEXT: lbu a4, 7(a0) ; RV64IZBKB-NEXT: packh a1, a1, a2 -; RV64IZBKB-NEXT: slli a3, a3, 16 -; RV64IZBKB-NEXT: slli a4, a4, 24 -; RV64IZBKB-NEXT: or a3, a4, a3 -; RV64IZBKB-NEXT: lbu a2, 0(a0) +; RV64IZBKB-NEXT: packh a2, a3, a4 +; RV64IZBKB-NEXT: lbu a3, 0(a0) ; RV64IZBKB-NEXT: lbu a4, 1(a0) ; RV64IZBKB-NEXT: lbu a5, 2(a0) ; RV64IZBKB-NEXT: lbu a0, 3(a0) -; RV64IZBKB-NEXT: or a1, a3, a1 -; RV64IZBKB-NEXT: packh a2, a2, a4 -; RV64IZBKB-NEXT: slli a5, a5, 16 -; RV64IZBKB-NEXT: slli a0, a0, 24 -; RV64IZBKB-NEXT: or a0, a0, a5 +; RV64IZBKB-NEXT: slli a2, a2, 16 +; RV64IZBKB-NEXT: or a1, a2, a1 +; RV64IZBKB-NEXT: packh a2, a3, a4 +; RV64IZBKB-NEXT: packh a0, a5, a0 +; RV64IZBKB-NEXT: slli a0, a0, 16 ; RV64IZBKB-NEXT: or a0, a0, a2 ; RV64IZBKB-NEXT: pack a0, a0, a1 ; RV64IZBKB-NEXT: ret @@ -574,3 +579,5 @@ define void @store_large_constant(ptr %x) { store i64 18364758544493064720, ptr %x, align 1 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; SLOWZBKB: {{.*}}