Skip to content

Commit f55281a

Browse files
authored
[RISCV] Add a high half PACKW+PACK pattern for RV64. (#152760)
Similar to the PACKH+PACK pattern for RV32. We can end up with the shift left by 32 needed by our PACK pattern hidden behind an OR that packs 2 half words.
1 parent 9da4d74 commit f55281a

File tree

2 files changed

+75
-2
lines changed

2 files changed

+75
-2
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoZb.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,16 @@ def : Pat<(i64 (or (or (zexti16 (XLenVT GPR:$rs1)),
697697
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
698698
(sext_inreg (shl GPR:$op1rs1, (XLenVT 24)), i32))),
699699
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
700+
701+
// Match a pattern of 2 halfwords being inserted into bits [63:32], with bits
702+
// [31:0] coming from a zero extended value. We can use pack with packw for
703+
// bits [63:32]. If bits [31:0] can also be a packw, it can be matched
704+
// separately.
705+
def : Pat<(or (or (shl GPR:$op1rs2, (i64 48)),
706+
(shl (zexti16 (i64 GPR:$op1rs1)), (i64 32))),
707+
(zexti32 (i64 GPR:$rs1))),
708+
(PACK (XLenVT GPR:$rs1),
709+
(XLenVT (PACKW GPR:$op1rs1, GPR:$op1rs2)))>;
700710
} // Predicates = [HasStdExtZbkb, IsRV64]
701711

702712
let Predicates = [HasStdExtZbb, IsRV32] in

llvm/test/CodeGen/RISCV/unaligned-load-store.ll

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,69 @@ define i64 @load_i64(ptr %p) {
232232
ret i64 %res
233233
}
234234

235+
define i64 @load_i64_align2(ptr %p) {
236+
; RV32I-LABEL: load_i64_align2:
237+
; RV32I: # %bb.0:
238+
; RV32I-NEXT: lhu a1, 2(a0)
239+
; RV32I-NEXT: lhu a2, 0(a0)
240+
; RV32I-NEXT: lhu a3, 6(a0)
241+
; RV32I-NEXT: lhu a4, 4(a0)
242+
; RV32I-NEXT: slli a0, a1, 16
243+
; RV32I-NEXT: or a0, a0, a2
244+
; RV32I-NEXT: slli a1, a3, 16
245+
; RV32I-NEXT: or a1, a1, a4
246+
; RV32I-NEXT: ret
247+
;
248+
; RV64I-LABEL: load_i64_align2:
249+
; RV64I: # %bb.0:
250+
; RV64I-NEXT: lhu a1, 2(a0)
251+
; RV64I-NEXT: lhu a2, 0(a0)
252+
; RV64I-NEXT: lhu a3, 4(a0)
253+
; RV64I-NEXT: lhu a0, 6(a0)
254+
; RV64I-NEXT: slli a1, a1, 16
255+
; RV64I-NEXT: or a1, a1, a2
256+
; RV64I-NEXT: slli a3, a3, 32
257+
; RV64I-NEXT: slli a0, a0, 48
258+
; RV64I-NEXT: or a0, a0, a3
259+
; RV64I-NEXT: or a0, a0, a1
260+
; RV64I-NEXT: ret
261+
;
262+
; RV32IZBKB-LABEL: load_i64_align2:
263+
; RV32IZBKB: # %bb.0:
264+
; RV32IZBKB-NEXT: lhu a1, 0(a0)
265+
; RV32IZBKB-NEXT: lhu a2, 2(a0)
266+
; RV32IZBKB-NEXT: lhu a3, 4(a0)
267+
; RV32IZBKB-NEXT: lhu a4, 6(a0)
268+
; RV32IZBKB-NEXT: pack a0, a1, a2
269+
; RV32IZBKB-NEXT: pack a1, a3, a4
270+
; RV32IZBKB-NEXT: ret
271+
;
272+
; RV64IZBKB-LABEL: load_i64_align2:
273+
; RV64IZBKB: # %bb.0:
274+
; RV64IZBKB-NEXT: lhu a1, 2(a0)
275+
; RV64IZBKB-NEXT: lhu a2, 4(a0)
276+
; RV64IZBKB-NEXT: lhu a3, 6(a0)
277+
; RV64IZBKB-NEXT: lhu a0, 0(a0)
278+
; RV64IZBKB-NEXT: packw a2, a2, a3
279+
; RV64IZBKB-NEXT: packw a0, a0, a1
280+
; RV64IZBKB-NEXT: pack a0, a0, a2
281+
; RV64IZBKB-NEXT: ret
282+
;
283+
; RV32I-FAST-LABEL: load_i64_align2:
284+
; RV32I-FAST: # %bb.0:
285+
; RV32I-FAST-NEXT: lw a2, 0(a0)
286+
; RV32I-FAST-NEXT: lw a1, 4(a0)
287+
; RV32I-FAST-NEXT: mv a0, a2
288+
; RV32I-FAST-NEXT: ret
289+
;
290+
; RV64I-FAST-LABEL: load_i64_align2:
291+
; RV64I-FAST: # %bb.0:
292+
; RV64I-FAST-NEXT: ld a0, 0(a0)
293+
; RV64I-FAST-NEXT: ret
294+
%res = load i64, ptr %p, align 2
295+
ret i64 %res
296+
}
297+
235298
define void @store_i8(ptr %p, i8 %v) {
236299
; ALL-LABEL: store_i8:
237300
; ALL: # %bb.0:
@@ -569,8 +632,8 @@ define void @store_large_constant(ptr %x) {
569632
;
570633
; RV64I-FAST-LABEL: store_large_constant:
571634
; RV64I-FAST: # %bb.0:
572-
; RV64I-FAST-NEXT: lui a1, %hi(.LCPI16_0)
573-
; RV64I-FAST-NEXT: ld a1, %lo(.LCPI16_0)(a1)
635+
; RV64I-FAST-NEXT: lui a1, %hi(.LCPI17_0)
636+
; RV64I-FAST-NEXT: ld a1, %lo(.LCPI17_0)(a1)
574637
; RV64I-FAST-NEXT: sd a1, 0(a0)
575638
; RV64I-FAST-NEXT: ret
576639
store i64 18364758544493064720, ptr %x, align 1

0 commit comments

Comments
 (0)