Skip to content

Commit b9f05cb

Browse files
author
git apple-llvm automerger
committed
Merge commit '6ba6efea8438' from llvm.org/main into next
2 parents ed84f8d + 6ba6efe commit b9f05cb

File tree

2 files changed

+88
-6
lines changed

2 files changed

+88
-6
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoZb.td

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -641,13 +641,15 @@ def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
641641
let Predicates = [HasStdExtZbkb, IsRV32] in {
642642
def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))),
643643
(PACK GPR:$rs1, GPR:$rs2)>;
644-
def : Pat<(or (or
645-
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
644+
645+
// Match a pattern of 2 bytes being inserted into bits [31:16], with bits
646+
// [15:0] coming from a zero-extended value. We can use pack with packh for
647+
// bits [31:16]. If bits [15:0] can also be a packh, it can be matched
648+
// separately.
649+
def : Pat<(or (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
646650
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
647-
(or
648-
(shl (zexti8 (XLenVT GPR:$op0rs2)), (XLenVT 8)),
649-
(zexti8 (XLenVT GPR:$op0rs1)))),
650-
(PACK (XLenVT (PACKH GPR:$op0rs1, GPR:$op0rs2)),
651+
(zexti16 (XLenVT GPR:$rs1))),
652+
(PACK (XLenVT GPR:$rs1),
651653
(XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
652654
}
653655

llvm/test/CodeGen/RISCV/rv32zbkb.ll

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,3 +319,83 @@ define i64 @zext_i16_to_i64(i16 %a) nounwind {
319319
%1 = zext i16 %a to i64
320320
ret i64 %1
321321
}
322+
323+
define i32 @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3) nounwind {
324+
; RV32I-LABEL: pack_lo_packh_hi_packh:
325+
; RV32I: # %bb.0:
326+
; RV32I-NEXT: slli a1, a1, 8
327+
; RV32I-NEXT: slli a2, a2, 16
328+
; RV32I-NEXT: slli a3, a3, 24
329+
; RV32I-NEXT: or a0, a0, a1
330+
; RV32I-NEXT: or a2, a2, a3
331+
; RV32I-NEXT: or a0, a0, a2
332+
; RV32I-NEXT: ret
333+
;
334+
; RV32ZBKB-LABEL: pack_lo_packh_hi_packh:
335+
; RV32ZBKB: # %bb.0:
336+
; RV32ZBKB-NEXT: packh a0, a0, a1
337+
; RV32ZBKB-NEXT: packh a1, a2, a3
338+
; RV32ZBKB-NEXT: pack a0, a0, a1
339+
; RV32ZBKB-NEXT: ret
340+
%a = zext i8 %0 to i32
341+
%b = zext i8 %1 to i32
342+
%c = zext i8 %2 to i32
343+
%d = zext i8 %3 to i32
344+
%e = shl i32 %b, 8
345+
%f = shl i32 %c, 16
346+
%g = shl i32 %d, 24
347+
%h = or i32 %a, %e
348+
%i = or i32 %h, %f
349+
%j = or i32 %i, %g
350+
ret i32 %j
351+
}
352+
353+
define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2) nounwind {
354+
; RV32I-LABEL: pack_lo_zext_hi_packh:
355+
; RV32I: # %bb.0:
356+
; RV32I-NEXT: slli a1, a2, 16
357+
; RV32I-NEXT: slli a2, a2, 24
358+
; RV32I-NEXT: or a1, a2, a1
359+
; RV32I-NEXT: or a0, a1, a0
360+
; RV32I-NEXT: ret
361+
;
362+
; RV32ZBKB-LABEL: pack_lo_zext_hi_packh:
363+
; RV32ZBKB: # %bb.0:
364+
; RV32ZBKB-NEXT: packh a1, a2, a2
365+
; RV32ZBKB-NEXT: pack a0, a0, a1
366+
; RV32ZBKB-NEXT: ret
367+
%a = zext i16 %0 to i32
368+
%b = zext i8 %1 to i32
369+
%c = zext i8 %2 to i32
370+
%d = shl i32 %c, 8
371+
%e = or i32 %c, %d
372+
%f = shl i32 %e, 16
373+
%g = or i32 %f, %a
374+
ret i32 %g
375+
}
376+
377+
; Negative test, %a isn't zero-extended so we can't use pack for the outer or, but
378+
; we can use packh for the high half.
379+
define i32 @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2) nounwind {
380+
; RV32I-LABEL: pack_lo_noext_hi_packh:
381+
; RV32I: # %bb.0:
382+
; RV32I-NEXT: slli a1, a2, 16
383+
; RV32I-NEXT: slli a2, a2, 24
384+
; RV32I-NEXT: or a1, a2, a1
385+
; RV32I-NEXT: or a0, a1, a0
386+
; RV32I-NEXT: ret
387+
;
388+
; RV32ZBKB-LABEL: pack_lo_noext_hi_packh:
389+
; RV32ZBKB: # %bb.0:
390+
; RV32ZBKB-NEXT: packh a1, a2, a2
391+
; RV32ZBKB-NEXT: slli a1, a1, 16
392+
; RV32ZBKB-NEXT: or a0, a1, a0
393+
; RV32ZBKB-NEXT: ret
394+
%b = zext i8 %1 to i32
395+
%c = zext i8 %2 to i32
396+
%d = shl i32 %c, 8
397+
%e = or i32 %c, %d
398+
%f = shl i32 %e, 16
399+
%g = or i32 %f, %a
400+
ret i32 %g
401+
}

0 commit comments

Comments
 (0)