Skip to content

Commit 5768e9d

Browse files
topperckrishna2803
authored and committed
[RISCV] Simplify one of the RV32 PACK isel patterns. (llvm#152045)
This pattern previously checked a specific variant of 4 bytes being packed that is generated by unaligned load expansion. Our individual PACK patterns don't handle that particular case because a DAG combine turns (or (or A, (shl B, 8)), (shl (or C, (shl D, 8)), 16)) into (or (or A, (shl B, 8)), (or (shl C, 16), (shl D, 24))). After this, the outer OR doesn't have a shl operand, so we needed a pattern that looks through 2 layers of OR. To match this pattern we don't need to look at the (or A, (shl B, 8)) part since that part wasn't affected by the DAG combine and can be matched to PACKH by itself. It's enough to make sure that part of the pattern has zeros in the upper 16 bits. This allows tablegen to automatically generate more permutations of this pattern. The associative variant expansion is limited to 3 children.
1 parent a8eed81 commit 5768e9d

File tree

2 files changed

+88
-6
lines changed

2 files changed

+88
-6
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoZb.td

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -641,13 +641,15 @@ def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
641641
let Predicates = [HasStdExtZbkb, IsRV32] in {
642642
def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))),
643643
(PACK GPR:$rs1, GPR:$rs2)>;
644-
def : Pat<(or (or
645-
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
644+
645+
// Match a pattern of 2 bytes being inserted into bits [31:16], with
646+
// bits [15:0] coming from a zero extended value. We can use pack with packh for
647+
// bits [31:16]. If bits [15:0] can also be a packh, it can be matched
648+
// separately.
649+
def : Pat<(or (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
646650
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
647-
(or
648-
(shl (zexti8 (XLenVT GPR:$op0rs2)), (XLenVT 8)),
649-
(zexti8 (XLenVT GPR:$op0rs1)))),
650-
(PACK (XLenVT (PACKH GPR:$op0rs1, GPR:$op0rs2)),
651+
(zexti16 (XLenVT GPR:$rs1))),
652+
(PACK (XLenVT GPR:$rs1),
651653
(XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
652654
}
653655

llvm/test/CodeGen/RISCV/rv32zbkb.ll

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,3 +319,83 @@ define i64 @zext_i16_to_i64(i16 %a) nounwind {
319319
%1 = zext i16 %a to i64
320320
ret i64 %1
321321
}
322+
323+
define i32 @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3) nounwind {
324+
; RV32I-LABEL: pack_lo_packh_hi_packh:
325+
; RV32I: # %bb.0:
326+
; RV32I-NEXT: slli a1, a1, 8
327+
; RV32I-NEXT: slli a2, a2, 16
328+
; RV32I-NEXT: slli a3, a3, 24
329+
; RV32I-NEXT: or a0, a0, a1
330+
; RV32I-NEXT: or a2, a2, a3
331+
; RV32I-NEXT: or a0, a0, a2
332+
; RV32I-NEXT: ret
333+
;
334+
; RV32ZBKB-LABEL: pack_lo_packh_hi_packh:
335+
; RV32ZBKB: # %bb.0:
336+
; RV32ZBKB-NEXT: packh a0, a0, a1
337+
; RV32ZBKB-NEXT: packh a1, a2, a3
338+
; RV32ZBKB-NEXT: pack a0, a0, a1
339+
; RV32ZBKB-NEXT: ret
340+
%a = zext i8 %0 to i32
341+
%b = zext i8 %1 to i32
342+
%c = zext i8 %2 to i32
343+
%d = zext i8 %3 to i32
344+
%e = shl i32 %b, 8
345+
%f = shl i32 %c, 16
346+
%g = shl i32 %d, 24
347+
%h = or i32 %a, %e
348+
%i = or i32 %h, %f
349+
%j = or i32 %i, %g
350+
ret i32 %j
351+
}
352+
353+
define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2) nounwind {
354+
; RV32I-LABEL: pack_lo_zext_hi_packh:
355+
; RV32I: # %bb.0:
356+
; RV32I-NEXT: slli a1, a2, 16
357+
; RV32I-NEXT: slli a2, a2, 24
358+
; RV32I-NEXT: or a1, a2, a1
359+
; RV32I-NEXT: or a0, a1, a0
360+
; RV32I-NEXT: ret
361+
;
362+
; RV32ZBKB-LABEL: pack_lo_zext_hi_packh:
363+
; RV32ZBKB: # %bb.0:
364+
; RV32ZBKB-NEXT: packh a1, a2, a2
365+
; RV32ZBKB-NEXT: pack a0, a0, a1
366+
; RV32ZBKB-NEXT: ret
367+
%a = zext i16 %0 to i32
368+
%b = zext i8 %1 to i32
369+
%c = zext i8 %2 to i32
370+
%d = shl i32 %c, 8
371+
%e = or i32 %c, %d
372+
%f = shl i32 %e, 16
373+
%g = or i32 %f, %a
374+
ret i32 %g
375+
}
376+
377+
; Negative test, %a isn't extended so we can't use pack for the outer or, but
378+
; we can use packh for the high half.
379+
define i32 @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2) nounwind {
380+
; RV32I-LABEL: pack_lo_noext_hi_packh:
381+
; RV32I: # %bb.0:
382+
; RV32I-NEXT: slli a1, a2, 16
383+
; RV32I-NEXT: slli a2, a2, 24
384+
; RV32I-NEXT: or a1, a2, a1
385+
; RV32I-NEXT: or a0, a1, a0
386+
; RV32I-NEXT: ret
387+
;
388+
; RV32ZBKB-LABEL: pack_lo_noext_hi_packh:
389+
; RV32ZBKB: # %bb.0:
390+
; RV32ZBKB-NEXT: packh a1, a2, a2
391+
; RV32ZBKB-NEXT: slli a1, a1, 16
392+
; RV32ZBKB-NEXT: or a0, a1, a0
393+
; RV32ZBKB-NEXT: ret
394+
%b = zext i8 %1 to i32
395+
%c = zext i8 %2 to i32
396+
%d = shl i32 %c, 8
397+
%e = or i32 %c, %d
398+
%f = shl i32 %e, 16
399+
%g = or i32 %f, %a
400+
ret i32 %g
401+
}

0 commit comments

Comments
 (0)