Skip to content

Commit 886b213

Browse files
authored
[RISCV] Relax one of the zexti8 in the PACKH+PACK(W)/SLLI patterns. (#152384)
For RV32 we don't need the byte shifted by 24 to be zero extended, since the extended bits are shifted out. For RV64, we don't need the byte shifted by 24 to be zero extended if the upper 32 bits of the result aren't demanded.
1 parent 3d1c1a5 commit 886b213

File tree

3 files changed

+178
-19
lines changed

3 files changed

+178
-19
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoZb.td

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -629,9 +629,6 @@ def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 8)),
629629
def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)),
630630
(zexti8 (XLenVT GPR:$rs1))), 0xFFFF),
631631
(PACKH GPR:$rs1, GPR:$rs2)>;
632-
def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 24)),
633-
(shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
634-
(SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
635632

636633
def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
637634
(zexti8 (XLenVT GPR:$rs1))),
@@ -642,11 +639,15 @@ let Predicates = [HasStdExtZbkb, IsRV32] in {
642639
def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))),
643640
(PACK GPR:$rs1, GPR:$rs2)>;
644641

642+
def : Pat<(or (shl GPR:$rs2, (XLenVT 24)),
643+
(shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
644+
(SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
645+
645646
// Match a pattern of 2 bytes being inserted into bits [31:16], with
646647
// bits [15:0] coming from a zero extended value. We can use pack with packh for
647648
// bits [31:16]. If bits [15:0] can also be a packh, it can be matched
648649
// separately.
649-
def : Pat<(or (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
650+
def : Pat<(or (or (shl GPR:$op1rs2, (XLenVT 24)),
650651
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
651652
(zexti16 (XLenVT GPR:$rs1))),
652653
(PACK (XLenVT GPR:$rs1),
@@ -657,6 +658,13 @@ let Predicates = [HasStdExtZbkb, IsRV64] in {
657658
def : Pat<(i64 (or (zexti32 (i64 GPR:$rs1)), (shl GPR:$rs2, (i64 32)))),
658659
(PACK GPR:$rs1, GPR:$rs2)>;
659660

661+
def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 24)),
662+
(shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
663+
(SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
664+
def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (XLenVT 24)),
665+
(shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
666+
(SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
667+
660668
def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (i64 16)),
661669
(zexti16 (i64 GPR:$rs1))),
662670
(PACKW GPR:$rs1, GPR:$rs2)>;
@@ -669,19 +677,19 @@ def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
669677
// ignored. We can use packw with packh for bits [31:16]. If bits [15:0] can
670678
// also be a packh, it can be matched separately.
671679
def : Pat<(binop_allwusers<or>
672-
(or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
680+
(or (shl GPR:$op1rs2, (XLenVT 24)),
673681
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
674682
(zexti16 (XLenVT GPR:$rs1))),
675683
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
676684
// We need to manually reassociate the patterns because of the binop_allwusers.
677685
def : Pat<(binop_allwusers<or>
678686
(or (zexti16 (XLenVT GPR:$rs1)),
679687
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
680-
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24))),
688+
(shl GPR:$op1rs2, (XLenVT 24))),
681689
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
682690
// In this reassociated form $op1rs1 is the byte shifted into bits [31:24] and
// $op1rs2 is the byte shifted into bits [23:16]. PACKH places its first operand
// in bits [7:0] and its second operand in bits [15:8], so $op1rs2 must be the
// first PACKH operand and $op1rs1 the second. Only $op1rs2 needs to be zero
// extended; the extra bits of $op1rs1 land above bit 31 and are not demanded.
def : Pat<(binop_allwusers<or>
683691
(or (zexti16 (XLenVT GPR:$rs1)),
684-
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 24))),
692+
(shl GPR:$op1rs1, (XLenVT 24))),
685693
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
686694
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs2, GPR:$op1rs1)))>;
687695
} // Predicates = [HasStdExtZbkb, IsRV64]

llvm/test/CodeGen/RISCV/rv32zbkb.ll

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -350,25 +350,58 @@ define i32 @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2,
350350
ret i32 %j
351351
}
352352

353+
define i32 @pack_lo_packh_hi_packh_2(i8 %0, i8 %1, i8 %2, i8 %3) nounwind {
354+
; RV32I-LABEL: pack_lo_packh_hi_packh_2:
355+
; RV32I: # %bb.0:
356+
; RV32I-NEXT: zext.b a0, a0
357+
; RV32I-NEXT: zext.b a1, a1
358+
; RV32I-NEXT: zext.b a2, a2
359+
; RV32I-NEXT: slli a3, a3, 24
360+
; RV32I-NEXT: slli a1, a1, 8
361+
; RV32I-NEXT: slli a2, a2, 16
362+
; RV32I-NEXT: or a0, a0, a1
363+
; RV32I-NEXT: or a2, a2, a3
364+
; RV32I-NEXT: or a0, a0, a2
365+
; RV32I-NEXT: ret
366+
;
367+
; RV32ZBKB-LABEL: pack_lo_packh_hi_packh_2:
368+
; RV32ZBKB: # %bb.0:
369+
; RV32ZBKB-NEXT: packh a0, a0, a1
370+
; RV32ZBKB-NEXT: packh a1, a2, a3
371+
; RV32ZBKB-NEXT: pack a0, a0, a1
372+
; RV32ZBKB-NEXT: ret
373+
%a = zext i8 %0 to i32
374+
%b = zext i8 %1 to i32
375+
%c = zext i8 %2 to i32
376+
%d = zext i8 %3 to i32
377+
%e = shl i32 %b, 8
378+
%f = shl i32 %c, 16
379+
%g = shl i32 %d, 24
380+
%h = or i32 %a, %e
381+
%i = or i32 %h, %f
382+
%j = or i32 %i, %g
383+
ret i32 %j
384+
}
385+
353386
define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2) nounwind {
354387
; RV32I-LABEL: pack_lo_zext_hi_packh:
355388
; RV32I: # %bb.0:
356-
; RV32I-NEXT: slli a1, a2, 16
389+
; RV32I-NEXT: slli a1, a1, 16
357390
; RV32I-NEXT: slli a2, a2, 24
358391
; RV32I-NEXT: or a1, a2, a1
359392
; RV32I-NEXT: or a0, a1, a0
360393
; RV32I-NEXT: ret
361394
;
362395
; RV32ZBKB-LABEL: pack_lo_zext_hi_packh:
363396
; RV32ZBKB: # %bb.0:
364-
; RV32ZBKB-NEXT: packh a1, a2, a2
397+
; RV32ZBKB-NEXT: packh a1, a1, a2
365398
; RV32ZBKB-NEXT: pack a0, a0, a1
366399
; RV32ZBKB-NEXT: ret
367400
%a = zext i16 %0 to i32
368401
%b = zext i8 %1 to i32
369402
%c = zext i8 %2 to i32
370403
%d = shl i32 %c, 8
371-
%e = or i32 %c, %d
404+
%e = or i32 %b, %d
372405
%f = shl i32 %e, 16
373406
%g = or i32 %f, %a
374407
ret i32 %g
@@ -379,22 +412,48 @@ define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2)
379412
define i32 @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2) nounwind {
380413
; RV32I-LABEL: pack_lo_noext_hi_packh:
381414
; RV32I: # %bb.0:
382-
; RV32I-NEXT: slli a1, a2, 16
415+
; RV32I-NEXT: slli a1, a1, 16
383416
; RV32I-NEXT: slli a2, a2, 24
384417
; RV32I-NEXT: or a1, a2, a1
385418
; RV32I-NEXT: or a0, a1, a0
386419
; RV32I-NEXT: ret
387420
;
388421
; RV32ZBKB-LABEL: pack_lo_noext_hi_packh:
389422
; RV32ZBKB: # %bb.0:
390-
; RV32ZBKB-NEXT: packh a1, a2, a2
423+
; RV32ZBKB-NEXT: packh a1, a1, a2
424+
; RV32ZBKB-NEXT: slli a1, a1, 16
425+
; RV32ZBKB-NEXT: or a0, a1, a0
426+
; RV32ZBKB-NEXT: ret
427+
%b = zext i8 %1 to i32
428+
%c = zext i8 %2 to i32
429+
%d = shl i32 %c, 8
430+
%e = or i32 %b, %d
431+
%f = shl i32 %e, 16
432+
%g = or i32 %f, %a
433+
ret i32 %g
434+
}
435+
436+
; Make sure we can match packh+slli without having the input bytes zero extended.
437+
define i32 @pack_lo_noext_hi_packh_nozeroext(i32 %a, i8 %1, i8 %2) nounwind {
438+
; RV32I-LABEL: pack_lo_noext_hi_packh_nozeroext:
439+
; RV32I: # %bb.0:
440+
; RV32I-NEXT: zext.b a1, a1
441+
; RV32I-NEXT: slli a2, a2, 24
442+
; RV32I-NEXT: slli a1, a1, 16
443+
; RV32I-NEXT: or a0, a2, a0
444+
; RV32I-NEXT: or a0, a0, a1
445+
; RV32I-NEXT: ret
446+
;
447+
; RV32ZBKB-LABEL: pack_lo_noext_hi_packh_nozeroext:
448+
; RV32ZBKB: # %bb.0:
449+
; RV32ZBKB-NEXT: packh a1, a1, a2
391450
; RV32ZBKB-NEXT: slli a1, a1, 16
392451
; RV32ZBKB-NEXT: or a0, a1, a0
393452
; RV32ZBKB-NEXT: ret
394453
%b = zext i8 %1 to i32
395454
%c = zext i8 %2 to i32
396455
%d = shl i32 %c, 8
397-
%e = or i32 %c, %d
456+
%e = or i32 %b, %d
398457
%f = shl i32 %e, 16
399458
%g = or i32 %f, %a
400459
ret i32 %g

llvm/test/CodeGen/RISCV/rv64zbkb.ll

Lines changed: 98 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -459,10 +459,46 @@ define void @pack_lo_packh_hi_packh_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %
459459
ret void
460460
}
461461

462+
define void @pack_lo_packh_hi_packh_3(i8 %0, i8 %1, i8 %2, i8 %3, ptr %p) nounwind {
463+
; RV64I-LABEL: pack_lo_packh_hi_packh_3:
464+
; RV64I: # %bb.0:
465+
; RV64I-NEXT: zext.b a0, a0
466+
; RV64I-NEXT: zext.b a1, a1
467+
; RV64I-NEXT: zext.b a2, a2
468+
; RV64I-NEXT: slli a3, a3, 24
469+
; RV64I-NEXT: slli a1, a1, 8
470+
; RV64I-NEXT: slli a2, a2, 16
471+
; RV64I-NEXT: or a0, a3, a0
472+
; RV64I-NEXT: or a0, a0, a1
473+
; RV64I-NEXT: or a0, a2, a0
474+
; RV64I-NEXT: sw a0, 0(a4)
475+
; RV64I-NEXT: ret
476+
;
477+
; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_3:
478+
; RV64ZBKB: # %bb.0:
479+
; RV64ZBKB-NEXT: packh a0, a0, a1
480+
; RV64ZBKB-NEXT: packh a1, a2, a3
481+
; RV64ZBKB-NEXT: packw a0, a0, a1
482+
; RV64ZBKB-NEXT: sw a0, 0(a4)
483+
; RV64ZBKB-NEXT: ret
484+
%a = zext i8 %0 to i32
485+
%b = zext i8 %1 to i32
486+
%c = zext i8 %2 to i32
487+
%d = zext i8 %3 to i32
488+
%e = shl i32 %b, 8
489+
%f = shl i32 %c, 16
490+
%g = shl i32 %d, 24
491+
%h = or i32 %a, %e
492+
%i = or i32 %g, %h
493+
%j = or i32 %f, %i
494+
store i32 %j, ptr %p
495+
ret void
496+
}
497+
462498
define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind {
463499
; RV64I-LABEL: pack_lo_zext_hi_packh:
464500
; RV64I: # %bb.0:
465-
; RV64I-NEXT: slli a1, a2, 16
501+
; RV64I-NEXT: slli a1, a1, 16
466502
; RV64I-NEXT: slli a2, a2, 24
467503
; RV64I-NEXT: or a1, a2, a1
468504
; RV64I-NEXT: or a0, a1, a0
@@ -471,15 +507,15 @@ define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2,
471507
;
472508
; RV64ZBKB-LABEL: pack_lo_zext_hi_packh:
473509
; RV64ZBKB: # %bb.0:
474-
; RV64ZBKB-NEXT: packh a1, a2, a2
510+
; RV64ZBKB-NEXT: packh a1, a1, a2
475511
; RV64ZBKB-NEXT: packw a0, a0, a1
476512
; RV64ZBKB-NEXT: sw a0, 0(a3)
477513
; RV64ZBKB-NEXT: ret
478514
%a = zext i16 %0 to i32
479515
%b = zext i8 %1 to i32
480516
%c = zext i8 %2 to i32
481517
%d = shl i32 %c, 8
482-
%e = or i32 %c, %d
518+
%e = or i32 %b, %d
483519
%f = shl i32 %e, 16
484520
%g = or i32 %f, %a
485521
store i32 %g, ptr %p
@@ -491,7 +527,7 @@ define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2,
491527
define void @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind {
492528
; RV64I-LABEL: pack_lo_noext_hi_packh:
493529
; RV64I: # %bb.0:
494-
; RV64I-NEXT: slli a1, a2, 16
530+
; RV64I-NEXT: slli a1, a1, 16
495531
; RV64I-NEXT: slli a2, a2, 24
496532
; RV64I-NEXT: or a1, a2, a1
497533
; RV64I-NEXT: or a0, a1, a0
@@ -500,17 +536,73 @@ define void @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2, ptr %p
500536
;
501537
; RV64ZBKB-LABEL: pack_lo_noext_hi_packh:
502538
; RV64ZBKB: # %bb.0:
503-
; RV64ZBKB-NEXT: packh a1, a2, a2
539+
; RV64ZBKB-NEXT: packh a1, a1, a2
540+
; RV64ZBKB-NEXT: slli a1, a1, 16
541+
; RV64ZBKB-NEXT: or a0, a1, a0
542+
; RV64ZBKB-NEXT: sw a0, 0(a3)
543+
; RV64ZBKB-NEXT: ret
544+
%b = zext i8 %1 to i32
545+
%c = zext i8 %2 to i32
546+
%d = shl i32 %c, 8
547+
%e = or i32 %b, %d
548+
%f = shl i32 %e, 16
549+
%g = or i32 %f, %a
550+
store i32 %g, ptr %p
551+
ret void
552+
}
553+
554+
; Make sure we can match packh+slli without having the input bytes zero extended.
555+
define void @pack_i32_lo_noext_hi_packh_nozeroext(i32 %a, i8 %1, i8 %2, ptr %p) nounwind {
556+
; RV64I-LABEL: pack_i32_lo_noext_hi_packh_nozeroext:
557+
; RV64I: # %bb.0:
558+
; RV64I-NEXT: zext.b a1, a1
559+
; RV64I-NEXT: slli a2, a2, 24
560+
; RV64I-NEXT: slli a1, a1, 16
561+
; RV64I-NEXT: or a0, a2, a0
562+
; RV64I-NEXT: or a0, a0, a1
563+
; RV64I-NEXT: sw a0, 0(a3)
564+
; RV64I-NEXT: ret
565+
;
566+
; RV64ZBKB-LABEL: pack_i32_lo_noext_hi_packh_nozeroext:
567+
; RV64ZBKB: # %bb.0:
568+
; RV64ZBKB-NEXT: packh a1, a1, a2
504569
; RV64ZBKB-NEXT: slli a1, a1, 16
505570
; RV64ZBKB-NEXT: or a0, a1, a0
506571
; RV64ZBKB-NEXT: sw a0, 0(a3)
507572
; RV64ZBKB-NEXT: ret
508573
%b = zext i8 %1 to i32
509574
%c = zext i8 %2 to i32
510575
%d = shl i32 %c, 8
511-
%e = or i32 %c, %d
576+
%e = or i32 %b, %d
512577
%f = shl i32 %e, 16
513578
%g = or i32 %f, %a
514579
store i32 %g, ptr %p
515580
ret void
516581
}
582+
583+
; Make sure we can match packh+slli without having the input bytes zero extended.
584+
define i64 @pack_i64_lo_noext_hi_packh_nozeroext(i64 %a, i8 %1, i8 %2, ptr %p) nounwind {
585+
; RV64I-LABEL: pack_i64_lo_noext_hi_packh_nozeroext:
586+
; RV64I: # %bb.0:
587+
; RV64I-NEXT: zext.b a1, a1
588+
; RV64I-NEXT: zext.b a2, a2
589+
; RV64I-NEXT: slli a1, a1, 16
590+
; RV64I-NEXT: slli a2, a2, 24
591+
; RV64I-NEXT: or a1, a2, a1
592+
; RV64I-NEXT: or a0, a1, a0
593+
; RV64I-NEXT: ret
594+
;
595+
; RV64ZBKB-LABEL: pack_i64_lo_noext_hi_packh_nozeroext:
596+
; RV64ZBKB: # %bb.0:
597+
; RV64ZBKB-NEXT: packh a1, a1, a2
598+
; RV64ZBKB-NEXT: slli a1, a1, 16
599+
; RV64ZBKB-NEXT: or a0, a1, a0
600+
; RV64ZBKB-NEXT: ret
601+
%b = zext i8 %1 to i64
602+
%c = zext i8 %2 to i64
603+
%d = shl i64 %c, 8
604+
%e = or i64 %b, %d
605+
%f = shl i64 %e, 16
606+
%g = or i64 %f, %a
607+
ret i64 %g
608+
}

0 commit comments

Comments
 (0)