Skip to content

Commit e232f05

Browse files
authored
[RISCV] Add packw+packh isel pattern for unaligned loads on RV64. (#152159)
This is similar to an existing pattern from RV32 with the simplification proposed by #152045. Instead of pack we need to use packw, and we need to know that the upper 32 bits are being ignored since packw sign extends from bit 31. The use of allBinOpWUsers prevents tablegen from automatically reassociating the pattern so we need to do it manually. Tablegen is still able to commute operands though.
1 parent ef9834c commit e232f05

File tree

3 files changed

+152
-11
lines changed

3 files changed

+152
-11
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoZb.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,27 @@ def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (i64 16)),
663663
def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
664664
(zexti16 (i64 GPR:$rs1)))),
665665
(PACKW GPR:$rs1, GPR:$rs2)>;
666+
667+
// Match a pattern of 2 bytes being inserted into bits [31:16], with
668+
// bits [15:0] coming from a zero extended value, and bits [63:32] being
669+
// ignored. We can use packw with packh for bits [31:16]. If bits [15:0] can
670+
// also be a packh, it can be matched separately.
671+
def : Pat<(binop_allwusers<or>
672+
(or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
673+
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
674+
(zexti16 (XLenVT GPR:$rs1))),
675+
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
676+
// We need to manually reassociate the patterns because of the binop_allwusers.
677+
def : Pat<(binop_allwusers<or>
678+
(or (zexti16 (XLenVT GPR:$rs1)),
679+
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
680+
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24))),
681+
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
682+
def : Pat<(binop_allwusers<or>
683+
(or (zexti16 (XLenVT GPR:$rs1)),
684+
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 24))),
685+
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
686+
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
666687
} // Predicates = [HasStdExtZbkb, IsRV64]
667688

668689
let Predicates = [HasStdExtZbb, IsRV32] in

llvm/test/CodeGen/RISCV/rv64zbkb.ll

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,3 +392,125 @@ define i64 @zext_i16_to_i64(i16 %a) nounwind {
392392
%1 = zext i16 %a to i64
393393
ret i64 %1
394394
}
395+
396+
define void @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3, ptr %p) nounwind {
397+
; RV64I-LABEL: pack_lo_packh_hi_packh:
398+
; RV64I: # %bb.0:
399+
; RV64I-NEXT: slli a1, a1, 8
400+
; RV64I-NEXT: slli a2, a2, 16
401+
; RV64I-NEXT: slli a3, a3, 24
402+
; RV64I-NEXT: or a0, a0, a1
403+
; RV64I-NEXT: or a2, a2, a3
404+
; RV64I-NEXT: or a0, a0, a2
405+
; RV64I-NEXT: sw a0, 0(a4)
406+
; RV64I-NEXT: ret
407+
;
408+
; RV64ZBKB-LABEL: pack_lo_packh_hi_packh:
409+
; RV64ZBKB: # %bb.0:
410+
; RV64ZBKB-NEXT: packh a0, a0, a1
411+
; RV64ZBKB-NEXT: packh a1, a2, a3
412+
; RV64ZBKB-NEXT: packw a0, a0, a1
413+
; RV64ZBKB-NEXT: sw a0, 0(a4)
414+
; RV64ZBKB-NEXT: ret
415+
%a = zext i8 %0 to i32
416+
%b = zext i8 %1 to i32
417+
%c = zext i8 %2 to i32
418+
%d = zext i8 %3 to i32
419+
%e = shl i32 %b, 8
420+
%f = shl i32 %c, 16
421+
%g = shl i32 %d, 24
422+
%h = or i32 %a, %e
423+
%i = or i32 %h, %f
424+
%j = or i32 %i, %g
425+
store i32 %j, ptr %p
426+
ret void
427+
}
428+
429+
define void @pack_lo_packh_hi_packh_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3, ptr %p) nounwind {
430+
; RV64I-LABEL: pack_lo_packh_hi_packh_2:
431+
; RV64I: # %bb.0:
432+
; RV64I-NEXT: slli a1, a1, 8
433+
; RV64I-NEXT: slli a2, a2, 16
434+
; RV64I-NEXT: slli a3, a3, 24
435+
; RV64I-NEXT: or a0, a0, a1
436+
; RV64I-NEXT: or a2, a2, a3
437+
; RV64I-NEXT: or a0, a2, a0
438+
; RV64I-NEXT: sw a0, 0(a4)
439+
; RV64I-NEXT: ret
440+
;
441+
; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_2:
442+
; RV64ZBKB: # %bb.0:
443+
; RV64ZBKB-NEXT: packh a0, a0, a1
444+
; RV64ZBKB-NEXT: packh a1, a3, a2
445+
; RV64ZBKB-NEXT: packw a0, a0, a1
446+
; RV64ZBKB-NEXT: sw a0, 0(a4)
447+
; RV64ZBKB-NEXT: ret
448+
%a = zext i8 %0 to i32
449+
%b = zext i8 %1 to i32
450+
%c = zext i8 %2 to i32
451+
%d = zext i8 %3 to i32
452+
%e = shl i32 %b, 8
453+
%f = shl i32 %c, 16
454+
%g = shl i32 %d, 24
455+
%h = or i32 %a, %e
456+
%i = or i32 %g, %h
457+
%j = or i32 %f, %i
458+
store i32 %j, ptr %p
459+
ret void
460+
}
461+
462+
define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind {
463+
; RV64I-LABEL: pack_lo_zext_hi_packh:
464+
; RV64I: # %bb.0:
465+
; RV64I-NEXT: slli a1, a2, 16
466+
; RV64I-NEXT: slli a2, a2, 24
467+
; RV64I-NEXT: or a1, a2, a1
468+
; RV64I-NEXT: or a0, a1, a0
469+
; RV64I-NEXT: sw a0, 0(a3)
470+
; RV64I-NEXT: ret
471+
;
472+
; RV64ZBKB-LABEL: pack_lo_zext_hi_packh:
473+
; RV64ZBKB: # %bb.0:
474+
; RV64ZBKB-NEXT: packh a1, a2, a2
475+
; RV64ZBKB-NEXT: packw a0, a0, a1
476+
; RV64ZBKB-NEXT: sw a0, 0(a3)
477+
; RV64ZBKB-NEXT: ret
478+
%a = zext i16 %0 to i32
479+
%b = zext i8 %1 to i32
480+
%c = zext i8 %2 to i32
481+
%d = shl i32 %c, 8
482+
%e = or i32 %c, %d
483+
%f = shl i32 %e, 16
484+
%g = or i32 %f, %a
485+
store i32 %g, ptr %p
486+
ret void
487+
}
488+
489+
; Negative test, %a isn't extended so we can't use packw for the outer or, but
490+
; we can use packh for the high half.
491+
define void @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind {
492+
; RV64I-LABEL: pack_lo_noext_hi_packh:
493+
; RV64I: # %bb.0:
494+
; RV64I-NEXT: slli a1, a2, 16
495+
; RV64I-NEXT: slli a2, a2, 24
496+
; RV64I-NEXT: or a1, a2, a1
497+
; RV64I-NEXT: or a0, a1, a0
498+
; RV64I-NEXT: sw a0, 0(a3)
499+
; RV64I-NEXT: ret
500+
;
501+
; RV64ZBKB-LABEL: pack_lo_noext_hi_packh:
502+
; RV64ZBKB: # %bb.0:
503+
; RV64ZBKB-NEXT: packh a1, a2, a2
504+
; RV64ZBKB-NEXT: slli a1, a1, 16
505+
; RV64ZBKB-NEXT: or a0, a1, a0
506+
; RV64ZBKB-NEXT: sw a0, 0(a3)
507+
; RV64ZBKB-NEXT: ret
508+
%b = zext i8 %1 to i32
509+
%c = zext i8 %2 to i32
510+
%d = shl i32 %c, 8
511+
%e = or i32 %c, %d
512+
%f = shl i32 %e, 16
513+
%g = or i32 %f, %a
514+
store i32 %g, ptr %p
515+
ret void
516+
}

llvm/test/CodeGen/RISCV/unaligned-load-store.ll

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -204,18 +204,16 @@ define i64 @load_i64(ptr %p) {
204204
; RV64IZBKB-NEXT: lbu a2, 5(a0)
205205
; RV64IZBKB-NEXT: lbu a3, 6(a0)
206206
; RV64IZBKB-NEXT: lbu a4, 7(a0)
207-
; RV64IZBKB-NEXT: lbu a5, 0(a0)
208-
; RV64IZBKB-NEXT: lbu a6, 1(a0)
209-
; RV64IZBKB-NEXT: lbu a7, 2(a0)
210-
; RV64IZBKB-NEXT: lbu a0, 3(a0)
207+
; RV64IZBKB-NEXT: lbu a5, 1(a0)
208+
; RV64IZBKB-NEXT: lbu a6, 2(a0)
209+
; RV64IZBKB-NEXT: lbu a7, 3(a0)
210+
; RV64IZBKB-NEXT: lbu a0, 0(a0)
211+
; RV64IZBKB-NEXT: packh a3, a3, a4
211212
; RV64IZBKB-NEXT: packh a1, a1, a2
212-
; RV64IZBKB-NEXT: packh a2, a3, a4
213-
; RV64IZBKB-NEXT: packh a3, a5, a6
214-
; RV64IZBKB-NEXT: packh a0, a7, a0
215-
; RV64IZBKB-NEXT: slli a2, a2, 16
216-
; RV64IZBKB-NEXT: slli a0, a0, 16
217-
; RV64IZBKB-NEXT: or a1, a2, a1
218-
; RV64IZBKB-NEXT: or a0, a0, a3
213+
; RV64IZBKB-NEXT: packh a2, a6, a7
214+
; RV64IZBKB-NEXT: packh a0, a0, a5
215+
; RV64IZBKB-NEXT: packw a1, a1, a3
216+
; RV64IZBKB-NEXT: packw a0, a0, a2
219217
; RV64IZBKB-NEXT: pack a0, a0, a1
220218
; RV64IZBKB-NEXT: ret
221219
;

0 commit comments

Comments
 (0)