@@ -392,3 +392,125 @@ define i64 @zext_i16_to_i64(i16 %a) nounwind {
392392 %1 = zext i16 %a to i64
393393 ret i64 %1
394394}
395+
; Builds an i32 from four zero-extended i8 arguments and stores it through %p:
; %0 lands in bits 7:0, %1 in bits 15:8, %2 in bits 23:16 and %3 in bits 31:24.
; The three ORs are chained left to right: ((%a | %e) | %f) | %g.
; With Zbkb the expected code is one packh per 16-bit half followed by a packw
; that joins the halves; the base-ISA run expects plain slli/or.
396+ define void @pack_lo_packh_hi_packh (i8 zeroext %0 , i8 zeroext %1 , i8 zeroext %2 , i8 zeroext %3 , ptr %p ) nounwind {
397+ ; RV64I-LABEL: pack_lo_packh_hi_packh:
398+ ; RV64I: # %bb.0:
399+ ; RV64I-NEXT: slli a1, a1, 8
400+ ; RV64I-NEXT: slli a2, a2, 16
401+ ; RV64I-NEXT: slli a3, a3, 24
402+ ; RV64I-NEXT: or a0, a0, a1
403+ ; RV64I-NEXT: or a2, a2, a3
404+ ; RV64I-NEXT: or a0, a0, a2
405+ ; RV64I-NEXT: sw a0, 0(a4)
406+ ; RV64I-NEXT: ret
407+ ;
408+ ; RV64ZBKB-LABEL: pack_lo_packh_hi_packh:
409+ ; RV64ZBKB: # %bb.0:
410+ ; RV64ZBKB-NEXT: packh a0, a0, a1
411+ ; RV64ZBKB-NEXT: packh a1, a2, a3
412+ ; RV64ZBKB-NEXT: packw a0, a0, a1
413+ ; RV64ZBKB-NEXT: sw a0, 0(a4)
414+ ; RV64ZBKB-NEXT: ret
415+ %a = zext i8 %0 to i32 ; byte for bits 7:0
416+ %b = zext i8 %1 to i32
417+ %c = zext i8 %2 to i32
418+ %d = zext i8 %3 to i32
419+ %e = shl i32 %b , 8 ; %1 -> bits 15:8
420+ %f = shl i32 %c , 16 ; %2 -> bits 23:16
421+ %g = shl i32 %d , 24 ; %3 -> bits 31:24
422+ %h = or i32 %a , %e ; low 16-bit half
423+ %i = or i32 %h , %f ; high-half bytes are folded in one at a time
424+ %j = or i32 %i , %g
425+ store i32 %j , ptr %p
426+ ret void
427+ }
428+
; Builds an i32 from four zero-extended i8 arguments and stores it through %p:
; %0 lands in bits 7:0, %1 in bits 15:8, %2 in bits 23:16 and %3 in bits 31:24.
; The ORs are reassociated relative to a simple left-to-right chain:
; %h = %a | %e, %i = %g | %h, %j = %f | %i, so the two high-half bytes (%f and
; %g) are never OR'ed directly with each other in the IR.
;
; NOTE(review): the Zbkb expectation for the high half previously read
; `packh a1, a3, a2`. packh places rs1 in bits 7:0 and rs2 in bits 15:8 of the
; result, and packw places its rs2 half in bits 31:16, so that operand order
; would store %3 in bits 23:16 and %2 in bits 31:24 -- the opposite of the IR
; below and of the base-ISA expectation (a2 << 16, a3 << 24). The expectation
; now uses `packh a1, a2, a3`. If llc still emits the old order, the selection
; pattern for this reassociated form presumably has its operands swapped --
; confirm, fix it, then regenerate these assertions with
; utils/update_llc_test_checks.py.
429+ define void @pack_lo_packh_hi_packh_2 (i8 zeroext %0 , i8 zeroext %1 , i8 zeroext %2 , i8 zeroext %3 , ptr %p ) nounwind {
430+ ; RV64I-LABEL: pack_lo_packh_hi_packh_2:
431+ ; RV64I: # %bb.0:
432+ ; RV64I-NEXT: slli a1, a1, 8
433+ ; RV64I-NEXT: slli a2, a2, 16
434+ ; RV64I-NEXT: slli a3, a3, 24
435+ ; RV64I-NEXT: or a0, a0, a1
436+ ; RV64I-NEXT: or a2, a2, a3
437+ ; RV64I-NEXT: or a0, a2, a0
438+ ; RV64I-NEXT: sw a0, 0(a4)
439+ ; RV64I-NEXT: ret
440+ ;
441+ ; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_2:
442+ ; RV64ZBKB: # %bb.0:
443+ ; RV64ZBKB-NEXT: packh a0, a0, a1
444+ ; RV64ZBKB-NEXT: packh a1, a2, a3
445+ ; RV64ZBKB-NEXT: packw a0, a0, a1
446+ ; RV64ZBKB-NEXT: sw a0, 0(a4)
447+ ; RV64ZBKB-NEXT: ret
448+ %a = zext i8 %0 to i32 ; byte for bits 7:0
449+ %b = zext i8 %1 to i32
450+ %c = zext i8 %2 to i32
451+ %d = zext i8 %3 to i32
452+ %e = shl i32 %b , 8 ; %1 -> bits 15:8
453+ %f = shl i32 %c , 16 ; %2 -> bits 23:16
454+ %g = shl i32 %d , 24 ; %3 -> bits 31:24
455+ %h = or i32 %a , %e ; low 16-bit half
456+ %i = or i32 %g , %h ; top byte joins the low half first
457+ %j = or i32 %f , %i ; bits 23:16 are added last
458+ store i32 %j , ptr %p
459+ ret void
460+ }
461+
; Stores an i32 through %p whose low 16 bits are the zero-extended i16 %0 and
; whose high 16 bits are built from %2 alone: %2 appears in both bits 23:16
; and bits 31:24 (%e = %c | (%c << 8), then shifted left by 16).
; With Zbkb the expected code is a packh for the high half and a packw joining
; it with the already zero-extended low half.
;
; NOTE(review): %b (the zero-extension of %1) is never used, so %1 does not
; contribute to the stored value and both packh operands are a2. Possibly
; `%e` was meant to be `or i32 %b , %d` -- confirm with the author. Changing
; it requires regenerating the assertions with update_llc_test_checks.py.
462+ define void @pack_lo_zext_hi_packh (i16 zeroext %0 , i8 zeroext %1 , i8 zeroext %2 , ptr %p ) nounwind {
463+ ; RV64I-LABEL: pack_lo_zext_hi_packh:
464+ ; RV64I: # %bb.0:
465+ ; RV64I-NEXT: slli a1, a2, 16
466+ ; RV64I-NEXT: slli a2, a2, 24
467+ ; RV64I-NEXT: or a1, a2, a1
468+ ; RV64I-NEXT: or a0, a1, a0
469+ ; RV64I-NEXT: sw a0, 0(a3)
470+ ; RV64I-NEXT: ret
471+ ;
472+ ; RV64ZBKB-LABEL: pack_lo_zext_hi_packh:
473+ ; RV64ZBKB: # %bb.0:
474+ ; RV64ZBKB-NEXT: packh a1, a2, a2
475+ ; RV64ZBKB-NEXT: packw a0, a0, a1
476+ ; RV64ZBKB-NEXT: sw a0, 0(a3)
477+ ; RV64ZBKB-NEXT: ret
478+ %a = zext i16 %0 to i32 ; low 16-bit half
479+ %b = zext i8 %1 to i32 ; unused below
480+ %c = zext i8 %2 to i32
481+ %d = shl i32 %c , 8
482+ %e = or i32 %c , %d ; %2 in both bytes of the 16-bit value
483+ %f = shl i32 %e , 16 ; move it to the high half
484+ %g = or i32 %f , %a
485+ store i32 %g , ptr %p
486+ ret void
487+ }
488+
489+ ; Negative test: %a isn't extended, so we can't use packw for the outer or, but
490+ ; we can still use packh for the high half.
; The high half is built from %2 alone (%e = %c | (%c << 8), shifted left by
; 16) and OR'ed with the full 32-bit %a. The Zbkb expectations are therefore
; packh followed by an explicit slli/or instead of packw.
;
; NOTE(review): %b (the zero-extension of %1) is never used, so %1 does not
; contribute to the stored value and both packh operands are a2. Possibly
; `%e` was meant to be `or i32 %b , %d` -- confirm with the author. Changing
; it requires regenerating the assertions with update_llc_test_checks.py.
491+ define void @pack_lo_noext_hi_packh (i32 %a , i8 zeroext %1 , i8 zeroext %2 , ptr %p ) nounwind {
492+ ; RV64I-LABEL: pack_lo_noext_hi_packh:
493+ ; RV64I: # %bb.0:
494+ ; RV64I-NEXT: slli a1, a2, 16
495+ ; RV64I-NEXT: slli a2, a2, 24
496+ ; RV64I-NEXT: or a1, a2, a1
497+ ; RV64I-NEXT: or a0, a1, a0
498+ ; RV64I-NEXT: sw a0, 0(a3)
499+ ; RV64I-NEXT: ret
500+ ;
501+ ; RV64ZBKB-LABEL: pack_lo_noext_hi_packh:
502+ ; RV64ZBKB: # %bb.0:
503+ ; RV64ZBKB-NEXT: packh a1, a2, a2
504+ ; RV64ZBKB-NEXT: slli a1, a1, 16
505+ ; RV64ZBKB-NEXT: or a0, a1, a0
506+ ; RV64ZBKB-NEXT: sw a0, 0(a3)
507+ ; RV64ZBKB-NEXT: ret
508+ %b = zext i8 %1 to i32 ; unused below
509+ %c = zext i8 %2 to i32
510+ %d = shl i32 %c , 8
511+ %e = or i32 %c , %d ; %2 in both bytes of the 16-bit value
512+ %f = shl i32 %e , 16 ; move it to the high half
513+ %g = or i32 %f , %a ; %a is a plain i32 argument, not an extension
514+ store i32 %g , ptr %p
515+ ret void
516+ }
0 commit comments