@@ -779,3 +779,80 @@ define i128 @shl128_shamt32(i128 %a, i32 signext %b) nounwind {
779779 %1 = shl i128 %a , %zext
780780 ret i128 %1
781781}
782+
783+ ; Do some arithmetic on the i32 shift amount before the zext nneg. This
784+ ; arithmetic will be promoted using a W instruction on RV64. Make sure we can
785+ ; use this to avoid an unnecessary zext of the shift amount.
786+ define i128 @shl128_shamt32_arith (i128 %a , i32 signext %b ) nounwind {
787+ ; RV32I-LABEL: shl128_shamt32_arith:
788+ ; RV32I: # %bb.0:
789+ ; RV32I-NEXT: addi sp, sp, -32
790+ ; RV32I-NEXT: lw a3, 0(a1)
791+ ; RV32I-NEXT: lw a4, 4(a1)
792+ ; RV32I-NEXT: lw a5, 8(a1)
793+ ; RV32I-NEXT: lw a1, 12(a1)
794+ ; RV32I-NEXT: addi a2, a2, 1
795+ ; RV32I-NEXT: sw zero, 0(sp)
796+ ; RV32I-NEXT: sw zero, 4(sp)
797+ ; RV32I-NEXT: sw zero, 8(sp)
798+ ; RV32I-NEXT: sw zero, 12(sp)
799+ ; RV32I-NEXT: addi a6, sp, 16
800+ ; RV32I-NEXT: srli a7, a2, 3
801+ ; RV32I-NEXT: andi t0, a2, 31
802+ ; RV32I-NEXT: andi a7, a7, 12
803+ ; RV32I-NEXT: sub a6, a6, a7
804+ ; RV32I-NEXT: sw a3, 16(sp)
805+ ; RV32I-NEXT: sw a4, 20(sp)
806+ ; RV32I-NEXT: sw a5, 24(sp)
807+ ; RV32I-NEXT: sw a1, 28(sp)
808+ ; RV32I-NEXT: lw a1, 0(a6)
809+ ; RV32I-NEXT: lw a3, 4(a6)
810+ ; RV32I-NEXT: lw a4, 8(a6)
811+ ; RV32I-NEXT: lw a5, 12(a6)
812+ ; RV32I-NEXT: xori a6, t0, 31
813+ ; RV32I-NEXT: sll a7, a3, a2
814+ ; RV32I-NEXT: srli t0, a1, 1
815+ ; RV32I-NEXT: sll a5, a5, a2
816+ ; RV32I-NEXT: sll a1, a1, a2
817+ ; RV32I-NEXT: sll a2, a4, a2
818+ ; RV32I-NEXT: srli a3, a3, 1
819+ ; RV32I-NEXT: srli a4, a4, 1
820+ ; RV32I-NEXT: srl t0, t0, a6
821+ ; RV32I-NEXT: srl a3, a3, a6
822+ ; RV32I-NEXT: srl a4, a4, a6
823+ ; RV32I-NEXT: or a6, a7, t0
824+ ; RV32I-NEXT: or a2, a2, a3
825+ ; RV32I-NEXT: or a4, a5, a4
826+ ; RV32I-NEXT: sw a1, 0(a0)
827+ ; RV32I-NEXT: sw a6, 4(a0)
828+ ; RV32I-NEXT: sw a2, 8(a0)
829+ ; RV32I-NEXT: sw a4, 12(a0)
830+ ; RV32I-NEXT: addi sp, sp, 32
831+ ; RV32I-NEXT: ret
832+ ;
833+ ; RV64I-LABEL: shl128_shamt32_arith:
834+ ; RV64I: # %bb.0:
835+ ; RV64I-NEXT: addi a2, a2, 1
836+ ; RV64I-NEXT: slli a4, a2, 32
837+ ; RV64I-NEXT: srli a4, a4, 32
838+ ; RV64I-NEXT: addi a3, a4, -64
839+ ; RV64I-NEXT: bltz a3, .LBB17_2
840+ ; RV64I-NEXT: # %bb.1:
841+ ; RV64I-NEXT: sll a1, a0, a4
842+ ; RV64I-NEXT: j .LBB17_3
843+ ; RV64I-NEXT: .LBB17_2:
844+ ; RV64I-NEXT: sll a1, a1, a2
845+ ; RV64I-NEXT: srli a5, a0, 1
846+ ; RV64I-NEXT: not a4, a4
847+ ; RV64I-NEXT: srl a4, a5, a4
848+ ; RV64I-NEXT: or a1, a1, a4
849+ ; RV64I-NEXT: .LBB17_3:
850+ ; RV64I-NEXT: sll a0, a0, a2
851+ ; RV64I-NEXT: srai a3, a3, 63
852+ ; RV64I-NEXT: and a0, a3, a0
853+ ; RV64I-NEXT: ret
854+ %c = add i32 %b , 1 ; i32 arithmetic on the shamt; selected as a W instruction on RV64
855+ %zext = zext nneg i32 %c to i128 ; nneg asserts %c is non-negative, so the zext loses no information
856+ %1 = shl i128 %a , %zext ; 128-bit shift by the (zero-extended) adjusted amount
857+ ret i128 %1
858+ }
0 commit comments