@@ -756,3 +756,104 @@ define void @lmul_8_x9() nounwind {
   %v9 = alloca <vscale x 8 x i64>
   ret void
 }
+
+define void @lmul_16_align() nounwind {
+; NOZBA-LABEL: lmul_16_align:
+; NOZBA:       # %bb.0:
+; NOZBA-NEXT:    addi sp, sp, -144
+; NOZBA-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; NOZBA-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; NOZBA-NEXT:    addi s0, sp, 144
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    li a1, 24
+; NOZBA-NEXT:    mul a0, a0, a1
+; NOZBA-NEXT:    sub sp, sp, a0
+; NOZBA-NEXT:    andi sp, sp, -128
+; NOZBA-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; NOZBA-NEXT:    vmv.v.i v8, 0
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    add a0, sp, a0
+; NOZBA-NEXT:    addi a0, a0, 128
+; NOZBA-NEXT:    vs8r.v v8, (a0)
+; NOZBA-NEXT:    csrr a1, vlenb
+; NOZBA-NEXT:    slli a1, a1, 3
+; NOZBA-NEXT:    add a0, a0, a1
+; NOZBA-NEXT:    vs8r.v v8, (a0)
+; NOZBA-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; NOZBA-NEXT:    vmv.v.i v8, 0
+; NOZBA-NEXT:    addi a0, sp, 128
+; NOZBA-NEXT:    vs1r.v v8, (a0)
+; NOZBA-NEXT:    addi sp, s0, -144
+; NOZBA-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; NOZBA-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; NOZBA-NEXT:    addi sp, sp, 144
+; NOZBA-NEXT:    ret
+;
+; ZBA-LABEL: lmul_16_align:
+; ZBA:       # %bb.0:
+; ZBA-NEXT:    addi sp, sp, -144
+; ZBA-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; ZBA-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; ZBA-NEXT:    addi s0, sp, 144
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    slli a0, a0, 3
+; ZBA-NEXT:    sh1add a0, a0, a0
+; ZBA-NEXT:    sub sp, sp, a0
+; ZBA-NEXT:    andi sp, sp, -128
+; ZBA-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; ZBA-NEXT:    vmv.v.i v8, 0
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    add a0, sp, a0
+; ZBA-NEXT:    addi a0, a0, 128
+; ZBA-NEXT:    vs8r.v v8, (a0)
+; ZBA-NEXT:    csrr a1, vlenb
+; ZBA-NEXT:    sh3add a0, a1, a0
+; ZBA-NEXT:    vs8r.v v8, (a0)
+; ZBA-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; ZBA-NEXT:    vmv.v.i v8, 0
+; ZBA-NEXT:    addi a0, sp, 128
+; ZBA-NEXT:    vs1r.v v8, (a0)
+; ZBA-NEXT:    addi sp, s0, -144
+; ZBA-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; ZBA-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; ZBA-NEXT:    addi sp, sp, 144
+; ZBA-NEXT:    ret
+;
+; NOMUL-LABEL: lmul_16_align:
+; NOMUL:       # %bb.0:
+; NOMUL-NEXT:    addi sp, sp, -144
+; NOMUL-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; NOMUL-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; NOMUL-NEXT:    addi s0, sp, 144
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 3
+; NOMUL-NEXT:    mv a1, a0
+; NOMUL-NEXT:    slli a0, a0, 1
+; NOMUL-NEXT:    add a0, a0, a1
+; NOMUL-NEXT:    sub sp, sp, a0
+; NOMUL-NEXT:    andi sp, sp, -128
+; NOMUL-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; NOMUL-NEXT:    vmv.v.i v8, 0
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    add a0, sp, a0
+; NOMUL-NEXT:    addi a0, a0, 128
+; NOMUL-NEXT:    vs8r.v v8, (a0)
+; NOMUL-NEXT:    csrr a1, vlenb
+; NOMUL-NEXT:    slli a1, a1, 3
+; NOMUL-NEXT:    add a0, a0, a1
+; NOMUL-NEXT:    vs8r.v v8, (a0)
+; NOMUL-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; NOMUL-NEXT:    vmv.v.i v8, 0
+; NOMUL-NEXT:    addi a0, sp, 128
+; NOMUL-NEXT:    vs1r.v v8, (a0)
+; NOMUL-NEXT:    addi sp, s0, -144
+; NOMUL-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; NOMUL-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; NOMUL-NEXT:    addi sp, sp, 144
+; NOMUL-NEXT:    ret
+  %v1 = alloca <vscale x 16 x i64>
+  %v2 = alloca <vscale x 1 x i64>
+  store <vscale x 16 x i64> zeroinitializer, ptr %v1
+  store <vscale x 1 x i64> zeroinitializer, ptr %v2
+  ret void
+}